From a70818d06585c5e1fca0bc1a0fc46fdac46046e5 Mon Sep 17 00:00:00 2001 From: jsing Date: Wed, 22 Feb 2023 05:57:19 +0000 Subject: [PATCH] Replace bn_sub_part_words() with bn_sub(). Now that bn_sub() handles word arrays with potentially different lengths, we no longer need bn_sub_part_words() - call bn_sub() instead. This allows us to entirely remove the unnecessarily complex bn_sub_part_words() code. ok tb@ --- lib/libcrypto/arch/i386/Makefile.inc | 3 +- lib/libcrypto/bn/asm/bn-586.pl | 210 --------------------------- lib/libcrypto/bn/bn_local.h | 4 +- lib/libcrypto/bn/bn_mul.c | 172 +++------------------- 4 files changed, 19 insertions(+), 370 deletions(-) diff --git a/lib/libcrypto/arch/i386/Makefile.inc b/lib/libcrypto/arch/i386/Makefile.inc index 67c22262e61..6134dfdc155 100644 --- a/lib/libcrypto/arch/i386/Makefile.inc +++ b/lib/libcrypto/arch/i386/Makefile.inc @@ -1,4 +1,4 @@ -# $OpenBSD: Makefile.inc,v 1.7 2023/01/14 15:45:43 jsing Exp $ +# $OpenBSD: Makefile.inc,v 1.8 2023/02/22 05:57:19 jsing Exp $ # i386-specific libcrypto build rules @@ -16,7 +16,6 @@ SRCS+= bf_cbc.c SSLASM+= bf bf-586 # bn CFLAGS+= -DOPENSSL_IA32_SSE2 -CFLAGS+= -DOPENSSL_BN_ASM_PART_WORDS SSLASM+= bn bn-586 SSLASM+= bn co-586 CFLAGS+= -DOPENSSL_BN_ASM_MONT diff --git a/lib/libcrypto/bn/asm/bn-586.pl b/lib/libcrypto/bn/asm/bn-586.pl index b502fe60ee2..71b775af8d6 100644 --- a/lib/libcrypto/bn/asm/bn-586.pl +++ b/lib/libcrypto/bn/asm/bn-586.pl @@ -17,7 +17,6 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &bn_div_words("bn_div_words"); &bn_add_words("bn_add_words"); &bn_sub_words("bn_sub_words"); -&bn_sub_part_words("bn_sub_part_words"); &asm_finish(); @@ -566,212 +565,3 @@ sub bn_sub_words &function_end($name); } - -sub bn_sub_part_words - { - local($name)=@_; - - &function_begin($name,""); - - &comment(""); - $a="esi"; - $b="edi"; - $c="eax"; - $r="ebx"; - $tmp1="ecx"; - $tmp2="edx"; - $num="ebp"; - - &mov($r,&wparam(0)); # get r - &mov($a,&wparam(1)); # get a - &mov($b,&wparam(2)); # get b - &mov($num,&wparam(3)); # get num - &xor($c,$c); # clear carry - &and($num,0xfffffff8); # num / 8 - - &jz(&label("aw_finish")); - - &set_label("aw_loop",0); - for ($i=0; $i<8; $i++) - { - &comment("Round $i"); - - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov($tmp2,&DWP($i*4,$b,"",0)); # *b - &sub($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &sub($tmp1,$tmp2); - &adc($c,0); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - } - - &comment(""); - &add($a,32); - &add($b,32); - &add($r,32); - &sub($num,8); - &jnz(&label("aw_loop")); - - &set_label("aw_finish",0); - &mov($num,&wparam(3)); # get num - &and($num,7); - &jz(&label("aw_end")); - - for ($i=0; $i<7; $i++) - { - &comment("Tail Round $i"); - &mov($tmp1,&DWP(0,$a,"",0)); # *a - &mov($tmp2,&DWP(0,$b,"",0));# *b - &sub($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &sub($tmp1,$tmp2); - &adc($c,0); - &mov(&DWP(0,$r,"",0),$tmp1); # *r - &add($a, 4); - &add($b, 4); - &add($r, 4); - &dec($num) if ($i != 6); - &jz(&label("aw_end")) if ($i != 6); - } - &set_label("aw_end",0); - - &cmp(&wparam(4),0); - &je(&label("pw_end")); - - &mov($num,&wparam(4)); # get dl - &cmp($num,0); - &je(&label("pw_end")); - &jge(&label("pw_pos")); - - &comment("pw_neg"); - &mov($tmp2,0); - &sub($tmp2,$num); - &mov($num,$tmp2); - &and($num,0xfffffff8); # num / 8 - &jz(&label("pw_neg_finish")); - - &set_label("pw_neg_loop",0); - for ($i=0; $i<8; $i++) - { - &comment("dl<0 Round $i"); - - &mov($tmp1,0); - &mov($tmp2,&DWP($i*4,$b,"",0)); # *b - &sub($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &sub($tmp1,$tmp2); - &adc($c,0); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - } - - &comment(""); - &add($b,32); - &add($r,32); - &sub($num,8); - &jnz(&label("pw_neg_loop")); - - &set_label("pw_neg_finish",0); - &mov($tmp2,&wparam(4)); # get dl - &mov($num,0); - &sub($num,$tmp2); - &and($num,7); - &jz(&label("pw_end")); - - for ($i=0; $i<7; $i++) - { - &comment("dl<0 Tail Round $i"); - &mov($tmp1,0); - &mov($tmp2,&DWP($i*4,$b,"",0));# *b - &sub($tmp1,$c); - &mov($c,0); - &adc($c,$c); - &sub($tmp1,$tmp2); - &adc($c,0); - &dec($num) if ($i != 6); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - &jz(&label("pw_end")) if ($i != 6); - } - - &jmp(&label("pw_end")); - - &set_label("pw_pos",0); - - &and($num,0xfffffff8); # num / 8 - &jz(&label("pw_pos_finish")); - - &set_label("pw_pos_loop",0); - - for ($i=0; $i<8; $i++) - { - &comment("dl>0 Round $i"); - - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &sub($tmp1,$c); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - &jnc(&label("pw_nc".$i)); - } - - &comment(""); - &add($a,32); - &add($r,32); - &sub($num,8); - &jnz(&label("pw_pos_loop")); - - &set_label("pw_pos_finish",0); - &mov($num,&wparam(4)); # get dl - &and($num,7); - &jz(&label("pw_end")); - - for ($i=0; $i<7; $i++) - { - &comment("dl>0 Tail Round $i"); - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &sub($tmp1,$c); - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - &jnc(&label("pw_tail_nc".$i)); - &dec($num) if ($i != 6); - &jz(&label("pw_end")) if ($i != 6); - } - &mov($c,1); - &jmp(&label("pw_end")); - - &set_label("pw_nc_loop",0); - for ($i=0; $i<8; $i++) - { - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - &set_label("pw_nc".$i,0); - } - - &comment(""); - &add($a,32); - &add($r,32); - &sub($num,8); - &jnz(&label("pw_nc_loop")); - - &mov($num,&wparam(4)); # get dl - &and($num,7); - &jz(&label("pw_nc_end")); - - for ($i=0; $i<7; $i++) - { - &mov($tmp1,&DWP($i*4,$a,"",0)); # *a - &mov(&DWP($i*4,$r,"",0),$tmp1); # *r - &set_label("pw_tail_nc".$i,0); - &dec($num) if ($i != 6); - &jz(&label("pw_nc_end")) if ($i != 6); - } - - &set_label("pw_nc_end",0); - &mov($c,0); - - &set_label("pw_end",0); - -# &mov("eax",$c); # $c is "eax" - - &function_end($name); - } - diff --git a/lib/libcrypto/bn/bn_local.h b/lib/libcrypto/bn/bn_local.h index 3e37238c5e8..4576c36c917 100644 --- a/lib/libcrypto/bn/bn_local.h +++ b/lib/libcrypto/bn/bn_local.h @@ -1,4 +1,4 @@ -/* $OpenBSD: bn_local.h,v 1.16 2023/02/22 05:46:37 jsing Exp $ */ +/* $OpenBSD: bn_local.h,v 1.17 2023/02/22 05:57:19 jsing Exp $ */ /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. * @@ -264,8 +264,6 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna, int tnb, BN_ULONG *t); void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t); -BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, - int cl, int dl); int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num); diff --git a/lib/libcrypto/bn/bn_mul.c b/lib/libcrypto/bn/bn_mul.c index 1d56e57b76b..5e270b988f3 100644 --- a/lib/libcrypto/bn/bn_mul.c +++ b/lib/libcrypto/bn/bn_mul.c @@ -1,4 +1,4 @@ -/* $OpenBSD: bn_mul.c,v 1.33 2023/02/15 18:10:16 jsing Exp $ */ +/* $OpenBSD: bn_mul.c,v 1.34 2023/02/22 05:57:19 jsing Exp $ */ /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. * @@ -269,144 +269,6 @@ bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) } #endif -#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) -/* - * Here follows a specialised variant of bn_sub_words(), which has the property - * performing operations on arrays of different sizes. The sizes of those arrays - * is expressed through cl, which is the common length (basically, - * min(len(a),len(b))), and dl, which is the delta between the two lengths, - * calculated as len(a)-len(b). All lengths are the number of BN_ULONGs. For the - * operations that require a result array as parameter, it must have the length - * cl+abs(dl). - */ -BN_ULONG -bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl, - int dl) -{ - BN_ULONG c, t; - - assert(cl >= 0); - c = bn_sub_words(r, a, b, cl); - - if (dl == 0) - return c; - - r += cl; - a += cl; - b += cl; - - if (dl < 0) { - for (;;) { - t = b[0]; - r[0] = (0 - t - c) & BN_MASK2; - if (t != 0) - c = 1; - if (++dl >= 0) - break; - - t = b[1]; - r[1] = (0 - t - c) & BN_MASK2; - if (t != 0) - c = 1; - if (++dl >= 0) - break; - - t = b[2]; - r[2] = (0 - t - c) & BN_MASK2; - if (t != 0) - c = 1; - if (++dl >= 0) - break; - - t = b[3]; - r[3] = (0 - t - c) & BN_MASK2; - if (t != 0) - c = 1; - if (++dl >= 0) - break; - - b += 4; - r += 4; - } - } else { - int save_dl = dl; - while (c) { - t = a[0]; - r[0] = (t - c) & BN_MASK2; - if (t != 0) - c = 0; - if (--dl <= 0) - break; - - t = a[1]; - r[1] = (t - c) & BN_MASK2; - if (t != 0) - c = 0; - if (--dl <= 0) - break; - - t = a[2]; - r[2] = (t - c) & BN_MASK2; - if (t != 0) - c = 0; - if (--dl <= 0) - break; - - t = a[3]; - r[3] = (t - c) & BN_MASK2; - if (t != 0) - c = 0; - if (--dl <= 0) - break; - - save_dl = dl; - a += 4; - r += 4; - } - if (dl > 0) { - if (save_dl > dl) { - switch (save_dl - dl) { - case 1: - r[1] = a[1]; - if (--dl <= 0) - break; - case 2: - r[2] = a[2]; - if (--dl <= 0) - break; - case 3: - r[3] = a[3]; - if (--dl <= 0) - break; - } - a += 4; - r += 4; - } - } - if (dl > 0) { - for (;;) { - r[0] = a[0]; - if (--dl <= 0) - break; - r[1] = a[1]; - if (--dl <= 0) - break; - r[2] = a[2]; - if (--dl <= 0) - break; - r[3] = a[3]; - if (--dl <= 0) - break; - - a += 4; - r += 4; - } - } - } - return c; -} -#endif - void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) { @@ -504,15 +366,15 @@ bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna, zero = neg = 0; switch (c1 * 3 + c2) { case -4: - bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ - bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ + bn_sub(t, n, &a[n], tna, a, n); /* - */ + bn_sub(&t[n], n, b, n, &b[n], tnb); /* - */ break; case -3: zero = 1; break; case -2: - bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ - bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ + bn_sub(t, n, &a[n], tna, a, n); /* - */ + bn_sub(&t[n], n, &b[n], tnb, b, n); /* + */ neg = 1; break; case -1: @@ -521,16 +383,16 @@ bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna, zero = 1; break; case 2: - bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ - bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ + bn_sub(t, n, a, n, &a[n], tna); /* + */ + bn_sub(&t[n], n, b, n, &b[n], tnb); /* - */ neg = 1; break; case 3: zero = 1; break; case 4: - bn_sub_part_words(t, a, &(a[n]), tna, n - tna); - bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); + bn_sub(t, n, a, n, &a[n], tna); + bn_sub(&t[n], n, &b[n], tnb, b, n); break; } @@ -630,14 +492,14 @@ bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna, neg = 0; switch (c1 * 3 + c2) { case -4: - bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ - bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ + bn_sub(t, n, &a[n], tna, a, n); /* - */ + bn_sub(&t[n], n, b, n, &b[n], tnb); /* - */ break; case -3: /* break; */ case -2: - bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ - bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ + bn_sub(t, n, &a[n], tna, a, n); /* - */ + bn_sub(&t[n], n, &b[n], tnb, b, n); /* + */ neg = 1; break; case -1: @@ -645,15 +507,15 @@ bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna, case 1: /* break; */ case 2: - bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ - bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ + bn_sub(t, n, a, n, &a[n], tna); /* + */ + bn_sub(&t[n], n, b, n, &b[n], tnb); /* - */ neg = 1; break; case 3: /* break; */ case 4: - bn_sub_part_words(t, a, &(a[n]), tna, n - tna); - bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); + bn_sub(t, n, a, n, &a[n], tna); + bn_sub(&t[n], n, &b[n], tnb, b, n); break; } /* The zero case isn't yet implemented here. The speedup -- 2.20.1