Replace bn_sub_part_words() with bn_sub().
author jsing <jsing@openbsd.org>
Wed, 22 Feb 2023 05:57:19 +0000 (05:57 +0000)
committer jsing <jsing@openbsd.org>
Wed, 22 Feb 2023 05:57:19 +0000 (05:57 +0000)
Now that bn_sub() handles word arrays with potentially different lengths,
we no longer need bn_sub_part_words() - call bn_sub() instead. This allows
us to entirely remove the unnecessarily complex bn_sub_part_words() code.

ok tb@

lib/libcrypto/arch/i386/Makefile.inc
lib/libcrypto/bn/asm/bn-586.pl
lib/libcrypto/bn/bn_local.h
lib/libcrypto/bn/bn_mul.c

index 67c2226..6134dfd 100644 (file)
@@ -1,4 +1,4 @@
-# $OpenBSD: Makefile.inc,v 1.7 2023/01/14 15:45:43 jsing Exp $
+# $OpenBSD: Makefile.inc,v 1.8 2023/02/22 05:57:19 jsing Exp $
 
 # i386-specific libcrypto build rules
 
@@ -16,7 +16,6 @@ SRCS+= bf_cbc.c
 SSLASM+= bf bf-586
 # bn
 CFLAGS+= -DOPENSSL_IA32_SSE2
-CFLAGS+= -DOPENSSL_BN_ASM_PART_WORDS
 SSLASM+= bn bn-586
 SSLASM+= bn co-586
 CFLAGS+= -DOPENSSL_BN_ASM_MONT
index b502fe6..71b775a 100644 (file)
@@ -17,7 +17,6 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 &bn_div_words("bn_div_words");
 &bn_add_words("bn_add_words");
 &bn_sub_words("bn_sub_words");
-&bn_sub_part_words("bn_sub_part_words");
 
 &asm_finish();
 
@@ -566,212 +565,3 @@ sub bn_sub_words
 
        &function_end($name);
        }
-
-sub bn_sub_part_words
-       {
-       local($name)=@_;
-
-       &function_begin($name,"");
-
-       &comment("");
-       $a="esi";
-       $b="edi";
-       $c="eax";
-       $r="ebx";
-       $tmp1="ecx";
-       $tmp2="edx";
-       $num="ebp";
-
-       &mov($r,&wparam(0));    # get r
-        &mov($a,&wparam(1));   # get a
-       &mov($b,&wparam(2));    # get b
-        &mov($num,&wparam(3)); # get num
-       &xor($c,$c);            # clear carry
-        &and($num,0xfffffff8); # num / 8
-
-       &jz(&label("aw_finish"));
-
-       &set_label("aw_loop",0);
-       for ($i=0; $i<8; $i++)
-               {
-               &comment("Round $i");
-
-               &mov($tmp1,&DWP($i*4,$a,"",0));         # *a
-                &mov($tmp2,&DWP($i*4,$b,"",0));        # *b
-               &sub($tmp1,$c);
-                &mov($c,0);
-               &adc($c,$c);
-                &sub($tmp1,$tmp2);
-               &adc($c,0);
-                &mov(&DWP($i*4,$r,"",0),$tmp1);        # *r
-               }
-
-       &comment("");
-       &add($a,32);
-        &add($b,32);
-       &add($r,32);
-        &sub($num,8);
-       &jnz(&label("aw_loop"));
-
-       &set_label("aw_finish",0);
-       &mov($num,&wparam(3));  # get num
-       &and($num,7);
-        &jz(&label("aw_end"));
-
-       for ($i=0; $i<7; $i++)
-               {
-               &comment("Tail Round $i");
-               &mov($tmp1,&DWP(0,$a,"",0));    # *a
-                &mov($tmp2,&DWP(0,$b,"",0));# *b
-               &sub($tmp1,$c);
-                &mov($c,0);
-               &adc($c,$c);
-                &sub($tmp1,$tmp2);
-               &adc($c,0);
-               &mov(&DWP(0,$r,"",0),$tmp1);    # *r
-               &add($a, 4);
-               &add($b, 4);
-               &add($r, 4);
-                &dec($num) if ($i != 6);
-                &jz(&label("aw_end")) if ($i != 6);
-               }
-       &set_label("aw_end",0);
-
-       &cmp(&wparam(4),0);
-       &je(&label("pw_end"));
-
-       &mov($num,&wparam(4));  # get dl
-       &cmp($num,0);
-       &je(&label("pw_end"));
-       &jge(&label("pw_pos"));
-
-       &comment("pw_neg");
-       &mov($tmp2,0);
-       &sub($tmp2,$num);
-       &mov($num,$tmp2);
-       &and($num,0xfffffff8);  # num / 8
-       &jz(&label("pw_neg_finish"));
-
-       &set_label("pw_neg_loop",0);
-       for ($i=0; $i<8; $i++)
-       {
-           &comment("dl<0 Round $i");
-
-           &mov($tmp1,0);
-           &mov($tmp2,&DWP($i*4,$b,"",0));     # *b
-           &sub($tmp1,$c);
-           &mov($c,0);
-           &adc($c,$c);
-           &sub($tmp1,$tmp2);
-           &adc($c,0);
-           &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
-       }
-           
-       &comment("");
-       &add($b,32);
-       &add($r,32);
-       &sub($num,8);
-       &jnz(&label("pw_neg_loop"));
-           
-       &set_label("pw_neg_finish",0);
-       &mov($tmp2,&wparam(4)); # get dl
-       &mov($num,0);
-       &sub($num,$tmp2);
-       &and($num,7);
-       &jz(&label("pw_end"));
-           
-       for ($i=0; $i<7; $i++)
-       {
-           &comment("dl<0 Tail Round $i");
-           &mov($tmp1,0);
-           &mov($tmp2,&DWP($i*4,$b,"",0));# *b
-           &sub($tmp1,$c);
-           &mov($c,0);
-           &adc($c,$c);
-           &sub($tmp1,$tmp2);
-           &adc($c,0);
-           &dec($num) if ($i != 6);
-           &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
-           &jz(&label("pw_end")) if ($i != 6);
-       }
-
-       &jmp(&label("pw_end"));
-       
-       &set_label("pw_pos",0);
-       
-       &and($num,0xfffffff8);  # num / 8
-       &jz(&label("pw_pos_finish"));
-
-       &set_label("pw_pos_loop",0);
-
-       for ($i=0; $i<8; $i++)
-       {
-           &comment("dl>0 Round $i");
-
-           &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
-           &sub($tmp1,$c);
-           &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
-           &jnc(&label("pw_nc".$i));
-       }
-           
-       &comment("");
-       &add($a,32);
-       &add($r,32);
-       &sub($num,8);
-       &jnz(&label("pw_pos_loop"));
-           
-       &set_label("pw_pos_finish",0);
-       &mov($num,&wparam(4));  # get dl
-       &and($num,7);
-       &jz(&label("pw_end"));
-           
-       for ($i=0; $i<7; $i++)
-       {
-           &comment("dl>0 Tail Round $i");
-           &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
-           &sub($tmp1,$c);
-           &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
-           &jnc(&label("pw_tail_nc".$i));
-           &dec($num) if ($i != 6);
-           &jz(&label("pw_end")) if ($i != 6);
-       }
-       &mov($c,1);
-       &jmp(&label("pw_end"));
-
-       &set_label("pw_nc_loop",0);
-       for ($i=0; $i<8; $i++)
-       {
-           &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
-           &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
-           &set_label("pw_nc".$i,0);
-       }
-           
-       &comment("");
-       &add($a,32);
-       &add($r,32);
-       &sub($num,8);
-       &jnz(&label("pw_nc_loop"));
-           
-       &mov($num,&wparam(4));  # get dl
-       &and($num,7);
-       &jz(&label("pw_nc_end"));
-           
-       for ($i=0; $i<7; $i++)
-       {
-           &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
-           &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
-           &set_label("pw_tail_nc".$i,0);
-           &dec($num) if ($i != 6);
-           &jz(&label("pw_nc_end")) if ($i != 6);
-       }
-
-       &set_label("pw_nc_end",0);
-       &mov($c,0);
-
-       &set_label("pw_end",0);
-
-#      &mov("eax",$c);         # $c is "eax"
-
-       &function_end($name);
-       }
-
index 3e37238..4576c36 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: bn_local.h,v 1.16 2023/02/22 05:46:37 jsing Exp $ */
+/* $OpenBSD: bn_local.h,v 1.17 2023/02/22 05:57:19 jsing Exp $ */
 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  * All rights reserved.
  *
@@ -264,8 +264,6 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
 void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
     int n, int tna, int tnb, BN_ULONG *t);
 void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t);
-BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
-    int cl, int dl);
 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
     const BN_ULONG *np, const BN_ULONG *n0, int num);
 
index 1d56e57..5e270b9 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: bn_mul.c,v 1.33 2023/02/15 18:10:16 jsing Exp $ */
+/* $OpenBSD: bn_mul.c,v 1.34 2023/02/22 05:57:19 jsing Exp $ */
 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  * All rights reserved.
  *
@@ -269,144 +269,6 @@ bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w)
 }
 #endif
 
-#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
-/*
- * Here follows a specialised variant of bn_sub_words(), which has the property
- * performing operations on arrays of different sizes. The sizes of those arrays
- * is expressed through cl, which is the common length (basically,
- * min(len(a),len(b))), and dl, which is the delta between the two lengths,
- * calculated as len(a)-len(b). All lengths are the number of BN_ULONGs. For the
- * operations that require a result array as parameter, it must have the length
- * cl+abs(dl).
- */
-BN_ULONG
-bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
-    int dl)
-{
-       BN_ULONG c, t;
-
-       assert(cl >= 0);
-       c = bn_sub_words(r, a, b, cl);
-
-       if (dl == 0)
-               return c;
-
-       r += cl;
-       a += cl;
-       b += cl;
-
-       if (dl < 0) {
-               for (;;) {
-                       t = b[0];
-                       r[0] = (0 - t - c) & BN_MASK2;
-                       if (t != 0)
-                               c = 1;
-                       if (++dl >= 0)
-                               break;
-
-                       t = b[1];
-                       r[1] = (0 - t - c) & BN_MASK2;
-                       if (t != 0)
-                               c = 1;
-                       if (++dl >= 0)
-                               break;
-
-                       t = b[2];
-                       r[2] = (0 - t - c) & BN_MASK2;
-                       if (t != 0)
-                               c = 1;
-                       if (++dl >= 0)
-                               break;
-
-                       t = b[3];
-                       r[3] = (0 - t - c) & BN_MASK2;
-                       if (t != 0)
-                               c = 1;
-                       if (++dl >= 0)
-                               break;
-
-                       b += 4;
-                       r += 4;
-               }
-       } else {
-               int save_dl = dl;
-               while (c) {
-                       t = a[0];
-                       r[0] = (t - c) & BN_MASK2;
-                       if (t != 0)
-                               c = 0;
-                       if (--dl <= 0)
-                               break;
-
-                       t = a[1];
-                       r[1] = (t - c) & BN_MASK2;
-                       if (t != 0)
-                               c = 0;
-                       if (--dl <= 0)
-                               break;
-
-                       t = a[2];
-                       r[2] = (t - c) & BN_MASK2;
-                       if (t != 0)
-                               c = 0;
-                       if (--dl <= 0)
-                               break;
-
-                       t = a[3];
-                       r[3] = (t - c) & BN_MASK2;
-                       if (t != 0)
-                               c = 0;
-                       if (--dl <= 0)
-                               break;
-
-                       save_dl = dl;
-                       a += 4;
-                       r += 4;
-               }
-               if (dl > 0) {
-                       if (save_dl > dl) {
-                               switch (save_dl - dl) {
-                               case 1:
-                                       r[1] = a[1];
-                                       if (--dl <= 0)
-                                               break;
-                               case 2:
-                                       r[2] = a[2];
-                                       if (--dl <= 0)
-                                               break;
-                               case 3:
-                                       r[3] = a[3];
-                                       if (--dl <= 0)
-                                               break;
-                               }
-                               a += 4;
-                               r += 4;
-                       }
-               }
-               if (dl > 0) {
-                       for (;;) {
-                               r[0] = a[0];
-                               if (--dl <= 0)
-                                       break;
-                               r[1] = a[1];
-                               if (--dl <= 0)
-                                       break;
-                               r[2] = a[2];
-                               if (--dl <= 0)
-                                       break;
-                               r[3] = a[3];
-                               if (--dl <= 0)
-                                       break;
-
-                               a += 4;
-                               r += 4;
-                       }
-               }
-       }
-       return c;
-}
-#endif
-
 void
 bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
 {
@@ -504,15 +366,15 @@ bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna,
        zero = neg = 0;
        switch (c1 * 3 + c2) {
        case -4:
-               bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
-               bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+               bn_sub(t, n, &a[n], tna, a, n); /* - */
+               bn_sub(&t[n], n, b, n, &b[n], tnb);     /* - */
                break;
        case -3:
                zero = 1;
                break;
        case -2:
-               bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
-               bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
+               bn_sub(t, n, &a[n], tna, a, n); /* - */
+               bn_sub(&t[n], n, &b[n], tnb, b, n);     /* + */
                neg = 1;
                break;
        case -1:
@@ -521,16 +383,16 @@ bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna,
                zero = 1;
                break;
        case 2:
-               bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
-               bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+               bn_sub(t, n, a, n, &a[n], tna); /* + */
+               bn_sub(&t[n], n, b, n, &b[n], tnb);     /* - */
                neg = 1;
                break;
        case 3:
                zero = 1;
                break;
        case 4:
-               bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
-               bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
+               bn_sub(t, n, a, n, &a[n], tna);
+               bn_sub(&t[n], n, &b[n], tnb, b, n);
                break;
        }
 
@@ -630,14 +492,14 @@ bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna,
        neg = 0;
        switch (c1 * 3 + c2) {
        case -4:
-               bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
-               bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+               bn_sub(t, n, &a[n], tna, a, n);         /* - */
+               bn_sub(&t[n], n, b, n, &b[n], tnb);     /* - */
                break;
        case -3:
                /* break; */
        case -2:
-               bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
-               bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
+               bn_sub(t, n, &a[n], tna, a, n);         /* - */
+               bn_sub(&t[n], n, &b[n], tnb, b, n);     /* + */
                neg = 1;
                break;
        case -1:
@@ -645,15 +507,15 @@ bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna,
        case 1:
                /* break; */
        case 2:
-               bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
-               bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+               bn_sub(t, n, a, n, &a[n], tna);         /* + */
+               bn_sub(&t[n], n, b, n, &b[n], tnb);     /* - */
                neg = 1;
                break;
        case 3:
                /* break; */
        case 4:
-               bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
-               bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
+               bn_sub(t, n, a, n, &a[n], tna);
+               bn_sub(&t[n], n, &b[n], tnb, b, n);
                break;
        }
                /* The zero case isn't yet implemented here. The speedup