-# $OpenBSD: Makefile.inc,v 1.7 2023/01/14 15:45:43 jsing Exp $
+# $OpenBSD: Makefile.inc,v 1.8 2023/02/22 05:57:19 jsing Exp $
# i386-specific libcrypto build rules
SSLASM+= bf bf-586
# bn
CFLAGS+= -DOPENSSL_IA32_SSE2
-CFLAGS+= -DOPENSSL_BN_ASM_PART_WORDS
SSLASM+= bn bn-586
SSLASM+= bn co-586
CFLAGS+= -DOPENSSL_BN_ASM_MONT
&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
&bn_sub_words("bn_sub_words");
-&bn_sub_part_words("bn_sub_part_words");
&asm_finish();
&function_end($name);
}
-
-sub bn_sub_part_words
- {
- local($name)=@_;
-
- &function_begin($name,"");
-
- &comment("");
- $a="esi";
- $b="edi";
- $c="eax";
- $r="ebx";
- $tmp1="ecx";
- $tmp2="edx";
- $num="ebp";
-
- &mov($r,&wparam(0)); # get r
- &mov($a,&wparam(1)); # get a
- &mov($b,&wparam(2)); # get b
- &mov($num,&wparam(3)); # get num
- &xor($c,$c); # clear carry
- &and($num,0xfffffff8); # num / 8
-
- &jz(&label("aw_finish"));
-
- &set_label("aw_loop",0);
- for ($i=0; $i<8; $i++)
- {
- &comment("Round $i");
-
- &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
- &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
- &sub($tmp1,$c);
- &mov($c,0);
- &adc($c,$c);
- &sub($tmp1,$tmp2);
- &adc($c,0);
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
- }
-
- &comment("");
- &add($a,32);
- &add($b,32);
- &add($r,32);
- &sub($num,8);
- &jnz(&label("aw_loop"));
-
- &set_label("aw_finish",0);
- &mov($num,&wparam(3)); # get num
- &and($num,7);
- &jz(&label("aw_end"));
-
- for ($i=0; $i<7; $i++)
- {
- &comment("Tail Round $i");
- &mov($tmp1,&DWP(0,$a,"",0)); # *a
- &mov($tmp2,&DWP(0,$b,"",0));# *b
- &sub($tmp1,$c);
- &mov($c,0);
- &adc($c,$c);
- &sub($tmp1,$tmp2);
- &adc($c,0);
- &mov(&DWP(0,$r,"",0),$tmp1); # *r
- &add($a, 4);
- &add($b, 4);
- &add($r, 4);
- &dec($num) if ($i != 6);
- &jz(&label("aw_end")) if ($i != 6);
- }
- &set_label("aw_end",0);
-
- &cmp(&wparam(4),0);
- &je(&label("pw_end"));
-
- &mov($num,&wparam(4)); # get dl
- &cmp($num,0);
- &je(&label("pw_end"));
- &jge(&label("pw_pos"));
-
- &comment("pw_neg");
- &mov($tmp2,0);
- &sub($tmp2,$num);
- &mov($num,$tmp2);
- &and($num,0xfffffff8); # num / 8
- &jz(&label("pw_neg_finish"));
-
- &set_label("pw_neg_loop",0);
- for ($i=0; $i<8; $i++)
- {
- &comment("dl<0 Round $i");
-
- &mov($tmp1,0);
- &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
- &sub($tmp1,$c);
- &mov($c,0);
- &adc($c,$c);
- &sub($tmp1,$tmp2);
- &adc($c,0);
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
- }
-
- &comment("");
- &add($b,32);
- &add($r,32);
- &sub($num,8);
- &jnz(&label("pw_neg_loop"));
-
- &set_label("pw_neg_finish",0);
- &mov($tmp2,&wparam(4)); # get dl
- &mov($num,0);
- &sub($num,$tmp2);
- &and($num,7);
- &jz(&label("pw_end"));
-
- for ($i=0; $i<7; $i++)
- {
- &comment("dl<0 Tail Round $i");
- &mov($tmp1,0);
- &mov($tmp2,&DWP($i*4,$b,"",0));# *b
- &sub($tmp1,$c);
- &mov($c,0);
- &adc($c,$c);
- &sub($tmp1,$tmp2);
- &adc($c,0);
- &dec($num) if ($i != 6);
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
- &jz(&label("pw_end")) if ($i != 6);
- }
-
- &jmp(&label("pw_end"));
-
- &set_label("pw_pos",0);
-
- &and($num,0xfffffff8); # num / 8
- &jz(&label("pw_pos_finish"));
-
- &set_label("pw_pos_loop",0);
-
- for ($i=0; $i<8; $i++)
- {
- &comment("dl>0 Round $i");
-
- &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
- &sub($tmp1,$c);
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
- &jnc(&label("pw_nc".$i));
- }
-
- &comment("");
- &add($a,32);
- &add($r,32);
- &sub($num,8);
- &jnz(&label("pw_pos_loop"));
-
- &set_label("pw_pos_finish",0);
- &mov($num,&wparam(4)); # get dl
- &and($num,7);
- &jz(&label("pw_end"));
-
- for ($i=0; $i<7; $i++)
- {
- &comment("dl>0 Tail Round $i");
- &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
- &sub($tmp1,$c);
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
- &jnc(&label("pw_tail_nc".$i));
- &dec($num) if ($i != 6);
- &jz(&label("pw_end")) if ($i != 6);
- }
- &mov($c,1);
- &jmp(&label("pw_end"));
-
- &set_label("pw_nc_loop",0);
- for ($i=0; $i<8; $i++)
- {
- &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
- &set_label("pw_nc".$i,0);
- }
-
- &comment("");
- &add($a,32);
- &add($r,32);
- &sub($num,8);
- &jnz(&label("pw_nc_loop"));
-
- &mov($num,&wparam(4)); # get dl
- &and($num,7);
- &jz(&label("pw_nc_end"));
-
- for ($i=0; $i<7; $i++)
- {
- &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
- &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
- &set_label("pw_tail_nc".$i,0);
- &dec($num) if ($i != 6);
- &jz(&label("pw_nc_end")) if ($i != 6);
- }
-
- &set_label("pw_nc_end",0);
- &mov($c,0);
-
- &set_label("pw_end",0);
-
-# &mov("eax",$c); # $c is "eax"
-
- &function_end($name);
- }
-
-/* $OpenBSD: bn_local.h,v 1.16 2023/02/22 05:46:37 jsing Exp $ */
+/* $OpenBSD: bn_local.h,v 1.17 2023/02/22 05:57:19 jsing Exp $ */
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
int n, int tna, int tnb, BN_ULONG *t);
void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t);
-BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
- int cl, int dl);
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, int num);
-/* $OpenBSD: bn_mul.c,v 1.33 2023/02/15 18:10:16 jsing Exp $ */
+/* $OpenBSD: bn_mul.c,v 1.34 2023/02/22 05:57:19 jsing Exp $ */
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
}
#endif
-#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
-/*
- * Here follows a specialised variant of bn_sub_words(), which has the property
- * performing operations on arrays of different sizes. The sizes of those arrays
- * is expressed through cl, which is the common length (basically,
- * min(len(a),len(b))), and dl, which is the delta between the two lengths,
- * calculated as len(a)-len(b). All lengths are the number of BN_ULONGs. For the
- * operations that require a result array as parameter, it must have the length
- * cl+abs(dl).
- */
-BN_ULONG
-bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
- int dl)
-{
- BN_ULONG c, t;
-
- assert(cl >= 0);
- c = bn_sub_words(r, a, b, cl);
-
- if (dl == 0)
- return c;
-
- r += cl;
- a += cl;
- b += cl;
-
- if (dl < 0) {
- for (;;) {
- t = b[0];
- r[0] = (0 - t - c) & BN_MASK2;
- if (t != 0)
- c = 1;
- if (++dl >= 0)
- break;
-
- t = b[1];
- r[1] = (0 - t - c) & BN_MASK2;
- if (t != 0)
- c = 1;
- if (++dl >= 0)
- break;
-
- t = b[2];
- r[2] = (0 - t - c) & BN_MASK2;
- if (t != 0)
- c = 1;
- if (++dl >= 0)
- break;
-
- t = b[3];
- r[3] = (0 - t - c) & BN_MASK2;
- if (t != 0)
- c = 1;
- if (++dl >= 0)
- break;
-
- b += 4;
- r += 4;
- }
- } else {
- int save_dl = dl;
- while (c) {
- t = a[0];
- r[0] = (t - c) & BN_MASK2;
- if (t != 0)
- c = 0;
- if (--dl <= 0)
- break;
-
- t = a[1];
- r[1] = (t - c) & BN_MASK2;
- if (t != 0)
- c = 0;
- if (--dl <= 0)
- break;
-
- t = a[2];
- r[2] = (t - c) & BN_MASK2;
- if (t != 0)
- c = 0;
- if (--dl <= 0)
- break;
-
- t = a[3];
- r[3] = (t - c) & BN_MASK2;
- if (t != 0)
- c = 0;
- if (--dl <= 0)
- break;
-
- save_dl = dl;
- a += 4;
- r += 4;
- }
- if (dl > 0) {
- if (save_dl > dl) {
- switch (save_dl - dl) {
- case 1:
- r[1] = a[1];
- if (--dl <= 0)
- break;
- case 2:
- r[2] = a[2];
- if (--dl <= 0)
- break;
- case 3:
- r[3] = a[3];
- if (--dl <= 0)
- break;
- }
- a += 4;
- r += 4;
- }
- }
- if (dl > 0) {
- for (;;) {
- r[0] = a[0];
- if (--dl <= 0)
- break;
- r[1] = a[1];
- if (--dl <= 0)
- break;
- r[2] = a[2];
- if (--dl <= 0)
- break;
- r[3] = a[3];
- if (--dl <= 0)
- break;
-
- a += 4;
- r += 4;
- }
- }
- }
- return c;
-}
-#endif
-
void
bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
{
zero = neg = 0;
switch (c1 * 3 + c2) {
case -4:
- bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
- bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+ bn_sub(t, n, &a[n], tna, a, n); /* - */
+ bn_sub(&t[n], n, b, n, &b[n], tnb); /* - */
break;
case -3:
zero = 1;
break;
case -2:
- bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
- bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
+ bn_sub(t, n, &a[n], tna, a, n); /* - */
+ bn_sub(&t[n], n, &b[n], tnb, b, n); /* + */
neg = 1;
break;
case -1:
zero = 1;
break;
case 2:
- bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
- bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+ bn_sub(t, n, a, n, &a[n], tna); /* + */
+ bn_sub(&t[n], n, b, n, &b[n], tnb); /* - */
neg = 1;
break;
case 3:
zero = 1;
break;
case 4:
- bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
- bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
+ bn_sub(t, n, a, n, &a[n], tna);
+ bn_sub(&t[n], n, &b[n], tnb, b, n);
break;
}
neg = 0;
switch (c1 * 3 + c2) {
case -4:
- bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
- bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+ bn_sub(t, n, &a[n], tna, a, n); /* - */
+ bn_sub(&t[n], n, b, n, &b[n], tnb); /* - */
break;
case -3:
/* break; */
case -2:
- bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
- bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
+ bn_sub(t, n, &a[n], tna, a, n); /* - */
+ bn_sub(&t[n], n, &b[n], tnb, b, n); /* + */
neg = 1;
break;
case -1:
case 1:
/* break; */
case 2:
- bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
- bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
+ bn_sub(t, n, a, n, &a[n], tna); /* + */
+ bn_sub(&t[n], n, b, n, &b[n], tnb); /* - */
neg = 1;
break;
case 3:
/* break; */
case 4:
- bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
- bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
+ bn_sub(t, n, a, n, &a[n], tna);
+ bn_sub(&t[n], n, &b[n], tnb, b, n);
break;
}
/* The zero case isn't yet implemented here. The speedup