Move {mul,sqr}_add_c{,2} macros from bn_asm.c to bn_local.h.
author jsing <jsing@openbsd.org>
Fri, 20 Jan 2023 17:26:03 +0000 (17:26 +0000)
committer jsing <jsing@openbsd.org>
Fri, 20 Jan 2023 17:26:03 +0000 (17:26 +0000)
These depend on other macros that are already in bn_local.h and this
makes them available to other source files. A lot more cleanup will be
needed in the future.

Of course x86_64-gcc.c makes use of the same macro names - sprinkle some
undef in there for the time being.

ok tb@

lib/libcrypto/bn/asm/x86_64-gcc.c
lib/libcrypto/bn/bn_asm.c
lib/libcrypto/bn/bn_local.h

index e98ffe4..c6d6239 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: x86_64-gcc.c,v 1.7 2022/11/26 16:08:51 tb Exp $ */
+/* $OpenBSD: x86_64-gcc.c,v 1.8 2023/01/20 17:26:03 jsing Exp $ */
 #include "../bn_local.h"
 /*
  * x86_64 BIGNUM accelerator version 0.1, December 2002.
@@ -227,6 +227,11 @@ BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int
 /* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
 /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
 
+#undef mul_add_c
+#undef mul_add_c2
+#undef sqr_add_c
+#undef sqr_add_c2
+
 /*
  * Keep in mind that carrying into high part of multiplication result
  * can not overflow, because it cannot be all-ones.
index e5627cf..8406348 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: bn_asm.c,v 1.17 2022/11/30 01:47:19 jsing Exp $ */
+/* $OpenBSD: bn_asm.c,v 1.18 2023/01/20 17:26:03 jsing Exp $ */
 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  * All rights reserved.
  *
@@ -484,154 +484,6 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
 #undef bn_sqr_comba8
 #undef bn_sqr_comba4
 
-/* mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0) */
-/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
-/* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
-/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
-
-#ifdef BN_LLONG
-/*
- * Keep in mind that additions to multiplication result can not
- * overflow, because its high half cannot be all-ones.
- */
-#define mul_add_c(a,b,c0,c1,c2)                do {    \
-       BN_ULONG hi;                            \
-       BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
-       t += c0;                /* no carry */  \
-       c0 = (BN_ULONG)Lw(t);                   \
-       hi = (BN_ULONG)Hw(t);                   \
-       c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
-       } while(0)
-
-#define mul_add_c2(a,b,c0,c1,c2)       do {    \
-       BN_ULONG hi;                            \
-       BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
-       BN_ULLONG tt = t+c0;    /* no carry */  \
-       c0 = (BN_ULONG)Lw(tt);                  \
-       hi = (BN_ULONG)Hw(tt);                  \
-       c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
-       t += c0;                /* no carry */  \
-       c0 = (BN_ULONG)Lw(t);                   \
-       hi = (BN_ULONG)Hw(t);                   \
-       c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
-       } while(0)
-
-#define sqr_add_c(a,i,c0,c1,c2)                do {    \
-       BN_ULONG hi;                            \
-       BN_ULLONG t = (BN_ULLONG)a[i]*a[i];     \
-       t += c0;                /* no carry */  \
-       c0 = (BN_ULONG)Lw(t);                   \
-       hi = (BN_ULONG)Hw(t);                   \
-       c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
-       } while(0)
-
-#define sqr_add_c2(a,i,j,c0,c1,c2) \
-       mul_add_c2((a)[i],(a)[j],c0,c1,c2)
-
-#elif defined(BN_UMULT_LOHI)
-/*
- * Keep in mind that additions to hi can not overflow, because
- * the high word of a multiplication result cannot be all-ones.
- */
-#define mul_add_c(a,b,c0,c1,c2)                do {    \
-       BN_ULONG ta = (a), tb = (b);            \
-       BN_ULONG lo, hi;                        \
-       BN_UMULT_LOHI(lo,hi,ta,tb);             \
-       c0 += lo; hi += (c0<lo)?1:0;            \
-       c1 += hi; c2 += (c1<hi)?1:0;            \
-       } while(0)
-
-#define mul_add_c2(a,b,c0,c1,c2)       do {    \
-       BN_ULONG ta = (a), tb = (b);            \
-       BN_ULONG lo, hi, tt;                    \
-       BN_UMULT_LOHI(lo,hi,ta,tb);             \
-       c0 += lo; tt = hi+((c0<lo)?1:0);        \
-       c1 += tt; c2 += (c1<tt)?1:0;            \
-       c0 += lo; hi += (c0<lo)?1:0;            \
-       c1 += hi; c2 += (c1<hi)?1:0;            \
-       } while(0)
-
-#define sqr_add_c(a,i,c0,c1,c2)                do {    \
-       BN_ULONG ta = (a)[i];                   \
-       BN_ULONG lo, hi;                        \
-       BN_UMULT_LOHI(lo,hi,ta,ta);             \
-       c0 += lo; hi += (c0<lo)?1:0;            \
-       c1 += hi; c2 += (c1<hi)?1:0;            \
-       } while(0)
-
-#define sqr_add_c2(a,i,j,c0,c1,c2)     \
-       mul_add_c2((a)[i],(a)[j],c0,c1,c2)
-
-#elif defined(BN_UMULT_HIGH)
-/*
- * Keep in mind that additions to hi can not overflow, because
- * the high word of a multiplication result cannot be all-ones.
- */
-#define mul_add_c(a,b,c0,c1,c2)                do {    \
-       BN_ULONG ta = (a), tb = (b);            \
-       BN_ULONG lo = ta * tb;                  \
-       BN_ULONG hi = BN_UMULT_HIGH(ta,tb);     \
-       c0 += lo; hi += (c0<lo)?1:0;            \
-       c1 += hi; c2 += (c1<hi)?1:0;            \
-       } while(0)
-
-#define mul_add_c2(a,b,c0,c1,c2)       do {    \
-       BN_ULONG ta = (a), tb = (b), tt;        \
-       BN_ULONG lo = ta * tb;                  \
-       BN_ULONG hi = BN_UMULT_HIGH(ta,tb);     \
-       c0 += lo; tt = hi + ((c0<lo)?1:0);      \
-       c1 += tt; c2 += (c1<tt)?1:0;            \
-       c0 += lo; hi += (c0<lo)?1:0;            \
-       c1 += hi; c2 += (c1<hi)?1:0;            \
-       } while(0)
-
-#define sqr_add_c(a,i,c0,c1,c2)                do {    \
-       BN_ULONG ta = (a)[i];                   \
-       BN_ULONG lo = ta * ta;                  \
-       BN_ULONG hi = BN_UMULT_HIGH(ta,ta);     \
-       c0 += lo; hi += (c0<lo)?1:0;            \
-       c1 += hi; c2 += (c1<hi)?1:0;            \
-       } while(0)
-
-#define sqr_add_c2(a,i,j,c0,c1,c2)     \
-       mul_add_c2((a)[i],(a)[j],c0,c1,c2)
-
-#else /* !BN_LLONG */
-/*
- * Keep in mind that additions to hi can not overflow, because
- * the high word of a multiplication result cannot be all-ones.
- */
-#define mul_add_c(a,b,c0,c1,c2)                do {    \
-       BN_ULONG lo = LBITS(a), hi = HBITS(a);  \
-       BN_ULONG bl = LBITS(b), bh = HBITS(b);  \
-       mul64(lo,hi,bl,bh);                     \
-       c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
-       c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
-       } while(0)
-
-#define mul_add_c2(a,b,c0,c1,c2)       do {    \
-       BN_ULONG tt;                            \
-       BN_ULONG lo = LBITS(a), hi = HBITS(a);  \
-       BN_ULONG bl = LBITS(b), bh = HBITS(b);  \
-       mul64(lo,hi,bl,bh);                     \
-       tt = hi;                                \
-       c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \
-       c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \
-       c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
-       c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
-       } while(0)
-
-#define sqr_add_c(a,i,c0,c1,c2)                do {    \
-       BN_ULONG lo, hi;                        \
-       sqr64(lo,hi,(a)[i]);                    \
-       c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
-       c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
-       } while(0)
-
-#define sqr_add_c2(a,i,j,c0,c1,c2) \
-       mul_add_c2((a)[i],(a)[j],c0,c1,c2)
-#endif /* !BN_LLONG */
-
 void
 bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 {
index 08e7064..74e158d 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: bn_local.h,v 1.4 2023/01/20 12:16:46 jsing Exp $ */
+/* $OpenBSD: bn_local.h,v 1.5 2023/01/20 17:26:03 jsing Exp $ */
 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  * All rights reserved.
  *
@@ -481,15 +481,165 @@ struct bn_gencb_st {
        }
 #endif /* !BN_LLONG */
 
+/* mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0) */
+/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
+/* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
+/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
+
+#ifdef BN_LLONG
+/*
+ * Keep in mind that additions to multiplication result can not
+ * overflow, because its high half cannot be all-ones.
+ */
+#define mul_add_c(a,b,c0,c1,c2)                do {    \
+       BN_ULONG hi;                            \
+       BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
+       t += c0;                /* no carry */  \
+       c0 = (BN_ULONG)Lw(t);                   \
+       hi = (BN_ULONG)Hw(t);                   \
+       c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+       } while(0)
+
+#define mul_add_c2(a,b,c0,c1,c2)       do {    \
+       BN_ULONG hi;                            \
+       BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
+       BN_ULLONG tt = t+c0;    /* no carry */  \
+       c0 = (BN_ULONG)Lw(tt);                  \
+       hi = (BN_ULONG)Hw(tt);                  \
+       c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+       t += c0;                /* no carry */  \
+       c0 = (BN_ULONG)Lw(t);                   \
+       hi = (BN_ULONG)Hw(t);                   \
+       c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+       } while(0)
+
+#define sqr_add_c(a,i,c0,c1,c2)                do {    \
+       BN_ULONG hi;                            \
+       BN_ULLONG t = (BN_ULLONG)a[i]*a[i];     \
+       t += c0;                /* no carry */  \
+       c0 = (BN_ULONG)Lw(t);                   \
+       hi = (BN_ULONG)Hw(t);                   \
+       c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+       } while(0)
+
+#define sqr_add_c2(a,i,j,c0,c1,c2) \
+       mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+
+#elif defined(BN_UMULT_LOHI)
+/*
+ * Keep in mind that additions to hi can not overflow, because
+ * the high word of a multiplication result cannot be all-ones.
+ */
+#define mul_add_c(a,b,c0,c1,c2)                do {    \
+       BN_ULONG ta = (a), tb = (b);            \
+       BN_ULONG lo, hi;                        \
+       BN_UMULT_LOHI(lo,hi,ta,tb);             \
+       c0 += lo; hi += (c0<lo)?1:0;            \
+       c1 += hi; c2 += (c1<hi)?1:0;            \
+       } while(0)
+
+#define mul_add_c2(a,b,c0,c1,c2)       do {    \
+       BN_ULONG ta = (a), tb = (b);            \
+       BN_ULONG lo, hi, tt;                    \
+       BN_UMULT_LOHI(lo,hi,ta,tb);             \
+       c0 += lo; tt = hi+((c0<lo)?1:0);        \
+       c1 += tt; c2 += (c1<tt)?1:0;            \
+       c0 += lo; hi += (c0<lo)?1:0;            \
+       c1 += hi; c2 += (c1<hi)?1:0;            \
+       } while(0)
+
+#define sqr_add_c(a,i,c0,c1,c2)                do {    \
+       BN_ULONG ta = (a)[i];                   \
+       BN_ULONG lo, hi;                        \
+       BN_UMULT_LOHI(lo,hi,ta,ta);             \
+       c0 += lo; hi += (c0<lo)?1:0;            \
+       c1 += hi; c2 += (c1<hi)?1:0;            \
+       } while(0)
+
+#define sqr_add_c2(a,i,j,c0,c1,c2)     \
+       mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+
+#elif defined(BN_UMULT_HIGH)
+/*
+ * Keep in mind that additions to hi can not overflow, because
+ * the high word of a multiplication result cannot be all-ones.
+ */
+#define mul_add_c(a,b,c0,c1,c2)                do {    \
+       BN_ULONG ta = (a), tb = (b);            \
+       BN_ULONG lo = ta * tb;                  \
+       BN_ULONG hi = BN_UMULT_HIGH(ta,tb);     \
+       c0 += lo; hi += (c0<lo)?1:0;            \
+       c1 += hi; c2 += (c1<hi)?1:0;            \
+       } while(0)
+
+#define mul_add_c2(a,b,c0,c1,c2)       do {    \
+       BN_ULONG ta = (a), tb = (b), tt;        \
+       BN_ULONG lo = ta * tb;                  \
+       BN_ULONG hi = BN_UMULT_HIGH(ta,tb);     \
+       c0 += lo; tt = hi + ((c0<lo)?1:0);      \
+       c1 += tt; c2 += (c1<tt)?1:0;            \
+       c0 += lo; hi += (c0<lo)?1:0;            \
+       c1 += hi; c2 += (c1<hi)?1:0;            \
+       } while(0)
+
+#define sqr_add_c(a,i,c0,c1,c2)                do {    \
+       BN_ULONG ta = (a)[i];                   \
+       BN_ULONG lo = ta * ta;                  \
+       BN_ULONG hi = BN_UMULT_HIGH(ta,ta);     \
+       c0 += lo; hi += (c0<lo)?1:0;            \
+       c1 += hi; c2 += (c1<hi)?1:0;            \
+       } while(0)
+
+#define sqr_add_c2(a,i,j,c0,c1,c2)     \
+       mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+
+#else /* !BN_LLONG */
+/*
+ * Keep in mind that additions to hi can not overflow, because
+ * the high word of a multiplication result cannot be all-ones.
+ */
+#define mul_add_c(a,b,c0,c1,c2)                do {    \
+       BN_ULONG lo = LBITS(a), hi = HBITS(a);  \
+       BN_ULONG bl = LBITS(b), bh = HBITS(b);  \
+       mul64(lo,hi,bl,bh);                     \
+       c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
+       c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+       } while(0)
+
+#define mul_add_c2(a,b,c0,c1,c2)       do {    \
+       BN_ULONG tt;                            \
+       BN_ULONG lo = LBITS(a), hi = HBITS(a);  \
+       BN_ULONG bl = LBITS(b), bh = HBITS(b);  \
+       mul64(lo,hi,bl,bh);                     \
+       tt = hi;                                \
+       c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \
+       c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \
+       c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
+       c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+       } while(0)
+
+#define sqr_add_c(a,i,c0,c1,c2)                do {    \
+       BN_ULONG lo, hi;                        \
+       sqr64(lo,hi,(a)[i]);                    \
+       c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
+       c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+       } while(0)
+
+#define sqr_add_c2(a,i,j,c0,c1,c2) \
+       mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+#endif /* !BN_LLONG */
+
 /* The least significant word of a BIGNUM. */
 #define BN_lsw(n) (((n)->top == 0) ? (BN_ULONG) 0 : (n)->d[0])
 
 void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb);
-void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
 void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
+void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
+
 void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
-void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
 void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a);
+void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
+
 int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n);
 int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
     int cl, int dl);