-/* $OpenBSD: bn_mul.c,v 1.31 2023/02/13 04:25:37 jsing Exp $ */
+/* $OpenBSD: bn_mul.c,v 1.32 2023/02/14 18:37:15 jsing Exp $ */
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
#include <openssl/opensslconf.h>
#include "bn_arch.h"
+#include "bn_internal.h"
#include "bn_local.h"
+/*
+ * bn_mul_add_words() computes (carry:r[i]) = a[i] * w + r[i] + carry, where
+ * a is an array of words and w is a single word. This should really be called
+ * bn_mulw_add_words() since only one input is an array. This is used as a step
+ * in the multiplication of word arrays.
+ */
#ifndef HAVE_BN_MUL_ADD_WORDS
-#if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
-
BN_ULONG
-bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
+bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w)
{
- BN_ULONG c1 = 0;
+ BN_ULONG carry = 0;
assert(num >= 0);
if (num <= 0)
- return (c1);
+ return 0;
#ifndef OPENSSL_SMALL_FOOTPRINT
while (num & ~3) {
- mul_add(rp[0], ap[0], w, c1);
- mul_add(rp[1], ap[1], w, c1);
- mul_add(rp[2], ap[2], w, c1);
- mul_add(rp[3], ap[3], w, c1);
- ap += 4;
- rp += 4;
+ bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]);
+ bn_mulw_addw_addw(a[1], w, r[1], carry, &carry, &r[1]);
+ bn_mulw_addw_addw(a[2], w, r[2], carry, &carry, &r[2]);
+ bn_mulw_addw_addw(a[3], w, r[3], carry, &carry, &r[3]);
+ a += 4;
+ r += 4;
num -= 4;
}
#endif
while (num) {
- mul_add(rp[0], ap[0], w, c1);
- ap++;
- rp++;
+ bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]);
+ a++;
+ r++;
num--;
}
- return (c1);
+ return carry;
}
+#endif
-#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
-
-BN_ULONG
-bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
+/*
+ * bn_mul_comba4() computes r[] = a[] * b[] using Comba multiplication
+ * (https://everything2.com/title/Comba+multiplication), where a and b are both
+ * four word arrays, producing an eight word array result.
+ */
+#ifndef HAVE_BN_MUL_COMBA4
+void
+bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
- BN_ULONG c = 0;
- BN_ULONG bl, bh;
+ BN_ULONG c0, c1, c2;
- assert(num >= 0);
- if (num <= 0)
- return ((BN_ULONG)0);
+ bn_mulw_addtw(a[0], b[0], 0, 0, 0, &c2, &c1, &r[0]);
- bl = LBITS(w);
- bh = HBITS(w);
+ bn_mulw_addtw(a[0], b[1], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[1], b[0], c2, c1, c0, &c2, &c1, &r[1]);
-#ifndef OPENSSL_SMALL_FOOTPRINT
- while (num & ~3) {
- mul_add(rp[0], ap[0], bl, bh, c);
- mul_add(rp[1], ap[1], bl, bh, c);
- mul_add(rp[2], ap[2], bl, bh, c);
- mul_add(rp[3], ap[3], bl, bh, c);
- ap += 4;
- rp += 4;
- num -= 4;
- }
-#endif
- while (num) {
- mul_add(rp[0], ap[0], bl, bh, c);
- ap++;
- rp++;
- num--;
- }
- return (c);
-}
+ bn_mulw_addtw(a[2], b[0], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[1], b[1], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[0], b[2], c2, c1, c0, &c2, &c1, &r[2]);
-#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
-#endif
+ bn_mulw_addtw(a[0], b[3], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[1], b[2], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[2], b[1], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[3], b[0], c2, c1, c0, &c2, &c1, &r[3]);
-#ifndef HAVE_BN_MUL_COMBA4
-void
-bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
-{
- BN_ULONG c1, c2, c3;
-
- c1 = 0;
- c2 = 0;
- c3 = 0;
- mul_add_c(a[0], b[0], c1, c2, c3);
- r[0] = c1;
- c1 = 0;
- mul_add_c(a[0], b[1], c2, c3, c1);
- mul_add_c(a[1], b[0], c2, c3, c1);
- r[1] = c2;
- c2 = 0;
- mul_add_c(a[2], b[0], c3, c1, c2);
- mul_add_c(a[1], b[1], c3, c1, c2);
- mul_add_c(a[0], b[2], c3, c1, c2);
- r[2] = c3;
- c3 = 0;
- mul_add_c(a[0], b[3], c1, c2, c3);
- mul_add_c(a[1], b[2], c1, c2, c3);
- mul_add_c(a[2], b[1], c1, c2, c3);
- mul_add_c(a[3], b[0], c1, c2, c3);
- r[3] = c1;
- c1 = 0;
- mul_add_c(a[3], b[1], c2, c3, c1);
- mul_add_c(a[2], b[2], c2, c3, c1);
- mul_add_c(a[1], b[3], c2, c3, c1);
- r[4] = c2;
- c2 = 0;
- mul_add_c(a[2], b[3], c3, c1, c2);
- mul_add_c(a[3], b[2], c3, c1, c2);
- r[5] = c3;
- c3 = 0;
- mul_add_c(a[3], b[3], c1, c2, c3);
- r[6] = c1;
- r[7] = c2;
+ bn_mulw_addtw(a[3], b[1], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[2], b[2], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[1], b[3], c2, c1, c0, &c2, &c1, &r[4]);
+
+ bn_mulw_addtw(a[2], b[3], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[3], b[2], c2, c1, c0, &c2, &c1, &r[5]);
+
+ bn_mulw_addtw(a[3], b[3], 0, c2, c1, &c2, &r[7], &r[6]);
}
#endif
+/*
+ * bn_mul_comba8() computes r[] = a[] * b[] using Comba multiplication
+ * (https://everything2.com/title/Comba+multiplication), where a and b are both
+ * eight word arrays, producing a 16 word array result.
+ */
#ifndef HAVE_BN_MUL_COMBA8
void
bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
- BN_ULONG c1, c2, c3;
-
- c1 = 0;
- c2 = 0;
- c3 = 0;
- mul_add_c(a[0], b[0], c1, c2, c3);
- r[0] = c1;
- c1 = 0;
- mul_add_c(a[0], b[1], c2, c3, c1);
- mul_add_c(a[1], b[0], c2, c3, c1);
- r[1] = c2;
- c2 = 0;
- mul_add_c(a[2], b[0], c3, c1, c2);
- mul_add_c(a[1], b[1], c3, c1, c2);
- mul_add_c(a[0], b[2], c3, c1, c2);
- r[2] = c3;
- c3 = 0;
- mul_add_c(a[0], b[3], c1, c2, c3);
- mul_add_c(a[1], b[2], c1, c2, c3);
- mul_add_c(a[2], b[1], c1, c2, c3);
- mul_add_c(a[3], b[0], c1, c2, c3);
- r[3] = c1;
- c1 = 0;
- mul_add_c(a[4], b[0], c2, c3, c1);
- mul_add_c(a[3], b[1], c2, c3, c1);
- mul_add_c(a[2], b[2], c2, c3, c1);
- mul_add_c(a[1], b[3], c2, c3, c1);
- mul_add_c(a[0], b[4], c2, c3, c1);
- r[4] = c2;
- c2 = 0;
- mul_add_c(a[0], b[5], c3, c1, c2);
- mul_add_c(a[1], b[4], c3, c1, c2);
- mul_add_c(a[2], b[3], c3, c1, c2);
- mul_add_c(a[3], b[2], c3, c1, c2);
- mul_add_c(a[4], b[1], c3, c1, c2);
- mul_add_c(a[5], b[0], c3, c1, c2);
- r[5] = c3;
- c3 = 0;
- mul_add_c(a[6], b[0], c1, c2, c3);
- mul_add_c(a[5], b[1], c1, c2, c3);
- mul_add_c(a[4], b[2], c1, c2, c3);
- mul_add_c(a[3], b[3], c1, c2, c3);
- mul_add_c(a[2], b[4], c1, c2, c3);
- mul_add_c(a[1], b[5], c1, c2, c3);
- mul_add_c(a[0], b[6], c1, c2, c3);
- r[6] = c1;
- c1 = 0;
- mul_add_c(a[0], b[7], c2, c3, c1);
- mul_add_c(a[1], b[6], c2, c3, c1);
- mul_add_c(a[2], b[5], c2, c3, c1);
- mul_add_c(a[3], b[4], c2, c3, c1);
- mul_add_c(a[4], b[3], c2, c3, c1);
- mul_add_c(a[5], b[2], c2, c3, c1);
- mul_add_c(a[6], b[1], c2, c3, c1);
- mul_add_c(a[7], b[0], c2, c3, c1);
- r[7] = c2;
- c2 = 0;
- mul_add_c(a[7], b[1], c3, c1, c2);
- mul_add_c(a[6], b[2], c3, c1, c2);
- mul_add_c(a[5], b[3], c3, c1, c2);
- mul_add_c(a[4], b[4], c3, c1, c2);
- mul_add_c(a[3], b[5], c3, c1, c2);
- mul_add_c(a[2], b[6], c3, c1, c2);
- mul_add_c(a[1], b[7], c3, c1, c2);
- r[8] = c3;
- c3 = 0;
- mul_add_c(a[2], b[7], c1, c2, c3);
- mul_add_c(a[3], b[6], c1, c2, c3);
- mul_add_c(a[4], b[5], c1, c2, c3);
- mul_add_c(a[5], b[4], c1, c2, c3);
- mul_add_c(a[6], b[3], c1, c2, c3);
- mul_add_c(a[7], b[2], c1, c2, c3);
- r[9] = c1;
- c1 = 0;
- mul_add_c(a[7], b[3], c2, c3, c1);
- mul_add_c(a[6], b[4], c2, c3, c1);
- mul_add_c(a[5], b[5], c2, c3, c1);
- mul_add_c(a[4], b[6], c2, c3, c1);
- mul_add_c(a[3], b[7], c2, c3, c1);
- r[10] = c2;
- c2 = 0;
- mul_add_c(a[4], b[7], c3, c1, c2);
- mul_add_c(a[5], b[6], c3, c1, c2);
- mul_add_c(a[6], b[5], c3, c1, c2);
- mul_add_c(a[7], b[4], c3, c1, c2);
- r[11] = c3;
- c3 = 0;
- mul_add_c(a[7], b[5], c1, c2, c3);
- mul_add_c(a[6], b[6], c1, c2, c3);
- mul_add_c(a[5], b[7], c1, c2, c3);
- r[12] = c1;
- c1 = 0;
- mul_add_c(a[6], b[7], c2, c3, c1);
- mul_add_c(a[7], b[6], c2, c3, c1);
- r[13] = c2;
- c2 = 0;
- mul_add_c(a[7], b[7], c3, c1, c2);
- r[14] = c3;
- r[15] = c1;
+ BN_ULONG c0, c1, c2;
+
+ bn_mulw_addtw(a[0], b[0], 0, 0, 0, &c2, &c1, &r[0]);
+
+ bn_mulw_addtw(a[0], b[1], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[1], b[0], c2, c1, c0, &c2, &c1, &r[1]);
+
+ bn_mulw_addtw(a[2], b[0], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[1], b[1], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[0], b[2], c2, c1, c0, &c2, &c1, &r[2]);
+
+ bn_mulw_addtw(a[0], b[3], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[1], b[2], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[2], b[1], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[3], b[0], c2, c1, c0, &c2, &c1, &r[3]);
+
+ bn_mulw_addtw(a[4], b[0], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[3], b[1], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[2], b[2], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[1], b[3], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[0], b[4], c2, c1, c0, &c2, &c1, &r[4]);
+
+ bn_mulw_addtw(a[0], b[5], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[1], b[4], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[2], b[3], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[3], b[2], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[4], b[1], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[5], b[0], c2, c1, c0, &c2, &c1, &r[5]);
+
+ bn_mulw_addtw(a[6], b[0], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[5], b[1], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[4], b[2], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[3], b[3], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[2], b[4], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[1], b[5], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[0], b[6], c2, c1, c0, &c2, &c1, &r[6]);
+
+ bn_mulw_addtw(a[0], b[7], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[1], b[6], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[2], b[5], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[3], b[4], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[4], b[3], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[5], b[2], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[6], b[1], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[7], b[0], c2, c1, c0, &c2, &c1, &r[7]);
+
+ bn_mulw_addtw(a[7], b[1], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[6], b[2], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[5], b[3], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[4], b[4], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[3], b[5], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[2], b[6], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[1], b[7], c2, c1, c0, &c2, &c1, &r[8]);
+
+ bn_mulw_addtw(a[2], b[7], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[3], b[6], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[4], b[5], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[5], b[4], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[6], b[3], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[7], b[2], c2, c1, c0, &c2, &c1, &r[9]);
+
+ bn_mulw_addtw(a[7], b[3], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[6], b[4], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[5], b[5], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[4], b[6], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[3], b[7], c2, c1, c0, &c2, &c1, &r[10]);
+
+ bn_mulw_addtw(a[4], b[7], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[5], b[6], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[6], b[5], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[7], b[4], c2, c1, c0, &c2, &c1, &r[11]);
+
+ bn_mulw_addtw(a[7], b[5], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[6], b[6], c2, c1, c0, &c2, &c1, &c0);
+ bn_mulw_addtw(a[5], b[7], c2, c1, c0, &c2, &c1, &r[12]);
+
+ bn_mulw_addtw(a[6], b[7], 0, c2, c1, &c2, &c1, &c0);
+ bn_mulw_addtw(a[7], b[6], c2, c1, c0, &c2, &c1, &r[13]);
+
+ bn_mulw_addtw(a[7], b[7], 0, c2, c1, &c2, &r[15], &r[14]);
}
#endif
+/*
+ * bn_mul_words() computes (carry:r[i]) = a[i] * w + carry, where a is an array
+ * of words and w is a single word. This should really be called bn_mulw_words()
+ * since only one input is an array. This is used as a step in the multiplication
+ * of word arrays.
+ */
#ifndef HAVE_BN_MUL_WORDS
-#if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
-
BN_ULONG
-bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
-{
- BN_ULONG c1 = 0;
-
- assert(num >= 0);
- if (num <= 0)
- return (c1);
-
-#ifndef OPENSSL_SMALL_FOOTPRINT
- while (num & ~3) {
- mul(rp[0], ap[0], w, c1);
- mul(rp[1], ap[1], w, c1);
- mul(rp[2], ap[2], w, c1);
- mul(rp[3], ap[3], w, c1);
- ap += 4;
- rp += 4;
- num -= 4;
- }
-#endif
- while (num) {
- mul(rp[0], ap[0], w, c1);
- ap++;
- rp++;
- num--;
- }
- return (c1);
-}
-#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
-
-BN_ULONG
-bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
+bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w)
{
BN_ULONG carry = 0;
- BN_ULONG bl, bh;
assert(num >= 0);
if (num <= 0)
- return ((BN_ULONG)0);
-
- bl = LBITS(w);
- bh = HBITS(w);
+ return 0;
#ifndef OPENSSL_SMALL_FOOTPRINT
while (num & ~3) {
- mul(rp[0], ap[0], bl, bh, carry);
- mul(rp[1], ap[1], bl, bh, carry);
- mul(rp[2], ap[2], bl, bh, carry);
- mul(rp[3], ap[3], bl, bh, carry);
- ap += 4;
- rp += 4;
+ bn_mulw_addw(a[0], w, carry, &carry, &r[0]);
+ bn_mulw_addw(a[1], w, carry, &carry, &r[1]);
+ bn_mulw_addw(a[2], w, carry, &carry, &r[2]);
+ bn_mulw_addw(a[3], w, carry, &carry, &r[3]);
+ a += 4;
+ r += 4;
num -= 4;
}
#endif
while (num) {
- mul(rp[0], ap[0], bl, bh, carry);
- ap++;
- rp++;
+ bn_mulw_addw(a[0], w, carry, &carry, &r[0]);
+ a++;
+ r++;
num--;
}
- return (carry);
+ return carry;
}
-#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
#endif
#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)