-/* $OpenBSD: bn_sqr.c,v 1.26 2023/02/16 10:41:03 jsing Exp $ */
+/* $OpenBSD: bn_sqr.c,v 1.27 2023/02/17 05:13:34 jsing Exp $ */
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
int bn_sqr(BIGNUM *r, const BIGNUM *a, int max, BN_CTX *ctx);
+/*
+ * bn_sqr_comba4() computes r[] = a[] * a[] using Comba multiplication
+ * (https://everything2.com/title/Comba+multiplication), where a is a
+ * four word array, producing an eight word array result.
+ */
#ifndef HAVE_BN_SQR_COMBA4
void
bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
{
- BN_ULONG c1, c2, c3;
-
- c1 = 0;
- c2 = 0;
- c3 = 0;
- sqr_add_c(a, 0, c1, c2, c3);
- r[0] = c1;
- c1 = 0;
- sqr_add_c2(a, 1, 0, c2, c3, c1);
- r[1] = c2;
- c2 = 0;
- sqr_add_c(a, 1, c3, c1, c2);
- sqr_add_c2(a, 2, 0, c3, c1, c2);
- r[2] = c3;
- c3 = 0;
- sqr_add_c2(a, 3, 0, c1, c2, c3);
- sqr_add_c2(a, 2, 1, c1, c2, c3);
- r[3] = c1;
- c1 = 0;
- sqr_add_c(a, 2, c2, c3, c1);
- sqr_add_c2(a, 3, 1, c2, c3, c1);
- r[4] = c2;
- c2 = 0;
- sqr_add_c2(a, 3, 2, c3, c1, c2);
- r[5] = c3;
- c3 = 0;
- sqr_add_c(a, 3, c1, c2, c3);
- r[6] = c1;
- r[7] = c2;
+ BN_ULONG c2, c1, c0;
+
+ bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]);
+
+ bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]);
+
+ bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]);
+
+ bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]);
+
+ bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &r[4]);
+
+ bn_mul2_mulw_addtw(a[3], a[2], 0, c2, c1, &c2, &c1, &r[5]);
+
+ bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &r[7], &r[6]);
}
#endif
+/*
+ * bn_sqr_comba8() computes r[] = a[] * a[] using Comba multiplication
+ * (https://everything2.com/title/Comba+multiplication), where a is an
+ * eight word array, producing an 16 word array result.
+ */
#ifndef HAVE_BN_SQR_COMBA8
void
bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
{
- BN_ULONG c1, c2, c3;
-
- c1 = 0;
- c2 = 0;
- c3 = 0;
- sqr_add_c(a, 0, c1, c2, c3);
- r[0] = c1;
- c1 = 0;
- sqr_add_c2(a, 1, 0, c2, c3, c1);
- r[1] = c2;
- c2 = 0;
- sqr_add_c(a, 1, c3, c1, c2);
- sqr_add_c2(a, 2, 0, c3, c1, c2);
- r[2] = c3;
- c3 = 0;
- sqr_add_c2(a, 3, 0, c1, c2, c3);
- sqr_add_c2(a, 2, 1, c1, c2, c3);
- r[3] = c1;
- c1 = 0;
- sqr_add_c(a, 2, c2, c3, c1);
- sqr_add_c2(a, 3, 1, c2, c3, c1);
- sqr_add_c2(a, 4, 0, c2, c3, c1);
- r[4] = c2;
- c2 = 0;
- sqr_add_c2(a, 5, 0, c3, c1, c2);
- sqr_add_c2(a, 4, 1, c3, c1, c2);
- sqr_add_c2(a, 3, 2, c3, c1, c2);
- r[5] = c3;
- c3 = 0;
- sqr_add_c(a, 3, c1, c2, c3);
- sqr_add_c2(a, 4, 2, c1, c2, c3);
- sqr_add_c2(a, 5, 1, c1, c2, c3);
- sqr_add_c2(a, 6, 0, c1, c2, c3);
- r[6] = c1;
- c1 = 0;
- sqr_add_c2(a, 7, 0, c2, c3, c1);
- sqr_add_c2(a, 6, 1, c2, c3, c1);
- sqr_add_c2(a, 5, 2, c2, c3, c1);
- sqr_add_c2(a, 4, 3, c2, c3, c1);
- r[7] = c2;
- c2 = 0;
- sqr_add_c(a, 4, c3, c1, c2);
- sqr_add_c2(a, 5, 3, c3, c1, c2);
- sqr_add_c2(a, 6, 2, c3, c1, c2);
- sqr_add_c2(a, 7, 1, c3, c1, c2);
- r[8] = c3;
- c3 = 0;
- sqr_add_c2(a, 7, 2, c1, c2, c3);
- sqr_add_c2(a, 6, 3, c1, c2, c3);
- sqr_add_c2(a, 5, 4, c1, c2, c3);
- r[9] = c1;
- c1 = 0;
- sqr_add_c(a, 5, c2, c3, c1);
- sqr_add_c2(a, 6, 4, c2, c3, c1);
- sqr_add_c2(a, 7, 3, c2, c3, c1);
- r[10] = c2;
- c2 = 0;
- sqr_add_c2(a, 7, 4, c3, c1, c2);
- sqr_add_c2(a, 6, 5, c3, c1, c2);
- r[11] = c3;
- c3 = 0;
- sqr_add_c(a, 6, c1, c2, c3);
- sqr_add_c2(a, 7, 5, c1, c2, c3);
- r[12] = c1;
- c1 = 0;
- sqr_add_c2(a, 7, 6, c2, c3, c1);
- r[13] = c2;
- c2 = 0;
- sqr_add_c(a, 7, c3, c1, c2);
- r[14] = c3;
- r[15] = c1;
+ BN_ULONG c2, c1, c0;
+
+ bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]);
+
+ bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]);
+
+ bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]);
+
+ bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]);
+
+ bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[4], a[0], c2, c1, c0, &c2, &c1, &r[4]);
+
+ bn_mul2_mulw_addtw(a[5], a[0], 0, c2, c1, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[4], a[1], c2, c1, c0, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[3], a[2], c2, c1, c0, &c2, &c1, &r[5]);
+
+ bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[4], a[2], c2, c1, c0, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[5], a[1], c2, c1, c0, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[6], a[0], c2, c1, c0, &c2, &c1, &r[6]);
+
+ bn_mul2_mulw_addtw(a[7], a[0], 0, c2, c1, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[6], a[1], c2, c1, c0, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[5], a[2], c2, c1, c0, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[4], a[3], c2, c1, c0, &c2, &c1, &r[7]);
+
+ bn_mulw_addtw(a[4], a[4], 0, c2, c1, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[5], a[3], c2, c1, c0, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[6], a[2], c2, c1, c0, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[7], a[1], c2, c1, c0, &c2, &c1, &r[8]);
+
+ bn_mul2_mulw_addtw(a[7], a[2], 0, c2, c1, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[6], a[3], c2, c1, c0, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[5], a[4], c2, c1, c0, &c2, &c1, &r[9]);
+
+ bn_mulw_addtw(a[5], a[5], 0, c2, c1, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[6], a[4], c2, c1, c0, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[7], a[3], c2, c1, c0, &c2, &c1, &r[10]);
+
+ bn_mul2_mulw_addtw(a[7], a[4], 0, c2, c1, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[6], a[5], c2, c1, c0, &c2, &c1, &r[11]);
+
+ bn_mulw_addtw(a[6], a[6], 0, c2, c1, &c2, &c1, &c0);
+ bn_mul2_mulw_addtw(a[7], a[5], c2, c1, c0, &c2, &c1, &r[12]);
+
+ bn_mul2_mulw_addtw(a[7], a[6], 0, c2, c1, &c2, &c1, &r[13]);
+
+ bn_mulw_addtw(a[7], a[7], 0, c2, c1, &c2, &r[15], &r[14]);
}
#endif