-/* $OpenBSD: bn_asm.c,v 1.23 2023/01/23 12:17:57 jsing Exp $ */
+/* $OpenBSD: bn_asm.c,v 1.24 2023/01/28 17:07:02 jsing Exp $ */
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
#include "bn_local.h"
-#if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT)
-
-#ifdef OPENSSL_NO_ASM
-#ifdef OPENSSL_BN_ASM_MONT
-/*
- * This is essentially reference implementation, which may or may not
- * result in performance improvement. E.g. on IA-32 this routine was
- * observed to give 40% faster rsa1024 private key operations and 10%
- * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
- * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
- * reference implementation, one to be used as starting point for
- * platform-specific assembler. Mentioned numbers apply to compiler
- * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and
- * can vary not only from platform to platform, but even for compiler
- * versions. Assembler vs. assembler improvement coefficients can
- * [and are known to] differ and are to be documented elsewhere.
- */
-int
-bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0p, int num)
-{
- BN_ULONG c0, c1, ml, *tp, n0;
-#ifdef mul64
- BN_ULONG mh;
-#endif
- int i = 0, j;
-
-#if 0 /* template for platform-specific implementation */
- if (ap == bp)
- return bn_sqr_mont(rp, ap, np, n0p, num);
-#endif
- tp = reallocarray(NULL, num + 2, sizeof(BN_ULONG));
- if (tp == NULL)
- return 0;
-
- n0 = *n0p;
-
- c0 = 0;
- ml = bp[0];
-#ifdef mul64
- mh = HBITS(ml);
- ml = LBITS(ml);
- for (j = 0; j < num; ++j)
- mul(tp[j], ap[j], ml, mh, c0);
-#else
- for (j = 0; j < num; ++j)
- mul(tp[j], ap[j], ml, c0);
-#endif
-
- tp[num] = c0;
- tp[num + 1] = 0;
- goto enter;
-
- for (i = 0; i < num; i++) {
- c0 = 0;
- ml = bp[i];
-#ifdef mul64
- mh = HBITS(ml);
- ml = LBITS(ml);
- for (j = 0; j < num; ++j)
- mul_add(tp[j], ap[j], ml, mh, c0);
-#else
- for (j = 0; j < num; ++j)
- mul_add(tp[j], ap[j], ml, c0);
-#endif
- c1 = (tp[num] + c0) & BN_MASK2;
- tp[num] = c1;
- tp[num + 1] = (c1 < c0 ? 1 : 0);
-enter:
- c1 = tp[0];
- ml = (c1 * n0) & BN_MASK2;
- c0 = 0;
-#ifdef mul64
- mh = HBITS(ml);
- ml = LBITS(ml);
- mul_add(c1, np[0], ml, mh, c0);
-#else
- mul_add(c1, ml, np[0], c0);
-#endif
- for (j = 1; j < num; j++) {
- c1 = tp[j];
-#ifdef mul64
- mul_add(c1, np[j], ml, mh, c0);
-#else
- mul_add(c1, ml, np[j], c0);
-#endif
- tp[j - 1] = c1 & BN_MASK2;
- }
- c1 = (tp[num] + c0) & BN_MASK2;
- tp[num - 1] = c1;
- tp[num] = tp[num + 1] + (c1 < c0 ? 1 : 0);
- }
-
- if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
- c0 = bn_sub_words(rp, tp, np, num);
- if (tp[num] != 0 || c0 == 0) {
- goto out;
- }
- }
- memcpy(rp, tp, num * sizeof(BN_ULONG));
-out:
- freezero(tp, (num + 2) * sizeof(BN_ULONG));
- return 1;
-}
-#else
-/*
- * Return value of 0 indicates that multiplication/convolution was not
- * performed to signal the caller to fall down to alternative/original
- * code-path.
- */
-int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num)
- { return 0;
-}
-#endif /* OPENSSL_BN_ASM_MONT */
-#endif
-
-#else /* !BN_MUL_COMBA */
-
-#ifdef OPENSSL_NO_ASM
-#ifdef OPENSSL_BN_ASM_MONT
-int
-bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
- const BN_ULONG *np, const BN_ULONG *n0p, int num)
-{
- BN_ULONG c0, c1, *tp, n0 = *n0p;
- int i = 0, j;
-
- tp = calloc(NULL, num + 2, sizeof(BN_ULONG));
- if (tp == NULL)
- return 0;
-
- for (i = 0; i < num; i++) {
- c0 = bn_mul_add_words(tp, ap, num, bp[i]);
- c1 = (tp[num] + c0) & BN_MASK2;
- tp[num] = c1;
- tp[num + 1] = (c1 < c0 ? 1 : 0);
-
- c0 = bn_mul_add_words(tp, np, num, tp[0] * n0);
- c1 = (tp[num] + c0) & BN_MASK2;
- tp[num] = c1;
- tp[num + 1] += (c1 < c0 ? 1 : 0);
- for (j = 0; j <= num; j++)
- tp[j] = tp[j + 1];
- }
-
- if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
- c0 = bn_sub_words(rp, tp, np, num);
- if (tp[num] != 0 || c0 == 0) {
- goto out;
- }
- }
- memcpy(rp, tp, num * sizeof(BN_ULONG));
-out:
- freezero(tp, (num + 2) * sizeof(BN_ULONG));
- return 1;
-}
-#else
-int
-bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
- const BN_ULONG *np, const BN_ULONG *n0, int num)
-{
- return 0;
-}
-#endif /* OPENSSL_BN_ASM_MONT */
-#endif
-
-#endif /* !BN_MUL_COMBA */
-/* $OpenBSD: bn_mont.c,v 1.33 2023/01/16 16:53:19 jsing Exp $ */
+/* $OpenBSD: bn_mont.c,v 1.34 2023/01/28 17:07:02 jsing Exp $ */
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
#include "bn_local.h"
+#ifdef OPENSSL_NO_ASM
+#ifdef OPENSSL_BN_ASM_MONT
+int
+bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+ const BN_ULONG *np, const BN_ULONG *n0p, int num)
+{
+ BN_ULONG c0, c1, *tp, n0 = *n0p;
+ int i = 0, j;
+
+ tp = calloc(NULL, num + 2, sizeof(BN_ULONG));
+ if (tp == NULL)
+ return 0;
+
+ for (i = 0; i < num; i++) {
+ c0 = bn_mul_add_words(tp, ap, num, bp[i]);
+ c1 = (tp[num] + c0) & BN_MASK2;
+ tp[num] = c1;
+ tp[num + 1] = (c1 < c0 ? 1 : 0);
+
+ c0 = bn_mul_add_words(tp, np, num, tp[0] * n0);
+ c1 = (tp[num] + c0) & BN_MASK2;
+ tp[num] = c1;
+ tp[num + 1] += (c1 < c0 ? 1 : 0);
+ for (j = 0; j <= num; j++)
+ tp[j] = tp[j + 1];
+ }
+
+ if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
+ c0 = bn_sub_words(rp, tp, np, num);
+ if (tp[num] != 0 || c0 == 0) {
+ goto out;
+ }
+ }
+ memcpy(rp, tp, num * sizeof(BN_ULONG));
+out:
+ freezero(tp, (num + 2) * sizeof(BN_ULONG));
+ return 1;
+}
+#else /* !OPENSSL_BN_ASM_MONT */
+int
+bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+ const BN_ULONG *np, const BN_ULONG *n0, int num)
+{
+ /*
+ * Return value of 0 indicates that multiplication/convolution was not
+ * performed to signal the caller to fall down to alternative/original
+ * code-path.
+ */
+ return 0;
+}
+#endif /* !OPENSSL_BN_ASM_MONT */
+#endif /* OPENSSL_NO_ASM */
+
#define MONT_WORD /* use the faster word-based algorithm */
#ifdef MONT_WORD