From 951afe033abee7c76882ffd14fdd72903b2f02bd Mon Sep 17 00:00:00 2001 From: jsing Date: Sat, 28 Jan 2023 17:07:02 +0000 Subject: [PATCH] Move the more readable version of bn_mul_mont() from bn_asm.c to bn_mont.c. Nothing actually uses this code, as OPENSSL_BN_ASM_MONT is not defined unless there is an assembly implementation available (not to mention that defining both OPENSSL_NO_ASM and OPENSSL_BN_ASM_MONT at the same time is extra strange). Discussed with tb@ --- lib/libcrypto/bn/bn_asm.c | 168 +------------------------------------ lib/libcrypto/bn/bn_mont.c | 55 +++++++++++- 2 files changed, 55 insertions(+), 168 deletions(-) diff --git a/lib/libcrypto/bn/bn_asm.c b/lib/libcrypto/bn/bn_asm.c index e2b584ee85c..bfdeabd9ebc 100644 --- a/lib/libcrypto/bn/bn_asm.c +++ b/lib/libcrypto/bn/bn_asm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: bn_asm.c,v 1.23 2023/01/23 12:17:57 jsing Exp $ */ +/* $OpenBSD: bn_asm.c,v 1.24 2023/01/28 17:07:02 jsing Exp $ */ /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. * @@ -63,169 +63,3 @@ #include "bn_local.h" -#if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) - -#ifdef OPENSSL_NO_ASM -#ifdef OPENSSL_BN_ASM_MONT -/* - * This is essentially reference implementation, which may or may not - * result in performance improvement. E.g. on IA-32 this routine was - * observed to give 40% faster rsa1024 private key operations and 10% - * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only - * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a - * reference implementation, one to be used as starting point for - * platform-specific assembler. Mentioned numbers apply to compiler - * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and - * can vary not only from platform to platform, but even for compiler - * versions. Assembler vs. assembler improvement coefficients can - * [and are known to] differ and are to be documented elsewhere. - */ -int -bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0p, int num) -{ - BN_ULONG c0, c1, ml, *tp, n0; -#ifdef mul64 - BN_ULONG mh; -#endif - int i = 0, j; - -#if 0 /* template for platform-specific implementation */ - if (ap == bp) - return bn_sqr_mont(rp, ap, np, n0p, num); -#endif - tp = reallocarray(NULL, num + 2, sizeof(BN_ULONG)); - if (tp == NULL) - return 0; - - n0 = *n0p; - - c0 = 0; - ml = bp[0]; -#ifdef mul64 - mh = HBITS(ml); - ml = LBITS(ml); - for (j = 0; j < num; ++j) - mul(tp[j], ap[j], ml, mh, c0); -#else - for (j = 0; j < num; ++j) - mul(tp[j], ap[j], ml, c0); -#endif - - tp[num] = c0; - tp[num + 1] = 0; - goto enter; - - for (i = 0; i < num; i++) { - c0 = 0; - ml = bp[i]; -#ifdef mul64 - mh = HBITS(ml); - ml = LBITS(ml); - for (j = 0; j < num; ++j) - mul_add(tp[j], ap[j], ml, mh, c0); -#else - for (j = 0; j < num; ++j) - mul_add(tp[j], ap[j], ml, c0); -#endif - c1 = (tp[num] + c0) & BN_MASK2; - tp[num] = c1; - tp[num + 1] = (c1 < c0 ? 1 : 0); -enter: - c1 = tp[0]; - ml = (c1 * n0) & BN_MASK2; - c0 = 0; -#ifdef mul64 - mh = HBITS(ml); - ml = LBITS(ml); - mul_add(c1, np[0], ml, mh, c0); -#else - mul_add(c1, ml, np[0], c0); -#endif - for (j = 1; j < num; j++) { - c1 = tp[j]; -#ifdef mul64 - mul_add(c1, np[j], ml, mh, c0); -#else - mul_add(c1, ml, np[j], c0); -#endif - tp[j - 1] = c1 & BN_MASK2; - } - c1 = (tp[num] + c0) & BN_MASK2; - tp[num - 1] = c1; - tp[num] = tp[num + 1] + (c1 < c0 ? 1 : 0); - } - - if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { - c0 = bn_sub_words(rp, tp, np, num); - if (tp[num] != 0 || c0 == 0) { - goto out; - } - } - memcpy(rp, tp, num * sizeof(BN_ULONG)); -out: - freezero(tp, (num + 2) * sizeof(BN_ULONG)); - return 1; -} -#else -/* - * Return value of 0 indicates that multiplication/convolution was not - * performed to signal the caller to fall down to alternative/original - * code-path. - */ -int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) - { return 0; -} -#endif /* OPENSSL_BN_ASM_MONT */ -#endif - -#else /* !BN_MUL_COMBA */ - -#ifdef OPENSSL_NO_ASM -#ifdef OPENSSL_BN_ASM_MONT -int -bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, - const BN_ULONG *np, const BN_ULONG *n0p, int num) -{ - BN_ULONG c0, c1, *tp, n0 = *n0p; - int i = 0, j; - - tp = calloc(NULL, num + 2, sizeof(BN_ULONG)); - if (tp == NULL) - return 0; - - for (i = 0; i < num; i++) { - c0 = bn_mul_add_words(tp, ap, num, bp[i]); - c1 = (tp[num] + c0) & BN_MASK2; - tp[num] = c1; - tp[num + 1] = (c1 < c0 ? 1 : 0); - - c0 = bn_mul_add_words(tp, np, num, tp[0] * n0); - c1 = (tp[num] + c0) & BN_MASK2; - tp[num] = c1; - tp[num + 1] += (c1 < c0 ? 1 : 0); - for (j = 0; j <= num; j++) - tp[j] = tp[j + 1]; - } - - if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { - c0 = bn_sub_words(rp, tp, np, num); - if (tp[num] != 0 || c0 == 0) { - goto out; - } - } - memcpy(rp, tp, num * sizeof(BN_ULONG)); -out: - freezero(tp, (num + 2) * sizeof(BN_ULONG)); - return 1; -} -#else -int -bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, - const BN_ULONG *np, const BN_ULONG *n0, int num) -{ - return 0; -} -#endif /* OPENSSL_BN_ASM_MONT */ -#endif - -#endif /* !BN_MUL_COMBA */ diff --git a/lib/libcrypto/bn/bn_mont.c b/lib/libcrypto/bn/bn_mont.c index f8b870266cc..8b364ff7165 100644 --- a/lib/libcrypto/bn/bn_mont.c +++ b/lib/libcrypto/bn/bn_mont.c @@ -1,4 +1,4 @@ -/* $OpenBSD: bn_mont.c,v 1.33 2023/01/16 16:53:19 jsing Exp $ */ +/* $OpenBSD: bn_mont.c,v 1.34 2023/01/28 17:07:02 jsing Exp $ */ /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. * @@ -121,6 +121,59 @@ #include "bn_local.h" +#ifdef OPENSSL_NO_ASM +#ifdef OPENSSL_BN_ASM_MONT +int +bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0p, int num) +{ + BN_ULONG c0, c1, *tp, n0 = *n0p; + int i = 0, j; + + tp = calloc(NULL, num + 2, sizeof(BN_ULONG)); + if (tp == NULL) + return 0; + + for (i = 0; i < num; i++) { + c0 = bn_mul_add_words(tp, ap, num, bp[i]); + c1 = (tp[num] + c0) & BN_MASK2; + tp[num] = c1; + tp[num + 1] = (c1 < c0 ? 1 : 0); + + c0 = bn_mul_add_words(tp, np, num, tp[0] * n0); + c1 = (tp[num] + c0) & BN_MASK2; + tp[num] = c1; + tp[num + 1] += (c1 < c0 ? 1 : 0); + for (j = 0; j <= num; j++) + tp[j] = tp[j + 1]; + } + + if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { + c0 = bn_sub_words(rp, tp, np, num); + if (tp[num] != 0 || c0 == 0) { + goto out; + } + } + memcpy(rp, tp, num * sizeof(BN_ULONG)); +out: + freezero(tp, (num + 2) * sizeof(BN_ULONG)); + return 1; +} +#else /* !OPENSSL_BN_ASM_MONT */ +int +bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, int num) +{ + /* + * Return value of 0 indicates that multiplication/convolution was not + * performed to signal the caller to fall down to alternative/original + * code-path. + */ + return 0; +} +#endif /* !OPENSSL_BN_ASM_MONT */ +#endif /* OPENSSL_NO_ASM */ + #define MONT_WORD /* use the faster word-based algorithm */ #ifdef MONT_WORD -- 2.20.1