From: jsing Date: Mon, 12 Jun 2023 16:17:24 +0000 (+0000) Subject: Provide and use various quad word primitives. X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=73e8eea7ecd835890990a264d0b2289a826d2cc6;p=openbsd Provide and use various quad word primitives. This includes bn_qwaddqw(), bn_qwsubqw(), bn_qwmulw_addw() and bn_qwmulw_addqw_addw(). These can typically be optimised on architectures that have a reasonable number of general purpose registers. ok tb@ --- diff --git a/lib/libcrypto/bn/bn_add.c b/lib/libcrypto/bn/bn_add.c index 92489b7da3a..36f160ab5fe 100644 --- a/lib/libcrypto/bn/bn_add.c +++ b/lib/libcrypto/bn/bn_add.c @@ -1,4 +1,4 @@ -/* $OpenBSD: bn_add.c,v 1.24 2023/02/22 05:46:37 jsing Exp $ */ +/* $OpenBSD: bn_add.c,v 1.25 2023/06/12 16:17:24 jsing Exp $ */ /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. * @@ -80,18 +80,14 @@ bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) if (n <= 0) return 0; -#ifndef OPENSSL_SMALL_FOOTPRINT while (n & ~3) { - bn_addw_addw(a[0], b[0], carry, &carry, &r[0]); - bn_addw_addw(a[1], b[1], carry, &carry, &r[1]); - bn_addw_addw(a[2], b[2], carry, &carry, &r[2]); - bn_addw_addw(a[3], b[3], carry, &carry, &r[3]); + bn_qwaddqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0], + carry, &carry, &r[3], &r[2], &r[1], &r[0]); a += 4; b += 4; r += 4; n -= 4; } -#endif while (n) { bn_addw_addw(a[0], b[0], carry, &carry, &r[0]); a++; @@ -165,18 +161,14 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) if (n <= 0) return 0; -#ifndef OPENSSL_SMALL_FOOTPRINT while (n & ~3) { - bn_subw_subw(a[0], b[0], borrow, &borrow, &r[0]); - bn_subw_subw(a[1], b[1], borrow, &borrow, &r[1]); - bn_subw_subw(a[2], b[2], borrow, &borrow, &r[2]); - bn_subw_subw(a[3], b[3], borrow, &borrow, &r[3]); + bn_qwsubqw(a[3], a[2], a[1], a[0], b[3], b[2], b[1], b[0], + borrow, &borrow, &r[3], &r[2], &r[1], &r[0]); a += 4; b += 4; r += 4; n -= 4; } -#endif while (n) { 
bn_subw_subw(a[0], b[0], borrow, &borrow, &r[0]); a++; diff --git a/lib/libcrypto/bn/bn_internal.h b/lib/libcrypto/bn/bn_internal.h index 8a729b8e44c..5f86e21330b 100644 --- a/lib/libcrypto/bn/bn_internal.h +++ b/lib/libcrypto/bn/bn_internal.h @@ -1,4 +1,4 @@ -/* $OpenBSD: bn_internal.h,v 1.11 2023/03/07 09:35:55 jsing Exp $ */ +/* $OpenBSD: bn_internal.h,v 1.12 2023/06/12 16:17:24 jsing Exp $ */ /* * Copyright (c) 2023 Joel Sing * @@ -122,6 +122,33 @@ bn_addw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1, } #endif +/* + * bn_qwaddqw() computes + * (out_carry:r3:r2:r1:r0) = (a3:a2:a1:a0) + (b3:b2:b1:b0) + carry, where a is a quad word, + * b is a quad word, and carry is a single word with value 0 or 1, producing a four + * word result and carry. + */ +#ifndef HAVE_BN_QWADDQW +static inline void +bn_qwaddqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3, + BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG carry, BN_ULONG *out_carry, + BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0) +{ + BN_ULONG r3, r2, r1, r0; + + bn_addw_addw(a0, b0, carry, &carry, &r0); /* a0/b0 are the least significant words; carry is read and overwritten by each step */ + bn_addw_addw(a1, b1, carry, &carry, &r1); + bn_addw_addw(a2, b2, carry, &carry, &r2); + bn_addw_addw(a3, b3, carry, &carry, &r3); + + *out_carry = carry; /* carry left over after the a3/b3 step */ + *out_r3 = r3; + *out_r2 = r2; + *out_r1 = r1; + *out_r0 = r0; +} +#endif + /* * bn_subw() computes r0 = a - b, where both inputs are single words, * producing a single word result and borrow. @@ -159,6 +186,33 @@ bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow, } #endif +/* + * bn_qwsubqw() computes + * (r3:r2:r1:r0) = (a3:a2:a1:a0) - (b3:b2:b1:b0) - borrow, where a is a quad word, + * b is a quad word, and borrow is a single word with value 0 or 1, producing a + * four word result and borrow. 
+ */ +#ifndef HAVE_BN_QWSUBQW +static inline void +bn_qwsubqw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b3, + BN_ULONG b2, BN_ULONG b1, BN_ULONG b0, BN_ULONG borrow, BN_ULONG *out_borrow, + BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, BN_ULONG *out_r0) +{ + BN_ULONG r3, r2, r1, r0; + + bn_subw_subw(a0, b0, borrow, &borrow, &r0); /* a0/b0 are the least significant words; borrow is read and overwritten by each step */ + bn_subw_subw(a1, b1, borrow, &borrow, &r1); + bn_subw_subw(a2, b2, borrow, &borrow, &r2); + bn_subw_subw(a3, b3, borrow, &borrow, &r3); + + *out_borrow = borrow; /* borrow left over after the a3/b3 step */ + *out_r3 = r3; + *out_r2 = r2; + *out_r1 = r1; + *out_r0 = r0; +} +#endif + /* * bn_mulw() computes (r1:r0) = a * b, where both inputs are single words, * producing a double word result. @@ -387,4 +441,58 @@ bn_mul2_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0 } #endif +/* + * bn_qwmulw_addw() computes (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + c, where a + * is a quad word, b is a single word and c is a single word, producing a five + * word result. + */ +#ifndef HAVE_BN_QWMULW_ADDW +static inline void +bn_qwmulw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, BN_ULONG b, + BN_ULONG c, BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2, + BN_ULONG *out_r1, BN_ULONG *out_r0) +{ + BN_ULONG r3, r2, r1, r0; + + bn_mulw_addw(a0, b, c, &c, &r0); /* c doubles as the running carry word: each step reads it and overwrites it via &c */ + bn_mulw_addw(a1, b, c, &c, &r1); + bn_mulw_addw(a2, b, c, &c, &r2); + bn_mulw_addw(a3, b, c, &c, &r3); + + *out_r4 = c; /* value left in c after the a3 step is the fifth (top) word */ + *out_r3 = r3; + *out_r2 = r2; + *out_r1 = r1; + *out_r0 = r0; +} +#endif + +/* + * bn_qwmulw_addqw_addw() computes + * (r4:r3:r2:r1:r0) = (a3:a2:a1:a0) * b + (c3:c2:c1:c0) + d, where a + * is a quad word, b is a single word, c is a quad word, and d is a single word, + * producing a five word result. 
+ */ +#ifndef HAVE_BN_QWMULW_ADDQW_ADDW +static inline void +bn_qwmulw_addqw_addw(BN_ULONG a3, BN_ULONG a2, BN_ULONG a1, BN_ULONG a0, + BN_ULONG b, BN_ULONG c3, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0, BN_ULONG d, + BN_ULONG *out_r4, BN_ULONG *out_r3, BN_ULONG *out_r2, BN_ULONG *out_r1, + BN_ULONG *out_r0) +{ + BN_ULONG r3, r2, r1, r0; + + bn_mulw_addw_addw(a0, b, c0, d, &d, &r0); /* d doubles as the running carry word: each step reads it and overwrites it via &d */ + bn_mulw_addw_addw(a1, b, c1, d, &d, &r1); + bn_mulw_addw_addw(a2, b, c2, d, &d, &r2); + bn_mulw_addw_addw(a3, b, c3, d, &d, &r3); + + *out_r4 = d; /* value left in d after the a3 step is the fifth (top) word */ + *out_r3 = r3; + *out_r2 = r2; + *out_r1 = r1; + *out_r0 = r0; +} +#endif + #endif diff --git a/lib/libcrypto/bn/bn_mul.c b/lib/libcrypto/bn/bn_mul.c index 118e8cddc5e..65088cc5c4d 100644 --- a/lib/libcrypto/bn/bn_mul.c +++ b/lib/libcrypto/bn/bn_mul.c @@ -1,4 +1,4 @@ -/* $OpenBSD: bn_mul.c,v 1.37 2023/04/19 10:51:22 jsing Exp $ */ +/* $OpenBSD: bn_mul.c,v 1.38 2023/06/12 16:17:24 jsing Exp $ */ /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. 
* @@ -210,17 +210,13 @@ bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) if (num <= 0) return 0; -#ifndef OPENSSL_SMALL_FOOTPRINT while (num & ~3) { - bn_mulw_addw(a[0], w, carry, &carry, &r[0]); - bn_mulw_addw(a[1], w, carry, &carry, &r[1]); - bn_mulw_addw(a[2], w, carry, &carry, &r[2]); - bn_mulw_addw(a[3], w, carry, &carry, &r[3]); + bn_qwmulw_addw(a[3], a[2], a[1], a[0], w, carry, &carry, + &r[3], &r[2], &r[1], &r[0]); a += 4; r += 4; num -= 4; } -#endif while (num) { bn_mulw_addw(a[0], w, carry, &carry, &r[0]); a++; @@ -247,17 +243,14 @@ bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) if (num <= 0) return 0; -#ifndef OPENSSL_SMALL_FOOTPRINT while (num & ~3) { - bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]); - bn_mulw_addw_addw(a[1], w, r[1], carry, &carry, &r[1]); - bn_mulw_addw_addw(a[2], w, r[2], carry, &carry, &r[2]); - bn_mulw_addw_addw(a[3], w, r[3], carry, &carry, &r[3]); + bn_qwmulw_addqw_addw(a[3], a[2], a[1], a[0], w, + r[3], r[2], r[1], r[0], carry, &carry, + &r[3], &r[2], &r[1], &r[0]); a += 4; r += 4; num -= 4; } -#endif while (num) { bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]); a++;