From cda0265d86a9224400bcad634cf48fcb7c610556 Mon Sep 17 00:00:00 2001 From: jsing <jsing@openbsd.org> Date: Fri, 7 Jul 2023 16:10:32 +0000 Subject: [PATCH] Provide optimised bn_mulw() for riscv64. This provides a 1.5-2x performance gain for BN multiplication, with a similar improvement being seen for RSA operations. --- lib/libcrypto/bn/arch/riscv64/bn_arch.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/lib/libcrypto/bn/arch/riscv64/bn_arch.h b/lib/libcrypto/bn/arch/riscv64/bn_arch.h index 354774cde3d..66256acad09 100644 --- a/lib/libcrypto/bn/arch/riscv64/bn_arch.h +++ b/lib/libcrypto/bn/arch/riscv64/bn_arch.h @@ -1,4 +1,4 @@ -/* $OpenBSD: bn_arch.h,v 1.4 2023/02/16 10:41:03 jsing Exp $ */ +/* $OpenBSD: bn_arch.h,v 1.5 2023/07/07 16:10:32 jsing Exp $ */ /* * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> * @@ -15,15 +15,17 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include <openssl/bn.h> + #ifndef HEADER_BN_ARCH_H #define HEADER_BN_ARCH_H #ifndef OPENSSL_NO_ASM -#if 0 /* Needs testing and enabling. */ #if defined(__GNUC__) -#define HAVE_BN_MULW +#define HAVE_BN_MULW + static inline void bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0) { @@ -34,15 +36,17 @@ bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0) * of these instructions is important, as they can potentially be fused * into a single operation. */ - __asm__ ("mulh %0, %2, %3; mul %1, %2, %3" - : "=&r"(r1), "=r"(r0) - : "r"(a), "r"(b)); + __asm__ ( + "mulhu %[r1], %[a], %[b] \n" + "mul %[r0], %[a], %[b] \n" + : [r1]"=&r"(r1), [r0]"=r"(r0) + : [a]"r"(a), [b]"r"(b)); *out_r1 = r1; *out_r0 = r0; } + #endif /* __GNUC__ */ -#endif #endif #endif -- 2.20.1