From ac15c2ab71f47c7a837f117ec4ae2ed336301e91 Mon Sep 17 00:00:00 2001 From: jsing Date: Wed, 17 May 2023 06:37:14 +0000 Subject: [PATCH] Clean up alignment handling for SHA-512. All assembly implementations are required to perform their own alignment handling. In the case of the C implementation, on strict alignment platforms, unaligned data will be copied into an aligned buffer. However, most platforms then perform byte-by-byte reads (via the PULL64 macros). Instead, remove SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA and move alignment handling to sha512_block_data_order() - if the data is aligned then simply perform 64 bit loads and then do endian conversion via be64toh(). If the data is unaligned then use memcpy() and be64toh() (in the form of crypto_load_be64toh()). Overall this reduces complexity and can improve performance (on aarch64 we get a ~10% performance gain with aligned input and about 1-2% gain on armv7), while the same movq/bswapq is generated for amd64 and movl/bswapl for i386. ok tb@ --- lib/libcrypto/crypto_internal.h | 55 ++++++++++++--- lib/libcrypto/sha/sha512.c | 121 +++++++++++++------------------- 2 files changed, 95 insertions(+), 81 deletions(-) diff --git a/lib/libcrypto/crypto_internal.h b/lib/libcrypto/crypto_internal.h index 24a06256dbf..2e6ab826929 100644 --- a/lib/libcrypto/crypto_internal.h +++ b/lib/libcrypto/crypto_internal.h @@ -1,4 +1,4 @@ -/* $OpenBSD: crypto_internal.h,v 1.3 2023/04/14 10:42:51 jsing Exp $ */ +/* $OpenBSD: crypto_internal.h,v 1.4 2023/05/17 06:37:14 jsing Exp $ */ /* * Copyright (c) 2023 Joel Sing * @@ -22,14 +22,34 @@ #ifndef HEADER_CRYPTO_INTERNAL_H #define HEADER_CRYPTO_INTERNAL_H +#define CTASSERT(x) \ + extern char _ctassert[(x) ? 1 : -1] __attribute__((__unused__)) + +/* + * crypto_load_be32toh() loads a 32 bit unsigned big endian value as a 32 bit + * unsigned host endian value, from the specified address in memory. The memory + * address may have any alignment. 
+ */ +#ifndef HAVE_CRYPTO_LOAD_BE32TOH +static inline uint32_t +crypto_load_be32toh(const void *src) +{ + uint32_t v; + + memcpy(&v, src, sizeof(v)); + + return be32toh(v); +} +#endif + /* - * crypto_store_htobe32() stores a 32 bit unsigned host endian value - * as a 32 bit unsigned big endian value, at the specified location in - * memory. The memory location may have any alignment. + * crypto_store_htobe32() stores a 32 bit unsigned host endian value as a 32 bit + * unsigned big endian value, at the specified address in memory. The memory + * address may have any alignment. */ #ifndef HAVE_CRYPTO_STORE_HTOBE32 static inline void -crypto_store_htobe32(uint8_t *dst, uint32_t v) +crypto_store_htobe32(void *dst, uint32_t v) { v = htobe32(v); memcpy(dst, &v, sizeof(v)); @@ -37,13 +57,30 @@ crypto_store_htobe32(uint8_t *dst, uint32_t v) #endif /* - * crypto_store_htobe64() stores a 64 bit unsigned host endian value - * as a 64 bit unsigned big endian value, at the specified location in - * memory. The memory location may have any alignment. + * crypto_load_be64toh() loads a 64 bit unsigned big endian value as a 64 bit + * unsigned host endian value, from the specified address in memory. The memory + * address may have any alignment. + */ +#ifndef HAVE_CRYPTO_LOAD_BE64TOH +static inline uint64_t +crypto_load_be64toh(const void *src) +{ + uint64_t v; + + memcpy(&v, src, sizeof(v)); + + return be64toh(v); +} +#endif + +/* + * crypto_store_htobe64() stores a 64 bit unsigned host endian value as a 64 bit + * unsigned big endian value, at the specified address in memory. The memory + * address may have any alignment. 
*/ #ifndef HAVE_CRYPTO_STORE_HTOBE64 static inline void -crypto_store_htobe64(uint8_t *dst, uint64_t v) +crypto_store_htobe64(void *dst, uint64_t v) { v = htobe64(v); memcpy(dst, &v, sizeof(v)); diff --git a/lib/libcrypto/sha/sha512.c b/lib/libcrypto/sha/sha512.c index c0752bd2c70..c88ef057dd7 100644 --- a/lib/libcrypto/sha/sha512.c +++ b/lib/libcrypto/sha/sha512.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sha512.c,v 1.36 2023/05/16 07:04:57 jsing Exp $ */ +/* $OpenBSD: sha512.c,v 1.37 2023/05/17 06:37:14 jsing Exp $ */ /* ==================================================================== * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved. * @@ -66,9 +66,8 @@ #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512) -#if !defined(__STRICT_ALIGNMENT) || defined(SHA512_ASM) -#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA -#endif +/* Ensure that SHA_LONG64 and uint64_t are equivalent. */ +CTASSERT(sizeof(SHA_LONG64) == sizeof(uint64_t)); #ifdef SHA512_ASM void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num); @@ -118,31 +117,6 @@ static const SHA_LONG64 K512[80] = { U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817), }; -#if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) -# if defined(__x86_64) || defined(__x86_64__) -# define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \ - asm ("bswapq %0" \ - : "=r"(ret) \ - : "0"(ret)); ret; }) -# elif (defined(__i386) || defined(__i386__)) -# define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\ - unsigned int hi=p[0],lo=p[1]; \ - asm ("bswapl %0; bswapl %1;" \ - : "=r"(lo),"=r"(hi) \ - : "0"(lo),"1"(hi)); \ - ((SHA_LONG64)hi)<<32|lo; }) -# endif -#endif - -#ifndef PULL64 -#if BYTE_ORDER == BIG_ENDIAN -#define PULL64(x) (x) -#else -#define B(x, j) (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8)) -#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7)) -#endif -#endif - #define ROTR(x, s) 
crypto_ror_u64(x, s) #define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) @@ -185,37 +159,60 @@ sha512_block_data_order(SHA512_CTX *ctx, const void *_in, size_t num) g = ctx->h[6]; h = ctx->h[7]; - X[0] = PULL64(in[0]); + if ((uintptr_t)in % sizeof(SHA_LONG64) == 0) { + /* Input is 64 bit aligned. */ + X[0] = be64toh(in[0]); + X[1] = be64toh(in[1]); + X[2] = be64toh(in[2]); + X[3] = be64toh(in[3]); + X[4] = be64toh(in[4]); + X[5] = be64toh(in[5]); + X[6] = be64toh(in[6]); + X[7] = be64toh(in[7]); + X[8] = be64toh(in[8]); + X[9] = be64toh(in[9]); + X[10] = be64toh(in[10]); + X[11] = be64toh(in[11]); + X[12] = be64toh(in[12]); + X[13] = be64toh(in[13]); + X[14] = be64toh(in[14]); + X[15] = be64toh(in[15]); + } else { + /* Input is not 64 bit aligned. */ + X[0] = crypto_load_be64toh(&in[0]); + X[1] = crypto_load_be64toh(&in[1]); + X[2] = crypto_load_be64toh(&in[2]); + X[3] = crypto_load_be64toh(&in[3]); + X[4] = crypto_load_be64toh(&in[4]); + X[5] = crypto_load_be64toh(&in[5]); + X[6] = crypto_load_be64toh(&in[6]); + X[7] = crypto_load_be64toh(&in[7]); + X[8] = crypto_load_be64toh(&in[8]); + X[9] = crypto_load_be64toh(&in[9]); + X[10] = crypto_load_be64toh(&in[10]); + X[11] = crypto_load_be64toh(&in[11]); + X[12] = crypto_load_be64toh(&in[12]); + X[13] = crypto_load_be64toh(&in[13]); + X[14] = crypto_load_be64toh(&in[14]); + X[15] = crypto_load_be64toh(&in[15]); + } + in += SHA_LBLOCK; + ROUND_00_15(0, a, b, c, d, e, f, g, h, X[0]); - X[1] = PULL64(in[1]); ROUND_00_15(1, h, a, b, c, d, e, f, g, X[1]); - X[2] = PULL64(in[2]); ROUND_00_15(2, g, h, a, b, c, d, e, f, X[2]); - X[3] = PULL64(in[3]); ROUND_00_15(3, f, g, h, a, b, c, d, e, X[3]); - X[4] = PULL64(in[4]); ROUND_00_15(4, e, f, g, h, a, b, c, d, X[4]); - X[5] = PULL64(in[5]); ROUND_00_15(5, d, e, f, g, h, a, b, c, X[5]); - X[6] = PULL64(in[6]); ROUND_00_15(6, c, d, e, f, g, h, a, b, X[6]); - X[7] = PULL64(in[7]); ROUND_00_15(7, b, c, d, e, f, g, h, a, X[7]); - X[8] = PULL64(in[8]); ROUND_00_15(8, a, b, 
c, d, e, f, g, h, X[8]); - X[9] = PULL64(in[9]); ROUND_00_15(9, h, a, b, c, d, e, f, g, X[9]); - X[10] = PULL64(in[10]); ROUND_00_15(10, g, h, a, b, c, d, e, f, X[10]); - X[11] = PULL64(in[11]); ROUND_00_15(11, f, g, h, a, b, c, d, e, X[11]); - X[12] = PULL64(in[12]); ROUND_00_15(12, e, f, g, h, a, b, c, d, X[12]); - X[13] = PULL64(in[13]); ROUND_00_15(13, d, e, f, g, h, a, b, c, X[13]); - X[14] = PULL64(in[14]); ROUND_00_15(14, c, d, e, f, g, h, a, b, X[14]); - X[15] = PULL64(in[15]); ROUND_00_15(15, b, c, d, e, f, g, h, a, X[15]); for (i = 16; i < 80; i += 16) { @@ -245,8 +242,6 @@ sha512_block_data_order(SHA512_CTX *ctx, const void *_in, size_t num) ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h; - - in += SHA_LBLOCK; } } @@ -323,21 +318,15 @@ SHA512_Init(SHA512_CTX *c) void SHA512_Transform(SHA512_CTX *c, const unsigned char *data) { -#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA - if ((size_t)data % sizeof(c->u.d[0]) != 0) { - memcpy(c->u.p, data, sizeof(c->u.p)); - data = c->u.p; - } -#endif sha512_block_data_order(c, data, 1); } int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len) { - SHA_LONG64 l; - unsigned char *p = c->u.p; - const unsigned char *data = (const unsigned char *)_data; + const unsigned char *data = _data; + unsigned char *p = c->u.p; + SHA_LONG64 l; if (len == 0) return 1; @@ -366,22 +355,10 @@ SHA512_Update(SHA512_CTX *c, const void *_data, size_t len) } if (len >= sizeof(c->u)) { -#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA - if ((size_t)data % sizeof(c->u.d[0]) != 0) { - while (len >= sizeof(c->u)) { - memcpy(p, data, sizeof(c->u)); - sha512_block_data_order(c, p, 1); - len -= sizeof(c->u); - data += sizeof(c->u); - } - } else -#endif - { - sha512_block_data_order(c, data, len/sizeof(c->u)); - data += len; - len %= sizeof(c->u); - data -= len; - } + sha512_block_data_order(c, data, len/sizeof(c->u)); + data += len; + len %= sizeof(c->u); + data -= len; } if (len != 0) { -- 2.20.1