From: jsing Date: Tue, 26 Mar 2024 05:21:35 +0000 (+0000) Subject: Demacro MD5 and improve data loading. X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=bbd86a023317de6061133fc86b5d2d9b1ab5277b;p=openbsd Demacro MD5 and improve data loading. Use static inline functions instead of macros and improve handling of aligned data. Also number rounds as per RFC 1321. ok tb@ --- diff --git a/lib/libcrypto/md5/md5.c b/lib/libcrypto/md5/md5.c index c2ee2958df2..cb1a9a3a09c 100644 --- a/lib/libcrypto/md5/md5.c +++ b/lib/libcrypto/md5/md5.c @@ -1,4 +1,4 @@ -/* $OpenBSD: md5.c,v 1.18 2023/08/15 08:39:27 jsing Exp $ */ +/* $OpenBSD: md5.c,v 1.19 2024/03/26 05:21:35 jsing Exp $ */ /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) * All rights reserved. * @@ -90,47 +90,64 @@ void md5_block_asm_data_order(MD5_CTX *c, const void *p, size_t num); #include "md32_common.h" -/* -#define F(x,y,z) (((x) & (y)) | ((~(x)) & (z))) -#define G(x,y,z) (((x) & (z)) | ((y) & (~(z)))) -*/ +#ifndef MD5_ASM +static inline uint32_t +md5_F(uint32_t x, uint32_t y, uint32_t z) +{ + return (x & y) | (~x & z); +} -/* As pointed out by Wei Dai , the above can be - * simplified to the code below. Wei attributes these optimizations - * to Peter Gutmann's SHS code, and he attributes it to Rich Schroeppel. - */ -#define F(b,c,d) ((((c) ^ (d)) & (b)) ^ (d)) -#define G(b,c,d) ((((b) ^ (c)) & (d)) ^ (c)) -#define H(b,c,d) ((b) ^ (c) ^ (d)) -#define I(b,c,d) (((~(d)) | (b)) ^ (c)) +static inline uint32_t +md5_G(uint32_t x, uint32_t y, uint32_t z) +{ + return (x & z) | (y & ~z); +} -#define R0(a,b,c,d,k,s,t) { \ - a+=((k)+(t)+F((b),(c),(d))); \ - a=ROTATE(a,s); \ - a+=b; };\ +static inline uint32_t +md5_H(uint32_t x, uint32_t y, uint32_t z) +{ + return x ^ y ^ z; +} + +static inline uint32_t +md5_I(uint32_t x, uint32_t y, uint32_t z) +{ + return y ^ (x | ~z); +} -#define R1(a,b,c,d,k,s,t) { \ - a+=((k)+(t)+G((b),(c),(d))); \ - a=ROTATE(a,s); \ - a+=b; }; +static inline void +md5_round1(uint32_t *a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, + uint32_t t, uint32_t s) +{ + *a = b + crypto_rol_u32(*a + md5_F(b, c, d) + x + t, s); +} -#define R2(a,b,c,d,k,s,t) { \ - a+=((k)+(t)+H((b),(c),(d))); \ - a=ROTATE(a,s); \ - a+=b; }; +static inline void +md5_round2(uint32_t *a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, + uint32_t t, uint32_t s) +{ + *a = b + crypto_rol_u32(*a + md5_G(b, c, d) + x + t, s); +} -#define R3(a,b,c,d,k,s,t) { \ - a+=((k)+(t)+I((b),(c),(d))); \ - a=ROTATE(a,s); \ - a+=b; }; +static inline void +md5_round3(uint32_t *a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, + uint32_t t, uint32_t s) +{ + *a = b + crypto_rol_u32(*a + md5_H(b, c, d) + x + t, s); +} -/* Implemented from RFC1321 The MD5 Message-Digest Algorithm. */ +static inline void +md5_round4(uint32_t *a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, + uint32_t t, uint32_t s) +{ + *a = b + crypto_rol_u32(*a + md5_I(b, c, d) + x + t, s); +} -#ifndef MD5_ASM static void md5_block_data_order(MD5_CTX *c, const void *_in, size_t num) { const uint8_t *in = _in; + const MD5_LONG *in32; MD5_LONG A, B, C, D; MD5_LONG X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15; @@ -140,93 +157,114 @@ md5_block_data_order(MD5_CTX *c, const void *_in, size_t num) C = c->C; D = c->D; - for (; num--; ) { - X0 = crypto_load_le32toh(&in[0 * 4]); - X1 = crypto_load_le32toh(&in[1 * 4]); - X2 = crypto_load_le32toh(&in[2 * 4]); - X3 = crypto_load_le32toh(&in[3 * 4]); - X4 = crypto_load_le32toh(&in[4 * 4]); - X5 = crypto_load_le32toh(&in[5 * 4]); - X6 = crypto_load_le32toh(&in[6 * 4]); - X7 = crypto_load_le32toh(&in[7 * 4]); - X8 = crypto_load_le32toh(&in[8 * 4]); - X9 = crypto_load_le32toh(&in[9 * 4]); - X10 = crypto_load_le32toh(&in[10 * 4]); - X11 = crypto_load_le32toh(&in[11 * 4]); - X12 = crypto_load_le32toh(&in[12 * 4]); - X13 = crypto_load_le32toh(&in[13 * 4]); - X14 = crypto_load_le32toh(&in[14 * 4]); - X15 = crypto_load_le32toh(&in[15 * 4]); + while (num-- > 0) { + if ((uintptr_t)in % 4 == 0) { + /* Input is 32 bit aligned. */ + in32 = (const MD5_LONG *)in; + X0 = le32toh(in32[0]); + X1 = le32toh(in32[1]); + X2 = le32toh(in32[2]); + X3 = le32toh(in32[3]); + X4 = le32toh(in32[4]); + X5 = le32toh(in32[5]); + X6 = le32toh(in32[6]); + X7 = le32toh(in32[7]); + X8 = le32toh(in32[8]); + X9 = le32toh(in32[9]); + X10 = le32toh(in32[10]); + X11 = le32toh(in32[11]); + X12 = le32toh(in32[12]); + X13 = le32toh(in32[13]); + X14 = le32toh(in32[14]); + X15 = le32toh(in32[15]); + } else { + /* Input is not 32 bit aligned. */ + X0 = crypto_load_le32toh(&in[0 * 4]); + X1 = crypto_load_le32toh(&in[1 * 4]); + X2 = crypto_load_le32toh(&in[2 * 4]); + X3 = crypto_load_le32toh(&in[3 * 4]); + X4 = crypto_load_le32toh(&in[4 * 4]); + X5 = crypto_load_le32toh(&in[5 * 4]); + X6 = crypto_load_le32toh(&in[6 * 4]); + X7 = crypto_load_le32toh(&in[7 * 4]); + X8 = crypto_load_le32toh(&in[8 * 4]); + X9 = crypto_load_le32toh(&in[9 * 4]); + X10 = crypto_load_le32toh(&in[10 * 4]); + X11 = crypto_load_le32toh(&in[11 * 4]); + X12 = crypto_load_le32toh(&in[12 * 4]); + X13 = crypto_load_le32toh(&in[13 * 4]); + X14 = crypto_load_le32toh(&in[14 * 4]); + X15 = crypto_load_le32toh(&in[15 * 4]); + } in += MD5_CBLOCK; - /* Round 0 */ - R0(A, B, C, D, X0, 7, 0xd76aa478L); - R0(D, A, B, C, X1, 12, 0xe8c7b756L); - R0(C, D, A, B, X2, 17, 0x242070dbL); - R0(B, C, D, A, X3, 22, 0xc1bdceeeL); - R0(A, B, C, D, X4, 7, 0xf57c0fafL); - R0(D, A, B, C, X5, 12, 0x4787c62aL); - R0(C, D, A, B, X6, 17, 0xa8304613L); - R0(B, C, D, A, X7, 22, 0xfd469501L); - R0(A, B, C, D, X8, 7, 0x698098d8L); - R0(D, A, B, C, X9, 12, 0x8b44f7afL); - R0(C, D, A, B, X10, 17, 0xffff5bb1L); - R0(B, C, D, A, X11, 22, 0x895cd7beL); - R0(A, B, C, D, X12, 7, 0x6b901122L); - R0(D, A, B, C, X13, 12, 0xfd987193L); - R0(C, D, A, B, X14, 17, 0xa679438eL); - R0(B, C, D, A, X15, 22, 0x49b40821L); - /* Round 1 */ - R1(A, B, C, D, X1, 5, 0xf61e2562L); - R1(D, A, B, C, X6, 9, 0xc040b340L); - R1(C, D, A, B, X11, 14, 0x265e5a51L); - R1(B, C, D, A, X0, 20, 0xe9b6c7aaL); - R1(A, B, C, D, X5, 5, 0xd62f105dL); - R1(D, A, B, C, X10, 9, 0x02441453L); - R1(C, D, A, B, X15, 14, 0xd8a1e681L); - R1(B, C, D, A, X4, 20, 0xe7d3fbc8L); - R1(A, B, C, D, X9, 5, 0x21e1cde6L); - R1(D, A, B, C, X14, 9, 0xc33707d6L); - R1(C, D, A, B, X3, 14, 0xf4d50d87L); - R1(B, C, D, A, X8, 20, 0x455a14edL); - R1(A, B, C, D, X13, 5, 0xa9e3e905L); - R1(D, A, B, C, X2, 9, 0xfcefa3f8L); - R1(C, D, A, B, X7, 14, 0x676f02d9L); - R1(B, C, D, A, X12, 20, 0x8d2a4c8aL); - /* Round 2 */ - R2(A, B, C, D, X5, 4, 0xfffa3942L); - R2(D, A, B, C, X8, 11, 0x8771f681L); - R2(C, D, A, B, X11, 16, 0x6d9d6122L); - R2(B, C, D, A, X14, 23, 0xfde5380cL); - R2(A, B, C, D, X1, 4, 0xa4beea44L); - R2(D, A, B, C, X4, 11, 0x4bdecfa9L); - R2(C, D, A, B, X7, 16, 0xf6bb4b60L); - R2(B, C, D, A, X10, 23, 0xbebfbc70L); - R2(A, B, C, D, X13, 4, 0x289b7ec6L); - R2(D, A, B, C, X0, 11, 0xeaa127faL); - R2(C, D, A, B, X3, 16, 0xd4ef3085L); - R2(B, C, D, A, X6, 23, 0x04881d05L); - R2(A, B, C, D, X9, 4, 0xd9d4d039L); - R2(D, A, B, C, X12, 11, 0xe6db99e5L); - R2(C, D, A, B, X15, 16, 0x1fa27cf8L); - R2(B, C, D, A, X2, 23, 0xc4ac5665L); - /* Round 3 */ - R3(A, B, C, D, X0, 6, 0xf4292244L); - R3(D, A, B, C, X7, 10, 0x432aff97L); - R3(C, D, A, B, X14, 15, 0xab9423a7L); - R3(B, C, D, A, X5, 21, 0xfc93a039L); - R3(A, B, C, D, X12, 6, 0x655b59c3L); - R3(D, A, B, C, X3, 10, 0x8f0ccc92L); - R3(C, D, A, B, X10, 15, 0xffeff47dL); - R3(B, C, D, A, X1, 21, 0x85845dd1L); - R3(A, B, C, D, X8, 6, 0x6fa87e4fL); - R3(D, A, B, C, X15, 10, 0xfe2ce6e0L); - R3(C, D, A, B, X6, 15, 0xa3014314L); - R3(B, C, D, A, X13, 21, 0x4e0811a1L); - R3(A, B, C, D, X4, 6, 0xf7537e82L); - R3(D, A, B, C, X11, 10, 0xbd3af235L); - R3(C, D, A, B, X2, 15, 0x2ad7d2bbL); - R3(B, C, D, A, X9, 21, 0xeb86d391L); + md5_round1(&A, B, C, D, X0, 0xd76aa478L, 7); + md5_round1(&D, A, B, C, X1, 0xe8c7b756L, 12); + md5_round1(&C, D, A, B, X2, 0x242070dbL, 17); + md5_round1(&B, C, D, A, X3, 0xc1bdceeeL, 22); + md5_round1(&A, B, C, D, X4, 0xf57c0fafL, 7); + md5_round1(&D, A, B, C, X5, 0x4787c62aL, 12); + md5_round1(&C, D, A, B, X6, 0xa8304613L, 17); + md5_round1(&B, C, D, A, X7, 0xfd469501L, 22); + md5_round1(&A, B, C, D, X8, 0x698098d8L, 7); + md5_round1(&D, A, B, C, X9, 0x8b44f7afL, 12); + md5_round1(&C, D, A, B, X10, 0xffff5bb1L, 17); + md5_round1(&B, C, D, A, X11, 0x895cd7beL, 22); + md5_round1(&A, B, C, D, X12, 0x6b901122L, 7); + md5_round1(&D, A, B, C, X13, 0xfd987193L, 12); + md5_round1(&C, D, A, B, X14, 0xa679438eL, 17); + md5_round1(&B, C, D, A, X15, 0x49b40821L, 22); + + md5_round2(&A, B, C, D, X1, 0xf61e2562L, 5); + md5_round2(&D, A, B, C, X6, 0xc040b340L, 9); + md5_round2(&C, D, A, B, X11, 0x265e5a51L, 14); + md5_round2(&B, C, D, A, X0, 0xe9b6c7aaL, 20); + md5_round2(&A, B, C, D, X5, 0xd62f105dL, 5); + md5_round2(&D, A, B, C, X10, 0x02441453L, 9); + md5_round2(&C, D, A, B, X15, 0xd8a1e681L, 14); + md5_round2(&B, C, D, A, X4, 0xe7d3fbc8L, 20); + md5_round2(&A, B, C, D, X9, 0x21e1cde6L, 5); + md5_round2(&D, A, B, C, X14, 0xc33707d6L, 9); + md5_round2(&C, D, A, B, X3, 0xf4d50d87L, 14); + md5_round2(&B, C, D, A, X8, 0x455a14edL, 20); + md5_round2(&A, B, C, D, X13, 0xa9e3e905L, 5); + md5_round2(&D, A, B, C, X2, 0xfcefa3f8L, 9); + md5_round2(&C, D, A, B, X7, 0x676f02d9L, 14); + md5_round2(&B, C, D, A, X12, 0x8d2a4c8aL, 20); + + md5_round3(&A, B, C, D, X5, 0xfffa3942L, 4); + md5_round3(&D, A, B, C, X8, 0x8771f681L, 11); + md5_round3(&C, D, A, B, X11, 0x6d9d6122L, 16); + md5_round3(&B, C, D, A, X14, 0xfde5380cL, 23); + md5_round3(&A, B, C, D, X1, 0xa4beea44L, 4); + md5_round3(&D, A, B, C, X4, 0x4bdecfa9L, 11); + md5_round3(&C, D, A, B, X7, 0xf6bb4b60L, 16); + md5_round3(&B, C, D, A, X10, 0xbebfbc70L, 23); + md5_round3(&A, B, C, D, X13, 0x289b7ec6L, 4); + md5_round3(&D, A, B, C, X0, 0xeaa127faL, 11); + md5_round3(&C, D, A, B, X3, 0xd4ef3085L, 16); + md5_round3(&B, C, D, A, X6, 0x04881d05L, 23); + md5_round3(&A, B, C, D, X9, 0xd9d4d039L, 4); + md5_round3(&D, A, B, C, X12, 0xe6db99e5L, 11); + md5_round3(&C, D, A, B, X15, 0x1fa27cf8L, 16); + md5_round3(&B, C, D, A, X2, 0xc4ac5665L, 23); + + md5_round4(&A, B, C, D, X0, 0xf4292244L, 6); + md5_round4(&D, A, B, C, X7, 0x432aff97L, 10); + md5_round4(&C, D, A, B, X14, 0xab9423a7L, 15); + md5_round4(&B, C, D, A, X5, 0xfc93a039L, 21); + md5_round4(&A, B, C, D, X12, 0x655b59c3L, 6); + md5_round4(&D, A, B, C, X3, 0x8f0ccc92L, 10); + md5_round4(&C, D, A, B, X10, 0xffeff47dL, 15); + md5_round4(&B, C, D, A, X1, 0x85845dd1L, 21); + md5_round4(&A, B, C, D, X8, 0x6fa87e4fL, 6); + md5_round4(&D, A, B, C, X15, 0xfe2ce6e0L, 10); + md5_round4(&C, D, A, B, X6, 0xa3014314L, 15); + md5_round4(&B, C, D, A, X13, 0x4e0811a1L, 21); + md5_round4(&A, B, C, D, X4, 0xf7537e82L, 6); + md5_round4(&D, A, B, C, X11, 0xbd3af235L, 10); + md5_round4(&C, D, A, B, X2, 0x2ad7d2bbL, 15); + md5_round4(&B, C, D, A, X9, 0xeb86d391L, 21); A = c->A += A; B = c->B += B;