From 9b1106c6549294debbff25e74017bd91867d8019 Mon Sep 17 00:00:00 2001
From: jsing
Date: Fri, 18 Oct 2024 13:36:24 +0000
Subject: [PATCH] Provide crypto_cpu_caps_init() for amd64.

This is a CPU capability detection implementation in C, with minimal
inline assembly (for cpuid and xgetbv). This replaces the assembly mess
generated by x86_64cpuid.pl.

Rather than populating OPENSSL_ia32cap_P directly with CPUID output, just
set the bits that the remaining perlasm checks (namely AESNI, AVX, FXSR,
INTEL, HT, MMX, PCLMUL, SSE, SSE2 and SSSE3).

ok joshua@ tb@
---
 lib/libcrypto/arch/amd64/Makefile.inc      |  12 +--
 lib/libcrypto/arch/amd64/crypto_arch.h     |   4 +-
 lib/libcrypto/arch/amd64/crypto_cpu_caps.c | 128 +++++++++++++++++++++
 lib/libcrypto/x86_arch.h                   |   7 +-
 4 files changed, 140 insertions(+), 11 deletions(-)
 create mode 100644 lib/libcrypto/arch/amd64/crypto_cpu_caps.c

diff --git a/lib/libcrypto/arch/amd64/Makefile.inc b/lib/libcrypto/arch/amd64/Makefile.inc
index dd136f76a73..2f41f443819 100644
--- a/lib/libcrypto/arch/amd64/Makefile.inc
+++ b/lib/libcrypto/arch/amd64/Makefile.inc
@@ -1,10 +1,12 @@
-# $OpenBSD: Makefile.inc,v 1.29 2024/08/11 13:02:39 jsing Exp $
+# $OpenBSD: Makefile.inc,v 1.30 2024/10/18 13:36:24 jsing Exp $
 
 # amd64-specific libcrypto build rules
 
 # all amd64 code generators use this
 EXTRA_PL = ${LCRYPTO_SRC}/perlasm/x86_64-xlate.pl
 
+SRCS += crypto_cpu_caps.c
+
 # aes
 CFLAGS+= -DAES_ASM
 SSLASM+= aes aes-x86_64
@@ -69,12 +71,4 @@ ${f}.S: ${LCRYPTO_SRC}/${dir}/asm/${f}.pl ${EXTRA_PL}
 	    /usr/bin/perl ./asm/${f}.pl openbsd) > ${.TARGET}
 .endfor
 
-CFLAGS+= -DOPENSSL_CPUID_OBJ
-SRCS+= x86_64cpuid.S
-GENERATED+=x86_64cpuid.S
-
-x86_64cpuid.S: ${LCRYPTO_SRC}/x86_64cpuid.pl ${EXTRA_PL}
-	(cd ${LCRYPTO_SRC}/${dir} ; \
-		/usr/bin/perl ./x86_64cpuid.pl) > ${.TARGET}
-
 CFLAGS+=-fret-clean
diff --git a/lib/libcrypto/arch/amd64/crypto_arch.h b/lib/libcrypto/arch/amd64/crypto_arch.h
index 8e91c255291..6feeaa209e8 100644
--- a/lib/libcrypto/arch/amd64/crypto_arch.h
+++ b/lib/libcrypto/arch/amd64/crypto_arch.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: crypto_arch.h,v 1.1 2024/08/11 13:02:39 jsing Exp $ */
+/*	$OpenBSD: crypto_arch.h,v 1.2 2024/10/18 13:36:24 jsing Exp $ */
 /*
  * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
  *
@@ -18,6 +18,8 @@
 #ifndef HEADER_CRYPTO_ARCH_H
 #define HEADER_CRYPTO_ARCH_H
 
+#define HAVE_CRYPTO_CPU_CAPS_INIT
+
 #ifndef OPENSSL_NO_ASM
 
 #define HAVE_AES_CBC_ENCRYPT_INTERNAL
diff --git a/lib/libcrypto/arch/amd64/crypto_cpu_caps.c b/lib/libcrypto/arch/amd64/crypto_cpu_caps.c
new file mode 100644
index 00000000000..8cbf24edbd6
--- /dev/null
+++ b/lib/libcrypto/arch/amd64/crypto_cpu_caps.c
@@ -0,0 +1,128 @@
+/* $OpenBSD: crypto_cpu_caps.c,v 1.1 2024/10/18 13:36:24 jsing Exp $ */
+/*
+ * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+
+#include <openssl/crypto.h>
+
+#include "x86_arch.h"
+
+/* Legacy architecture specific capabilities, used by perlasm. */
+extern uint64_t OPENSSL_ia32cap_P;
+
+/* Machine independent CPU capabilities. */
+extern uint64_t crypto_cpu_caps;
+
+/*
+ * Execute CPUID for the leaf given in eax (with ecx set to zero) and store
+ * the resulting register values through each non-NULL output pointer. When
+ * built with OPENSSL_NO_ASM no instruction is issued and all outputs are zero.
+ */
+static inline void
+cpuid(uint32_t eax, uint32_t *out_eax, uint32_t *out_ebx, uint32_t *out_ecx,
+    uint32_t *out_edx)
+{
+	uint32_t ebx = 0, ecx = 0, edx = 0;
+
+#ifndef OPENSSL_NO_ASM
+	__asm__ ("cpuid": "+a"(eax), "+b"(ebx), "+c"(ecx), "+d"(edx));
+#else
+	eax = 0;
+#endif
+
+	if (out_eax != NULL)
+		*out_eax = eax;
+	if (out_ebx != NULL)
+		*out_ebx = ebx;
+	if (out_ecx != NULL)
+		*out_ecx = ecx;
+	if (out_edx != NULL)
+		*out_edx = edx;
+}
+
+/*
+ * Execute XGETBV with the register index given in ecx and store the resulting
+ * eax and edx values through each non-NULL output pointer. When built with
+ * OPENSSL_NO_ASM no instruction is issued and both outputs are zero.
+ */
+static inline void
+xgetbv(uint32_t ecx, uint32_t *out_eax, uint32_t *out_edx)
+{
+	uint32_t eax = 0, edx = 0;
+
+#ifndef OPENSSL_NO_ASM
+	__asm__ ("xgetbv": "+a"(eax), "+c"(ecx), "+d"(edx));
+#endif
+
+	if (out_eax != NULL)
+		*out_eax = eax;
+	if (out_edx != NULL)
+		*out_edx = edx;
+}
+
+/*
+ * Detect the CPU features that the remaining perlasm checks, store them in
+ * OPENSSL_ia32cap_P and set the corresponding bits in crypto_cpu_caps.
+ */
+void
+crypto_cpu_caps_init(void)
+{
+	uint32_t eax, ebx, ecx, edx;
+	uint64_t caps = 0;
+
+	cpuid(0, &eax, &ebx, &ecx, &edx);
+
+	/* "GenuineIntel" in little endian. */
+	if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
+		caps |= CPUCAP_MASK_INTEL;
+
+	if (eax < 1)
+		return;
+
+	cpuid(1, &eax, &ebx, &ecx, &edx);
+
+	if ((edx & IA32CAP_MASK0_FXSR) != 0)
+		caps |= CPUCAP_MASK_FXSR;
+	if ((edx & IA32CAP_MASK0_HT) != 0)
+		caps |= CPUCAP_MASK_HT;
+	if ((edx & IA32CAP_MASK0_MMX) != 0)
+		caps |= CPUCAP_MASK_MMX;
+	if ((edx & IA32CAP_MASK0_SSE) != 0)
+		caps |= CPUCAP_MASK_SSE;
+	if ((edx & IA32CAP_MASK0_SSE2) != 0)
+		caps |= CPUCAP_MASK_SSE2;
+
+	if ((ecx & IA32CAP_MASK1_AESNI) != 0)
+		caps |= CPUCAP_MASK_AESNI;
+	if ((ecx & IA32CAP_MASK1_PCLMUL) != 0)
+		caps |= CPUCAP_MASK_PCLMUL;
+	if ((ecx & IA32CAP_MASK1_SSSE3) != 0)
+		caps |= CPUCAP_MASK_SSSE3;
+
+	/* AVX requires OSXSAVE and XMM/YMM state to be enabled. */
+	if ((ecx & IA32CAP_MASK1_OSXSAVE) != 0) {
+		xgetbv(0, &eax, NULL);
+		if (((eax >> 1) & 3) == 3 && (ecx & IA32CAP_MASK1_AVX) != 0)
+			caps |= CPUCAP_MASK_AVX;
+	}
+
+	/* Set machine independent CPU capabilities. */
+	if ((caps & CPUCAP_MASK_AESNI) != 0)
+		crypto_cpu_caps |= CRYPTO_CPU_CAPS_ACCELERATED_AES;
+
+	OPENSSL_ia32cap_P = caps;
+}
diff --git a/lib/libcrypto/x86_arch.h b/lib/libcrypto/x86_arch.h
index 5b2cf97546f..e9e9d489602 100644
--- a/lib/libcrypto/x86_arch.h
+++ b/lib/libcrypto/x86_arch.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: x86_arch.h,v 1.1 2016/11/04 17:30:30 miod Exp $ */
+/* $OpenBSD: x86_arch.h,v 1.2 2024/10/18 13:36:24 jsing Exp $ */
 /*
  * Copyright (c) 2016 Miodrag Vallat.
  *
@@ -76,15 +76,20 @@
 #define	IA32CAP_MASK1_SSSE3	(1 << IA32CAP_BIT1_SSSE3)
 #define	IA32CAP_MASK1_FMA3	(1 << IA32CAP_BIT1_FMA3)
 #define	IA32CAP_MASK1_AESNI	(1 << IA32CAP_BIT1_AESNI)
+#define	IA32CAP_MASK1_OSXSAVE	(1 << IA32CAP_BIT1_OSXSAVE)
 #define	IA32CAP_MASK1_AVX	(1 << IA32CAP_BIT1_AVX)
 #define	IA32CAP_MASK1_AMD_XOP	(1 << IA32CAP_BIT1_AMD_XOP)
 
 /* bit masks for OPENSSL_cpu_caps() */
 
+#define	CPUCAP_MASK_HT		IA32CAP_MASK0_HT
 #define	CPUCAP_MASK_MMX		IA32CAP_MASK0_MMX
 #define	CPUCAP_MASK_FXSR	IA32CAP_MASK0_FXSR
 #define	CPUCAP_MASK_SSE		IA32CAP_MASK0_SSE
+#define	CPUCAP_MASK_SSE2	IA32CAP_MASK0_SSE2
+#define	CPUCAP_MASK_INTEL	IA32CAP_MASK0_INTEL
 #define	CPUCAP_MASK_INTELP4	IA32CAP_MASK0_INTELP4
 #define	CPUCAP_MASK_PCLMUL	(1ULL << (32 + IA32CAP_BIT1_PCLMUL))
 #define	CPUCAP_MASK_SSSE3	(1ULL << (32 + IA32CAP_BIT1_SSSE3))
 #define	CPUCAP_MASK_AESNI	(1ULL << (32 + IA32CAP_BIT1_AESNI))
+#define	CPUCAP_MASK_AVX		(1ULL << (32 + IA32CAP_BIT1_AVX))
-- 
2.20.1