Spectre-BHB can be mitigated by using a loop that branches a CPU-specific
number of times. For cores where this does not suffice, or where Spectre-V2
needs to be handled as well, try to call into a new PSCI function that
mitigates both Spectre-V2 and Spectre-BHB. Some newer machines, which might
not be in anyone's hands yet, have an instruction (CLRBHB) that clears the
BHB. If ECBHB is set, the BHB isn't vulnerable. If we have CSV2_3/HCXT,
it's not vulnerable at all.
No visible performance drop-off on a MacchiatoBin (4xA72) or Lenovo x13s
(4xA78C+4xX1C), but around 2-3% on an LX2K (16xA72) and RK3399 (4xA53+2xA72).
ok kettenis@
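As a rough guide, the selection logic in the cpu.c hunk below boils down to
the following priority order. This is a minimal illustrative sketch, not
kernel code; the enum, function name, and flag parameters are invented for
clarity and are not part of the commit. Later checks deliberately override
earlier ones:

	/* Illustrative sketch of the priority of the cpu.c checks below. */
	enum bhb_method { BHB_NONE, BHB_LOOP, BHB_PSCI, BHB_CLRBHB };

	enum bhb_method
	pick_bhb_method(int known_part, int psci_wa3, int clrbhb, int ecbhb,
	    int csv2_3)
	{
		enum bhb_method m = BHB_NONE;

		if (known_part)		/* known Cortex/Neoverse loop count */
			m = BHB_LOOP;
		else if (psci_wa3)	/* SMCCC_ARCH_WORKAROUND_3 available */
			m = BHB_PSCI;
		if (clrbhb)		/* prefer the dedicated instruction */
			m = BHB_CLRBHB;
		if (ecbhb || csv2_3)	/* hardware says not vulnerable */
			m = BHB_NONE;
		return (m);
	}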
-/* $OpenBSD: cpu.c,v 1.75 2022/12/09 21:23:24 patrick Exp $ */
+/* $OpenBSD: cpu.c,v 1.76 2022/12/10 10:13:58 patrick Exp $ */
/*
* Copyright (c) 2016 Dale Rahn <drahn@dalerahn.com>
int arm64_has_aes;
#endif
+extern char trampoline_vectors_none[];
+extern char trampoline_vectors_loop_8[];
+extern char trampoline_vectors_loop_24[];
+extern char trampoline_vectors_loop_32[];
+#if NPSCI > 0
+extern char trampoline_vectors_psci_hvc[];
+extern char trampoline_vectors_psci_smc[];
+#endif
+extern char trampoline_vectors_clrbhb[];
+
struct cpu_info *cpu_info_list = &cpu_info_primary;
int cpu_match(struct device *, void *, void *);
/*
* The architecture has been updated to explicitly tell us if
- * we're not vulnerable.
+ * we're not vulnerable to regular Spectre.
*/
id = READ_SPECIALREG(id_aa64pfr0_el1);
if (ID_AA64PFR0_CSV2(id) >= ID_AA64PFR0_CSV2_IMPL)
ci->ci_flush_bp = cpu_flush_bp_noop;
+ /*
+ * But we might still be vulnerable to Spectre-BHB. If we know the
+ * CPU, we can add a branchy loop that cleans the BHB.
+ */
+
+ if (impl == CPU_IMPL_ARM) {
+ switch (part) {
+ case CPU_PART_CORTEX_A72:
+ ci->ci_trampoline_vectors =
+ (vaddr_t)trampoline_vectors_loop_8;
+ break;
+ case CPU_PART_CORTEX_A76:
+ case CPU_PART_CORTEX_A76AE:
+ case CPU_PART_CORTEX_A77:
+ case CPU_PART_NEOVERSE_N1:
+ ci->ci_trampoline_vectors =
+ (vaddr_t)trampoline_vectors_loop_24;
+ break;
+ case CPU_PART_CORTEX_A78:
+ case CPU_PART_CORTEX_A78AE:
+ case CPU_PART_CORTEX_A78C:
+ case CPU_PART_CORTEX_X1:
+ case CPU_PART_CORTEX_X2:
+ case CPU_PART_CORTEX_A710:
+ case CPU_PART_NEOVERSE_N2:
+ case CPU_PART_NEOVERSE_V1:
+ ci->ci_trampoline_vectors =
+ (vaddr_t)trampoline_vectors_loop_32;
+ break;
+ }
+ }
+
+ /*
+	 * If we're not using a loop, try to call into PSCI. This also
+	 * covers the original Spectre-V2 in addition to Spectre-BHB.
+ */
+#if NPSCI > 0
+ if (ci->ci_trampoline_vectors == (vaddr_t)trampoline_vectors_none &&
+ psci_flush_bp_has_bhb()) {
+ ci->ci_flush_bp = cpu_flush_bp_noop;
+ if (psci_method() == PSCI_METHOD_HVC)
+ ci->ci_trampoline_vectors =
+ (vaddr_t)trampoline_vectors_psci_hvc;
+ if (psci_method() == PSCI_METHOD_SMC)
+ ci->ci_trampoline_vectors =
+ (vaddr_t)trampoline_vectors_psci_smc;
+ }
+#endif
+
+ /* Prefer CLRBHB to mitigate Spectre-BHB. */
+
+ id = READ_SPECIALREG(id_aa64isar2_el1);
+ if (ID_AA64ISAR2_CLRBHB(id) >= ID_AA64ISAR2_CLRBHB_IMPL)
+ ci->ci_trampoline_vectors = (vaddr_t)trampoline_vectors_clrbhb;
+
+ /* ECBHB tells us Spectre-BHB is mitigated. */
+
+ id = READ_SPECIALREG(id_aa64mmfr1_el1);
+ if (ID_AA64MMFR1_ECBHB(id) >= ID_AA64MMFR1_ECBHB_IMPL)
+ ci->ci_trampoline_vectors = (vaddr_t)trampoline_vectors_none;
+
+ /*
+ * The architecture has been updated to explicitly tell us if
+ * we're not vulnerable.
+ */
+
+ id = READ_SPECIALREG(id_aa64pfr0_el1);
+ if (ID_AA64PFR0_CSV2(id) >= ID_AA64PFR0_CSV2_HCXT) {
+ ci->ci_flush_bp = cpu_flush_bp_noop;
+ ci->ci_trampoline_vectors = (vaddr_t)trampoline_vectors_none;
+ }
+
/*
* Print CPU features encoded in the ID registers.
*/
kstack = km_alloc(USPACE, &kv_any, &kp_zero, &kd_waitok);
ci->ci_el1_stkend = (vaddr_t)kstack + USPACE - 16;
+ ci->ci_trampoline_vectors = (vaddr_t)trampoline_vectors_none;
#ifdef MULTIPROCESSOR
if (ci->ci_flags & CPUF_AP) {
-/* $OpenBSD: exception.S,v 1.14 2022/12/08 01:25:44 guenther Exp $ */
+/* $OpenBSD: exception.S,v 1.15 2022/12/10 10:13:58 patrick Exp $ */
/*-
* Copyright (c) 2014 Andrew Turner
* All rights reserved.
.macro return
msr tpidrro_el0, x18
- ldr x18, =trampoline_vectors
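+	/* Load this CPU's chosen trampoline vectors (tpidr_el1 = curcpu). */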
+ mrs x18, tpidr_el1
+ ldr x18, [x18, #CI_TRAMPOLINE_VECTORS]
msr vbar_el1, x18
isb
b tramp_return
-# $OpenBSD: genassym.cf,v 1.8 2022/07/13 09:28:18 kettenis Exp $
+# $OpenBSD: genassym.cf,v 1.9 2022/12/10 10:13:58 patrick Exp $
# $NetBSD: genassym.cf,v 1.27 2003/11/04 10:33:16 dsl Exp$
# Copyright (c) 1982, 1990 The Regents of the University of California.
struct cpu_info
member ci_curproc
member ci_el1_stkend
+member ci_trampoline_vectors
member ci_ttbr1
member ci_self
-/* $OpenBSD: pmap.c,v 1.90 2022/12/09 22:31:31 kettenis Exp $ */
+/* $OpenBSD: pmap.c,v 1.91 2022/12/10 10:13:58 patrick Exp $ */
/*
* Copyright (c) 2008-2009,2014-2016 Dale Rahn <drahn@dalerahn.com>
*
pmap_postinit(void)
{
extern char trampoline_vectors[];
+ extern char trampoline_vectors_end[];
paddr_t pa;
vaddr_t minaddr, maxaddr;
u_long npteds, npages;
memset(pmap_tramp.pm_vp.l1, 0, sizeof(struct pmapvp1));
pmap_extract(pmap_kernel(), (vaddr_t)trampoline_vectors, &pa);
- pmap_enter(&pmap_tramp, (vaddr_t)trampoline_vectors, pa,
- PROT_READ | PROT_EXEC, PROT_READ | PROT_EXEC | PMAP_WIRED);
+ minaddr = (vaddr_t)trampoline_vectors;
+ maxaddr = (vaddr_t)trampoline_vectors_end;
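+	/*
+	 * The trampoline now spans more than one page.  Kernel text is
+	 * physically contiguous, so stepping pa by PAGE_SIZE matches
+	 * the successive trampoline pages.
+	 */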
+ while (minaddr < maxaddr) {
+ pmap_enter(&pmap_tramp, minaddr, pa,
+ PROT_READ | PROT_EXEC, PROT_READ | PROT_EXEC | PMAP_WIRED);
+ minaddr += PAGE_SIZE;
+ pa += PAGE_SIZE;
+ }
/*
* Reserve enough virtual address space to grow the kernel
-/* $OpenBSD: trampoline.S,v 1.3 2021/02/17 12:11:44 kettenis Exp $ */
+/* $OpenBSD: trampoline.S,v 1.4 2022/12/10 10:13:58 patrick Exp $ */
/*
* Copyright (c) 2018 Mark Kettenis <kettenis@openbsd.org>
#include <machine/asm.h>
#include <machine/param.h>
#include "assym.h"
+#include "psci.h"
+
+#define SMCCC_ARCH_WORKAROUND_3 0x80003fff
.text
*/
.align 12
+.macro spectre_bhb_none_early
+.endm
+.macro spectre_bhb_none_late
+.endm
+
+.macro spectre_bhb_loop_8_early
+ spectre_bhb_loop 8
+.endm
+.macro spectre_bhb_loop_8_late
+.endm
+
+.macro spectre_bhb_loop_24_early
+ spectre_bhb_loop 24
+.endm
+.macro spectre_bhb_loop_24_late
+.endm
+
+.macro spectre_bhb_loop_32_early
+ spectre_bhb_loop 32
+.endm
+.macro spectre_bhb_loop_32_late
+.endm
+
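+/*
+ * Fill the branch history buffer with \cnt benign entries: each loop
+ * iteration takes a branch to the very next instruction, displacing
+ * whatever history userland may have trained.
+ */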
+.macro spectre_bhb_loop, cnt
+ mov x18, #\cnt
+1:
+ b . + 4
+ subs x18, x18, #1
+ b.ne 1b
+ dsb nsh
+ isb
+.endm
+
+#if NPSCI > 0
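+/*
+ * Ask firmware to flush branch predictor state (SMCCC workaround 3).
+ * The SMCCC call may clobber x0-x3, so save and restore them around
+ * the HVC/SMC.
+ */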
+.macro spectre_bhb_psci_hvc_early
+.endm
+.macro spectre_bhb_psci_hvc_late
+ stp x0, x1, [sp, #-16]!
+ stp x2, x3, [sp, #-16]!
+ mov w0, #SMCCC_ARCH_WORKAROUND_3
+ hvc #0
+ ldp x2, x3, [sp], #16
+ ldp x0, x1, [sp], #16
+.endm
+
+.macro spectre_bhb_psci_smc_early
+.endm
+.macro spectre_bhb_psci_smc_late
+ stp x0, x1, [sp, #-16]!
+ stp x2, x3, [sp, #-16]!
+ mov w0, #SMCCC_ARCH_WORKAROUND_3
+ smc #0
+ ldp x2, x3, [sp], #16
+ ldp x0, x1, [sp], #16
+.endm
+#endif
+
+.macro spectre_bhb_clrbhb_early
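+	/*
+	 * CLRBHB is allocated in the HINT space; encode it as hint #22
+	 * so assemblers that lack the mnemonic still accept it.
+	 */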
+ hint #22 /* clrbhb */
+ isb
+.endm
+.macro spectre_bhb_clrbhb_late
+.endm
+
.macro vempty
.align 7
brk 0xfff
1: b 1b
.endm
-.macro vector name
+.macro vector name, bhb
.align 7
msr tpidrro_el0, x18
+ spectre_bhb_\bhb\()_early
mrs x18, ttbr1_el1
bic x18, x18, #(1UL << 48)
sub x18, x18, #(2 * PAGE_SIZE)
msr ttbr1_el1, x18
isb
+ spectre_bhb_\bhb\()_late
b tramp_\name
.endm
+.macro tramp_vector, bhb
.align 11
- .globl trampoline_vectors
-trampoline_vectors:
+ .globl trampoline_vectors_\bhb
+trampoline_vectors_\bhb:
vempty /* Synchronous EL1t */
vempty /* IRQ EL1t */
vempty /* FIQ EL1t */
vempty /* FIQ EL1h */
vempty /* Error EL1h */
- vector el0_sync /* Synchronous 64-bit EL0 */
- vector el0_irq /* IRQ 64-bit EL0 */
- vector el0_fiq /* FIQ 64-bit EL0 */
- vector el0_error /* Error 64-bit EL0 */
+ vector el0_sync, \bhb /* Synchronous 64-bit EL0 */
+ vector el0_irq, \bhb /* IRQ 64-bit EL0 */
+ vector el0_fiq, \bhb /* FIQ 64-bit EL0 */
+ vector el0_error, \bhb /* Error 64-bit EL0 */
vempty /* Synchronous 32-bit EL0 */
vempty /* IRQ 32-bit EL0 */
vempty /* FIQ 32-bit EL0 */
vempty /* Error 32-bit EL0 */
+.endm
+
+ .align 11
+ .globl trampoline_vectors
+trampoline_vectors:
+ tramp_vector none
+ tramp_vector loop_8
+ tramp_vector loop_24
+ tramp_vector loop_32
+#if NPSCI > 0
+ tramp_vector psci_hvc
+ tramp_vector psci_smc
+#endif
+ tramp_vector clrbhb
.align 11
.globl tramp_return
dsb nsh
isb
+	.globl trampoline_vectors_end
+trampoline_vectors_end:
+
/*
* The next page contains the start of the EL0 exception handlers.
* This page is not "exposed" to userland, but should immediately
-/* $OpenBSD: cpu.h,v 1.32 2022/11/26 17:23:15 tobhe Exp $ */
+/* $OpenBSD: cpu.h,v 1.33 2022/12/10 10:13:58 patrick Exp $ */
/*
* Copyright (c) 2016 Dale Rahn <drahn@dalerahn.com>
*
u_int32_t ci_ctrl; /* The CPU control register */
+ u_int64_t ci_trampoline_vectors;
+
uint32_t ci_cpl;
uint32_t ci_ipending;
uint32_t ci_idepth;
-/* $OpenBSD: psci.c,v 1.11 2022/07/09 19:27:56 kettenis Exp $ */
+/* $OpenBSD: psci.c,v 1.12 2022/12/10 10:13:58 patrick Exp $ */
/*
* Copyright (c) 2016 Jonathan Gray <jsg@openbsd.org>
#define SMCCC_VERSION 0x80000000
#define SMCCC_ARCH_FEATURES 0x80000001
#define SMCCC_ARCH_WORKAROUND_1 0x80008000
+#define SMCCC_ARCH_WORKAROUND_3 0x80003fff
#define PSCI_VERSION 0x84000000
#define CPU_OFF 0x84000002
uint32_t sc_cpu_off;
uint32_t sc_smccc_version;
+ uint32_t sc_method;
};
struct psci_softc *psci_sc;
uint32_t version;
if (OF_getprop(faa->fa_node, "method", method, sizeof(method))) {
- if (strcmp(method, "hvc") == 0)
+ if (strcmp(method, "hvc") == 0) {
sc->sc_callfn = hvc_call;
- else if (strcmp(method, "smc") == 0)
+ sc->sc_method = PSCI_METHOD_HVC;
+ } else if (strcmp(method, "smc") == 0) {
sc->sc_callfn = smc_call;
+ sc->sc_method = PSCI_METHOD_SMC;
+ }
}
/*
(*sc->sc_callfn)(SMCCC_ARCH_WORKAROUND_1, 0, 0, 0);
}
+void
+psci_flush_bp_smccc_arch_workaround_3(void)
+{
+ struct psci_softc *sc = psci_sc;
+
+ (*sc->sc_callfn)(SMCCC_ARCH_WORKAROUND_3, 0, 0, 0);
+}
+
void
psci_flush_bp(void)
{
}
}
+int
+psci_flush_bp_has_bhb(void)
+{
+ struct psci_softc *sc = psci_sc;
+
+ /*
+ * SMCCC 1.1 allows us to detect if the workaround is
+ * implemented and needed.
+ */
+ if (sc && sc->sc_smccc_version >= 0x10001 &&
+ smccc_arch_features(SMCCC_ARCH_WORKAROUND_3) == 0) {
+ /* Workaround implemented and needed. */
+ return 1;
+ }
+
+ return 0;
+}
+
int32_t
smccc_version(void)
{
return (sc && sc->sc_system_suspend != 0);
}
+
+int
+psci_method(void)
+{
+ struct psci_softc *sc = psci_sc;
+
+ return sc ? sc->sc_method : PSCI_METHOD_NONE;
+}
#define PSCI_SUCCESS 0
#define PSCI_NOT_SUPPORTED -1
+#define PSCI_METHOD_NONE 0
+#define PSCI_METHOD_HVC 1
+#define PSCI_METHOD_SMC 2
+
int psci_can_suspend(void);
int32_t psci_system_suspend(register_t, register_t);
int32_t psci_cpu_on(register_t, register_t, register_t);
int32_t psci_cpu_off(void);
void psci_flush_bp(void);
+int psci_flush_bp_has_bhb(void);
+int psci_method(void);
#endif /* _SYS_DEV_FDT_PSCIVAR_H_ */