Mitigate Spectre-BHB by using core-specific trampoline vectors.

author:    patrick <patrick@openbsd.org>
date:      Sat, 10 Dec 2022 10:13:58 +0000 (10:13 +0000)
committer: patrick <patrick@openbsd.org>
date:      Sat, 10 Dec 2022 10:13:58 +0000 (10:13 +0000)

On some cores Spectre-BHB can be mitigated by using a loop that branches a number of times.
For cores where this does not suffice, or where Spectre-V2 needs to be handled
as well, try and call into a new PSCI function that mitigates both Spectre-V2
and Spectre-BHB.  Some newer machines, which might not be in anyone's hands
yet, have an instruction (CLRBHB) that clears the BHB.  If ECBHB is set, the
BHB isn't vulnerable.  If we have CSV2_3/HCXT, it's not vulnerable at all.

No visible performance dropoff on a MacchiatoBin (4xA72) or Lenovo x13s (4xA78C+
4xX1C), but around 2-3% on a LX2K (16xA72) and RK3399 (4xA53+2xA72).

ok kettenis@

sys/arch/arm64/arm64/cpu.c
sys/arch/arm64/arm64/exception.S
sys/arch/arm64/arm64/genassym.cf
sys/arch/arm64/arm64/pmap.c
sys/arch/arm64/arm64/trampoline.S
sys/arch/arm64/include/cpu.h
sys/dev/fdt/psci.c
sys/dev/fdt/pscivar.h

index d35206f..6dbdb8e 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: cpu.c,v 1.75 2022/12/09 21:23:24 patrick Exp $        */
+/*     $OpenBSD: cpu.c,v 1.76 2022/12/10 10:13:58 patrick Exp $        */
 
 /*
  * Copyright (c) 2016 Dale Rahn <drahn@dalerahn.com>
@@ -208,6 +208,16 @@ uint64_t cpu_id_aa64pfr1;
 int arm64_has_aes;
 #endif
 
+extern char trampoline_vectors_none[];
+extern char trampoline_vectors_loop_8[];
+extern char trampoline_vectors_loop_24[];
+extern char trampoline_vectors_loop_32[];
+#if NPSCI > 0
+extern char trampoline_vectors_psci_hvc[];
+extern char trampoline_vectors_psci_smc[];
+#endif
+extern char trampoline_vectors_clrbhb[];
+
 struct cpu_info *cpu_info_list = &cpu_info_primary;
 
 int    cpu_match(struct device *, void *, void *);
@@ -364,13 +374,85 @@ cpu_identify(struct cpu_info *ci)
 
        /*
         * The architecture has been updated to explicitly tell us if
-        * we're not vulnerable.
+        * we're not vulnerable to regular Spectre.
         */
 
        id = READ_SPECIALREG(id_aa64pfr0_el1);
        if (ID_AA64PFR0_CSV2(id) >= ID_AA64PFR0_CSV2_IMPL)
                ci->ci_flush_bp = cpu_flush_bp_noop;
 
+       /*
+        * But we might still be vulnerable to Spectre-BHB.  If we know the
+        * CPU, we can add a branchy loop that cleans the BHB.
+        */
+
+       if (impl == CPU_IMPL_ARM) {
+               switch (part) {
+               case CPU_PART_CORTEX_A72:
+                       ci->ci_trampoline_vectors =
+                           (vaddr_t)trampoline_vectors_loop_8;
+                       break;
+               case CPU_PART_CORTEX_A76:
+               case CPU_PART_CORTEX_A76AE:
+               case CPU_PART_CORTEX_A77:
+               case CPU_PART_NEOVERSE_N1:
+                       ci->ci_trampoline_vectors =
+                           (vaddr_t)trampoline_vectors_loop_24;
+                       break;
+               case CPU_PART_CORTEX_A78:
+               case CPU_PART_CORTEX_A78AE:
+               case CPU_PART_CORTEX_A78C:
+               case CPU_PART_CORTEX_X1:
+               case CPU_PART_CORTEX_X2:
+               case CPU_PART_CORTEX_A710:
+               case CPU_PART_NEOVERSE_N2:
+               case CPU_PART_NEOVERSE_V1:
+                       ci->ci_trampoline_vectors =
+                           (vaddr_t)trampoline_vectors_loop_32;
+                       break;
+               }
+       }
+
+       /*
+        * If we're not using a loop, try and call into PSCI.  This also
+        * covers the original Spectre in addition to Spectre-BHB.
+        */
+#if NPSCI > 0
+       if (ci->ci_trampoline_vectors == (vaddr_t)trampoline_vectors_none &&
+           psci_flush_bp_has_bhb()) {
+               ci->ci_flush_bp = cpu_flush_bp_noop;
+               if (psci_method() == PSCI_METHOD_HVC)
+                       ci->ci_trampoline_vectors =
+                           (vaddr_t)trampoline_vectors_psci_hvc;
+               if (psci_method() == PSCI_METHOD_SMC)
+                       ci->ci_trampoline_vectors =
+                           (vaddr_t)trampoline_vectors_psci_smc;
+       }
+#endif
+
+       /* Prefer CLRBHB to mitigate Spectre-BHB. */
+
+       id = READ_SPECIALREG(id_aa64isar2_el1);
+       if (ID_AA64ISAR2_CLRBHB(id) >= ID_AA64ISAR2_CLRBHB_IMPL)
+               ci->ci_trampoline_vectors = (vaddr_t)trampoline_vectors_clrbhb;
+
+       /* ECBHB tells us Spectre-BHB is mitigated. */
+
+       id = READ_SPECIALREG(id_aa64mmfr1_el1);
+       if (ID_AA64MMFR1_ECBHB(id) >= ID_AA64MMFR1_ECBHB_IMPL)
+               ci->ci_trampoline_vectors = (vaddr_t)trampoline_vectors_none;
+
+       /*
+        * The architecture has been updated to explicitly tell us if
+        * we're not vulnerable.
+        */
+
+       id = READ_SPECIALREG(id_aa64pfr0_el1);
+       if (ID_AA64PFR0_CSV2(id) >= ID_AA64PFR0_CSV2_HCXT) {
+               ci->ci_flush_bp = cpu_flush_bp_noop;
+               ci->ci_trampoline_vectors = (vaddr_t)trampoline_vectors_none;
+       }
+
        /*
         * Print CPU features encoded in the ID registers.
         */
@@ -727,6 +809,7 @@ cpu_attach(struct device *parent, struct device *dev, void *aux)
 
        kstack = km_alloc(USPACE, &kv_any, &kp_zero, &kd_waitok);
        ci->ci_el1_stkend = (vaddr_t)kstack + USPACE - 16;
+       ci->ci_trampoline_vectors = (vaddr_t)trampoline_vectors_none;
 
 #ifdef MULTIPROCESSOR
        if (ci->ci_flags & CPUF_AP) {
index 391f25f..7192791 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: exception.S,v 1.14 2022/12/08 01:25:44 guenther Exp $ */
+/* $OpenBSD: exception.S,v 1.15 2022/12/10 10:13:58 patrick Exp $ */
 /*-
  * Copyright (c) 2014 Andrew Turner
  * All rights reserved.
@@ -195,7 +195,8 @@ handle_el1h_error:
 
 .macro return
        msr     tpidrro_el0, x18
-       ldr     x18, =trampoline_vectors
+       mrs     x18, tpidr_el1
+       ldr     x18, [x18, #CI_TRAMPOLINE_VECTORS]
        msr     vbar_el1, x18
        isb
        b       tramp_return
index d82fd0a..faeb6fd 100644 (file)
@@ -1,4 +1,4 @@
-#      $OpenBSD: genassym.cf,v 1.8 2022/07/13 09:28:18 kettenis Exp $
+#      $OpenBSD: genassym.cf,v 1.9 2022/12/10 10:13:58 patrick Exp $
 #      $NetBSD: genassym.cf,v 1.27 2003/11/04 10:33:16 dsl Exp$
 
 # Copyright (c) 1982, 1990 The Regents of the University of California.
@@ -45,6 +45,7 @@ member        sf_sc
 struct cpu_info
 member ci_curproc
 member ci_el1_stkend
+member ci_trampoline_vectors
 member ci_ttbr1
 member ci_self
 
index 82216f7..a04fb1c 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.90 2022/12/09 22:31:31 kettenis Exp $ */
+/* $OpenBSD: pmap.c,v 1.91 2022/12/10 10:13:58 patrick Exp $ */
 /*
  * Copyright (c) 2008-2009,2014-2016 Dale Rahn <drahn@dalerahn.com>
  *
@@ -1826,14 +1826,21 @@ void
 pmap_postinit(void)
 {
        extern char trampoline_vectors[];
+       extern char trampoline_vectors_end[];
        paddr_t pa;
        vaddr_t minaddr, maxaddr;
        u_long npteds, npages;
 
        memset(pmap_tramp.pm_vp.l1, 0, sizeof(struct pmapvp1));
        pmap_extract(pmap_kernel(), (vaddr_t)trampoline_vectors, &pa);
-       pmap_enter(&pmap_tramp, (vaddr_t)trampoline_vectors, pa,
-           PROT_READ | PROT_EXEC, PROT_READ | PROT_EXEC | PMAP_WIRED);
+       minaddr = (vaddr_t)trampoline_vectors;
+       maxaddr = (vaddr_t)trampoline_vectors_end;
+       while (minaddr < maxaddr) {
+               pmap_enter(&pmap_tramp, minaddr, pa,
+                   PROT_READ | PROT_EXEC, PROT_READ | PROT_EXEC | PMAP_WIRED);
+               minaddr += PAGE_SIZE;
+               pa += PAGE_SIZE;
+       }
 
        /*
         * Reserve enough virtual address space to grow the kernel
index 430db83..79eca16 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: trampoline.S,v 1.3 2021/02/17 12:11:44 kettenis Exp $ */
+/*     $OpenBSD: trampoline.S,v 1.4 2022/12/10 10:13:58 patrick Exp $  */
 
 /*
  * Copyright (c) 2018 Mark Kettenis <kettenis@openbsd.org>
@@ -19,6 +19,9 @@
 #include <machine/asm.h>
 #include <machine/param.h>
 #include "assym.h"
+#include "psci.h"
+
+#define SMCCC_ARCH_WORKAROUND_3                0x80003fff
 
        .text
 
  */
        .align 12
 
+.macro spectre_bhb_none_early
+.endm
+.macro spectre_bhb_none_late
+.endm
+
+.macro spectre_bhb_loop_8_early
+       spectre_bhb_loop 8
+.endm
+.macro spectre_bhb_loop_8_late
+.endm
+
+.macro spectre_bhb_loop_24_early
+       spectre_bhb_loop 24
+.endm
+.macro spectre_bhb_loop_24_late
+.endm
+
+.macro spectre_bhb_loop_32_early
+       spectre_bhb_loop 32
+.endm
+.macro spectre_bhb_loop_32_late
+.endm
+
+.macro spectre_bhb_loop, cnt
+       mov x18, #\cnt
+1:
+       b . + 4
+       subs x18, x18, #1
+       b.ne 1b
+       dsb nsh
+       isb
+.endm
+
+#if NPSCI > 0
+.macro spectre_bhb_psci_hvc_early
+.endm
+.macro spectre_bhb_psci_hvc_late
+       stp     x0, x1, [sp, #-16]!
+       stp     x2, x3, [sp, #-16]!
+       mov     w0, #SMCCC_ARCH_WORKAROUND_3
+       hvc     #0
+       ldp     x2, x3, [sp], #16
+       ldp     x0, x1, [sp], #16
+.endm
+
+.macro spectre_bhb_psci_smc_early
+.endm
+.macro spectre_bhb_psci_smc_late
+       stp     x0, x1, [sp, #-16]!
+       stp     x2, x3, [sp, #-16]!
+       mov     w0, #SMCCC_ARCH_WORKAROUND_3
+       smc     #0
+       ldp     x2, x3, [sp], #16
+       ldp     x0, x1, [sp], #16
+.endm
+#endif
+
+.macro spectre_bhb_clrbhb_early
+       hint    #22 /* clrbhb */
+       isb
+.endm
+.macro spectre_bhb_clrbhb_late
+.endm
+
 .macro vempty
        .align 7
        brk     0xfff
        1: b    1b
 .endm
 
-.macro vector  name
+.macro vector  name, bhb
        .align 7
        msr     tpidrro_el0, x18
+       spectre_bhb_\bhb\()_early
        mrs     x18, ttbr1_el1
        bic     x18, x18, #(1UL << 48)
        sub     x18, x18, #(2 * PAGE_SIZE)
        msr     ttbr1_el1, x18
        isb
+       spectre_bhb_\bhb\()_late
        b       tramp_\name
 .endm
 
+.macro tramp_vector, bhb
        .align 11
-       .globl trampoline_vectors
-trampoline_vectors:
+       .globl trampoline_vectors_\bhb
+trampoline_vectors_\bhb:
        vempty                  /* Synchronous EL1t */
        vempty                  /* IRQ EL1t */
        vempty                  /* FIQ EL1t */
@@ -61,15 +131,29 @@ trampoline_vectors:
        vempty                  /* FIQ EL1h */
        vempty                  /* Error EL1h */
 
-       vector el0_sync         /* Synchronous 64-bit EL0 */
-       vector el0_irq          /* IRQ 64-bit EL0 */
-       vector el0_fiq          /* FIQ 64-bit EL0 */
-       vector el0_error        /* Error 64-bit EL0 */
+       vector el0_sync, \bhb   /* Synchronous 64-bit EL0 */
+       vector el0_irq, \bhb    /* IRQ 64-bit EL0 */
+       vector el0_fiq, \bhb    /* FIQ 64-bit EL0 */
+       vector el0_error, \bhb  /* Error 64-bit EL0 */
 
        vempty                  /* Synchronous 32-bit EL0 */
        vempty                  /* IRQ 32-bit EL0 */
        vempty                  /* FIQ 32-bit EL0 */
        vempty                  /* Error 32-bit EL0 */
+.endm
+
+       .align 11
+       .globl trampoline_vectors
+trampoline_vectors:
+       tramp_vector none
+       tramp_vector loop_8
+       tramp_vector loop_24
+       tramp_vector loop_32
+#if NPSCI > 0
+       tramp_vector psci_hvc
+       tramp_vector psci_smc
+#endif
+       tramp_vector clrbhb
 
        .align 11
        .globl tramp_return
@@ -85,6 +169,9 @@ tramp_return:
        dsb nsh
        isb
 
+       .global trampoline_vectors_end
+trampoline_vectors_end:
+
 /*
  * The next page contains the start of the EL0 exception handlers.
  * This page is not "exposed" to userland, but should immediately
index b7cbad2..059e218 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.32 2022/11/26 17:23:15 tobhe Exp $ */
+/* $OpenBSD: cpu.h,v 1.33 2022/12/10 10:13:58 patrick Exp $ */
 /*
  * Copyright (c) 2016 Dale Rahn <drahn@dalerahn.com>
  *
@@ -126,6 +126,8 @@ struct cpu_info {
 
        u_int32_t               ci_ctrl; /* The CPU control register */
 
+       u_int64_t               ci_trampoline_vectors;
+
        uint32_t                ci_cpl;
        uint32_t                ci_ipending;
        uint32_t                ci_idepth;
index a7ebb8d..10aaca1 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: psci.c,v 1.11 2022/07/09 19:27:56 kettenis Exp $      */
+/*     $OpenBSD: psci.c,v 1.12 2022/12/10 10:13:58 patrick Exp $       */
 
 /*
  * Copyright (c) 2016 Jonathan Gray <jsg@openbsd.org>
@@ -34,6 +34,7 @@ extern void (*powerdownfn)(void);
 #define SMCCC_VERSION          0x80000000
 #define SMCCC_ARCH_FEATURES    0x80000001
 #define SMCCC_ARCH_WORKAROUND_1        0x80008000
+#define SMCCC_ARCH_WORKAROUND_3        0x80003fff
 
 #define PSCI_VERSION           0x84000000
 #define CPU_OFF                        0x84000002
@@ -63,6 +64,7 @@ struct psci_softc {
        uint32_t         sc_cpu_off;
 
        uint32_t         sc_smccc_version;
+       uint32_t         sc_method;
 };
 
 struct psci_softc *psci_sc;
@@ -108,10 +110,13 @@ psci_attach(struct device *parent, struct device *self, void *aux)
        uint32_t version;
 
        if (OF_getprop(faa->fa_node, "method", method, sizeof(method))) {
-               if (strcmp(method, "hvc") == 0)
+               if (strcmp(method, "hvc") == 0) {
                        sc->sc_callfn = hvc_call;
-               else if (strcmp(method, "smc") == 0)
+                       sc->sc_method = PSCI_METHOD_HVC;
+               } else if (strcmp(method, "smc") == 0) {
                        sc->sc_callfn = smc_call;
+                       sc->sc_method = PSCI_METHOD_SMC;
+               }
        }
 
        /*
@@ -197,6 +202,14 @@ psci_flush_bp_smccc_arch_workaround_1(void)
        (*sc->sc_callfn)(SMCCC_ARCH_WORKAROUND_1, 0, 0, 0);
 }
 
+void
+psci_flush_bp_smccc_arch_workaround_3(void)
+{
+       struct psci_softc *sc = psci_sc;
+
+       (*sc->sc_callfn)(SMCCC_ARCH_WORKAROUND_3, 0, 0, 0);
+}
+
 void
 psci_flush_bp(void)
 {
@@ -218,6 +231,24 @@ psci_flush_bp(void)
        }
 }
 
+int
+psci_flush_bp_has_bhb(void)
+{
+       struct psci_softc *sc = psci_sc;
+
+       /*
+        * SMCCC 1.1 allows us to detect if the workaround is
+        * implemented and needed.
+        */
+       if (sc && sc->sc_smccc_version >= 0x10001 &&
+           smccc_arch_features(SMCCC_ARCH_WORKAROUND_3) == 0) {
+               /* Workaround implemented and needed. */
+               return 1;
+       }
+
+       return 0;
+}
+
 int32_t
 smccc_version(void)
 {
@@ -308,3 +339,11 @@ psci_can_suspend(void)
 
        return (sc && sc->sc_system_suspend != 0);
 }
+
+int
+psci_method(void)
+{
+       struct psci_softc *sc = psci_sc;
+
+       return sc ? sc->sc_method : PSCI_METHOD_NONE;
+}
index 54ff2b1..e8ec062 100644 (file)
@@ -6,11 +6,17 @@
 #define PSCI_SUCCESS           0
 #define PSCI_NOT_SUPPORTED     -1
 
+#define PSCI_METHOD_NONE       0
+#define PSCI_METHOD_HVC                1
+#define PSCI_METHOD_SMC                2
+
 int    psci_can_suspend(void);
 
 int32_t        psci_system_suspend(register_t, register_t);
 int32_t        psci_cpu_on(register_t, register_t, register_t);
 int32_t        psci_cpu_off(void);
 void   psci_flush_bp(void);
+int    psci_flush_bp_has_bhb(void);
+int    psci_method(void);
 
 #endif /* _SYS_DEV_FDT_PSCIVAR_H_ */