From: kettenis
Date: Wed, 10 Jan 2018 23:27:18 +0000 (+0000)
Subject: Implement FUCKWIT for arm64; unmap the kernel almost entirely while userland
X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=a97a42d9de4f9172faef873359aef60bcf18fd57;p=openbsd

Implement FUCKWIT for arm64; unmap the kernel almost entirely while userland
is running.  This provides protection against Meltdown on cores that are
vulnerable (just Cortex-A75 so far) but also seems to be essential to
protect against Spectre-like attacks against the kernel.  This
implementation only exposes a single trampoline page that does not contain
any kernel virtual addresses and also hides the real virtual address of the
exception vectors, which helps on cores vulnerable to "variant 3a"
(Cortex-A57, Cortex-A72).  The implementation is inspired by the work done
by Will Deacon for Linux, but there are no knobs to turn it off.  The
overhead is fairly limited: around 3-4% slowdown on Cortex-A57.

ok patrick@, deraadt@
---

diff --git a/sys/arch/arm64/arm64/cpufunc_asm.S b/sys/arch/arm64/arm64/cpufunc_asm.S
index 6f79705062a..94a127df330 100644
--- a/sys/arch/arm64/arm64/cpufunc_asm.S
+++ b/sys/arch/arm64/arm64/cpufunc_asm.S
@@ -1,4 +1,4 @@
-/*	$OpenBSD: cpufunc_asm.S,v 1.3 2017/03/24 19:48:01 kettenis Exp $	*/
+/*	$OpenBSD: cpufunc_asm.S,v 1.4 2018/01/10 23:27:18 kettenis Exp $	*/
 /*-
  * Copyright (c) 2014 Robin Randhawa
  * Copyright (c) 2015 The FreeBSD Foundation
@@ -79,9 +79,11 @@
  */

 ENTRY(cpu_setttb)
-	dsb	ish
-	msr	ttbr0_el1, x0
-	dsb	ish
+	mrs	x2, ttbr1_el1
+	bfi	x2, x0, #48, #16
+	msr	ttbr1_el1, x2
+	isb
+	msr	ttbr0_el1, x1
 	isb
 	ret
 END(cpu_setttb)
diff --git a/sys/arch/arm64/arm64/exception.S b/sys/arch/arm64/arm64/exception.S
index 4cee76ac4a3..6220f9a6b7b 100644
--- a/sys/arch/arm64/arm64/exception.S
+++ b/sys/arch/arm64/arm64/exception.S
@@ -1,4 +1,4 @@
-/*	$OpenBSD: exception.S,v 1.4 2017/08/05 17:30:51 drahn Exp $	*/
+/*	$OpenBSD: exception.S,v 1.5 2018/01/10 23:27:18 kettenis Exp $	*/
 /*-
  * Copyright (c) 2014 Andrew Turner
  * All rights reserved.
@@ -155,6 +155,15 @@ handle_el1h_irq:
 handle_el1h_error:
 	brk 0xf13

+.macro return
+	msr tpidrro_el0, x18
+	ldr x18, =trampoline_vectors
+	msr vbar_el1, x18
+	isb
+	b tramp_return
+.endm
+
+.globl handle_el0_sync
 handle_el0_sync:
 	save_registers 0
 	mov x0, sp
@@ -162,8 +171,9 @@ handle_el0_sync:
 	do_ast
 	bl _C_LABEL(vfp_enable)
 	restore_registers 0
-	eret
+	return

+.globl handle_el0_irq
 handle_el0_irq:
 	save_registers 0
 	bl _C_LABEL(vfp_save)
@@ -172,8 +182,9 @@ handle_el0_irq:
 	do_ast
 	bl _C_LABEL(vfp_enable)
 	restore_registers 0
-	eret
+	return

+.globl handle_el0_error
 handle_el0_error:
 	save_registers 0
 	mov x0, sp
@@ -184,7 +195,7 @@ ENTRY(syscall_return)
 	do_ast
 	restore_registers 0
-	eret
+	return

 .macro vempty
 	.align 7
@@ -210,13 +221,12 @@ exception_vectors:
 	vempty			/* FIQ EL1h */
 	vector el1h_error	/* Error EL1h */

-	vector el0_sync		/* Synchronous 64-bit EL0 */
-	vector el0_irq		/* IRQ 64-bit EL0 */
+	vempty			/* Synchronous 64-bit EL0 */
+	vempty			/* IRQ 64-bit EL0 */
 	vempty			/* FIQ 64-bit EL0 */
-	vector el0_error	/* Error 64-bit EL0 */
+	vempty			/* Error 64-bit EL0 */

 	vempty			/* Synchronous 32-bit EL0 */
 	vempty			/* IRQ 32-bit EL0 */
 	vempty			/* FIQ 32-bit EL0 */
 	vempty			/* Error 32-bit EL0 */
-
diff --git a/sys/arch/arm64/arm64/machdep.c b/sys/arch/arm64/arm64/machdep.c
index 112e3486974..d3ad1effb6d 100644
--- a/sys/arch/arm64/arm64/machdep.c
+++ b/sys/arch/arm64/arm64/machdep.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: machdep.c,v 1.24 2018/01/04 14:30:08 kettenis Exp $	*/
+/*	$OpenBSD: machdep.c,v 1.25 2018/01/10 23:27:18 kettenis Exp $	*/
 /*
  * Copyright (c) 2014 Patrick Wildt
  *
@@ -236,13 +236,17 @@ struct trapframe proc0tf;
 void
 cpu_startup()
 {
-	u_int loop;
 	paddr_t minaddr;
 	paddr_t maxaddr;

 	proc0.p_addr = proc0paddr;

+	/*
+	 * Give pmap a chance to set up a few more things now the vm
+	 * is initialised
+	 */
+	pmap_postinit();

 	/*
 	 * Initialize error message buffer (at end of core).
diff --git a/sys/arch/arm64/arm64/pmap.c b/sys/arch/arm64/arm64/pmap.c
index c147b00885d..22319d35aae 100644
--- a/sys/arch/arm64/arm64/pmap.c
+++ b/sys/arch/arm64/arm64/pmap.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: pmap.c,v 1.42 2018/01/04 14:30:08 kettenis Exp $	*/
+/*	$OpenBSD: pmap.c,v 1.43 2018/01/10 23:27:18 kettenis Exp $	*/
 /*
  * Copyright (c) 2008-2009,2014-2016 Dale Rahn
  *
@@ -37,6 +37,9 @@ void pmap_setttb(struct proc *p);
 void pmap_free_asid(pmap_t pm);

+/* We run userland code with ASIDs that have the low bit set. */
+#define ASID_USER	1
+
 static inline void
 ttlb_flush(pmap_t pm, vaddr_t va)
 {
@@ -48,10 +51,13 @@ ttlb_flush(pmap_t pm, vaddr_t va)
 	} else {
 		resva |= (uint64_t)pm->pm_asid << 48;
 		cpu_tlb_flush_asid(resva);
+		resva |= (uint64_t)ASID_USER << 48;
+		cpu_tlb_flush_asid(resva);
 	}
 }

 struct pmap kernel_pmap_;
+struct pmap pmap_tramp;

 LIST_HEAD(pted_pv_head, pte_desc);
@@ -514,12 +520,9 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)

 	ttlb_flush(pm, va & ~PAGE_MASK);

-	if (flags & PROT_EXEC) {
-		if (pg != NULL) {
-			need_sync = ((pg->pg_flags & PG_PMAP_EXE) == 0);
-			atomic_setbits_int(&pg->pg_flags, PG_PMAP_EXE);
-		} else
-			need_sync = 1;
+	if (pg != NULL && (flags & PROT_EXEC)) {
+		need_sync = ((pg->pg_flags & PG_PMAP_EXE) == 0);
+		atomic_setbits_int(&pg->pg_flags, PG_PMAP_EXE);
 	}

 	if (need_sync && (pm == pmap_kernel() || (curproc &&
@@ -1078,12 +1081,17 @@ pmap_bootstrap(long kvo, paddr_t lpt1, long kernelstart, long kernelend,
 	 * via physical pointers
 	 */

-	pt1pa = pmap_steal_avail(sizeof(struct pmapvp1), Lx_TABLE_ALIGN, &va);
+	pt1pa = pmap_steal_avail(2 * sizeof(struct pmapvp1), Lx_TABLE_ALIGN,
+	    &va);
 	vp1 = (struct pmapvp1 *)pt1pa;
 	pmap_kernel()->pm_vp.l1 = (struct pmapvp1 *)va;
 	pmap_kernel()->pm_privileged = 1;
 	pmap_kernel()->pm_asid = 0;

+	pmap_tramp.pm_vp.l1 = (struct pmapvp1 *)va + 1;
+	pmap_tramp.pm_privileged = 1;
+	pmap_tramp.pm_asid = 0;
+
 	/* allocate Lx entries */
 	for (i = VP_IDX1(VM_MIN_KERNEL_ADDRESS);
 	    i <= VP_IDX1(VM_MAX_KERNEL_ADDRESS);
@@ -1184,7 +1192,7 @@ pmap_bootstrap(long kvo, paddr_t lpt1, long kernelstart, long kernelend,

 			vp2->l2[VP_IDX2(mapva)] = mappa | L2_BLOCK |
 			    ATTR_IDX(PTE_ATTR_WB) | ATTR_SH(SH_INNER) |
-			    ap_bits_kern[prot];
+			    ATTR_nG | ap_bits_kern[prot];
 		}
 	}

@@ -1429,6 +1437,7 @@ pmap_init(void)
 	tcr = READ_SPECIALREG(tcr_el1);
 	tcr &= ~TCR_T0SZ(0x3f);
 	tcr |= TCR_T0SZ(64 - USER_SPACE_BITS);
+	tcr |= TCR_A1;
 	WRITE_SPECIALREG(tcr_el1, tcr);

 	pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, IPL_NONE, 0,
@@ -1473,7 +1482,7 @@ pmap_pte_update(struct pte_desc *pted, uint64_t *pl3)
 {
 	uint64_t pte, access_bits;
 	pmap_t pm = pted->pted_pmap;
-	uint64_t attr = 0;
+	uint64_t attr = ATTR_nG;

 	/* see mair in locore.S */
 	switch (pted->pted_va & PMAP_CACHE_BITS) {
@@ -1504,9 +1513,6 @@ pmap_pte_update(struct pte_desc *pted, uint64_t *pl3)
 	else
 		access_bits = ap_bits_user[pted->pted_pte & PROT_MASK];

-	if (pted->pted_va < VM_MIN_KERNEL_ADDRESS)
-		access_bits |= ATTR_nG;
-
 	pte = (pted->pted_pte & PTE_RPGN) | attr | access_bits | L3_P;
 	*pl3 = pte;
 }
@@ -1655,6 +1661,13 @@ pmap_fault_fixup(pmap_t pm, vaddr_t va, vm_prot_t ftype, int user)
 void
 pmap_postinit(void)
 {
+	extern char trampoline_vectors[];
+	paddr_t pa;
+
+	memset(pmap_tramp.pm_vp.l1, 0, sizeof(struct pmapvp1));
+	pmap_extract(pmap_kernel(), (vaddr_t)trampoline_vectors, &pa);
+	pmap_enter(&pmap_tramp, (vaddr_t)trampoline_vectors, pa,
+	    PROT_READ | PROT_EXEC, PROT_READ | PROT_EXEC | PMAP_WIRED);
 }

 void
@@ -2040,6 +2053,13 @@ pmap_map_early(paddr_t spa, psize_t len)
 	}
 }

+/*
+ * We allocate ASIDs in pairs.  The first ASID is used to run the
+ * kernel and has both userland and the full kernel mapped.  The
+ * second ASID is used for running userland and has only the
+ * trampoline page mapped in addition to userland.
+ */
+
 #define NUM_ASID (1 << 16)
 uint64_t pmap_asid[NUM_ASID / 64];
@@ -2049,25 +2069,25 @@ pmap_allocate_asid(pmap_t pm)
 	int asid, bit;

 	do {
-		asid = arc4random() & (NUM_ASID - 1);
+		asid = arc4random() & (NUM_ASID - 2);
 		bit = (asid & (64 - 1));
-	} while (asid == 0 || (pmap_asid[asid / 64] & (1ULL << bit)));
+	} while (asid == 0 || (pmap_asid[asid / 64] & (3ULL << bit)));

-	pmap_asid[asid / 64] |= (1ULL << bit);
+	pmap_asid[asid / 64] |= (3ULL << bit);
 	pm->pm_asid = asid;
 }

 void
 pmap_free_asid(pmap_t pm)
 {
-	int asid, bit;
+	int bit;

 	KASSERT(pm != curcpu()->ci_curpm);
 	cpu_tlb_flush_asid_all((uint64_t)pm->pm_asid << 48);
+	cpu_tlb_flush_asid_all((uint64_t)(pm->pm_asid | ASID_USER) << 48);

-	asid = pm->pm_asid;
-	bit = (asid & (64 - 1));
-	pmap_asid[asid / 64] &= ~(1ULL << bit);
+	bit = (pm->pm_asid & (64 - 1));
+	pmap_asid[pm->pm_asid / 64] &= ~(3ULL << bit);
 }

 void
@@ -2075,6 +2095,8 @@ pmap_setttb(struct proc *p)
 {
 	pmap_t pm = p->p_vmspace->vm_map.pmap;

-	cpu_setttb(((uint64_t)pm->pm_asid << 48) | pm->pm_pt0pa);
+	WRITE_SPECIALREG(ttbr0_el1, pmap_kernel()->pm_pt0pa);
+	__asm volatile("isb");
+	cpu_setttb(pm->pm_asid, pm->pm_pt0pa);
 	curcpu()->ci_curpm = pm;
 }
diff --git a/sys/arch/arm64/arm64/trampoline.S b/sys/arch/arm64/arm64/trampoline.S
new file mode 100644
index 00000000000..4de835ddb5c
--- /dev/null
+++ b/sys/arch/arm64/arm64/trampoline.S
@@ -0,0 +1,113 @@
+/*	$OpenBSD: trampoline.S,v 1.1 2018/01/10 23:27:18 kettenis Exp $	*/

+/*
+ * Copyright (c) 2018 Mark Kettenis
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <machine/asm.h>
+#include <machine/param.h>
+#include "assym.h"
+
+	.text
+
+/*
+ * The next page contains everything that needs to be "exposed" to
+ * userland.  This is the exception vector table that contains the
+ * entries for EL0, and the tail end of the EL0 exception handlers.
+ * This code is deliberately laid out in a way that avoids leaking
+ * kernel virtual addresses.
+ */
+	.align 12
+
+.macro vempty
+	.align 7
+	brk 0xfff
+	1: b 1b
+.endm
+
+.macro vector name
+	.align 7
+	msr tpidrro_el0, x18
+	mrs x18, ttbr1_el1
+	bic x18, x18, #(1UL << 48)
+	sub x18, x18, #(2 * PAGE_SIZE)
+	msr ttbr1_el1, x18
+	isb
+	b tramp_\name
+.endm
+
+	.align 11
+	.globl trampoline_vectors
+trampoline_vectors:
+	vempty			/* Synchronous EL1t */
+	vempty			/* IRQ EL1t */
+	vempty			/* FIQ EL1t */
+	vempty			/* Error EL1t */
+
+	vempty			/* Synchronous EL1h */
+	vempty			/* IRQ EL1h */
+	vempty			/* FIQ EL1h */
+	vempty			/* Error EL1h */
+
+	vector el0_sync		/* Synchronous 64-bit EL0 */
+	vector el0_irq		/* IRQ 64-bit EL0 */
+	vempty			/* FIQ 64-bit EL0 */
+	vector el0_error	/* Error 64-bit EL0 */
+
+	vempty			/* Synchronous 32-bit EL0 */
+	vempty			/* IRQ 32-bit EL0 */
+	vempty			/* FIQ 32-bit EL0 */
+	vempty			/* Error 32-bit EL0 */
+
+	.align 11
+	.globl tramp_return
+tramp_return:
+	mrs x18, ttbr1_el1
+	orr x18, x18, #(1UL << 48)
+	add x18, x18, #(2 * PAGE_SIZE)
+	msr ttbr1_el1, x18
+	isb
+	mrs x18, tpidrro_el0
+	msr tpidrro_el0, xzr
+	eret
+
+/*
+ * The next page contains the start of the EL0 exception handlers.
+ * This page is not "exposed" to userland, but should immediately
+ * follow the page with the EL0 exception vector table such that
+ * relative branches don't give away anything about the layout of our
+ * kernel.
+ */
+	.align 12
+
+.macro tramp_enter
+	ldr x18, =exception_vectors
+	msr vbar_el1, x18
+	isb
+	mrs x18, tpidrro_el0
+	msr tpidrro_el0, xzr
+.endm
+
+tramp_el0_sync:
+	tramp_enter
+	b handle_el0_sync
+
+tramp_el0_irq:
+	tramp_enter
+	b handle_el0_irq
+
+tramp_el0_error:
+	tramp_enter
+	b handle_el0_error
diff --git a/sys/arch/arm64/conf/files.arm64 b/sys/arch/arm64/conf/files.arm64
index a4eee2b0e0a..e2134f24851 100644
--- a/sys/arch/arm64/conf/files.arm64
+++ b/sys/arch/arm64/conf/files.arm64
@@ -1,4 +1,4 @@
-# $OpenBSD: files.arm64,v 1.16 2018/01/04 14:30:08 kettenis Exp $
+# $OpenBSD: files.arm64,v 1.17 2018/01/10 23:27:18 kettenis Exp $

 maxpartitions	16
 maxusers	2 8 64
@@ -29,6 +29,7 @@ file arch/arm64/arm64/intr.c
 file arch/arm64/arm64/softintr.c
 file arch/arm64/arm64/vfp.c
 file arch/arm64/arm64/exception.S
+file arch/arm64/arm64/trampoline.S
 file arch/arm64/arm64/trap.c
 file arch/arm64/arm64/ast.c
 file arch/arm64/arm64/arm64_mutex.c
diff --git a/sys/arch/arm64/dev/efi.c b/sys/arch/arm64/dev/efi.c
index 976e4c08ff2..ea1aaf35775 100644
--- a/sys/arch/arm64/dev/efi.c
+++ b/sys/arch/arm64/dev/efi.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: efi.c,v 1.1 2018/01/04 14:30:08 kettenis Exp $	*/
+/*	$OpenBSD: efi.c,v 1.2 2018/01/10 23:27:18 kettenis Exp $	*/

 /*
  * Copyright (c) 2017 Mark Kettenis
@@ -184,7 +184,9 @@ efi_enter(struct efi_softc *sc)
 	struct pmap *pm = sc->sc_pm;

 	sc->sc_psw = disable_interrupts();
-	cpu_setttb(((uint64_t)pm->pm_asid << 48) | pm->pm_pt0pa);
+	WRITE_SPECIALREG(ttbr0_el1, pmap_kernel()->pm_pt0pa);
+	__asm volatile("isb");
+	cpu_setttb(pm->pm_asid, pm->pm_pt0pa);
 }

 void
@@ -192,7 +194,9 @@ efi_leave(struct efi_softc *sc)
 {
 	struct pmap *pm = curcpu()->ci_curpm;

-	cpu_setttb(((uint64_t)pm->pm_asid << 48) | pm->pm_pt0pa);
+	WRITE_SPECIALREG(ttbr0_el1, pmap_kernel()->pm_pt0pa);
+	__asm volatile("isb");
+	cpu_setttb(pm->pm_asid, pm->pm_pt0pa);
 	restore_interrupts(sc->sc_psw);
 }
diff --git a/sys/arch/arm64/include/cpufunc.h b/sys/arch/arm64/include/cpufunc.h
index 6473ceb3e15..b8beb85c831 100644
--- a/sys/arch/arm64/include/cpufunc.h
+++ b/sys/arch/arm64/include/cpufunc.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: cpufunc.h,v 1.2 2017/03/24 19:48:01 kettenis Exp $	*/
+/*	$OpenBSD: cpufunc.h,v 1.3 2018/01/10 23:27:18 kettenis Exp $	*/
 /*-
  * Copyright (c) 2014 Andrew Turner
  * All rights reserved.
@@ -39,7 +39,7 @@ extern int64_t icache_line_size;
 extern int64_t idcache_line_size;
 extern int64_t dczva_line_size;

-void cpu_setttb(vaddr_t);
+void cpu_setttb(int, paddr_t);
 void cpu_tlb_flush(void);
 void cpu_tlb_flush_asid(vaddr_t);
 void cpu_tlb_flush_all_asid(vaddr_t);
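
Editorial note on the mechanism, not part of the commit: with TCR_A1 set (the
pmap_init() hunk), the active ASID is taken from TTBR1_EL1, so the trampoline
switches between the full kernel view and the userland view by rewriting that
single register on every EL0 exception entry and return.  The C sketch below
mirrors the trampoline.S instructions; the helper names are made up, and it
assumes (as the pmap_bootstrap() and trampoline.S hunks imply) that the
stripped-down pmap_tramp L1 table sits exactly 2 * PAGE_SIZE after the kernel
L1 table and that userland ASIDs are the kernel ASID with the low bit set.

#include <stdint.h>

#define TRAMP_ASID_USER		(1ULL << 48)	/* ASID_USER in TTBR1_EL1's ASID field */
#define TRAMP_TABLE_OFFSET	(2 * 4096ULL)	/* 2 * PAGE_SIZE, the implied sizeof(struct pmapvp1) */

/* EL0 entry: mirrors "bic x18, x18, #(1UL << 48); sub x18, x18, #(2 * PAGE_SIZE)" */
static inline uint64_t
tramp_ttbr1_enter_kernel(uint64_t ttbr1)
{
	/* switch to the even (kernel) ASID and the full kernel L1 table */
	return (ttbr1 & ~TRAMP_ASID_USER) - TRAMP_TABLE_OFFSET;
}

/* return to EL0: mirrors "orr x18, x18, #(1UL << 48); add x18, x18, #(2 * PAGE_SIZE)" */
static inline uint64_t
tramp_ttbr1_return_to_user(uint64_t ttbr1)
{
	/* switch to the odd (userland) ASID and the trampoline-only L1 table */
	return (ttbr1 | TRAMP_ASID_USER) + TRAMP_TABLE_OFFSET;
}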