From: guenther
Date: Thu, 12 Jul 2018 14:11:11 +0000 (+0000)
Subject: Reorganize the Meltdown entry and exit trampolines for syscall and
X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=1fc8fad1ef00427ff55d700df3f3dfdb82455f63;p=openbsd

Reorganize the Meltdown entry and exit trampolines for syscall and
traps so that the "mov %rax,%cr3" is followed by an infinite loop
which is avoided because the mapping of the code being executed is
changed.  This means the sysretq/iretq isn't even present in that
flow of instructions in the kernel mapping, so userspace code can't
be speculatively reached on the kernel mapping, and it totally
eliminates the conditional jump over the %cr3 change that supported
CPUs without the Meltdown vulnerability.

The return paths were probably vulnerable to Spectre v1 (and v1.1/1.2)
style attacks, speculatively executing user code post-system-call with
the kernel mappings, thus creating cache/TLB/etc side-effects.

Would like to apply this technique to the interrupt stubs too, but
I'm hitting a bug in clang's assembler which misaligns the code and
symbols.

While here, when on a CPU not vulnerable to Meltdown, codepatch out
the unnecessary bits in cpu_switchto().

Inspiration from sf@, refined over dinner with theo
ok mlarkin@ deraadt@
---

diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c
index 4ad2069c0d7..67626ed9c38 100644
--- a/sys/arch/amd64/amd64/cpu.c
+++ b/sys/arch/amd64/amd64/cpu.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: cpu.c,v 1.124 2018/07/09 12:58:43 guenther Exp $	*/
+/*	$OpenBSD: cpu.c,v 1.125 2018/07/12 14:11:11 guenther Exp $	*/
 /*	$NetBSD: cpu.c,v 1.1 2003/04/26 18:39:26 fvdl Exp $	*/
 
 /*-
@@ -140,6 +140,7 @@ struct cpu_softc {
 };
 
 void	replacesmap(void);
+void	replacemeltdown(void);
 extern long _stac;
 extern long _clac;
 
@@ -162,6 +163,21 @@ replacesmap(void)
 	splx(s);
 }
 
+void
+replacemeltdown(void)
+{
+	static int replacedone = 0;
+	int s;
+
+	if (replacedone)
+		return;
+	replacedone = 1;
+
+	s = splhigh();
+	codepatch_nop(CPTAG_MELTDOWN_NOP);
+	splx(s);
+}
+
 #ifdef MULTIPROCESSOR
 int mp_cpu_start(struct cpu_info *);
 void mp_cpu_start_cleanup(struct cpu_info *);
@@ -880,7 +896,7 @@ mp_cpu_start_cleanup(struct cpu_info *ci)
 #endif	/* MULTIPROCESSOR */
 
 typedef void (vector)(void);
-extern vector Xsyscall, Xsyscall32;
+extern vector Xsyscall_meltdown, Xsyscall, Xsyscall32;
 
 void
 cpu_init_msrs(struct cpu_info *ci)
@@ -888,7 +904,8 @@ cpu_init_msrs(struct cpu_info *ci)
 	wrmsr(MSR_STAR,
 	    ((uint64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
 	    ((uint64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48));
-	wrmsr(MSR_LSTAR, (uint64_t)Xsyscall);
+	wrmsr(MSR_LSTAR, cpu_meltdown ? (uint64_t)Xsyscall_meltdown :
+	    (uint64_t)Xsyscall);
 	wrmsr(MSR_CSTAR, (uint64_t)Xsyscall32);
 	wrmsr(MSR_SFMASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D|PSL_AC);
diff --git a/sys/arch/amd64/amd64/identcpu.c b/sys/arch/amd64/amd64/identcpu.c
index 2bedf138471..182fda698e8 100644
--- a/sys/arch/amd64/amd64/identcpu.c
+++ b/sys/arch/amd64/amd64/identcpu.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: identcpu.c,v 1.101 2018/07/11 20:07:55 guenther Exp $	*/
+/*	$OpenBSD: identcpu.c,v 1.102 2018/07/12 14:11:11 guenther Exp $	*/
 /*	$NetBSD: identcpu.c,v 1.1 2003/04/26 18:39:28 fvdl Exp $	*/
 
 /*
@@ -46,6 +46,7 @@
 #include
 
 void	replacesmap(void);
+void	replacemeltdown(void);
 uint64_t cpu_freq(struct cpu_info *);
 void	tsc_timecounter_init(struct cpu_info *, uint64_t);
 #if NVMM > 0
@@ -633,6 +634,8 @@ identifycpu(struct cpu_info *ci)
 
 	if (cpu_meltdown)
 		printf(",MELTDOWN");
+	else
+		replacemeltdown();
 
 	printf("\n");
 
diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S
index 05ec5fb2d37..e3cfbdc8d36 100644
--- a/sys/arch/amd64/amd64/locore.S
+++ b/sys/arch/amd64/amd64/locore.S
@@ -1,4 +1,4 @@
-/*	$OpenBSD: locore.S,v 1.104 2018/07/10 16:01:26 deraadt Exp $	*/
+/*	$OpenBSD: locore.S,v 1.105 2018/07/12 14:11:11 guenther Exp $	*/
 /*	$NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $	*/
 
 /*
@@ -458,18 +458,16 @@ restore_saved:
 	movq	%rdx,CPUVAR(KERN_RSP)
 
 	movq	PCB_PMAP(%r13),%rcx
+	CODEPATCH_START
 	/*
 	 * Meltdown: iff we're doing separate U+K and U-K page tables,
 	 * then record them in cpu_info for easy access in syscall and
-	 * interrupt trampolines.  XXX code patch this
+	 * interrupt trampolines.
 	 */
-
 	movq	PM_PDIRPA_INTEL(%rcx),%rdx
-	testq	%rdx,%rdx
-	jz	0f		/* yay, no intel suckiness */
 	movq	%rax,CPUVAR(KERN_CR3)
 	movq	%rdx,CPUVAR(USER_CR3)
-0:
+	CODEPATCH_END(CPTAG_MELTDOWN_NOP)
 
 	/* set the new pmap's bit for the cpu */
 	lock
@@ -546,30 +544,47 @@ IDTVEC(syscall32)
 
 /*
  * syscall insn entry.
+ * Enter here with interrupts blocked; %rcx contains the caller's
+ * %rip and the original rflags has been copied to %r11.  %cs and
+ * %ss have been updated to the kernel segments, but %rsp is still
+ * the user-space value.
+ * First order of business is to swap to the kernel GS.base so that
+ * we can access our struct cpu_info.  After possibly mucking with
+ * pagetables, we switch to our kernel stack.  Once that's in place
+ * we can unblock interrupts and save the rest of the syscall frame.
  */
-IDTVEC(syscall)
+KUTEXT_PAGE_START
+	.align	NBPG, 0xcc
+XUsyscall_meltdown:
 	/*
-	 * Enter here with interrupts blocked; %rcx contains the caller's
-	 * %rip and the original rflags has been copied to %r11.  %cs and
-	 * %ss have been updated to the kernel segments, but %rsp is still
-	 * the user-space value.
-	 * First order of business is to swap to the kernel GS.base so that
-	 * we can access our struct cpu_info and use the scratch space there
-	 * to switch to the kernel page tables (thank you, Intel), then
-	 * switch to our kernel stack.  Once that's in place we can
-	 * unblock interrupts and save the rest of the syscall frame.
+	 * This is the real Xsyscall_meltdown page, which is mapped into
+	 * the U-K page tables at the same location as Xsyscall_meltdown
+	 * below.  For this, the Meltdown case, we use the scratch space
+	 * in cpu_info so we can switch to the kernel page tables
+	 * (thank you, Intel), at which point we'll continue at the
+	 * "movq CPUVAR(KERN_RSP),%rax" after Xsyscall below.
+	 * In case the CPU speculates past the mov to cr3, we put a
+	 * retpoline-style pause-jmp-to-pause loop.
 	 */
 	swapgs
-	movq	%r15,CPUVAR(SCRATCH)
-	movq	CPUVAR(KERN_CR3),%r15
-	testq	%r15,%r15
-	jz	Xsyscall_untramp
-	movq	%r15,%cr3
-	jmp	Xsyscall_untramp
-
-NENTRY(Xsyscall_untramp)
-	movq	CPUVAR(KERN_RSP),%r15
-	xchgq	%r15,%rsp
+	movq	%rax,CPUVAR(SCRATCH)
+	movq	CPUVAR(KERN_CR3),%rax
+	movq	%rax,%cr3
+0:	pause
+	jmp	0b
+KUTEXT_PAGE_END
+
+KTEXT_PAGE_START
+	.align	NBPG, 0xcc
+IDTVEC_NOALIGN(syscall_meltdown)
+	/* pad to match real Xsyscall_meltdown positioning above */
+	movq	CPUVAR(KERN_CR3),%rax
+	movq	%rax,%cr3
+IDTVEC_NOALIGN(syscall)
+	swapgs
+	movq	%rax,CPUVAR(SCRATCH)
+	movq	CPUVAR(KERN_RSP),%rax
+	xchgq	%rax,%rsp
 	sti
 
 	/*
@@ -580,8 +595,8 @@ NENTRY(Xsyscall_untramp)
 	 * saved.  Then, fill in the rest.
 	 */
 	movq	$(GSEL(GUDATA_SEL, SEL_UPL)),TF_SS(%rsp)
-	movq	%r15,TF_RSP(%rsp)
-	movq	CPUVAR(SCRATCH),%r15
+	movq	%rax,TF_RSP(%rsp)
+	movq	CPUVAR(SCRATCH),%rax
 	INTR_SAVE_MOST_GPRS_NO_ADJ
 	movq	%rcx,TF_RCX(%rsp)
 	movq	%r11, TF_RFLAGS(%rsp)	/* old rflags from syscall insn */
@@ -648,21 +663,29 @@ NENTRY(Xsyscall_untramp)
 	 */
 	movq	TF_RDX(%rsp),%rdx
 	movq	TF_RAX(%rsp),%rax
-	movq	%rax,CPUVAR(SCRATCH)
-	movq	CPUVAR(USER_CR3),%rax
-
 	movq	TF_RIP(%rsp),%rcx
 	movq	TF_RFLAGS(%rsp),%r11
 	movq	TF_RSP(%rsp),%rsp
-	testq	%rax,%rax
-	jz	1f
-	jmp	syscall_trampback
+	CODEPATCH_START
+	movq	%rax,CPUVAR(SCRATCH)
+	movq	CPUVAR(USER_CR3),%rax
+	movq	%rax,%cr3
+Xsyscall_trampback:
+0:	pause
+	jmp	0b
+	CODEPATCH_END(CPTAG_MELTDOWN_NOP)
+	swapgs
+	sysretq
+KTEXT_PAGE_END
 
-KUENTRY(syscall_trampback)
+KUTEXT_PAGE_START
+	.space	(Xsyscall_trampback - Xsyscall_meltdown) - \
+		(. - XUsyscall_meltdown), 0xcc
 	movq	%rax,%cr3
-1:	movq	CPUVAR(SCRATCH),%rax
+	movq	CPUVAR(SCRATCH),%rax
 	swapgs
 	sysretq
+KUTEXT_PAGE_END
 
 	.text
 	_ALIGN_TRAPS
@@ -745,7 +768,9 @@ NENTRY(proc_trampoline)
  *	- swapgs
  *	- iretq
  */
-NENTRY(intr_user_exit)
+KTEXT_PAGE_START
+	_ALIGN_TRAPS
+GENTRY(intr_user_exit)
 #ifdef DIAGNOSTIC
 	pushfq
 	popq	%rdx
@@ -809,25 +834,34 @@ intr_user_exit_post_ast:
 	movq	$(GSEL(GUDATA_SEL,SEL_UPL)),IRETQ_SS(%rdx)
 	/* finish with the trap frame */
 	movq	TF_RAX(%rsp),%rax
-	movq	%rax,CPUVAR(SCRATCH)
 	movq	TF_RCX(%rsp),%rcx
 	movq	TF_R11(%rsp),%r11
 	/* switch to the trampoline stack */
 	xchgq	%rdx,%rsp
 	movq	TF_RDX(%rdx),%rdx
+	CODEPATCH_START
+	movq	%rax,CPUVAR(SCRATCH)
 	movq	CPUVAR(USER_CR3),%rax
-	testq	%rax,%rax
-	jz	1f
-	jmp	iretq_tramp
-
-KUENTRY(iretq_tramp)
 	movq	%rax,%cr3
-1:	movq	CPUVAR(SCRATCH),%rax
+Xiretq_trampback:
+0:	pause
+	jmp	0b
+	.space	5,0xcc		/* pad to match "movq CPUVAR(SCRATCH),%rax" */
+	CODEPATCH_END(CPTAG_MELTDOWN_NOP)
 	swapgs
 
 	.globl	_C_LABEL(doreti_iret)
 _C_LABEL(doreti_iret):
 	iretq
+KTEXT_PAGE_END
+
+KUTEXT_PAGE_START
+	.space	(Xiretq_trampback - Xsyscall_meltdown) - \
+		(. - XUsyscall_meltdown), 0xcc
+	movq	CPUVAR(SCRATCH),%rax
+	swapgs
+	iretq
+KUTEXT_PAGE_END
 
 	.text
 	_ALIGN_TRAPS
diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c
index 4453cbe45f5..0467a4d994c 100644
--- a/sys/arch/amd64/amd64/machdep.c
+++ b/sys/arch/amd64/amd64/machdep.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: machdep.c,v 1.247 2018/07/10 04:19:59 guenther Exp $	*/
+/*	$OpenBSD: machdep.c,v 1.248 2018/07/12 14:11:11 guenther Exp $	*/
 /*	$NetBSD: machdep.c,v 1.3 2003/05/07 22:58:18 fvdl Exp $	*/
 
 /*-
@@ -345,6 +345,8 @@ void
 enter_shared_special_pages(void)
 {
 	extern char __kutext_start[], __kutext_end[], __kernel_kutext_phys[];
+	extern char __text_page_start[], __text_page_end[];
+	extern char __kernel_kutext_page_phys[];
 	extern char __kudata_start[], __kudata_end[], __kernel_kudata_phys[];
 	vaddr_t va;
 	paddr_t pa;
@@ -365,6 +367,17 @@ enter_shared_special_pages(void)
 		pa += PAGE_SIZE;
 	}
 
+	/* .kutext.page section */
+	va = (vaddr_t)__text_page_start;
+	pa = (paddr_t)__kernel_kutext_page_phys;
+	while (va < (vaddr_t)__text_page_end) {
+		pmap_enter_special(va, pa, PROT_READ | PROT_EXEC);
+		DPRINTF("%s: entered kutext.page va 0x%llx pa 0x%llx\n",
+		    __func__, (uint64_t)va, (uint64_t)pa);
+		va += PAGE_SIZE;
+		pa += PAGE_SIZE;
+	}
+
 	/* .kudata section */
 	va = (vaddr_t)__kudata_start;
 	pa = (paddr_t)__kernel_kudata_phys;
diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c
index 81b3af7ff18..3e473dc4915 100644
--- a/sys/arch/amd64/amd64/pmap.c
+++ b/sys/arch/amd64/amd64/pmap.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: pmap.c,v 1.115 2018/07/11 20:07:55 guenther Exp $	*/
+/*	$OpenBSD: pmap.c,v 1.116 2018/07/12 14:11:11 guenther Exp $	*/
 /*	$NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $	*/
 
 /*
@@ -2086,15 +2086,25 @@ pmap_enter_special(vaddr_t va, paddr_t pa, vm_prot_t prot)
 		    "0x%llx was 0x%llx\n", __func__, (uint64_t)npa,
 		    (uint64_t)pd, (uint64_t)prot, (uint64_t)pd[l1idx]);
 
-	pd[l1idx] = pa | protection_codes[prot] | PG_V | PG_G | PG_W;
-	DPRINTF("%s: setting PTE[%lld] = 0x%llx\n", __func__, l1idx, pd[l1idx]);
+	pd[l1idx] = pa | protection_codes[prot] | PG_V | PG_W;
 
-	/* now set the PG_G flag on the corresponding U+K entry */
+	/*
+	 * Look up the corresponding U+K entry.  If we're installing the
+	 * same PA into the U-K map then set the PG_G bit on both
+	 */
 	level = pmap_find_pte_direct(pmap, va, &ptes, &offs);
-	if (__predict_true(level == 0 && pmap_valid_entry(ptes[offs])))
-		ptes[offs] |= PG_G;
-	else
+	if (__predict_true(level == 0 && pmap_valid_entry(ptes[offs]))) {
+		if (((pd[l1idx] ^ ptes[offs]) & PG_FRAME) == 0) {
+			pd[l1idx] |= PG_G;
+			ptes[offs] |= PG_G;
+		} else {
+			DPRINTF("%s: special diffing mapping at %llx\n",
+			    __func__, (long long)va);
+		}
+	} else
 		DPRINTF("%s: no U+K mapping for special mapping?\n", __func__);
+
+	DPRINTF("%s: setting PTE[%lld] = 0x%llx\n", __func__, l1idx, pd[l1idx]);
 }
 
 void
 pmap_remove_ept(struct pmap *pmap, vaddr_t sgpa, vaddr_t egpa)
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 0dc7f2e5dd7..5bd895307e7 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -1,4 +1,4 @@
-/*	$OpenBSD: vector.S,v 1.71 2018/07/10 16:01:26 deraadt Exp $	*/
+/*	$OpenBSD: vector.S,v 1.72 2018/07/12 14:11:11 guenther Exp $	*/
 /*	$NetBSD: vector.S,v 1.5 2004/06/28 09:13:11 fvdl Exp $	*/
 
 /*
@@ -358,22 +358,59 @@ Xexceptions:
  *	b) coming from supervisor mode and can directly jump to kernel
  *	trap handling before returning sans AST or other handling.
  */
-KUENTRY(alltraps)
-	testb	$SEL_RPL,24(%rsp)
-	je	alltraps_kern
+KUTEXT_PAGE_START
+	.align	NBPG, 0xcc
+	/*
+	 * This is the Meltdown alltraps page, which is mapped into
+	 * the U-K page tables at the same location as alltraps
+	 * below.  For this, the Meltdown case, we must be coming from
+	 * userspace so we skip the SEL_RPL test and go directly to
+	 * the swapgs+use-scratch+change-cr3 sequence.  Switching to
+	 * the kernel page tables (thank you, Intel) will make us
+	 * continue at the "movq CPUVAR(KERN_RSP),%rax" after alltraps
+	 * below.  In case the CPU speculates past the mov to cr3,
+	 * we put a retpoline-style pause-jmp-to-pause loop.
+	 */
+Xalltraps:
 	swapgs
 	movq	%rax,CPUVAR(SCRATCH)
 	movq	CPUVAR(KERN_CR3),%rax
-	testq	%rax,%rax
-	jz	alltraps_user
 	movq	%rax,%cr3
-	jmp	alltraps_user
+0:	pause
+	jmp	0b
+KUTEXT_PAGE_END
+
+KTEXT_PAGE_START
+	.align	NBPG, 0xcc
+GENTRY(alltraps)
+	testb	$SEL_RPL,24(%rsp)
+	je	alltraps_kern
+	swapgs
+	movq	%rax,CPUVAR(SCRATCH)
+	.space	(0b - Xalltraps) - (. - alltraps), 0x90
+
+	/*
+	 * Traps from userspace
+	 */
+	INTR_ENTRY_USER
+	INTR_SAVE_MOST_GPRS_NO_ADJ
+	INTR_CLEAR_GPRS
+	sti
+	cld
+	SMAP_CLAC
+	.globl	recall_trap
+recall_trap:
+	movq	%rsp, %rdi
+	call	_C_LABEL(usertrap)
+	cli
+	jmp	intr_user_exit
 END(alltraps)
 
 /*
  * Traps from supervisor mode (kernel)
  */
-NENTRY(alltraps_kern)
+	_ALIGN_TRAPS
+GENTRY(alltraps_kern)
 	INTR_ENTRY_KERN
 	INTR_SAVE_MOST_GPRS_NO_ADJ
 	sti
@@ -404,30 +441,13 @@
 	movl	%ebx,CPUVAR(ILEVEL)
 	jmp	2b
 
-	.section .rodata
+	.pushsection .rodata
 spl_lowered:
 	.asciz	"WARNING: SPL NOT LOWERED ON TRAP EXIT %x %x\n"
-	.text
+	.popsection
 #endif /* DIAGNOSTIC */
 END(alltraps_kern)
-
-/*
- * Traps from userspace
- */
-NENTRY(alltraps_user)
-	INTR_ENTRY_USER
-	INTR_SAVE_MOST_GPRS_NO_ADJ
-	INTR_CLEAR_GPRS
-	sti
-	cld
-	SMAP_CLAC
-	.globl	recall_trap
-recall_trap:
-	movq	%rsp, %rdi
-	call	_C_LABEL(usertrap)
-	cli
-	jmp	intr_user_exit
-END(alltraps_user)
+KTEXT_PAGE_END
 
 /*
diff --git a/sys/arch/amd64/conf/ld.script b/sys/arch/amd64/conf/ld.script
index ed749ed5926..7c0ee2b8c43 100644
--- a/sys/arch/amd64/conf/ld.script
+++ b/sys/arch/amd64/conf/ld.script
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ld.script,v 1.12 2018/06/18 05:43:20 guenther Exp $	*/
+/*	$OpenBSD: ld.script,v 1.13 2018/07/12 14:11:11 guenther Exp $	*/
 
 /*
  * Copyright (c) 2009 Tobias Weingartner
@@ -51,6 +51,10 @@ SECTIONS
 		start = .;
 		locore0.o(.text)
 		*(.text .text.*)
+		. = ALIGN(__ALIGN_SIZE);
+		__text_page_start = ABSOLUTE(.);
+		*(.ktext.page)
+		__text_page_end = ABSOLUTE(.);
 	} :text =0xcccccccc
 
 	. = ALIGN(__ALIGN_SIZE);
@@ -76,6 +80,13 @@ SECTIONS
 		*(.codepatchend)
 	} :rodata =0xcccccccc
 
+	. = ALIGN(__ALIGN_SIZE);
+	__kernel_kutext_page_phys = . + __kernel_virt_to_phys;
+	.kutext.page : AT (__kernel_kutext_page_phys)
+	{
+		*(SORT_BY_ALIGNMENT(.kutext.page))
+	} :rodata =0xcccccccc
+
 	. = ALIGN(0x1000);
 	__kernel_randomdata_phys = . + __kernel_virt_to_phys;
 	.openbsd.randomdata : AT (__kernel_randomdata_phys)
diff --git a/sys/arch/amd64/include/asm.h b/sys/arch/amd64/include/asm.h
index 0ef11752c02..5c29eae0995 100644
--- a/sys/arch/amd64/include/asm.h
+++ b/sys/arch/amd64/include/asm.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: asm.h,v 1.14 2018/07/10 16:01:26 deraadt Exp $	*/
+/*	$OpenBSD: asm.h,v 1.15 2018/07/12 14:11:11 guenther Exp $	*/
 /*	$NetBSD: asm.h,v 1.2 2003/05/02 18:05:47 yamt Exp $	*/
 
 /*-
@@ -65,24 +65,30 @@
 #endif
 
 #define _ALIGN_TRAPS	.align 16, 0xcc
+#define _GENTRY(x)	.globl x; .type x,@function; x:
 #define _ENTRY(x) \
-	.text; _ALIGN_TRAPS; .globl x; .type x,@function; x:
+	.text; _ALIGN_TRAPS; _GENTRY(x)
 #define _NENTRY(x) \
-	.text; _ALIGN_TEXT; .globl x; .type x,@function; x:
+	.text; _ALIGN_TEXT; _GENTRY(x)
 
 #ifdef _KERNEL
-#define KUTEXT	.section .kutext, "ax"
+#define KUTEXT	.section .kutext, "ax", @progbits
+
+#define KUTEXT_PAGE_START	.pushsection .kutext.page, "a", @progbits
+#define KTEXT_PAGE_START	.pushsection .ktext.page, "ax", @progbits
+#define KUTEXT_PAGE_END		.popsection
+#define KTEXT_PAGE_END		.popsection
 
 #define IDTVEC(name) \
 	KUTEXT; _ALIGN_TRAPS; IDTVEC_NOALIGN(name)
-#define IDTVEC_NOALIGN(name) \
-	.globl X ## name; .type X ## name,@function; X ## name:
+#define IDTVEC_NOALIGN(name)	_GENTRY(X ## name)
+#define GENTRY(x)		_GENTRY(x)
 #define KIDTVEC(name) \
 	.text; _ALIGN_TRAPS; IDTVEC_NOALIGN(name)
 #define KIDTVEC_FALLTHROUGH(name) \
 	_ALIGN_TEXT; IDTVEC_NOALIGN(name)
 #define KUENTRY(x) \
-	KUTEXT; _ALIGN_TRAPS; .globl x; .type x,@function; x:
+	KUTEXT; _ALIGN_TRAPS; _GENTRY(x)
 
 #endif /* _KERNEL */
diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h
index 159d9109451..1fc008bffe1 100644
--- a/sys/arch/amd64/include/codepatch.h
+++ b/sys/arch/amd64/include/codepatch.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: codepatch.h,v 1.5 2018/06/05 06:39:11 guenther Exp $	*/
+/*	$OpenBSD: codepatch.h,v 1.6 2018/07/12 14:11:11 guenther Exp $	*/
 /*
  * Copyright (c) 2014-2015 Stefan Fritsch
  *
@@ -52,6 +52,7 @@ void codepatch_call(uint16_t tag, void *func);
 #define CPTAG_EOI		3
 #define CPTAG_XRSTOR		4
 #define CPTAG_XSAVE		5
+#define CPTAG_MELTDOWN_NOP	6
 
 /*
  * As stac/clac SMAP instructions are 3 bytes, we want the fastest
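
A note on the codepatch mechanism this change leans on:
CODEPATCH_START/CODEPATCH_END record the enclosed byte range, keyed by
tag, in a dedicated kernel section, and codepatch_nop() later
overwrites every range carrying a given tag with NOPs.  What follows is
a minimal C sketch of that idea only; the entry layout and the names
patch_entry, patch_begin, patch_end, and kernel_base are illustrative
assumptions, not the actual sys/arch/amd64/amd64/codepatch.c.

	#include <stdint.h>
	#include <string.h>

	struct patch_entry {
		uint32_t offset;	/* range start, relative to kernel_base */
		uint16_t len;		/* range length in bytes */
		uint16_t tag;		/* CPTAG_* value, e.g. CPTAG_MELTDOWN_NOP */
	};

	/* table bounds; assumed here to be provided by the linker script */
	extern struct patch_entry patch_begin[], patch_end[];
	extern uint8_t kernel_base[];

	/* Overwrite every range carrying `tag` with NOPs. */
	void
	patch_nop_tag(uint16_t tag)
	{
		struct patch_entry *p;

		for (p = patch_begin; p < patch_end; p++) {
			if (p->tag != tag)
				continue;
			/*
			 * One-byte NOPs (0x90) keep the sketch simple; real
			 * codepatch code prefers the fastest multi-byte NOP
			 * sequences (cf. the codepatch.h comment above) and
			 * must first make the text range writable.
			 */
			memset(kernel_base + p->offset, 0x90, p->len);
		}
	}

On a CPU without the Meltdown bug, identifycpu() calls
replacemeltdown(), which performs the equivalent of
patch_nop_tag(CPTAG_MELTDOWN_NOP) exactly once, so the trampoline
bookkeeping in cpu_switchto() and the return paths costs nothing there.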