From 9f1181d59de355abd911a3e0ab234c28078875ed Mon Sep 17 00:00:00 2001 From: guenther Date: Tue, 1 Jun 2021 21:12:11 +0000 Subject: [PATCH] Don't clear the cpu's bit in the old pmap's pm_cpus until we're off the old one and set it in the new pmap's pm_cpus before loading %cr3 with the new value. In particular, do neither if %cr3 isn't changing. This eliminates a window where, when switching between threads in a single a process, the pmap wouldn't have this cpu's bit set even though we didn't change %cr3. With more of uvm unlocked, it was possible for another cpu to update the page tables but not see a need to send an IPI to this cpu, leading to crashes when TLB entries that should have been invalidated were used. malloc_duel testing by abluhm@ ok abluhm@ kettenis@ mlarkin@ --- sys/arch/amd64/amd64/locore.S | 61 +++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index a630dce51a6..4ad7c137601 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.S,v 1.123 2021/05/25 17:07:55 guenther Exp $ */ +/* $OpenBSD: locore.S,v 1.124 2021/06/01 21:12:11 guenther Exp $ */ /* $NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $ */ /* @@ -342,6 +342,7 @@ ENTRY(cpu_switchto) shrq $32,%rdx /* If old proc exited, don't bother. */ + xorl %ecx,%ecx testq %r13,%r13 jz switch_exited @@ -349,18 +350,19 @@ ENTRY(cpu_switchto) * Save old context. * * Registers: - * %rax, %rcx - scratch + * %rax - scratch * %r13 - old proc, then old pcb + * %rcx - old pmap if not P_SYSTEM * %r12 - new proc * %r9d - cpuid */ + /* remember the pmap if not P_SYSTEM */ + testl $P_SYSTEM,P_FLAG(%r13) movq P_ADDR(%r13),%r13 - - /* clear the old pmap's bit for the cpu */ + jnz 0f movq PCB_PMAP(%r13),%rcx - lock - btrq %r9,PM_CPUS(%rcx) +0: /* Save stack pointers. */ movq %rsp,PCB_RSP(%r13) @@ -416,14 +418,24 @@ restore_saved: * Restore saved context. * * Registers: - * %rax, %rcx, %rdx - scratch - * %r13 - new pcb + * %rax, %rdx - scratch + * %rcx - old pmap if not P_SYSTEM * %r12 - new process + * %r13 - new pcb + * %rbx - new pmap if not P_SYSTEM */ + movq P_ADDR(%r12),%r13 + + /* remember the pmap if not P_SYSTEM */ + xorl %ebx,%ebx + testl $P_SYSTEM,P_FLAG(%r12) + jnz 1f + movq PCB_PMAP(%r13),%rbx +1: + /* No interrupts while loading new state. */ cli - movq P_ADDR(%r12),%r13 /* Restore stack pointers. */ movq PCB_RSP(%r13),%rsp @@ -438,8 +450,25 @@ restore_saved: movq PCB_CR3(%r13),%rax /* flags from cmpq unchanged */ jz .Lsame_cr3 + /* set the new pmap's bit for the cpu */ + testq %rbx,%rbx + jz .Lno_new_pmap + lock + btsq %r9,PM_CPUS(%rbx) +#ifdef DIAGNOSTIC + jc _C_LABEL(switch_pmcpu_set) +#endif +.Lno_new_pmap: + movq %rax,%cr3 /* %rax used below too */ + /* clear the old pmap's bit for the cpu */ + testq %rcx,%rcx + jz .Lno_old_pmap + lock + btrq %r9,PM_CPUS(%rcx) +.Lno_old_pmap: + .Lsame_cr3: /* * If we switched from a userland thread with a shallow call stack @@ -451,14 +480,13 @@ restore_saved: RET_STACK_REFILL_WITH_RCX /* Don't bother with the rest if switching to a system process. */ - testl $P_SYSTEM,P_FLAG(%r12) - jnz switch_restored + testq %rbx,%rbx + jz switch_restored /* record the bits needed for future U-->K transition */ movq PCB_KSTACK(%r13),%rdx subq $FRAMESIZE,%rdx movq %rdx,CPUVAR(KERN_RSP) - movq PCB_PMAP(%r13),%rcx CODEPATCH_START /* @@ -466,20 +494,13 @@ restore_saved: * then record them in cpu_info for easy access in syscall and * interrupt trampolines. */ - movq PM_PDIRPA_INTEL(%rcx),%rdx + movq PM_PDIRPA_INTEL(%rbx),%rdx orq cr3_reuse_pcid,%rax orq cr3_pcid_proc_intel,%rdx movq %rax,CPUVAR(KERN_CR3) movq %rdx,CPUVAR(USER_CR3) CODEPATCH_END(CPTAG_MELTDOWN_NOP) - /* set the new pmap's bit for the cpu */ - lock - btsq %r9,PM_CPUS(%rcx) -#ifdef DIAGNOSTIC - jc _C_LABEL(switch_pmcpu_set) -#endif - switch_restored: SET_CURPCB(%r13) -- 2.20.1