From: guenther Date: Tue, 1 Feb 2022 08:38:53 +0000 (+0000) Subject: Attempt to guarantee that on copy-on-write faulting, the new copy X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=43687ba57c7d88063c6fa2df2386adbd1a0cf241;p=openbsd Attempt to guarantee that on copy-on-write faulting, the new copy can't be written to while any thread can see the original version of the page via a not-yet-flushed stale TLB entry: pmaps can indicate they do this correctly by defining __HAVE_PMAP_MPSAFE_ENTER_COW; uvm will force the initial CoW fault to be read-only otherwise. Set that on amd64 and fix the problem case in pmap_enter() by putting a read-only mapping in place, shooting the TLB entry, then fixing it to the final read-write entry so this thread can continue without re-faulting. reported by jsing@ from https://github.com/golang/go/issues/34988 assisted by discussion in https://reviews.freebsd.org/D14347 tweaks from jsing@ and kettenis@ ok jsing@ mpi@ kettenis@ --- diff --git a/sys/arch/amd64/amd64/pmap.c b/sys/arch/amd64/amd64/pmap.c index 4773d35121d..b69d9720b4e 100644 --- a/sys/arch/amd64/amd64/pmap.c +++ b/sys/arch/amd64/amd64/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.148 2021/09/14 16:14:50 kettenis Exp $ */ +/* $OpenBSD: pmap.c,v 1.149 2022/02/01 08:38:53 guenther Exp $ */ /* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */ /* @@ -2840,16 +2840,34 @@ enter_now: if (pmap == pmap_kernel()) npte |= pg_g_kern; - PTE_BASE[pl1_i(va)] = npte; /* zap! */ - /* - * If we changed anything other than modified/used bits, - * flush the TLB. (is this overkill?) + * If the old entry wasn't valid, we can just update it and + * go. If it was valid, and this isn't a read->write + * transition, then we can safely just update it and flush + * any old TLB entries. + * + * If it _was_ valid and this _is_ a read->write transition, + * then this could be a CoW resolution and we need to make + * sure no CPU can see the new writable mapping while another + * still has the old mapping in its TLB, so insert a correct + * but unwritable mapping, flush any old TLB entries, then + * make it writable. */ - if (pmap_valid_entry(opte)) { + if (! pmap_valid_entry(opte)) { + PTE_BASE[pl1_i(va)] = npte; + } else if ((opte | (npte ^ PG_RW)) & PG_RW) { + /* previously writable or not making writable */ + PTE_BASE[pl1_i(va)] = npte; if (nocache && (opte & PG_N) == 0) wbinvd_on_all_cpus(); pmap_tlb_shootpage(pmap, va, shootself); + } else { + PTE_BASE[pl1_i(va)] = npte ^ PG_RW; + if (nocache && (opte & PG_N) == 0) /* XXX impossible? */ + wbinvd_on_all_cpus(); + pmap_tlb_shootpage(pmap, va, shootself); + pmap_tlb_shootwait(); + PTE_BASE[pl1_i(va)] = npte; } pmap_unmap_ptes(pmap, scr3); diff --git a/sys/arch/amd64/include/pmap.h b/sys/arch/amd64/include/pmap.h index 25c5f07c9f8..c5ba981b282 100644 --- a/sys/arch/amd64/include/pmap.h +++ b/sys/arch/amd64/include/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.78 2021/06/18 06:17:28 guenther Exp $ */ +/* $OpenBSD: pmap.h,v 1.79 2022/02/01 08:38:53 guenther Exp $ */ /* $NetBSD: pmap.h,v 1.1 2003/04/26 18:39:46 fvdl Exp $ */ /* @@ -518,6 +518,7 @@ kvtopte(vaddr_t va) #define pmap_unmap_direct(va) PHYS_TO_VM_PAGE(PMAP_DIRECT_UNMAP(va)) #define __HAVE_PMAP_DIRECT +#define __HAVE_PMAP_MPSAFE_ENTER_COW #endif /* _KERNEL && !_LOCORE */ diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c index d54a2b85d75..312ba2b33d6 100644 --- a/sys/uvm/uvm_fault.c +++ b/sys/uvm/uvm_fault.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_fault.c,v 1.124 2021/12/28 13:16:28 mpi Exp $ */ +/* $OpenBSD: uvm_fault.c,v 1.125 2022/02/01 08:38:53 guenther Exp $ */ /* $NetBSD: uvm_fault.c,v 1.51 2000/08/06 00:22:53 thorpej Exp $ */ /* @@ -1014,6 +1014,18 @@ uvm_fault_upper(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, /* deref: can not drop to zero here by defn! */ oanon->an_ref--; +#ifndef __HAVE_PMAP_MPSAFE_ENTER_COW + /* + * If there are multiple threads, either uvm or the + * pmap has to make sure no threads see the old RO + * mapping once any have seen the new RW mapping. + * uvm does it by inserting the new mapping RO and + * letting it fault again. + */ + if (P_HASSIBLING(curproc)) + flt->enter_prot &= ~PROT_WRITE; +#endif + /* * note: anon is _not_ locked, but we have the sole references * to in from amap.