From 00e7696ef29d639c47992c239d3900a5b4cbb811 Mon Sep 17 00:00:00 2001
From: mpi
Date: Thu, 4 Mar 2021 08:38:48 +0000
Subject: [PATCH] Bring back previous fix for UVM vnode deadlock.

tb@ reports that refaulting when there's contention on the vnode makes
firefox start very slowly on his machine.  To revisit once the fault
handler is unlocked.

ok anton@

Original commit message:

Fix a deadlock between uvn_io() and uvn_flush().

While faulting on a page backed by a vnode, uvn_io() will end up being
called in order to populate newly allocated pages using I/O on the
backing vnode.  Before performing the I/O, newly allocated pages are
flagged as busy by uvn_get(), that is, before uvn_io() tries to lock
the vnode.  Such pages could then end up being flushed by uvn_flush(),
which has already acquired the vnode lock.  Since such pages are
flagged as busy, uvn_flush() will wait for them to be flagged as not
busy.  This will never happen, as uvn_io() cannot make progress until
the vnode lock is released.

Instead, grab the vnode lock before allocating and flagging pages as
busy in uvn_get().  This does extend the scope in uvn_get() in which
the vnode is locked, but resolves the deadlock.

ok mpi@

Reported-by: syzbot+e63407b35dff08dbee02@syzkaller.appspotmail.com
---
 sys/uvm/uvm_pager.h |   3 +-
 sys/uvm/uvm_vnode.c | 138 +++++++++++++++++++++++++++++---------------
 sys/uvm/uvm_vnode.h |   3 +-
 3 files changed, 95 insertions(+), 49 deletions(-)

diff --git a/sys/uvm/uvm_pager.h b/sys/uvm/uvm_pager.h
index e9ce8b989fb..7067078ba9a 100644
--- a/sys/uvm/uvm_pager.h
+++ b/sys/uvm/uvm_pager.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: uvm_pager.h,v 1.30 2021/03/02 10:12:37 mpi Exp $	*/
+/*	$OpenBSD: uvm_pager.h,v 1.31 2021/03/04 08:38:48 mpi Exp $	*/
 /*	$NetBSD: uvm_pager.h,v 1.20 2000/11/27 08:40:05 chs Exp $	*/
 
 /*
@@ -111,7 +111,6 @@ struct uvm_pagerops {
 #define PGO_LOCKED	0x040	/* fault data structures are locked [get] */
 #define PGO_PDFREECLUST	0x080	/* daemon's free cluster flag [uvm_pager_put] */
 #define PGO_REALLOCSWAP	0x100	/* reallocate swap area [pager_dropcluster] */
-#define PGO_NOWAIT	0x200	/* do not wait for inode lock */
 
 /* page we are not interested in getting */
 #define PGO_DONTCARE	((struct vm_page *) -1L)	/* [get only] */
diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c
index 75760347835..c26b938e7c1 100644
--- a/sys/uvm/uvm_vnode.c
+++ b/sys/uvm/uvm_vnode.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: uvm_vnode.c,v 1.110 2021/03/02 10:12:38 mpi Exp $	*/
+/*	$OpenBSD: uvm_vnode.c,v 1.111 2021/03/04 08:38:48 mpi Exp $	*/
 /*	$NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $	*/
 
 /*
@@ -90,6 +90,9 @@
 int		 uvn_io(struct uvm_vnode *, vm_page_t *, int, int, int);
 int		 uvn_put(struct uvm_object *, vm_page_t *, int, boolean_t);
 void		 uvn_reference(struct uvm_object *);
+
+int		 uvm_vnode_lock(struct uvm_vnode *);
+void		 uvm_vnode_unlock(struct uvm_vnode *);
 
 /*
  * master pager structure
 */
@@ -875,11 +878,16 @@ uvn_cluster(struct uvm_object *uobj, voff_t offset, voff_t *loffset,
 int
 uvn_put(struct uvm_object *uobj, struct vm_page **pps, int npages, int flags)
 {
+	struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;
 	int retval;
 
 	KERNEL_ASSERT_LOCKED();
 
-	retval = uvn_io((struct uvm_vnode*)uobj, pps, npages, flags, UIO_WRITE);
+	retval = uvm_vnode_lock(uvn);
+	if (retval)
+		return(retval);
+	retval = uvn_io(uvn, pps, npages, flags, UIO_WRITE);
+	uvm_vnode_unlock(uvn);
 
 	return(retval);
 }
@@ -897,9 +905,10 @@ int
 uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
     int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
 {
+	struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;
 	voff_t current_offset;
 	struct vm_page *ptmp;
-	int lcv, result, gotpages;
+	int lcv, result, gotpages, retval;
 	boolean_t done;
 
 	KERNEL_ASSERT_LOCKED();
@@ -973,6 +982,18 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
 		return(VM_PAGER_UNLOCK);
 	}
 
+	/*
+	 * Before getting non-resident pages, which must be populated with data
+	 * using I/O on the backing vnode, lock the same vnode. Such pages are
+	 * about to be allocated and busied (i.e. PG_BUSY) by the current
+	 * thread. Allocating and busying the page(s) before acquiring the
+	 * vnode lock could cause a deadlock with uvn_flush(), which acquires the
+	 * vnode lock before waiting on pages to become unbusy and then flushed.
+	 */
+	retval = uvm_vnode_lock(uvn);
+	if (retval)
+		return(retval);
+
 	/*
 	 * step 2: get non-resident or busy pages.
 	 * data structures are unlocked.
@@ -1059,14 +1080,15 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
 		 * we have a "fake/busy/clean" page that we just allocated. do
 		 * I/O to fill it with valid data.
 		 */
-		result = uvn_io((struct uvm_vnode *) uobj, &ptmp, 1,
-		    PGO_SYNCIO|PGO_NOWAIT, UIO_READ);
+		result = uvn_io(uvn, &ptmp, 1, PGO_SYNCIO, UIO_READ);
 
 		/*
 		 * I/O done. because we used syncio the result can not be
 		 * PEND or AGAIN.
 		 */
 		if (result != VM_PAGER_OK) {
+			uvm_vnode_unlock(uvn);
+
 			if (ptmp->pg_flags & PG_WANTED)
 				wakeup(ptmp);
 
@@ -1097,12 +1119,15 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
 	}
 
+	uvm_vnode_unlock(uvn);
+
 	return (VM_PAGER_OK);
 }
 
 /*
  * uvn_io: do I/O to a vnode
  *
+ * => uvn: the backing vnode must be locked
  * => prefer map unlocked (not required)
  * => flags: PGO_SYNCIO -- use sync. I/O
  * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync.
@@ -1120,7 +1145,6 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
 	int waitf, result, mapinflags;
 	size_t got, wanted;
 	int netunlocked = 0;
-	int lkflags = (flags & PGO_NOWAIT) ? LK_NOWAIT : 0;
 
 	/* init values */
 	waitf = (flags & PGO_SYNCIO) ? M_WAITOK : M_NOWAIT;
@@ -1189,42 +1213,17 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
 	}
 
 	/* do the I/O! (XXX: curproc?) */
-	/*
-	 * This process may already have this vnode locked, if we faulted in
-	 * copyin() or copyout() on a region backed by this vnode
-	 * while doing I/O to the vnode.  If this is the case, don't
-	 * panic.. instead, return the error to the user.
-	 *
-	 * XXX this is a stopgap to prevent a panic.
-	 * Ideally, this kind of operation *should* work.
-	 */
-	result = 0;
-	if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
-		result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL | lkflags);
-	if (result == 0) {
-		/* NOTE: vnode now locked! */
-		if (rw == UIO_READ)
-			result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
-		else
-			result = VOP_WRITE(vn, &uio,
-			    (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
-			    curproc->p_ucred);
-
-		if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
-			VOP_UNLOCK(vn);
-
-	}
+	if (rw == UIO_READ)
+		result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
+	else
+		result = VOP_WRITE(vn, &uio,
+		    (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
+		    curproc->p_ucred);
 
 	if (netunlocked)
 		NET_LOCK();
-
-	/* NOTE: vnode now unlocked (unless vnislocked) */
-
-	/*
-	 * result == unix style errno (0 == OK!)
-	 *
-	 * zero out rest of buffer (if needed)
-	 */
+	/* zero out rest of buffer (if needed) */
 	if (result == 0) {
 		got = wanted - uio.uio_resid;
 
@@ -1245,16 +1244,14 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
 		wakeup(&uvn->u_nio);
 	}
 
-	if (result == 0) {
+	if (result == 0)
 		return(VM_PAGER_OK);
-	} else if (result == EBUSY) {
-		KASSERT(flags & PGO_NOWAIT);
-		return(VM_PAGER_AGAIN);
-	} else {
-		while (rebooting)
-			tsleep_nsec(&rebooting, PVM, "uvndead", INFSLP);
-		return(VM_PAGER_ERROR);
+
+	if (result == EIO) {
+		/* Signal back to uvm_vnode_unlock(). */
+		uvn->u_flags |= UVM_VNODE_IOERROR;
 	}
+	return(VM_PAGER_ERROR);
 }
 
 /*
@@ -1471,3 +1468,52 @@ uvm_vnp_sync(struct mount *mp)
 
 	rw_exit_write(&uvn_sync_lock);
 }
+
+int
+uvm_vnode_lock(struct uvm_vnode *uvn)
+{
+	int error;
+	int netunlocked = 0;
+
+	if (uvn->u_flags & UVM_VNODE_VNISLOCKED)
+		return(VM_PAGER_OK);
+
+	/*
+	 * This thread may already have the net lock, if we faulted in copyin()
+	 * or copyout() in the network stack.
+	 */
+	if (rw_status(&netlock) == RW_WRITE) {
+		NET_UNLOCK();
+		netunlocked = 1;
+	}
+
+	/*
+	 * This thread may already have this vnode locked, if we faulted in
+	 * copyin() or copyout() on a region backed by this vnode
+	 * while doing I/O to the vnode.  If this is the case, don't panic but
+	 * instead return an error, as dictated by the LK_RECURSEFAIL flag.
+	 *
+	 * XXX this is a stopgap to prevent a panic.
+	 * Ideally, this kind of operation *should* work.
+	 */
+	error = vn_lock(uvn->u_vnode, LK_EXCLUSIVE | LK_RECURSEFAIL);
+	if (netunlocked)
+		NET_LOCK();
+	return(error ? VM_PAGER_ERROR : VM_PAGER_OK);
+}
+
+void
+uvm_vnode_unlock(struct uvm_vnode *uvn)
+{
+	int error;
+
+	if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
+		VOP_UNLOCK(uvn->u_vnode);
+
+	error = uvn->u_flags & UVM_VNODE_IOERROR;
+	uvn->u_flags &= ~UVM_VNODE_IOERROR;
+	if (error) {
+		while (rebooting)
+			tsleep_nsec(&rebooting, PVM, "uvndead", INFSLP);
+	}
+}
diff --git a/sys/uvm/uvm_vnode.h b/sys/uvm/uvm_vnode.h
index 6578e8b521d..ff67e3f0bf4 100644
--- a/sys/uvm/uvm_vnode.h
+++ b/sys/uvm/uvm_vnode.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: uvm_vnode.h,v 1.16 2021/03/02 10:09:20 mpi Exp $	*/
+/*	$OpenBSD: uvm_vnode.h,v 1.17 2021/03/04 08:38:48 mpi Exp $	*/
 /*	$NetBSD: uvm_vnode.h,v 1.9 2000/03/26 20:54:48 kleink Exp $	*/
 
 /*
@@ -84,6 +84,7 @@ struct uvm_vnode {
 					   i/o sync to clear so it can do
 					   i/o */
 #define UVM_VNODE_WRITEABLE	0x200	/* uvn has pages that are writeable */
+#define UVM_VNODE_IOERROR	0x400	/* i/o error occurred in uvn_io() */
 
 /*
  * UVM_VNODE_BLOCKED: any condition that should new processes from
-- 
2.20.1
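
To make the lock ordering restored by this patch concrete, below is a
minimal userland sketch in plain C with pthreads. It is not kernel code,
and every name in it is a hypothetical stand-in: fault_path() plays the
role of uvn_get()/uvn_io(), flush_path() the role of uvn_flush(),
vnode_lock stands in for the vnode lock taken by uvm_vnode_lock(), and
page_busy for the PG_BUSY flag.

/*
 * Sketch of the invariant: both paths acquire the "vnode" lock before
 * touching the "busy" state of a page, so no circular wait can form.
 * Hypothetical stand-ins for the kernel paths; not OpenBSD code.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t vnode_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t page_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t page_cv = PTHREAD_COND_INITIALIZER;
static int page_busy;

/* Fault path: take the "vnode lock" BEFORE busying the page. */
static void *
fault_path(void *arg)
{
	pthread_mutex_lock(&vnode_lock);

	pthread_mutex_lock(&page_mtx);
	page_busy = 1;			/* PG_BUSY analogue */
	pthread_mutex_unlock(&page_mtx);

	/* ... I/O on the backing object fills the page here ... */

	pthread_mutex_lock(&page_mtx);
	page_busy = 0;			/* page no longer busy */
	pthread_cond_broadcast(&page_cv);
	pthread_mutex_unlock(&page_mtx);

	pthread_mutex_unlock(&vnode_lock);
	return NULL;
}

/* Flush path: holds the "vnode lock" while waiting on busy pages. */
static void *
flush_path(void *arg)
{
	pthread_mutex_lock(&vnode_lock);
	pthread_mutex_lock(&page_mtx);
	while (page_busy)
		pthread_cond_wait(&page_cv, &page_mtx);
	pthread_mutex_unlock(&page_mtx);
	pthread_mutex_unlock(&vnode_lock);
	return NULL;
}

int
main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, fault_path, NULL);
	pthread_create(&t2, NULL, flush_path, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	printf("done: both paths take vnode_lock first, no deadlock\n");
	return 0;
}

With the pre-fix ordering, where the page was busied first and the vnode
lock only taken inside the I/O path, flush_path() could hold vnode_lock
while sleeping on page_cv, and fault_path() could hold a busy page while
blocked on vnode_lock: a circular wait. Acquiring the vnode lock first in
both paths, as uvn_get() and uvn_put() now do via uvm_vnode_lock(),
removes the cycle.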