From: mpi
Date: Thu, 4 Mar 2021 08:38:48 +0000 (+0000)
Subject: Bring back previous fix for UVM vnode deadlock.
X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=00e7696ef29d639c47992c239d3900a5b4cbb811;p=openbsd

Bring back previous fix for UVM vnode deadlock.

tb@ reports that refaulting when there's contention on the vnode makes
firefox start very slowly on his machine.  To revisit when the fault
handler is unlocked.

ok anton@

Original commit message:

Fix a deadlock between uvn_io() and uvn_flush().

While faulting on a page backed by a vnode, uvn_io() will end up being
called in order to populate newly allocated pages using I/O on the
backing vnode.  Before performing the I/O, newly allocated pages are
flagged as busy by uvn_get(), that is before uvn_io() tries to lock the
vnode.  Such pages could then end up being flushed by uvn_flush(), which
has already acquired the vnode lock.  Since such pages are flagged as
busy, uvn_flush() will wait for them to be flagged as not busy.  This
will never happen as uvn_io() cannot make progress until the vnode lock
is released.

Instead, grab the vnode lock before allocating and flagging pages as
busy in uvn_get().  This extends the scope in uvn_get() in which the
vnode is locked, but resolves the deadlock.

ok mpi@

Reported-by: syzbot+e63407b35dff08dbee02@syzkaller.appspotmail.com
---

diff --git a/sys/uvm/uvm_pager.h b/sys/uvm/uvm_pager.h
index e9ce8b989fb..7067078ba9a 100644
--- a/sys/uvm/uvm_pager.h
+++ b/sys/uvm/uvm_pager.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: uvm_pager.h,v 1.30 2021/03/02 10:12:37 mpi Exp $	*/
+/*	$OpenBSD: uvm_pager.h,v 1.31 2021/03/04 08:38:48 mpi Exp $	*/
 /*	$NetBSD: uvm_pager.h,v 1.20 2000/11/27 08:40:05 chs Exp $	*/
 
 /*
@@ -111,7 +111,6 @@ struct uvm_pagerops {
 #define PGO_LOCKED	0x040	/* fault data structures are locked [get] */
 #define PGO_PDFREECLUST	0x080	/* daemon's free cluster flag [uvm_pager_put] */
 #define PGO_REALLOCSWAP	0x100	/* reallocate swap area [pager_dropcluster] */
-#define PGO_NOWAIT	0x200	/* do not wait for inode lock */
 
 /* page we are not interested in getting */
 #define PGO_DONTCARE	((struct vm_page *) -1L)	/* [get only] */
diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c
index 75760347835..c26b938e7c1 100644
--- a/sys/uvm/uvm_vnode.c
+++ b/sys/uvm/uvm_vnode.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: uvm_vnode.c,v 1.110 2021/03/02 10:12:38 mpi Exp $	*/
+/*	$OpenBSD: uvm_vnode.c,v 1.111 2021/03/04 08:38:48 mpi Exp $	*/
 /*	$NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $	*/
 
 /*
@@ -90,6 +90,9 @@ int		 uvn_io(struct uvm_vnode *, vm_page_t *, int, int, int);
 int		 uvn_put(struct uvm_object *, vm_page_t *, int, boolean_t);
 void		 uvn_reference(struct uvm_object *);
 
+int		 uvm_vnode_lock(struct uvm_vnode *);
+void		 uvm_vnode_unlock(struct uvm_vnode *);
+
 /*
  * master pager structure
  */
@@ -875,11 +878,16 @@ uvn_cluster(struct uvm_object *uobj, voff_t offset, voff_t *loffset,
 int
 uvn_put(struct uvm_object *uobj, struct vm_page **pps, int npages, int flags)
 {
+	struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;
 	int retval;
 
 	KERNEL_ASSERT_LOCKED();
 
-	retval = uvn_io((struct uvm_vnode*)uobj, pps, npages, flags, UIO_WRITE);
+	retval = uvm_vnode_lock(uvn);
+	if (retval)
+		return(retval);
+	retval = uvn_io(uvn, pps, npages, flags, UIO_WRITE);
+	uvm_vnode_unlock(uvn);
 
 	return(retval);
 }
@@ -897,9 +905,10 @@ int
 uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
     int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
 {
+	struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;
 	voff_t current_offset;
 	struct vm_page *ptmp;
-	int lcv, result, gotpages;
+	int lcv, result, gotpages, retval;
 	boolean_t done;
 
 	KERNEL_ASSERT_LOCKED();
@@ -973,6 +982,18 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
 		return(VM_PAGER_UNLOCK);
 	}
 
+	/*
+	 * Before getting non-resident pages which must be populated with data
+	 * using I/O on the backing vnode, lock the same vnode. Such pages are
+	 * about to be allocated and busied (i.e. PG_BUSY) by the current
+	 * thread. Allocating and busying the page(s) before acquiring the
+	 * vnode lock could cause a deadlock with uvn_flush() which acquires
+	 * the vnode lock before waiting on pages to become unbusy and then
+	 * flushed.
+	 */
+	retval = uvm_vnode_lock(uvn);
+	if (retval)
+		return(retval);
+
 	/*
 	 * step 2: get non-resident or busy pages.
 	 * data structures are unlocked.
@@ -1059,14 +1080,15 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
 		 * we have a "fake/busy/clean" page that we just allocated.  do
 		 * I/O to fill it with valid data.
 		 */
-		result = uvn_io((struct uvm_vnode *) uobj, &ptmp, 1,
-		    PGO_SYNCIO|PGO_NOWAIT, UIO_READ);
+		result = uvn_io(uvn, &ptmp, 1, PGO_SYNCIO, UIO_READ);
 
 		/*
 		 * I/O done.  because we used syncio the result can not be
 		 * PEND or AGAIN.
 		 */
 		if (result != VM_PAGER_OK) {
+			uvm_vnode_unlock(uvn);
+
 			if (ptmp->pg_flags & PG_WANTED)
 				wakeup(ptmp);
 
@@ -1097,12 +1119,15 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
 	}
 
+	uvm_vnode_unlock(uvn);
+
 	return (VM_PAGER_OK);
 }
 
 /*
  * uvn_io: do I/O to a vnode
  *
+ * => uvn: the backing vnode must be locked
  * => prefer map unlocked (not required)
  * => flags: PGO_SYNCIO -- use sync. I/O
  * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync.
@@ -1120,7 +1145,6 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
 	int waitf, result, mapinflags;
 	size_t got, wanted;
 	int netunlocked = 0;
-	int lkflags = (flags & PGO_NOWAIT) ? LK_NOWAIT : 0;
 
 	/* init values */
 	waitf = (flags & PGO_SYNCIO) ? M_WAITOK : M_NOWAIT;
@@ -1189,42 +1213,17 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
 	}
 
 	/* do the I/O!  (XXX: curproc?) */
-	/*
-	 * This process may already have this vnode locked, if we faulted in
-	 * copyin() or copyout() on a region backed by this vnode
-	 * while doing I/O to the vnode.  If this is the case, don't
-	 * panic.. instead, return the error to the user.
-	 *
-	 * XXX this is a stopgap to prevent a panic.
-	 * Ideally, this kind of operation *should* work.
-	 */
-	result = 0;
-	if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
-		result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL | lkflags);
-	if (result == 0) {
-		/* NOTE: vnode now locked! */
-		if (rw == UIO_READ)
-			result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
-		else
-			result = VOP_WRITE(vn, &uio,
-			    (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
-			    curproc->p_ucred);
-
-		if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
-			VOP_UNLOCK(vn);
-
-	}
+	if (rw == UIO_READ)
+		result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
+	else
+		result = VOP_WRITE(vn, &uio,
+		    (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
+		    curproc->p_ucred);
 
 	if (netunlocked)
 		NET_LOCK();
 
-	/* NOTE: vnode now unlocked (unless vnislocked) */
-	/*
-	 * result == unix style errno (0 == OK!)
-	 *
-	 * zero out rest of buffer (if needed)
-	 */
+	/* zero out rest of buffer (if needed) */
 	if (result == 0) {
 		got = wanted - uio.uio_resid;
@@ -1245,16 +1244,14 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npages, int flags, int rw)
 		wakeup(&uvn->u_nio);
 	}
 
-	if (result == 0) {
+	if (result == 0)
 		return(VM_PAGER_OK);
-	} else if (result == EBUSY) {
-		KASSERT(flags & PGO_NOWAIT);
-		return(VM_PAGER_AGAIN);
-	} else {
-		while (rebooting)
-			tsleep_nsec(&rebooting, PVM, "uvndead", INFSLP);
-		return(VM_PAGER_ERROR);
+
+	if (result == EIO) {
+		/* Signal back to uvm_vnode_unlock(). */
+		uvn->u_flags |= UVM_VNODE_IOERROR;
 	}
+	return(VM_PAGER_ERROR);
 }
 
 /*
@@ -1471,3 +1468,52 @@ uvm_vnp_sync(struct mount *mp)
 
 	rw_exit_write(&uvn_sync_lock);
 }
+
+int
+uvm_vnode_lock(struct uvm_vnode *uvn)
+{
+	int error;
+	int netunlocked = 0;
+
+	if (uvn->u_flags & UVM_VNODE_VNISLOCKED)
+		return(VM_PAGER_OK);
+
+	/*
+	 * This thread may already have the net lock, if we faulted in
+	 * copyin() or copyout() in the network stack.
+	 */
+	if (rw_status(&netlock) == RW_WRITE) {
+		NET_UNLOCK();
+		netunlocked = 1;
+	}
+
+	/*
+	 * This thread may already have this vnode locked, if we faulted in
+	 * copyin() or copyout() on a region backed by this vnode
+	 * while doing I/O to the vnode.  If this is the case, don't panic but
+	 * instead return an error, as dictated by the LK_RECURSEFAIL flag.
+	 *
+	 * XXX this is a stopgap to prevent a panic.
+	 * Ideally, this kind of operation *should* work.
+	 */
+	error = vn_lock(uvn->u_vnode, LK_EXCLUSIVE | LK_RECURSEFAIL);
+	if (netunlocked)
+		NET_LOCK();
+	return(error ? VM_PAGER_ERROR : VM_PAGER_OK);
+}
+
+void
+uvm_vnode_unlock(struct uvm_vnode *uvn)
+{
+	int error;
+
+	if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
+		VOP_UNLOCK(uvn->u_vnode);
+
+	error = uvn->u_flags & UVM_VNODE_IOERROR;
+	uvn->u_flags &= ~UVM_VNODE_IOERROR;
+	if (error) {
+		while (rebooting)
+			tsleep_nsec(&rebooting, PVM, "uvndead", INFSLP);
+	}
+}
diff --git a/sys/uvm/uvm_vnode.h b/sys/uvm/uvm_vnode.h
index 6578e8b521d..ff67e3f0bf4 100644
--- a/sys/uvm/uvm_vnode.h
+++ b/sys/uvm/uvm_vnode.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: uvm_vnode.h,v 1.16 2021/03/02 10:09:20 mpi Exp $	*/
+/*	$OpenBSD: uvm_vnode.h,v 1.17 2021/03/04 08:38:48 mpi Exp $	*/
 /*	$NetBSD: uvm_vnode.h,v 1.9 2000/03/26 20:54:48 kleink Exp $	*/
 
 /*
@@ -84,6 +84,7 @@ struct uvm_vnode {
 					   i/o sync to clear so it can do
 					   i/o */
 #define UVM_VNODE_WRITEABLE	0x200	/* uvn has pages that are writeable */
+#define UVM_VNODE_IOERROR	0x400	/* i/o error occurred in uvn_io() */
 
 /*
  * UVM_VNODE_BLOCKED: any condition that should stop new processes from
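
To make the lock ordering concrete, below is a minimal userspace sketch
of the ordering this commit establishes: both the fault path and the
flush path take the vnode lock before touching the page's busy state.
This is illustrative code, not kernel code: the mutex "vnode_lock"
stands in for vn_lock()/VOP_UNLOCK(), and the busy flag plus condition
variable stand in for PG_BUSY and wakeup(); all names are made up for
the example.

/*
 * Sketch (assumption: pthreads as a stand-in for kernel locking).
 * Build with: cc sketch.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t vnode_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t page_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t page_cv = PTHREAD_COND_INITIALIZER;
static int page_busy;

/* Fault path (uvn_get/uvn_io): vnode lock first, then busy the page. */
static void *
fault_path(void *arg)
{
	pthread_mutex_lock(&vnode_lock);	/* pre-patch: taken too late */

	pthread_mutex_lock(&page_mtx);
	page_busy = 1;				/* PG_BUSY */
	pthread_mutex_unlock(&page_mtx);

	/* ... I/O on the vnode fills the page (VOP_READ) ... */

	pthread_mutex_lock(&page_mtx);
	page_busy = 0;
	pthread_cond_broadcast(&page_cv);	/* wakeup() */
	pthread_mutex_unlock(&page_mtx);

	pthread_mutex_unlock(&vnode_lock);
	return NULL;
}

/* Flush path (uvn_flush): vnode lock first, then wait for busy pages. */
static void *
flush_path(void *arg)
{
	pthread_mutex_lock(&vnode_lock);

	pthread_mutex_lock(&page_mtx);
	while (page_busy)		/* pre-patch order slept here forever */
		pthread_cond_wait(&page_cv, &page_mtx);
	pthread_mutex_unlock(&page_mtx);

	/* ... write the now-unbusy page back ... */

	pthread_mutex_unlock(&vnode_lock);
	return NULL;
}

int
main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, fault_path, NULL);
	pthread_create(&b, NULL, flush_path, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("done: both paths agree on lock order\n");
	return 0;
}

With the pre-patch order (busy the page, then try the vnode lock) the two
threads could block on each other forever; once both paths acquire the
vnode lock first, the wait in flush_path() always terminates.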