From 9d6122f62b6ed32d6c956e1d5269114b2f24ea14 Mon Sep 17 00:00:00 2001
From: claudio
Date: Wed, 28 Apr 2021 09:53:53 +0000
Subject: [PATCH] Introduce a global vnode_mtx and use it to make vn_lock()
 safe to be called without the KERNEL_LOCK.

This moves VXLOCK and VXWANT to a mutex-protected v_lflag field;
v_lockcount is protected by the same mutex.

The vn_lock() dance is overly complex and all of this should probably be
replaced by a proper lock on the vnode, but such a diff is a lot more
complex. This is an intermediate step so that at least some calls can be
modified to grab the KERNEL_LOCK later or not at all.

OK mpi@
---
 sys/kern/spec_vnops.c          | 26 ++++++++++++-----
 sys/kern/vfs_default.c         | 19 +++++++++----
 sys/kern/vfs_subr.c            | 51 ++++++++++++++++++++++------------
 sys/kern/vfs_vnops.c           | 24 +++++++++++-----
 sys/kern/vfs_vops.c            |  6 ++--
 sys/miscfs/deadfs/dead_vnops.c | 10 ++++---
 sys/sys/vnode.h                | 29 +++++++++++--------
 sys/ufs/ffs/ffs_softdep.c      |  8 ++++--
 8 files changed, 115 insertions(+), 58 deletions(-)

diff --git a/sys/kern/spec_vnops.c b/sys/kern/spec_vnops.c
index b55c3941f8e..0e0071ce719 100644
--- a/sys/kern/spec_vnops.c
+++ b/sys/kern/spec_vnops.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: spec_vnops.c,v 1.103 2021/03/10 10:21:47 jsg Exp $	*/
+/*	$OpenBSD: spec_vnops.c,v 1.104 2021/04/28 09:53:53 claudio Exp $	*/
 /*	$NetBSD: spec_vnops.c,v 1.29 1996/04/22 01:42:38 christos Exp $	*/
 
 /*
@@ -484,7 +484,7 @@ spec_close(void *v)
 	struct vnode *vp = ap->a_vp;
 	dev_t dev = vp->v_rdev;
 	int (*devclose)(dev_t, int, int, struct proc *);
-	int mode, relock, error;
+	int mode, relock, xlocked, error;
 	int clone = 0;
 
 	switch (vp->v_type) {
@@ -512,7 +512,10 @@ spec_close(void *v)
 		 * of forcibly closing the device, otherwise we only
 		 * close on last reference.
 		 */
-		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+		mtx_enter(&vnode_mtx);
+		xlocked = (vp->v_lflag & VXLOCK);
+		mtx_leave(&vnode_mtx);
+		if (vcount(vp) > 1 && !xlocked)
 			return (0);
 	}
 	devclose = cdevsw[major(dev)].d_close;
@@ -527,10 +530,13 @@ spec_close(void *v)
 	 * that, we must lock the vnode. If we are coming from
 	 * vclean(), the vnode is already locked.
 	 */
-	if (!(vp->v_flag & VXLOCK))
+	mtx_enter(&vnode_mtx);
+	xlocked = (vp->v_lflag & VXLOCK);
+	mtx_leave(&vnode_mtx);
+	if (!xlocked)
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	error = vinvalbuf(vp, V_SAVE, ap->a_cred, p, 0, INFSLP);
-	if (!(vp->v_flag & VXLOCK))
+	if (!xlocked)
 		VOP_UNLOCK(vp);
 	if (error)
 		return (error);
@@ -543,7 +549,10 @@
 		 * sum of the reference counts on all the aliased
 		 * vnodes descends to one, we are on last close.
*/ - if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) + mtx_enter(&vnode_mtx); + xlocked = (vp->v_lflag & VXLOCK); + mtx_leave(&vnode_mtx); + if (vcount(vp) > 1 && !xlocked) return (0); devclose = bdevsw[major(dev)].d_close; mode = S_IFBLK; @@ -554,7 +563,10 @@ spec_close(void *v) } /* release lock if held and this isn't coming from vclean() */ - relock = VOP_ISLOCKED(vp) && !(vp->v_flag & VXLOCK); + mtx_enter(&vnode_mtx); + xlocked = (vp->v_lflag & VXLOCK); + mtx_leave(&vnode_mtx); + relock = VOP_ISLOCKED(vp) && !xlocked; if (relock) VOP_UNLOCK(vp); error = (*devclose)(dev, ap->a_fflag, mode, p); diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 35793370639..04c815f8be4 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_default.c,v 1.47 2020/02/20 16:56:52 visa Exp $ */ +/* $OpenBSD: vfs_default.c,v 1.48 2021/04/28 09:53:53 claudio Exp $ */ /* * Portions of this code are: @@ -86,9 +86,12 @@ vop_generic_revoke(void *v) * If a vgone (or vclean) is already in progress, * wait until it is done and return. */ - if (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; - tsleep_nsec(vp, PINOD, "vop_generic_revokeall", INFSLP); + mtx_enter(&vnode_mtx); + if (vp->v_lflag & VXLOCK) { + vp->v_lflag |= VXWANT; + msleep_nsec(vp, &vnode_mtx, PINOD, + "vop_generic_revokeall", INFSLP); + mtx_leave(&vnode_mtx); return(0); } @@ -96,7 +99,9 @@ vop_generic_revoke(void *v) * Ensure that vp will not be vgone'd while we * are eliminating its aliases. */ - vp->v_flag |= VXLOCK; + vp->v_lflag |= VXLOCK; + mtx_leave(&vnode_mtx); + while (vp->v_flag & VALIASED) { SLIST_FOREACH(vq, vp->v_hashchain, v_specnext) { if (vq->v_rdev != vp->v_rdev || @@ -112,7 +117,9 @@ vop_generic_revoke(void *v) * really eliminate the vnode after which time * vgone will awaken any sleepers. */ - vp->v_flag &= ~VXLOCK; + mtx_enter(&vnode_mtx); + vp->v_lflag &= ~VXLOCK; + mtx_leave(&vnode_mtx); } vgonel(vp, p); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 2a6adabe6af..433ba0030c6 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_subr.c,v 1.304 2021/01/29 10:47:24 claudio Exp $ */ +/* $OpenBSD: vfs_subr.c,v 1.305 2021/04/28 09:53:53 claudio Exp $ */ /* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */ /* @@ -117,6 +117,8 @@ void vputonfreelist(struct vnode *); int vflush_vnode(struct vnode *, void *); int maxvnodes; +struct mutex vnode_mtx = MUTEX_INITIALIZER(IPL_BIO); + void vfs_unmountall(void); #ifdef DEBUG @@ -644,16 +646,19 @@ vget(struct vnode *vp, int flags) * return failure. Cleaning is determined by checking that * the VXLOCK flag is set. */ - - if (vp->v_flag & VXLOCK) { + mtx_enter(&vnode_mtx); + if (vp->v_lflag & VXLOCK) { if (flags & LK_NOWAIT) { + mtx_leave(&vnode_mtx); return (EBUSY); } - vp->v_flag |= VXWANT; - tsleep_nsec(vp, PINOD, "vget", INFSLP); + vp->v_lflag |= VXWANT; + msleep_nsec(vp, &vnode_mtx, PINOD, "vget", INFSLP); + mtx_leave(&vnode_mtx); return (ENOENT); } + mtx_leave(&vnode_mtx); onfreelist = vp->v_bioflag & VBIOONFREELIST; if (vp->v_usecount == 0 && onfreelist) { @@ -982,7 +987,7 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags) void vclean(struct vnode *vp, int flags, struct proc *p) { - int active; + int active, do_wakeup = 0; /* * Check to see if the vnode is in use. @@ -997,9 +1002,10 @@ vclean(struct vnode *vp, int flags, struct proc *p) * Prevent the vnode from being recycled or * brought into use while we clean it out. 
	 */
-	if (vp->v_flag & VXLOCK)
+	mtx_enter(&vnode_mtx);
+	if (vp->v_lflag & VXLOCK)
 		panic("vclean: deadlock");
-	vp->v_flag |= VXLOCK;
+	vp->v_lflag |= VXLOCK;
 
 	if (vp->v_lockcount > 0) {
 		/*
@@ -1007,9 +1013,11 @@
 		 * observed that the vnode is about to be exclusively locked
 		 * before continuing.
 		 */
-		tsleep_nsec(&vp->v_lockcount, PINOD, "vop_lock", INFSLP);
+		msleep_nsec(&vp->v_lockcount, &vnode_mtx, PINOD, "vop_lock",
+		    INFSLP);
 		KASSERT(vp->v_lockcount == 0);
 	}
+	mtx_leave(&vnode_mtx);
 
 	/*
 	 * Even if the count is zero, the VOP_INACTIVE routine may still
@@ -1067,14 +1075,18 @@
 	vp->v_op = &dead_vops;
 	VN_KNOTE(vp, NOTE_REVOKE);
 	vp->v_tag = VT_NON;
-	vp->v_flag &= ~VXLOCK;
 #ifdef VFSLCKDEBUG
 	vp->v_flag &= ~VLOCKSWORK;
 #endif
-	if (vp->v_flag & VXWANT) {
-		vp->v_flag &= ~VXWANT;
+	mtx_enter(&vnode_mtx);
+	vp->v_lflag &= ~VXLOCK;
+	if (vp->v_lflag & VXWANT) {
+		vp->v_lflag &= ~VXWANT;
+		do_wakeup = 1;
+	}
+	mtx_leave(&vnode_mtx);
+	if (do_wakeup)
 		wakeup(vp);
-	}
 }
 
 /*
@@ -1116,11 +1128,14 @@ vgonel(struct vnode *vp, struct proc *p)
 	 * If a vgone (or vclean) is already in progress,
 	 * wait until it is done and return.
 	 */
-	if (vp->v_flag & VXLOCK) {
-		vp->v_flag |= VXWANT;
-		tsleep_nsec(vp, PINOD, "vgone", INFSLP);
+	mtx_enter(&vnode_mtx);
+	if (vp->v_lflag & VXLOCK) {
+		vp->v_lflag |= VXWANT;
+		msleep_nsec(vp, &vnode_mtx, PINOD, "vgone", INFSLP);
+		mtx_leave(&vnode_mtx);
 		return;
 	}
+	mtx_leave(&vnode_mtx);
 
 	/*
 	 * Clean out the filesystem specific data.
@@ -1275,9 +1290,9 @@ vprint(char *label, struct vnode *vp)
 		strlcat(buf, "|VTEXT", sizeof buf);
 	if (vp->v_flag & VSYSTEM)
 		strlcat(buf, "|VSYSTEM", sizeof buf);
-	if (vp->v_flag & VXLOCK)
+	if (vp->v_lflag & VXLOCK)
 		strlcat(buf, "|VXLOCK", sizeof buf);
-	if (vp->v_flag & VXWANT)
+	if (vp->v_lflag & VXWANT)
 		strlcat(buf, "|VXWANT", sizeof buf);
 	if (vp->v_bioflag & VBIOWAIT)
 		strlcat(buf, "|VBIOWAIT", sizeof buf);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index fefbebf3cfd..945b40935e2 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: vfs_vnops.c,v 1.114 2020/04/08 08:07:51 mpi Exp $	*/
+/*	$OpenBSD: vfs_vnops.c,v 1.115 2021/04/28 09:53:53 claudio Exp $	*/
 /*	$NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $	*/
 
 /*
@@ -563,19 +563,29 @@ vn_poll(struct file *fp, int events, struct proc *p)
 int
 vn_lock(struct vnode *vp, int flags)
 {
-	int error;
+	int error, xlocked, do_wakeup;
 
 	do {
-		if (vp->v_flag & VXLOCK) {
-			vp->v_flag |= VXWANT;
-			tsleep_nsec(vp, PINOD, "vn_lock", INFSLP);
+		mtx_enter(&vnode_mtx);
+		if (vp->v_lflag & VXLOCK) {
+			vp->v_lflag |= VXWANT;
+			msleep_nsec(vp, &vnode_mtx, PINOD, "vn_lock", INFSLP);
+			mtx_leave(&vnode_mtx);
 			error = ENOENT;
 		} else {
 			vp->v_lockcount++;
+			mtx_leave(&vnode_mtx);
+
 			error = VOP_LOCK(vp, flags);
+
+			mtx_enter(&vnode_mtx);
 			vp->v_lockcount--;
+			do_wakeup = (vp->v_lockcount == 0);
+			xlocked = vp->v_lflag & VXLOCK;
+			mtx_leave(&vnode_mtx);
+
 			if (error == 0) {
-				if ((vp->v_flag & VXLOCK) == 0)
+				if (!xlocked)
 					return (0);
 
 				/*
@@ -585,7 +595,7 @@
 				 */
 				error = ENOENT;
 				VOP_UNLOCK(vp);
-				if (vp->v_lockcount == 0)
+				if (do_wakeup)
 					wakeup_one(&vp->v_lockcount);
 			}
 		}
diff --git a/sys/kern/vfs_vops.c b/sys/kern/vfs_vops.c
index d153c9f309e..877daef51b2 100644
--- a/sys/kern/vfs_vops.c
+++ b/sys/kern/vfs_vops.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: vfs_vops.c,v 1.29 2020/10/07 12:33:03 mpi Exp $	*/
+/*	$OpenBSD: vfs_vops.c,v 1.30 2021/04/28 09:53:53 claudio Exp $	*/
 /*
  * Copyright (c) 2010 Thordur I. Bjornsson <thib@openbsd.org>
  *
@@ -48,8 +48,6 @@
 #include <sys/systm.h>
 
 #ifdef VFSLCKDEBUG
-#include <sys/systm.h>	/* for panic() */
-
 #define ASSERT_VP_ISLOCKED(vp) do {				\
 	if (((vp)->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp)) {	\
 		VOP_PRINT(vp);					\
@@ -608,6 +606,8 @@ VOP_LOCK(struct vnode *vp, int flags)
 	a.a_vp = vp;
 	a.a_flags = flags;
 
+	MUTEX_ASSERT_UNLOCKED(&vnode_mtx);
+
 	if (vp->v_op->vop_lock == NULL)
 		return (EOPNOTSUPP);
 
diff --git a/sys/miscfs/deadfs/dead_vnops.c b/sys/miscfs/deadfs/dead_vnops.c
index 54083f509b2..8aada87b09e 100644
--- a/sys/miscfs/deadfs/dead_vnops.c
+++ b/sys/miscfs/deadfs/dead_vnops.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: dead_vnops.c,v 1.34 2021/03/24 16:11:32 semarie Exp $	*/
+/*	$OpenBSD: dead_vnops.c,v 1.35 2021/04/28 09:53:53 claudio Exp $	*/
 /*	$NetBSD: dead_vnops.c,v 1.16 1996/02/13 13:12:48 mycroft Exp $	*/
 
 /*
@@ -285,10 +285,12 @@ chkvnlock(struct vnode *vp)
 {
 	int locked = 0;
 
-	while (vp->v_flag & VXLOCK) {
-		vp->v_flag |= VXWANT;
-		tsleep_nsec(vp, PINOD, "chkvnlock", INFSLP);
+	mtx_enter(&vnode_mtx);
+	while (vp->v_lflag & VXLOCK) {
+		vp->v_lflag |= VXWANT;
+		msleep_nsec(vp, &vnode_mtx, PINOD, "chkvnlock", INFSLP);
 		locked = 1;
 	}
+	mtx_leave(&vnode_mtx);
 	return (locked);
 }
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 089934087e1..8a9dce55bb7 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: vnode.h,v 1.156 2020/04/08 08:07:52 mpi Exp $	*/
+/*	$OpenBSD: vnode.h,v 1.157 2021/04/28 09:53:53 claudio Exp $	*/
 /*	$NetBSD: vnode.h,v 1.38 1996/02/29 20:59:05 cgd Exp $	*/
 
 /*
@@ -85,18 +85,24 @@ RBT_HEAD(buf_rb_bufs, buf);
 struct namecache;
 RBT_HEAD(namecache_rb_cache, namecache);
 
+/*
+ * Locks used to protect struct members in struct vnode:
+ *	a	atomic
+ *	V	vnode_mtx
+ */
 struct uvm_vnode;
 struct vnode {
-	struct uvm_vnode *v_uvm;	/* uvm data */
-	const struct vops *v_op;	/* vnode operations vector */
-	enum vtype v_type;		/* vnode type */
-	enum vtagtype v_tag;		/* type of underlying data */
-	u_int v_flag;			/* vnode flags (see below) */
-	u_int v_usecount;		/* reference count of users */
-	u_int v_uvcount;		/* unveil references */
-	/* reference count of writers */
-	u_int v_writecount;
-	u_int v_lockcount;		/* # threads waiting on lock */
+	struct uvm_vnode *v_uvm;		/* uvm data */
+	const struct vops *v_op;		/* vnode operations vector */
+	enum vtype v_type;			/* vnode type */
+	enum vtagtype v_tag;			/* type of underlying data */
+	u_int v_flag;				/* vnode flags (see below) */
+	u_int v_lflag;				/* [V] lock vnode flags */
+	u_int v_usecount;			/* reference count of users */
+	u_int v_uvcount;			/* unveil references */
+	u_int v_writecount;			/* reference count of writers */
+	u_int v_lockcount;			/* [V] # threads waiting on lock */
+
 	/* Flags that can be read/written in interrupts */
 	u_int v_bioflag;
 	u_int v_holdcnt;			/* buffer references */
@@ -255,6 +261,7 @@ extern int initialvnodes;	/* XXX number of vnodes to start */
 extern int maxvnodes;		/* XXX number of vnodes to allocate */
 extern int syncdelay;		/* seconds to delay syncing vnodes */
 extern int rushjob;		/* # of slots syncer should run ASAP */
+extern struct mutex vnode_mtx;
 
 extern void vhold(struct vnode *);
 extern void vdrop(struct vnode *);
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 52895599c62..99abc3cc76c 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: ffs_softdep.c,v 1.149 2020/03/09 16:49:12 millert Exp $	*/
+/*	$OpenBSD: ffs_softdep.c,v 1.150 2021/04/28 09:53:53 claudio Exp $	*/
 /*
  * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
  *
@@ -4563,8 +4563,12 @@ softdep_fsync(struct vnode *vp)
 		 * not now, but then the user was not asking to have it
 		 * written, so we are not breaking any promises.
 		 */
-		if (vp->v_flag & VXLOCK)
+		mtx_enter(&vnode_mtx);
+		if (vp->v_lflag & VXLOCK) {
+			mtx_leave(&vnode_mtx);
 			break;
+		}
+		mtx_leave(&vnode_mtx);
 		/*
 		 * We prevent deadlock by always fetching inodes from the
 		 * root, moving down the directory tree. Thus, when fetching
-- 
2.20.1
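
The heart of this patch is a mechanical conversion: every place that tested
VXLOCK, set VXWANT and then called tsleep_nsec() on the bare flag word now
does the same under vnode_mtx and sleeps with msleep_nsec(), which releases
the mutex and blocks in one atomic step, so no wakeup can slip in between
the test and the sleep once the KERNEL_LOCK no longer serializes the
callers. The standalone program below models that handshake with POSIX
threads. It is an illustrative analogue only, not kernel code:
pthread_mutex_t stands in for vnode_mtx, pthread_cond_wait() for
msleep_nsec(), pthread_cond_broadcast() for wakeup(), and every name in it
(node_mtx, XLOCK, XWANT and friends) is invented for the example.

/* xlock.c - userland analogue of the VXLOCK/VXWANT handshake */
#include <pthread.h>
#include <stdio.h>

#define XLOCK	0x01	/* object is being torn down (VXLOCK) */
#define XWANT	0x02	/* somebody sleeps on the object (VXWANT) */

static pthread_mutex_t node_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t node_cv = PTHREAD_COND_INITIALIZER;
static unsigned int lflag;

/* Waiter side: the shape of vget(), vgonel() and chkvnlock(). */
static void
wait_while_xlocked(void)
{
	pthread_mutex_lock(&node_mtx);
	while (lflag & XLOCK) {
		lflag |= XWANT;
		/* Releases node_mtx while asleep, like msleep_nsec(). */
		pthread_cond_wait(&node_cv, &node_mtx);
	}
	pthread_mutex_unlock(&node_mtx);
}

/* Cleaner side: the shape of the end of vclean(). */
static void *
cleaner(void *arg)
{
	int do_wakeup = 0;

	/* ... tear the object down ... */

	pthread_mutex_lock(&node_mtx);
	lflag &= ~XLOCK;
	if (lflag & XWANT) {
		lflag &= ~XWANT;
		do_wakeup = 1;
	}
	pthread_mutex_unlock(&node_mtx);
	if (do_wakeup)
		pthread_cond_broadcast(&node_cv);	/* wakeup(vp) */
	return arg;
}

int
main(void)
{
	pthread_t t;

	lflag |= XLOCK;			/* a clean is in progress */
	pthread_create(&t, NULL, cleaner, NULL);
	wait_while_xlocked();		/* returns once XLOCK is gone */
	pthread_join(t, NULL);
	printf("waiter resumed after XLOCK was released\n");
	return 0;
}

The key property is the same one vnode_mtx buys the kernel: XWANT is only
ever set and tested with the mutex held, and the waiter gives up the mutex
and the CPU in a single step, so the cleaner cannot issue its wakeup in the
window between the waiter's flag test and its sleep.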
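
A smaller pattern from the spec_close() hunks is also worth spelling out:
the old code tested vp->v_flag & VXLOCK twice, once before vn_lock() and
once before VOP_UNLOCK(), so the two tests could in principle disagree; the
new code reads the flag once under vnode_mtx into a local xlocked and lets
that single snapshot drive both decisions. A minimal sketch of the idea,
again with invented names:

#include <pthread.h>

#define XLOCK	0x01

static pthread_mutex_t node_mtx = PTHREAD_MUTEX_INITIALIZER;
static unsigned int lflag;
static int lock_depth;

static int
xlock_snapshot(void)
{
	int xlocked;

	pthread_mutex_lock(&node_mtx);
	xlocked = (lflag & XLOCK);
	pthread_mutex_unlock(&node_mtx);
	return xlocked;
}

static void
flush_node(void)
{
	/* One snapshot drives both the lock and the unlock. */
	int xlocked = xlock_snapshot();

	if (!xlocked)
		lock_depth++;		/* stand-in for vn_lock() */
	/* ... write out buffers, as vinvalbuf() does ... */
	if (!xlocked)
		lock_depth--;		/* stand-in for VOP_UNLOCK() */
}

int
main(void)
{
	flush_node();
	return lock_depth;	/* always 0: the pair cannot split */
}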
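
Finally, the v_lockcount handshake between vn_lock() and vclean() is the
subtle part: VOP_LOCK() may sleep, so it cannot be called with vnode_mtx
held (that is what the MUTEX_ASSERT_UNLOCKED() added to VOP_LOCK()
enforces), and v_lockcount exists so a cleaner can wait until every thread
that already entered the lock operation has left it again. The sketch below
is a userland analogue of that drain under the same assumptions as above;
it is not the kernel code, and all names are invented:

/* drain.c - userland analogue of the vn_lock()/vclean() drain */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define XLOCK	0x01

static pthread_mutex_t node_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t drain_cv = PTHREAD_COND_INITIALIZER;
static unsigned int lflag;
static unsigned int lockcount;

/* Stand-in for VOP_LOCK(); it may sleep, so node_mtx must be free. */
static int
op_lock(void)
{
	usleep(1000);
	return 0;
}

/* The shape of the patched vn_lock(), minus the retry loop. */
static int
node_lock(void)
{
	int error, xlocked, do_wakeup;

	pthread_mutex_lock(&node_mtx);
	if (lflag & XLOCK) {		/* the real code sleeps here */
		pthread_mutex_unlock(&node_mtx);
		return -1;		/* ENOENT in the patch */
	}
	lockcount++;
	pthread_mutex_unlock(&node_mtx);

	error = op_lock();		/* mutex dropped across the sleep */

	pthread_mutex_lock(&node_mtx);
	lockcount--;
	do_wakeup = (lockcount == 0);
	xlocked = (lflag & XLOCK);
	pthread_mutex_unlock(&node_mtx);

	if (error == 0 && !xlocked)
		return 0;		/* locked, no cleaner raced us */
	/*
	 * A cleaner sneaked in while we slept in op_lock(); back off
	 * (the real code also undoes the lock with VOP_UNLOCK()) and
	 * wake the cleaner if we were the last thread it waits for.
	 */
	if (do_wakeup)
		pthread_cond_signal(&drain_cv);	/* wakeup_one() */
	return -1;
}

static void *
locker(void *arg)
{
	puts(node_lock() == 0 ? "locker: locked" : "locker: backed off");
	return arg;
}

/* The draining side of vclean(). */
static void *
cleaner(void *arg)
{
	pthread_mutex_lock(&node_mtx);
	lflag |= XLOCK;			/* fail new lock attempts */
	while (lockcount > 0)		/* msleep on &vp->v_lockcount */
		pthread_cond_wait(&drain_cv, &node_mtx);
	pthread_mutex_unlock(&node_mtx);
	puts("cleaner: lock operation drained");
	return arg;
}

int
main(void)
{
	pthread_t tl, tc;

	pthread_create(&tl, NULL, locker, NULL);
	pthread_create(&tc, NULL, cleaner, NULL);
	pthread_join(tl, NULL);
	pthread_join(tc, NULL);
	return 0;
}

The ordering mirrors the patch: the count is dropped and the flags are
sampled under the mutex, and the wakeup may safely happen after the mutex
is released, because the cleaner always re-tests lockcount under the mutex
before sleeping again.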