From 7429db109c10a5df9f7d3067e1d6de09f03a98b2 Mon Sep 17 00:00:00 2001 From: mvs Date: Tue, 16 Nov 2021 08:56:19 +0000 Subject: [PATCH] Move UNIX domain sockets garbage collector out of `unp_lock. Except `unp_ino' this leaves only per-socket data protected by `unp_lock'. The `unp_ino' protection is not the big deal and will be done with mutex(9) in the future diff. The garbage collector flags moved from from `unp_flags' to unp_gcflags'. The two new locks introduced to protect garbage collector data. The `unp_gc_lock' rwlock(9) protects `unp_defer', `unp_gcing', `unp_gcflags' and `unp_link' list. The `unp_df_lock' protects `ud_link' list. We need to simultaneously lock `unp_gc_lock' and `unp_lock'. When we perform unp_attach() or unp_detach() we link PCB to `unp_link' list with `unp_lock' held. But when unp_gc() does `unp_link' list walkthrough with the `unp_gc_lock' lock held it should lock socket while performs `so_rcv' buffer scan and the lock order should be the opposite. In the future diff `unp_lock' will be replaced by per-socket `so_lock' so it's better to enforce `unp_gc_lock' -> `unp_lock' (solock()) lock order and release `unp_lock' in the unp_attach() and unp_detach() paths. The previously committed diffs made this safe. The `unp_df_lock' introduced because the `unp_lock' and `unp_gc_lock' state are unknown when unp_discard() called. Since it touches only `ud_link' list the re-lock dances are unwanted in this path. Also this keeps M_WAITOK allocation outside rwlock(9) when unp_discard() called from unp_externalize() error path. ok bluhm@ --- sys/kern/uipc_usrreq.c | 114 +++++++++++++++++++++++++++-------------- sys/sys/unpcb.h | 20 +++++--- 2 files changed, 88 insertions(+), 46 deletions(-) diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index d819f43d3f6..2db67d7c619 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_usrreq.c,v 1.156 2021/11/11 18:36:59 mvs Exp $ */ +/* $OpenBSD: uipc_usrreq.c,v 1.157 2021/11/16 08:56:19 mvs Exp $ */ /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ /* @@ -59,12 +59,17 @@ /* * Locks used to protect global data and struct members: * I immutable after creation + * D unp_df_lock + * G unp_gc_lock * U unp_lock * R unp_rights_mtx * a atomic */ struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock"); +struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk"); +struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk"); + struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); /* @@ -72,7 +77,7 @@ struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); * not received and need to be closed. */ struct unp_deferral { - SLIST_ENTRY(unp_deferral) ud_link; /* [U] */ + SLIST_ENTRY(unp_deferral) ud_link; /* [D] */ int ud_n; /* [I] */ /* followed by ud_n struct fdpass */ struct fdpass ud_fp[]; /* [I] */ @@ -97,17 +102,17 @@ struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL); */ const struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX }; -/* [U] list of all UNIX domain sockets, for unp_gc() */ +/* [G] list of all UNIX domain sockets, for unp_gc() */ LIST_HEAD(unp_head, unpcb) unp_head = LIST_HEAD_INITIALIZER(unp_head); -/* [U] list of sets of files that were sent over sockets that are now closed */ +/* [D] list of sets of files that were sent over sockets that are now closed */ SLIST_HEAD(,unp_deferral) unp_deferred = SLIST_HEAD_INITIALIZER(unp_deferred); ino_t unp_ino; /* [U] prototype for fake inode numbers */ int unp_rights; /* [R] file descriptors in flight */ -int unp_defer; /* [U] number of deferred fp to close by the GC task */ -int unp_gcing; /* [U] GC task currently running */ +int unp_defer; /* [G] number of deferred fp to close by the GC task */ +int unp_gcing; /* [G] GC task currently running */ void unp_init(void) @@ -430,7 +435,21 @@ uipc_attach(struct socket *so, int proto) unp->unp_socket = so; so->so_pcb = unp; getnanotime(&unp->unp_ctime); + + /* + * Enforce `unp_gc_lock' -> `solock()' lock order. + */ + /* + * We also release the lock on listening socket and on our peer + * socket when called from unp_connect(). This is safe. The + * listening socket protected by vnode(9) lock. The peer socket + * has 'UNP_CONNECTING' flag set. + */ + sounlock(so, SL_LOCKED); + rw_enter_write(&unp_gc_lock); LIST_INSERT_HEAD(&unp_head, unp, unp_link); + rw_exit_write(&unp_gc_lock); + solock(so); return (0); } @@ -490,28 +509,29 @@ unp_detach(struct unpcb *unp) rw_assert_wrlock(&unp_lock); - LIST_REMOVE(unp, unp_link); + unp->unp_vnode = NULL; - if (vp != NULL) { - unp->unp_vnode = NULL; - - /* - * Enforce `i_lock' -> `unp_lock' because fifo - * subsystem requires it. - */ + /* + * Enforce `unp_gc_lock' -> `solock()' lock order. + * Enforce `i_lock' -> `unp_lock' lock order. + */ + sounlock(so, SL_LOCKED); - sounlock(so, SL_LOCKED); + rw_enter_write(&unp_gc_lock); + LIST_REMOVE(unp, unp_link); + rw_exit_write(&unp_gc_lock); + if (vp != NULL) { VOP_LOCK(vp, LK_EXCLUSIVE); vp->v_socket = NULL; KERNEL_LOCK(); vput(vp); KERNEL_UNLOCK(); - - solock(so); } + solock(so); + if (unp->unp_conn) unp_disconnect(unp); while (!SLIST_EMPTY(&unp->unp_refs)) @@ -954,10 +974,11 @@ restart: if (error) { if (nfds > 0) { + /* + * No lock required. We are the only `cm' holder. + */ rp = ((struct fdpass *)CMSG_DATA(cm)); - rw_enter_write(&unp_lock); unp_discard(rp, nfds); - rw_exit_write(&unp_lock); } } @@ -1093,16 +1114,17 @@ unp_gc(void *arg __unused) struct unpcb *unp; int nunref, i; - rw_enter_write(&unp_lock); - + rw_enter_write(&unp_gc_lock); if (unp_gcing) goto unlock; unp_gcing = 1; + rw_exit_write(&unp_gc_lock); + rw_enter_write(&unp_df_lock); /* close any fds on the deferred list */ while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) { SLIST_REMOVE_HEAD(&unp_deferred, ud_link); - rw_exit_write(&unp_lock); + rw_exit_write(&unp_df_lock); for (i = 0; i < defer->ud_n; i++) { fp = defer->ud_fp[i].fp; if (fp == NULL) @@ -1118,25 +1140,27 @@ unp_gc(void *arg __unused) } free(defer, M_TEMP, sizeof(*defer) + sizeof(struct fdpass) * defer->ud_n); - rw_enter_write(&unp_lock); + rw_enter_write(&unp_df_lock); } + rw_exit_write(&unp_df_lock); + rw_enter_write(&unp_gc_lock); unp_defer = 0; LIST_FOREACH(unp, &unp_head, unp_link) - unp->unp_flags &= ~(UNP_GCMARK | UNP_GCDEFER | UNP_GCDEAD); + unp->unp_gcflags = 0; do { nunref = 0; LIST_FOREACH(unp, &unp_head, unp_link) { fp = unp->unp_file; - if (unp->unp_flags & UNP_GCDEFER) { + if (unp->unp_gcflags & UNP_GCDEFER) { /* * This socket is referenced by another * socket which is known to be live, * so it's certainly live. */ - unp->unp_flags &= ~UNP_GCDEFER; + unp->unp_gcflags &= ~UNP_GCDEFER; unp_defer--; - } else if (unp->unp_flags & UNP_GCMARK) { + } else if (unp->unp_gcflags & UNP_GCMARK) { /* marked as live in previous pass */ continue; } else if (fp == NULL) { @@ -1155,7 +1179,7 @@ unp_gc(void *arg __unused) */ if (fp->f_count == unp->unp_msgcount) { nunref++; - unp->unp_flags |= UNP_GCDEAD; + unp->unp_gcflags |= UNP_GCDEAD; continue; } } @@ -1167,10 +1191,12 @@ unp_gc(void *arg __unused) * sockets and note them as deferred (== referenced, * but not yet marked). */ - unp->unp_flags |= UNP_GCMARK; + unp->unp_gcflags |= UNP_GCMARK; so = unp->unp_socket; + solock(so); unp_scan(so->so_rcv.sb_mb, unp_mark); + sounlock(so, SL_LOCKED); } } while (unp_defer); @@ -1180,14 +1206,23 @@ unp_gc(void *arg __unused) */ if (nunref) { LIST_FOREACH(unp, &unp_head, unp_link) { - if (unp->unp_flags & UNP_GCDEAD) - unp_scan(unp->unp_socket->so_rcv.sb_mb, - unp_discard); + if (unp->unp_gcflags & UNP_GCDEAD) { + /* + * This socket could still be connected + * and if so it's `so_rcv' is still + * accessible by concurrent PRU_SEND + * thread. + */ + so = unp->unp_socket; + solock(so); + unp_scan(so->so_rcv.sb_mb, unp_discard); + sounlock(so, SL_LOCKED); + } } } unp_gcing = 0; unlock: - rw_exit_write(&unp_lock); + rw_exit_write(&unp_gc_lock); } void @@ -1233,7 +1268,7 @@ unp_mark(struct fdpass *rp, int nfds) struct unpcb *unp; int i; - rw_assert_wrlock(&unp_lock); + rw_assert_wrlock(&unp_gc_lock); for (i = 0; i < nfds; i++) { if (rp[i].fp == NULL) @@ -1243,12 +1278,12 @@ unp_mark(struct fdpass *rp, int nfds) if (unp == NULL) continue; - if (unp->unp_flags & (UNP_GCMARK|UNP_GCDEFER)) + if (unp->unp_gcflags & (UNP_GCMARK|UNP_GCDEFER)) continue; unp_defer++; - unp->unp_flags |= UNP_GCDEFER; - unp->unp_flags &= ~UNP_GCDEAD; + unp->unp_gcflags |= UNP_GCDEFER; + unp->unp_gcflags &= ~UNP_GCDEAD; } } @@ -1257,14 +1292,15 @@ unp_discard(struct fdpass *rp, int nfds) { struct unp_deferral *defer; - rw_assert_wrlock(&unp_lock); - /* copy the file pointers to a deferral structure */ defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK); defer->ud_n = nfds; memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds); memset(rp, 0, sizeof(*rp) * nfds); + + rw_enter_write(&unp_df_lock); SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link); + rw_exit_write(&unp_df_lock); task_add(systqmp, &unp_gc_task); } diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h index ab195ab6e4f..7111ea97f54 100644 --- a/sys/sys/unpcb.h +++ b/sys/sys/unpcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: unpcb.h,v 1.19 2021/11/06 17:35:14 mvs Exp $ */ +/* $OpenBSD: unpcb.h,v 1.20 2021/11/16 08:56:20 mvs Exp $ */ /* $NetBSD: unpcb.h,v 1.6 1994/06/29 06:46:08 cgd Exp $ */ /* @@ -59,6 +59,7 @@ * * Locks used to protect struct members: * I immutable after creation + * G unp_gc_lock * U unp_lock * a atomic */ @@ -75,9 +76,10 @@ struct unpcb { struct mbuf *unp_addr; /* [U] bound address of socket */ long unp_msgcount; /* [a] references from socket rcv buf */ int unp_flags; /* [U] this unpcb contains peer eids */ + int unp_gcflags; /* [G] garbge collector flags */ struct sockpeercred unp_connid;/* [U] id of peer process */ struct timespec unp_ctime; /* [I] holds creation time */ - LIST_ENTRY(unpcb) unp_link; /* [U] link in per-AF list of sockets */ + LIST_ENTRY(unpcb) unp_link; /* [G] link in per-AF list of sockets */ }; /* @@ -85,11 +87,15 @@ struct unpcb { */ #define UNP_FEIDS 0x01 /* unp_connid contains information */ #define UNP_FEIDSBIND 0x02 /* unp_connid was set by a bind */ -#define UNP_GCMARK 0x04 /* mark during unp_gc() */ -#define UNP_GCDEFER 0x08 /* ref'd, but not marked in this pass */ -#define UNP_GCDEAD 0x10 /* unref'd in this pass */ -#define UNP_BINDING 0x20 /* unp is binding now */ -#define UNP_CONNECTING 0x40 /* unp is connecting now */ +#define UNP_BINDING 0x04 /* unp is binding now */ +#define UNP_CONNECTING 0x08 /* unp is connecting now */ + +/* + * flag bits in unp_gcflags + */ +#define UNP_GCMARK 0x01 /* mark during unp_gc() */ +#define UNP_GCDEFER 0x02 /* ref'd, but not marked in this pass */ +#define UNP_GCDEAD 0x04 /* unref'd in this pass */ #define sotounpcb(so) ((struct unpcb *)((so)->so_pcb)) -- 2.20.1