From 46d8bbe61ce96a35f176dd10933b30d9d8adfb09 Mon Sep 17 00:00:00 2001 From: mvs Date: Sat, 6 Nov 2021 17:35:14 +0000 Subject: [PATCH] Make `unp_msgcount' and `unp_file' atomic. Introduce `unp_rights_mtx' mutex(9) to protect `unp_rights'. This removes global rwlock(9) from unp_internalize() and unp_externalize() normal paths and leaves it in the unp_externalize() error path only. Also we don't need to simultaneously hold fdplock() and `unp_lock' within unp_internalize(). The `unp_rights' can't be atomic. Otherwise the thread which exceeding the limit will break all other not-exceeding threads until it decrements `unp_rights'. That why the mutex(9) used for protection. It's safe to call fptounp() without `unp_lock' held. We always got this file descriptor by fd_getfile(9) so we always have the extra reference and this descriptor can't be closed by concurrent thread. Some sockets could be destroyed through 'PRU_ABORT' path but they don't have associated file descriptor and they are not accessible in the unp_internalize() path. The `unp_file' access without `unp_lock' held is also safe. Each socket could have the only associated file descriptor and each file descriptor could have the only associated socket. We only assign `unp_file' in the unp_internalize() path where we got the socket by fd_getfile(9). This descriptor has the extra reference and couldn't be closed concurrently. We could override `unp_file' but with the same address because the associated file descriptor can't be changed so the address will be also the same. While unp_gc() concurrently runs the dereference of non-NULL `unp_file' is always safe. Discussed with kettenis@ and mpi@. ok mpi@ --- sys/kern/uipc_usrreq.c | 49 ++++++++++++++++++++++-------------------- sys/sys/unpcb.h | 7 +++--- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 0994f76abea..bc36b1ebbad 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_usrreq.c,v 1.153 2021/10/30 16:35:31 mvs Exp $ */ +/* $OpenBSD: uipc_usrreq.c,v 1.154 2021/11/06 17:35:14 mvs Exp $ */ /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ /* @@ -52,14 +52,19 @@ #include #include #include +#include #include /* * Locks used to protect global data and struct members: * I immutable after creation * U unp_lock + * R unp_rights_mtx + * a atomic */ + struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock"); +struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); /* * Stack of sets of files that were passed over a socket but were @@ -99,7 +104,7 @@ SLIST_HEAD(,unp_deferral) unp_deferred = SLIST_HEAD_INITIALIZER(unp_deferred); ino_t unp_ino; /* [U] prototype for fake inode numbers */ -int unp_rights; /* [U] file descriptors in flight */ +int unp_rights; /* [R] file descriptors in flight */ int unp_defer; /* [U] number of deferred fp to close by the GC task */ int unp_gcing; /* [U] GC task currently running */ @@ -927,17 +932,18 @@ restart: */ rp = (struct fdpass *)CMSG_DATA(cm); - rw_enter_write(&unp_lock); for (i = 0; i < nfds; i++) { struct unpcb *unp; fp = rp->fp; rp++; if ((unp = fptounp(fp)) != NULL) - unp->unp_msgcount--; - unp_rights--; + atomic_dec_long(&unp->unp_msgcount); } - rw_exit_write(&unp_lock); + + mtx_enter(&unp_rights_mtx); + unp_rights -= nfds; + mtx_leave(&unp_rights_mtx); /* * Copy temporary array to message and adjust length, in case of @@ -985,13 +991,13 @@ unp_internalize(struct mbuf *control, struct proc *p) return (EINVAL); nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int); - rw_enter_write(&unp_lock); + mtx_enter(&unp_rights_mtx); if (unp_rights + nfds > maxfiles / 10) { - rw_exit_write(&unp_lock); + mtx_leave(&unp_rights_mtx); return (EMFILE); } unp_rights += nfds; - rw_exit_write(&unp_lock); + mtx_leave(&unp_rights_mtx); /* Make sure we have room for the struct file pointers */ morespace: @@ -1031,7 +1037,6 @@ morespace: ip = ((int *)CMSG_DATA(cm)) + nfds - 1; rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1; fdplock(fdp); - rw_enter_write(&unp_lock); for (i = 0; i < nfds; i++) { memcpy(&fd, ip, sizeof fd); ip--; @@ -1056,15 +1061,13 @@ morespace: rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED; rp--; if ((unp = fptounp(fp)) != NULL) { + atomic_inc_long(&unp->unp_msgcount); unp->unp_file = fp; - unp->unp_msgcount++; } } - rw_exit_write(&unp_lock); fdpunlock(fdp); return (0); fail: - rw_exit_write(&unp_lock); fdpunlock(fdp); if (fp != NULL) FRELE(fp, p); @@ -1072,17 +1075,15 @@ fail: for ( ; i > 0; i--) { rp++; fp = rp->fp; - rw_enter_write(&unp_lock); if ((unp = fptounp(fp)) != NULL) - unp->unp_msgcount--; - rw_exit_write(&unp_lock); + atomic_dec_long(&unp->unp_msgcount); FRELE(fp, p); } nospace: - rw_enter_write(&unp_lock); + mtx_enter(&unp_rights_mtx); unp_rights -= nfds; - rw_exit_write(&unp_lock); + mtx_leave(&unp_rights_mtx); return (error); } @@ -1105,21 +1106,23 @@ unp_gc(void *arg __unused) /* close any fds on the deferred list */ while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) { SLIST_REMOVE_HEAD(&unp_deferred, ud_link); + rw_exit_write(&unp_lock); for (i = 0; i < defer->ud_n; i++) { fp = defer->ud_fp[i].fp; if (fp == NULL) continue; - /* closef() expects a refcount of 2 */ - FREF(fp); if ((unp = fptounp(fp)) != NULL) - unp->unp_msgcount--; + atomic_dec_long(&unp->unp_msgcount); + mtx_enter(&unp_rights_mtx); unp_rights--; - rw_exit_write(&unp_lock); + mtx_leave(&unp_rights_mtx); + /* closef() expects a refcount of 2 */ + FREF(fp); (void) closef(fp, NULL); - rw_enter_write(&unp_lock); } free(defer, M_TEMP, sizeof(*defer) + sizeof(struct fdpass) * defer->ud_n); + rw_enter_write(&unp_lock); } unp_defer = 0; diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h index 0d41e1437e0..ab195ab6e4f 100644 --- a/sys/sys/unpcb.h +++ b/sys/sys/unpcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: unpcb.h,v 1.18 2021/02/10 08:20:09 mvs Exp $ */ +/* $OpenBSD: unpcb.h,v 1.19 2021/11/06 17:35:14 mvs Exp $ */ /* $NetBSD: unpcb.h,v 1.6 1994/06/29 06:46:08 cgd Exp $ */ /* @@ -60,19 +60,20 @@ * Locks used to protect struct members: * I immutable after creation * U unp_lock + * a atomic */ struct unpcb { struct socket *unp_socket; /* [I] pointer back to socket */ struct vnode *unp_vnode; /* [U] if associated with file */ - struct file *unp_file; /* [U] backpointer for unp_gc() */ + struct file *unp_file; /* [a] backpointer for unp_gc() */ struct unpcb *unp_conn; /* [U] control block of connected socket */ ino_t unp_ino; /* [U] fake inode number */ SLIST_HEAD(,unpcb) unp_refs; /* [U] referencing socket linked list */ SLIST_ENTRY(unpcb) unp_nextref; /* [U] link in unp_refs list */ struct mbuf *unp_addr; /* [U] bound address of socket */ - long unp_msgcount; /* [U] references from socket rcv buf */ + long unp_msgcount; /* [a] references from socket rcv buf */ int unp_flags; /* [U] this unpcb contains peer eids */ struct sockpeercred unp_connid;/* [U] id of peer process */ struct timespec unp_ctime; /* [I] holds creation time */ -- 2.20.1