From 02a827120adb2d89281e9b7a793e5d4eb9e5b56e Mon Sep 17 00:00:00 2001 From: mvs Date: Tue, 26 Mar 2024 09:46:47 +0000 Subject: [PATCH] Use `sb_mtx' to protect `so_rcv' receive buffer of unix(4) sockets. This makes re-locking unnecessary in the uipc_*send() paths, because it's enough to lock one socket to prevent peer from concurrent disconnection. As the little bonus, one unix(4) socket can perform simultaneous transmission and reception with one exception for uipc_rcvd(), which still requires the re-lock for connection oriented sockets. The socket lock is not held while filt_soread() and filt_soexcept() called from uipc_*send() through sorwakeup(). However, the unlocked access to the `so_options', `so_state' and `so_error' is fine. The receiving socket can't be or became listening socket. It also can't be disconnected concurrently. This makes immutable SO_ACCEPTCONN, SS_ISDISCONNECTED and SS_ISCONNECTED bits which are clean and set respectively. `so_error' is set on the peer sockets only by unp_detach(), which also can't be called concurrently on sending socket. This is also true for filt_fiforead() and filt_fifoexcept(). For other callers like kevent(2) or doaccept() the socket lock is still held. ok bluhm --- sys/kern/sys_socket.c | 4 ++- sys/kern/uipc_socket.c | 46 +++++++++++++++++++++++----------- sys/kern/uipc_socket2.c | 38 +++++++++++++++++----------- sys/kern/uipc_usrreq.c | 43 ++++++++++++++++++++----------- sys/miscfs/fifofs/fifo_vnops.c | 6 ++--- sys/sys/socketvar.h | 7 ++++-- 6 files changed, 94 insertions(+), 50 deletions(-) diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c index 8a0c8ed0e00..bba512b4321 100644 --- a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_socket.c,v 1.61 2023/04/15 13:18:28 kn Exp $ */ +/* $OpenBSD: sys_socket.c,v 1.62 2024/03/26 09:46:47 mvs Exp $ */ /* $NetBSD: sys_socket.c,v 1.13 1995/08/12 23:59:09 mycroft Exp $ */ /* @@ -144,9 +144,11 @@ soo_stat(struct file *fp, struct stat *ub, struct proc *p) memset(ub, 0, sizeof (*ub)); ub->st_mode = S_IFSOCK; solock(so); + mtx_enter(&so->so_rcv.sb_mtx); if ((so->so_rcv.sb_state & SS_CANTRCVMORE) == 0 || so->so_rcv.sb_cc != 0) ub->st_mode |= S_IRUSR | S_IRGRP | S_IROTH; + mtx_leave(&so->so_rcv.sb_mtx); if ((so->so_snd.sb_state & SS_CANTSENDMORE) == 0) ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH; ub->st_uid = so->so_euid; diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 7b04935ef0d..06313d40cbf 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket.c,v 1.321 2024/03/22 17:34:11 mvs Exp $ */ +/* $OpenBSD: uipc_socket.c,v 1.322 2024/03/26 09:46:47 mvs Exp $ */ /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ /* @@ -160,6 +160,9 @@ soalloc(const struct protosw *prp, int wait) break; } break; + case AF_UNIX: + so->so_rcv.sb_flags |= SB_MTXLOCK; + break; } return (so); @@ -987,8 +990,11 @@ dontblock: * Dispose of any SCM_RIGHTS message that went * through the read path rather than recv. */ - if (pr->pr_domain->dom_dispose) + if (pr->pr_domain->dom_dispose) { + sb_mtx_unlock(&so->so_rcv); pr->pr_domain->dom_dispose(cm); + sb_mtx_lock(&so->so_rcv); + } m_free(cm); } } @@ -1173,8 +1179,11 @@ dontblock: } SBLASTRECORDCHK(&so->so_rcv, "soreceive 4"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 4"); - if (pr->pr_flags & PR_WANTRCVD) + if (pr->pr_flags & PR_WANTRCVD) { + sb_mtx_unlock(&so->so_rcv); pru_rcvd(so); + sb_mtx_lock(&so->so_rcv); + } } if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && @@ -1233,10 +1242,10 @@ sorflush(struct socket *so) /* with SBL_WAIT and SLB_NOINTR sblock() must not fail */ KASSERT(error == 0); socantrcvmore(so); + mtx_enter(&sb->sb_mtx); m = sb->sb_mb; memset(&sb->sb_startzero, 0, (caddr_t)&sb->sb_endzero - (caddr_t)&sb->sb_startzero); - mtx_enter(&sb->sb_mtx); sb->sb_timeo_nsecs = INFSLP; mtx_leave(&sb->sb_mtx); sbunlock(so, sb); @@ -1757,7 +1766,8 @@ somove(struct socket *so, int wait) void sorwakeup(struct socket *so) { - soassertlocked_readonly(so); + if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0) + soassertlocked_readonly(so); #ifdef SOCKET_SPLICE if (so->so_rcv.sb_flags & SB_SPLICE) { @@ -1877,6 +1887,8 @@ sosetopt(struct socket *so, int level, int optname, struct mbuf *m) cnt = 1; solock(so); + mtx_enter(&sb->sb_mtx); + switch (optname) { case SO_SNDBUF: case SO_RCVBUF: @@ -1898,7 +1910,10 @@ sosetopt(struct socket *so, int level, int optname, struct mbuf *m) sb->sb_hiwat : cnt; break; } + + mtx_leave(&sb->sb_mtx); sounlock(so); + break; } @@ -2169,13 +2184,6 @@ sofilt_unlock(struct socket *so, struct sockbuf *sb) } } -static inline void -sofilt_assert_locked(struct socket *so, struct sockbuf *sb) -{ - MUTEX_ASSERT_LOCKED(&sb->sb_mtx); - soassertlocked_readonly(so); -} - int soo_kqfilter(struct file *fp, struct knote *kn) { @@ -2218,9 +2226,14 @@ filt_soread(struct knote *kn, long hint) struct socket *so = kn->kn_fp->f_data; int rv = 0; - sofilt_assert_locked(so, &so->so_rcv); + MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx); + if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0) + soassertlocked_readonly(so); if (so->so_options & SO_ACCEPTCONN) { + if (so->so_rcv.sb_flags & SB_MTXLOCK) + soassertlocked_readonly(so); + kn->kn_data = so->so_qlen; rv = (kn->kn_data != 0); @@ -2275,7 +2288,8 @@ filt_sowrite(struct knote *kn, long hint) struct socket *so = kn->kn_fp->f_data; int rv; - sofilt_assert_locked(so, &so->so_snd); + MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx); + soassertlocked_readonly(so); kn->kn_data = sbspace(so, &so->so_snd); if (so->so_snd.sb_state & SS_CANTSENDMORE) { @@ -2306,7 +2320,9 @@ filt_soexcept(struct knote *kn, long hint) struct socket *so = kn->kn_fp->f_data; int rv = 0; - sofilt_assert_locked(so, &so->so_rcv); + MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx); + if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0) + soassertlocked_readonly(so); #ifdef SOCKET_SPLICE if (isspliced(so)) { diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index 70f7c150f76..153010a59cc 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket2.c,v 1.144 2024/02/12 22:48:27 mvs Exp $ */ +/* $OpenBSD: uipc_socket2.c,v 1.145 2024/03/26 09:46:47 mvs Exp $ */ /* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */ /* @@ -142,7 +142,9 @@ soisdisconnecting(struct socket *so) soassertlocked(so); so->so_state &= ~SS_ISCONNECTING; so->so_state |= SS_ISDISCONNECTING; + mtx_enter(&so->so_rcv.sb_mtx); so->so_rcv.sb_state |= SS_CANTRCVMORE; + mtx_leave(&so->so_rcv.sb_mtx); so->so_snd.sb_state |= SS_CANTSENDMORE; wakeup(&so->so_timeo); sowwakeup(so); @@ -155,7 +157,9 @@ soisdisconnected(struct socket *so) soassertlocked(so); so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISDISCONNECTED; + mtx_enter(&so->so_rcv.sb_mtx); so->so_rcv.sb_state |= SS_CANTRCVMORE; + mtx_leave(&so->so_rcv.sb_mtx); so->so_snd.sb_state |= SS_CANTSENDMORE; wakeup(&so->so_timeo); sowwakeup(so); @@ -219,9 +223,10 @@ sonewconn(struct socket *head, int connstatus, int wait) mtx_enter(&head->so_snd.sb_mtx); so->so_snd.sb_timeo_nsecs = head->so_snd.sb_timeo_nsecs; mtx_leave(&head->so_snd.sb_mtx); + + mtx_enter(&head->so_rcv.sb_mtx); so->so_rcv.sb_wat = head->so_rcv.sb_wat; so->so_rcv.sb_lowat = head->so_rcv.sb_lowat; - mtx_enter(&head->so_rcv.sb_mtx); so->so_rcv.sb_timeo_nsecs = head->so_rcv.sb_timeo_nsecs; mtx_leave(&head->so_rcv.sb_mtx); @@ -651,16 +656,22 @@ soreserve(struct socket *so, u_long sndcc, u_long rcvcc) if (sbreserve(so, &so->so_snd, sndcc)) goto bad; - if (sbreserve(so, &so->so_rcv, rcvcc)) - goto bad2; so->so_snd.sb_wat = sndcc; - so->so_rcv.sb_wat = rcvcc; - if (so->so_rcv.sb_lowat == 0) - so->so_rcv.sb_lowat = 1; if (so->so_snd.sb_lowat == 0) so->so_snd.sb_lowat = MCLBYTES; if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) so->so_snd.sb_lowat = so->so_snd.sb_hiwat; + + mtx_enter(&so->so_rcv.sb_mtx); + if (sbreserve(so, &so->so_rcv, rcvcc)) { + mtx_leave(&so->so_rcv.sb_mtx); + goto bad2; + } + so->so_rcv.sb_wat = rcvcc; + if (so->so_rcv.sb_lowat == 0) + so->so_rcv.sb_lowat = 1; + mtx_leave(&so->so_rcv.sb_mtx); + return (0); bad2: sbrelease(so, &so->so_snd); @@ -676,8 +687,7 @@ bad: int sbreserve(struct socket *so, struct sockbuf *sb, u_long cc) { - KASSERT(sb == &so->so_rcv || sb == &so->so_snd); - soassertlocked(so); + sbmtxassertlocked(so, sb); if (cc == 0 || cc > sb_max) return (1); @@ -818,7 +828,7 @@ sbappend(struct socket *so, struct sockbuf *sb, struct mbuf *m) if (m == NULL) return; - soassertlocked(so); + sbmtxassertlocked(so, sb); SBLASTRECORDCHK(sb, "sbappend 1"); if ((n = sb->sb_lastrecord) != NULL) { @@ -899,8 +909,7 @@ sbappendrecord(struct socket *so, struct sockbuf *sb, struct mbuf *m0) { struct mbuf *m; - KASSERT(sb == &so->so_rcv || sb == &so->so_snd); - soassertlocked(so); + sbmtxassertlocked(so, sb); if (m0 == NULL) return; @@ -984,6 +993,8 @@ sbappendcontrol(struct socket *so, struct sockbuf *sb, struct mbuf *m0, struct mbuf *m, *mlast, *n; int eor = 0, space = 0; + sbmtxassertlocked(so, sb); + if (control == NULL) panic("sbappendcontrol"); for (m = control; ; m = m->m_next) { @@ -1109,8 +1120,7 @@ sbdrop(struct socket *so, struct sockbuf *sb, int len) struct mbuf *m, *mn; struct mbuf *next; - KASSERT(sb == &so->so_rcv || sb == &so->so_snd); - soassertlocked(so); + sbmtxassertlocked(so, sb); next = (m = sb->sb_mb) ? m->m_nextpkt : NULL; while (len > 0) { diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 7dc83db4d51..17db0cd4d79 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_usrreq.c,v 1.202 2024/03/22 17:34:11 mvs Exp $ */ +/* $OpenBSD: uipc_usrreq.c,v 1.203 2024/03/26 09:46:47 mvs Exp $ */ /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ /* @@ -489,8 +489,10 @@ uipc_rcvd(struct socket *so) * Adjust backpressure on sender * and wakeup any waiting to write. */ + mtx_enter(&so->so_rcv.sb_mtx); so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt; so2->so_snd.sb_cc = so->so_rcv.sb_cc; + mtx_leave(&so->so_rcv.sb_mtx); sowwakeup(so2); sounlock(so2); } @@ -499,8 +501,9 @@ int uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam, struct mbuf *control) { + struct unpcb *unp = sotounpcb(so); struct socket *so2; - int error = 0; + int error = 0, dowakeup = 0; if (control) { sounlock(so); @@ -514,21 +517,24 @@ uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam, error = EPIPE; goto dispose; } - if ((so2 = unp_solock_peer(so)) == NULL) { + if (unp->unp_conn == NULL) { error = ENOTCONN; goto dispose; } + so2 = unp->unp_conn->unp_socket; + /* * Send to paired receive port, and then raise * send buffer counts to maintain backpressure. * Wake up readers. */ + mtx_enter(&so2->so_rcv.sb_mtx); if (control) { if (sbappendcontrol(so2, &so2->so_rcv, m, control)) { control = NULL; } else { - sounlock(so2); + mtx_leave(&so2->so_rcv.sb_mtx); error = ENOBUFS; goto dispose; } @@ -539,9 +545,12 @@ uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam, so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt; so->so_snd.sb_cc = so2->so_rcv.sb_cc; if (so2->so_rcv.sb_cc > 0) + dowakeup = 1; + mtx_leave(&so2->so_rcv.sb_mtx); + + if (dowakeup) sorwakeup(so2); - sounlock(so2); m = NULL; dispose: @@ -563,7 +572,7 @@ uipc_dgram_send(struct socket *so, struct mbuf *m, struct mbuf *nam, struct unpcb *unp = sotounpcb(so); struct socket *so2; const struct sockaddr *from; - int error = 0; + int error = 0, dowakeup = 0; if (control) { sounlock(so); @@ -583,7 +592,7 @@ uipc_dgram_send(struct socket *so, struct mbuf *m, struct mbuf *nam, goto dispose; } - if ((so2 = unp_solock_peer(so)) == NULL) { + if (unp->unp_conn == NULL) { if (nam != NULL) error = ECONNREFUSED; else @@ -591,20 +600,24 @@ uipc_dgram_send(struct socket *so, struct mbuf *m, struct mbuf *nam, goto dispose; } + so2 = unp->unp_conn->unp_socket; + if (unp->unp_addr) from = mtod(unp->unp_addr, struct sockaddr *); else from = &sun_noname; + + mtx_enter(&so2->so_rcv.sb_mtx); if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) { - sorwakeup(so2); + dowakeup = 1; m = NULL; control = NULL; } else error = ENOBUFS; + mtx_leave(&so2->so_rcv.sb_mtx); - if (so2 != so) - sounlock(so2); - + if (dowakeup) + sorwakeup(so2); if (nam) unp_disconnect(unp); @@ -1390,9 +1403,9 @@ unp_gc(void *arg __unused) if ((unp->unp_gcflags & UNP_GCDEAD) == 0) continue; so = unp->unp_socket; - solock(so); + mtx_enter(&so->so_rcv.sb_mtx); unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs); - sounlock(so); + mtx_leave(&so->so_rcv.sb_mtx); } /* @@ -1414,9 +1427,9 @@ unp_gc(void *arg __unused) unp->unp_gcflags &= ~UNP_GCDEAD; so = unp->unp_socket; - solock(so); + mtx_enter(&so->so_rcv.sb_mtx); unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs); - sounlock(so); + mtx_leave(&so->so_rcv.sb_mtx); KASSERT(nunref > 0); nunref--; diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c index 54583728236..81175a6742f 100644 --- a/sys/miscfs/fifofs/fifo_vnops.c +++ b/sys/miscfs/fifofs/fifo_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fifo_vnops.c,v 1.103 2024/02/03 22:50:09 mvs Exp $ */ +/* $OpenBSD: fifo_vnops.c,v 1.104 2024/03/26 09:46:47 mvs Exp $ */ /* $NetBSD: fifo_vnops.c,v 1.18 1996/03/16 23:52:42 christos Exp $ */ /* @@ -201,7 +201,9 @@ fifo_open(void *v) if (fip->fi_writers == 1) { solock(rso); rso->so_state &= ~SS_ISDISCONNECTED; + mtx_enter(&rso->so_rcv.sb_mtx); rso->so_rcv.sb_state &= ~SS_CANTRCVMORE; + mtx_leave(&rso->so_rcv.sb_mtx); sounlock(rso); if (fip->fi_readers > 0) wakeup(&fip->fi_readers); @@ -518,7 +520,6 @@ filt_fiforead(struct knote *kn, long hint) struct socket *so = kn->kn_hook; int rv; - soassertlocked(so); MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx); kn->kn_data = so->so_rcv.sb_cc; @@ -574,7 +575,6 @@ filt_fifoexcept(struct knote *kn, long hint) struct socket *so = kn->kn_hook; int rv = 0; - soassertlocked(so); MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx); if (kn->kn_flags & __EV_POLL) { diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index dc34b1c5c61..782ebce75fd 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: socketvar.h,v 1.125 2024/03/22 17:34:11 mvs Exp $ */ +/* $OpenBSD: socketvar.h,v 1.126 2024/03/26 09:46:47 mvs Exp $ */ /* $NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $ */ /*- @@ -242,7 +242,10 @@ sb_notify(struct socket *so, struct sockbuf *sb) static inline long sbspace(struct socket *so, struct sockbuf *sb) { - soassertlocked_readonly(so); + if (sb->sb_flags & SB_MTXLOCK) + sbmtxassertlocked(so, sb); + else + soassertlocked_readonly(so); return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt); } -- 2.20.1