From ac42138b096e513ab6ef1d1b78cf9b7436f66d2c Mon Sep 17 00:00:00 2001 From: mvs Date: Fri, 12 Jul 2024 17:20:18 +0000 Subject: [PATCH] Switch `so_snd' of udp(4) sockets to the new locking scheme. udp_send() and following udp{,6}_output() do not append packets to `so_snd' socket buffer. This mean the sosend() and sosplice() sending paths are dummy pru_send() and there is no problems to simultaneously run them on the same socket. Push shared solock() deep down to sesend() and take it only around pru_send(), but keep somove() running unedr exclusive solock(). Since sosend() doesn't modify `so_snd' the unlocked `so_snd' space checks within somove() are safe. Corresponding `sb_state' and `sb_flags' modifications are protected by `sb_mtx' mutex(9). Tested and OK bluhm. --- sys/kern/uipc_socket.c | 35 +++++++++++++++++++++++++--------- sys/kern/uipc_socket2.c | 6 +++--- sys/miscfs/fifofs/fifo_vnops.c | 4 ++-- sys/net/rtsock.c | 6 +++--- sys/nfs/nfs_socket.c | 4 +++- sys/sys/socketvar.h | 16 ++++++++++++++-- 6 files changed, 51 insertions(+), 20 deletions(-) diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 12818fb706c..a2dfc010589 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket.c,v 1.336 2024/06/14 08:32:22 mvs Exp $ */ +/* $OpenBSD: uipc_socket.c,v 1.337 2024/07/12 17:20:18 mvs Exp $ */ /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ /* @@ -158,9 +158,8 @@ soalloc(const struct protosw *prp, int wait) case AF_INET6: switch (prp->pr_type) { case SOCK_RAW: - so->so_snd.sb_flags |= SB_MTXLOCK; - /* FALLTHROUGH */ case SOCK_DGRAM: + so->so_snd.sb_flags |= SB_MTXLOCK; so->so_rcv.sb_flags |= SB_MTXLOCK; break; } @@ -628,7 +627,7 @@ restart: } else if (addr == NULL) snderr(EDESTADDRREQ); } - space = sbspace(so, &so->so_snd); + space = sbspace_locked(so, &so->so_snd); if (flags & MSG_OOB) space += 1024; if (so->so_proto->pr_domain->dom_family == AF_UNIX) { @@ -1414,9 +1413,12 @@ sosplice(struct socket *so, int fd, off_t max, struct timeval *tv) /* Splice so and sosp together. */ mtx_enter(&so->so_rcv.sb_mtx); + mtx_enter(&sosp->so_snd.sb_mtx); so->so_sp->ssp_socket = sosp; sosp->so_sp->ssp_soback = so; + mtx_leave(&sosp->so_snd.sb_mtx); mtx_leave(&so->so_rcv.sb_mtx); + so->so_splicelen = 0; so->so_splicemax = max; if (tv) @@ -1432,9 +1434,11 @@ sosplice(struct socket *so, int fd, off_t max, struct timeval *tv) */ if (somove(so, M_WAIT)) { mtx_enter(&so->so_rcv.sb_mtx); + mtx_enter(&sosp->so_snd.sb_mtx); so->so_rcv.sb_flags |= SB_SPLICE; - mtx_leave(&so->so_rcv.sb_mtx); sosp->so_snd.sb_flags |= SB_SPLICE; + mtx_leave(&sosp->so_snd.sb_mtx); + mtx_leave(&so->so_rcv.sb_mtx); } release: @@ -1454,11 +1458,13 @@ sounsplice(struct socket *so, struct socket *sosp, int freeing) task_del(sosplice_taskq, &so->so_splicetask); timeout_del(&so->so_idleto); - sosp->so_snd.sb_flags &= ~SB_SPLICE; mtx_enter(&so->so_rcv.sb_mtx); + mtx_enter(&sosp->so_snd.sb_mtx); so->so_rcv.sb_flags &= ~SB_SPLICE; + sosp->so_snd.sb_flags &= ~SB_SPLICE; so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL; + mtx_leave(&sosp->so_snd.sb_mtx); mtx_leave(&so->so_rcv.sb_mtx); /* Do not wakeup a socket that is about to be freed. */ @@ -1571,21 +1577,26 @@ somove(struct socket *so, int wait) maxreached = 1; } } - space = sbspace(sosp, &sosp->so_snd); + mtx_enter(&sosp->so_snd.sb_mtx); + space = sbspace_locked(sosp, &sosp->so_snd); if (so->so_oobmark && so->so_oobmark < len && so->so_oobmark < space + 1024) space += 1024; if (space <= 0) { + mtx_leave(&sosp->so_snd.sb_mtx); maxreached = 0; goto release; } if (space < len) { maxreached = 0; - if (space < sosp->so_snd.sb_lowat) + if (space < sosp->so_snd.sb_lowat) { + mtx_leave(&sosp->so_snd.sb_mtx); goto release; + } len = space; } sosp->so_snd.sb_state |= SS_ISSENDING; + mtx_leave(&sosp->so_snd.sb_mtx); SBLASTRECORDCHK(&so->so_rcv, "somove 1"); SBLASTMBUFCHK(&so->so_rcv, "somove 1"); @@ -1780,9 +1791,12 @@ somove(struct socket *so, int wait) } } + mtx_enter(&sosp->so_snd.sb_mtx); /* Append all remaining data to drain socket. */ if (so->so_rcv.sb_cc == 0 || maxreached) sosp->so_snd.sb_state &= ~SS_ISSENDING; + mtx_leave(&sosp->so_snd.sb_mtx); + error = pru_send(sosp, m, NULL, NULL); if (error) { if (sosp->so_snd.sb_state & SS_CANTSENDMORE) @@ -1796,7 +1810,10 @@ somove(struct socket *so, int wait) goto nextpkt; release: + mtx_enter(&sosp->so_snd.sb_mtx); sosp->so_snd.sb_state &= ~SS_ISSENDING; + mtx_leave(&sosp->so_snd.sb_mtx); + if (!error && maxreached && so->so_splicemax == so->so_splicelen) error = EFBIG; if (error) @@ -2346,7 +2363,7 @@ filt_sowrite(struct knote *kn, long hint) if ((so->so_snd.sb_flags & SB_MTXLOCK) == 0) soassertlocked_readonly(so); - kn->kn_data = sbspace(so, &so->so_snd); + kn->kn_data = sbspace_locked(so, &so->so_snd); if (so->so_snd.sb_state & SS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; if (kn->kn_flags & __EV_POLL) { diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index 27edd54afb4..6f50f5283a7 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket2.c,v 1.156 2024/06/28 21:30:24 mvs Exp $ */ +/* $OpenBSD: uipc_socket2.c,v 1.157 2024/07/12 17:20:18 mvs Exp $ */ /* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */ /* @@ -926,7 +926,7 @@ sbappendaddr(struct socket *so, struct sockbuf *sb, const struct sockaddr *asa, if (n->m_next == NULL) /* keep pointer to last control buf */ break; } - if (space > sbspace(so, sb)) + if (space > sbspace_locked(so, sb)) return (0); if (asa->sa_len > MLEN) return (0); @@ -984,7 +984,7 @@ sbappendcontrol(struct socket *so, struct sockbuf *sb, struct mbuf *m0, m->m_flags &= ~M_EOR; } } - if (space > sbspace(so, sb)) + if (space > sbspace_locked(so, sb)) return (0); n->m_next = m0; /* concatenate data to control */ diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c index aa04381db4d..a381fc454cd 100644 --- a/sys/miscfs/fifofs/fifo_vnops.c +++ b/sys/miscfs/fifofs/fifo_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fifo_vnops.c,v 1.106 2024/06/28 21:30:24 mvs Exp $ */ +/* $OpenBSD: fifo_vnops.c,v 1.107 2024/07/12 17:20:18 mvs Exp $ */ /* $NetBSD: fifo_vnops.c,v 1.18 1996/03/16 23:52:42 christos Exp $ */ /* @@ -564,7 +564,7 @@ filt_fifowrite(struct knote *kn, long hint) MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx); - kn->kn_data = sbspace(so, &so->so_snd); + kn->kn_data = sbspace_locked(so, &so->so_snd); if (so->so_snd.sb_state & SS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; rv = 1; diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 4031033b110..72033371528 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rtsock.c,v 1.374 2024/06/14 08:32:22 mvs Exp $ */ +/* $OpenBSD: rtsock.c,v 1.375 2024/07/12 17:20:18 mvs Exp $ */ /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ /* @@ -316,7 +316,7 @@ route_rcvd(struct socket *so) mtx_enter(&so->so_rcv.sb_mtx); if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) && - ((sbspace(so, &so->so_rcv) == so->so_rcv.sb_hiwat))) + ((sbspace_locked(so, &so->so_rcv) == so->so_rcv.sb_hiwat))) rop->rop_flags &= ~ROUTECB_FLAG_FLUSH; mtx_leave(&so->so_rcv.sb_mtx); } @@ -603,7 +603,7 @@ rtm_sendup(struct socket *so, struct mbuf *m0) return (ENOMEM); mtx_enter(&so->so_rcv.sb_mtx); - if (sbspace(so, &so->so_rcv) < (2 * MSIZE) || + if (sbspace_locked(so, &so->so_rcv) < (2 * MSIZE) || sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) send_desync = 1; mtx_leave(&so->so_rcv.sb_mtx); diff --git a/sys/nfs/nfs_socket.c b/sys/nfs/nfs_socket.c index c6ec74e1cc3..f75122ed20d 100644 --- a/sys/nfs/nfs_socket.c +++ b/sys/nfs/nfs_socket.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nfs_socket.c,v 1.150 2024/04/30 17:05:20 miod Exp $ */ +/* $OpenBSD: nfs_socket.c,v 1.151 2024/07/12 17:20:18 mvs Exp $ */ /* $NetBSD: nfs_socket.c,v 1.27 1996/04/15 20:20:00 thorpej Exp $ */ /* @@ -374,7 +374,9 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep) mtx_enter(&so->so_rcv.sb_mtx); so->so_rcv.sb_flags |= SB_NOINTR; mtx_leave(&so->so_rcv.sb_mtx); + mtx_enter(&so->so_snd.sb_mtx); so->so_snd.sb_flags |= SB_NOINTR; + mtx_leave(&so->so_snd.sb_mtx); sounlock(so); m_freem(mopt); diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index d7587bafb7d..d116c07d809 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: socketvar.h,v 1.131 2024/05/17 19:11:14 mvs Exp $ */ +/* $OpenBSD: socketvar.h,v 1.132 2024/07/12 17:20:18 mvs Exp $ */ /* $NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $ */ /*- @@ -237,7 +237,7 @@ sb_notify(struct socket *so, struct sockbuf *sb) */ static inline long -sbspace(struct socket *so, struct sockbuf *sb) +sbspace_locked(struct socket *so, struct sockbuf *sb) { if (sb->sb_flags & SB_MTXLOCK) sbmtxassertlocked(so, sb); @@ -247,6 +247,18 @@ sbspace(struct socket *so, struct sockbuf *sb) return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt); } +static inline long +sbspace(struct socket *so, struct sockbuf *sb) +{ + long ret; + + sb_mtx_lock(sb); + ret = sbspace_locked(so, sb); + sb_mtx_unlock(sb); + + return ret; +} + /* do we have to send all at once on a socket? */ #define sosendallatonce(so) \ ((so)->so_proto->pr_flags & PR_ATOMIC) -- 2.20.1