From bb0cd11ade5825e1c8d4476bf7882b2e6d1a756e Mon Sep 17 00:00:00 2001 From: mvs Date: Fri, 3 May 2024 17:43:09 +0000 Subject: [PATCH] Push solock() down to sosend() and remove it from soreceive() paths fro unix(4) sockets. Push solock() deep down to sosend() and remove it from soreceive() paths for unix(4) sockets. The transmission of unix(4) sockets already half-unlocked because connected peer is not locked by solock() during sbappend*() call. Use `sb_mtx' mutex(9) and `sb_lock' rwlock(9) to protect both `so_snd' and `so_rcv'. Since the `so_snd' is protected by `sb_mtx' mutex(9) the re-locking is not required in uipc_rcvd(). Do direct `so_rcv' dispose and cleanup in sofree(). This sockets is almost dead and unlinked from everywhere include spliced peer, so concurrent sotask() thread will just exit. This required to keep locks order between `i_lock' and `sb_lock'. Also this removes re-locking from sofree() for all sockets. SB_OWNLOCK became redundant with SB_MTXLOCK, so remove it. SB_MTXLOCK was kept because checks against SB_MTXLOCK within sb*() routines are mor consistent. Feedback and ok bluhm --- sys/kern/uipc_socket.c | 59 ++++++++++++++++++---------------- sys/kern/uipc_socket2.c | 11 ++++--- sys/kern/uipc_usrreq.c | 27 ++++++++++++---- sys/miscfs/fifofs/fifo_vnops.c | 15 +++------ sys/sys/socketvar.h | 5 ++- 5 files changed, 65 insertions(+), 52 deletions(-) diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 65ae645171d..e67882b2df0 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket.c,v 1.332 2024/05/02 11:55:31 mvs Exp $ */ +/* $OpenBSD: uipc_socket.c,v 1.333 2024/05/03 17:43:09 mvs Exp $ */ /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ /* @@ -159,14 +159,15 @@ soalloc(const struct protosw *prp, int wait) case AF_INET6: switch (prp->pr_type) { case SOCK_RAW: - so->so_snd.sb_flags |= SB_MTXLOCK | SB_OWNLOCK; + so->so_snd.sb_flags |= SB_MTXLOCK; /* FALLTHROUGH */ case SOCK_DGRAM: - so->so_rcv.sb_flags |= SB_MTXLOCK | SB_OWNLOCK; + so->so_rcv.sb_flags |= SB_MTXLOCK; break; } break; case AF_UNIX: + so->so_snd.sb_flags |= SB_MTXLOCK; so->so_rcv.sb_flags |= SB_MTXLOCK; break; } @@ -354,18 +355,16 @@ sofree(struct socket *so, int keep_lock) mtx_leave(&so->so_snd.sb_mtx); /* - * Regardless on '_locked' postfix, must release solock() before - * call sorflush_locked() for SB_OWNLOCK marked socket. Can't - * release solock() and call sorflush() because solock() release - * is unwanted for tcp(4) socket. + * Unlocked dispose and cleanup is safe. Socket is unlinked + * from everywhere. Even concurrent sotask() thread will not + * call somove(). */ + if (so->so_proto->pr_flags & PR_RIGHTS && + so->so_proto->pr_domain->dom_dispose) + (*so->so_proto->pr_domain->dom_dispose)(so->so_rcv.sb_mb); + m_purge(so->so_rcv.sb_mb); - if (so->so_rcv.sb_flags & SB_OWNLOCK) - sounlock(so); - - sorflush_locked(so); - - if (!((so->so_rcv.sb_flags & SB_OWNLOCK) || keep_lock)) + if (!keep_lock) sounlock(so); #ifdef SOCKET_SPLICE @@ -574,7 +573,7 @@ sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top, size_t resid; int error; int atomic = sosendallatonce(so) || top; - int dosolock = ((so->so_snd.sb_flags & SB_OWNLOCK) == 0); + int dosolock = ((so->so_snd.sb_flags & SB_MTXLOCK) == 0); if (uio) resid = uio->uio_resid; @@ -846,7 +845,7 @@ soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio, const struct protosw *pr = so->so_proto; struct mbuf *nextrecord; size_t resid, orig_resid = uio->uio_resid; - int dosolock = ((so->so_rcv.sb_flags & SB_OWNLOCK) == 0); + int dosolock = ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0); mp = mp0; if (paddr) @@ -945,7 +944,7 @@ restart: SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); - if (so->so_rcv.sb_flags & (SB_MTXLOCK | SB_OWNLOCK)) { + if (so->so_rcv.sb_flags & SB_MTXLOCK) { sbunlock_locked(so, &so->so_rcv); if (dosolock) sounlock_shared(so); @@ -1247,7 +1246,11 @@ dontblock: SBLASTMBUFCHK(&so->so_rcv, "soreceive 4"); if (pr->pr_flags & PR_WANTRCVD) { sb_mtx_unlock(&so->so_rcv); + if (!dosolock) + solock_shared(so); pru_rcvd(so); + if (!dosolock) + sounlock_shared(so); sb_mtx_lock(&so->so_rcv); } } @@ -1306,17 +1309,17 @@ sorflush_locked(struct socket *so) const struct protosw *pr = so->so_proto; int error; - if ((sb->sb_flags & SB_OWNLOCK) == 0) + if ((sb->sb_flags & SB_MTXLOCK) == 0) soassertlocked(so); error = sblock(so, sb, SBL_WAIT | SBL_NOINTR); /* with SBL_WAIT and SLB_NOINTR sblock() must not fail */ KASSERT(error == 0); - if (sb->sb_flags & SB_OWNLOCK) + if (sb->sb_flags & SB_MTXLOCK) solock(so); socantrcvmore(so); - if (sb->sb_flags & SB_OWNLOCK) + if (sb->sb_flags & SB_MTXLOCK) sounlock(so); mtx_enter(&sb->sb_mtx); @@ -1334,10 +1337,10 @@ sorflush_locked(struct socket *so) void sorflush(struct socket *so) { - if ((so->so_rcv.sb_flags & SB_OWNLOCK) == 0) + if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0) solock_shared(so); sorflush_locked(so); - if ((so->so_rcv.sb_flags & SB_OWNLOCK) == 0) + if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0) sounlock_shared(so); } @@ -1383,7 +1386,7 @@ sosplice(struct socket *so, int fd, off_t max, struct timeval *tv) membar_consumer(); } - if (so->so_rcv.sb_flags & SB_OWNLOCK) { + if (so->so_rcv.sb_flags & SB_MTXLOCK) { if ((error = sblock(so, &so->so_rcv, SBL_WAIT)) != 0) return (error); solock(so); @@ -1471,7 +1474,7 @@ sosplice(struct socket *so, int fd, off_t max, struct timeval *tv) release: sbunlock(sosp, &sosp->so_snd); out: - if (so->so_rcv.sb_flags & SB_OWNLOCK) { + if (so->so_rcv.sb_flags & SB_MTXLOCK) { sounlock(so); sbunlock(so, &so->so_rcv); } else { @@ -1885,7 +1888,8 @@ sorwakeup(struct socket *so) void sowwakeup(struct socket *so) { - soassertlocked_readonly(so); + if ((so->so_snd.sb_flags & SB_MTXLOCK) == 0) + soassertlocked_readonly(so); #ifdef SOCKET_SPLICE if (so->so_snd.sb_flags & SB_SPLICE) @@ -1976,7 +1980,7 @@ sosetopt(struct socket *so, int level, int optname, struct mbuf *m) if ((long)cnt <= 0) cnt = 1; - if (((sb->sb_flags & SB_OWNLOCK) == 0)) + if (((sb->sb_flags & SB_MTXLOCK) == 0)) solock(so); mtx_enter(&sb->sb_mtx); @@ -2003,7 +2007,7 @@ sosetopt(struct socket *so, int level, int optname, struct mbuf *m) } mtx_leave(&sb->sb_mtx); - if (((sb->sb_flags & SB_OWNLOCK) == 0)) + if (((sb->sb_flags & SB_MTXLOCK) == 0)) sounlock(so); break; @@ -2380,7 +2384,8 @@ filt_sowrite(struct knote *kn, long hint) int rv; MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx); - soassertlocked_readonly(so); + if ((so->so_snd.sb_flags & SB_MTXLOCK) == 0) + soassertlocked_readonly(so); kn->kn_data = sbspace(so, &so->so_snd); if (so->so_snd.sb_state & SS_CANTSENDMORE) { diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index f454f21bfdb..e8401225fdb 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket2.c,v 1.152 2024/05/02 21:26:52 mvs Exp $ */ +/* $OpenBSD: uipc_socket2.c,v 1.153 2024/05/03 17:43:09 mvs Exp $ */ /* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */ /* @@ -228,9 +228,10 @@ sonewconn(struct socket *head, int connstatus, int wait) */ if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) goto fail; + + mtx_enter(&head->so_snd.sb_mtx); so->so_snd.sb_wat = head->so_snd.sb_wat; so->so_snd.sb_lowat = head->so_snd.sb_lowat; - mtx_enter(&head->so_snd.sb_mtx); so->so_snd.sb_timeo_nsecs = head->so_snd.sb_timeo_nsecs; mtx_leave(&head->so_snd.sb_mtx); @@ -543,7 +544,7 @@ sblock(struct socket *so, struct sockbuf *sb, int flags) { int error = 0, prio = PSOCK; - if (sb->sb_flags & SB_OWNLOCK) { + if (sb->sb_flags & SB_MTXLOCK) { int rwflags = RW_WRITE; if (!(flags & SBL_NOINTR || sb->sb_flags & SB_NOINTR)) @@ -586,7 +587,7 @@ out: void sbunlock_locked(struct socket *so, struct sockbuf *sb) { - if (sb->sb_flags & SB_OWNLOCK) { + if (sb->sb_flags & SB_MTXLOCK) { rw_exit(&sb->sb_lock); return; } @@ -603,7 +604,7 @@ sbunlock_locked(struct socket *so, struct sockbuf *sb) void sbunlock(struct socket *so, struct sockbuf *sb) { - if (sb->sb_flags & SB_OWNLOCK) { + if (sb->sb_flags & SB_MTXLOCK) { rw_exit(&sb->sb_lock); return; } diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 56228965d9a..89a5e971ff3 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_usrreq.c,v 1.205 2024/05/02 17:10:55 mvs Exp $ */ +/* $OpenBSD: uipc_usrreq.c,v 1.206 2024/05/03 17:43:09 mvs Exp $ */ /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ /* @@ -477,20 +477,24 @@ uipc_dgram_shutdown(struct socket *so) void uipc_rcvd(struct socket *so) { + struct unpcb *unp = sotounpcb(so); struct socket *so2; - if ((so2 = unp_solock_peer(so)) == NULL) + if (unp->unp_conn == NULL) return; + so2 = unp->unp_conn->unp_socket; + /* * Adjust backpressure on sender * and wakeup any waiting to write. */ mtx_enter(&so->so_rcv.sb_mtx); + mtx_enter(&so2->so_snd.sb_mtx); so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt; so2->so_snd.sb_cc = so->so_rcv.sb_cc; + mtx_leave(&so2->so_snd.sb_mtx); mtx_leave(&so->so_rcv.sb_mtx); sowwakeup(so2); - sounlock(so2); } int @@ -509,10 +513,6 @@ uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam, goto out; } - if (so->so_snd.sb_state & SS_CANTSENDMORE) { - error = EPIPE; - goto dispose; - } if (unp->unp_conn == NULL) { error = ENOTCONN; goto dispose; @@ -525,11 +525,23 @@ uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam, * send buffer counts to maintain backpressure. * Wake up readers. */ + /* + * sbappend*() should be serialized together + * with so_snd modification. + */ mtx_enter(&so2->so_rcv.sb_mtx); + mtx_enter(&so->so_snd.sb_mtx); + if (so->so_snd.sb_state & SS_CANTSENDMORE) { + mtx_leave(&so->so_snd.sb_mtx); + mtx_leave(&so2->so_rcv.sb_mtx); + error = EPIPE; + goto dispose; + } if (control) { if (sbappendcontrol(so2, &so2->so_rcv, m, control)) { control = NULL; } else { + mtx_leave(&so->so_snd.sb_mtx); mtx_leave(&so2->so_rcv.sb_mtx); error = ENOBUFS; goto dispose; @@ -542,6 +554,7 @@ uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam, so->so_snd.sb_cc = so2->so_rcv.sb_cc; if (so2->so_rcv.sb_cc > 0) dowakeup = 1; + mtx_leave(&so->so_snd.sb_mtx); mtx_leave(&so2->so_rcv.sb_mtx); if (dowakeup) diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c index 81175a6742f..97b14c1bf2d 100644 --- a/sys/miscfs/fifofs/fifo_vnops.c +++ b/sys/miscfs/fifofs/fifo_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fifo_vnops.c,v 1.104 2024/03/26 09:46:47 mvs Exp $ */ +/* $OpenBSD: fifo_vnops.c,v 1.105 2024/05/03 17:43:09 mvs Exp $ */ /* $NetBSD: fifo_vnops.c,v 1.18 1996/03/16 23:52:42 christos Exp $ */ /* @@ -174,10 +174,10 @@ fifo_open(void *v) return (error); } fip->fi_readers = fip->fi_writers = 0; - solock(wso); + mtx_enter(&wso->so_snd.sb_mtx); wso->so_snd.sb_state |= SS_CANTSENDMORE; wso->so_snd.sb_lowat = PIPE_BUF; - sounlock(wso); + mtx_leave(&wso->so_snd.sb_mtx); } else { rso = fip->fi_readsock; wso = fip->fi_writesock; @@ -185,9 +185,9 @@ fifo_open(void *v) if (ap->a_mode & FREAD) { fip->fi_readers++; if (fip->fi_readers == 1) { - solock(wso); + mtx_enter(&wso->so_snd.sb_mtx); wso->so_snd.sb_state &= ~SS_CANTSENDMORE; - sounlock(wso); + mtx_leave(&wso->so_snd.sb_mtx); if (fip->fi_writers > 0) wakeup(&fip->fi_writers); } @@ -554,7 +554,6 @@ filt_fifowrite(struct knote *kn, long hint) struct socket *so = kn->kn_hook; int rv; - soassertlocked(so); MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx); kn->kn_data = sbspace(so, &so->so_snd); @@ -625,11 +624,9 @@ filt_fifowmodify(struct kevent *kev, struct knote *kn) struct socket *so = kn->kn_hook; int rv; - solock(so); mtx_enter(&so->so_snd.sb_mtx); rv = knote_modify(kev, kn); mtx_leave(&so->so_snd.sb_mtx); - sounlock(so); return (rv); } @@ -640,11 +637,9 @@ filt_fifowprocess(struct knote *kn, struct kevent *kev) struct socket *so = kn->kn_hook; int rv; - solock(so); mtx_enter(&so->so_snd.sb_mtx); rv = knote_process(kn, kev); mtx_leave(&so->so_snd.sb_mtx); - sounlock(so); return (rv); } diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index 780b1ccb8dd..65e17e42a2a 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: socketvar.h,v 1.129 2024/04/11 13:32:51 mvs Exp $ */ +/* $OpenBSD: socketvar.h,v 1.130 2024/05/03 17:43:09 mvs Exp $ */ /* $NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $ */ /*- @@ -134,8 +134,7 @@ struct socket { #define SB_ASYNC 0x0010 /* ASYNC I/O, need signals */ #define SB_SPLICE 0x0020 /* buffer is splice source or drain */ #define SB_NOINTR 0x0040 /* operations not interruptible */ -#define SB_MTXLOCK 0x0080 /* use sb_mtx for sockbuf protection */ -#define SB_OWNLOCK 0x0100 /* sblock() doesn't need solock() */ +#define SB_MTXLOCK 0x0080 /* sblock() doesn't need solock() */ void (*so_upcall)(struct socket *so, caddr_t arg, int waitf); caddr_t so_upcallarg; /* Arg for above */ -- 2.20.1