Push solock() down to sosend() for SOCK_RAW sockets.
authormvs <mvs@openbsd.org>
Tue, 30 Apr 2024 17:59:15 +0000 (17:59 +0000)
committermvs <mvs@openbsd.org>
Tue, 30 Apr 2024 17:59:15 +0000 (17:59 +0000)
Raw sockets are the simplest inet sockets, so use them to start landing
`sb_mtx' mutex(9) protection for `so_snd' buffer. Now solock() is taken
only around pru_send*(), the rest of sosend() serialized by sblock() and
`sb_mtx'. The unlocked SS_ISCONNECTED check is fine, because
rip{,6}_send() check it. Also, previously the SS_ISCONNECTED could be
lost due to solock() release around following m_getuio().

ok bluhm

sys/kern/sys_socket.c
sys/kern/uipc_socket.c
sys/kern/uipc_socket2.c

index 748be5a..e0a34c6 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: sys_socket.c,v 1.64 2024/04/11 08:33:37 mvs Exp $     */
+/*     $OpenBSD: sys_socket.c,v 1.65 2024/04/30 17:59:15 mvs Exp $     */
 /*     $NetBSD: sys_socket.c,v 1.13 1995/08/12 23:59:09 mycroft Exp $  */
 
 /*
@@ -92,6 +92,7 @@ soo_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p)
        case FIOASYNC:
                solock(so);
                mtx_enter(&so->so_rcv.sb_mtx);
+               mtx_enter(&so->so_snd.sb_mtx);
                if (*(int *)data) {
                        so->so_rcv.sb_flags |= SB_ASYNC;
                        so->so_snd.sb_flags |= SB_ASYNC;
@@ -99,6 +100,7 @@ soo_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p)
                        so->so_rcv.sb_flags &= ~SB_ASYNC;
                        so->so_snd.sb_flags &= ~SB_ASYNC;
                }
+               mtx_leave(&so->so_snd.sb_mtx);
                mtx_leave(&so->so_rcv.sb_mtx);
                sounlock(so);
                break;
index 6b539e4..8b49d6f 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: uipc_socket.c,v 1.330 2024/04/15 21:31:29 mvs Exp $   */
+/*     $OpenBSD: uipc_socket.c,v 1.331 2024/04/30 17:59:15 mvs Exp $   */
 /*     $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $        */
 
 /*
@@ -146,8 +146,8 @@ soalloc(const struct protosw *prp, int wait)
        refcnt_init(&so->so_refcnt);
        rw_init(&so->so_rcv.sb_lock, "sbufrcv");
        rw_init(&so->so_snd.sb_lock, "sbufsnd");
-       mtx_init(&so->so_rcv.sb_mtx, IPL_MPFLOOR);
-       mtx_init(&so->so_snd.sb_mtx, IPL_MPFLOOR);
+       mtx_init_flags(&so->so_rcv.sb_mtx, IPL_MPFLOOR, "sbrcv", 0);
+       mtx_init_flags(&so->so_snd.sb_mtx, IPL_MPFLOOR, "sbsnd", 0);
        klist_init_mutex(&so->so_rcv.sb_klist, &so->so_rcv.sb_mtx);
        klist_init_mutex(&so->so_snd.sb_klist, &so->so_snd.sb_mtx);
        sigio_init(&so->so_sigio);
@@ -158,8 +158,10 @@ soalloc(const struct protosw *prp, int wait)
        case AF_INET:
        case AF_INET6:
                switch (prp->pr_type) {
-               case SOCK_DGRAM:
                case SOCK_RAW:
+                       so->so_snd.sb_flags |= SB_MTXLOCK | SB_OWNLOCK;
+                       /* FALLTHROUGH */
+               case SOCK_DGRAM:
                        so->so_rcv.sb_flags |= SB_MTXLOCK | SB_OWNLOCK;
                        break;
                }
@@ -346,7 +348,10 @@ sofree(struct socket *so, int keep_lock)
                sounsplice(so, so->so_sp->ssp_socket, freeing);
        }
 #endif /* SOCKET_SPLICE */
+
+       mtx_enter(&so->so_snd.sb_mtx);
        sbrelease(so, &so->so_snd);
+       mtx_leave(&so->so_snd.sb_mtx);
 
        /*
         * Regardless on '_locked' postfix, must release solock() before
@@ -569,6 +574,7 @@ sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
        size_t resid;
        int error;
        int atomic = sosendallatonce(so) || top;
+       int dosolock = ((so->so_snd.sb_flags & SB_OWNLOCK) == 0);
 
        if (uio)
                resid = uio->uio_resid;
@@ -601,16 +607,17 @@ sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
 
 #define        snderr(errno)   { error = errno; goto release; }
 
-       solock_shared(so);
+       if (dosolock)
+               solock_shared(so);
 restart:
        if ((error = sblock(so, &so->so_snd, SBLOCKWAIT(flags))) != 0)
                goto out;
+       sb_mtx_lock(&so->so_snd);
        so->so_snd.sb_state |= SS_ISSENDING;
        do {
                if (so->so_snd.sb_state & SS_CANTSENDMORE)
                        snderr(EPIPE);
-               if (so->so_error) {
-                       error = so->so_error;
+               if ((error = READ_ONCE(so->so_error))) {
                        so->so_error = 0;
                        snderr(error);
                }
@@ -638,8 +645,14 @@ restart:
                        if (flags & MSG_DONTWAIT)
                                snderr(EWOULDBLOCK);
                        sbunlock(so, &so->so_snd);
-                       error = sbwait(so, &so->so_snd);
+
+                       if (so->so_snd.sb_flags & SB_MTXLOCK)
+                               error = sbwait_locked(so, &so->so_snd);
+                       else
+                               error = sbwait(so, &so->so_snd);
+
                        so->so_snd.sb_state &= ~SS_ISSENDING;
+                       sb_mtx_unlock(&so->so_snd);
                        if (error)
                                goto out;
                        goto restart;
@@ -654,9 +667,13 @@ restart:
                                if (flags & MSG_EOR)
                                        top->m_flags |= M_EOR;
                        } else {
-                               sounlock_shared(so);
+                               sb_mtx_unlock(&so->so_snd);
+                               if (dosolock)
+                                       sounlock_shared(so);
                                error = m_getuio(&top, atomic, space, uio);
-                               solock_shared(so);
+                               if (dosolock)
+                                       solock_shared(so);
+                               sb_mtx_lock(&so->so_snd);
                                if (error)
                                        goto release;
                                space -= top->m_pkthdr.len;
@@ -668,10 +685,16 @@ restart:
                                so->so_snd.sb_state &= ~SS_ISSENDING;
                        if (top && so->so_options & SO_ZEROIZE)
                                top->m_flags |= M_ZEROIZE;
+                       sb_mtx_unlock(&so->so_snd);
+                       if (!dosolock)
+                               solock_shared(so);
                        if (flags & MSG_OOB)
                                error = pru_sendoob(so, top, addr, control);
                        else
                                error = pru_send(so, top, addr, control);
+                       if (!dosolock)
+                               sounlock_shared(so);
+                       sb_mtx_lock(&so->so_snd);
                        clen = 0;
                        control = NULL;
                        top = NULL;
@@ -682,9 +705,11 @@ restart:
 
 release:
        so->so_snd.sb_state &= ~SS_ISSENDING;
+       sb_mtx_unlock(&so->so_snd);
        sbunlock(so, &so->so_snd);
 out:
-       sounlock_shared(so);
+       if (dosolock)
+               sounlock_shared(so);
        m_freem(top);
        m_freem(control);
        return (error);
index 7fe3df6..af7f432 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: uipc_socket2.c,v 1.150 2024/04/25 17:32:53 bluhm Exp $        */
+/*     $OpenBSD: uipc_socket2.c,v 1.151 2024/04/30 17:59:15 mvs Exp $  */
 /*     $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $       */
 
 /*
@@ -142,10 +142,15 @@ soisdisconnecting(struct socket *so)
        soassertlocked(so);
        so->so_state &= ~SS_ISCONNECTING;
        so->so_state |= SS_ISDISCONNECTING;
+
        mtx_enter(&so->so_rcv.sb_mtx);
        so->so_rcv.sb_state |= SS_CANTRCVMORE;
        mtx_leave(&so->so_rcv.sb_mtx);
+
+       mtx_enter(&so->so_snd.sb_mtx);
        so->so_snd.sb_state |= SS_CANTSENDMORE;
+       mtx_leave(&so->so_snd.sb_mtx);
+
        wakeup(&so->so_timeo);
        sowwakeup(so);
        sorwakeup(so);
@@ -157,10 +162,15 @@ soisdisconnected(struct socket *so)
        soassertlocked(so);
        so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
        so->so_state |= SS_ISDISCONNECTED;
+
        mtx_enter(&so->so_rcv.sb_mtx);
        so->so_rcv.sb_state |= SS_CANTRCVMORE;
        mtx_leave(&so->so_rcv.sb_mtx);
+
+       mtx_enter(&so->so_snd.sb_mtx);
        so->so_snd.sb_state |= SS_CANTSENDMORE;
+       mtx_leave(&so->so_snd.sb_mtx);
+
        wakeup(&so->so_timeo);
        sowwakeup(so);
        sorwakeup(so);
@@ -315,7 +325,9 @@ void
 socantsendmore(struct socket *so)
 {
        soassertlocked(so);
+       mtx_enter(&so->so_snd.sb_mtx);
        so->so_snd.sb_state |= SS_CANTSENDMORE;
+       mtx_leave(&so->so_snd.sb_mtx);
        sowwakeup(so);
 }
 
@@ -666,6 +678,8 @@ soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
 {
        soassertlocked(so);
 
+       mtx_enter(&so->so_rcv.sb_mtx);
+       mtx_enter(&so->so_snd.sb_mtx);
        if (sbreserve(so, &so->so_snd, sndcc))
                goto bad;
        so->so_snd.sb_wat = sndcc;
@@ -673,21 +687,20 @@ soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
                so->so_snd.sb_lowat = MCLBYTES;
        if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
                so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
-
-       mtx_enter(&so->so_rcv.sb_mtx);
-       if (sbreserve(so, &so->so_rcv, rcvcc)) {
-               mtx_leave(&so->so_rcv.sb_mtx);
+       if (sbreserve(so, &so->so_rcv, rcvcc))
                goto bad2;
-       }
        so->so_rcv.sb_wat = rcvcc;
        if (so->so_rcv.sb_lowat == 0)
                so->so_rcv.sb_lowat = 1;
+       mtx_leave(&so->so_snd.sb_mtx);
        mtx_leave(&so->so_rcv.sb_mtx);
 
        return (0);
 bad2:
        sbrelease(so, &so->so_snd);
 bad:
+       mtx_leave(&so->so_snd.sb_mtx);
+       mtx_leave(&so->so_rcv.sb_mtx);
        return (ENOBUFS);
 }