From: mvs Date: Sun, 11 Feb 2024 18:14:26 +0000 (+0000) Subject: Use `sb_mtx' instead of `inp_mtx' in receive path for inet sockets. X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=c75e434f6f5f50f8d08d05b4de43650989a1e128;p=openbsd Use `sb_mtx' instead of `inp_mtx' in receive path for inet sockets. In soreceve(), we only touch `so_rcv' socket buffer, which has it's own `sb_mtx' mutex(9) for protection. So, we can avoid solock() in this path - it's enough to hold `sb_mtx' in soreceive() and around corresponding sbappend*(). But not right now :) This time we use shared netlock for some inet sockets in the soreceive() path. To protect `so_rcv' buffer we use `inp_mtx' mutex(9) and the pru_lock() to acquire this mutex(9) in socket layer. But the `inp_mtx' mutex belongs to the PCB. We initialize socket before PCB, tcp(4) sockets could exist without PCB, so use `sb_mtx' mutex(9) to protect sockbuf stuff. This diff mechanically replaces `inp_mtx' by `sb_mtx' in the receive path. Only for sockets which already use `inp_mtx'. All other sockets left as is. They will be converted later. Since the `sb_mtx' is optional, the new SB_MTXLOCK flag introduced. If this flag is set on `sb_flags', the `sb_mtx' mutex(9) should be taken. New sb_mtx_lock() and sb_mtx_unlock() was introduced to hide this check. They are temporary and will be replaced by mtx_enter() when all this area will be converted to `sb_mtx' mutex(9). Also, the new sbmtxassertlocked() function introduced to throw corresponding assertion for SB_MTXLOCK marked buffers. This time only sbappendaddr() calls it. This function is also temporary and will be replaced by MTX_ASSERT_LOCKED() later. ok bluhm --- diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 86dbd160af2..9a2bda2aa14 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket.c,v 1.317 2024/02/05 20:21:38 mvs Exp $ */ +/* $OpenBSD: uipc_socket.c,v 1.318 2024/02/11 18:14:26 mvs Exp $ */ /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ /* @@ -150,6 +150,18 @@ soalloc(const struct domain *dp, int wait) TAILQ_INIT(&so->so_q0); TAILQ_INIT(&so->so_q); + switch (dp->dom_family) { + case AF_INET: + case AF_INET6: + switch (dp->dom_protosw->pr_type) { + case SOCK_DGRAM: + case SOCK_RAW: + so->so_rcv.sb_flags |= SB_MTXLOCK; + break; + } + break; + } + return (so); } @@ -825,7 +837,7 @@ restart: sounlock_shared(so); return (error); } - pru_lock(so); + sb_mtx_lock(&so->so_rcv); m = so->so_rcv.sb_mb; #ifdef SOCKET_SPLICE @@ -889,7 +901,7 @@ restart: SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); sbunlock(so, &so->so_rcv); - pru_unlock(so); + sb_mtx_unlock(&so->so_rcv); error = sbwait(so, &so->so_rcv); if (error) { sounlock_shared(so); @@ -961,13 +973,13 @@ dontblock: sbsync(&so->so_rcv, nextrecord); if (controlp) { if (pr->pr_domain->dom_externalize) { - pru_unlock(so); + sb_mtx_unlock(&so->so_rcv); sounlock_shared(so); error = (*pr->pr_domain->dom_externalize) (cm, controllen, flags); solock_shared(so); - pru_lock(so); + sb_mtx_lock(&so->so_rcv); } *controlp = cm; } else { @@ -1041,11 +1053,11 @@ dontblock: SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); resid = uio->uio_resid; - pru_unlock(so); + sb_mtx_unlock(&so->so_rcv); sounlock_shared(so); uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio); solock_shared(so); - pru_lock(so); + sb_mtx_lock(&so->so_rcv); if (uio_error) uio->uio_resid = resid - len; } else @@ -1127,14 +1139,14 @@ dontblock: break; SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); - pru_unlock(so); + sb_mtx_unlock(&so->so_rcv); error = sbwait(so, &so->so_rcv); if (error) { sbunlock(so, &so->so_rcv); sounlock_shared(so); return (0); } - pru_lock(so); + sb_mtx_lock(&so->so_rcv); if ((m = so->so_rcv.sb_mb) != NULL) nextrecord = m->m_nextpkt; } @@ -1168,7 +1180,7 @@ dontblock: (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SS_CANTRCVMORE) == 0) { sbunlock(so, &so->so_rcv); - pru_unlock(so); + sb_mtx_unlock(&so->so_rcv); goto restart; } @@ -1179,7 +1191,7 @@ dontblock: *flagsp |= flags; release: sbunlock(so, &so->so_rcv); - pru_unlock(so); + sb_mtx_unlock(&so->so_rcv); sounlock_shared(so); return (error); } diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index cc809f67e16..9322cd2735c 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket2.c,v 1.142 2024/02/05 20:21:38 mvs Exp $ */ +/* $OpenBSD: uipc_socket2.c,v 1.143 2024/02/11 18:14:26 mvs Exp $ */ /* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */ /* @@ -504,6 +504,16 @@ sosleep_nsec(struct socket *so, void *ident, int prio, const char *wmesg, return ret; } +void +sbmtxassertlocked(struct socket *so, struct sockbuf *sb) +{ + if (sb->sb_flags & SB_MTXLOCK) { + if (splassert_ctl > 0 && mtx_owned(&sb->sb_mtx) == 0) + splassert_fail(0, RW_WRITE, __func__); + } else + soassertlocked(so); +} + /* * Wait for data to arrive at/drain from a socket buffer. */ @@ -925,7 +935,7 @@ sbappendaddr(struct socket *so, struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m, *n, *nlast; int space = asa->sa_len; - soassertlocked(so); + sbmtxassertlocked(so, sb); if (m0 && (m0->m_flags & M_PKTHDR) == 0) panic("sbappendaddr"); diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index 593bfcfbc11..d5ee3004bcb 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_divert.c,v 1.93 2024/02/03 22:50:09 mvs Exp $ */ +/* $OpenBSD: ip_divert.c,v 1.94 2024/02/11 18:14:26 mvs Exp $ */ /* * Copyright (c) 2009 Michele Marchetto @@ -241,14 +241,14 @@ divert_packet(struct mbuf *m, int dir, u_int16_t divert_port) in_proto_cksum_out(m, NULL); } - mtx_enter(&inp->inp_mtx); so = inp->inp_socket; + mtx_enter(&so->so_rcv.sb_mtx); if (sbappendaddr(so, &so->so_rcv, sintosa(&sin), m, NULL) == 0) { - mtx_leave(&inp->inp_mtx); + mtx_leave(&so->so_rcv.sb_mtx); divstat_inc(divs_fullsock); goto bad; } - mtx_leave(&inp->inp_mtx); + mtx_leave(&so->so_rcv.sb_mtx); sorwakeup(so); in_pcbunref(inp); diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c index e318493fcd0..3e9b00ec265 100644 --- a/sys/netinet/ip_mroute.c +++ b/sys/netinet/ip_mroute.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_mroute.c,v 1.140 2023/12/06 09:27:17 bluhm Exp $ */ +/* $OpenBSD: ip_mroute.c,v 1.141 2024/02/11 18:14:26 mvs Exp $ */ /* $NetBSD: ip_mroute.c,v 1.85 2004/04/26 01:31:57 matt Exp $ */ /* @@ -1051,12 +1051,11 @@ int socket_send(struct socket *so, struct mbuf *mm, struct sockaddr_in *src) { if (so != NULL) { - struct inpcb *inp = sotoinpcb(so); int ret; - mtx_enter(&inp->inp_mtx); + mtx_enter(&so->so_rcv.sb_mtx); ret = sbappendaddr(so, &so->so_rcv, sintosa(src), mm, NULL); - mtx_leave(&inp->inp_mtx); + mtx_leave(&so->so_rcv.sb_mtx); if (ret != 0) { sorwakeup(so); diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 5f8047c77e7..0075382bf54 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -1,4 +1,4 @@ -/* $OpenBSD: raw_ip.c,v 1.155 2024/02/03 22:50:09 mvs Exp $ */ +/* $OpenBSD: raw_ip.c,v 1.156 2024/02/11 18:14:26 mvs Exp $ */ /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ /* @@ -220,24 +220,24 @@ rip_input(struct mbuf **mp, int *offp, int proto, int af) else n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) { + struct socket *so = inp->inp_socket; int ret; if (inp->inp_flags & INP_CONTROLOPTS || - inp->inp_socket->so_options & SO_TIMESTAMP) + so->so_options & SO_TIMESTAMP) ip_savecontrol(inp, &opts, ip, n); - mtx_enter(&inp->inp_mtx); - ret = sbappendaddr(inp->inp_socket, - &inp->inp_socket->so_rcv, + mtx_enter(&so->so_rcv.sb_mtx); + ret = sbappendaddr(so, &so->so_rcv, sintosa(&ripsrc), n, opts); - mtx_leave(&inp->inp_mtx); + mtx_leave(&so->so_rcv.sb_mtx); if (ret == 0) { /* should notify about lost packet */ m_freem(n); m_freem(opts); } else - sorwakeup(inp->inp_socket); + sorwakeup(so); } in_pcbunref(inp); } diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 18b0ae983bc..65f6a745d73 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: udp_usrreq.c,v 1.317 2024/02/03 22:50:09 mvs Exp $ */ +/* $OpenBSD: udp_usrreq.c,v 1.318 2024/02/11 18:14:26 mvs Exp $ */ /* $NetBSD: udp_usrreq.c,v 1.28 1996/03/16 23:54:03 christos Exp $ */ /* @@ -697,15 +697,15 @@ udp_sbappend(struct inpcb *inp, struct mbuf *m, struct ip *ip, #endif m_adj(m, hlen); - mtx_enter(&inp->inp_mtx); + mtx_enter(&so->so_rcv.sb_mtx); if (sbappendaddr(so, &so->so_rcv, srcaddr, m, opts) == 0) { - mtx_leave(&inp->inp_mtx); + mtx_leave(&so->so_rcv.sb_mtx); udpstat_inc(udps_fullsock); m_freem(m); m_freem(opts); return; } - mtx_leave(&inp->inp_mtx); + mtx_leave(&so->so_rcv.sb_mtx); sorwakeup(so); } diff --git a/sys/netinet6/ip6_divert.c b/sys/netinet6/ip6_divert.c index 4b75c3c2adf..c7b3e3ed740 100644 --- a/sys/netinet6/ip6_divert.c +++ b/sys/netinet6/ip6_divert.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip6_divert.c,v 1.93 2024/02/11 01:27:45 bluhm Exp $ */ +/* $OpenBSD: ip6_divert.c,v 1.94 2024/02/11 18:14:27 mvs Exp $ */ /* * Copyright (c) 2009 Michele Marchetto @@ -249,14 +249,14 @@ divert6_packet(struct mbuf *m, int dir, u_int16_t divert_port) in6_proto_cksum_out(m, NULL); } - mtx_enter(&inp->inp_mtx); so = inp->inp_socket; + mtx_enter(&so->so_rcv.sb_mtx); if (sbappendaddr(so, &so->so_rcv, sin6tosa(&sin6), m, NULL) == 0) { - mtx_leave(&inp->inp_mtx); + mtx_leave(&so->so_rcv.sb_mtx); div6stat_inc(div6s_fullsock); goto bad; } - mtx_leave(&inp->inp_mtx); + mtx_leave(&so->so_rcv.sb_mtx); sorwakeup(so); in_pcbunref(inp); diff --git a/sys/netinet6/ip6_mroute.c b/sys/netinet6/ip6_mroute.c index 759a677b30e..9af9ed593d1 100644 --- a/sys/netinet6/ip6_mroute.c +++ b/sys/netinet6/ip6_mroute.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip6_mroute.c,v 1.139 2024/02/03 22:50:09 mvs Exp $ */ +/* $OpenBSD: ip6_mroute.c,v 1.140 2024/02/11 18:14:27 mvs Exp $ */ /* $NetBSD: ip6_mroute.c,v 1.59 2003/12/10 09:28:38 itojun Exp $ */ /* $KAME: ip6_mroute.c,v 1.45 2001/03/25 08:38:51 itojun Exp $ */ @@ -856,17 +856,16 @@ int socket6_send(struct socket *so, struct mbuf *mm, struct sockaddr_in6 *src) { if (so != NULL) { - struct inpcb *inp = sotoinpcb(so); int ret; - mtx_enter(&inp->inp_mtx); + mtx_enter(&so->so_rcv.sb_mtx); ret = sbappendaddr(so, &so->so_rcv, sin6tosa(src), mm, NULL); - if (ret != 0) - sorwakeup(so); - mtx_leave(&inp->inp_mtx); + mtx_leave(&so->so_rcv.sb_mtx); - if (ret != 0) + if (ret != 0) { + sorwakeup(so); return 0; + } } m_freem(mm); return -1; diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index 4ae5ff2ba1a..653c3005608 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -1,4 +1,4 @@ -/* $OpenBSD: raw_ip6.c,v 1.180 2024/02/03 22:50:09 mvs Exp $ */ +/* $OpenBSD: raw_ip6.c,v 1.181 2024/02/11 18:14:27 mvs Exp $ */ /* $KAME: raw_ip6.c,v 1.69 2001/03/04 15:55:44 itojun Exp $ */ /* @@ -263,6 +263,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto, int af) else n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) { + struct socket *so = inp->inp_socket; int ret; if (inp->inp_flags & IN6P_CONTROLOPTS) @@ -270,11 +271,10 @@ rip6_input(struct mbuf **mp, int *offp, int proto, int af) /* strip intermediate headers */ m_adj(n, *offp); - mtx_enter(&inp->inp_mtx); - ret = sbappendaddr(inp->inp_socket, - &inp->inp_socket->so_rcv, + mtx_enter(&so->so_rcv.sb_mtx); + ret = sbappendaddr(so, &so->so_rcv, sin6tosa(&rip6src), n, opts); - mtx_leave(&inp->inp_mtx); + mtx_leave(&so->so_rcv.sb_mtx); if (ret == 0) { /* should notify about lost packet */ @@ -282,7 +282,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto, int af) m_freem(opts); rip6stat_inc(rip6s_fullsock); } else - sorwakeup(inp->inp_socket); + sorwakeup(so); } in_pcbunref(inp); } diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index f57773e63b8..f1922f6b6d7 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: socketvar.h,v 1.122 2024/02/03 22:50:09 mvs Exp $ */ +/* $OpenBSD: socketvar.h,v 1.123 2024/02/11 18:14:27 mvs Exp $ */ /* $NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $ */ /*- @@ -134,6 +134,7 @@ struct socket { #define SB_ASYNC 0x10 /* ASYNC I/O, need signals */ #define SB_SPLICE 0x20 /* buffer is splice source or drain */ #define SB_NOINTR 0x40 /* operations not interruptible */ +#define SB_MTXLOCK 0x80 /* use sb_mtx for sockbuf protection */ void (*so_upcall)(struct socket *so, caddr_t arg, int waitf); caddr_t so_upcallarg; /* Arg for above */ @@ -197,6 +198,22 @@ sorele(struct socket *so) #define isspliced(so) ((so)->so_sp && (so)->so_sp->ssp_socket) #define issplicedback(so) ((so)->so_sp && (so)->so_sp->ssp_soback) +static inline void +sb_mtx_lock(struct sockbuf *sb) +{ + if (sb->sb_flags & SB_MTXLOCK) + mtx_enter(&sb->sb_mtx); +} + +static inline void +sb_mtx_unlock(struct sockbuf *sb) +{ + if (sb->sb_flags & SB_MTXLOCK) + mtx_leave(&sb->sb_mtx); +} + +void sbmtxassertlocked(struct socket *so, struct sockbuf *); + /* * Do we need to notify the other side when I/O is possible? */