Run bind(2) system call in parallel.
author bluhm <bluhm@openbsd.org>
Mon, 18 Dec 2023 13:11:20 +0000 (13:11 +0000)
committer bluhm <bluhm@openbsd.org>
Mon, 18 Dec 2023 13:11:20 +0000 (13:11 +0000)
For protocols that care about locking, use the shared net lock to
call sobind().  Use the per socket rwlock together with shared net
lock.  This affects protocols UDP, raw IP, and divert.  Move the
inpcb mutex locking into soreceive(), it is only used there.  Add
a comment to describe the current implementation of inpcb locking.

OK mvs@ sashan@

sys/kern/uipc_socket.c
sys/kern/uipc_socket2.c
sys/kern/uipc_syscalls.c
sys/netinet/in_pcb.h
sys/sys/protosw.h

index 6c2f93c..7a3062c 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: uipc_socket.c,v 1.309 2023/08/08 22:07:25 mvs Exp $   */
+/*     $OpenBSD: uipc_socket.c,v 1.310 2023/12/18 13:11:20 bluhm Exp $ */
 /*     $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $        */
 
 /*
@@ -832,8 +832,10 @@ bad:
                *mp = NULL;
 
        solock_shared(so);
+       pru_lock(so);
 restart:
        if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
+               pru_unlock(so);
                sounlock_shared(so);
                return (error);
        }
@@ -900,11 +902,13 @@ restart:
                SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
                SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
                sbunlock(so, &so->so_rcv);
+               pru_unlock(so);
                error = sbwait(so, &so->so_rcv);
                if (error) {
                        sounlock_shared(so);
                        return (error);
                }
+               pru_lock(so);
                goto restart;
        }
 dontblock:
@@ -971,11 +975,13 @@ dontblock:
                        sbsync(&so->so_rcv, nextrecord);
                        if (controlp) {
                                if (pr->pr_domain->dom_externalize) {
+                                       pru_unlock(so);
                                        sounlock_shared(so);
                                        error =
                                            (*pr->pr_domain->dom_externalize)
                                            (cm, controllen, flags);
                                        solock_shared(so);
+                                       pru_lock(so);
                                }
                                *controlp = cm;
                        } else {
@@ -1049,9 +1055,11 @@ dontblock:
                        SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
                        SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
                        resid = uio->uio_resid;
+                       pru_unlock(so);
                        sounlock_shared(so);
                        uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
                        solock_shared(so);
+                       pru_lock(so);
                        if (uio_error)
                                uio->uio_resid = resid - len;
                } else
@@ -1136,6 +1144,7 @@ dontblock:
                        error = sbwait(so, &so->so_rcv);
                        if (error) {
                                sbunlock(so, &so->so_rcv);
+                               pru_unlock(so);
                                sounlock_shared(so);
                                return (0);
                        }
@@ -1182,6 +1191,7 @@ dontblock:
                *flagsp |= flags;
 release:
        sbunlock(so, &so->so_rcv);
+       pru_unlock(so);
        sounlock_shared(so);
        return (error);
 }
index f21e0e2..18f7746 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: uipc_socket2.c,v 1.138 2023/10/30 13:27:53 bluhm Exp $        */
+/*     $OpenBSD: uipc_socket2.c,v 1.139 2023/12/18 13:11:20 bluhm Exp $        */
 /*     $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $       */
 
 /*
@@ -368,7 +368,7 @@ solock_shared(struct socket *so)
        case PF_INET6:
                if (so->so_proto->pr_usrreqs->pru_lock != NULL) {
                        NET_LOCK_SHARED();
-                       pru_lock(so);
+                       rw_enter_write(&so->so_lock);
                } else
                        NET_LOCK();
                break;
@@ -427,7 +427,7 @@ sounlock_shared(struct socket *so)
        case PF_INET:
        case PF_INET6:
                if (so->so_proto->pr_usrreqs->pru_unlock != NULL) {
-                       pru_unlock(so);
+                       rw_exit_write(&so->so_lock);
                        NET_UNLOCK_SHARED();
                } else
                        NET_UNLOCK();
@@ -463,12 +463,12 @@ sosleep_nsec(struct socket *so, void *ident, int prio, const char *wmesg,
        case PF_INET6:
                if (so->so_proto->pr_usrreqs->pru_unlock != NULL &&
                    rw_status(&netlock) == RW_READ) {
-                       pru_unlock(so);
+                       rw_exit_write(&so->so_lock);
                }
                ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs);
                if (so->so_proto->pr_usrreqs->pru_lock != NULL &&
                    rw_status(&netlock) == RW_READ) {
-                       pru_lock(so);
+                       rw_enter_write(&so->so_lock);
                }
                break;
        default:
index 2919c1c..0a58664 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: uipc_syscalls.c,v 1.214 2023/09/23 09:17:21 jan Exp $ */
+/*     $OpenBSD: uipc_syscalls.c,v 1.215 2023/12/18 13:11:20 bluhm Exp $       */
 /*     $NetBSD: uipc_syscalls.c,v 1.19 1996/02/09 19:00:48 christos Exp $      */
 
 /*
@@ -185,9 +185,9 @@ sys_bind(struct proc *p, void *v, register_t *retval)
        if (KTRPOINT(p, KTR_STRUCT))
                ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen));
 #endif
-       solock(so);
+       solock_shared(so);
        error = sobind(so, nam, p);
-       sounlock(so);
+       sounlock_shared(so);
        m_freem(nam);
 out:
        FRELE(fp, p);
index b618a2e..16d1ce3 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: in_pcb.h,v 1.144 2023/12/15 00:24:56 bluhm Exp $      */
+/*     $OpenBSD: in_pcb.h,v 1.145 2023/12/18 13:11:20 bluhm Exp $      */
 /*     $NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $     */
 
 /*
  *     p       inpcb_mtx               pcb mutex
  */
 
+/*
+ * The pcb table mutex guarantees that all inpcb are consistent and
+ * that bind(2) and connect(2) create unique combinations of
+ * laddr/faddr/lport/fport/rtableid.  This mutex is used to protect
+ * both address consistency and inpcb lookup during protocol input.
+ * All writes to inp_[lf]addr take table mutex.  A per socket lock is
+ * needed, so that socket layer input has a consistent view of these
+ * values.
+ *
+ * In soconnect() and sosend() pcb mutex cannot be used.  They eventually
+ * can call IP output which takes pf lock which is a sleeping lock.
+ * Also connect(2) does a route lookup for source selection.  That
+ * triggers a route resolve, which creates a route, which sends a route
+ * message, which needs the route lock, which is a rw-lock.
+ *
+ * On the other hand a mutex should be used in protocol input.  It
+ * does not make sense to do a process switch per packet.  Better spin
+ * until the packet can be processed.
+ *
+ * So there are three locks.  Table mutex is for writing inp_[lf]addr/port
+ * and lookup, socket rw-lock to separate sockets in system calls, and
+ * pcb mutex to protect socket receive buffer.  Changing inp_[lf]addr/port
+ * takes both per socket rw-lock and global table mutex.  Protocol
+ * input only reads inp_[lf]addr/port during lookup and is safe.  System
+ * call only reads when holding socket rw-lock and is safe.  The socket
+ * layer needs pcb mutex only in soreceive().
+ *
+ * Function pru_lock() grabs the pcb mutex and its existence indicates
+ * that a protocol is MP safe.  Otherwise the exclusive net lock is
+ * used.
+ */
+
 struct pf_state_key;
 
 union inpaddru {
index bf17e7b..78b439b 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: protosw.h,v 1.62 2023/05/18 09:59:44 mvs Exp $        */
+/*     $OpenBSD: protosw.h,v 1.63 2023/12/18 13:11:20 bluhm Exp $      */
 /*     $NetBSD: protosw.h,v 1.10 1996/04/09 20:55:32 cgd Exp $ */
 
 /*-
@@ -284,13 +284,15 @@ pru_detach(struct socket *so)
 static inline void
 pru_lock(struct socket *so)
 {
-       (*so->so_proto->pr_usrreqs->pru_lock)(so);
+       if (so->so_proto->pr_usrreqs->pru_lock)
+               (*so->so_proto->pr_usrreqs->pru_lock)(so);
 }
 
 static inline void
 pru_unlock(struct socket *so)
 {
-       (*so->so_proto->pr_usrreqs->pru_unlock)(so);
+       if (so->so_proto->pr_usrreqs->pru_unlock)
+               (*so->so_proto->pr_usrreqs->pru_unlock)(so);
 }
 
 static inline int