-/* $OpenBSD: uipc_socket.c,v 1.309 2023/08/08 22:07:25 mvs Exp $ */
+/* $OpenBSD: uipc_socket.c,v 1.310 2023/12/18 13:11:20 bluhm Exp $ */
/* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */
/*
*mp = NULL;
solock_shared(so);
+ pru_lock(so);
restart:
if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
+ pru_unlock(so);
sounlock_shared(so);
return (error);
}
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
sbunlock(so, &so->so_rcv);
+ pru_unlock(so);
error = sbwait(so, &so->so_rcv);
if (error) {
sounlock_shared(so);
return (error);
}
+ pru_lock(so);
goto restart;
}
dontblock:
sbsync(&so->so_rcv, nextrecord);
if (controlp) {
if (pr->pr_domain->dom_externalize) {
+ pru_unlock(so);
sounlock_shared(so);
error =
(*pr->pr_domain->dom_externalize)
(cm, controllen, flags);
solock_shared(so);
+ pru_lock(so);
}
*controlp = cm;
} else {
SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
resid = uio->uio_resid;
+ pru_unlock(so);
sounlock_shared(so);
uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
solock_shared(so);
+ pru_lock(so);
if (uio_error)
uio->uio_resid = resid - len;
} else
error = sbwait(so, &so->so_rcv);
if (error) {
sbunlock(so, &so->so_rcv);
+ pru_unlock(so);
sounlock_shared(so);
return (0);
}
*flagsp |= flags;
release:
sbunlock(so, &so->so_rcv);
+ pru_unlock(so);
sounlock_shared(so);
return (error);
}
-/* $OpenBSD: uipc_socket2.c,v 1.138 2023/10/30 13:27:53 bluhm Exp $ */
+/* $OpenBSD: uipc_socket2.c,v 1.139 2023/12/18 13:11:20 bluhm Exp $ */
/* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */
/*
case PF_INET6:
if (so->so_proto->pr_usrreqs->pru_lock != NULL) {
NET_LOCK_SHARED();
- pru_lock(so);
+ rw_enter_write(&so->so_lock);
} else
NET_LOCK();
break;
case PF_INET:
case PF_INET6:
if (so->so_proto->pr_usrreqs->pru_unlock != NULL) {
- pru_unlock(so);
+ rw_exit_write(&so->so_lock);
NET_UNLOCK_SHARED();
} else
NET_UNLOCK();
case PF_INET6:
if (so->so_proto->pr_usrreqs->pru_unlock != NULL &&
rw_status(&netlock) == RW_READ) {
- pru_unlock(so);
+ rw_exit_write(&so->so_lock);
}
ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs);
if (so->so_proto->pr_usrreqs->pru_lock != NULL &&
rw_status(&netlock) == RW_READ) {
- pru_lock(so);
+ rw_enter_write(&so->so_lock);
}
break;
default:
-/* $OpenBSD: uipc_syscalls.c,v 1.214 2023/09/23 09:17:21 jan Exp $ */
+/* $OpenBSD: uipc_syscalls.c,v 1.215 2023/12/18 13:11:20 bluhm Exp $ */
/* $NetBSD: uipc_syscalls.c,v 1.19 1996/02/09 19:00:48 christos Exp $ */
/*
if (KTRPOINT(p, KTR_STRUCT))
ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen));
#endif
- solock(so);
+ solock_shared(so);
error = sobind(so, nam, p);
- sounlock(so);
+ sounlock_shared(so);
m_freem(nam);
out:
FRELE(fp, p);
-/* $OpenBSD: in_pcb.h,v 1.144 2023/12/15 00:24:56 bluhm Exp $ */
+/* $OpenBSD: in_pcb.h,v 1.145 2023/12/18 13:11:20 bluhm Exp $ */
/* $NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $ */
/*
* p inpcb_mtx pcb mutex
*/
+/*
+ * The pcb table mutex guarantees that all inpcb are consistent and
+ * that bind(2) and connect(2) create unique combinations of
+ * laddr/faddr/lport/fport/rtableid. This mutex is used to protect
+ * both address consistency and inpcb lookup during protocol input.
+ * All writes to inp_[lf]addr take table mutex. A per socket lock is
+ * needed, so that socket layer input has a consistent view of these
+ * values.
+ *
+ * In soconnect() and sosend() pcb mutex cannot be used. They eventually
+ * can call IP output which takes pf lock which is a sleeping lock.
+ * Also connect(2) does a route lookup for source selection. There
+ * route resolution happens, which creates a route, which sends a route
+ * message, which needs the route lock, which is a rw-lock.
+ *
+ * On the other hand a mutex should be used in protocol input. It
+ * does not make sense to do a process switch per packet. Better spin
+ * until the packet can be processed.
+ *
+ * So there are three locks. Table mutex is for writing inp_[lf]addr/port
+ * and lookup, socket rw-lock to separate sockets in system calls, and
+ * pcb mutex to protect socket receive buffer. Changing inp_[lf]addr/port
+ * takes both per socket rw-lock and global table mutex. Protocol
+ * input only reads inp_[lf]addr/port during lookup and is safe. System
+ * call only reads when holding socket rw-lock and is safe. The socket
+ * layer needs pcb mutex only in soreceive().
+ *
+ * Function pru_lock() grabs the pcb mutex and its existence indicates
+ * that a protocol is MP safe. Otherwise the exclusive net lock is
+ * used.
+ */
+
struct pf_state_key;
union inpaddru {
-/* $OpenBSD: protosw.h,v 1.62 2023/05/18 09:59:44 mvs Exp $ */
+/* $OpenBSD: protosw.h,v 1.63 2023/12/18 13:11:20 bluhm Exp $ */
/* $NetBSD: protosw.h,v 1.10 1996/04/09 20:55:32 cgd Exp $ */
/*-
/*
 * Invoke the protocol's pru_lock hook for socket `so'.
 *
 * The hook is reached through so->so_proto->pr_usrreqs.  In this change
 * the unconditional call is replaced by one guarded with a NULL check,
 * so the wrapper does nothing for a protocol that leaves pru_lock unset.
 */
static inline void
pru_lock(struct socket *so)
{
- (*so->so_proto->pr_usrreqs->pru_lock)(so);
+ if (so->so_proto->pr_usrreqs->pru_lock)
+ (*so->so_proto->pr_usrreqs->pru_lock)(so);
}
/*
 * Invoke the protocol's pru_unlock hook for socket `so'.
 *
 * The hook is reached through so->so_proto->pr_usrreqs.  In this change
 * the unconditional call is replaced by one guarded with a NULL check,
 * so the wrapper does nothing for a protocol that leaves pru_unlock unset.
 */
static inline void
pru_unlock(struct socket *so)
{
- (*so->so_proto->pr_usrreqs->pru_unlock)(so);
+ if (so->so_proto->pr_usrreqs->pru_unlock)
+ (*so->so_proto->pr_usrreqs->pru_unlock)(so);
}
static inline int