From: mvs Date: Sat, 1 May 2021 16:13:12 +0000 (+0000) Subject: Implement per-socket `so_lock' rwlock(9) and use it to protect routing X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=451a9593c7fddd3eaa43c1bee9b110bf8d38b01d;p=openbsd Implement per-socket `so_lock' rwlock(9) and use it to protect routing (PF_ROUTE) sockets. This can be done because we have no cases where one thread should lock two sockets simultaneously. Against the previous version rtm_senddesync_timer() execution was moved to process context. Also this time `so_lock' used for routing sockets only but in the future it will be used to other socket types too. tested by claudio@ ok claudio@ bluhm@ --- diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 52b6c474b74..2859b932c20 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket.c,v 1.258 2021/04/26 08:21:35 claudio Exp $ */ +/* $OpenBSD: uipc_socket.c,v 1.259 2021/05/01 16:13:12 mvs Exp $ */ /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ /* @@ -151,6 +151,7 @@ socreate(int dom, struct socket **aso, int type, int proto) if (prp->pr_type != type) return (EPROTOTYPE); so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO); + rw_init(&so->so_lock, "solock"); sigio_init(&so->so_sigio); TAILQ_INIT(&so->so_q0); TAILQ_INIT(&so->so_q); diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index 940da149093..851a0dfa16d 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket2.c,v 1.108 2021/04/26 08:21:35 claudio Exp $ */ +/* $OpenBSD: uipc_socket2.c,v 1.109 2021/05/01 16:13:13 mvs Exp $ */ /* $NetBSD: uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */ /* @@ -162,6 +162,7 @@ sonewconn(struct socket *head, int connstatus) so = pool_get(&socket_pool, PR_NOWAIT|PR_ZERO); if (so == NULL) return (NULL); + rw_init(&so->so_lock, "solock"); so->so_type = head->so_type; so->so_options = head->so_options &~ SO_ACCEPTCONN; so->so_linger = head->so_linger; @@ -287,6 +288,8 @@ solock(struct socket *so) rw_enter_write(&unp_lock); break; case PF_ROUTE: + rw_enter_write(&so->so_lock); + break; case PF_KEY: default: KERNEL_LOCK(); @@ -313,6 +316,8 @@ sounlock(struct socket *so, int s) rw_exit_write(&unp_lock); break; case PF_ROUTE: + rw_exit_write(&so->so_lock); + break; case PF_KEY: default: KERNEL_UNLOCK(); @@ -332,6 +337,8 @@ soassertlocked(struct socket *so) rw_assert_wrlock(&unp_lock); break; case PF_ROUTE: + rw_assert_wrlock(&so->so_lock); + break; case PF_KEY: default: KERNEL_ASSERT_LOCKED(); @@ -354,6 +361,8 @@ sosleep_nsec(struct socket *so, void *ident, int prio, const char *wmesg, ret = rwsleep_nsec(ident, &unp_lock, prio, wmesg, nsecs); break; case PF_ROUTE: + ret = rwsleep_nsec(ident, &so->so_lock, prio, wmesg, nsecs); + break; case PF_KEY: default: ret = tsleep_nsec(ident, prio, wmesg, nsecs); diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index ac7c163128e..3f8985d6d27 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rtsock.c,v 1.311 2021/04/26 08:21:36 claudio Exp $ */ +/* $OpenBSD: rtsock.c,v 1.312 2021/05/01 16:13:13 mvs Exp $ */ /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ /* @@ -143,7 +143,7 @@ int rt_setsource(unsigned int, struct sockaddr *); /* * Locks used to protect struct members * I immutable after creation - * sK solock (kernel lock) + * s solock */ struct rtpcb { struct socket *rop_socket; /* [I] */ @@ -151,12 +151,12 @@ struct rtpcb { SRPL_ENTRY(rtpcb) rop_list; struct refcnt rop_refcnt; struct timeout rop_timeout; - unsigned int rop_msgfilter; /* [sK] */ - unsigned int rop_flagfilter; /* [sK] */ - unsigned int rop_flags; /* [sK] */ - u_int rop_rtableid; /* [sK] */ + unsigned int rop_msgfilter; /* [s] */ + unsigned int rop_flagfilter; /* [s] */ + unsigned int rop_flags; /* [s] */ + u_int rop_rtableid; /* [s] */ unsigned short rop_proto; /* [I] */ - u_char rop_priority; /* [sK] */ + u_char rop_priority; /* [s] */ }; #define sotortpcb(so) ((struct rtpcb *)(so)->so_pcb) @@ -188,7 +188,7 @@ route_prinit(void) rw_init(&rtptable.rtp_lk, "rtsock"); SRPL_INIT(&rtptable.rtp_list); pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0, - IPL_NONE, PR_WAITOK, "rtpcb", NULL); + IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL); } void @@ -312,7 +312,8 @@ route_attach(struct socket *so, int proto) rop = pool_get(&rtpcb_pool, PR_WAITOK|PR_ZERO); so->so_pcb = rop; /* Init the timeout structure */ - timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so); + timeout_set_flags(&rop->rop_timeout, rtm_senddesync_timer, so, + TIMEOUT_PROC); refcnt_init(&rop->rop_refcnt); rop->rop_socket = so; @@ -352,9 +353,13 @@ route_detach(struct socket *so) rop_list); rw_exit(&rtptable.rtp_lk); + sounlock(so, SL_LOCKED); + /* wait for all references to drop */ refcnt_finalize(&rop->rop_refcnt, "rtsockrefs"); + solock(so); + so->so_pcb = NULL; KASSERT((so->so_state & SS_NOFDREF) == 0); pool_put(&rtpcb_pool, rop); @@ -676,7 +681,7 @@ route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, struct rtentry *rt = NULL; struct rt_addrinfo info; struct ifnet *ifp; - int len, seq, error = 0; + int len, seq, useloopback, error = 0; u_int tableid; u_int8_t prio; u_char vers, type; @@ -686,6 +691,16 @@ route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, return (ENOBUFS); if ((m->m_flags & M_PKTHDR) == 0) panic("route_output"); + + useloopback = so->so_options & SO_USELOOPBACK; + + /* + * The socket can't be closed concurrently because the file + * descriptor reference is still held. + */ + + sounlock(so, SL_LOCKED); + len = m->m_pkthdr.len; if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 || len != mtod(m, struct rt_msghdr *)->rtm_msglen) { @@ -859,13 +874,10 @@ route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, /* * Check to see if we don't want our own messages. */ - if (!(so->so_options & SO_USELOOPBACK)) { - if (rtptable.rtp_count <= 1) { + if (!useloopback) { + if (rtptable.rtp_count == 0) { /* no other listener and no loopback of messages */ -fail: - free(rtm, M_RTABLE, len); - m_freem(m); - return (error); + goto fail; } } if (m_copyback(m, 0, len, rtm, M_NOWAIT)) { @@ -877,6 +889,13 @@ fail: if (m) route_input(m, so, info.rti_info[RTAX_DST] ? info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC); + solock(so); + + return (error); +fail: + free(rtm, M_RTABLE, len); + m_freem(m); + solock(so); return (error); } diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index 18845733bcd..5ae7eef564f 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: socketvar.h,v 1.95 2021/04/26 08:21:36 claudio Exp $ */ +/* $OpenBSD: socketvar.h,v 1.96 2021/05/01 16:13:13 mvs Exp $ */ /* $NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $ */ /*- @@ -37,6 +37,7 @@ #include /* for struct sigio_ref */ #include #include +#include #ifndef _SOCKLEN_T_DEFINED_ #define _SOCKLEN_T_DEFINED_ @@ -53,6 +54,7 @@ TAILQ_HEAD(soqhead, socket); */ struct socket { const struct protosw *so_proto; /* protocol handle */ + struct rwlock so_lock; /* this socket lock */ void *so_pcb; /* protocol control block */ u_int so_state; /* internal state flags SS_*, below */ short so_type; /* generic type, see socket.h */