From cd28665a485085cced1f2ddbb04be43bbf0c9855 Mon Sep 17 00:00:00 2001 From: bluhm Date: Fri, 1 Dec 2023 15:30:46 +0000 Subject: [PATCH] Set inp address, port and rtable together with inpcb hash. The inpcb hash table is protected by table->inpt_mtx. The hash is based on addresses, ports, and routing table. These fields were not synchronized with the hash. Put writes and hash update into the same critical section. Move the updates from ip_ctloutput(), ip6_ctloutput(), syn_cache_get(), tcp_connect(), udp_disconnect() to dedicated inpcb set functions. There they use the same table mutex as in_pcbrehash(). in_pcbbind(), in_pcbconnect(), and in6_pcbconnect() need more work and are not included yet. OK sashan@ mvs@ --- sys/netinet/in_pcb.c | 111 ++++++++++++++++++++++++++++++++------ sys/netinet/in_pcb.h | 7 ++- sys/netinet/ip_output.c | 9 +--- sys/netinet/tcp_input.c | 35 +++++------- sys/netinet/tcp_usrreq.c | 3 +- sys/netinet/udp_usrreq.c | 11 +--- sys/netinet6/in6_pcb.c | 8 ++- sys/netinet6/ip6_output.c | 9 +--- 8 files changed, 127 insertions(+), 66 deletions(-) diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 41cfebc6b41..502354e024a 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -1,4 +1,4 @@ -/* $OpenBSD: in_pcb.c,v 1.279 2023/11/29 18:30:48 bluhm Exp $ */ +/* $OpenBSD: in_pcb.c,v 1.280 2023/12/01 15:30:46 bluhm Exp $ */ /* $NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $ */ /* @@ -268,6 +268,7 @@ in_pcballoc(struct socket *so, struct inpcbtable *table, int wait) int in_pcbbind(struct inpcb *inp, struct mbuf *nam, struct proc *p) { + struct inpcbtable *table = inp->inp_table; struct socket *so = inp->inp_socket; u_int16_t lport = 0; int wild = 0; @@ -341,7 +342,10 @@ in_pcbbind(struct inpcb *inp, struct mbuf *nam, struct proc *p) } } inp->inp_lport = lport; + mtx_enter(&table->inpt_mtx); in_pcbrehash(inp); + mtx_leave(&table->inpt_mtx); + return (0); } @@ -480,6 +484,7 @@ in_pcbpickport(u_int16_t *lport, const void *laddr, 
int wild, int in_pcbconnect(struct inpcb *inp, struct mbuf *nam) { + struct inpcbtable *table = inp->inp_table; struct in_addr ina; struct sockaddr_in *sin; struct inpcb *t; @@ -526,7 +531,10 @@ in_pcbconnect(struct inpcb *inp, struct mbuf *nam) } inp->inp_faddr = sin->sin_addr; inp->inp_fport = sin->sin_port; + mtx_enter(&table->inpt_mtx); in_pcbrehash(inp); + mtx_leave(&table->inpt_mtx); + #if NSTOEPLITZ > 0 inp->inp_flowid = stoeplitz_ip4port(inp->inp_faddr.s_addr, inp->inp_laddr.s_addr, inp->inp_fport, inp->inp_lport); @@ -544,20 +552,7 @@ in_pcbdisconnect(struct inpcb *inp) pf_inp_unlink(inp); } #endif - switch (sotopf(inp->inp_socket)) { -#ifdef INET6 - case PF_INET6: - inp->inp_faddr6 = in6addr_any; - break; -#endif - case PF_INET: - inp->inp_faddr.s_addr = INADDR_ANY; - break; - } - - inp->inp_fport = 0; inp->inp_flowid = 0; - in_pcbrehash(inp); if (inp->inp_socket->so_state & SS_NOFDREF) in_pcbdetach(inp); } @@ -1044,11 +1039,11 @@ in_pcbrehash(struct inpcb *inp) { struct inpcbtable *table = inp->inp_table; - mtx_enter(&table->inpt_mtx); + MUTEX_ASSERT_LOCKED(&table->inpt_mtx); + LIST_REMOVE(inp, inp_lhash); LIST_REMOVE(inp, inp_hash); in_pcbhash_insert(inp); - mtx_leave(&table->inpt_mtx); } void @@ -1266,3 +1261,87 @@ in_pcblookup_listen(struct inpcbtable *table, struct in_addr laddr, #endif return (inp); } + +int +in_pcbset_rtableid(struct inpcb *inp, u_int rtableid) +{ + struct inpcbtable *table = inp->inp_table; + + mtx_enter(&table->inpt_mtx); + if (inp->inp_lport) { + mtx_leave(&table->inpt_mtx); + return (EBUSY); + } + inp->inp_rtableid = rtableid; + in_pcbrehash(inp); + mtx_leave(&table->inpt_mtx); + + return (0); +} + +void +in_pcbset_laddr(struct inpcb *inp, const struct sockaddr *sa, u_int rtableid) +{ + struct inpcbtable *table = inp->inp_table; + + mtx_enter(&table->inpt_mtx); + inp->inp_rtableid = rtableid; +#ifdef INET6 + if (ISSET(inp->inp_flags, INP_IPV6)) { + const struct sockaddr_in6 *sin6; + + KASSERT(sa->sa_family == AF_INET6); + sin6 
= satosin6_const(sa); + inp->inp_lport = sin6->sin6_port; + inp->inp_laddr6 = sin6->sin6_addr; + } else +#endif + { + const struct sockaddr_in *sin; + + KASSERT(sa->sa_family == AF_INET); + sin = satosin_const(sa); + inp->inp_lport = sin->sin_port; + inp->inp_laddr = sin->sin_addr; + } + in_pcbrehash(inp); + mtx_leave(&table->inpt_mtx); +} + +void +in_pcbunset_faddr(struct inpcb *inp) +{ + struct inpcbtable *table = inp->inp_table; + + mtx_enter(&table->inpt_mtx); +#ifdef INET6 + if (ISSET(inp->inp_flags, INP_IPV6)) + inp->inp_faddr6 = in6addr_any; + else +#endif + inp->inp_faddr.s_addr = INADDR_ANY; + inp->inp_fport = 0; + in_pcbrehash(inp); + mtx_leave(&table->inpt_mtx); +} + +void +in_pcbunset_laddr(struct inpcb *inp) +{ + struct inpcbtable *table = inp->inp_table; + + mtx_enter(&table->inpt_mtx); +#ifdef INET6 + if (ISSET(inp->inp_flags, INP_IPV6)) { + inp->inp_faddr6 = in6addr_any; + inp->inp_laddr6 = in6addr_any; + } else +#endif + { + inp->inp_faddr.s_addr = INADDR_ANY; + inp->inp_laddr.s_addr = INADDR_ANY; + } + inp->inp_fport = 0; + in_pcbrehash(inp); + mtx_leave(&table->inpt_mtx); +} diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 0bb4bac28a9..0a3bb35a43b 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: in_pcb.h,v 1.140 2023/11/29 18:30:48 bluhm Exp $ */ +/* $OpenBSD: in_pcb.h,v 1.141 2023/12/01 15:30:46 bluhm Exp $ */ /* $NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $ */ /* @@ -335,5 +335,10 @@ void in6_pcbnotify(struct inpcbtable *, struct sockaddr_in6 *, int in6_selecthlim(struct inpcb *); int in_pcbpickport(u_int16_t *, const void *, int, const struct inpcb *, struct proc *); +int in_pcbset_rtableid(struct inpcb *, u_int); +void in_pcbset_laddr(struct inpcb *, const struct sockaddr *, u_int); +void in_pcbunset_faddr(struct inpcb *); +void in_pcbunset_laddr(struct inpcb *); + #endif /* _KERNEL */ #endif /* _NETINET_IN_PCB_H_ */ diff --git a/sys/netinet/ip_output.c 
b/sys/netinet/ip_output.c index fb9cc84fd14..87cf83c8171 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_output.c,v 1.391 2023/11/26 22:08:10 bluhm Exp $ */ +/* $OpenBSD: ip_output.c,v 1.392 2023/12/01 15:30:47 bluhm Exp $ */ /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ /* @@ -1082,12 +1082,7 @@ ip_ctloutput(int op, struct socket *so, int level, int optname, error = EINVAL; break; } - if (inp->inp_lport) { - error = EBUSY; - break; - } - inp->inp_rtableid = rtid; - in_pcbrehash(inp); + error = in_pcbset_rtableid(inp, rtid); break; case IP_PIPEX: if (m != NULL && m->m_len == sizeof(int)) diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 349cadfede7..ef28525209d 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_input.c,v 1.396 2023/11/30 10:21:56 bluhm Exp $ */ +/* $OpenBSD: tcp_input.c,v 1.397 2023/12/01 15:30:47 bluhm Exp $ */ /* $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $ */ /* @@ -3489,6 +3489,7 @@ syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, struct tcpcb *tp = NULL; struct mbuf *am; struct socket *oso; + u_int rtableid; NET_ASSERT_LOCKED(); @@ -3553,37 +3554,25 @@ syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, #endif /* INET6 */ { inp->inp_ip.ip_ttl = oldinp->inp_ip.ip_ttl; + inp->inp_options = ip_srcroute(m); + if (inp->inp_options == NULL) { + inp->inp_options = sc->sc_ipopts; + sc->sc_ipopts = NULL; + } } + /* inherit rtable from listening socket */ + rtableid = sc->sc_rtableid; #if NPF > 0 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { struct pf_divert *divert; divert = pf_find_divert(m); KASSERT(divert != NULL); - inp->inp_rtableid = divert->rdomain; - } else -#endif - /* inherit rtable from listening socket */ - inp->inp_rtableid = sc->sc_rtableid; - - inp->inp_lport = th->th_dport; - switch (src->sa_family) { -#ifdef INET6 - case 
AF_INET6: - inp->inp_laddr6 = satosin6(dst)->sin6_addr; - break; -#endif /* INET6 */ - case AF_INET: - inp->inp_laddr = satosin(dst)->sin_addr; - inp->inp_options = ip_srcroute(m); - if (inp->inp_options == NULL) { - inp->inp_options = sc->sc_ipopts; - sc->sc_ipopts = NULL; - } - break; + rtableid = divert->rdomain; } - in_pcbrehash(inp); +#endif + in_pcbset_laddr(inp, dst, rtableid); /* * Give the new socket our cached route reference. diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index e7f9767e0ae..5f06150ff6a 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_usrreq.c,v 1.225 2023/12/01 14:08:03 bluhm Exp $ */ +/* $OpenBSD: tcp_usrreq.c,v 1.226 2023/12/01 15:30:47 bluhm Exp $ */ /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ /* @@ -656,6 +656,7 @@ tcp_connect(struct socket *so, struct mbuf *nam) tp->t_template = tcp_template(tp); if (tp->t_template == 0) { + in_pcbunset_faddr(inp); in_pcbdisconnect(inp); error = ENOBUFS; goto out; diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 836939175a0..7ad852405de 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: udp_usrreq.c,v 1.311 2023/12/01 14:08:03 bluhm Exp $ */ +/* $OpenBSD: udp_usrreq.c,v 1.312 2023/12/01 15:30:47 bluhm Exp $ */ /* $NetBSD: udp_usrreq.c,v 1.28 1996/03/16 23:54:03 christos Exp $ */ /* @@ -1184,14 +1184,7 @@ udp_disconnect(struct socket *so) if (inp->inp_faddr.s_addr == INADDR_ANY) return (ENOTCONN); } - -#ifdef INET6 - if (inp->inp_flags & INP_IPV6) - inp->inp_laddr6 = in6addr_any; - else -#endif /* INET6 */ - inp->inp_laddr.s_addr = INADDR_ANY; - + in_pcbunset_laddr(inp); in_pcbdisconnect(inp); so->so_state &= ~SS_ISCONNECTED; /* XXX */ diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 6529063e49d..83b6caa08b6 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -1,4 +1,4 @@ -/* $OpenBSD: in6_pcb.c,v 
1.127 2023/12/01 14:08:04 bluhm Exp $ */ +/* $OpenBSD: in6_pcb.c,v 1.128 2023/12/01 15:30:47 bluhm Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -245,6 +245,7 @@ in6_pcbaddrisavail(struct inpcb *inp, struct sockaddr_in6 *sin6, int wild, int in6_pcbconnect(struct inpcb *inp, struct mbuf *nam) { + struct inpcbtable *table = inp->inp_table; const struct in6_addr *in6a; struct sockaddr_in6 *sin6; struct inpcb *t; @@ -312,6 +313,10 @@ in6_pcbconnect(struct inpcb *inp, struct mbuf *nam) } inp->inp_faddr6 = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; + mtx_enter(&table->inpt_mtx); + in_pcbrehash(inp); + mtx_leave(&table->inpt_mtx); + inp->inp_flowinfo &= ~IPV6_FLOWLABEL_MASK; if (ip6_auto_flowlabel) inp->inp_flowinfo |= @@ -320,7 +325,6 @@ in6_pcbconnect(struct inpcb *inp, struct mbuf *nam) inp->inp_flowid = stoeplitz_ip6port(&inp->inp_faddr6, &inp->inp_laddr6, inp->inp_fport, inp->inp_lport); #endif - in_pcbrehash(inp); return (0); } diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 96e0bea3b98..8366d833cf2 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip6_output.c,v 1.281 2023/11/28 13:23:20 bluhm Exp $ */ +/* $OpenBSD: ip6_output.c,v 1.282 2023/12/01 15:30:47 bluhm Exp $ */ /* $KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $ */ /* @@ -1381,12 +1381,7 @@ do { \ error = EINVAL; break; } - if (inp->inp_lport) { - error = EBUSY; - break; - } - inp->inp_rtableid = rtid; - in_pcbrehash(inp); + error = in_pcbset_rtableid(inp, rtid); break; case IPV6_PIPEX: if (m != NULL && m->m_len == sizeof(int)) -- 2.20.1