From 93536db294f52bc74669089161e04f33a62520f5 Mon Sep 17 00:00:00 2001 From: bluhm Date: Fri, 12 Apr 2024 16:07:09 +0000 Subject: [PATCH] Split single TCP inpcb table into IPv4 and IPv6 parts. With two separate TCP hash tables, each one becomes smaller. When we remove the exclusive net lock from TCP, contention on internet PCB table mutex will be reduced. UDP has been split earlier into IPv4 and IPv6. Replace branch conditions based on INP_IPV6 with assertions. OK mvs@ --- sys/kern/kern_sysctl.c | 8 +++++++- sys/net/pf.c | 20 +++++++++++--------- sys/netinet/in_pcb.c | 24 ++++++++---------------- sys/netinet/tcp_input.c | 24 ++++++++++++------------ sys/netinet/tcp_subr.c | 29 +++++++++++++---------------- sys/netinet/tcp_usrreq.c | 31 +++++++++++++++++++------------ sys/netinet/tcp_var.h | 4 ++-- sys/netinet/udp_usrreq.c | 16 ++++++++-------- sys/netinet6/in6_pcb.c | 9 ++++----- 9 files changed, 84 insertions(+), 81 deletions(-) diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 9ff340ca2ea..5d0d1a8a851 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_sysctl.c,v 1.426 2024/03/29 06:50:06 miod Exp $ */ +/* $OpenBSD: kern_sysctl.c,v 1.427 2024/04/12 16:07:09 bluhm Exp $ */ /* $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */ /*- @@ -1482,6 +1482,12 @@ sysctl_file(int *name, u_int namelen, char *where, size_t *sizep, TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue) FILLSO(inp->inp_socket); mtx_leave(&tcbtable.inpt_mtx); +#ifdef INET6 + mtx_enter(&tcb6table.inpt_mtx); + TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue) + FILLSO(inp->inp_socket); + mtx_leave(&tcb6table.inpt_mtx); +#endif mtx_enter(&udbtable.inpt_mtx); TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) FILLSO(inp->inp_socket); diff --git a/sys/net/pf.c b/sys/net/pf.c index f8e6f20bfa6..e0e31b754eb 100644 --- a/sys/net/pf.c +++ b/sys/net/pf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.1193 2024/01/10 16:44:30 bluhm Exp $ */ +/* $OpenBSD: pf.c,v 1.1194 2024/04/12 16:07:09 bluhm Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -3788,7 +3788,7 @@ pf_socket_lookup(struct pf_pdesc *pd) { struct pf_addr *saddr, *daddr; u_int16_t sport, dport; - struct inpcbtable *tb; + struct inpcbtable *table; struct inpcb *inp; pd->lookup.uid = -1; @@ -3800,14 +3800,14 @@ pf_socket_lookup(struct pf_pdesc *pd) dport = pd->hdr.tcp.th_dport; PF_ASSERT_LOCKED(); NET_ASSERT_LOCKED(); - tb = &tcbtable; + table = &tcbtable; break; case IPPROTO_UDP: sport = pd->hdr.udp.uh_sport; dport = pd->hdr.udp.uh_dport; PF_ASSERT_LOCKED(); NET_ASSERT_LOCKED(); - tb = &udbtable; + table = &udbtable; break; default: return (-1); @@ -3830,10 +3830,10 @@ pf_socket_lookup(struct pf_pdesc *pd) * Fails when rtable is changed while evaluating the ruleset * The socket looked up will not match the one hit in the end. */ - inp = in_pcblookup(tb, saddr->v4, sport, daddr->v4, dport, + inp = in_pcblookup(table, saddr->v4, sport, daddr->v4, dport, pd->rdomain); if (inp == NULL) { - inp = in_pcblookup_listen(tb, daddr->v4, dport, + inp = in_pcblookup_listen(table, daddr->v4, dport, NULL, pd->rdomain); if (inp == NULL) return (-1); @@ -3842,11 +3842,13 @@ pf_socket_lookup(struct pf_pdesc *pd) #ifdef INET6 case AF_INET6: if (pd->virtual_proto == IPPROTO_UDP) - tb = &udb6table; - inp = in6_pcblookup(tb, &saddr->v6, sport, &daddr->v6, + table = &udb6table; + if (pd->virtual_proto == IPPROTO_TCP) + table = &tcb6table; + inp = in6_pcblookup(table, &saddr->v6, sport, &daddr->v6, dport, pd->rdomain); if (inp == NULL) { - inp = in6_pcblookup_listen(tb, &daddr->v6, dport, + inp = in6_pcblookup_listen(table, &daddr->v6, dport, NULL, pd->rdomain); if (inp == NULL) return (-1); diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index b937c783dbf..7a79b6b4cca 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -1,4 +1,4 @@ -/* $OpenBSD: in_pcb.c,v 1.299 2024/03/31 15:53:12 bluhm Exp $ */ +/* $OpenBSD: in_pcb.c,v 1.300 2024/04/12 16:07:09 bluhm Exp $ */ /* $NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $ */ /* @@ -743,10 +743,8 @@ in_pcbnotifyall(struct inpcbtable *table, const struct sockaddr_in *dst, rw_enter_write(&table->inpt_notify); mtx_enter(&table->inpt_mtx); TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { -#ifdef INET6 - if (ISSET(inp->inp_flags, INP_IPV6)) - continue; -#endif + KASSERT(!ISSET(inp->inp_flags, INP_IPV6)); + if (inp->inp_faddr.s_addr != dst->sin_addr.s_addr || rtable_l2(inp->inp_rtableid) != rdomain) { continue; @@ -852,8 +850,7 @@ in_pcblookup_local_lock(struct inpcbtable *table, const void *laddrp, wildcard = 0; #ifdef INET6 if (ISSET(flags, INPLOOKUP_IPV6)) { - if (!ISSET(inp->inp_flags, INP_IPV6)) - continue; + KASSERT(ISSET(inp->inp_flags, INP_IPV6)); if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) wildcard++; @@ -869,10 +866,7 @@ in_pcblookup_local_lock(struct inpcbtable *table, const void *laddrp, } else #endif /* INET6 */ { -#ifdef INET6 - if (ISSET(inp->inp_flags, INP_IPV6)) - continue; -#endif /* INET6 */ + KASSERT(!ISSET(inp->inp_flags, INP_IPV6)); if (inp->inp_faddr.s_addr != INADDR_ANY) wildcard++; @@ -1032,7 +1026,7 @@ in_pcbhash_insert(struct inpcb *inp) &inp->inp_faddr6, inp->inp_fport, &inp->inp_laddr6, inp->inp_lport); else -#endif /* INET6 */ +#endif hash = in_pcbhash(table, rtable_l2(inp->inp_rtableid), &inp->inp_faddr, inp->inp_fport, &inp->inp_laddr, inp->inp_lport); @@ -1052,10 +1046,8 @@ in_pcbhash_lookup(struct inpcbtable *table, uint64_t hash, u_int rdomain, head = &table->inpt_hashtbl[hash & table->inpt_mask]; LIST_FOREACH(inp, head, inp_hash) { -#ifdef INET6 - if (ISSET(inp->inp_flags, INP_IPV6)) - continue; -#endif + KASSERT(!ISSET(inp->inp_flags, INP_IPV6)); + if (inp->inp_fport == fport && inp->inp_lport == lport && inp->inp_faddr.s_addr == faddr->s_addr && inp->inp_laddr.s_addr == laddr->s_addr && diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 7bd9e07e399..9320e240105 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_input.c,v 1.402 2024/04/10 22:10:03 bluhm Exp $ */ +/* $OpenBSD: tcp_input.c,v 1.403 2024/04/12 16:07:09 bluhm Exp $ */ /* $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $ */ /* @@ -140,7 +140,8 @@ struct timeval tcp_ackdrop_ppslim_last; #ifdef INET6 #define ND6_HINT(tp) \ do { \ - if (tp && tp->t_inpcb && (tp->t_inpcb->inp_flags & INP_IPV6) && \ + if (tp && tp->t_inpcb && \ + ISSET(tp->t_inpcb->inp_flags, INP_IPV6) && \ rtisvalid(tp->t_inpcb->inp_route.ro_rt)) { \ nd6_nud_hint(tp->t_inpcb->inp_route.ro_rt); \ } \ @@ -540,7 +541,7 @@ findpcb: switch (af) { #ifdef INET6 case AF_INET6: - inp = in6_pcblookup(&tcbtable, &ip6->ip6_src, + inp = in6_pcblookup(&tcb6table, &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport, m->m_pkthdr.ph_rtableid); break; @@ -557,10 +558,10 @@ findpcb: switch (af) { #ifdef INET6 case AF_INET6: - inp = in6_pcblookup_listen(&tcbtable, &ip6->ip6_dst, + inp = in6_pcblookup_listen(&tcb6table, &ip6->ip6_dst, th->th_dport, m, m->m_pkthdr.ph_rtableid); break; -#endif /* INET6 */ +#endif case AF_INET: inp = in_pcblookup_listen(&tcbtable, ip->ip_dst, th->th_dport, m, m->m_pkthdr.ph_rtableid); @@ -3543,17 +3544,16 @@ syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, sizeof(oldinp->inp_seclevel)); #endif /* IPSEC */ #ifdef INET6 - /* - * inp still has the OLD in_pcb stuff, set the - * v6-related flags on the new guy, too. - */ - inp->inp_flags |= (oldinp->inp_flags & INP_IPV6); - if (inp->inp_flags & INP_IPV6) { + if (ISSET(inp->inp_flags, INP_IPV6)) { + KASSERT(ISSET(oldinp->inp_flags, INP_IPV6)); + inp->inp_ipv6.ip6_hlim = oldinp->inp_ipv6.ip6_hlim; inp->inp_hops = oldinp->inp_hops; } else -#endif /* INET6 */ +#endif { + KASSERT(!ISSET(oldinp->inp_flags, INP_IPV6)); + inp->inp_ip.ip_ttl = oldinp->inp_ip.ip_ttl; inp->inp_options = ip_srcroute(m); if (inp->inp_options == NULL) { diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index e8256d04235..61cd51830d6 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_subr.c,v 1.199 2024/02/13 12:22:09 bluhm Exp $ */ +/* $OpenBSD: tcp_subr.c,v 1.200 2024/04/12 16:07:09 bluhm Exp $ */ /* $NetBSD: tcp_subr.c,v 1.22 1996/02/13 23:44:00 christos Exp $ */ /* @@ -159,6 +159,9 @@ tcp_init(void) "sackhl", NULL); pool_sethardlimit(&sackhl_pool, tcp_sackhole_limit, NULL, 0); in_pcbinit(&tcbtable, TCB_INITIAL_HASH_SIZE); +#ifdef INET6 + in_pcbinit(&tcb6table, TCB_INITIAL_HASH_SIZE); +#endif tcpcounters = counters_alloc(tcps_ncounters); arc4random_buf(tcp_secret, sizeof(tcp_secret)); @@ -461,21 +464,15 @@ tcp_newtcpcb(struct inpcb *inp, int wait) tp->t_pmtud_mss_acked = 0; #ifdef INET6 - /* we disallow IPv4 mapped address completely. */ - if ((inp->inp_flags & INP_IPV6) == 0) - tp->pf = PF_INET; - else + if (ISSET(inp->inp_flags, INP_IPV6)) { tp->pf = PF_INET6; -#else - tp->pf = PF_INET; -#endif - -#ifdef INET6 - if (inp->inp_flags & INP_IPV6) inp->inp_ipv6.ip6_hlim = ip6_defhlim; - else -#endif /* INET6 */ + } else +#endif + { + tp->pf = PF_INET; inp->inp_ip.ip_ttl = ip_defttl; + } inp->inp_ppcb = (caddr_t)tp; return (tp); @@ -675,7 +672,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *d) * corresponding to the address in the ICMPv6 message * payload. */ - inp = in6_pcblookup(&tcbtable, &sa6->sin6_addr, + inp = in6_pcblookup(&tcb6table, &sa6->sin6_addr, th.th_dport, &sa6_src->sin6_addr, th.th_sport, rdomain); if (cmd == PRC_MSGSIZE) { /* @@ -703,7 +700,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *d) rdomain); in_pcbunref(inp); } else { - in6_pcbnotify(&tcbtable, sa6, 0, + in6_pcbnotify(&tcb6table, sa6, 0, sa6_src, 0, rdomain, cmd, NULL, notify); } } @@ -845,7 +842,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v) void tcp6_mtudisc_callback(struct sockaddr_in6 *sin6, u_int rdomain) { - in6_pcbnotify(&tcbtable, sin6, 0, + in6_pcbnotify(&tcb6table, sin6, 0, &sa6_any, 0, rdomain, PRC_MSGSIZE, NULL, tcp_mtudisc); } #endif /* INET6 */ diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 48c694a0f09..7704890afc0 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_usrreq.c,v 1.230 2024/02/11 01:27:45 bluhm Exp $ */ +/* $OpenBSD: tcp_usrreq.c,v 1.231 2024/04/12 16:07:09 bluhm Exp $ */ /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ /* @@ -171,6 +171,9 @@ const struct sysctl_bounded_args tcpctl_vars[] = { }; struct inpcbtable tcbtable; +#ifdef INET6 +struct inpcbtable tcb6table; +#endif int tcp_fill_info(struct tcpcb *, struct socket *, struct mbuf *); int tcp_ident(void *, size_t *, void *, size_t, int); @@ -317,7 +320,7 @@ tcp_ctloutput(int op, struct socket *so, int level, int optname, if (ISSET(inp->inp_flags, INP_IPV6)) error = ip6_ctloutput(op, so, level, optname, m); else -#endif /* INET6 */ +#endif error = ip_ctloutput(op, so, level, optname, m); return (error); } @@ -452,6 +455,7 @@ tcp_ctloutput(int op, struct socket *so, int level, int optname, int tcp_attach(struct socket *so, int proto, int wait) { + struct inpcbtable *table; struct tcpcb *tp; struct inpcb *inp; int error; @@ -467,7 +471,13 @@ tcp_attach(struct socket *so, int proto, int wait) } NET_ASSERT_LOCKED(); - error = in_pcballoc(so, &tcbtable, wait); +#ifdef INET6 + if (so->so_proto->pr_domain->dom_family == PF_INET6) + table = &tcb6table; + else +#endif + table = &tcbtable; + error = in_pcballoc(so, table, wait); if (error) return (error); inp = sotoinpcb(so); @@ -482,14 +492,11 @@ tcp_attach(struct socket *so, int proto, int wait) } tp->t_state = TCPS_CLOSED; #ifdef INET6 - /* we disallow IPv4 mapped address completely. */ - if (inp->inp_flags & INP_IPV6) + if (ISSET(inp->inp_flags, INP_IPV6)) tp->pf = PF_INET6; else - tp->pf = PF_INET; -#else - tp->pf = PF_INET; #endif + tp->pf = PF_INET; if ((so->so_options & SO_LINGER) && so->so_linger == 0) so->so_linger = TCP_LINGERTIME; @@ -619,7 +626,7 @@ tcp_connect(struct socket *so, struct mbuf *nam) } #ifdef INET6 - if (inp->inp_flags & INP_IPV6) { + if (ISSET(inp->inp_flags, INP_IPV6)) { struct sockaddr_in6 *sin6; if ((error = in6_nam2sin6(nam, &sin6))) @@ -630,7 +637,7 @@ tcp_connect(struct socket *so, struct mbuf *nam) goto out; } } else -#endif /* INET6 */ +#endif { struct sockaddr_in *sin; @@ -1148,7 +1155,7 @@ tcp_ident(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int dodrop) switch (tir.faddr.ss_family) { #ifdef INET6 case AF_INET6: - inp = in6_pcblookup(&tcbtable, &f6, + inp = in6_pcblookup(&tcb6table, &f6, fin6->sin6_port, &l6, lin6->sin6_port, tir.rdomain); break; #endif @@ -1175,7 +1182,7 @@ tcp_ident(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int dodrop) switch (tir.faddr.ss_family) { #ifdef INET6 case AF_INET6: - inp = in6_pcblookup_listen(&tcbtable, + inp = in6_pcblookup_listen(&tcb6table, &l6, lin6->sin6_port, NULL, tir.rdomain); break; #endif diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index f96f39eefd2..3d75cf84c6e 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_var.h,v 1.176 2024/02/13 12:22:09 bluhm Exp $ */ +/* $OpenBSD: tcp_var.h,v 1.177 2024/04/12 16:07:09 bluhm Exp $ */ /* $NetBSD: tcp_var.h,v 1.17 1996/02/13 23:44:24 christos Exp $ */ /* @@ -676,7 +676,7 @@ extern const struct pr_usrreqs tcp6_usrreqs; #endif extern struct pool tcpcb_pool; -extern struct inpcbtable tcbtable; /* head of queue of active tcpcb's */ +extern struct inpcbtable tcbtable, tcb6table; /* queue of active tcpcb's */ extern int tcp_do_rfc1323; /* enabled/disabled? */ extern int tcptv_keep_init; /* [N] time to keep alive initial SYN packet */ extern int tcp_mssdflt; /* default maximum segment size */ diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 65f6a745d73..0a9c1afb5f4 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: udp_usrreq.c,v 1.318 2024/02/11 18:14:26 mvs Exp $ */ +/* $OpenBSD: udp_usrreq.c,v 1.319 2024/04/12 16:07:09 bluhm Exp $ */ /* $NetBSD: udp_usrreq.c,v 1.28 1996/03/16 23:54:03 christos Exp $ */ /* @@ -1117,10 +1117,10 @@ udp_attach(struct socket *so, int proto, int wait) if ((error = in_pcballoc(so, table, wait))) return error; #ifdef INET6 - if (sotoinpcb(so)->inp_flags & INP_IPV6) + if (ISSET(sotoinpcb(so)->inp_flags, INP_IPV6)) sotoinpcb(so)->inp_ipv6.ip6_hlim = ip6_defhlim; else -#endif /* INET6 */ +#endif sotoinpcb(so)->inp_ip.ip_ttl = ip_defttl; return 0; } @@ -1184,11 +1184,11 @@ udp_connect(struct socket *so, struct mbuf *addr) soassertlocked(so); #ifdef INET6 - if (inp->inp_flags & INP_IPV6) { + if (ISSET(inp->inp_flags, INP_IPV6)) { if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) return (EISCONN); } else -#endif /* INET6 */ +#endif { if (inp->inp_faddr.s_addr != INADDR_ANY) return (EISCONN); @@ -1209,11 +1209,11 @@ udp_disconnect(struct socket *so) soassertlocked(so); #ifdef INET6 - if (inp->inp_flags & INP_IPV6) { + if (ISSET(inp->inp_flags, INP_IPV6)) { if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) return (ENOTCONN); } else -#endif /* INET6 */ +#endif { if (inp->inp_faddr.s_addr == INADDR_ANY) return (ENOTCONN); @@ -1251,7 +1251,7 @@ udp_send(struct socket *so, struct mbuf *m, struct mbuf *addr, mtod(addr, struct sockaddr *)); else #ifdef INET6 - if (inp->inp_flags & INP_IPV6) + if (ISSET(inp->inp_flags, INP_IPV6)) session = pipex_l2tp_userland_lookup_session_ipv6( m, inp->inp_faddr6); diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index edace547cd4..0e6ab9fb5b7 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -1,4 +1,4 @@ -/* $OpenBSD: in6_pcb.c,v 1.143 2024/03/31 15:53:12 bluhm Exp $ */ +/* $OpenBSD: in6_pcb.c,v 1.144 2024/04/12 16:07:09 bluhm Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -479,8 +479,7 @@ in6_pcbnotify(struct inpcbtable *table, const struct sockaddr_in6 *dst, rw_enter_write(&table->inpt_notify); mtx_enter(&table->inpt_mtx); TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { - if (!ISSET(inp->inp_flags, INP_IPV6)) - continue; + KASSERT(ISSET(inp->inp_flags, INP_IPV6)); /* * Under the following condition, notify of redirects @@ -580,8 +579,8 @@ in6_pcbhash_lookup(struct inpcbtable *table, uint64_t hash, u_int rdomain, head = &table->inpt_hashtbl[hash & table->inpt_mask]; LIST_FOREACH(inp, head, inp_hash) { - if (!ISSET(inp->inp_flags, INP_IPV6)) - continue; + KASSERT(ISSET(inp->inp_flags, INP_IPV6)); + if (inp->inp_fport == fport && inp->inp_lport == lport && IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) && IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr) && -- 2.20.1