Split single TCP inpcb table into IPv4 and IPv6 parts.
author bluhm <bluhm@openbsd.org>
Fri, 12 Apr 2024 16:07:09 +0000 (16:07 +0000)
committer bluhm <bluhm@openbsd.org>
Fri, 12 Apr 2024 16:07:09 +0000 (16:07 +0000)
With two separate TCP hash tables, each one becomes smaller.  When
we remove the exclusive net lock from TCP, contention on the internet
PCB table mutex will be reduced.  UDP was already split into IPv4
and IPv6 tables earlier.  Replace branch conditions based on INP_IPV6
with assertions.

OK mvs@

sys/kern/kern_sysctl.c
sys/net/pf.c
sys/netinet/in_pcb.c
sys/netinet/tcp_input.c
sys/netinet/tcp_subr.c
sys/netinet/tcp_usrreq.c
sys/netinet/tcp_var.h
sys/netinet/udp_usrreq.c
sys/netinet6/in6_pcb.c

index 9ff340c..5d0d1a8 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: kern_sysctl.c,v 1.426 2024/03/29 06:50:06 miod Exp $  */
+/*     $OpenBSD: kern_sysctl.c,v 1.427 2024/04/12 16:07:09 bluhm Exp $ */
 /*     $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $     */
 
 /*-
@@ -1482,6 +1482,12 @@ sysctl_file(int *name, u_int namelen, char *where, size_t *sizep,
                        TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue)
                                FILLSO(inp->inp_socket);
                        mtx_leave(&tcbtable.inpt_mtx);
+#ifdef INET6
+                       mtx_enter(&tcb6table.inpt_mtx);
+                       TAILQ_FOREACH(inp, &tcb6table.inpt_queue, inp_queue)
+                               FILLSO(inp->inp_socket);
+                       mtx_leave(&tcb6table.inpt_mtx);
+#endif
                        mtx_enter(&udbtable.inpt_mtx);
                        TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue)
                                FILLSO(inp->inp_socket);
index f8e6f20..e0e31b7 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: pf.c,v 1.1193 2024/01/10 16:44:30 bluhm Exp $ */
+/*     $OpenBSD: pf.c,v 1.1194 2024/04/12 16:07:09 bluhm Exp $ */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
@@ -3788,7 +3788,7 @@ pf_socket_lookup(struct pf_pdesc *pd)
 {
        struct pf_addr          *saddr, *daddr;
        u_int16_t                sport, dport;
-       struct inpcbtable       *tb;
+       struct inpcbtable       *table;
        struct inpcb            *inp;
 
        pd->lookup.uid = -1;
@@ -3800,14 +3800,14 @@ pf_socket_lookup(struct pf_pdesc *pd)
                dport = pd->hdr.tcp.th_dport;
                PF_ASSERT_LOCKED();
                NET_ASSERT_LOCKED();
-               tb = &tcbtable;
+               table = &tcbtable;
                break;
        case IPPROTO_UDP:
                sport = pd->hdr.udp.uh_sport;
                dport = pd->hdr.udp.uh_dport;
                PF_ASSERT_LOCKED();
                NET_ASSERT_LOCKED();
-               tb = &udbtable;
+               table = &udbtable;
                break;
        default:
                return (-1);
@@ -3830,10 +3830,10 @@ pf_socket_lookup(struct pf_pdesc *pd)
                 * Fails when rtable is changed while evaluating the ruleset
                 * The socket looked up will not match the one hit in the end.
                 */
-               inp = in_pcblookup(tb, saddr->v4, sport, daddr->v4, dport,
+               inp = in_pcblookup(table, saddr->v4, sport, daddr->v4, dport,
                    pd->rdomain);
                if (inp == NULL) {
-                       inp = in_pcblookup_listen(tb, daddr->v4, dport,
+                       inp = in_pcblookup_listen(table, daddr->v4, dport,
                            NULL, pd->rdomain);
                        if (inp == NULL)
                                return (-1);
@@ -3842,11 +3842,13 @@ pf_socket_lookup(struct pf_pdesc *pd)
 #ifdef INET6
        case AF_INET6:
                if (pd->virtual_proto == IPPROTO_UDP)
-                       tb = &udb6table;
-               inp = in6_pcblookup(tb, &saddr->v6, sport, &daddr->v6,
+                       table = &udb6table;
+               if (pd->virtual_proto == IPPROTO_TCP)
+                       table = &tcb6table;
+               inp = in6_pcblookup(table, &saddr->v6, sport, &daddr->v6,
                    dport, pd->rdomain);
                if (inp == NULL) {
-                       inp = in6_pcblookup_listen(tb, &daddr->v6, dport,
+                       inp = in6_pcblookup_listen(table, &daddr->v6, dport,
                            NULL, pd->rdomain);
                        if (inp == NULL)
                                return (-1);
index b937c78..7a79b6b 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: in_pcb.c,v 1.299 2024/03/31 15:53:12 bluhm Exp $      */
+/*     $OpenBSD: in_pcb.c,v 1.300 2024/04/12 16:07:09 bluhm Exp $      */
 /*     $NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $     */
 
 /*
@@ -743,10 +743,8 @@ in_pcbnotifyall(struct inpcbtable *table, const struct sockaddr_in *dst,
        rw_enter_write(&table->inpt_notify);
        mtx_enter(&table->inpt_mtx);
        TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
-#ifdef INET6
-               if (ISSET(inp->inp_flags, INP_IPV6))
-                       continue;
-#endif
+               KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
+
                if (inp->inp_faddr.s_addr != dst->sin_addr.s_addr ||
                    rtable_l2(inp->inp_rtableid) != rdomain) {
                        continue;
@@ -852,8 +850,7 @@ in_pcblookup_local_lock(struct inpcbtable *table, const void *laddrp,
                wildcard = 0;
 #ifdef INET6
                if (ISSET(flags, INPLOOKUP_IPV6)) {
-                       if (!ISSET(inp->inp_flags, INP_IPV6))
-                               continue;
+                       KASSERT(ISSET(inp->inp_flags, INP_IPV6));
 
                        if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
                                wildcard++;
@@ -869,10 +866,7 @@ in_pcblookup_local_lock(struct inpcbtable *table, const void *laddrp,
                } else
 #endif /* INET6 */
                {
-#ifdef INET6
-                       if (ISSET(inp->inp_flags, INP_IPV6))
-                               continue;
-#endif /* INET6 */
+                       KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
 
                        if (inp->inp_faddr.s_addr != INADDR_ANY)
                                wildcard++;
@@ -1032,7 +1026,7 @@ in_pcbhash_insert(struct inpcb *inp)
                    &inp->inp_faddr6, inp->inp_fport,
                    &inp->inp_laddr6, inp->inp_lport);
        else
-#endif /* INET6 */
+#endif
                hash = in_pcbhash(table, rtable_l2(inp->inp_rtableid),
                    &inp->inp_faddr, inp->inp_fport,
                    &inp->inp_laddr, inp->inp_lport);
@@ -1052,10 +1046,8 @@ in_pcbhash_lookup(struct inpcbtable *table, uint64_t hash, u_int rdomain,
 
        head = &table->inpt_hashtbl[hash & table->inpt_mask];
        LIST_FOREACH(inp, head, inp_hash) {
-#ifdef INET6
-               if (ISSET(inp->inp_flags, INP_IPV6))
-                       continue;
-#endif
+               KASSERT(!ISSET(inp->inp_flags, INP_IPV6));
+
                if (inp->inp_fport == fport && inp->inp_lport == lport &&
                    inp->inp_faddr.s_addr == faddr->s_addr &&
                    inp->inp_laddr.s_addr == laddr->s_addr &&
index 7bd9e07..9320e24 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: tcp_input.c,v 1.402 2024/04/10 22:10:03 bluhm Exp $   */
+/*     $OpenBSD: tcp_input.c,v 1.403 2024/04/12 16:07:09 bluhm Exp $   */
 /*     $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $  */
 
 /*
@@ -140,7 +140,8 @@ struct timeval tcp_ackdrop_ppslim_last;
 #ifdef INET6
 #define ND6_HINT(tp) \
 do { \
-       if (tp && tp->t_inpcb && (tp->t_inpcb->inp_flags & INP_IPV6) && \
+       if (tp && tp->t_inpcb &&                                        \
+           ISSET(tp->t_inpcb->inp_flags, INP_IPV6) &&                  \
            rtisvalid(tp->t_inpcb->inp_route.ro_rt)) {                  \
                nd6_nud_hint(tp->t_inpcb->inp_route.ro_rt);             \
        } \
@@ -540,7 +541,7 @@ findpcb:
                switch (af) {
 #ifdef INET6
                case AF_INET6:
-                       inp = in6_pcblookup(&tcbtable, &ip6->ip6_src,
+                       inp = in6_pcblookup(&tcb6table, &ip6->ip6_src,
                            th->th_sport, &ip6->ip6_dst, th->th_dport,
                            m->m_pkthdr.ph_rtableid);
                        break;
@@ -557,10 +558,10 @@ findpcb:
                switch (af) {
 #ifdef INET6
                case AF_INET6:
-                       inp = in6_pcblookup_listen(&tcbtable, &ip6->ip6_dst,
+                       inp = in6_pcblookup_listen(&tcb6table, &ip6->ip6_dst,
                            th->th_dport, m, m->m_pkthdr.ph_rtableid);
                        break;
-#endif /* INET6 */
+#endif
                case AF_INET:
                        inp = in_pcblookup_listen(&tcbtable, ip->ip_dst,
                            th->th_dport, m, m->m_pkthdr.ph_rtableid);
@@ -3543,17 +3544,16 @@ syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
            sizeof(oldinp->inp_seclevel));
 #endif /* IPSEC */
 #ifdef INET6
-       /*
-        * inp still has the OLD in_pcb stuff, set the
-        * v6-related flags on the new guy, too.
-        */
-       inp->inp_flags |= (oldinp->inp_flags & INP_IPV6);
-       if (inp->inp_flags & INP_IPV6) {
+       if (ISSET(inp->inp_flags, INP_IPV6)) {
+               KASSERT(ISSET(oldinp->inp_flags, INP_IPV6));
+
                inp->inp_ipv6.ip6_hlim = oldinp->inp_ipv6.ip6_hlim;
                inp->inp_hops = oldinp->inp_hops;
        } else
-#endif /* INET6 */
+#endif
        {
+               KASSERT(!ISSET(oldinp->inp_flags, INP_IPV6));
+
                inp->inp_ip.ip_ttl = oldinp->inp_ip.ip_ttl;
                inp->inp_options = ip_srcroute(m);
                if (inp->inp_options == NULL) {
index e8256d0..61cd518 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: tcp_subr.c,v 1.199 2024/02/13 12:22:09 bluhm Exp $    */
+/*     $OpenBSD: tcp_subr.c,v 1.200 2024/04/12 16:07:09 bluhm Exp $    */
 /*     $NetBSD: tcp_subr.c,v 1.22 1996/02/13 23:44:00 christos Exp $   */
 
 /*
@@ -159,6 +159,9 @@ tcp_init(void)
            "sackhl", NULL);
        pool_sethardlimit(&sackhl_pool, tcp_sackhole_limit, NULL, 0);
        in_pcbinit(&tcbtable, TCB_INITIAL_HASH_SIZE);
+#ifdef INET6
+       in_pcbinit(&tcb6table, TCB_INITIAL_HASH_SIZE);
+#endif
        tcpcounters = counters_alloc(tcps_ncounters);
 
        arc4random_buf(tcp_secret, sizeof(tcp_secret));
@@ -461,21 +464,15 @@ tcp_newtcpcb(struct inpcb *inp, int wait)
        tp->t_pmtud_mss_acked = 0;
 
 #ifdef INET6
-       /* we disallow IPv4 mapped address completely. */
-       if ((inp->inp_flags & INP_IPV6) == 0)
-               tp->pf = PF_INET;
-       else
+       if (ISSET(inp->inp_flags, INP_IPV6)) {
                tp->pf = PF_INET6;
-#else
-       tp->pf = PF_INET;
-#endif
-
-#ifdef INET6
-       if (inp->inp_flags & INP_IPV6)
                inp->inp_ipv6.ip6_hlim = ip6_defhlim;
-       else
-#endif /* INET6 */
+       } else
+#endif
+       {
+               tp->pf = PF_INET;
                inp->inp_ip.ip_ttl = ip_defttl;
+       }
 
        inp->inp_ppcb = (caddr_t)tp;
        return (tp);
@@ -675,7 +672,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *d)
                 * corresponding to the address in the ICMPv6 message
                 * payload.
                 */
-               inp = in6_pcblookup(&tcbtable, &sa6->sin6_addr,
+               inp = in6_pcblookup(&tcb6table, &sa6->sin6_addr,
                    th.th_dport, &sa6_src->sin6_addr, th.th_sport, rdomain);
                if (cmd == PRC_MSGSIZE) {
                        /*
@@ -703,7 +700,7 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *d)
                            rdomain);
                in_pcbunref(inp);
        } else {
-               in6_pcbnotify(&tcbtable, sa6, 0,
+               in6_pcbnotify(&tcb6table, sa6, 0,
                    sa6_src, 0, rdomain, cmd, NULL, notify);
        }
 }
@@ -845,7 +842,7 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
 void
 tcp6_mtudisc_callback(struct sockaddr_in6 *sin6, u_int rdomain)
 {
-       in6_pcbnotify(&tcbtable, sin6, 0,
+       in6_pcbnotify(&tcb6table, sin6, 0,
            &sa6_any, 0, rdomain, PRC_MSGSIZE, NULL, tcp_mtudisc);
 }
 #endif /* INET6 */
index 48c694a..7704890 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: tcp_usrreq.c,v 1.230 2024/02/11 01:27:45 bluhm Exp $  */
+/*     $OpenBSD: tcp_usrreq.c,v 1.231 2024/04/12 16:07:09 bluhm Exp $  */
 /*     $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */
 
 /*
@@ -171,6 +171,9 @@ const struct sysctl_bounded_args tcpctl_vars[] = {
 };
 
 struct inpcbtable tcbtable;
+#ifdef INET6
+struct inpcbtable tcb6table;
+#endif
 
 int    tcp_fill_info(struct tcpcb *, struct socket *, struct mbuf *);
 int    tcp_ident(void *, size_t *, void *, size_t, int);
@@ -317,7 +320,7 @@ tcp_ctloutput(int op, struct socket *so, int level, int optname,
                if (ISSET(inp->inp_flags, INP_IPV6))
                        error = ip6_ctloutput(op, so, level, optname, m);
                else
-#endif /* INET6 */
+#endif
                        error = ip_ctloutput(op, so, level, optname, m);
                return (error);
        }
@@ -452,6 +455,7 @@ tcp_ctloutput(int op, struct socket *so, int level, int optname,
 int
 tcp_attach(struct socket *so, int proto, int wait)
 {
+       struct inpcbtable *table;
        struct tcpcb *tp;
        struct inpcb *inp;
        int error;
@@ -467,7 +471,13 @@ tcp_attach(struct socket *so, int proto, int wait)
        }
 
        NET_ASSERT_LOCKED();
-       error = in_pcballoc(so, &tcbtable, wait);
+#ifdef INET6
+       if (so->so_proto->pr_domain->dom_family == PF_INET6)
+               table = &tcb6table;
+       else
+#endif
+               table = &tcbtable;
+       error = in_pcballoc(so, table, wait);
        if (error)
                return (error);
        inp = sotoinpcb(so);
@@ -482,14 +492,11 @@ tcp_attach(struct socket *so, int proto, int wait)
        }
        tp->t_state = TCPS_CLOSED;
 #ifdef INET6
-       /* we disallow IPv4 mapped address completely. */
-       if (inp->inp_flags & INP_IPV6)
+       if (ISSET(inp->inp_flags, INP_IPV6))
                tp->pf = PF_INET6;
        else
-               tp->pf = PF_INET;
-#else
-       tp->pf = PF_INET;
 #endif
+               tp->pf = PF_INET;
        if ((so->so_options & SO_LINGER) && so->so_linger == 0)
                so->so_linger = TCP_LINGERTIME;
 
@@ -619,7 +626,7 @@ tcp_connect(struct socket *so, struct mbuf *nam)
        }
 
 #ifdef INET6
-       if (inp->inp_flags & INP_IPV6) {
+       if (ISSET(inp->inp_flags, INP_IPV6)) {
                struct sockaddr_in6 *sin6;
 
                if ((error = in6_nam2sin6(nam, &sin6)))
@@ -630,7 +637,7 @@ tcp_connect(struct socket *so, struct mbuf *nam)
                        goto out;
                }
        } else
-#endif /* INET6 */
+#endif
        {
                struct sockaddr_in *sin;
 
@@ -1148,7 +1155,7 @@ tcp_ident(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int dodrop)
        switch (tir.faddr.ss_family) {
 #ifdef INET6
        case AF_INET6:
-               inp = in6_pcblookup(&tcbtable, &f6,
+               inp = in6_pcblookup(&tcb6table, &f6,
                    fin6->sin6_port, &l6, lin6->sin6_port, tir.rdomain);
                break;
 #endif
@@ -1175,7 +1182,7 @@ tcp_ident(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int dodrop)
                switch (tir.faddr.ss_family) {
 #ifdef INET6
                case AF_INET6:
-                       inp = in6_pcblookup_listen(&tcbtable,
+                       inp = in6_pcblookup_listen(&tcb6table,
                            &l6, lin6->sin6_port, NULL, tir.rdomain);
                        break;
 #endif
index f96f39e..3d75cf8 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: tcp_var.h,v 1.176 2024/02/13 12:22:09 bluhm Exp $     */
+/*     $OpenBSD: tcp_var.h,v 1.177 2024/04/12 16:07:09 bluhm Exp $     */
 /*     $NetBSD: tcp_var.h,v 1.17 1996/02/13 23:44:24 christos Exp $    */
 
 /*
@@ -676,7 +676,7 @@ extern      const struct pr_usrreqs tcp6_usrreqs;
 #endif
 
 extern struct pool tcpcb_pool;
-extern struct inpcbtable tcbtable;     /* head of queue of active tcpcb's */
+extern struct inpcbtable tcbtable, tcb6table;  /* queue of active tcpcb's */
 extern int tcp_do_rfc1323;     /* enabled/disabled? */
 extern int tcptv_keep_init;    /* [N] time to keep alive initial SYN packet */
 extern int tcp_mssdflt;        /* default maximum segment size */
index 65f6a74..0a9c1af 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: udp_usrreq.c,v 1.318 2024/02/11 18:14:26 mvs Exp $    */
+/*     $OpenBSD: udp_usrreq.c,v 1.319 2024/04/12 16:07:09 bluhm Exp $  */
 /*     $NetBSD: udp_usrreq.c,v 1.28 1996/03/16 23:54:03 christos Exp $ */
 
 /*
@@ -1117,10 +1117,10 @@ udp_attach(struct socket *so, int proto, int wait)
        if ((error = in_pcballoc(so, table, wait)))
                return error;
 #ifdef INET6
-       if (sotoinpcb(so)->inp_flags & INP_IPV6)
+       if (ISSET(sotoinpcb(so)->inp_flags, INP_IPV6))
                sotoinpcb(so)->inp_ipv6.ip6_hlim = ip6_defhlim;
        else
-#endif /* INET6 */
+#endif
                sotoinpcb(so)->inp_ip.ip_ttl = ip_defttl;
        return 0;
 }
@@ -1184,11 +1184,11 @@ udp_connect(struct socket *so, struct mbuf *addr)
        soassertlocked(so);
 
 #ifdef INET6
-       if (inp->inp_flags & INP_IPV6) {
+       if (ISSET(inp->inp_flags, INP_IPV6)) {
                if (!IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
                        return (EISCONN);
        } else
-#endif /* INET6 */
+#endif
        {
                if (inp->inp_faddr.s_addr != INADDR_ANY)
                        return (EISCONN);
@@ -1209,11 +1209,11 @@ udp_disconnect(struct socket *so)
        soassertlocked(so);
 
 #ifdef INET6
-       if (inp->inp_flags & INP_IPV6) {
+       if (ISSET(inp->inp_flags, INP_IPV6)) {
                if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
                        return (ENOTCONN);
        } else
-#endif /* INET6 */
+#endif
        {
                if (inp->inp_faddr.s_addr == INADDR_ANY)
                        return (ENOTCONN);
@@ -1251,7 +1251,7 @@ udp_send(struct socket *so, struct mbuf *m, struct mbuf *addr,
                                mtod(addr, struct sockaddr *));
                else
 #ifdef INET6
-               if (inp->inp_flags & INP_IPV6)
+               if (ISSET(inp->inp_flags, INP_IPV6))
                        session =
                            pipex_l2tp_userland_lookup_session_ipv6(
                                m, inp->inp_faddr6);
index edace54..0e6ab9f 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: in6_pcb.c,v 1.143 2024/03/31 15:53:12 bluhm Exp $     */
+/*     $OpenBSD: in6_pcb.c,v 1.144 2024/04/12 16:07:09 bluhm Exp $     */
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -479,8 +479,7 @@ in6_pcbnotify(struct inpcbtable *table, const struct sockaddr_in6 *dst,
        rw_enter_write(&table->inpt_notify);
        mtx_enter(&table->inpt_mtx);
        TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
-               if (!ISSET(inp->inp_flags, INP_IPV6))
-                       continue;
+               KASSERT(ISSET(inp->inp_flags, INP_IPV6));
 
                /*
                 * Under the following condition, notify of redirects
@@ -580,8 +579,8 @@ in6_pcbhash_lookup(struct inpcbtable *table, uint64_t hash, u_int rdomain,
 
        head = &table->inpt_hashtbl[hash & table->inpt_mask];
        LIST_FOREACH(inp, head, inp_hash) {
-               if (!ISSET(inp->inp_flags, INP_IPV6))
-                       continue;
+               KASSERT(ISSET(inp->inp_flags, INP_IPV6));
+
                if (inp->inp_fport == fport && inp->inp_lport == lport &&
                    IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) &&
                    IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr) &&