Use TSO and LRO on the loopback interface to transfer TCP faster.
author bluhm <bluhm@openbsd.org>
Sun, 2 Jul 2023 19:59:15 +0000 (19:59 +0000)
committer bluhm <bluhm@openbsd.org>
Sun, 2 Jul 2023 19:59:15 +0000 (19:59 +0000)
If tcplro is activated on lo(4), ignore the MTU with TCP packets.
They are passed along with the information that they have to be
chopped in case they are forwarded later.  New netstat(1) counter
shows that software LRO is in effect.  The feature is currently
turned off by default.

tested by jan@; OK claudio@ jan@

sys/net/if.c
sys/net/if_loop.c
sys/netinet/tcp_usrreq.c
sys/netinet/tcp_var.h
usr.bin/netstat/inet.c

index 770537c..8901f84 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: if.c,v 1.701 2023/06/27 21:02:13 mvs Exp $    */
+/*     $OpenBSD: if.c,v 1.702 2023/07/02 19:59:15 bluhm Exp $  */
 /*     $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $  */
 
 /*
 #ifdef MROUTING
 #include <netinet/ip_mroute.h>
 #endif
+#include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
 
 #ifdef INET6
 #include <netinet6/in6_var.h>
@@ -802,13 +805,30 @@ if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
         * is now incorrect, will be calculated before sending.
         */
        keepcksum = m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT |
-           M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT);
+           M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT |
+           M_TCP_TSO);
        m_resethdr(m);
        m->m_flags |= M_LOOP | keepflags;
        m->m_pkthdr.csum_flags = keepcksum;
        m->m_pkthdr.ph_ifidx = ifp->if_index;
        m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
 
+       if (ISSET(keepcksum, M_TCP_TSO) && m->m_pkthdr.len > ifp->if_mtu) {
+               if (ifp->if_mtu > 0 &&
+                   ((af == AF_INET &&
+                   ISSET(ifp->if_capabilities, IFCAP_TSOv4)) ||
+                   (af == AF_INET6 &&
+                   ISSET(ifp->if_capabilities, IFCAP_TSOv6)))) {
+                       tcpstat_inc(tcps_inswlro);
+                       tcpstat_add(tcps_inpktlro,
+                           (m->m_pkthdr.len + ifp->if_mtu - 1) / ifp->if_mtu);
+               } else {
+                       tcpstat_inc(tcps_inbadlro);
+                       m_freem(m);
+                       return (EPROTONOSUPPORT);
+               }
+       }
+
        if (ISSET(keepcksum, M_TCP_CSUM_OUT))
                m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
        if (ISSET(keepcksum, M_UDP_CSUM_OUT))
index d96f5ea..112fda7 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: if_loop.c,v 1.94 2023/06/05 11:35:46 bluhm Exp $      */
+/*     $OpenBSD: if_loop.c,v 1.95 2023/07/02 19:59:15 bluhm Exp $      */
 /*     $NetBSD: if_loop.c,v 1.15 1996/05/07 02:40:33 thorpej Exp $     */
 
 /*
@@ -175,7 +175,8 @@ loop_clone_create(struct if_clone *ifc, int unit)
        ifp->if_xflags = IFXF_CLONED;
        ifp->if_capabilities = IFCAP_CSUM_IPv4 |
            IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 |
-           IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
+           IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6 |
+           IFCAP_LRO;
        ifp->if_rtrequest = lortrequest;
        ifp->if_ioctl = loioctl;
        ifp->if_input = loinput;
@@ -281,6 +282,10 @@ loioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 
        switch (cmd) {
        case SIOCSIFFLAGS:
+               if (ISSET(ifp->if_xflags, IFXF_LRO))
+                       SET(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6);
+               else
+                       CLR(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6);
                break;
 
        case SIOCSIFADDR:
index e321da3..7a92879 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: tcp_usrreq.c,v 1.219 2023/05/23 09:16:16 jan Exp $    */
+/*     $OpenBSD: tcp_usrreq.c,v 1.220 2023/07/02 19:59:15 bluhm Exp $  */
 /*     $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */
 
 /*
@@ -1340,6 +1340,7 @@ tcp_sysctl_tcpstat(void *oldp, size_t *oldlenp, void *newp)
        ASSIGN(tcps_outhwtso);
        ASSIGN(tcps_outpkttso);
        ASSIGN(tcps_outbadtso);
+       ASSIGN(tcps_inswlro);
        ASSIGN(tcps_inhwlro);
        ASSIGN(tcps_inpktlro);
        ASSIGN(tcps_inbadlro);
index 0cc13c6..e071961 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: tcp_var.h,v 1.167 2023/05/23 09:16:16 jan Exp $       */
+/*     $OpenBSD: tcp_var.h,v 1.168 2023/07/02 19:59:15 bluhm Exp $     */
 /*     $NetBSD: tcp_var.h,v 1.17 1996/02/13 23:44:24 christos Exp $    */
 
 /*
@@ -447,6 +447,7 @@ struct      tcpstat {
        u_int32_t tcps_outhwtso;        /* output tso processed by hardware */
        u_int32_t tcps_outpkttso;       /* packets generated by tso */
        u_int32_t tcps_outbadtso;       /* output tso failed, packet dropped */
+       u_int32_t tcps_inswlro;         /* input lro on pseudo device */
        u_int32_t tcps_inhwlro;         /* input lro from hardware */
        u_int32_t tcps_inpktlro;        /* packets coalesced by hardware lro */
        u_int32_t tcps_inbadlro;        /* input bad lro packets */
@@ -628,6 +629,7 @@ enum tcpstat_counters {
        tcps_outhwtso,
        tcps_outpkttso,
        tcps_outbadtso,
+       tcps_inswlro,
        tcps_inhwlro,
        tcps_inpktlro,
        tcps_inbadlro,
index b84c287..1718f34 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: inet.c,v 1.176 2023/05/23 09:16:16 jan Exp $  */
+/*     $OpenBSD: inet.c,v 1.177 2023/07/02 19:59:15 bluhm Exp $        */
 /*     $NetBSD: inet.c,v 1.14 1995/10/03 21:42:37 thorpej Exp $        */
 
 /*
@@ -439,9 +439,12 @@ tcp_stats(char *name)
        p(tcps_inswcsum, "\t\t%u packet%s software-checksummed\n");
        p(tcps_rcvbadsig, "\t\t%u bad/missing md5 checksum%s\n");
        p(tcps_rcvgoodsig, "\t\t%llu good md5 checksum%s\n");
+       p(tcps_inswlro,
+           "\t\t%u input LRO packet%s passed through pseudo device\n");
        p(tcps_inhwlro, "\t\t%u input LRO generated packet%s from hardware\n");
-       p(tcps_inpktlro, "\t\t%u input LRO coalesced packet%s by hardware\n");
-       p(tcps_inbadlro, "\t\t%u input bad LRO packet%s\n");
+       p(tcps_inpktlro,
+           "\t\t%u input LRO coalesced packet%s by network device\n");
+       p(tcps_inbadlro, "\t\t%u input bad LRO packet%s dropped\n");
        p(tcps_connattempt, "\t%u connection request%s\n");
        p(tcps_accepts, "\t%u connection accept%s\n");
        p(tcps_connects, "\t%u connection%s established (including accepts)\n");