From: bluhm Date: Mon, 15 May 2023 16:34:56 +0000 (+0000) Subject: Implement the TCP/IP layer for hardware TCP segmentation offload. X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=510f4386e1535b967eb78c315ea65daf4f87c7db;p=openbsd Implement the TCP/IP layer for hardware TCP segmentation offload. If the driver of a network interface claims to support TSO, do not chop the packet in software, but pass it down to the interface layer. Precalculate parts of the pseudo header checksum, but without the packet length. The length of all generated smaller packets is not known yet. Driver and hardware will use the mbuf packet header field ph_mss to calculate it and update the checksum. Introduce separate flags IFCAP_TSOv4 and IFCAP_TSOv6 as hardware might support only one protocol family. The old flag IFXF_TSO is only relevant for large receive offload. It is misnamed, but keep that for now. Note that drivers do not set TSO capabilities yet. Also the ifconfig flags and pseudo interface capabilities will be done separately. So this commit should not change behavior. 
heavily based on the work from jan@; OK sashan@ --- diff --git a/sys/net/if.h b/sys/net/if.h index 757ff96f126..c8c7ec5f03f 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if.h,v 1.211 2023/03/07 20:09:48 jan Exp $ */ +/* $OpenBSD: if.h,v 1.212 2023/05/15 16:34:56 bluhm Exp $ */ /* $NetBSD: if.h,v 1.23 1996/05/07 02:40:27 thorpej Exp $ */ /* @@ -231,7 +231,7 @@ struct if_status_description { #define IFXF_INET6_NOSOII 0x40 /* [N] don't do RFC 7217 */ #define IFXF_AUTOCONF4 0x80 /* [N] v4 autoconf (aka dhcp) enabled */ #define IFXF_MONITOR 0x100 /* [N] only used for bpf */ -#define IFXF_TSO 0x200 /* [N] TCP segment offloading */ +#define IFXF_TSO 0x200 /* [N] XXX missnamed, should be LRO */ #define IFXF_CANTCHANGE \ (IFXF_MPSAFE|IFXF_CLONED) @@ -251,7 +251,9 @@ struct if_status_description { #define IFCAP_VLAN_HWTAGGING 0x00000020 /* hardware VLAN tag support */ #define IFCAP_CSUM_TCPv6 0x00000080 /* can do IPv6/TCP checksums */ #define IFCAP_CSUM_UDPv6 0x00000100 /* can do IPv6/UDP checksums */ -#define IFCAP_TSO 0x00004000 /* TCP segment offloading */ +#define IFCAP_TSOv4 0x00001000 /* IPv4/TCP segment offload */ +#define IFCAP_TSOv6 0x00002000 /* IPv6/TCP segment offload */ +#define IFCAP_TSO 0x00004000 /* XXX should be LRO */ #define IFCAP_WOL 0x00008000 /* can do wake on lan */ #define IFCAP_CSUM_MASK (IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 | \ diff --git a/sys/net/pf.c b/sys/net/pf.c index 7c395644c9b..81939d45c31 100644 --- a/sys/net/pf.c +++ b/sys/net/pf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.1179 2023/05/13 13:35:17 bluhm Exp $ */ +/* $OpenBSD: pf.c,v 1.1180 2023/05/15 16:34:56 bluhm Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -6555,15 +6555,9 @@ pf_route(struct pf_pdesc *pd, struct pf_state *st) goto done; } - if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO) && - m0->m_pkthdr.ph_mss <= ifp->if_mtu) { - if (tcp_chopper(m0, &ml, ifp, m0->m_pkthdr.ph_mss) || - if_output_ml(ifp, &ml, sintosa(dst), rt)) - goto done; - 
tcpstat_inc(tcps_outswtso); + if (tcp_if_output_tso(ifp, &m0, sintosa(dst), rt, + IFCAP_TSOv4, ifp->if_mtu) || m0 == NULL) goto done; - } - CLR(m0->m_pkthdr.csum_flags, M_TCP_TSO); /* * Too large for interface; fragment if possible. @@ -6598,7 +6592,6 @@ void pf_route6(struct pf_pdesc *pd, struct pf_state *st) { struct mbuf *m0; - struct mbuf_list ml; struct sockaddr_in6 *dst, sin6; struct rtentry *rt = NULL; struct ip6_hdr *ip6; @@ -6696,15 +6689,9 @@ pf_route6(struct pf_pdesc *pd, struct pf_state *st) goto done; } - if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO) && - m0->m_pkthdr.ph_mss <= ifp->if_mtu) { - if (tcp_chopper(m0, &ml, ifp, m0->m_pkthdr.ph_mss) || - if_output_ml(ifp, &ml, sin6tosa(dst), rt)) - goto done; - tcpstat_inc(tcps_outswtso); + if (tcp_if_output_tso(ifp, &m0, sin6tosa(dst), rt, + IFCAP_TSOv6, ifp->if_mtu) || m0 == NULL) goto done; - } - CLR(m0->m_pkthdr.csum_flags, M_TCP_TSO); ip6stat_inc(ip6s_cantfrag); if (st->rt != PF_DUPTO) diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 394da08ad9f..a44ee063d06 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_output.c,v 1.386 2023/05/13 13:35:17 bluhm Exp $ */ +/* $OpenBSD: ip_output.c,v 1.387 2023/05/15 16:34:56 bluhm Exp $ */ /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ /* @@ -460,15 +460,10 @@ sendit: goto done; } - if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) && - m->m_pkthdr.ph_mss <= mtu) { - if ((error = tcp_chopper(m, &ml, ifp, m->m_pkthdr.ph_mss)) || - (error = if_output_ml(ifp, &ml, sintosa(dst), ro->ro_rt))) - goto done; - tcpstat_inc(tcps_outswtso); + error = tcp_if_output_tso(ifp, &m, sintosa(dst), ro->ro_rt, + IFCAP_TSOv4, mtu); + if (error || m == NULL) goto done; - } - CLR(m->m_pkthdr.csum_flags, M_TCP_TSO); /* * Too large for interface; fragment if possible. 
@@ -1887,10 +1882,15 @@ in_proto_cksum_out(struct mbuf *m, struct ifnet *ifp) u_int16_t csum = 0, offset; offset = ip->ip_hl << 2; - if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) + if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) { + csum = in_cksum_phdr(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htonl(ip->ip_p)); + } else if (ISSET(m->m_pkthdr.csum_flags, + M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) { csum = in_cksum_phdr(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl(ntohs(ip->ip_len) - offset + ip->ip_p)); + } if (ip->ip_p == IPPROTO_TCP) offset += offsetof(struct tcphdr, th_sum); else if (ip->ip_p == IPPROTO_UDP) diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 752b485ba27..c7111b143ec 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_output.c,v 1.137 2023/05/13 13:35:18 bluhm Exp $ */ +/* $OpenBSD: tcp_output.c,v 1.138 2023/05/15 16:34:56 bluhm Exp $ */ /* $NetBSD: tcp_output.c,v 1.16 1997/06/03 16:17:09 kml Exp $ */ /* @@ -80,6 +80,7 @@ #include #include +#include #include #if NPF > 0 #include @@ -753,7 +754,7 @@ send: /* Enable TSO and specify the size of the resulting segments. 
*/ if (tso) { - m->m_pkthdr.csum_flags |= M_TCP_TSO; + SET(m->m_pkthdr.csum_flags, M_TCP_TSO); m->m_pkthdr.ph_mss = tp->t_maxseg; } @@ -1349,3 +1350,45 @@ tcp_chopper(struct mbuf *m0, struct mbuf_list *ml, struct ifnet *ifp, ml_purge(ml); return error; } + +int +tcp_if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst, + struct rtentry *rt, uint32_t ifcap, u_int mtu) +{ + struct mbuf_list ml; + int error; + + /* caller must fail later or fragment */ + if (!ISSET((*mp)->m_pkthdr.csum_flags, M_TCP_TSO)) + return 0; + if ((*mp)->m_pkthdr.ph_mss > mtu) { + CLR((*mp)->m_pkthdr.csum_flags, M_TCP_TSO); + return 0; + } + + /* network interface hardware will do TSO */ + if (in_ifcap_cksum(*mp, ifp, ifcap)) { + if (ISSET(ifcap, IFCAP_TSOv4)) { + in_hdr_cksum_out(*mp, ifp); + in_proto_cksum_out(*mp, ifp); + } +#ifdef INET6 + if (ISSET(ifcap, IFCAP_TSOv6)) + in6_proto_cksum_out(*mp, ifp); +#endif + error = ifp->if_output(ifp, *mp, dst, rt); + if (!error) + tcpstat_inc(tcps_outhwtso); + goto done; + } + + /* as fallback do TSO in software */ + if ((error = tcp_chopper(*mp, &ml, ifp, (*mp)->m_pkthdr.ph_mss)) || + (error = if_output_ml(ifp, &ml, dst, rt))) + goto done; + tcpstat_inc(tcps_outswtso); + + done: + *mp = NULL; + return error; +} diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index d8cfc12c517..fc64e11876c 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_var.h,v 1.164 2023/05/10 12:07:16 bluhm Exp $ */ +/* $OpenBSD: tcp_var.h,v 1.165 2023/05/15 16:34:56 bluhm Exp $ */ /* $NetBSD: tcp_var.h,v 1.17 1996/02/13 23:44:24 christos Exp $ */ /* @@ -719,6 +719,8 @@ struct tcpcb * void tcp_notify(struct inpcb *, int); int tcp_output(struct tcpcb *); int tcp_chopper(struct mbuf *, struct mbuf_list *, struct ifnet *, u_int); +int tcp_if_output_tso(struct ifnet *, struct mbuf **, struct sockaddr *, + struct rtentry *, uint32_t, u_int); void tcp_pulloutofband(struct socket *, u_int, struct mbuf *, int); 
int tcp_reass(struct tcpcb *, struct tcphdr *, struct mbuf *, int *); void tcp_rscale(struct tcpcb *, u_long); diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index b14ffbe4106..add8fb22c66 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip6_output.c,v 1.275 2023/05/10 12:07:17 bluhm Exp $ */ +/* $OpenBSD: ip6_output.c,v 1.276 2023/05/15 16:34:57 bluhm Exp $ */ /* $KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $ */ /* @@ -706,15 +706,10 @@ reroute: goto done; } - if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) && - m->m_pkthdr.ph_mss <= mtu) { - if ((error = tcp_chopper(m, &ml, ifp, m->m_pkthdr.ph_mss)) || - (error = if_output_ml(ifp, &ml, sin6tosa(dst), ro->ro_rt))) - goto done; - tcpstat_inc(tcps_outswtso); + error = tcp_if_output_tso(ifp, &m, sin6tosa(dst), ro->ro_rt, + IFCAP_TSOv6, mtu); + if (error || m == NULL) goto done; - } - CLR(m->m_pkthdr.csum_flags, M_TCP_TSO); /* * try to fragment the packet. case 1-b @@ -2715,8 +2710,13 @@ in6_proto_cksum_out(struct mbuf *m, struct ifnet *ifp) u_int16_t csum; offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); - csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst, - htonl(m->m_pkthdr.len - offset), htonl(nxt)); + if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) { + csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst, + htonl(0), htonl(nxt)); + } else { + csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst, + htonl(m->m_pkthdr.len - offset), htonl(nxt)); + } if (nxt == IPPROTO_TCP) offset += offsetof(struct tcphdr, th_sum); else if (nxt == IPPROTO_UDP)