From c95756fbdbd67a2f21d1de9fed4db3122a1cfd6d Mon Sep 17 00:00:00 2001 From: bluhm Date: Mon, 17 Jun 2024 11:13:43 +0000 Subject: [PATCH] In vmx(4) TSO must pullup headers into first mbuf. Forwarding IPv6 packets from vmx with LRO to vmx with TSO did not work. vmx(4) has the requirement that all headers are in the first mbuf. ip6_forward() is quite dumb. It calls m_copym() to create a mbuf that might be used for sending ICMP6 later. After passing the forwarded packet down to ether_encap(), m_prepend() is used to restore the ethernet header. As the mbuf cluster has been copied, it is read only now. That means m_prepend() does not provide the empty space at the beginning of the cluster, but allocates a new mbuf that contains only the ethernet header. vmx(4) cannot transmit such a TSO packet and drops it. Solution is to call m_pullup() in vmxnet3_start(). If we ended up in such a miserable condition, use the first mbuf in the chain and move all headers into it. OK jan@ --- sys/dev/pci/if_vmx.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/sys/dev/pci/if_vmx.c b/sys/dev/pci/if_vmx.c index e70d9b50f8a..c3ad040ce9b 100644 --- a/sys/dev/pci/if_vmx.c +++ b/sys/dev/pci/if_vmx.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_vmx.c,v 1.87 2024/06/07 08:44:25 jan Exp $ */ +/* $OpenBSD: if_vmx.c,v 1.88 2024/06/17 11:13:43 bluhm Exp $ */ /* * Copyright (c) 2013 Tsubai Masanari @@ -1619,6 +1619,8 @@ vmxnet3_start(struct ifqueue *ifq) rgen = ring->gen; for (;;) { + int hdrlen; + if (free <= NTXSEGS) { ifq_set_oactive(ifq); break; @@ -1628,6 +1630,30 @@ vmxnet3_start(struct ifqueue *ifq) if (m == NULL) break; + /* + * Headers for Ether, IP, TCP including options must lay in + * first mbuf to support TSO. Usually our stack gets that + * right. To avoid packet parsing here, make a rough estimate + * for simple IPv4. Cases seen in the wild contain only ether + * header in separate mbuf. To support IPv6 with TCP options, + * move as much as possible into first mbuf. Realloc mbuf + * before bus dma load. + */ + hdrlen = sizeof(struct ether_header) + sizeof(struct ip) + + sizeof(struct tcphdr); + if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) && + m->m_len < hdrlen && hdrlen <= m->m_pkthdr.len) { + hdrlen = MHLEN; + /* m_pullup preserves alignment, reserve space */ + hdrlen -= mtod(m, unsigned long) & (sizeof(long) - 1); + if (hdrlen > m->m_pkthdr.len) + hdrlen = m->m_pkthdr.len; + if ((m = m_pullup(m, hdrlen)) == NULL) { + ifq->ifq_errors++; + continue; + } + } + map = ring->dmap[prod]; if (vmx_load_mbuf(sc->sc_dmat, map, m) != 0) { -- 2.20.1