From b8646e37c51ba7f8ab54cb45b28b2fdca46a65db Mon Sep 17 00:00:00 2001 From: bluhm Date: Sun, 7 May 2023 16:23:23 +0000 Subject: [PATCH] In preparation for TSO in software, clean up the fragment code. Use if_output_ml() to send mbuf lists to interfaces. This can be used for TSO, fragments, ARP and ND6. Rename variable fml to ml. In pf_route6() split the if else block. Put the safety check (hlen + firstlen < tlen) into ip_fragment(). It makes the code correct in case the packet is too short to be fragmented. This should not happen, but other functions also have this logic. No functional change. OK sashan@ --- sys/net/if.c | 63 +++++++++++++++++++++++++-------------- sys/net/if_bridge.c | 10 +++---- sys/net/if_var.h | 8 +++-- sys/net/pf.c | 40 +++++++++++-------------- sys/net/pf_norm.c | 8 ++--- sys/netinet/if_ether.c | 4 +-- sys/netinet/ip_output.c | 51 +++++++++++++------------------ sys/netinet6/ip6_output.c | 36 ++++++++-------------- sys/netinet6/nd6.c | 4 +-- sys/netinet6/nd6_nbr.c | 4 +-- 10 files changed, 112 insertions(+), 116 deletions(-) diff --git a/sys/net/if.c b/sys/net/if.c index 4534ed54faa..e9ea77f6132 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if.c,v 1.694 2023/04/26 19:54:35 mvs Exp $ */ +/* $OpenBSD: if.c,v 1.695 2023/05/07 16:23:23 bluhm Exp $ */ /* $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $ */ /* @@ -761,27 +761,6 @@ if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m) return (0); } -void -if_mqoutput(struct ifnet *ifp, struct mbuf_queue *mq, unsigned int *total, - struct sockaddr *dst, struct rtentry *rt) -{ - struct mbuf_list ml; - struct mbuf *m; - unsigned int len; - - mq_delist(mq, &ml); - len = ml_len(&ml); - while ((m = ml_dequeue(&ml)) != NULL) - ifp->if_output(ifp, m, rt_key(rt), rt); - - /* XXXSMP we also discard if other CPU enqueues */ - if (mq_len(mq) > 0) { - /* mbuf is back in queue. Discard.
*/ - atomic_sub_int(total, len + mq_purge(mq)); - } else - atomic_sub_int(total, len); -} - void if_input(struct ifnet *ifp, struct mbuf_list *ml) { @@ -843,6 +822,46 @@ if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af) return (0); } +int +if_output_ml(struct ifnet *ifp, struct mbuf_list *ml, + struct sockaddr *dst, struct rtentry *rt) +{ + struct mbuf *m; + int error = 0; + + while ((m = ml_dequeue(ml)) != NULL) { + error = ifp->if_output(ifp, m, dst, rt); + if (error) + break; + } + if (error) + ml_purge(ml); + + return error; +} + +int +if_output_mq(struct ifnet *ifp, struct mbuf_queue *mq, unsigned int *total, + struct sockaddr *dst, struct rtentry *rt) +{ + struct mbuf_list ml; + unsigned int len; + int error; + + mq_delist(mq, &ml); + len = ml_len(&ml); + error = if_output_ml(ifp, &ml, dst, rt); + + /* XXXSMP we also discard if other CPU enqueues */ + if (mq_len(mq) > 0) { + /* mbuf is back in queue. Discard. */ + atomic_sub_int(total, len + mq_purge(mq)); + } else + atomic_sub_int(total, len); + + return error; +} + int if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af) { diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index a202556e41f..463ca9cf96c 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_bridge.c,v 1.365 2023/02/27 09:35:32 jan Exp $ */ +/* $OpenBSD: if_bridge.c,v 1.366 2023/05/07 16:23:23 bluhm Exp $ */ /* * Copyright (c) 1999, 2000 Jason L. 
Wright (jason@thought.net) @@ -1826,7 +1826,7 @@ bridge_fragment(struct ifnet *brifp, struct ifnet *ifp, struct ether_header *eh, struct mbuf *m) { struct llc llc; - struct mbuf_list fml; + struct mbuf_list ml; int error = 0; int hassnap = 0; u_int16_t etype; @@ -1884,11 +1884,11 @@ bridge_fragment(struct ifnet *brifp, struct ifnet *ifp, struct ether_header *eh, return; } - error = ip_fragment(m, &fml, ifp, ifp->if_mtu); + error = ip_fragment(m, &ml, ifp, ifp->if_mtu); if (error) return; - while ((m = ml_dequeue(&fml)) != NULL) { + while ((m = ml_dequeue(&ml)) != NULL) { if (hassnap) { M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT); if (m == NULL) { @@ -1908,7 +1908,7 @@ bridge_fragment(struct ifnet *brifp, struct ifnet *ifp, struct ether_header *eh, break; } if (error) - ml_purge(&fml); + ml_purge(&ml); else ipstat_inc(ips_fragmented); diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 853dbccadbc..94480ac14c0 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if_var.h,v 1.125 2023/04/18 22:01:24 mvs Exp $ */ +/* $OpenBSD: if_var.h,v 1.126 2023/05/07 16:23:23 bluhm Exp $ */ /* $NetBSD: if.h,v 1.23 1996/05/07 02:40:27 thorpej Exp $ */ /* @@ -321,12 +321,14 @@ extern struct ifnet_head ifnetlist; void if_start(struct ifnet *); int if_enqueue(struct ifnet *, struct mbuf *); int if_enqueue_ifq(struct ifnet *, struct mbuf *); -void if_mqoutput(struct ifnet *, struct mbuf_queue *, unsigned int *, - struct sockaddr *, struct rtentry *); void if_input(struct ifnet *, struct mbuf_list *); void if_vinput(struct ifnet *, struct mbuf *); void if_input_process(struct ifnet *, struct mbuf_list *); int if_input_local(struct ifnet *, struct mbuf *, sa_family_t); +int if_output_ml(struct ifnet *, struct mbuf_list *, + struct sockaddr *, struct rtentry *); +int if_output_mq(struct ifnet *, struct mbuf_queue *, unsigned int *, + struct sockaddr *, struct rtentry *); int if_output_local(struct ifnet *, struct mbuf *, sa_family_t); void 
if_rtrequest_dummy(struct ifnet *, int, struct rtentry *); void p2p_rtrequest(struct ifnet *, int, struct rtentry *); diff --git a/sys/net/pf.c b/sys/net/pf.c index 56739a2e8fc..e98bda16551 100644 --- a/sys/net/pf.c +++ b/sys/net/pf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.1175 2023/05/03 10:32:47 kn Exp $ */ +/* $OpenBSD: pf.c,v 1.1176 2023/05/07 16:23:23 bluhm Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -6466,12 +6466,11 @@ void pf_route(struct pf_pdesc *pd, struct pf_state *st) { struct mbuf *m0; - struct mbuf_list fml; + struct mbuf_list ml; struct sockaddr_in *dst, sin; struct rtentry *rt = NULL; struct ip *ip; struct ifnet *ifp = NULL; - int error = 0; unsigned int rtableid; if (pd->m->m_pkthdr.pf.routed++ > 3) { @@ -6559,7 +6558,7 @@ pf_route(struct pf_pdesc *pd, struct pf_state *st) ipstat_inc(ips_outswcsum); ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); } - error = ifp->if_output(ifp, m0, sintosa(dst), rt); + ifp->if_output(ifp, m0, sintosa(dst), rt); goto done; } @@ -6575,19 +6574,10 @@ pf_route(struct pf_pdesc *pd, struct pf_state *st) goto bad; } - error = ip_fragment(m0, &fml, ifp, ifp->if_mtu); - if (error) + if (ip_fragment(m0, &ml, ifp, ifp->if_mtu) || + if_output_ml(ifp, &ml, sintosa(dst), rt)) goto done; - - while ((m0 = ml_dequeue(&fml)) != NULL) { - error = ifp->if_output(ifp, m0, sintosa(dst), rt); - if (error) - break; - } - if (error) - ml_purge(&fml); - else - ipstat_inc(ips_fragmented); + ipstat_inc(ips_fragmented); done: if_put(ifp); @@ -6695,16 +6685,20 @@ pf_route6(struct pf_pdesc *pd, struct pf_state *st) */ if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) { (void) pf_refragment6(&m0, mtag, dst, ifp, rt); - } else if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { + goto done; + } + + if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { ifp->if_output(ifp, m0, sin6tosa(dst), rt); - } else { - ip6stat_inc(ip6s_cantfrag); - if (st->rt != PF_DUPTO) - pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0, - ifp->if_mtu, pd->af, st->rule.ptr, 
pd->rdomain); - goto bad; + goto done; } + ip6stat_inc(ip6s_cantfrag); + if (st->rt != PF_DUPTO) + pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0, + ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain); + goto bad; + done: if_put(ifp); rtfree(rt); diff --git a/sys/net/pf_norm.c b/sys/net/pf_norm.c index db2f5755bcd..7ab4c00c73f 100644 --- a/sys/net/pf_norm.c +++ b/sys/net/pf_norm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_norm.c,v 1.226 2022/11/06 18:05:05 dlg Exp $ */ +/* $OpenBSD: pf_norm.c,v 1.227 2023/05/07 16:23:23 bluhm Exp $ */ /* * Copyright 2001 Niels Provos @@ -954,7 +954,7 @@ pf_refragment6(struct mbuf **m0, struct m_tag *mtag, struct sockaddr_in6 *dst, struct ifnet *ifp, struct rtentry *rt) { struct mbuf *m = *m0; - struct mbuf_list fml; + struct mbuf_list ml; struct pf_fragment_tag *ftag = (struct pf_fragment_tag *)(mtag + 1); u_int32_t mtu; u_int16_t hdrlen, extoff, maxlen; @@ -997,14 +997,14 @@ pf_refragment6(struct mbuf **m0, struct m_tag *mtag, struct sockaddr_in6 *dst, * we drop the packet. */ mtu = hdrlen + sizeof(struct ip6_frag) + maxlen; - error = ip6_fragment(m, &fml, hdrlen, proto, mtu); + error = ip6_fragment(m, &ml, hdrlen, proto, mtu); *m0 = NULL; /* ip6_fragment() has consumed original packet. 
*/ if (error) { DPFPRINTF(LOG_NOTICE, "refragment error %d", error); return (PF_DROP); } - while ((m = ml_dequeue(&fml)) != NULL) { + while ((m = ml_dequeue(&ml)) != NULL) { m->m_pkthdr.pf.flags |= PF_TAG_REFRAGMENTED; if (ifp == NULL) { ip6_forward(m, NULL, 0); diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index 4ecf82f0796..fbf811349c2 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_ether.c,v 1.263 2023/04/25 16:24:25 bluhm Exp $ */ +/* $OpenBSD: if_ether.c,v 1.264 2023/05/07 16:23:23 bluhm Exp $ */ /* $NetBSD: if_ether.c,v 1.31 1996/05/11 12:59:58 mycroft Exp $ */ /* @@ -723,7 +723,7 @@ arpcache(struct ifnet *ifp, struct ether_arp *ea, struct rtentry *rt) la->la_asked = 0; la->la_refreshed = 0; - if_mqoutput(ifp, &la->la_mq, &la_hold_total, rt_key(rt), rt); + if_output_mq(ifp, &la->la_mq, &la_hold_total, rt_key(rt), rt); return (0); } diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index b0c6ed720f6..c44ad8a136d 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_output.c,v 1.382 2022/08/12 17:04:16 bluhm Exp $ */ +/* $OpenBSD: ip_output.c,v 1.383 2023/05/07 16:23:23 bluhm Exp $ */ /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ /* @@ -104,7 +104,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, { struct ip *ip; struct ifnet *ifp = NULL; - struct mbuf_list fml; + struct mbuf_list ml; int hlen = sizeof (struct ip); int error = 0; struct route iproute; @@ -505,19 +505,10 @@ sendit: goto bad; } - error = ip_fragment(m, &fml, ifp, mtu); - if (error) + if ((error = ip_fragment(m, &ml, ifp, mtu)) || + (error = if_output_ml(ifp, &ml, sintosa(dst), ro->ro_rt))) goto done; - - while ((m = ml_dequeue(&fml)) != NULL) { - error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); - if (error) - break; - } - if (error) - ml_purge(&fml); - else - ipstat_inc(ips_fragmented); + ipstat_inc(ips_fragmented); 
done: if (ro == &iproute && ro->ro_rt) @@ -677,16 +668,15 @@ ip_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct route *ro, int fwd) #endif /* IPSEC */ int -ip_fragment(struct mbuf *m0, struct mbuf_list *fml, struct ifnet *ifp, +ip_fragment(struct mbuf *m0, struct mbuf_list *ml, struct ifnet *ifp, u_long mtu) { - struct mbuf *m; struct ip *ip; int firstlen, hlen, tlen, len, off; int error; - ml_init(fml); - ml_enqueue(fml, m0); + ml_init(ml); + ml_enqueue(ml, m0); ip = mtod(m0, struct ip *); hlen = ip->ip_hl << 2; @@ -705,10 +695,11 @@ ip_fragment(struct mbuf *m0, struct mbuf_list *fml, struct ifnet *ifp, in_proto_cksum_out(m0, NULL); /* - * Loop through length of segment after first fragment, + * Loop through length of payload after first fragment, * make new header and copy data of each part and link onto chain. */ for (off = hlen + firstlen; off < tlen; off += len) { + struct mbuf *m; struct ip *mhip; int mhlen; @@ -717,8 +708,7 @@ ip_fragment(struct mbuf *m0, struct mbuf_list *fml, struct ifnet *ifp, error = ENOBUFS; goto bad; } - ml_enqueue(fml, m); - + ml_enqueue(ml, m); if ((error = m_dup_pkthdr(m, m0, M_DONTWAIT)) != 0) goto bad; m->m_data += max_linkhdr; @@ -762,25 +752,26 @@ ip_fragment(struct mbuf *m0, struct mbuf_list *fml, struct ifnet *ifp, * Update first fragment by trimming what's been copied out * and updating header, then send each fragment (in order). 
*/ - m = m0; - m_adj(m, hlen + firstlen - tlen); - ip->ip_off |= htons(IP_MF); - ip->ip_len = htons(m->m_pkthdr.len); + if (hlen + firstlen < tlen) { + m_adj(m0, hlen + firstlen - tlen); + ip->ip_off |= htons(IP_MF); + } + ip->ip_len = htons(m0->m_pkthdr.len); ip->ip_sum = 0; - if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4)) - m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; + if (in_ifcap_cksum(m0, ifp, IFCAP_CSUM_IPv4)) + m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; else { ipstat_inc(ips_outswcsum); - ip->ip_sum = in_cksum(m, hlen); + ip->ip_sum = in_cksum(m0, hlen); } - ipstat_add(ips_ofragments, ml_len(fml)); + ipstat_add(ips_ofragments, ml_len(ml)); return (0); bad: ipstat_inc(ips_odropped); - ml_purge(fml); + ml_purge(ml); return (error); } diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 07e13d56524..6fe0f1c5aa1 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip6_output.c,v 1.272 2022/11/12 02:50:59 kn Exp $ */ +/* $OpenBSD: ip6_output.c,v 1.273 2023/05/07 16:23:24 bluhm Exp $ */ /* $KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $ */ /* @@ -165,7 +165,7 @@ ip6_output(struct mbuf *m, struct ip6_pktopts *opt, struct route_in6 *ro, { struct ip6_hdr *ip6; struct ifnet *ifp = NULL; - struct mbuf_list fml; + struct mbuf_list ml; int hlen, tlen; struct route_in6 ip6route; struct rtentry *rt = NULL; @@ -751,19 +751,10 @@ reroute: ip6->ip6_nxt = IPPROTO_FRAGMENT; } - error = ip6_fragment(m, &fml, hlen, nextproto, mtu); - if (error) + if ((error = ip6_fragment(m, &ml, hlen, nextproto, mtu)) || + (error = if_output_ml(ifp, &ml, sin6tosa(dst), ro->ro_rt))) goto done; - - while ((m = ml_dequeue(&fml)) != NULL) { - error = ifp->if_output(ifp, m, sin6tosa(dst), ro->ro_rt); - if (error) - break; - } - if (error) - ml_purge(&fml); - else - ip6stat_inc(ip6s_fragmented); + ip6stat_inc(ip6s_fragmented); done: if (ro == &ip6route && ro->ro_rt) { @@ -789,16 +780,15 @@ bad: } int -ip6_fragment(struct 
mbuf *m0, struct mbuf_list *fml, int hlen, - u_char nextproto, u_long mtu) +ip6_fragment(struct mbuf *m0, struct mbuf_list *ml, int hlen, u_char nextproto, + u_long mtu) { - struct mbuf *m; struct ip6_hdr *ip6; u_int32_t id; int tlen, len, off; int error; - ml_init(fml); + ml_init(ml); ip6 = mtod(m0, struct ip6_hdr *); tlen = m0->m_pkthdr.len; @@ -810,10 +800,11 @@ ip6_fragment(struct mbuf *m0, struct mbuf_list *fml, int hlen, id = htonl(ip6_randomid()); /* - * Loop through length of segment, + * Loop through length of payload, * make new header and copy data of each part and link onto chain. */ for (off = hlen; off < tlen; off += len) { + struct mbuf *m; struct mbuf *mlast; struct ip6_hdr *mhip6; struct ip6_frag *ip6f; @@ -823,8 +814,7 @@ ip6_fragment(struct mbuf *m0, struct mbuf_list *fml, int hlen, error = ENOBUFS; goto bad; } - ml_enqueue(fml, m); - + ml_enqueue(ml, m); if ((error = m_dup_pkthdr(m, m0, M_DONTWAIT)) != 0) goto bad; m->m_data += max_linkhdr; @@ -856,13 +846,13 @@ ip6_fragment(struct mbuf *m0, struct mbuf_list *fml, int hlen, ip6f->ip6f_nxt = nextproto; } - ip6stat_add(ip6s_ofragments, ml_len(fml)); + ip6stat_add(ip6s_ofragments, ml_len(ml)); m_freem(m0); return (0); bad: ip6stat_inc(ip6s_odropped); - ml_purge(fml); + ml_purge(ml); m_freem(m0); return (error); } diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index 47efb5c3972..5dc07a65a7f 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nd6.c,v 1.275 2023/05/04 06:56:56 bluhm Exp $ */ +/* $OpenBSD: nd6.c,v 1.276 2023/05/07 16:23:24 bluhm Exp $ */ /* $KAME: nd6.c,v 1.280 2002/06/08 19:52:07 itojun Exp $ */ /* @@ -1181,7 +1181,7 @@ fail: * meaningless. 
*/ nd6_llinfo_settimer(ln, nd6_gctimer); - if_mqoutput(ifp, &ln->ln_mq, &ln_hold_total, + if_output_mq(ifp, &ln->ln_mq, &ln_hold_total, rt_key(rt), rt); } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* probe right away */ diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index 583dc16d370..30b8ffedbc2 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nd6_nbr.c,v 1.148 2023/05/04 06:56:56 bluhm Exp $ */ +/* $OpenBSD: nd6_nbr.c,v 1.149 2023/05/07 16:23:24 bluhm Exp $ */ /* $KAME: nd6_nbr.c,v 1.61 2001/02/10 16:06:14 jinmei Exp $ */ /* @@ -851,7 +851,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) } rt->rt_flags &= ~RTF_REJECT; ln->ln_asked = 0; - if_mqoutput(ifp, &ln->ln_mq, &ln_hold_total, rt_key(rt), rt); + if_output_mq(ifp, &ln->ln_mq, &ln_hold_total, rt_key(rt), rt); freeit: rtfree(rt); -- 2.20.1