From: bluhm Date: Wed, 5 Apr 2023 19:35:23 +0000 (+0000) Subject: ARP has a queue of packets that should be sent after name resolution. X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=e0e67b981de1c260417f1b68fd50d5ec01427a2a;p=openbsd ARP has a queue of packets that should be sent after name resolution. ND6 did only hold a single packet. Unify the logic and add a mbuf hold queue to struct llinfo_nd6. This is MP safe and queue limits are tracked with atomic operations. New function if_mqoutput() has common code for ARP and ND6. ln_saddr6 holds the source address of the requesting packet. That is easier than fiddling with mbuf queue in nd6_ns_output(). OK kn@ --- diff --git a/sys/net/if.c b/sys/net/if.c index 8ae59647204..dbd9890531f 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if.c,v 1.685 2023/03/07 20:09:48 jan Exp $ */ +/* $OpenBSD: if.c,v 1.686 2023/04/05 19:35:23 bluhm Exp $ */ /* $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $ */ /* @@ -752,6 +752,27 @@ if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m) return (0); } +void +if_mqoutput(struct ifnet *ifp, struct mbuf_queue *mq, unsigned int *total, + struct sockaddr *dst, struct rtentry *rt) +{ + struct mbuf_list ml; + struct mbuf *m; + unsigned int len; + + mq_delist(mq, &ml); + len = ml_len(&ml); + while ((m = ml_dequeue(&ml)) != NULL) + ifp->if_output(ifp, m, rt_key(rt), rt); + + /* XXXSMP we also discard if other CPU enqueues */ + if (mq_len(mq) > 0) { + /* mbuf is back in queue. Discard. */ + atomic_sub_int(total, len + mq_purge(mq)); + } else + atomic_sub_int(total, len); +} + void if_input(struct ifnet *ifp, struct mbuf_list *ml) { diff --git a/sys/net/if_var.h b/sys/net/if_var.h index fc7512c0974..49b38eaf797 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if_var.h,v 1.122 2022/11/23 14:50:59 kn Exp $ */ +/* $OpenBSD: if_var.h,v 1.123 2023/04/05 19:35:23 bluhm Exp $ */ /* $NetBSD: if.h,v 1.23 1996/05/07 02:40:27 thorpej Exp $ */ /* @@ -320,6 +320,8 @@ extern struct ifnet_head ifnetlist; void if_start(struct ifnet *); int if_enqueue(struct ifnet *, struct mbuf *); int if_enqueue_ifq(struct ifnet *, struct mbuf *); +void if_mqoutput(struct ifnet *, struct mbuf_queue *, unsigned int *, + struct sockaddr *, struct rtentry *); void if_input(struct ifnet *, struct mbuf_list *); void if_vinput(struct ifnet *, struct mbuf *); void if_input_process(struct ifnet *, struct mbuf_list *); diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index a11d90c7108..b1268d3fa00 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_ether.c,v 1.258 2023/03/08 04:43:08 guenther Exp $ */ +/* $OpenBSD: if_ether.c,v 1.259 2023/04/05 19:35:23 bluhm Exp $ */ /* $NetBSD: if_ether.c,v 1.31 1996/05/11 12:59:58 mycroft Exp $ */ /* @@ -653,10 +653,7 @@ arpcache(struct ifnet *ifp, struct ether_arp *ea, struct rtentry *rt) struct in_addr *spa = (struct in_addr *)ea->arp_spa; char addr[INET_ADDRSTRLEN]; struct ifnet *rifp; - struct mbuf_list ml; - struct mbuf *m; time_t uptime; - unsigned int len; int changed = 0; KERNEL_ASSERT_LOCKED(); @@ -729,16 +726,7 @@ arpcache(struct ifnet *ifp, struct ether_arp *ea, struct rtentry *rt) la->la_asked = 0; la->la_refreshed = 0; - mq_delist(&la->la_mq, &ml); - len = ml_len(&ml); - while ((m = ml_dequeue(&ml)) != NULL) - ifp->if_output(ifp, m, rt_key(rt), rt); - /* XXXSMP we discard if other CPU enqueues */ - if (mq_len(&la->la_mq) > 0) { - /* mbuf is back in queue. Discard. */ - atomic_sub_int(&la_hold_total, len + mq_purge(&la->la_mq)); - } else - atomic_sub_int(&la_hold_total, len); + if_mqoutput(ifp, &la->la_mq, &la_hold_total, rt_key(rt), rt); return (0); } diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index 04a581bce46..c937040f420 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nd6.c,v 1.269 2023/04/05 10:40:37 kn Exp $ */ +/* $OpenBSD: nd6.c,v 1.270 2023/04/05 19:35:23 bluhm Exp $ */ /* $KAME: nd6.c,v 1.280 2002/06/08 19:52:07 itojun Exp $ */ /* @@ -87,6 +87,7 @@ int nd6_debug = 0; TAILQ_HEAD(llinfo_nd6_head, llinfo_nd6) nd6_list; struct pool nd6_pool; /* pool for llinfo_nd6 structures */ int nd6_inuse; +int ln_hold_total; /* [a] packets currently in the nd6 queue */ void nd6_timer(void *); void nd6_slowtimo(void *); @@ -304,26 +305,33 @@ nd6_llinfo_timer(struct rtentry *rt) nd6_llinfo_settimer(ln, RETRANS_TIMER / 1000); nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0); } else { - struct mbuf *m = ln->ln_hold; - if (m) { - ln->ln_hold = NULL; + struct mbuf_list ml; + struct mbuf *m; + unsigned int len; + + mq_delist(&ln->ln_mq, &ml); + len = ml_len(&ml); + while ((m = ml_dequeue(&ml)) != NULL) { /* * Fake rcvif to make the ICMP error * more helpful in diagnosing for the * receiver. - * XXX: should we consider - * older rcvif? + * XXX: should we consider older rcvif? */ m->m_pkthdr.ph_ifidx = rt->rt_ifidx; icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0); - if (ln->ln_hold == m) { - /* m is back in ln_hold. Discard. */ - m_freem(ln->ln_hold); - ln->ln_hold = NULL; - } } + + /* XXXSMP we also discard if other CPU enqueues */ + if (mq_len(&ln->ln_mq) > 0) { + /* mbuf is back in queue. Discard. */ + atomic_sub_int(&ln_hold_total, + len + mq_purge(&ln->ln_mq)); + } else + atomic_sub_int(&ln_hold_total, len); + nd6_free(rt); ln = NULL; } @@ -618,9 +626,8 @@ nd6_invalidate(struct rtentry *rt) struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo; struct sockaddr_dl *sdl = satosdl(rt->rt_gateway); - m_freem(ln->ln_hold); + atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq)); sdl->sdl_alen = 0; - ln->ln_hold = NULL; ln->ln_state = ND6_LLINFO_INCOMPLETE; ln->ln_asked = 0; } @@ -805,6 +812,7 @@ nd6_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt) log(LOG_DEBUG, "%s: pool get failed\n", __func__); break; } + mq_init(&ln->ln_mq, LN_HOLD_QUEUE, IPL_SOFTNET); nd6_inuse++; ln->ln_rt = rt; /* this is required for "ndp" command. - shin */ @@ -926,7 +934,7 @@ nd6_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt) rt->rt_expire = 0; rt->rt_llinfo = NULL; rt->rt_flags &= ~RTF_LLINFO; - m_freem(ln->ln_hold); + atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq)); pool_put(&nd6_pool, ln); break; @@ -1130,21 +1138,8 @@ fail: * meaningless. */ nd6_llinfo_settimer(ln, nd6_gctimer); - - if (ln->ln_hold) { - struct mbuf *n = ln->ln_hold; - ln->ln_hold = NULL; - /* - * we assume ifp is not a p2p here, so just - * set the 2nd argument as the 1st one. - */ - ifp->if_output(ifp, n, rt_key(rt), rt); - if (ln->ln_hold == n) { - /* n is back in ln_hold. Discard. */ - m_freem(ln->ln_hold); - ln->ln_hold = NULL; - } - } + if_mqoutput(ifp, &ln->ln_mq, &ln_hold_total, + rt_key(rt), rt); } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* probe right away */ nd6_llinfo_settimer(ln, 0); @@ -1333,13 +1328,23 @@ nd6_resolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, /* * There is a neighbor cache entry, but no ethernet address - * response yet. Replace the held mbuf (if any) with this - * latest one. + * response yet. Insert mbuf in hold queue if below limit. + * If above the limit free the queue without queuing the new packet. */ if (ln->ln_state == ND6_LLINFO_NOSTATE) ln->ln_state = ND6_LLINFO_INCOMPLETE; - m_freem(ln->ln_hold); - ln->ln_hold = m; + /* source address of prompting packet is needed by nd6_ns_output() */ + if (m->m_len >= sizeof(struct ip6_hdr)) { + memcpy(&ln->ln_saddr6, &mtod(m, struct ip6_hdr *)->ip6_src, + sizeof(ln->ln_saddr6)); + } + if (atomic_inc_int_nv(&ln_hold_total) <= LN_HOLD_TOTAL) { + if (mq_push(&ln->ln_mq, m) != 0) + atomic_dec_int(&ln_hold_total); + } else { + atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq) + 1); + m_freem(m); + } /* * If there has been no NS for the neighbor after entering the diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h index d63023c5eb9..b3c2be97bcb 100644 --- a/sys/netinet6/nd6.h +++ b/sys/netinet6/nd6.h @@ -1,4 +1,4 @@ -/* $OpenBSD: nd6.h,v 1.95 2023/01/06 14:35:34 kn Exp $ */ +/* $OpenBSD: nd6.h,v 1.96 2023/04/05 19:35:23 bluhm Exp $ */ /* $KAME: nd6.h,v 1.95 2002/06/08 11:31:06 itojun Exp $ */ /* @@ -81,12 +81,17 @@ struct in6_ndireq { struct llinfo_nd6 { TAILQ_ENTRY(llinfo_nd6) ln_list; struct rtentry *ln_rt; - struct mbuf *ln_hold; /* last packet until resolved/timeout */ + struct mbuf_queue ln_mq; /* hold packets until resolved */ + struct in6_addr ln_saddr6; /* source of prompting packet */ long ln_asked; /* number of queries already sent for addr */ int ln_byhint; /* # of times we made it reachable by UL hint */ short ln_state; /* reachability state */ short ln_router; /* 2^0: ND6 router bit */ }; +#define LN_HOLD_QUEUE 10 +#define LN_HOLD_TOTAL 100 + +extern int ln_hold_total; #define ND6_LLINFO_PERMANENT(n) ((n)->ln_rt->rt_expire == 0) diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index 0c168f20b95..31bbc8f2e2e 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nd6_nbr.c,v 1.143 2023/03/31 19:43:33 bluhm Exp $ */ +/* $OpenBSD: nd6_nbr.c,v 1.144 2023/04/05 19:35:23 bluhm Exp $ */ /* $KAME: nd6_nbr.c,v 1.61 2001/02/10 16:06:14 jinmei Exp $ */ /* @@ -451,23 +451,14 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, * - if taddr is link local saddr6 must be link local as well * Otherwise, we perform the source address selection as usual. */ - struct ip6_hdr *hip6; /* hold ip6 */ - struct in6_addr *saddr6; - - if (ln && ln->ln_hold) { - hip6 = mtod(ln->ln_hold, struct ip6_hdr *); - if (sizeof(*hip6) <= ln->ln_hold->m_len) { - saddr6 = &hip6->ip6_src; - if (saddr6 && IN6_IS_ADDR_LINKLOCAL(taddr6) && - !IN6_IS_ADDR_LINKLOCAL(saddr6)) - saddr6 = NULL; - } else - saddr6 = NULL; - } else - saddr6 = NULL; - if (saddr6 && in6ifa_ifpwithaddr(ifp, saddr6)) - src_sa.sin6_addr = *saddr6; - else { + if (ln != NULL) + src_sa.sin6_addr = ln->ln_saddr6; + + if (!IN6_IS_ADDR_LINKLOCAL(taddr6) || + IN6_IS_ADDR_UNSPECIFIED(&src_sa.sin6_addr) || + IN6_IS_ADDR_LINKLOCAL(&src_sa.sin6_addr) || + !in6ifa_ifpwithaddr(ifp, &src_sa.sin6_addr)) { + struct rtentry *rt; rt = rtalloc(sin6tosa(&dst_sa), RT_RESOLVE, @@ -832,20 +823,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) } rt->rt_flags &= ~RTF_REJECT; ln->ln_asked = 0; - if (ln->ln_hold) { - struct mbuf *n = ln->ln_hold; - ln->ln_hold = NULL; - /* - * we assume ifp is not a loopback here, so just set the 2nd - * argument as the 1st one. - */ - ifp->if_output(ifp, n, rt_key(rt), rt); - if (ln->ln_hold == n) { - /* n is back in ln_hold. Discard. */ - m_freem(ln->ln_hold); - ln->ln_hold = NULL; - } - } + if_mqoutput(ifp, &ln->ln_mq, &ln_hold_total, rt_key(rt), rt); freeit: rtfree(rt);