From 555d04f4a08b338ebfedd07243e099cb01b35c3c Mon Sep 17 00:00:00 2001 From: claudio Date: Tue, 16 Aug 2022 08:14:58 +0000 Subject: [PATCH] Do not send kroutes from the RDE to the FIB with the true_nexthop but instead use exit_nexthop (the nexthop from BGP). The FIB code can then do the lookup and replace the nexthop in the FIB. This solves an issue when multiple nexthops change concurrently. In the RDE the decision process handles these changes ansynchronously which resulted in bad true_nexthops to be sent to the FIB. The exit_nethop is stable so the data sent to the FIB is always correct. Fix a bug in netxhop tracking introduced in 1.280. On RTM_CHANGE when the nexthop of a kroute changes a knexthop_send_update() must be sent but knexthop_track() does not do that because the kroute did not change. Introduce a knexthop_update() function for this case instead. OK tb@ --- usr.sbin/bgpd/kroute.c | 88 ++++++++++++++++++++++++++++++++++++------ usr.sbin/bgpd/rde.c | 12 +----- 2 files changed, 78 insertions(+), 22 deletions(-) diff --git a/usr.sbin/bgpd/kroute.c b/usr.sbin/bgpd/kroute.c index 3703e6044e0..d17f8737d7a 100644 --- a/usr.sbin/bgpd/kroute.c +++ b/usr.sbin/bgpd/kroute.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kroute.c,v 1.288 2022/08/10 14:21:24 tb Exp $ */ +/* $OpenBSD: kroute.c,v 1.289 2022/08/16 08:14:58 claudio Exp $ */ /* * Copyright (c) 2022 Claudio Jeker @@ -160,8 +160,10 @@ void kif_clear(void); int kroute_validate(struct kroute *); int kroute6_validate(struct kroute6 *); +int knexthop_true_nexthop(struct ktable *, struct kroute_full *); void knexthop_validate(struct ktable *, struct knexthop *); void knexthop_track(struct ktable *, u_short); +void knexthop_update(struct ktable *, struct kroute_full *); void knexthop_send_update(struct knexthop *); struct kroute *kroute_match(struct ktable *, struct bgpd_addr *, int); struct kroute6 *kroute6_match(struct ktable *, struct bgpd_addr *, int); @@ -453,6 +455,8 @@ kr_change(u_int rtableid, struct kroute_full *kf) return (0); kf->flags |= F_BGPD; kf->priority = RTP_MINE; + if (!knexthop_true_nexthop(kt, kf)) + return kroute_remove(kt, kf, 1); switch (kf->prefix.aid) { case AID_INET: return (kr4_change(kt, kf)); @@ -1406,10 +1410,10 @@ kr6_tofull(struct kroute6 *kr6) bzero(&kf, sizeof(kf)); kf.prefix.aid = AID_INET6; - memcpy(&kf.prefix.v6, &kr6->prefix, sizeof(struct in6_addr)); + kf.prefix.v6 = kr6->prefix; kf.prefix.scope_id = kr6->prefix_scope_id; kf.nexthop.aid = AID_INET6; - memcpy(&kf.nexthop.v6, &kr6->nexthop, sizeof(struct in6_addr)); + kf.nexthop.v6 = kr6->nexthop; kf.nexthop.scope_id = kr6->nexthop_scope_id; strlcpy(kf.label, rtlabel_id2name(kr6->labelid), sizeof(kf.label)); kf.flags = kr6->flags; @@ -2124,6 +2128,51 @@ kroute6_validate(struct kroute6 *kr) return (kif->k.nh_reachable); } +int +knexthop_true_nexthop(struct ktable *kt, struct kroute_full *kf) +{ + struct bgpd_addr gateway = { 0 }; + struct knexthop *kn; + struct kroute *kr; + struct kroute6 *kr6; + + /* + * Ignore the nexthop for VPN routes. The gateway is forced + * to an mpe(4) interface route using an MPLS label. + */ + switch (kf->prefix.aid) { + case AID_VPN_IPv4: + case AID_VPN_IPv6: + return 1; + } + + kn = knexthop_find(kt, &kf->nexthop); + if (kn == NULL) { + log_warnx("%s: nexthop %s not found", __func__, + log_addr(&kf->nexthop)); + return 0; + } + if (kn->kroute == NULL) + return 0; + + switch (kn->nexthop.aid) { + case AID_INET: + kr = kn->kroute; + gateway.aid = AID_INET; + gateway.v4.s_addr = kr->nexthop.s_addr; + break; + case AID_INET6: + kr6 = kn->kroute; + gateway.aid = AID_INET6; + gateway.v6 = kr6->nexthop; + gateway.scope_id = kr6->nexthop_scope_id; + break; + } + + kf->nexthop = gateway; + return 1; +} + void knexthop_validate(struct ktable *kt, struct knexthop *kn) { @@ -2150,7 +2199,7 @@ knexthop_validate(struct ktable *kt, struct knexthop *kn) /* * Send update if nexthop route changed under us if * the route remains the same then the NH state has not - * changed. State changes are tracked by knexthop_track(). + * changed. */ if (kr != oldk) knexthop_send_update(kn); @@ -2170,6 +2219,9 @@ knexthop_validate(struct ktable *kt, struct knexthop *kn) } } +/* + * Called on interface state change. + */ void knexthop_track(struct ktable *kt, u_short ifindex) { @@ -2180,6 +2232,20 @@ knexthop_track(struct ktable *kt, u_short ifindex) knexthop_validate(kt, kn); } +/* + * Called on route change. + */ +void +knexthop_update(struct ktable *kt, struct kroute_full *kf) +{ + struct knexthop *kn; + + RB_FOREACH(kn, knexthop_tree, KT2KNT(kt)) + if (prefix_compare(&kf->prefix, &kn->nexthop, + kf->prefixlen) == 0) + knexthop_send_update(kn); +} + void knexthop_send_update(struct knexthop *kn) { @@ -2187,8 +2253,8 @@ knexthop_send_update(struct knexthop *kn) struct kroute *kr; struct kroute6 *kr6; - bzero(&n, sizeof(n)); - memcpy(&n.nexthop, &kn->nexthop, sizeof(n.nexthop)); + memset(&n, 0, sizeof(n)); + n.nexthop = kn->nexthop; if (kn->kroute == NULL) { n.valid = 0; /* NH is not valid */ @@ -2218,14 +2284,12 @@ knexthop_send_update(struct knexthop *kn) if (memcmp(&kr6->nexthop, &in6addr_any, sizeof(struct in6_addr)) != 0) { n.gateway.aid = AID_INET6; - memcpy(&n.gateway.v6, &kr6->nexthop, - sizeof(struct in6_addr)); + n.gateway.v6 = kr6->nexthop; n.gateway.scope_id = kr6->nexthop_scope_id; } if (n.connected) { n.net.aid = AID_INET6; - memcpy(&n.net.v6, &kr6->prefix, - sizeof(struct in6_addr)); + n.net.v6 = kr6->prefix; n.net.scope_id = kr6->prefix_scope_id; n.netlen = kr6->prefixlen; } @@ -3104,7 +3168,7 @@ kr_fib_change(struct ktable *kt, struct kroute_full *kf, int type, int mpath) kr_redistribute(IMSG_NETWORK_ADD, kt, kr_tofull(kr)); if (kr->flags & F_NEXTHOP && changed) - knexthop_track(kt, kf->ifindex); + knexthop_update(kt, kf); } else { kr->flags &= ~F_BGPD_INSERTED; } @@ -3177,7 +3241,7 @@ add4: kt, kr6_tofull(kr6)); if (kr6->flags & F_NEXTHOP && changed) - knexthop_track(kt, kf->ifindex); + knexthop_update(kt, kf); } else { kr6->flags &= ~F_BGPD_INSERTED; } diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c index b23295553bf..bffe049a11d 100644 --- a/usr.sbin/bgpd/rde.c +++ b/usr.sbin/bgpd/rde.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.c,v 1.562 2022/08/10 14:17:01 claudio Exp $ */ +/* $OpenBSD: rde.c,v 1.563 2022/08/16 08:14:58 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -3056,8 +3056,7 @@ rde_send_kroute(struct rib *rib, struct prefix *new, struct prefix *old) kf.flags |= F_REJECT; if (prefix_nhflags(p) == NEXTHOP_BLACKHOLE) kf.flags |= F_BLACKHOLE; - memcpy(&kf.nexthop, &prefix_nexthop(p)->true_nexthop, - sizeof(kf.nexthop)); + kf.nexthop = prefix_nexthop(p)->exit_nexthop; strlcpy(kf.label, rtlabel_id2name(prefix_aspath(p)->rtlabelid), sizeof(kf.label)); } @@ -3072,13 +3071,6 @@ rde_send_kroute(struct rib *rib, struct prefix *new, struct prefix *old) SIMPLEQ_FOREACH(vpn, &conf->l3vpns, entry) { if (!rde_l3vpn_import(prefix_communities(p), vpn)) continue; - /* must send exit_nexthop so that correct MPLS tunnel - * is chosen - */ - if (type == IMSG_KROUTE_CHANGE) - memcpy(&kf.nexthop, - &prefix_nexthop(p)->exit_nexthop, - sizeof(kf.nexthop)); /* XXX not ideal but this will change */ kf.ifindex = if_nametoindex(vpn->ifmpe); if (imsg_compose(ibuf_main, type, vpn->rtableid, 0, -1, -- 2.20.1