From 13d31ce9d625cd457d307aacf713d55b48d659b2 Mon Sep 17 00:00:00 2001 From: claudio Date: Mon, 25 Jul 2022 16:37:55 +0000 Subject: [PATCH] Properly handle nexthop state changes in the decision process In rev 1.90 of rde_decide.c the re->active cache of the best prefix was replaced with a call to prefix_best(). This introduced a bug because the nexthop state at that time may have changed already. As a result when a nexthop became unreachable prefix_evaluate() had oldbest = NULL and newbest = NULL and did not withdraw the prefix from FIB and Adj-RIB-Out. To fix this store the nexthop state per prefix and introduce prefix_evaluate_nexthop() which removes the prefix from the decision list, updates the nexthop state of the prefix and reinserts the prefix. Doing this ensures that prefix_best() always reports the same result once the decison process is done. prefix_best() and prefix_eligible() only depend on data stored on the prefix itself. OK tb@ --- usr.sbin/bgpd/rde.c | 5 +- usr.sbin/bgpd/rde.h | 14 ++++- usr.sbin/bgpd/rde_decide.c | 113 ++++++++++++++++++++++++++----------- usr.sbin/bgpd/rde_rib.c | 52 ++++++----------- 4 files changed, 109 insertions(+), 75 deletions(-) diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c index ea0a746c0f2..7c5f83c538f 100644 --- a/usr.sbin/bgpd/rde.c +++ b/usr.sbin/bgpd/rde.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.c,v 1.558 2022/07/23 08:44:06 claudio Exp $ */ +/* $OpenBSD: rde.c,v 1.559 2022/07/25 16:37:55 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -4249,8 +4249,7 @@ network_dump_upcall(struct rib_entry *re, void *ptr) bzero(&kf, sizeof(kf)); memcpy(&kf.prefix, &addr, sizeof(kf.prefix)); - if (prefix_nexthop(p) == NULL || - prefix_nexthop(p)->state != NEXTHOP_REACH) + if (prefix_nhvalid(p)) kf.nexthop.aid = kf.prefix.aid; else memcpy(&kf.nexthop, &prefix_nexthop(p)->true_nexthop, diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h index 4499d329931..d6efbc1bfa9 100644 --- a/usr.sbin/bgpd/rde.h +++ b/usr.sbin/bgpd/rde.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.h,v 1.259 2022/07/11 17:08:21 claudio Exp $ */ +/* $OpenBSD: rde.h,v 1.260 2022/07/25 16:37:55 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker and @@ -362,6 +362,8 @@ struct prefix { #define NEXTHOP_REJECT 0x02 #define NEXTHOP_BLACKHOLE 0x04 #define NEXTHOP_NOMODIFY 0x08 +#define NEXTHOP_MASK 0x0f +#define NEXTHOP_VALID 0x80 struct filterstate { struct rde_aspath aspath; @@ -522,6 +524,8 @@ int prefix_eligible(struct prefix *); struct prefix *prefix_best(struct rib_entry *); void prefix_evaluate(struct rib_entry *, struct prefix *, struct prefix *); +void prefix_evaluate_nexthop(struct prefix *, enum nexthop_state, + enum nexthop_state); /* rde_filter.c */ void rde_apply_set(struct filter_set_head *, struct rde_peer *, @@ -663,7 +667,13 @@ prefix_nexthop(struct prefix *p) static inline uint8_t prefix_nhflags(struct prefix *p) { - return (p->nhflags); + return (p->nhflags & NEXTHOP_MASK); +} + +static inline int +prefix_nhvalid(struct prefix *p) +{ + return ((p->nhflags & NEXTHOP_VALID) != 0); } static inline uint8_t diff --git a/usr.sbin/bgpd/rde_decide.c b/usr.sbin/bgpd/rde_decide.c index 0a6c6b79f4b..062fa78b55c 100644 --- a/usr.sbin/bgpd/rde_decide.c +++ b/usr.sbin/bgpd/rde_decide.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_decide.c,v 1.96 2022/07/18 09:42:46 claudio Exp $ */ +/* $OpenBSD: rde_decide.c,v 1.97 2022/07/25 16:37:55 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker @@ -147,28 +147,10 @@ prefix_cmp(struct prefix *p1, struct prefix *p2, int *testall) peer1 = prefix_peer(p1); peer2 = prefix_peer(p2); - /* pathes with errors are not eligible */ - if (asp1 == NULL || asp1->flags & F_ATTR_PARSE_ERR) - return -rv; - if (asp2 == NULL || asp2->flags & F_ATTR_PARSE_ERR) - return rv; - - /* only loop free pathes are eligible */ - if (asp1->flags & F_ATTR_LOOP) - return -rv; - if (asp2->flags & F_ATTR_LOOP) - return rv; - - /* - * 1. check if prefix is eligible a.k.a reachable - * A NULL nexthop is eligible since it is used for locally - * announced networks. - */ - if (prefix_nexthop(p2) != NULL && - prefix_nexthop(p2)->state != NEXTHOP_REACH) + /* 1. check if prefix is eligible a.k.a reachable */ + if (!prefix_eligible(p2)) return rv; - if (prefix_nexthop(p1) != NULL && - prefix_nexthop(p1)->state != NEXTHOP_REACH) + if (!prefix_eligible(p1)) return -rv; /* bump rv, from here on prefix is considered valid */ @@ -503,16 +485,13 @@ int prefix_eligible(struct prefix *p) { struct rde_aspath *asp = prefix_aspath(p); - struct nexthop *nh = prefix_nexthop(p); /* The aspath needs to be loop and error free */ if (asp == NULL || asp->flags & (F_ATTR_LOOP|F_ATTR_PARSE_ERR)) return 0; - /* - * If the nexthop exists it must be reachable. - * It is OK if the nexthop does not exist (local announcement). - */ - if (nh != NULL && nh->state != NEXTHOP_REACH) + + /* The nexthop must be valid. */ + if (!prefix_nhvalid(p)) return 0; return 1; @@ -561,16 +540,11 @@ prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix *old) } oldbest = prefix_best(re); - if (old != NULL) prefix_remove(old, re); - if (new != NULL) prefix_insert(new, NULL, re); - - newbest = TAILQ_FIRST(&re->prefix_h); - if (newbest != NULL && !prefix_eligible(newbest)) - newbest = NULL; + newbest = prefix_best(re); /* * If the active prefix changed or the active prefix was removed @@ -597,3 +571,74 @@ prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix *old) if ((new != NULL && prefix_eligible(new)) || old != NULL) rde_generate_updates(rib, newbest, NULL, EVAL_ALL); } + +void +prefix_evaluate_nexthop(struct prefix *p, enum nexthop_state state, + enum nexthop_state oldstate) +{ + struct rib_entry *re = prefix_re(p); + struct prefix *newbest, *oldbest; + struct rib *rib; + + /* Skip non local-RIBs or RIBs that are flagged as noeval. */ + rib = re_rib(re); + if (rib->flags & F_RIB_NOEVALUATE) { + log_warnx("%s: prefix with F_RIB_NOEVALUATE hit", __func__); + return; + } + + if (oldstate == state) { + /* + * The state of the nexthop did not change. The only + * thing that may have changed is the true_nexthop + * or other internal infos. This will not change + * the routing decision so shortcut here. + * XXX needs to be changed for ECMP + */ + if (state == NEXTHOP_REACH) { + if ((rib->flags & F_RIB_NOFIB) == 0 && + p == prefix_best(re)) + rde_send_kroute(rib, p, NULL); + } + return; + } + + /* + * Re-evaluate the prefix by removing the prefix then updating the + * nexthop state and reinserting the prefix again. + */ + oldbest = prefix_best(re); + prefix_remove(p, re); + + if (state == NEXTHOP_REACH) + p->nhflags |= NEXTHOP_VALID; + else + p->nhflags &= ~NEXTHOP_VALID; + + prefix_insert(p, NULL, re); + newbest = prefix_best(re); + + /* + * If the active prefix changed or the active prefix was removed + * and added again then generate an update. + */ + if (oldbest != newbest || newbest == p) { + /* + * Send update withdrawing oldbest and adding newbest + * but remember that newbest may be NULL aka ineligible. + * Additional decision may be made by the called functions. + */ + rde_generate_updates(rib, newbest, oldbest, EVAL_DEFAULT); + if ((rib->flags & F_RIB_NOFIB) == 0) + rde_send_kroute(rib, newbest, oldbest); + return; + } + + /* + * If there are peers with 'rde evaluate all' every update needs + * to be passed on (not only a change of the best prefix). + * rde_generate_updates() will then take care of distribution. + */ + if (rde_evaluate_all()) + rde_generate_updates(rib, newbest, NULL, EVAL_ALL); +} diff --git a/usr.sbin/bgpd/rde_rib.c b/usr.sbin/bgpd/rde_rib.c index 74da9508941..d07171010d2 100644 --- a/usr.sbin/bgpd/rde_rib.c +++ b/usr.sbin/bgpd/rde_rib.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_rib.c,v 1.240 2022/07/08 10:01:52 claudio Exp $ */ +/* $OpenBSD: rde_rib.c,v 1.241 2022/07/25 16:37:55 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker @@ -1521,37 +1521,6 @@ prefix_bypeer(struct rib_entry *re, struct rde_peer *peer, uint32_t path_id) return (NULL); } -static void -prefix_evaluate_all(struct prefix *p, enum nexthop_state state, - enum nexthop_state oldstate) -{ - struct rib_entry *re = prefix_re(p); - - /* Skip non local-RIBs or RIBs that are flagged as noeval. */ - if (re_rib(re)->flags & F_RIB_NOEVALUATE) { - log_warnx("%s: prefix with F_RIB_NOEVALUATE hit", __func__); - return; - } - - if (oldstate == state) { - /* - * The state of the nexthop did not change. The only - * thing that may have changed is the true_nexthop - * or other internal infos. This will not change - * the routing decision so shortcut here. - */ - if (state == NEXTHOP_REACH) { - if ((re_rib(re)->flags & F_RIB_NOFIB) == 0 && - p == prefix_best(re)) - rde_send_kroute(re_rib(re), p, NULL); - } - return; - } - - /* redo the route decision */ - prefix_evaluate(prefix_re(p), p, p); -} - /* kill a prefix. */ void prefix_destroy(struct prefix *p) @@ -1724,7 +1693,7 @@ nexthop_runner(void) p = nh->next_prefix; for (j = 0; p != NULL && j < RDE_RUNNER_ROUNDS; j++) { - prefix_evaluate_all(p, nh->state, nh->oldstate); + prefix_evaluate_nexthop(p, nh->state, nh->oldstate); p = LIST_NEXT(p, entry.list.nexthop); } @@ -1826,15 +1795,24 @@ nexthop_modify(struct nexthop *setnh, enum action_types type, uint8_t aid, void nexthop_link(struct prefix *p) { - if (p->nexthop == NULL) - return; - if (p->flags & PREFIX_FLAG_ADJOUT) + p->nhflags &= ~NEXTHOP_VALID; + + if (p->flags & PREFIX_FLAG_ADJOUT) { + /* All nexthops are valid in Adj-RIB-Out */ + p->nhflags |= NEXTHOP_VALID; return; + } /* no need to link prefixes in RIBs that have no decision process */ if (re_rib(prefix_re(p))->flags & F_RIB_NOEVALUATE) return; + /* self-announce networks use nexthop NULL and are valid as well */ + if (p->nexthop == NULL || p->nexthop->state == NEXTHOP_REACH) + p->nhflags |= NEXTHOP_VALID; + + if (p->nexthop == NULL) + return; p->flags |= PREFIX_NEXTHOP_LINKED; LIST_INSERT_HEAD(&p->nexthop->prefix_h, p, entry.list.nexthop); } @@ -1842,6 +1820,8 @@ nexthop_link(struct prefix *p) void nexthop_unlink(struct prefix *p) { + p->nhflags &= ~NEXTHOP_VALID; + if (p->nexthop == NULL || (p->flags & PREFIX_NEXTHOP_LINKED) == 0) return; -- 2.20.1