-/* $OpenBSD: rde.c,v 1.548 2022/07/07 10:46:54 claudio Exp $ */
+/* $OpenBSD: rde.c,v 1.549 2022/07/07 12:16:04 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
struct attr *a;
struct nexthop *nexthop;
struct rib_entry *re;
+ struct prefix *xp;
struct rde_peer *peer;
void *bp;
time_t staletime;
rib.prefixlen = p->pt->prefixlen;
rib.origin = asp->origin;
rib.validation_state = p->validation_state;
+ rib.dmetric = p->dmetric;
rib.flags = 0;
re = prefix_re(p);
- if (re != NULL && prefix_best(re) == p)
- rib.flags |= F_PREF_BEST;
+ /*
+  * Flag p according to its dmetric. The walk starts at the head of
+  * the rib entry's prefix list and ends at p or at the first prefix
+  * whose dmetric is not BEST, ECMP or AS_WIDE.
+  * prefix_re() may return NULL (the code this replaces checked for
+  * it), so re must not be dereferenced unconditionally.
+  */
+ if (re != NULL) {
+ 	TAILQ_FOREACH(xp, &re->prefix_h, entry.list.rib) {
+ 		switch (xp->dmetric) {
+ 		case PREFIX_DMETRIC_BEST:
+ 			if (xp == p)
+ 				rib.flags |= F_PREF_BEST;
+ 			break;
+ 		case PREFIX_DMETRIC_ECMP:
+ 			if (xp == p)
+ 				rib.flags |= F_PREF_ECMP;
+ 			break;
+ 		case PREFIX_DMETRIC_AS_WIDE:
+ 			if (xp == p)
+ 				rib.flags |= F_PREF_AS_WIDE;
+ 			break;
+ 		default:
+ 			xp = NULL; /* stop loop */
+ 			break;
+ 		}
+ 		/*
+ 		 * Leave the loop explicitly. TAILQ_FOREACH advances with
+ 		 * TAILQ_NEXT(xp), which would dereference the NULL xp set
+ 		 * above. Once p itself was seen nothing is left to do.
+ 		 */
+ 		if (xp == NULL || xp == p)
+ 			break;
+ 	}
+ }
if (!peer->conf.ebgp)
rib.flags |= F_PREF_INTERNAL;
if (asp->flags & F_PREFIX_ANNOUNCED)
rde_dump_filter(struct prefix *p, struct ctl_show_rib_request *req, int adjout)
{
struct rde_aspath *asp;
- struct rib_entry *re;
if (!rde_match_peer(prefix_peer(p), &req->neighbor))
return;
asp = prefix_aspath(p);
- re = prefix_re(p);
- if (asp == NULL) /* skip pending withdraw in Adj-RIB-Out */
- return;
- if ((req->flags & F_CTL_BEST) && re != NULL && prefix_best(re) != p)
+ if ((req->flags & F_CTL_BEST) && p->dmetric != PREFIX_DMETRIC_BEST)
return;
if ((req->flags & F_CTL_INVALID) &&
(asp->flags & F_ATTR_PARSE_ERR) == 0)
TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib) {
if (p->flags & PREFIX_NEXTHOP_LINKED)
nexthop_unlink(p);
+ p->dmetric = PREFIX_DMETRIC_INVALID;
}
return;
}
-/* $OpenBSD: rde_decide.c,v 1.92 2022/07/07 10:46:54 claudio Exp $ */
+/* $OpenBSD: rde_decide.c,v 1.93 2022/07/07 12:16:04 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org>
#include "log.h"
int prefix_cmp(struct prefix *, struct prefix *, int *);
+void prefix_set_dmetric(struct prefix *, struct prefix *);
void prefix_insert(struct prefix *, struct prefix *, struct rib_entry *);
void prefix_remove(struct prefix *, struct rib_entry *);
/*
* Compare two prefixes with equal pt_entry. Returns an integer greater than or
* less than 0, according to whether the prefix p1 is more or less preferred
* than the prefix p2. p1 should be used for the new prefix and p2 for a
- * already added prefix.
+ * already added prefix. The absolute value returned specifies the similarity
+ * of the prefixes.
+ * 1: prefixes differ because of validity
+ * 2: prefixes don't belong in any multipath set
+ * 3: prefixes belong only in the as-wide multipath set
+ * 4: prefixes belong in both the ecmp and as-wide multipath set
+ * TODO: maybe we also need a strict ecmp set that requires prefixes to
+ * have e.g. an equal ASPATH or an equal neighbor-as (like for MED).
*/
int
prefix_cmp(struct prefix *p1, struct prefix *p2, int *testall)
struct attr *a;
uint32_t p1id, p2id;
int p1cnt, p2cnt, i;
+ int rv = 1;
/*
* If a match happens before the MED check then the list is
*testall = 0;
if (p1 == NULL)
- return -1;
+ return -rv;
if (p2 == NULL)
- return 1;
+ return rv;
asp1 = prefix_aspath(p1);
asp2 = prefix_aspath(p2);
/* pathes with errors are not eligible */
if (asp1 == NULL || asp1->flags & F_ATTR_PARSE_ERR)
- return -1;
+ return -rv;
if (asp2 == NULL || asp2->flags & F_ATTR_PARSE_ERR)
- return 1;
+ return rv;
/* only loop free pathes are eligible */
if (asp1->flags & F_ATTR_LOOP)
- return -1;
+ return -rv;
if (asp2->flags & F_ATTR_LOOP)
- return 1;
+ return rv;
/*
* 1. check if prefix is eligible a.k.a reachable
*/
if (prefix_nexthop(p2) != NULL &&
prefix_nexthop(p2)->state != NEXTHOP_REACH)
- return 1;
+ return rv;
if (prefix_nexthop(p1) != NULL &&
prefix_nexthop(p1)->state != NEXTHOP_REACH)
- return -1;
+ return -rv;
+
+ /* bump rv, from here on prefix is considered valid */
+ rv++;
/* 2. local preference of prefix, bigger is better */
if (asp1->lpref > asp2->lpref)
- return 1;
+ return rv;
if (asp1->lpref < asp2->lpref)
- return -1;
+ return -rv;
/* 3. aspath count, the shorter the better */
- if ((asp2->aspath->ascnt - asp1->aspath->ascnt) != 0)
- return (asp2->aspath->ascnt - asp1->aspath->ascnt);
+ if (asp1->aspath->ascnt < asp2->aspath->ascnt)
+ return rv;
+ if (asp1->aspath->ascnt > asp2->aspath->ascnt)
+ return -rv;
/* 4. origin, the lower the better */
- if ((asp2->origin - asp1->origin) != 0)
- return (asp2->origin - asp1->origin);
+ if (asp1->origin < asp2->origin)
+ return rv;
+ if (asp1->origin > asp2->origin)
+ return -rv;
/*
* 5. MED decision
*testall = 2;
/* lowest value wins */
if (asp1->med < asp2->med)
- return 1;
+ return rv;
if (asp1->med > asp2->med)
- return -1;
+ return -rv;
}
if (!(rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS))
*/
if (peer1->conf.ebgp != peer2->conf.ebgp) {
if (peer1->conf.ebgp) /* peer1 is EBGP other is lower */
- return 1;
+ return rv;
else if (peer2->conf.ebgp) /* peer2 is EBGP */
- return -1;
+ return -rv;
}
+ /* bump rv, as-wide multipath */
+ rv++;
+
/*
* 7. local tie-breaker, this weight is here to tip equal long AS
* paths in one or the other direction. It happens more and more
* decision process.
*/
if (asp1->weight > asp2->weight)
- return 1;
+ return rv;
if (asp1->weight < asp2->weight)
- return -1;
+ return -rv;
/* 8. nexthop costs. NOT YET -> IGNORE */
+ /* bump rv, equal cost multipath */
+ rv++;
+
/*
* 9. older route (more stable) wins but only if route-age
* evaluation is enabled.
*/
if (rde_decisionflags() & BGPD_FLAG_DECISION_ROUTEAGE) {
if (p1->lastchange < p2->lastchange) /* p1 is older */
- return 1;
+ return rv;
if (p1->lastchange > p2->lastchange)
- return -1;
+ return -rv;
}
/* 10. lowest BGP Id wins, use ORIGINATOR_ID if present */
} else
p2id = peer2->remote_bgpid;
if (p1id < p2id)
- return 1;
+ return rv;
if (p1id > p2id)
- return -1;
+ return -rv;
/* 11. compare CLUSTER_LIST length, shorter is better */
p1cnt = p2cnt = 0;
p1cnt = a->len / sizeof(uint32_t);
if ((a = attr_optget(asp2, ATTR_CLUSTER_LIST)) != NULL)
p2cnt = a->len / sizeof(uint32_t);
- if ((p2cnt - p1cnt) != 0)
- return (p2cnt - p1cnt);
+ if (p1cnt < p2cnt)
+ return rv;
+ if (p1cnt > p2cnt)
+ return -rv;
/* 12. lowest peer address wins (IPv4 is better than IPv6) */
if (peer1->remote_addr.aid < peer2->remote_addr.aid)
- return 1;
+ return rv;
if (peer1->remote_addr.aid > peer2->remote_addr.aid)
- return -1;
+ return -rv;
switch (peer1->remote_addr.aid) {
case AID_INET:
i = memcmp(&peer1->remote_addr.v4, &peer2->remote_addr.v4,
fatalx("%s: unknown af", __func__);
}
if (i < 0)
- return 1;
+ return rv;
if (i > 0)
- return -1;
+ return -rv;
/* RFC7911 does not specify this but something like this is needed. */
/* 13. lowest path identifier wins */
if (p1->path_id < p2->path_id)
- return 1;
+ return rv;
if (p1->path_id > p2->path_id)
- return -1;
+ return -rv;
fatalx("Uh, oh a politician in the decision process");
}
+/*
+ * Set the dmetric value of np relative to its predecessor pp.
+ *
+ * If np is NULL nothing is done. If pp is NULL, np is treated as the
+ * first element: its dmetric becomes PREFIX_DMETRIC_BEST when
+ * prefix_eligible(np) is true and PREFIX_DMETRIC_INVALID otherwise.
+ * In all other cases np->dmetric is set to the return value of
+ * prefix_cmp(pp, np); the testall output of prefix_cmp() is ignored.
+ *
+ * A negative dmetric after the assignment is fatal. Presumably this
+ * means pp compared as less preferred than np, i.e. the list order is
+ * broken -- confirm against the callers.
+ */
+void
+prefix_set_dmetric(struct prefix *pp, struct prefix *np)
+{
+ int testall;
+
+ if (np != NULL) {
+ if (pp == NULL)
+ np->dmetric = prefix_eligible(np) ?
+ PREFIX_DMETRIC_BEST : PREFIX_DMETRIC_INVALID;
+ else
+ np->dmetric = prefix_cmp(pp, np, &testall);
+ if (np->dmetric < 0)
+ fatalx("bad dmetric in decision process");
+ }
+}
+
/*
* Insert a prefix keeping the total order of the list. For routes
* that may depend on a MED selection the set is scanned until the
* MED inversion, take out prefix and
* put it onto redo queue.
*/
+ prefix_set_dmetric(TAILQ_PREV(xp, prefix_queue,
+ entry.list.rib), np);
TAILQ_REMOVE(&re->prefix_h, xp, entry.list.rib);
TAILQ_INSERT_TAIL(&redo, xp, entry.list.rib);
} else {
}
}
- if (insertp == NULL)
+ if (insertp == NULL) {
TAILQ_INSERT_HEAD(&re->prefix_h, new, entry.list.rib);
- else
+ } else {
TAILQ_INSERT_AFTER(&re->prefix_h, insertp, new, entry.list.rib);
+ new->dmetric = prefix_cmp(insertp, new, &testall);
+ if (new->dmetric < 0)
+ fatalx("bad dmetric in decision process");
+ }
+
+ prefix_set_dmetric(insertp, new);
+ prefix_set_dmetric(new, TAILQ_NEXT(new, entry.list.rib));
/* Fixup MED order again. All elements are < new */
while (!TAILQ_EMPTY(&redo)) {
int testall;
xp = TAILQ_NEXT(old, entry.list.rib);
+ prefix_set_dmetric(TAILQ_PREV(old, prefix_queue, entry.list.rib), xp);
TAILQ_REMOVE(&re->prefix_h, old, entry.list.rib);
+
/* check if a MED inversion could be possible */
prefix_cmp(old, xp, &testall);
if (testall > 0) {
* possible MED inversion, take out prefix and
* put it onto redo queue.
*/
+ prefix_set_dmetric(TAILQ_PREV(xp, prefix_queue,
+ entry.list.rib), np);
TAILQ_REMOVE(&re->prefix_h, xp, entry.list.rib);
TAILQ_INSERT_TAIL(&redo, xp, entry.list.rib);
}
/* decision process is turned off */
if (old != NULL)
TAILQ_REMOVE(&re->prefix_h, old, entry.list.rib);
- if (new != NULL)
+ if (new != NULL) {
TAILQ_INSERT_HEAD(&re->prefix_h, new, entry.list.rib);
+ new->dmetric = PREFIX_DMETRIC_INVALID;
+ }
return;
}