From: claudio Date: Tue, 2 Mar 2021 09:45:07 +0000 (+0000) Subject: Introduce 'rde evaluate all' a mode to work around path hiding in IXP X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=05ebbbf6caddeae625957b393fdd7a13e174146c;p=openbsd Introduce 'rde evaluate all' a mode to work around path hiding in IXP route-server environments. By default only the best path is sent to peers and if that path is filtered then the path is hidden for that peer. On route-servers this is sometimes not desried. For this 'rde evaluate all' will cause the evaluation process to fall back to alternate routes and will redistribute the first non-filtered path to the peer. This is very similar to per-peer RIBs but accomplishes the same effect without the massive increase in memory usage. Compared to the default mode this requires more CPU resources but it is probably less than what per-peer RIBs would require. 'rde evaluate all' can be set and reset globally, on groups and on idividual neighbors. It is not limited to route-server configs but route loops are possible if not properly used. OK benno@ --- diff --git a/usr.sbin/bgpd/bgpd.conf.5 b/usr.sbin/bgpd/bgpd.conf.5 index a1e21a7d760..3d59f9090b0 100644 --- a/usr.sbin/bgpd/bgpd.conf.5 +++ b/usr.sbin/bgpd/bgpd.conf.5 @@ -1,4 +1,4 @@ -.\" $OpenBSD: bgpd.conf.5,v 1.208 2021/02/16 08:29:16 claudio Exp $ +.\" $OpenBSD: bgpd.conf.5,v 1.209 2021/03/02 09:45:07 claudio Exp $ .\" .\" Copyright (c) 2004 Claudio Jeker .\" Copyright (c) 2003, 2004 Henning Brauer @@ -16,7 +16,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: February 16 2021 $ +.Dd $Mdocdate: March 2 2021 $ .Dt BGPD.CONF 5 .Os .Sh NAME @@ -268,9 +268,18 @@ daemons, such as .Xr ospfd 8 . .Pp .It Xo -.Ic rde -.Ic med -.Ic compare +.Ic rde Ic evaluate +.Pq Ic default Ns | Ns Ic all +.Xc +If set to +.Ar all +keep evaluating alternative paths in case the selected path is filtered +out. +By default if a path is filtered by the output filters then no alternative +path is sent to this peer. +.Pp +.It Xo +.Ic rde Ic med Ic compare .Pq Ic always Ns | Ns Ic strict .Xc If set to @@ -1152,6 +1161,20 @@ setting. .It Ic remote-as Ar as-number Set the AS number of the remote system. .Pp +.It Xo +.Ic rde Ic evaluate +.Pq Ic default Ns | Ns Ic all +.Xc +If set to +.Ar all +keep evaluating alternative paths in case the selected path is filtered +out. +By default if a path is filtered by the output filters then no alternative +path is sent to this peer. +The default is inherited from the global +.Ic rde Ic evaluate +setting. +.Pp .It Ic rib Ar name Bind the neighbor to the specified RIB. .Pp diff --git a/usr.sbin/bgpd/bgpd.h b/usr.sbin/bgpd/bgpd.h index 03578e98493..f56a320c6e6 100644 --- a/usr.sbin/bgpd/bgpd.h +++ b/usr.sbin/bgpd/bgpd.h @@ -1,4 +1,4 @@ -/* $OpenBSD: bgpd.h,v 1.412 2021/02/16 08:29:16 claudio Exp $ */ +/* $OpenBSD: bgpd.h,v 1.413 2021/03/02 09:45:07 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -65,7 +65,8 @@ #define BGPD_FLAG_DECISION_ROUTEAGE 0x0100 #define BGPD_FLAG_DECISION_TRANS_AS 0x0200 #define BGPD_FLAG_DECISION_MED_ALWAYS 0x0400 -#define BGPD_FLAG_NO_AS_SET 0x0800 +#define BGPD_FLAG_DECISION_ALL_PATHS 0x0800 +#define BGPD_FLAG_NO_AS_SET 0x1000 #define BGPD_LOG_UPDATES 0x0001 @@ -408,7 +409,8 @@ struct peer_config { #define PEERFLAG_TRANS_AS 0x01 #define PEERFLAG_LOG_UPDATES 0x02 -#define PEERFLAG_NO_AS_SET 0x04 +#define PEERFLAG_EVALUATE_ALL 0x04 +#define PEERFLAG_NO_AS_SET 0x08 enum network_type { NETWORK_DEFAULT, /* from network statements */ diff --git a/usr.sbin/bgpd/parse.y b/usr.sbin/bgpd/parse.y index 7dd7f7c2b4f..79f6c97cb71 100644 --- a/usr.sbin/bgpd/parse.y +++ b/usr.sbin/bgpd/parse.y @@ -1,4 +1,4 @@ -/* $OpenBSD: parse.y,v 1.413 2021/02/16 08:29:16 claudio Exp $ */ +/* $OpenBSD: parse.y,v 1.414 2021/03/02 09:45:07 claudio Exp $ */ /* * Copyright (c) 2002, 2003, 2004 Henning Brauer @@ -791,6 +791,19 @@ conf_main : AS as4number { } free($4); } + | RDE EVALUATE STRING { + if (!strcmp($3, "all")) + conf->flags |= BGPD_FLAG_DECISION_ALL_PATHS; + else if (!strcmp($3, "default")) + conf->flags &= ~BGPD_FLAG_DECISION_ALL_PATHS; + else { + yyerror("rde evaluate: " + "unknown setting \"%s\"", $3); + free($3); + YYERROR; + } + free($3); + } | NEXTHOP QUALIFY VIA STRING { if (!strcmp($4, "bgp")) conf->flags |= BGPD_FLAG_NEXTHOP_BGP; @@ -1727,6 +1740,19 @@ peeropts : REMOTEAS as4number { else curpeer->conf.flags &= ~PEERFLAG_NO_AS_SET; } + | RDE EVALUATE STRING { + if (!strcmp($3, "all")) + curpeer->conf.flags |= PEERFLAG_EVALUATE_ALL; + else if (!strcmp($3, "default")) + curpeer->conf.flags &= ~PEERFLAG_EVALUATE_ALL; + else { + yyerror("rde evaluate: " + "unknown setting \"%s\"", $3); + free($3); + YYERROR; + } + free($3); + } ; restart : /* nada */ { $$ = 0; } @@ -3888,6 +3914,8 @@ alloc_peer(void) if (conf->flags & BGPD_FLAG_DECISION_TRANS_AS) p->conf.flags |= PEERFLAG_TRANS_AS; + if (conf->flags & BGPD_FLAG_DECISION_ALL_PATHS) + p->conf.flags |= PEERFLAG_EVALUATE_ALL; if (conf->flags & BGPD_FLAG_NO_AS_SET) p->conf.flags |= PEERFLAG_NO_AS_SET; @@ -3910,8 +3938,6 @@ new_peer(void) sizeof(p->conf.descr)) >= sizeof(p->conf.descr)) fatalx("new_peer descr strlcpy"); p->conf.groupid = curgroup->conf.id; - p->conf.local_as = curgroup->conf.local_as; - p->conf.local_short_as = curgroup->conf.local_short_as; } return (p); } diff --git a/usr.sbin/bgpd/printconf.c b/usr.sbin/bgpd/printconf.c index 122ae266689..2940b27fc11 100644 --- a/usr.sbin/bgpd/printconf.c +++ b/usr.sbin/bgpd/printconf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: printconf.c,v 1.146 2021/02/16 08:29:16 claudio Exp $ */ +/* $OpenBSD: printconf.c,v 1.147 2021/03/02 09:45:07 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -390,9 +390,10 @@ print_mainconf(struct bgpd_config *conf) if (conf->flags & BGPD_FLAG_DECISION_ROUTEAGE) printf("rde route-age evaluate\n"); - if (conf->flags & BGPD_FLAG_DECISION_MED_ALWAYS) printf("rde med compare always\n"); + if (conf->flags & BGPD_FLAG_DECISION_ALL_PATHS) + printf("rde evaluate all\n"); if (conf->flags & BGPD_FLAG_NO_AS_SET) printf("reject as-set yes\n"); @@ -682,6 +683,14 @@ print_peer(struct peer_config *p, struct bgpd_config *conf, const char *c) if (p->flags & PEERFLAG_TRANS_AS) printf("%s\ttransparent-as yes\n", c); + if (conf->flags & BGPD_FLAG_DECISION_ALL_PATHS) { + if (!(p->flags & PEERFLAG_EVALUATE_ALL)) + printf("%s\trde evaluate default\n", c); + } else { + if (p->flags & PEERFLAG_EVALUATE_ALL) + printf("%s\trde evaluate all\n", c); + } + if (conf->flags & BGPD_FLAG_NO_AS_SET) { if (!(p->flags & PEERFLAG_NO_AS_SET)) printf("%s\treject as-set no\n", c); diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c index ca6a515b412..51091eee9bb 100644 --- a/usr.sbin/bgpd/rde.c +++ b/usr.sbin/bgpd/rde.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.c,v 1.515 2021/02/16 08:29:16 claudio Exp $ */ +/* $OpenBSD: rde.c,v 1.516 2021/03/02 09:45:07 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -2875,8 +2875,17 @@ rde_send_kroute(struct rib *rib, struct prefix *new, struct prefix *old) /* * update specific functions */ +static int rde_eval_all; + +int +rde_evaluate_all(void) +{ + return rde_eval_all; +} + void -rde_generate_updates(struct rib *rib, struct prefix *new, struct prefix *old) +rde_generate_updates(struct rib *rib, struct prefix *new, struct prefix *old, + int eval_all) { struct rde_peer *peer; u_int8_t aid; @@ -2889,7 +2898,7 @@ rde_generate_updates(struct rib *rib, struct prefix *new, struct prefix *old) if (old == NULL && new == NULL) return; - if ((rib->flags & F_RIB_NOFIB) == 0) + if (!eval_all && (rib->flags & F_RIB_NOFIB) == 0) rde_send_kroute(rib, new, old); if (new) @@ -2897,13 +2906,22 @@ rde_generate_updates(struct rib *rib, struct prefix *new, struct prefix *old) else aid = old->pt->aid; + rde_eval_all = 0; LIST_FOREACH(peer, &peerlist, peer_l) { - if (peer->conf.id == 0) - continue; - if (peer->loc_rib_id != rib->id) + /* skip ourself */ + if (peer == peerself) continue; if (peer->state != PEER_UP) continue; + /* handle evaluate all, keep track if it is needed */ + if (peer->conf.flags & PEERFLAG_EVALUATE_ALL) + rde_eval_all = 1; + else if (eval_all) + /* skip default peers if the best path didn't change */ + continue; + /* skip peers using a different rib */ + if (peer->loc_rib_id != rib->id) + continue; /* check if peer actually supports the address family */ if (peer->capa.mp[aid] == 0) continue; @@ -3556,7 +3574,7 @@ rde_softreconfig_sync_reeval(struct rib_entry *re, void *arg) nexthop_unlink(p); } if (re->active) { - rde_generate_updates(rib, NULL, re->active); + rde_generate_updates(rib, NULL, re->active, 0); re->active = NULL; } return; diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h index 973d5691a74..df20239c164 100644 --- a/usr.sbin/bgpd/rde.h +++ b/usr.sbin/bgpd/rde.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.h,v 1.236 2021/01/13 11:34:01 claudio Exp $ */ +/* $OpenBSD: rde.h,v 1.237 2021/03/02 09:45:07 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker and @@ -371,8 +371,9 @@ void rde_send_nexthop(struct bgpd_addr *, int); void rde_pftable_add(u_int16_t, struct prefix *); void rde_pftable_del(u_int16_t, struct prefix *); +int rde_evaluate_all(void); void rde_generate_updates(struct rib *, struct prefix *, - struct prefix *); + struct prefix *, int); u_int32_t rde_local_as(void); int rde_decisionflags(void); int rde_as4byte(struct rde_peer *); @@ -486,6 +487,7 @@ communities_unref(struct rde_community *comm) int community_to_rd(struct community *, u_int64_t *); /* rde_decide.c */ +int prefix_eligible(struct prefix *); void prefix_evaluate(struct rib_entry *, struct prefix *, struct prefix *); /* rde_filter.c */ diff --git a/usr.sbin/bgpd/rde_decide.c b/usr.sbin/bgpd/rde_decide.c index be011ee5034..7f5647ae9ea 100644 --- a/usr.sbin/bgpd/rde_decide.c +++ b/usr.sbin/bgpd/rde_decide.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_decide.c,v 1.81 2021/02/02 15:24:43 claudio Exp $ */ +/* $OpenBSD: rde_decide.c,v 1.82 2021/03/02 09:45:07 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker @@ -91,8 +91,8 @@ void prefix_remove(struct prefix *, struct rib_entry *); /* * Decision Engine OUR implementation: - * Our implementation has only one RIB. The filtering is done first. The - * filtering calculates the preference and stores it in LOCAL_PREF (Phase 1). + * The filtering is done first. The filtering calculates the preference and + * stores it in LOCAL_PREF (Phase 1). * Ineligible routes are flagged as ineligible via nexthop_add(). * Phase 3 is done together with Phase 2. * In following cases a prefix needs to be reevaluated: @@ -284,6 +284,13 @@ prefix_cmp(struct prefix *p1, struct prefix *p2, int *testall) fatalx("Uh, oh a politician in the decision process"); } +/* + * Insert a prefix keeping the total order of the list. For routes + * that may depend on a MED selection the set is scanned until the + * condition is cleared. If a MED inversion is detected the respective + * prefix is taken of the rib list and put onto a redo queue. All + * prefixes on the redo queue are re-inserted at the end. + */ void prefix_insert(struct prefix *new, struct prefix *ep, struct rib_entry *re) { @@ -351,6 +358,15 @@ prefix_insert(struct prefix *new, struct prefix *ep, struct rib_entry *re) } } +/* + * Remove a prefix from the RIB list ensuring that the total order of the + * list remains intact. All routes that differ in the MED are taken of the + * list and put on the redo list. To figure out if a route could cause a + * resort because of a MED check the next prefix of the to-remove prefix + * is compared with the old prefix. A full scan is only done if the next + * route differs because of the MED or later checks. + * Again at the end all routes on the redo queue are reinserted. + */ void prefix_remove(struct prefix *old, struct rib_entry *re) { @@ -397,6 +413,23 @@ prefix_remove(struct prefix *old, struct rib_entry *re) } } +/* helper function to check if a prefix is valid to be selected */ +int +prefix_eligible(struct prefix *p) +{ + struct rde_aspath *asp = prefix_aspath(p); + struct nexthop *nh = prefix_nexthop(p); + + /* The aspath needs to be loop and error free */ + if (asp == NULL || asp->flags & (F_ATTR_LOOP|F_ATTR_PARSE_ERR)) + return 0; + /* The nexthop needs to exist and be reachable */ + if (nh == NULL || nh->state != NEXTHOP_REACH) + return 0; + + return 1; +} + /* * Find the correct place to insert the prefix in the prefix list. * If the active prefix has changed we need to send an update. @@ -420,7 +453,7 @@ prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix *old) * active. Clean up now to ensure that the RIB * is consistant. */ - rde_generate_updates(re_rib(re), NULL, re->active); + rde_generate_updates(re_rib(re), NULL, re->active, 0); re->active = NULL; } return; @@ -433,15 +466,8 @@ prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix *old) prefix_insert(new, NULL, re); xp = LIST_FIRST(&re->prefix_h); - if (xp != NULL) { - struct rde_aspath *xasp = prefix_aspath(xp); - if (xasp == NULL || - xasp->flags & (F_ATTR_LOOP|F_ATTR_PARSE_ERR) || - (prefix_nexthop(xp) != NULL && prefix_nexthop(xp)->state != - NEXTHOP_REACH)) - /* xp is ineligible */ - xp = NULL; - } + if (xp != NULL && !prefix_eligible(xp)) + xp = NULL; /* * If the active prefix changed or the active prefix was removed @@ -453,7 +479,17 @@ prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix *old) * but remember that xp may be NULL aka ineligible. * Additional decision may be made by the called functions. */ - rde_generate_updates(re_rib(re), xp, re->active); + rde_generate_updates(re_rib(re), xp, re->active, 0); re->active = xp; + return; } + + /* + * If there are peers with 'rde evaluate all' every update needs + * to be passed on (not only a change of the best prefix). + * rde_generate_updates() will then take care of distribution. + */ + if (rde_evaluate_all()) + if ((new != NULL && prefix_eligible(new)) || old != NULL) + rde_generate_updates(re_rib(re), re->active, NULL, 1); } diff --git a/usr.sbin/bgpd/rde_update.c b/usr.sbin/bgpd/rde_update.c index c448060f9b7..ac8862e60dd 100644 --- a/usr.sbin/bgpd/rde_update.c +++ b/usr.sbin/bgpd/rde_update.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_update.c,v 1.124 2021/01/09 16:49:41 claudio Exp $ */ +/* $OpenBSD: rde_update.c,v 1.125 2021/03/02 09:45:07 claudio Exp $ */ /* * Copyright (c) 2004 Claudio Jeker @@ -23,6 +23,7 @@ #include #include #include +#include #include "bgpd.h" #include "rde.h" @@ -49,26 +50,22 @@ up_test_update(struct rde_peer *peer, struct prefix *p) { struct rde_aspath *asp; struct rde_community *comm; - struct rde_peer *prefp; + struct rde_peer *frompeer; - if (p == NULL) - /* no prefix available */ - return (0); - - prefp = prefix_peer(p); + frompeer = prefix_peer(p); asp = prefix_aspath(p); comm = prefix_communities(p); - if (peer == prefp) - /* Do not send routes back to sender */ - return (0); - if (asp == NULL || asp->flags & F_ATTR_PARSE_ERR) fatalx("try to send out a botched path"); if (asp->flags & F_ATTR_LOOP) fatalx("try to send out a looped path"); - if (!prefp->conf.ebgp && !peer->conf.ebgp) { + if (peer == frompeer) + /* Do not send routes back to sender */ + return (0); + + if (!frompeer->conf.ebgp && !peer->conf.ebgp) { /* * route reflector redistribution rules: * 1. if announce is set -> announce @@ -77,7 +74,7 @@ up_test_update(struct rde_peer *peer, struct prefix *p) * 4. old non-client, new client -> yes * 5. old client, new client -> yes */ - if (prefp->conf.reflector_client == 0 && + if (frompeer->conf.reflector_client == 0 && peer->conf.reflector_client == 0 && (asp->flags & F_PREFIX_ANNOUNCED) == 0) /* Do not redistribute updates to ibgp peers */ @@ -101,14 +98,12 @@ void up_generate_updates(struct filter_head *rules, struct rde_peer *peer, struct prefix *new, struct prefix *old) { - struct filterstate state; - struct bgpd_addr addr; - - if (peer->state != PEER_UP) - return; + struct filterstate state; + struct bgpd_addr addr; + int need_withdraw; +again: if (new == NULL) { -withdraw: if (old == NULL) /* no prefix to withdraw */ return; @@ -121,8 +116,28 @@ withdraw: peer->up_wcnt++; } } else { - if (!up_test_update(peer, new)) { - goto withdraw; + need_withdraw = 0; + /* + * up_test_update() needs to run before the output filters + * else the well known communities wont work properly. + * The output filters would not be able to add well known + * communities. + */ + if (!up_test_update(peer, new)) + need_withdraw = 1; + + /* + * if 'rde evaluate all' is set for this peer then + * delay the the withdraw because of up_test_update(). + * The filters may actually skip this prefix and so this + * decision needs to be delayed. + * For the default mode we can just give up here and + * skip the filters. + */ + if (need_withdraw && + !(peer->conf.flags & PEERFLAG_EVALUATE_ALL)) { + new = NULL; + goto again; } rde_filterstate_prep(&state, prefix_aspath(new), @@ -133,7 +148,18 @@ withdraw: new->pt->prefixlen, prefix_vstate(new), &state) == ACTION_DENY) { rde_filterstate_clean(&state); - goto withdraw; + if (peer->conf.flags & PEERFLAG_EVALUATE_ALL) + new = LIST_NEXT(new, entry.list.rib); + else + new = NULL; + if (new != NULL && !prefix_eligible(new)) + new = NULL; + goto again; + } + + if (need_withdraw) { + new = NULL; + goto again; } /* only send update if path changed */