From 5014683f694edcb7bd80453d3bc6266ea238e431 Mon Sep 17 00:00:00 2001 From: claudio Date: Mon, 11 Jul 2022 17:08:21 +0000 Subject: [PATCH] Implement send side of RFC7911 ADD-PATH This allows sending out more than one path per prefix to a neighbor that supports add-path receive. OpenBGPD supports a few different modes to select which paths to send: - all: send all valid paths (the ones with a * in bgpctl output) - best: send out only the single best path - ecmp: send out paths that evaluate the same up to and including the nexthop metric - as-wide-best: send out paths that evaluate the same up to but not including the nexthop metric Currently ecmp and as-wide-best are the same. On top of this, best, ecmp and as-wide-best allow including extra paths (e.g. best plus 2) and for the multipath modes there is also a maximum (e.g. ecmp plus 2 max 4) OK tb@ --- usr.sbin/bgpd/bgpd.h | 17 ++++- usr.sbin/bgpd/parse.y | 87 ++++++++++++++++++++++-- usr.sbin/bgpd/printconf.c | 28 +++++++- usr.sbin/bgpd/rde.c | 22 +++++- usr.sbin/bgpd/rde.h | 5 +- usr.sbin/bgpd/rde_peer.c | 18 ++++- usr.sbin/bgpd/rde_update.c | 134 ++++++++++++++++++++++++++++++++++++- 7 files changed, 299 insertions(+), 12 deletions(-) diff --git a/usr.sbin/bgpd/bgpd.h b/usr.sbin/bgpd/bgpd.h index 3d592a3679f..c7caaea58e9 100644 --- a/usr.sbin/bgpd/bgpd.h +++ b/usr.sbin/bgpd/bgpd.h @@ -1,4 +1,4 @@ -/* $OpenBSD: bgpd.h,v 1.440 2022/07/07 12:16:04 claudio Exp $ */ +/* $OpenBSD: bgpd.h,v 1.441 2022/07/11 17:08:21 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -307,6 +307,20 @@ struct bgpd_config { extern int cmd_opts; +enum addpath_mode { + ADDPATH_EVAL_NONE, + ADDPATH_EVAL_BEST, + ADDPATH_EVAL_ECMP, + ADDPATH_EVAL_AS_WIDE, + ADDPATH_EVAL_ALL, +}; + +struct addpath_eval { + enum addpath_mode mode; + int extrapaths; + int maxpaths; +}; + enum export_type { EXPORT_UNSET, EXPORT_NONE, @@ -402,6 +416,7 @@ struct peer_config { struct bgpd_addr local_addr_v6; struct peer_auth auth; struct capabilities
capabilities; + struct addpath_eval eval; char group[PEER_DESCR_LEN]; char descr[PEER_DESCR_LEN]; char reason[REASON_LEN]; diff --git a/usr.sbin/bgpd/parse.y b/usr.sbin/bgpd/parse.y index a7c553c0475..baa36e6c20f 100644 --- a/usr.sbin/bgpd/parse.y +++ b/usr.sbin/bgpd/parse.y @@ -1,4 +1,4 @@ -/* $OpenBSD: parse.y,v 1.431 2022/06/27 13:26:51 claudio Exp $ */ +/* $OpenBSD: parse.y,v 1.432 2022/07/11 17:08:21 claudio Exp $ */ /* * Copyright (c) 2002, 2003, 2004 Henning Brauer @@ -210,7 +210,7 @@ typedef struct { %token EBGP IBGP %token LOCALAS REMOTEAS DESCR LOCALADDR MULTIHOP PASSIVE MAXPREFIX RESTART %token ANNOUNCE CAPABILITIES REFRESH AS4BYTE CONNECTRETRY ENHANCED ADDPATH -%token SEND RECV POLICY +%token SEND RECV PLUS POLICY %token DEMOTE ENFORCE NEIGHBORAS ASOVERRIDE REFLECTOR DEPEND DOWN %token DUMP IN OUT SOCKET RESTRICTED %token LOG TRANSPARENT @@ -230,12 +230,13 @@ typedef struct { %token IPSEC ESP AH SPI IKE %token IPV4 IPV6 %token QUALIFY VIA -%token NE LE GE XRANGE LONGER MAXLEN +%token NE LE GE XRANGE LONGER MAXLEN MAX %token STRING %token NUMBER %type asnumber as4number as4number_any optnumber %type espah family safi restart origincode nettype %type yesno inout restricted validity expires enforce +%type addpathextra addpathmax %type string %type address %type prefix addrspec @@ -718,7 +719,7 @@ conf_main : AS as4number { struct rde_rib *rr; rr = find_rib("Loc-RIB"); if (rr == NULL) - fatalx("RTABLE can not find the main RIB!"); + fatalx("RTABLE cannot find the main RIB!"); if ($2 == 0) rr->flags |= F_RIB_NOFIBSYNC; @@ -880,7 +881,7 @@ conf_main : AS as4number { } rr = find_rib("Loc-RIB"); if (rr == NULL) - fatalx("RTABLE can not find the main RIB!"); + fatalx("RTABLE cannot find the main RIB!"); rr->rtableid = $2; } | CONNECTRETRY NUMBER { @@ -1356,6 +1357,28 @@ groupopts_l : /* empty */ | groupopts_l error '\n' ; +addpathextra : /* empty */ { $$ = 0; } + | PLUS NUMBER { + if ($2 < 1 || $2 > USHRT_MAX) { + yyerror("additional paths must be between " + 
"%u and %u", 1, USHRT_MAX); + YYERROR; + } + $$ = $2; + } + ; + +addpathmax : /* empty */ { $$ = 0; } + | MAX NUMBER { + if ($2 < 1 || $2 > USHRT_MAX) { + yyerror("maximum additional paths must be " + "between %u and %u", 1, USHRT_MAX); + YYERROR; + } + $$ = $2; + } + ; + peeropts_h : '{' '\n' peeropts_l '}' | '{' peeropts '}' | /* empty */ @@ -1515,6 +1538,50 @@ peeropts : REMOTEAS as4number { else *ap++ &= ~CAPA_AP_RECV; } + | ANNOUNCE ADDPATH SEND STRING addpathextra addpathmax { + int8_t *ap = curpeer->conf.capabilities.add_path; + enum addpath_mode mode; + u_int8_t i; + + if (!strcmp($4, "no")) { + free($4); + if ($5 != 0 || $6 != 0) { + yyerror("no additional option allowed " + "for 'add-path send no'"); + YYERROR; + } + for (i = 0; i < AID_MAX; i++) + *ap++ &= ~CAPA_AP_SEND; + break; + } else if (!strcmp($4, "all")) { + free($4); + if ($5 != 0 || $6 != 0) { + yyerror("no additional option allowed " + "for 'add-path send all'"); + YYERROR; + } + mode = ADDPATH_EVAL_ALL; + } else if (!strcmp($4, "best")) { + free($4); + mode = ADDPATH_EVAL_BEST; + } else if (!strcmp($4, "ecmp")) { + free($4); + mode = ADDPATH_EVAL_ECMP; + } else if (!strcmp($4, "as-wide-best")) { + free($4); + mode = ADDPATH_EVAL_AS_WIDE; + } else { + yyerror("announce add-path send: " + "unknown mode \"%s\"", $4); + free($4); + YYERROR; + } + for (i = 0; i < AID_MAX; i++) + *ap++ |= CAPA_AP_SEND; + curpeer->conf.eval.mode = mode; + curpeer->conf.eval.extrapaths = $5; + curpeer->conf.eval.maxpaths = $6; + } | ANNOUNCE POLICY STRING enforce { curpeer->conf.capabilities.role_ena = $4; if (strcmp($3, "no") == 0) { @@ -3070,6 +3137,7 @@ lookup(char *s) { "localpref", LOCALPREF}, { "log", LOG}, { "match", MATCH}, + { "max", MAX}, { "max-as-len", MAXASLEN}, { "max-as-seq", MAXASSEQ}, { "max-communities", MAXCOMMUNITIES}, @@ -3098,6 +3166,7 @@ lookup(char *s) { "password", PASSWORD}, { "peer-as", PEERAS}, { "pftable", PFTABLE}, + { "plus", PLUS}, { "policy", POLICY}, { "port", PORT}, { "prefix", 
PREFIX}, @@ -4596,6 +4665,14 @@ neighbor_consistent(struct peer *p) return (-1); } + /* bail if add-path send and rde evaluate all is used together */ + if ((p->conf.flags & PEERFLAG_EVALUATE_ALL) && + (p->conf.capabilities.add_path[0] & CAPA_AP_SEND)) { + yyerror("neighbors with add-path send cannot use " + "'rde evaluate all'"); + return (-1); + } + return (0); } diff --git a/usr.sbin/bgpd/printconf.c b/usr.sbin/bgpd/printconf.c index e707185b5b7..8b017df5d21 100644 --- a/usr.sbin/bgpd/printconf.c +++ b/usr.sbin/bgpd/printconf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: printconf.c,v 1.155 2022/06/28 11:46:05 claudio Exp $ */ +/* $OpenBSD: printconf.c,v 1.156 2022/07/11 17:08:21 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -768,6 +768,23 @@ print_enc_alg(enum auth_enc_alg alg) } } +static const char * +print_addpath_mode(enum addpath_mode mode) +{ + switch (mode) { + case ADDPATH_EVAL_NONE: + return "none"; + case ADDPATH_EVAL_BEST: + return "best"; + case ADDPATH_EVAL_ECMP: + return "ecmp"; + case ADDPATH_EVAL_AS_WIDE: + return "as-wide-best"; + case ADDPATH_EVAL_ALL: + return "all"; + } +} + void print_announce(struct peer_config *p, const char *c) { @@ -790,6 +807,15 @@ print_announce(struct peer_config *p, const char *c) printf("%s\tannounce as4byte no\n", c); if (p->capabilities.add_path[0] & CAPA_AP_RECV) printf("%s\tannounce add-path recv yes\n", c); + if (p->capabilities.add_path[0] & CAPA_AP_SEND) { + printf("%s\tannounce add-path send %s", c, + print_addpath_mode(p->eval.mode)); + if (p->eval.extrapaths != 0) + printf(" plus %d", p->eval.extrapaths); + if (p->eval.maxpaths != 0) + printf(" max %d", p->eval.maxpaths); + printf("\n"); + } if (p->capabilities.role_ena) { printf("%s\tannounce policy %s%s\n", c, log_policy(p->capabilities.role), diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c index 25f600e6a6d..11324a7c1d7 100644 --- a/usr.sbin/bgpd/rde.c +++ b/usr.sbin/bgpd/rde.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.c,v 1.553 2022/07/11 16:51:01 
claudio Exp $ */ +/* $OpenBSD: rde.c,v 1.554 2022/07/11 17:08:21 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -1091,6 +1091,9 @@ rde_dispatch_imsg_peer(struct rde_peer *peer, void *bula) imsg_compose(ibuf_se, IMSG_SESSION_DOWN, peer->conf.id, 0, -1, NULL, 0); } + /* make sure rde_eval_all is on if needed. */ + if (peer_has_add_path(peer, AID_UNSPEC, CAPA_AP_SEND)) + rde_eval_all = 1; break; case IMSG_SESSION_DOWN: peer_down(peer, NULL); @@ -3494,6 +3497,23 @@ rde_reload_done(void) fatalx("King Bula's peer met an unknown RIB"); peer->reconf_rib = 1; } + /* + * Update add-path settings but only if the session is + * running with add-path and the config uses add-path + * as well. + */ + if (peer_has_add_path(peer, AID_UNSPEC, CAPA_AP_SEND)) { + if (peer->conf.eval.mode != ADDPATH_EVAL_NONE && + memcmp(&peer->eval, &peer->conf.eval, + sizeof(peer->eval)) != 0) { + log_peer_info(&peer->conf, + "addpath eval change, reloading"); + peer->reconf_out = 1; + peer->eval = peer->conf.eval; + } + /* add-path send needs rde_eval_all */ + rde_eval_all = 1; + } peer->export_type = peer->conf.export_type; peer->flags = peer->conf.flags; if (peer->flags & PEERFLAG_EVALUATE_ALL) diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h index 0a485879ca0..4499d329931 100644 --- a/usr.sbin/bgpd/rde.h +++ b/usr.sbin/bgpd/rde.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.h,v 1.258 2022/07/08 10:01:52 claudio Exp $ */ +/* $OpenBSD: rde.h,v 1.259 2022/07/11 17:08:21 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker and @@ -87,6 +87,7 @@ struct rde_peer { struct bgpd_addr local_v4_addr; struct bgpd_addr local_v6_addr; struct capabilities capa; + struct addpath_eval eval; struct prefix_index adj_rib_out; struct prefix_tree updates[AID_MAX]; struct prefix_tree withdraws[AID_MAX]; @@ -697,6 +698,8 @@ int nexthop_compare(struct nexthop *, struct nexthop *); void up_init(struct rde_peer *); void up_generate_updates(struct filter_head *, struct rde_peer *, struct prefix *, 
struct prefix *); +void up_generate_addpath(struct filter_head *, struct rde_peer *, + struct prefix *, struct prefix *); void up_generate_default(struct filter_head *, struct rde_peer *, uint8_t); int up_is_eor(struct rde_peer *, uint8_t); diff --git a/usr.sbin/bgpd/rde_peer.c b/usr.sbin/bgpd/rde_peer.c index c0bffe2f21b..7572c6c240b 100644 --- a/usr.sbin/bgpd/rde_peer.c +++ b/usr.sbin/bgpd/rde_peer.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_peer.c,v 1.18 2022/07/07 10:46:54 claudio Exp $ */ +/* $OpenBSD: rde_peer.c,v 1.19 2022/07/11 17:08:21 claudio Exp $ */ /* * Copyright (c) 2019 Claudio Jeker @@ -58,6 +58,13 @@ peer_has_add_path(struct rde_peer *peer, uint8_t aid, int mode) { if (aid > AID_MAX) return 0; + if (aid == AID_UNSPEC) { + /* check if at capability is set for at least one AID */ + for (aid = AID_MIN; aid < AID_MAX; aid++) + if (peer->capa.add_path[aid] & mode) + return 1; + return 0; + } return (peer->capa.add_path[aid] & mode); } @@ -200,6 +207,7 @@ peer_add(uint32_t id, struct peer_config *p_conf) if (peer->loc_rib_id == RIB_NOTFOUND) fatalx("King Bula's new peer met an unknown RIB"); peer->state = PEER_NONE; + peer->eval = peer->conf.eval; peer->export_type = peer->conf.export_type; peer->flags = peer->conf.flags; SIMPLEQ_INIT(&peer->imsg_queue); @@ -244,10 +252,16 @@ peer_generate_update(struct rde_peer *peer, uint16_t rib_id, /* if reconf skip peers which don't need to reconfigure */ if (mode == EVAL_RECONF && peer->reconf_out == 0) return; + + /* handle peers with add-path */ + if (peer_has_add_path(peer, aid, CAPA_AP_SEND)) { + up_generate_addpath(out_rules, peer, new, old); + return; + } + /* skip regular peers if the best path didn't change */ if (mode == EVAL_ALL && (peer->flags & PEERFLAG_EVALUATE_ALL) == 0) return; - up_generate_updates(out_rules, peer, new, old); } diff --git a/usr.sbin/bgpd/rde_update.c b/usr.sbin/bgpd/rde_update.c index 49253b30e18..6fd8180c61c 100644 --- a/usr.sbin/bgpd/rde_update.c +++ b/usr.sbin/bgpd/rde_update.c @@ -1,4 
+1,4 @@ -/* $OpenBSD: rde_update.c,v 1.144 2022/07/11 16:58:58 claudio Exp $ */ +/* $OpenBSD: rde_update.c,v 1.145 2022/07/11 17:08:21 claudio Exp $ */ /* * Copyright (c) 2004 Claudio Jeker @@ -230,6 +230,138 @@ up_generate_updates(struct filter_head *rules, struct rde_peer *peer, prefix_adjout_withdraw(p); } +/* + * Generate updates for the add-path send case. Depending on the + * peer eval settings prefixes are selected and distributed. + * This highly depends on the Adj-RIB-Out to handle prefixes with no + * changes gracefully. It may be possible to improve the API so that + * less churn is needed. + */ +void +up_generate_addpath(struct filter_head *rules, struct rde_peer *peer, + struct prefix *new, struct prefix *old) +{ + struct filterstate state; + struct bgpd_addr addr; + struct prefix *head, *p; + uint8_t prefixlen; + int maxpaths = 0, extrapaths = 0, extra; + int checkmode = 1; + + if (new == NULL) { + pt_getaddr(old->pt, &addr); + prefixlen = old->pt->prefixlen; + } else { + pt_getaddr(new->pt, &addr); + prefixlen = new->pt->prefixlen; + } + + head = prefix_adjout_lookup(peer, &addr, prefixlen); + + /* mark all paths as stale */ + for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) + p->flags |= PREFIX_FLAG_STALE; + + /* update paths */ + for ( ; new != NULL; new = TAILQ_NEXT(new, entry.list.rib)) { + /* since list is sorted, stop at first invalid prefix */ + if (!prefix_eligible(new)) + break; + + /* check limits and stop when a limit is reached */ + if (peer->eval.maxpaths != 0 && + maxpaths >= peer->eval.maxpaths) + break; + if (peer->eval.extrapaths != 0 && + extrapaths >= peer->eval.extrapaths) + break; + + extra = 1; + if (checkmode) { + switch (peer->eval.mode) { + case ADDPATH_EVAL_BEST: + if (new->dmetric == PREFIX_DMETRIC_BEST) + extra = 0; + else + checkmode = 0; + break; + case ADDPATH_EVAL_ECMP: + if (new->dmetric == PREFIX_DMETRIC_BEST || + new->dmetric == PREFIX_DMETRIC_ECMP) + extra = 0; + else + checkmode = 0; + break; + case 
ADDPATH_EVAL_AS_WIDE: + if (new->dmetric == PREFIX_DMETRIC_BEST || + new->dmetric == PREFIX_DMETRIC_ECMP || + new->dmetric == PREFIX_DMETRIC_AS_WIDE) + extra = 0; + else + checkmode = 0; + break; + case ADDPATH_EVAL_ALL: + /* nothing to check */ + checkmode = 0; + break; + default: + fatalx("unknown add-path eval mode"); + } + } + + /* + * up_test_update() needs to run before the output filters + * else the well known communities won't work properly. + * The output filters would not be able to add well known + * communities. + */ + if (!up_test_update(peer, new)) + continue; + + rde_filterstate_prep(&state, prefix_aspath(new), + prefix_communities(new), prefix_nexthop(new), + prefix_nhflags(new)); + if (rde_filter(rules, peer, prefix_peer(new), &addr, + prefixlen, prefix_vstate(new), &state) == ACTION_DENY) { + rde_filterstate_clean(&state); + continue; + } + + if (up_enforce_open_policy(peer, &state)) { + rde_filterstate_clean(&state); + continue; + } + + /* from here on we know this is an update */ + maxpaths++; + extrapaths += extra; + + p = prefix_adjout_get(peer, new->path_id_tx, &addr, + new->pt->prefixlen); + + up_prep_adjout(peer, &state, addr.aid); + prefix_adjout_update(p, peer, &state, &addr, + new->pt->prefixlen, new->path_id_tx, prefix_vstate(new)); + rde_filterstate_clean(&state); + + /* max prefix checker outbound */ + if (peer->conf.max_out_prefix && + peer->prefix_out_cnt > peer->conf.max_out_prefix) { + log_peer_warnx(&peer->conf, + "outbound prefix limit reached (>%u/%u)", + peer->prefix_out_cnt, peer->conf.max_out_prefix); + rde_update_err(peer, ERR_CEASE, + ERR_CEASE_MAX_SENT_PREFIX, NULL, 0); + } + } + + /* withdraw stale paths */ + for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) { + if (p->flags & PREFIX_FLAG_STALE) + prefix_adjout_withdraw(p); + } +} + struct rib_entry *rib_add(struct rib *, struct bgpd_addr *, int); void rib_remove(struct rib_entry *); int rib_empty(struct rib_entry *); -- 2.20.1