Implement send side of RFC7911 ADD-PATH
author claudio <claudio@openbsd.org>
Mon, 11 Jul 2022 17:08:21 +0000 (17:08 +0000)
committer claudio <claudio@openbsd.org>
Mon, 11 Jul 2022 17:08:21 +0000 (17:08 +0000)
This allows sending more than one path per prefix to a neighbor that
supports add-path receive. OpenBGPD supports a few different modes to
select which paths to send:
  - all: send all valid paths (the ones with a * in bgpctl output)
  - best: send out only the single best path
  - ecmp: send out paths that evaluate the same up to and including
    the nexthop metric
  - as-wide-best: send out paths that evaluate the same up to but not
    including the nexthop metric
Currently ecmp and as-wide-best are the same. On top of this, best, ecmp
and as-wide-best allow extra paths to be included (e.g. best plus 2), and
for the multipath modes there is also a maximum (e.g. ecmp plus 2 max 4).

OK tb@

usr.sbin/bgpd/bgpd.h
usr.sbin/bgpd/parse.y
usr.sbin/bgpd/printconf.c
usr.sbin/bgpd/rde.c
usr.sbin/bgpd/rde.h
usr.sbin/bgpd/rde_peer.c
usr.sbin/bgpd/rde_update.c

index 3d592a3..c7caaea 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: bgpd.h,v 1.440 2022/07/07 12:16:04 claudio Exp $ */
+/*     $OpenBSD: bgpd.h,v 1.441 2022/07/11 17:08:21 claudio Exp $ */
 
 /*
  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
@@ -307,6 +307,20 @@ struct bgpd_config {
 
 extern int cmd_opts;
 
+enum addpath_mode {
+       ADDPATH_EVAL_NONE,      /* no add-path send mode selected */
+       ADDPATH_EVAL_BEST,      /* "best": only the single best path */
+       ADDPATH_EVAL_ECMP,      /* "ecmp": equal up to and incl. nexthop metric */
+       ADDPATH_EVAL_AS_WIDE,   /* "as-wide-best": equal, excl. nexthop metric */
+       ADDPATH_EVAL_ALL,       /* "all": every valid path */
+};
+
+struct addpath_eval {
+       enum addpath_mode       mode;           /* which paths get selected */
+       int                     extrapaths;     /* "plus N" value, 0 if absent */
+       int                     maxpaths;       /* "max N" value, 0 if absent */
+};
+
 enum export_type {
        EXPORT_UNSET,
        EXPORT_NONE,
@@ -402,6 +416,7 @@ struct peer_config {
        struct bgpd_addr         local_addr_v6;
        struct peer_auth         auth;
        struct capabilities      capabilities;
+       struct addpath_eval      eval;
        char                     group[PEER_DESCR_LEN];
        char                     descr[PEER_DESCR_LEN];
        char                     reason[REASON_LEN];
index a7c553c..baa36e6 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: parse.y,v 1.431 2022/06/27 13:26:51 claudio Exp $ */
+/*     $OpenBSD: parse.y,v 1.432 2022/07/11 17:08:21 claudio Exp $ */
 
 /*
  * Copyright (c) 2002, 2003, 2004 Henning Brauer <henning@openbsd.org>
@@ -210,7 +210,7 @@ typedef struct {
 %token EBGP IBGP
 %token LOCALAS REMOTEAS DESCR LOCALADDR MULTIHOP PASSIVE MAXPREFIX RESTART
 %token ANNOUNCE CAPABILITIES REFRESH AS4BYTE CONNECTRETRY ENHANCED ADDPATH
-%token SEND RECV POLICY
+%token SEND RECV PLUS POLICY
 %token DEMOTE ENFORCE NEIGHBORAS ASOVERRIDE REFLECTOR DEPEND DOWN
 %token DUMP IN OUT SOCKET RESTRICTED
 %token LOG TRANSPARENT
@@ -230,12 +230,13 @@ typedef struct {
 %token IPSEC ESP AH SPI IKE
 %token IPV4 IPV6
 %token QUALIFY VIA
-%token NE LE GE XRANGE LONGER MAXLEN
+%token NE LE GE XRANGE LONGER MAXLEN MAX
 %token <v.string>              STRING
 %token <v.number>              NUMBER
 %type  <v.number>              asnumber as4number as4number_any optnumber
 %type  <v.number>              espah family safi restart origincode nettype
 %type  <v.number>              yesno inout restricted validity expires enforce
+%type  <v.number>              addpathextra addpathmax
 %type  <v.string>              string
 %type  <v.addr>                address
 %type  <v.prefix>              prefix addrspec
@@ -718,7 +719,7 @@ conf_main   : AS as4number          {
                        struct rde_rib *rr;
                        rr = find_rib("Loc-RIB");
                        if (rr == NULL)
-                               fatalx("RTABLE can not find the main RIB!");
+                               fatalx("RTABLE cannot find the main RIB!");
 
                        if ($2 == 0)
                                rr->flags |= F_RIB_NOFIBSYNC;
@@ -880,7 +881,7 @@ conf_main   : AS as4number          {
                        }
                        rr = find_rib("Loc-RIB");
                        if (rr == NULL)
-                               fatalx("RTABLE can not find the main RIB!");
+                               fatalx("RTABLE cannot find the main RIB!");
                        rr->rtableid = $2;
                }
                | CONNECTRETRY NUMBER {
@@ -1356,6 +1357,28 @@ groupopts_l      : /* empty */
                | groupopts_l error '\n'
                ;
 
+addpathextra   : /* empty: no "plus" given, value 0 */ { $$ = 0;       }
+               | PLUS NUMBER           {       /* "plus N", N in 1..USHRT_MAX */
+                       if ($2 < 1 || $2 > USHRT_MAX) {
+                               yyerror("additional paths must be between "
+                                   "%u and %u", 1, USHRT_MAX);
+                               YYERROR;
+                       }
+                       $$ = $2;        /* becomes eval.extrapaths of the peer */
+               }
+               ;
+
+addpathmax     : /* empty: no "max" given, value 0 */  { $$ = 0;       }
+               | MAX NUMBER            {       /* "max N", N in 1..USHRT_MAX */
+                       if ($2 < 1 || $2 > USHRT_MAX) {
+                               yyerror("maximum additional paths must be "
+                                   "between %u and %u", 1, USHRT_MAX);
+                               YYERROR;
+                       }
+                       $$ = $2;        /* becomes eval.maxpaths of the peer */
+               }
+               ;
+
 peeropts_h     : '{' '\n' peeropts_l '}'
                | '{' peeropts '}'
                | /* empty */
@@ -1515,6 +1538,50 @@ peeropts : REMOTEAS as4number    {
                                else
                                        *ap++ &= ~CAPA_AP_RECV;
                }
+               | ANNOUNCE ADDPATH SEND STRING addpathextra addpathmax {
+                       int8_t *ap = curpeer->conf.capabilities.add_path;
+                       enum addpath_mode mode;
+                       u_int8_t i;
+
+                       if (!strcmp($4, "no")) {
+                               free($4);
+                               if ($5 != 0 || $6 != 0) {
+                                       yyerror("no additional option allowed "
+                                           "for 'add-path send no'");
+                                       YYERROR;
+                               }
+                               for (i = 0; i < AID_MAX; i++)
+                                       *ap++ &= ~CAPA_AP_SEND;
+                               break;
+                       } else if (!strcmp($4, "all")) {
+                               free($4);
+                               if ($5 != 0 || $6 != 0) {
+                                       yyerror("no additional option allowed "
+                                           "for 'add-path send all'");
+                                       YYERROR;
+                               }
+                               mode = ADDPATH_EVAL_ALL;
+                       } else if (!strcmp($4, "best")) {
+                               free($4);
+                               mode = ADDPATH_EVAL_BEST;
+                       } else if (!strcmp($4, "ecmp")) {
+                               free($4);
+                               mode = ADDPATH_EVAL_ECMP;
+                       } else if (!strcmp($4, "as-wide-best")) {
+                               free($4);
+                               mode = ADDPATH_EVAL_AS_WIDE;
+                       } else {
+                               yyerror("announce add-path send: "
+                                   "unknown mode \"%s\"", $4);
+                               free($4);
+                               YYERROR;
+                       }
+                       for (i = 0; i < AID_MAX; i++)
+                               *ap++ |= CAPA_AP_SEND;
+                       curpeer->conf.eval.mode = mode;
+                       curpeer->conf.eval.extrapaths = $5;
+                       curpeer->conf.eval.maxpaths = $6;
+               }
                | ANNOUNCE POLICY STRING enforce {
                        curpeer->conf.capabilities.role_ena = $4;
                        if (strcmp($3, "no") == 0) {
@@ -3070,6 +3137,7 @@ lookup(char *s)
                { "localpref",          LOCALPREF},
                { "log",                LOG},
                { "match",              MATCH},
+               { "max",                MAX},
                { "max-as-len",         MAXASLEN},
                { "max-as-seq",         MAXASSEQ},
                { "max-communities",    MAXCOMMUNITIES},
@@ -3098,6 +3166,7 @@ lookup(char *s)
                { "password",           PASSWORD},
                { "peer-as",            PEERAS},
                { "pftable",            PFTABLE},
+               { "plus",               PLUS},
                { "policy",             POLICY},
                { "port",               PORT},
                { "prefix",             PREFIX},
@@ -4596,6 +4665,14 @@ neighbor_consistent(struct peer *p)
                return (-1);
        }
 
+       /* bail if add-path send and rde evaluate all is used together */
+       if ((p->conf.flags & PEERFLAG_EVALUATE_ALL) &&
+           (p->conf.capabilities.add_path[0] & CAPA_AP_SEND)) {
+               yyerror("neighbors with add-path send cannot use "
+                   "'rde evaluate all'");
+               return (-1);
+       }
+
        return (0);
 }
 
index e707185..8b017df 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: printconf.c,v 1.155 2022/06/28 11:46:05 claudio Exp $ */
+/*     $OpenBSD: printconf.c,v 1.156 2022/07/11 17:08:21 claudio Exp $ */
 
 /*
  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
@@ -768,6 +768,23 @@ print_enc_alg(enum auth_enc_alg alg)
        }
 }
 
+/*
+ * Map an add-path evaluation mode to the word printed after
+ * "announce add-path send" when the configuration is dumped.
+ * Always returns a valid, statically allocated string; values outside
+ * of enum addpath_mode yield the placeholder "???".
+ * NOTE(review): ADDPATH_EVAL_NONE prints as "none" while the parser
+ * spells the disabled case "no" -- confirm this mode is never printed.
+ */
+static const char *
+print_addpath_mode(enum addpath_mode mode)
+{
+       switch (mode) {
+       case ADDPATH_EVAL_NONE:
+               return "none";
+       case ADDPATH_EVAL_BEST:
+               return "best";
+       case ADDPATH_EVAL_ECMP:
+               return "ecmp";
+       case ADDPATH_EVAL_AS_WIDE:
+               return "as-wide-best";
+       case ADDPATH_EVAL_ALL:
+               return "all";
+       }
+       /*
+        * Not a default: label on purpose, so the compiler can still warn
+        * about enumerators missing above. Without this return an invalid
+        * mode would fall off the end of a non-void function and the
+        * caller would hand an indeterminate pointer to printf("%s").
+        */
+       return "???";
+}
+
 void
 print_announce(struct peer_config *p, const char *c)
 {
@@ -790,6 +807,15 @@ print_announce(struct peer_config *p, const char *c)
                printf("%s\tannounce as4byte no\n", c);
        if (p->capabilities.add_path[0] & CAPA_AP_RECV)
                printf("%s\tannounce add-path recv yes\n", c);
+       if (p->capabilities.add_path[0] & CAPA_AP_SEND) {
+               printf("%s\tannounce add-path send %s", c,
+                    print_addpath_mode(p->eval.mode));
+               if (p->eval.extrapaths != 0)
+                       printf(" plus %d", p->eval.extrapaths);
+               if (p->eval.maxpaths != 0)
+                       printf(" max %d", p->eval.maxpaths);
+               printf("\n");
+       }
        if (p->capabilities.role_ena) {
                printf("%s\tannounce policy %s%s\n", c,
                    log_policy(p->capabilities.role),
index 25f600e..11324a7 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: rde.c,v 1.553 2022/07/11 16:51:01 claudio Exp $ */
+/*     $OpenBSD: rde.c,v 1.554 2022/07/11 17:08:21 claudio Exp $ */
 
 /*
  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
@@ -1091,6 +1091,9 @@ rde_dispatch_imsg_peer(struct rde_peer *peer, void *bula)
                        imsg_compose(ibuf_se, IMSG_SESSION_DOWN, peer->conf.id,
                            0, -1, NULL, 0);
                }
+               /* make sure rde_eval_all is on if needed. */
+               if (peer_has_add_path(peer, AID_UNSPEC, CAPA_AP_SEND))
+                       rde_eval_all = 1;
                break;
        case IMSG_SESSION_DOWN:
                peer_down(peer, NULL);
@@ -3494,6 +3497,23 @@ rde_reload_done(void)
                                fatalx("King Bula's peer met an unknown RIB");
                        peer->reconf_rib = 1;
                }
+               /*
+                * Update add-path settings but only if the session is
+                * running with add-path and the config uses add-path
+                * as well.
+                */
+               if (peer_has_add_path(peer, AID_UNSPEC, CAPA_AP_SEND)) {
+                       if (peer->conf.eval.mode != ADDPATH_EVAL_NONE &&
+                           memcmp(&peer->eval, &peer->conf.eval,
+                           sizeof(peer->eval)) != 0) {
+                               log_peer_info(&peer->conf,
+                                   "addpath eval change, reloading");
+                               peer->reconf_out = 1;
+                               peer->eval = peer->conf.eval;
+                       }
+                       /* add-path send needs rde_eval_all */
+                       rde_eval_all = 1;
+               }
                peer->export_type = peer->conf.export_type;
                peer->flags = peer->conf.flags;
                if (peer->flags & PEERFLAG_EVALUATE_ALL)
index 0a48587..4499d32 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: rde.h,v 1.258 2022/07/08 10:01:52 claudio Exp $ */
+/*     $OpenBSD: rde.h,v 1.259 2022/07/11 17:08:21 claudio Exp $ */
 
 /*
  * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> and
@@ -87,6 +87,7 @@ struct rde_peer {
        struct bgpd_addr                 local_v4_addr;
        struct bgpd_addr                 local_v6_addr;
        struct capabilities              capa;
+       struct addpath_eval              eval;
        struct prefix_index              adj_rib_out;
        struct prefix_tree               updates[AID_MAX];
        struct prefix_tree               withdraws[AID_MAX];
@@ -697,6 +698,8 @@ int          nexthop_compare(struct nexthop *, struct nexthop *);
 void            up_init(struct rde_peer *);
 void            up_generate_updates(struct filter_head *, struct rde_peer *,
                     struct prefix *, struct prefix *);
+void            up_generate_addpath(struct filter_head *, struct rde_peer *,
+                    struct prefix *, struct prefix *);
 void            up_generate_default(struct filter_head *, struct rde_peer *,
                     uint8_t);
 int             up_is_eor(struct rde_peer *, uint8_t);
index c0bffe2..7572c6c 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: rde_peer.c,v 1.18 2022/07/07 10:46:54 claudio Exp $ */
+/*     $OpenBSD: rde_peer.c,v 1.19 2022/07/11 17:08:21 claudio Exp $ */
 
 /*
  * Copyright (c) 2019 Claudio Jeker <claudio@openbsd.org>
@@ -58,6 +58,13 @@ peer_has_add_path(struct rde_peer *peer, uint8_t aid, int mode)
 {
        if (aid > AID_MAX)
                return 0;
+       if (aid == AID_UNSPEC) {
+               /* check if at capability is set for at least one AID */
+               for (aid = AID_MIN; aid < AID_MAX; aid++)
+                       if (peer->capa.add_path[aid] & mode)
+                               return 1;
+               return 0;
+       }
        return (peer->capa.add_path[aid] & mode);
 }
 
@@ -200,6 +207,7 @@ peer_add(uint32_t id, struct peer_config *p_conf)
        if (peer->loc_rib_id == RIB_NOTFOUND)
                fatalx("King Bula's new peer met an unknown RIB");
        peer->state = PEER_NONE;
+       peer->eval = peer->conf.eval;
        peer->export_type = peer->conf.export_type;
        peer->flags = peer->conf.flags;
        SIMPLEQ_INIT(&peer->imsg_queue);
@@ -244,10 +252,16 @@ peer_generate_update(struct rde_peer *peer, uint16_t rib_id,
        /* if reconf skip peers which don't need to reconfigure */
        if (mode == EVAL_RECONF && peer->reconf_out == 0)
                return;
+
+       /* handle peers with add-path */
+       if (peer_has_add_path(peer, aid, CAPA_AP_SEND)) {
+               up_generate_addpath(out_rules, peer, new, old);
+               return;
+       }
+
        /* skip regular peers if the best path didn't change */
        if (mode == EVAL_ALL && (peer->flags & PEERFLAG_EVALUATE_ALL) == 0)
                return;
-
        up_generate_updates(out_rules, peer, new, old);
 }
 
index 49253b3..6fd8180 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: rde_update.c,v 1.144 2022/07/11 16:58:58 claudio Exp $ */
+/*     $OpenBSD: rde_update.c,v 1.145 2022/07/11 17:08:21 claudio Exp $ */
 
 /*
  * Copyright (c) 2004 Claudio Jeker <claudio@openbsd.org>
@@ -230,6 +230,138 @@ up_generate_updates(struct filter_head *rules, struct rde_peer *peer,
                prefix_adjout_withdraw(p);
 }
 
+/*
+ * Generate updates for the add-path send case. Depending on the
+ * peer eval settings prefixes are selected and distributed.
+ * This highly depends on the Adj-RIB-Out to handle prefixes with no
+ * changes gracefully. It may be possible to improve the API so that
+ * less churn is needed.
+ *
+ * rules is the output filter set run against every candidate path and
+ * peer is the neighbor the paths are generated for. new is the first
+ * candidate; the walk follows the entry.list.rib links from there and
+ * ends at the first path that is not eligible. old is only used to get
+ * the address and prefix length when new is NULL, so old must not be
+ * NULL in that case. With new == NULL no path is added and everything
+ * returned by prefix_adjout_lookup()/prefix_adjout_next() is withdrawn.
+ *
+ * The work is done in three passes: flag the existing Adj-RIB-Out paths
+ * as stale, add or refresh the selected paths, then withdraw whatever
+ * still carries the stale flag. Presumably prefix_adjout_update() is
+ * what clears the flag again -- confirm against its implementation.
+ */
+void
+up_generate_addpath(struct filter_head *rules, struct rde_peer *peer,
+    struct prefix *new, struct prefix *old)
+{
+       struct filterstate      state;
+       struct bgpd_addr        addr;
+       struct prefix           *head, *p;
+       uint8_t                 prefixlen;
+       int                     maxpaths = 0, extrapaths = 0, extra;
+       int                     checkmode = 1;  /* 0 once the mode is done */
+
+       if (new == NULL) {
+               pt_getaddr(old->pt, &addr);
+               prefixlen = old->pt->prefixlen;
+       } else {
+               pt_getaddr(new->pt, &addr);
+               prefixlen = new->pt->prefixlen;
+       }
+
+       head = prefix_adjout_lookup(peer, &addr, prefixlen);
+
+       /*
+        * mark all paths as stale
+        * head is kept so the same list can be walked again at the end.
+        */
+       for (p = head; p != NULL; p = prefix_adjout_next(peer, p))
+               p->flags |= PREFIX_FLAG_STALE;
+
+       /* update paths */
+       for ( ; new != NULL; new = TAILQ_NEXT(new, entry.list.rib)) {
+               /* since list is sorted, stop at first invalid prefix */
+               if (!prefix_eligible(new))
+                       break;
+
+               /*
+                * check limits and stop when a limit is reached
+                * A configured value of 0 disables the respective check.
+                * NOTE(review): with eval.extrapaths == 0 nothing stops
+                * the walk once a path no longer matches the mode, so
+                * mode best without "plus" keeps going past the best
+                * path unless "max" is set -- confirm this is intended.
+                */
+               if (peer->eval.maxpaths != 0 &&
+                   maxpaths >= peer->eval.maxpaths)
+                       break;
+               if (peer->eval.extrapaths != 0 &&
+                   extrapaths >= peer->eval.extrapaths)
+                       break;
+
+               extra = 1;      /* an extra path unless the mode covers it */
+               if (checkmode) {
+                       switch (peer->eval.mode) {
+                       case ADDPATH_EVAL_BEST:
+                               if (new->dmetric == PREFIX_DMETRIC_BEST)
+                                       extra = 0;
+                               else
+                                       checkmode = 0;
+                               break;
+                       case ADDPATH_EVAL_ECMP:
+                               if (new->dmetric == PREFIX_DMETRIC_BEST ||
+                                   new->dmetric == PREFIX_DMETRIC_ECMP)
+                                       extra = 0;
+                               else
+                                       checkmode = 0;
+                               break;
+                       case ADDPATH_EVAL_AS_WIDE:
+                               if (new->dmetric == PREFIX_DMETRIC_BEST ||
+                                   new->dmetric == PREFIX_DMETRIC_ECMP ||
+                                   new->dmetric == PREFIX_DMETRIC_AS_WIDE)
+                                       extra = 0;
+                               else
+                                       checkmode = 0;
+                               break;
+                       case ADDPATH_EVAL_ALL:
+                               /*
+                                * nothing to check
+                                * every path is counted as an extra path
+                                */
+                               checkmode = 0;
+                               break;
+                       default:
+                               fatalx("unknown add-path eval mode");
+                       }
+               }
+
+               /*
+                * up_test_update() needs to run before the output filters
+                * else the well known communities won't work properly.
+                * The output filters would not be able to add well known
+                * communities.
+                */
+               if (!up_test_update(peer, new))
+                       continue;
+
+               rde_filterstate_prep(&state, prefix_aspath(new),
+                   prefix_communities(new), prefix_nexthop(new),
+                   prefix_nhflags(new));
+               if (rde_filter(rules, peer, prefix_peer(new), &addr,
+                   prefixlen, prefix_vstate(new), &state) == ACTION_DENY) {
+                       rde_filterstate_clean(&state);
+                       continue;
+               }
+
+               if (up_enforce_open_policy(peer, &state)) {
+                       rde_filterstate_clean(&state);
+                       continue;
+               }
+
+               /*
+                * from here on we know this is an update
+                * Paths skipped above do not count against the limits:
+                * maxpaths counts every path sent, extrapaths only the
+                * ones not covered by the mode.
+                */
+               maxpaths++;
+               extrapaths += extra;
+
+               p = prefix_adjout_get(peer, new->path_id_tx, &addr,
+                   new->pt->prefixlen);
+
+               up_prep_adjout(peer, &state, addr.aid);
+               prefix_adjout_update(p, peer, &state, &addr,
+                   new->pt->prefixlen, new->path_id_tx, prefix_vstate(new));
+               rde_filterstate_clean(&state);
+
+               /*
+                * max prefix checker outbound
+                * NOTE(review): the walk is not stopped after the error
+                * has been raised -- confirm that is safe for the peer.
+                */
+               if (peer->conf.max_out_prefix &&
+                   peer->prefix_out_cnt > peer->conf.max_out_prefix) {
+                       log_peer_warnx(&peer->conf,
+                           "outbound prefix limit reached (>%u/%u)",
+                           peer->prefix_out_cnt, peer->conf.max_out_prefix);
+                       rde_update_err(peer, ERR_CEASE,
+                           ERR_CEASE_MAX_SENT_PREFIX, NULL, 0);
+               }
+       }
+
+       /*
+        * withdraw stale paths
+        * p was reused inside the loop above, so restart from head.
+        */
+       for (p = head; p != NULL; p = prefix_adjout_next(peer, p)) {
+               if (p->flags & PREFIX_FLAG_STALE)
+                       prefix_adjout_withdraw(p);
+       }
+}
+
 struct rib_entry *rib_add(struct rib *, struct bgpd_addr *, int);
 void rib_remove(struct rib_entry *);
 int rib_empty(struct rib_entry *);