From a02daadde29b2a2d13f5b1b0aec5558b755c1098 Mon Sep 17 00:00:00 2001 From: claudio Date: Fri, 7 Sep 2018 10:49:22 +0000 Subject: [PATCH] Implement a fast presix-set lookup. This magic trie is able to match a prefix addr/plen to a prefix-set spec addr/plen prefixlen min - max (a prefix including prefixlen range). Every addr/plen pair is a node in the trie and the prefixlen is added as a bitmask to those nodes. For the lookup the any match is OK, there is no need to do longest or best prefix matching. Inspiration for this solution comes from the way bird implements this which was done by Ondrej Zajicek santiago (at) crfreenet.org OK benno@ --- usr.sbin/bgpd/Makefile | 5 +- usr.sbin/bgpd/bgpd.c | 4 +- usr.sbin/bgpd/bgpd.h | 7 +- usr.sbin/bgpd/rde.c | 97 +++--- usr.sbin/bgpd/rde.h | 27 +- usr.sbin/bgpd/rde_filter.c | 32 +- usr.sbin/bgpd/rde_trie.c | 588 +++++++++++++++++++++++++++++++++++++ 7 files changed, 699 insertions(+), 61 deletions(-) create mode 100644 usr.sbin/bgpd/rde_trie.c diff --git a/usr.sbin/bgpd/Makefile b/usr.sbin/bgpd/Makefile index b57e53914f9..753e43652e3 100644 --- a/usr.sbin/bgpd/Makefile +++ b/usr.sbin/bgpd/Makefile @@ -1,10 +1,11 @@ -# $OpenBSD: Makefile,v 1.33 2018/09/07 05:43:33 claudio Exp $ +# $OpenBSD: Makefile,v 1.34 2018/09/07 10:49:22 claudio Exp $ PROG= bgpd SRCS= bgpd.c session.c log.c logmsg.c parse.y config.c \ rde.c rde_rib.c rde_decide.c rde_prefix.c mrt.c kroute.c \ control.c pfkey.c rde_update.c rde_attr.c printconf.c \ - rde_filter.c rde_sets.c pftable.c name2id.c util.c carp.c timer.c + rde_filter.c rde_sets.c rde_trie.c pftable.c name2id.c \ + util.c carp.c timer.c CFLAGS+= -Wall -I${.CURDIR} CFLAGS+= -Wstrict-prototypes -Wmissing-prototypes CFLAGS+= -Wmissing-declarations diff --git a/usr.sbin/bgpd/bgpd.c b/usr.sbin/bgpd/bgpd.c index 894b3127052..c590e10046a 100644 --- a/usr.sbin/bgpd/bgpd.c +++ b/usr.sbin/bgpd/bgpd.c @@ -1,4 +1,4 @@ -/* $OpenBSD: bgpd.c,v 1.195 2018/09/07 05:43:33 claudio Exp $ */ +/* $OpenBSD: bgpd.c,v 1.196 2018/09/07 10:49:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -507,7 +507,7 @@ reconfigure(char *conffile, struct bgpd_config *conf, struct peer **peer_l) while ((ps = SIMPLEQ_FIRST(conf->prefixsets)) != NULL) { SIMPLEQ_REMOVE_HEAD(conf->prefixsets, entry); if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIXSET, 0, 0, -1, - ps, sizeof(*ps)) == -1) + ps->name, sizeof(ps->name)) == -1) return (-1); while ((psi = SIMPLEQ_FIRST(&ps->psitems)) != NULL) { SIMPLEQ_REMOVE_HEAD(&ps->psitems, entry); diff --git a/usr.sbin/bgpd/bgpd.h b/usr.sbin/bgpd/bgpd.h index 91c9c8c2233..9b5469f891c 100644 --- a/usr.sbin/bgpd/bgpd.h +++ b/usr.sbin/bgpd/bgpd.h @@ -1,4 +1,4 @@ -/* $OpenBSD: bgpd.h,v 1.334 2018/09/07 05:43:33 claudio Exp $ */ +/* $OpenBSD: bgpd.h,v 1.335 2018/09/07 10:49:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -212,6 +212,8 @@ TAILQ_HEAD(network_head, network); struct prefixset; SIMPLEQ_HEAD(prefixset_head, prefixset); +struct rde_prefixset_head; +struct rde_prefixset; struct as_set; SIMPLEQ_HEAD(as_set_head, as_set); @@ -226,6 +228,7 @@ struct bgpd_config { struct listen_addrs *listen_addrs; struct mrt_head *mrt; struct prefixset_head *prefixsets; + struct rde_prefixset_head *rde_prefixsets; struct as_set_head *as_sets; char *csock; char *rcsock; @@ -680,7 +683,7 @@ struct filter_aslen { struct filter_prefixset { int flags; char name[SET_NAME_LEN]; - struct prefixset *ps; + struct rde_prefixset *ps; }; struct filter_community { diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c index f7a077fb5fa..059834cc4a6 100644 --- a/usr.sbin/bgpd/rde.c +++ b/usr.sbin/bgpd/rde.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.c,v 1.419 2018/09/07 05:43:33 claudio Exp $ */ +/* $OpenBSD: rde.c,v 1.420 2018/09/07 10:49:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -100,8 +100,10 @@ static void rde_softreconfig_unload_peer(struct rib_entry *, void *); void rde_up_dump_upcall(struct rib_entry *, void *); void rde_update_queue_runner(void); void rde_update6_queue_runner(u_int8_t); -void rde_mark_prefixsets_dirty(struct prefixset_head *, - struct prefixset_head *); +struct rde_prefixset *rde_find_prefixset(char *, struct rde_prefixset_head *); +void rde_free_prefixsets(struct rde_prefixset_head *); +void rde_mark_prefixsets_dirty(struct rde_prefixset_head *, + struct rde_prefixset_head *); void peer_init(u_int32_t); void peer_shutdown(void); @@ -128,7 +130,7 @@ struct bgpd_config *conf, *nconf; time_t reloadtime; struct rde_peer_head peerlist; struct rde_peer *peerself; -struct prefixset_head *prefixsets_tmp, *prefixsets_old; +struct rde_prefixset_head *prefixsets_tmp, *prefixsets_old; struct as_set_head *as_sets_tmp, *as_sets_old; struct filter_head *out_rules, *out_rules_tmp; struct rdomain_head *rdomains_l, *newdomains; @@ -686,9 +688,9 @@ badnetdel: void rde_dispatch_imsg_parent(struct imsgbuf *ibuf) { - static struct rdomain *rd; - static struct prefixset *last_prefixset; + static struct rde_prefixset *last_prefixset; static struct as_set *last_as_set; + static struct rdomain *rd; struct imsg imsg; struct mrt xmrt; struct rde_rib rn; @@ -697,8 +699,8 @@ rde_dispatch_imsg_parent(struct imsgbuf *ibuf) struct filter_rule *r; struct filter_set *s; struct rib *rib; - struct prefixset *ps; - struct prefixset_item *psi; + struct rde_prefixset *ps; + struct prefixset_item psi; char *name; size_t nmemb; int n, fd; @@ -769,7 +771,7 @@ rde_dispatch_imsg_parent(struct imsgbuf *ibuf) fatalx("IMSG_RECONF_CONF bad len"); reloadtime = time(NULL); prefixsets_tmp = calloc(1, - sizeof(struct prefixset_head)); + sizeof(struct rde_prefixset_head)); if (prefixsets_tmp == NULL) fatal(NULL); SIMPLEQ_INIT(prefixsets_tmp); @@ -832,7 +834,7 @@ rde_dispatch_imsg_parent(struct imsgbuf *ibuf) memcpy(r, imsg.data, sizeof(struct filter_rule)); if (r->match.prefixset.flags != 0) { r->match.prefixset.ps = - find_prefixset(r->match.prefixset.name, + rde_find_prefixset(r->match.prefixset.name, prefixsets_tmp); if (r->match.prefixset.ps == NULL) log_warnx("%s: no prefixset for %s", @@ -877,27 +879,27 @@ rde_dispatch_imsg_parent(struct imsgbuf *ibuf) break; case IMSG_RECONF_PREFIXSET: if (imsg.hdr.len - IMSG_HEADER_SIZE != - sizeof(struct prefixset)) + sizeof(ps->name)) fatalx("IMSG_RECONF_PREFIXSET bad len"); - ps = malloc(sizeof(struct prefixset)); + ps = calloc(1, sizeof(struct rde_prefixset)); if (ps == NULL) fatal(NULL); - memcpy(ps, imsg.data, sizeof(struct prefixset)); - SIMPLEQ_INIT(&ps->psitems); + memcpy(ps->name, imsg.data, sizeof(ps->name)); SIMPLEQ_INSERT_TAIL(prefixsets_tmp, ps, entry); last_prefixset = ps; break; case IMSG_RECONF_PREFIXSETITEM: if (imsg.hdr.len - IMSG_HEADER_SIZE != - sizeof(struct prefixset_item)) + sizeof(psi)) fatalx("IMSG_RECONF_PREFIXSETITEM bad len"); - psi = malloc(sizeof(struct prefixset_item)); - if (psi == NULL) - fatal(NULL); - memcpy(psi, imsg.data, sizeof(struct prefixset_item)); + memcpy(&psi, imsg.data, sizeof(psi)); if (last_prefixset == NULL) fatalx("King Bula has no prefixset"); - SIMPLEQ_INSERT_TAIL(&last_prefixset->psitems, psi, entry); + if (trie_add(&last_prefixset->th, &psi.p.addr, + psi.p.len, psi.p.len_min, psi.p.len_max) == -1) + log_warnx("trie_add(%s) %s/%u, %u-%u) failed", + last_prefixset->name, log_addr(&psi.p.addr), + psi.p.len, psi.p.len_min, psi.p.len_max); break; case IMSG_RECONF_AS_SET: if (imsg.hdr.len - IMSG_HEADER_SIZE != @@ -2793,13 +2795,14 @@ rde_reload_done(void) nconf->flags &= ~BGPD_FLAG_NO_EVALUATE; } - prefixsets_old = conf->prefixsets; + prefixsets_old = conf->rde_prefixsets; as_sets_old = conf->as_sets; memcpy(conf, nconf, sizeof(struct bgpd_config)); conf->listen_addrs = NULL; conf->csock = NULL; conf->rcsock = NULL; + conf->prefixsets = NULL; free(nconf); nconf = NULL; @@ -2827,7 +2830,7 @@ rde_reload_done(void) as_sets_mark_dirty(as_sets_old, as_sets_tmp); /* swap the prefixsets */ - conf->prefixsets = prefixsets_tmp; + conf->rde_prefixsets = prefixsets_tmp; prefixsets_tmp = NULL; /* and the as_sets */ conf->as_sets = as_sets_tmp; @@ -3018,7 +3021,7 @@ rde_softreconfig_done(void) ribs[rid].state = RECONF_NONE; } - free_prefixsets(prefixsets_old); + rde_free_prefixsets(prefixsets_old); prefixsets_old = NULL; as_sets_free(as_sets_old); as_sets_old = NULL; @@ -3809,25 +3812,47 @@ sa_cmp(struct bgpd_addr *a, struct sockaddr *b) return (0); } +struct rde_prefixset * +rde_find_prefixset(char *name, struct rde_prefixset_head *p) +{ + struct rde_prefixset *ps; + + SIMPLEQ_FOREACH(ps, p, entry) { + if (!strcmp(ps->name, name)) + return (ps); + } + return (NULL); +} + +void +rde_free_prefixsets(struct rde_prefixset_head *psh) +{ + struct rde_prefixset *ps; + + if (psh == NULL) + return; + + while (!SIMPLEQ_EMPTY(psh)) { + ps = SIMPLEQ_FIRST(psh); + trie_free(&ps->th); + SIMPLEQ_REMOVE_HEAD(psh, entry); + free(ps); + } +} + void -rde_mark_prefixsets_dirty(struct prefixset_head *psold, - struct prefixset_head *psnew) +rde_mark_prefixsets_dirty(struct rde_prefixset_head *psold, + struct rde_prefixset_head *psnew) { - struct prefixset *new, *fps; - struct prefixset_item *item; + struct rde_prefixset *new, *old; SIMPLEQ_FOREACH(new, psnew, entry) { if ((psold == NULL) || - (fps = find_prefixset(new->name, psold)) == NULL) { - new->sflags |= PREFIXSET_FLAG_DIRTY; + (old = rde_find_prefixset(new->name, psold)) == NULL) { + new->dirty = 1; } else { - SIMPLEQ_FOREACH(item, &new->psitems, entry) { - if (find_prefixsetitem(item, &fps->psitems) - == NULL) { - new->sflags |= PREFIXSET_FLAG_DIRTY; - break; - } - } + if (trie_equal(&new->th, &old->th) == 0) + new->dirty = 1; } } } diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h index f32c8b034ed..c67e7cf660f 100644 --- a/usr.sbin/bgpd/rde.h +++ b/usr.sbin/bgpd/rde.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.h,v 1.187 2018/09/07 05:43:33 claudio Exp $ */ +/* $OpenBSD: rde.h,v 1.188 2018/09/07 10:49:22 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker and @@ -323,6 +323,23 @@ struct filterstate { u_int8_t nhflags; }; +struct tentry_v4; +struct tentry_v6; +struct trie_head { + struct tentry_v4 *root_v4; + struct tentry_v6 *root_v6; + int match_default_v4; + int match_default_v6; +}; + +struct rde_prefixset { + char name[SET_NAME_LEN]; + struct trie_head th; + SIMPLEQ_ENTRY(rde_prefixset) entry; + int dirty; +}; +SIMPLEQ_HEAD(rde_prefixset_head, rde_prefixset); + extern struct rde_memstats rdemem; /* prototypes */ @@ -563,4 +580,12 @@ u_char *up_dump_mp_unreach(u_char *, u_int16_t *, struct rde_peer *, int up_dump_mp_reach(u_char *, u_int16_t *, struct rde_peer *, u_int8_t); +/* rde_trie.c */ +int trie_add(struct trie_head *, struct bgpd_addr *, u_int8_t, + u_int8_t, u_int8_t); +void trie_free(struct trie_head *); +int trie_match(struct trie_head *, struct bgpd_addr *, u_int8_t); +void trie_dump(struct trie_head *); +int trie_equal(struct trie_head *, struct trie_head *); + #endif /* __RDE_H__ */ diff --git a/usr.sbin/bgpd/rde_filter.c b/usr.sbin/bgpd/rde_filter.c index 3633e94b3ac..52d36ad33af 100644 --- a/usr.sbin/bgpd/rde_filter.c +++ b/usr.sbin/bgpd/rde_filter.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_filter.c,v 1.102 2018/09/07 05:43:33 claudio Exp $ */ +/* $OpenBSD: rde_filter.c,v 1.103 2018/09/07 10:49:22 claudio Exp $ */ /* * Copyright (c) 2004 Claudio Jeker @@ -342,7 +342,6 @@ rde_filter_match(struct filter_rule *f, struct rde_peer *peer, { int cas, type; int64_t las, ld1, ld2; - struct prefixset_item *psi; struct rde_aspath *asp = NULL; if (state != NULL) @@ -483,17 +482,15 @@ rde_filter_match(struct filter_rule *f, struct rde_peer *peer, * prefixset and prefix filter rules are mutual exclusive */ if (f->match.prefixset.flags != 0) { - log_debug("%s: processing filter for prefixset %s", - __func__, f->match.prefixset.name); - SIMPLEQ_FOREACH(psi, &f->match.prefixset.ps->psitems, entry) { - if (rde_prefix_match(&psi->p, p)) { - log_debug("%s: prefixset %s matched %s", - __func__, f->match.prefixset.ps->name, - log_addr(&psi->p.addr)); - return (1); - } - } - return (0); + struct bgpd_addr addr, *prefix = &addr; + u_int8_t plen; + + pt_getaddr(p->re->prefix, prefix); + plen = p->re->prefix->prefixlen; + + if (f->match.prefixset.ps == NULL || + !trie_match(&f->match.prefixset.ps->th, prefix, plen)) + return (0); } else if (f->match.prefix.addr.aid != 0) return (rde_prefix_match(&f->match.prefix, p)); @@ -574,7 +571,7 @@ rde_filter_equal(struct filter_head *a, struct filter_head *b, struct rde_peer *peer) { struct filter_rule *fa, *fb; - struct prefixset *psa, *psb; + struct rde_prefixset *psa, *psb; struct as_set *asa, *asb; fa = a ? TAILQ_FIRST(a) : NULL; @@ -615,10 +612,9 @@ rde_filter_equal(struct filter_head *a, struct filter_head *b, fa->match.as.aset = asa; fb->match.as.aset = asb; - if ((fa->match.prefixset.flags != 0) && - (fa->match.prefixset.ps != NULL) && - ((fa->match.prefixset.ps->sflags - & PREFIXSET_FLAG_DIRTY) != 0)) { + if (fa->match.prefixset.flags != 0 && + fa->match.prefixset.ps != NULL && + fa->match.prefixset.ps->dirty) { log_debug("%s: prefixset %s has changed", __func__, fa->match.prefixset.name); return (0); diff --git a/usr.sbin/bgpd/rde_trie.c b/usr.sbin/bgpd/rde_trie.c new file mode 100644 index 00000000000..781aa778863 --- /dev/null +++ b/usr.sbin/bgpd/rde_trie.c @@ -0,0 +1,588 @@ +/* $OpenBSD: rde_trie.c,v 1.1 2018/09/07 10:49:22 claudio Exp $ */ + +/* + * Copyright (c) 2018 Claudio Jeker + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include +#include + +#include + +#include +#include +#include + +#include "bgpd.h" +#include "rde.h" + +/* + * Bitwise compressed trie for prefix-sets. + * Since the trie is path compressed the traversing function needs to check + * that the lookup is still on path before taking a branch. + * There are two types of nodes in the trie. Internal branch nodes and real + * nodes. Internal nodes are added when needed because off path nodes are being + * inserted and are just used for branching. + * During lookup every node defines which bit is checked for branching. This + * offset is strictly increasing. For IPv4 the maximum is therefor 32 levels. + * Every node checks the bit at position plen to decide which branch to take. + * The real nodes also include a prefixlen mask which represents the prefixlen + * range that was defined. The prefixlen mask for IPv4 only has 32 bits but + * the prefixlen range is from 0 - 32 which needs 33bits. Now prefixlen 0 is + * special and only one element -- the default route -- can have prefixlen 0. + * So this default route is added as a flag to the trie_head and needs to be + * checked independent of the trie when doing a lookup. + * On insertion a prefix is added with its prefixlen plen and a min & max + * for the prefixlen range. The following precondition needs to hold + * plen <= min <= max + * To only match the prefix itself min and max are set to plen. + * If a same prefix (addr/plen) is added to the trie but with different + * min & max values then the masks of both nodes are merged with a binary OR. + * The match function returns true the moment the first node is found which + * covers the looked up prefix and where the prefixlen mask matches for the + * looked up prefixlen. The moment the plen of a node is bigger is bigger than + * the prefixlen of the looked up prefix the search can be stopped since there + * will be no match. + */ +struct tentry_v4 { + struct tentry_v4 *trie[2]; + struct in_addr addr; + struct in_addr plenmask; + u_int8_t plen; + u_int8_t node; + + /* roa source-as list pointer */ +}; + +struct tentry_v6 { + struct tentry_v6 *trie[2]; + struct in6_addr addr; + struct in6_addr plenmask; + u_int8_t plen; + u_int8_t node; + + /* roa source-as list pointer */ +}; + +/* + * Find first different bit between a & b starting from the MSB, + * a & b have to be different. + */ +static int +inet4findmsb(struct in_addr *a, struct in_addr *b) +{ + int r = 0; + u_int32_t v; + + v = ntohl(a->s_addr ^ b->s_addr); + if (v > 0xffff) { r += 16; v >>= 16; } + if (v > 0x00ff) { r += 8; v >>= 8; } + if (v > 0x000f) { r += 4; v >>= 4; } + if (v > 0x0003) { r += 2; v >>= 2; } + if (v > 0x0001) { r += 1; } + + return 31 - r; +} + +/* + * Find first different bit between a & b starting from the MSB, + * a & b have to be different. + */ +static int +inet6findmsb(struct in6_addr *a, struct in6_addr *b) +{ + int r = 0; + u_int8_t i, x; + + for (i = 0; i < sizeof(*a) && a->s6_addr[i] == b->s6_addr[i]; i++) + ; + /* first different octet */ + x = a->s6_addr[i] ^ b->s6_addr[i]; + + /* first different bit */ + if (x > 0xf) { r += 4; x >>= 4; } + if (x > 0x3) { r += 2; x >>= 2; } + if (x > 0x1) { r += 1; } + + return i * 8 + 7 - r; +} + +static int +inet4isset(struct in_addr *addr, u_int8_t bit) +{ + return addr->s_addr & htonl(1 << (31 - bit)); +} + +static int +inet6isset(struct in6_addr *addr, u_int8_t bit) +{ + return addr->s6_addr[bit / 8] & (0x80 >> (bit % 8)); +} + +static void +inet4setbit(struct in_addr *addr, u_int8_t bit) +{ + /* bit 0 sets the MSB and 31 sets the LSB */ + addr->s_addr |= (1 << (31 - bit)); +} + +static void +inet6setbit(struct in6_addr *addr, u_int8_t bit) +{ + addr->s6_addr[bit / 8] |= (0x80 >> (bit % 8)); +} + +static int +trie_add_v4(struct trie_head *th, struct in_addr *prefix, u_int8_t plen, + u_int8_t min, u_int8_t max) +{ + struct tentry_v4 *n, *new, *b, **prev; + struct in_addr p, plenmask; + u_int8_t i; + + /* + * check for default route, this is special cased since prefixlen 0 + * can't be checked in the prefixlen mask plenmask. Also there is only + * one default route so using a flag for this works. + */ + if (min == 0) { + th->match_default_v4 = 1; + if (max == 0) /* just the default route */ + return 0; + min = 1; + } + + inet4applymask(&p, prefix, plen); + + /* + * The prefixlen mask goes from 1 to 32 but the bitmask + * starts at 0 and so all bits are set with an offset of 1. + * The default /0 route is handled specially above. + */ + memset(&plenmask, 0, sizeof(plenmask)); + for (i = min; i <= max; i++) + inet4setbit(&plenmask, i - 1); + + /* walk tree finding spot to insert */ + prev = &th->root_v4; + n = *prev; + while (n) { + struct in_addr mp; + u_int8_t minlen; + + minlen = n->plen > plen ? plen : n->plen; + inet4applymask(&mp, &p, minlen); + if (n->addr.s_addr != mp.s_addr) { + /* + * out of path, insert intermediary node between + * np and n, then insert n and new node there + */ + if ((b = calloc(1, sizeof(*b))) == NULL) + return -1; + b->plen = inet4findmsb(&n->addr, &mp); + inet4applymask(&b->addr, &n->addr, b->plen); + + *prev = b; + if (inet4isset(&n->addr, b->plen)) { + b->trie[1] = n; + prev = &b->trie[0]; + } else { + b->trie[0] = n; + prev = &b->trie[1]; + } + n = NULL; + break; + } + + if (n->plen > plen) { + /* n is more specific, just insert new in between */ + break; + } + + if (n->plen == plen) { + /* matching node, adjust */ + n->node = 1; + n->plenmask.s_addr |= plenmask.s_addr; + return 0; + } + + /* no need to check for n->plen == 32 because of above if */ + if (inet4isset(&p, n->plen)) + prev = &n->trie[1]; + else + prev = &n->trie[0]; + n = *prev; + } + + /* create new node */ + if ((new = calloc(1, sizeof(*new))) == NULL) + return -1; + new->addr = p; + new->plen = plen; + new->plenmask = plenmask; + new->node = 1; + + /* link node */ + *prev = new; + if (n) { + if (inet4isset(&n->addr, new->plen)) + new->trie[1] = n; + else + new->trie[0] = n; + } + return 0; +} + +static int +trie_add_v6(struct trie_head *th, struct in6_addr *prefix, u_int8_t plen, + u_int8_t min, u_int8_t max) +{ + struct tentry_v6 *n, *new, *b, **prev; + struct in6_addr p, plenmask; + u_int8_t i; + + /* + * check for default route, this is special cased since prefixlen 0 + * can't be checked in the prefixlen mask plenmask. Also there is only + * one default route so using a flag for this works. + */ + if (min == 0) { + th->match_default_v6 = 1; + if (max == 0) /* just the default route */ + return 0; + min = 1; + } + + inet6applymask(&p, prefix, plen); + + /* + * The prefixlen mask goes from 1 to 128 but the bitmask + * starts at 0 and so all bits are set with an offset of 1. + * The default /0 route is handled specially above. + */ + memset(&plenmask, 0, sizeof(plenmask)); + for (i = min; i <= max; i++) + inet6setbit(&plenmask, i - 1); + + /* walk tree finding spot to insert */ + prev = &th->root_v6; + n = *prev; + while (n) { + struct in6_addr mp; + u_int8_t minlen; + + minlen = n->plen > plen ? plen : n->plen; + inet6applymask(&mp, &p, minlen); + if (memcmp(&n->addr, &mp, sizeof(mp)) != 0) { + /* + * out of path, insert intermediary node between + * np and n, then insert n and new node there + */ + if ((b = calloc(1, sizeof(*b))) == NULL) + return -1; + b->plen = inet6findmsb(&n->addr, &mp); + inet6applymask(&b->addr, &n->addr, b->plen); + + *prev = b; + if (inet6isset(&n->addr, b->plen)) { + b->trie[1] = n; + prev = &b->trie[0]; + } else { + b->trie[0] = n; + prev = &b->trie[1]; + } + n = NULL; + break; + } + + if (n->plen > plen) { + /* n is more specific, just insert new in between */ + break; + } + + if (n->plen == plen) { + /* matching node, adjust */ + n->node = 1; + for (i = 0; i < sizeof(plenmask); i++) + n->plenmask.s6_addr[i] |= plenmask.s6_addr[i]; + return 0; + } + + /* no need to check for n->plen == 32 because of above if */ + if (inet6isset(&p, n->plen)) + prev = &n->trie[1]; + else + prev = &n->trie[0]; + n = *prev; + } + + /* create new node */ + if ((new = calloc(1, sizeof(*new))) == NULL) + return -1; + new->addr = p; + new->plen = plen; + new->plenmask = plenmask; + new->node = 1; + + /* link node */ + *prev = new; + if (n) { + if (inet6isset(&n->addr, new->plen)) + new->trie[1] = n; + else + new->trie[0] = n; + } + return 0; +} + +int +trie_add(struct trie_head *th, struct bgpd_addr *prefix, u_int8_t plen, + u_int8_t min, u_int8_t max) +{ + /* precondition plen <= min <= max */ + if (plen > min || min > max) + return -1; + + switch (prefix->aid) { + case AID_INET: + if (max > 32) + return -1; + return trie_add_v4(th, &prefix->v4, plen, min, max); + case AID_INET6: + if (max > 128) + return -1; + return trie_add_v6(th, &prefix->v6, plen, min, max); + default: + /* anything else fails */ + return -1; + } +} + +static void +trie_free_v4(struct tentry_v4 *n) +{ + if (n == NULL) + return; + trie_free_v4(n->trie[0]); + trie_free_v4(n->trie[1]); + free(n); +} + +static void +trie_free_v6(struct tentry_v6 *n) +{ + if (n == NULL) + return; + trie_free_v6(n->trie[0]); + trie_free_v6(n->trie[1]); + free(n); +} + +void +trie_free(struct trie_head *th) +{ + trie_free_v4(th->root_v4); + trie_free_v6(th->root_v6); +} + +static int +trie_match_v4(struct trie_head *th, struct in_addr *prefix, u_int8_t plen) +{ + struct tentry_v4 *n; + + if (plen == 0) { + /* special handling for default route */ + return th->match_default_v4; + } + + n = th->root_v4; + while (n) { + struct in_addr mp; + + if (n->plen > plen) + break; /* too specific, no match possible */ + + inet4applymask(&mp, prefix, n->plen); + if (n->addr.s_addr != mp.s_addr) + break; /* off path, no match possible */ + + /* plen is from 1 - 32 but the bitmask starts with 0 */ + if (n->node && inet4isset(&n->plenmask, plen - 1)) + return 1; /* prefixlen allowed, match */ + + if (n->plen == 32) + break; /* can't go deeper */ + if (inet4isset(prefix, n->plen)) + n = n->trie[1]; + else + n = n->trie[0]; + } + + return 0; +} + +static int +trie_match_v6(struct trie_head *th, struct in6_addr *prefix, u_int8_t plen) +{ + struct tentry_v6 *n; + + if (plen == 0) { + /* special handling for default route */ + return th->match_default_v6; + } + + n = th->root_v6; + while (n) { + struct in6_addr mp; + + if (n->plen > plen) + break; /* too specific, no match possible */ + + inet6applymask(&mp, prefix, n->plen); + if (memcmp(&n->addr, &mp, sizeof(mp)) != 0) + break; /* off path, no match possible */ + + /* plen is from 1 - 128 but the bitmask starts with 0 */ + if (n->node && inet6isset(&n->plenmask, plen - 1)) + return 1; /* prefixlen allowed, match */ + + if (n->plen == 128) + break; /* can't go deeper */ + if (inet6isset(prefix, n->plen)) + n = n->trie[1]; + else + n = n->trie[0]; + } + return 0; +} + +/* find first matching element in the trie for prefix "prefix/plen" */ +int +trie_match(struct trie_head *th, struct bgpd_addr *prefix, u_int8_t plen) +{ + switch (prefix->aid) { + case AID_INET: + return trie_match_v4(th, &prefix->v4, plen); + case AID_INET6: + return trie_match_v6(th, &prefix->v6, plen); + default: + /* anything else is no match */ + return 0; + } +} + +static int +trie_equal_v4(struct tentry_v4 *a, struct tentry_v4 *b) +{ + if (a == NULL && b == NULL) + return 1; + if (a == NULL || b == NULL) + return 0; + + if (a->addr.s_addr != b->addr.s_addr || + a->plen != b->plen || + a->node != b->node || + a->plenmask.s_addr != b->plenmask.s_addr) + return 0; + + if (trie_equal_v4(a->trie[0], b->trie[0]) == 0 || + trie_equal_v4(a->trie[1], b->trie[1]) == 0) + return 0; + + return 1; +} + +static int +trie_equal_v6(struct tentry_v6 *a, struct tentry_v6 *b) +{ + if (a == NULL && b == NULL) + return 1; + if (a == NULL || b == NULL) + return 0; + + if (memcmp(&a->addr, &b->addr, sizeof(a->addr)) != 0 || + a->plen != b->plen || + a->node != b->node || + memcmp(&a->plenmask, &b->plenmask, sizeof(a->plenmask)) != 0) + return 0; + + if (trie_equal_v6(a->trie[0], b->trie[0]) == 0 || + trie_equal_v6(a->trie[1], b->trie[1]) == 0) + return 0; + + return 1; +} + +/* Compare two tires and return 1 if they are the same else 0. */ +int +trie_equal(struct trie_head *a, struct trie_head *b) +{ + if (a->match_default_v4 != b->match_default_v4 || + a->match_default_v6 != b->match_default_v6) + return 0; + if (trie_equal_v4(a->root_v4, b->root_v4) == 0) + return 0; + if (trie_equal_v6(a->root_v6, b->root_v6) == 0) + return 0; + return 1; +} + +/* debugging functions for printing the trie */ +static void +trie_dump_v4(struct tentry_v4 *n) +{ + if (n == NULL) + return; + if (n->node) + printf("%s/%u plenmask %08x\n", inet_ntoa(n->addr), n->plen, + n->plenmask.s_addr); + else + printf(" %s/%u\n", inet_ntoa(n->addr), n->plen); + + trie_dump_v4(n->trie[0]); + trie_dump_v4(n->trie[1]); +} + +static void +trie_dump_v6(struct tentry_v6 *n) +{ + char buf[48]; + unsigned int i; + + if (n == NULL) + return; + if (n->node) { + printf("%s/%u plenmask ", + inet_ntop(AF_INET6, &n->addr, buf, sizeof(buf)), n->plen); + for (i = 0; i < sizeof(n->plenmask); i++) + printf("%02x", n->plenmask.s6_addr[i]); + printf("\n"); + } else + printf(" %s/%u\n", + inet_ntop(AF_INET6, &n->addr, buf, sizeof(buf)), n->plen); + + trie_dump_v6(n->trie[0]); + trie_dump_v6(n->trie[1]); +} + +void +trie_dump(struct trie_head *th) +{ + if (th->match_default_v4) + printf("0.0.0.0/0 plenmask %08x\n", 0); + trie_dump_v4(th->root_v4); + if (th->match_default_v6) + printf("::/0 plenmask 0\n"); + trie_dump_v6(th->root_v6); +} -- 2.20.1