From 76c3be877c24531904c83dc13708142ae82d710d Mon Sep 17 00:00:00 2001 From: claudio Date: Mon, 12 Sep 2022 10:03:17 +0000 Subject: [PATCH] Introduce tree walkers that only walk a subtree of the RIB. In some cases only a "small" part of the RIB needs to be looked at. Like bgpctl show rib 10/8 or-longer that only needs to traverse nodes under 10/8; all other RIB entries do not matter. By setting the start node to the RB_NFIND(10/8) result, all nodes before this point can be skipped. Using prefix_compare() while walking the tree with RB_NEXT() the walker knows when it steps outside of the 10/8 subtree and stops. With this the or-longer commands become a lot faster. Looks good to tb@ --- usr.sbin/bgpd/rde.c | 78 ++++------------------------ usr.sbin/bgpd/rde.h | 11 +++- usr.sbin/bgpd/rde_rib.c | 111 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 122 insertions(+), 78 deletions(-) diff --git a/usr.sbin/bgpd/rde.c b/usr.sbin/bgpd/rde.c index 1d6212ad278..c3c6d05ee06 100644 --- a/usr.sbin/bgpd/rde.c +++ b/usr.sbin/bgpd/rde.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.c,v 1.575 2022/09/09 13:33:24 claudio Exp $ */ +/* $OpenBSD: rde.c,v 1.576 2022/09/12 10:03:17 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Henning Brauer @@ -2647,35 +2647,6 @@ rde_dump_upcall(struct rib_entry *re, void *ptr) rde_dump_filter(p, &ctx->req, 0); } -static void -rde_dump_prefix_upcall(struct rib_entry *re, void *ptr) -{ - struct rde_dump_ctx *ctx = ptr; - struct prefix *p; - struct pt_entry *pt; - struct bgpd_addr addr; - - pt = re->prefix; - pt_getaddr(pt, &addr); - if (addr.aid != ctx->req.prefix.aid) - return; - if (ctx->req.flags & F_LONGER) { - if (ctx->req.prefixlen > pt->prefixlen) - return; - if (!prefix_compare(&ctx->req.prefix, &addr, - ctx->req.prefixlen)) - TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib) - rde_dump_filter(p, &ctx->req, 0); - } else { - if (ctx->req.prefixlen < pt->prefixlen) - return; - if (!prefix_compare(&addr, &ctx->req.prefix, - pt->prefixlen)) - TAILQ_FOREACH(p, 
&re->prefix_h, entry.list.rib) - rde_dump_filter(p, &ctx->req, 0); - } -} - static void rde_dump_adjout_upcall(struct prefix *p, void *ptr) { @@ -2688,35 +2659,6 @@ rde_dump_adjout_upcall(struct prefix *p, void *ptr) rde_dump_filter(p, &ctx->req, 1); } -static void -rde_dump_adjout_prefix_upcall(struct prefix *p, void *ptr) -{ - struct rde_dump_ctx *ctx = ptr; - struct bgpd_addr addr; - - if ((p->flags & PREFIX_FLAG_ADJOUT) == 0) - fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__); - if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD)) - return; - - pt_getaddr(p->pt, &addr); - if (addr.aid != ctx->req.prefix.aid) - return; - if (ctx->req.flags & F_LONGER) { - if (ctx->req.prefixlen > p->pt->prefixlen) - return; - if (!prefix_compare(&ctx->req.prefix, &addr, - ctx->req.prefixlen)) - rde_dump_filter(p, &ctx->req, 1); - } else { - if (ctx->req.prefixlen < p->pt->prefixlen) - return; - if (!prefix_compare(&addr, &ctx->req.prefix, - p->pt->prefixlen)) - rde_dump_filter(p, &ctx->req, 1); - } -} - static int rde_dump_throttled(void *arg) { @@ -2745,10 +2687,10 @@ rde_dump_done(void *arg, uint8_t aid) goto nomem; break; case IMSG_CTL_SHOW_RIB_PREFIX: - if (prefix_dump_new(peer, ctx->req.aid, - CTL_MSG_HIGH_MARK, ctx, - rde_dump_adjout_prefix_upcall, - rde_dump_done, rde_dump_throttled) == -1) + if (prefix_dump_subtree(peer, &ctx->req.prefix, + ctx->req.prefixlen, CTL_MSG_HIGH_MARK, ctx, + rde_dump_adjout_upcall, rde_dump_done, + rde_dump_throttled) == -1) goto nomem; break; default: @@ -2817,9 +2759,9 @@ rde_dump_ctx_new(struct ctl_show_rib_request *req, pid_t pid, break; case IMSG_CTL_SHOW_RIB_PREFIX: if (req->flags & F_LONGER) { - if (prefix_dump_new(peer, ctx->req.aid, - CTL_MSG_HIGH_MARK, ctx, - rde_dump_adjout_prefix_upcall, + if (prefix_dump_subtree(peer, &req->prefix, + req->prefixlen, CTL_MSG_HIGH_MARK, ctx, + rde_dump_adjout_upcall, rde_dump_done, rde_dump_throttled) == -1) goto nomem; break; @@ -2900,8 +2842,8 @@ rde_dump_ctx_new(struct 
ctl_show_rib_request *req, pid_t pid, break; case IMSG_CTL_SHOW_RIB_PREFIX: if (req->flags & F_LONGER) { - if (rib_dump_new(rid, ctx->req.aid, - CTL_MSG_HIGH_MARK, ctx, rde_dump_prefix_upcall, + if (rib_dump_subtree(rid, &req->prefix, req->prefixlen, + CTL_MSG_HIGH_MARK, ctx, rde_dump_upcall, rde_dump_done, rde_dump_throttled) == -1) goto nomem; break; diff --git a/usr.sbin/bgpd/rde.h b/usr.sbin/bgpd/rde.h index ffc327f1bc1..3930b435ae0 100644 --- a/usr.sbin/bgpd/rde.h +++ b/usr.sbin/bgpd/rde.h @@ -1,4 +1,4 @@ -/* $OpenBSD: rde.h,v 1.270 2022/09/01 13:23:24 claudio Exp $ */ +/* $OpenBSD: rde.h,v 1.271 2022/09/12 10:03:17 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker and @@ -570,6 +570,11 @@ int rib_dump_new(uint16_t, uint8_t, unsigned int, void *, void (*)(struct rib_entry *, void *), void (*)(void *, uint8_t), int (*)(void *)); +int rib_dump_subtree(uint16_t, struct bgpd_addr *, uint8_t, + unsigned int count, void *arg, + void (*)(struct rib_entry *, void *), + void (*)(void *, uint8_t), + int (*)(void *)); void rib_dump_terminate(void *); static inline struct rib * @@ -612,6 +617,10 @@ void prefix_adjout_dump(struct rde_peer *, void *, int prefix_dump_new(struct rde_peer *, uint8_t, unsigned int, void *, void (*)(struct prefix *, void *), void (*)(void *, uint8_t), int (*)(void *)); +int prefix_dump_subtree(struct rde_peer *, struct bgpd_addr *, + uint8_t, unsigned int, void *, + void (*)(struct prefix *, void *), + void (*)(void *, uint8_t), int (*)(void *)); int prefix_write(u_char *, int, struct bgpd_addr *, uint8_t, int); int prefix_writebuf(struct ibuf *, struct bgpd_addr *, uint8_t); struct prefix *prefix_bypeer(struct rib_entry *, struct rde_peer *, diff --git a/usr.sbin/bgpd/rde_rib.c b/usr.sbin/bgpd/rde_rib.c index 0798ee9eb57..e256376365a 100644 --- a/usr.sbin/bgpd/rde_rib.c +++ b/usr.sbin/bgpd/rde_rib.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_rib.c,v 1.248 2022/09/01 13:19:11 claudio Exp $ */ +/* $OpenBSD: rde_rib.c,v 1.249 2022/09/12 10:03:17 
claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker @@ -58,8 +58,10 @@ struct rib_context { void (*ctx_done)(void *, uint8_t); int (*ctx_throttle)(void *); void *ctx_arg; + struct bgpd_addr ctx_subtree; unsigned int ctx_count; uint8_t ctx_aid; + uint8_t ctx_subtreelen; }; LIST_HEAD(, rib_context) rib_dumps = LIST_HEAD_INITIALIZER(rib_dumps); @@ -396,16 +398,17 @@ rib_empty(struct rib_entry *re) static struct rib_entry * rib_restart(struct rib_context *ctx) { - struct rib_entry *re; + struct rib_entry *re = NULL; - re = re_unlock(ctx->ctx_re); + if (ctx->ctx_re) + re = re_unlock(ctx->ctx_re); /* find first non empty element */ while (re && rib_empty(re)) re = RB_NEXT(rib_tree, unused, re); /* free the previously locked rib element if empty */ - if (rib_empty(ctx->ctx_re)) + if (ctx->ctx_re && rib_empty(ctx->ctx_re)) rib_remove(ctx->ctx_re); ctx->ctx_re = NULL; return (re); @@ -422,7 +425,7 @@ rib_dump_r(struct rib_context *ctx) if (rib == NULL) fatalx("%s: rib id %u gone", __func__, ctx->ctx_id); - if (ctx->ctx_re == NULL) + if (ctx->ctx_re == NULL && ctx->ctx_subtree.aid == AID_UNSPEC) re = RB_MIN(rib_tree, rib_tree(rib)); else re = rib_restart(ctx); @@ -435,6 +438,14 @@ rib_dump_r(struct rib_context *ctx) if (ctx->ctx_aid != AID_UNSPEC && ctx->ctx_aid != re->prefix->aid) continue; + if (ctx->ctx_subtree.aid != AID_UNSPEC) { + struct bgpd_addr addr; + pt_getaddr(re->prefix, &addr); + if (prefix_compare(&ctx->ctx_subtree, &addr, + ctx->ctx_subtreelen) != 0) + /* left subtree, walk is done */ + break; + } if (ctx->ctx_count && i++ >= ctx->ctx_count && !re_is_locked(re)) { /* store and lock last element */ @@ -543,6 +554,42 @@ rib_dump_new(uint16_t id, uint8_t aid, unsigned int count, void *arg, return 0; } +int +rib_dump_subtree(uint16_t id, struct bgpd_addr *subtree, uint8_t subtreelen, + unsigned int count, void *arg, void (*upcall)(struct rib_entry *, void *), + void (*done)(void *, uint8_t), int (*throttle)(void *)) +{ + struct rib_context *ctx; + struct 
rib_entry xre; + + if ((ctx = calloc(1, sizeof(*ctx))) == NULL) + return -1; + ctx->ctx_id = id; + ctx->ctx_aid = subtree->aid; + ctx->ctx_count = count; + ctx->ctx_arg = arg; + ctx->ctx_rib_call = upcall; + ctx->ctx_done = done; + ctx->ctx_throttle = throttle; + ctx->ctx_subtree = *subtree; + ctx->ctx_subtreelen = subtreelen; + + LIST_INSERT_HEAD(&rib_dumps, ctx, entry); + + /* lookup start of subtree */ + memset(&xre, 0, sizeof(xre)); + xre.prefix = pt_fill(subtree, subtreelen); + ctx->ctx_re = RB_NFIND(rib_tree, rib_tree(rib_byid(id)), &xre); + if (ctx->ctx_re) + re_lock(ctx->ctx_re); + + /* requested a sync traversal */ + if (count == 0) + rib_dump_r(ctx); + + return 0; +} + /* path specific functions */ static struct rde_aspath *path_lookup(struct rde_aspath *); @@ -1253,11 +1300,12 @@ prefix_adjout_destroy(struct prefix *p) static struct prefix * prefix_restart(struct rib_context *ctx) { - struct prefix *p; + struct prefix *p = NULL; - p = prefix_unlock(ctx->ctx_p); + if (ctx->ctx_p) + p = prefix_unlock(ctx->ctx_p); - if (prefix_is_dead(p)) { + if (p && prefix_is_dead(p)) { struct prefix *next; next = RB_NEXT(prefix_index, unused, p); @@ -1278,7 +1326,7 @@ prefix_dump_r(struct rib_context *ctx) if ((peer = peer_get(ctx->ctx_id)) == NULL) goto done; - if (ctx->ctx_p == NULL) + if (ctx->ctx_p == NULL && ctx->ctx_subtree.aid == AID_UNSPEC) p = RB_MIN(prefix_index, &peer->adj_rib_out); else p = prefix_restart(ctx); @@ -1290,6 +1338,14 @@ prefix_dump_r(struct rib_context *ctx) if (ctx->ctx_aid != AID_UNSPEC && ctx->ctx_aid != p->pt->aid) continue; + if (ctx->ctx_subtree.aid != AID_UNSPEC) { + struct bgpd_addr addr; + pt_getaddr(p->pt, &addr); + if (prefix_compare(&ctx->ctx_subtree, &addr, + ctx->ctx_subtreelen) != 0) + /* left subtree, walk is done */ + break; + } if (ctx->ctx_count && i++ >= ctx->ctx_count && !prefix_is_locked(p)) { /* store and lock last element */ @@ -1332,6 +1388,43 @@ prefix_dump_new(struct rde_peer *peer, uint8_t aid, unsigned int count, 
return 0; } +int +prefix_dump_subtree(struct rde_peer *peer, struct bgpd_addr *subtree, + uint8_t subtreelen, unsigned int count, void *arg, + void (*upcall)(struct prefix *, void *), void (*done)(void *, uint8_t), + int (*throttle)(void *)) +{ + struct rib_context *ctx; + struct prefix xp; + + if ((ctx = calloc(1, sizeof(*ctx))) == NULL) + return -1; + ctx->ctx_id = peer->conf.id; + ctx->ctx_aid = subtree->aid; + ctx->ctx_count = count; + ctx->ctx_arg = arg; + ctx->ctx_prefix_call = upcall; + ctx->ctx_done = done; + ctx->ctx_throttle = throttle; + ctx->ctx_subtree = *subtree; + ctx->ctx_subtreelen = subtreelen; + + LIST_INSERT_HEAD(&rib_dumps, ctx, entry); + + /* lookup start of subtree */ + memset(&xp, 0, sizeof(xp)); + xp.pt = pt_fill(subtree, subtreelen); + ctx->ctx_p = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp); + if (ctx->ctx_p) + prefix_lock(ctx->ctx_p); + + /* requested a sync traversal */ + if (count == 0) + prefix_dump_r(ctx); + + return 0; +} + /* dump a prefix into specified buffer */ int prefix_write(u_char *buf, int len, struct bgpd_addr *prefix, uint8_t plen, -- 2.20.1