Handle IMSG_SESSION_* messages immediatly when received and do not put
authorclaudio <claudio@openbsd.org>
Fri, 26 Aug 2022 14:10:52 +0000 (14:10 +0000)
committerclaudio <claudio@openbsd.org>
Fri, 26 Aug 2022 14:10:52 +0000 (14:10 +0000)
them on the per peer imsg queue. This is mainly for IMSG_SESSION_DOWN.
Delaying the session down can race against IMSG_SESSION_ADD which is
handled immediatly and as a result an establised connection may be
removed in the RDE because of it.
The various graceful restart imsgs need similar treatment for similar
reasons. In the end when a session is reset/closed the RDE needs to
stop all work and flush the per peer imsg queue.
With this only update and route refresh messages are handled via the
imsg queue.
OK tb@

usr.sbin/bgpd/bgpd.h
usr.sbin/bgpd/rde.c
usr.sbin/bgpd/rde.h
usr.sbin/bgpd/rde_peer.c
usr.sbin/bgpd/session.c

index 6c256da..de8d50b 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: bgpd.h,v 1.449 2022/08/10 14:17:01 claudio Exp $ */
+/*     $OpenBSD: bgpd.h,v 1.450 2022/08/26 14:10:52 claudio Exp $ */
 
 /*
  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
@@ -592,6 +592,7 @@ enum imsg_type {
        IMSG_SESSION_UP,
        IMSG_SESSION_DOWN,
        IMSG_SESSION_STALE,
+       IMSG_SESSION_NOGRACE,
        IMSG_SESSION_FLUSH,
        IMSG_SESSION_RESTARTED,
        IMSG_SESSION_DEPENDON,
index 53b78e3..febdc17 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: rde.c,v 1.564 2022/08/17 15:15:26 claudio Exp $ */
+/*     $OpenBSD: rde.c,v 1.565 2022/08/26 14:10:52 claudio Exp $ */
 
 /*
  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
@@ -358,10 +358,10 @@ rde_dispatch_imsg_session(struct imsgbuf *ibuf)
 {
        struct imsg              imsg;
        struct peer              p;
-       struct peer_config       pconf;
        struct ctl_show_set      cset;
        struct ctl_show_rib      csr;
        struct ctl_show_rib_request     req;
+       struct session_up        sup;
        struct rde_peer         *peer;
        struct rde_aspath       *asp;
        struct rde_hashstats     rdehash;
@@ -373,6 +373,7 @@ rde_dispatch_imsg_session(struct imsgbuf *ibuf)
        size_t                   aslen;
        int                      verbose;
        uint16_t                 len;
+       uint8_t                  aid;
 
        while (ibuf) {
                if ((n = imsg_get(ibuf, &imsg)) == -1)
@@ -382,11 +383,6 @@ rde_dispatch_imsg_session(struct imsgbuf *ibuf)
 
                switch (imsg.hdr.type) {
                case IMSG_UPDATE:
-               case IMSG_SESSION_UP:
-               case IMSG_SESSION_DOWN:
-               case IMSG_SESSION_STALE:
-               case IMSG_SESSION_FLUSH:
-               case IMSG_SESSION_RESTARTED:
                case IMSG_REFRESH:
                        if ((peer = peer_get(imsg.hdr.peerid)) == NULL) {
                                log_warnx("rde_dispatch: unknown peer id %d",
@@ -396,14 +392,71 @@ rde_dispatch_imsg_session(struct imsgbuf *ibuf)
                        peer_imsg_push(peer, &imsg);
                        break;
                case IMSG_SESSION_ADD:
-                       if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(pconf))
+                       if (imsg.hdr.len - IMSG_HEADER_SIZE !=
+                           sizeof(struct peer_config))
+                               fatalx("incorrect size of session request");
+                       peer = peer_add(imsg.hdr.peerid, imsg.data);
+                       /* make sure rde_eval_all is on if needed. */
+                       if (peer->conf.flags & PEERFLAG_EVALUATE_ALL)
+                               rde_eval_all = 1;
+                       break;
+               case IMSG_SESSION_UP:
+                       if ((peer = peer_get(imsg.hdr.peerid)) == NULL) {
+                               log_warnx("%s: unknown peer id %d",
+                                   "IMSG_SESSION_UP", imsg.hdr.peerid);
+                               break;
+                       }
+                       if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(sup))
                                fatalx("incorrect size of session request");
-                       memcpy(&pconf, imsg.data, sizeof(pconf));
-                       peer_add(imsg.hdr.peerid, &pconf);
+                       memcpy(&sup, imsg.data, sizeof(sup));
+                       peer_up(peer, &sup);
                        /* make sure rde_eval_all is on if needed. */
-                       if (pconf.flags & PEERFLAG_EVALUATE_ALL)
+                       if (peer_has_add_path(peer, AID_UNSPEC, CAPA_AP_SEND))
                                rde_eval_all = 1;
                        break;
+               case IMSG_SESSION_DOWN:
+                       if ((peer = peer_get(imsg.hdr.peerid)) == NULL) {
+                               log_warnx("%s: unknown peer id %d",
+                                   "IMSG_SESSION_DOWN", imsg.hdr.peerid);
+                               break;
+                       }
+                       peer_down(peer, NULL);
+                       break;
+               case IMSG_SESSION_STALE:
+               case IMSG_SESSION_NOGRACE:
+               case IMSG_SESSION_FLUSH:
+               case IMSG_SESSION_RESTARTED:
+                       if ((peer = peer_get(imsg.hdr.peerid)) == NULL) {
+                               log_warnx("%s: unknown peer id %d",
+                                   "graceful restart", imsg.hdr.peerid);
+                               break;
+                       }
+                       if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
+                               log_warnx("%s: wrong imsg len", __func__);
+                               break;
+                       }
+                       memcpy(&aid, imsg.data, sizeof(aid));
+                       if (aid >= AID_MAX) {
+                               log_warnx("%s: bad AID", __func__);
+                               break;
+                       }
+
+                       switch (imsg.hdr.type) {
+                       case IMSG_SESSION_STALE:
+                       case IMSG_SESSION_NOGRACE:
+                               peer_stale(peer, aid,
+                                   imsg.hdr.type == IMSG_SESSION_NOGRACE);
+                               break;
+                       case IMSG_SESSION_FLUSH:
+                               peer_flush(peer, aid, peer->staletime[aid]);
+                               break;
+                       case IMSG_SESSION_RESTARTED:
+                               if (peer->staletime[aid])
+                                       peer_flush(peer, aid,
+                                           peer->staletime[aid]);
+                               break;
+                       }
+                       break;
                case IMSG_NETWORK_ADD:
                        if (imsg.hdr.len - IMSG_HEADER_SIZE !=
                            sizeof(struct network_config)) {
@@ -1069,9 +1122,7 @@ void
 rde_dispatch_imsg_peer(struct rde_peer *peer, void *bula)
 {
        struct route_refresh rr;
-       struct session_up sup;
        struct imsg imsg;
-       uint8_t aid;
 
        if (!peer_imsg_pop(peer, &imsg))
                return;
@@ -1082,48 +1133,6 @@ rde_dispatch_imsg_peer(struct rde_peer *peer, void *bula)
                        break;
                rde_update_dispatch(peer, &imsg);
                break;
-       case IMSG_SESSION_UP:
-               if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(sup))
-                       fatalx("incorrect size of session request");
-               memcpy(&sup, imsg.data, sizeof(sup));
-               if (peer_up(peer, &sup) == -1) {
-                       peer->state = PEER_DOWN;
-                       imsg_compose(ibuf_se, IMSG_SESSION_DOWN, peer->conf.id,
-                           0, -1, NULL, 0);
-               }
-               /* make sure rde_eval_all is on if needed. */
-               if (peer_has_add_path(peer, AID_UNSPEC, CAPA_AP_SEND))
-                       rde_eval_all = 1;
-               break;
-       case IMSG_SESSION_DOWN:
-               peer_down(peer, NULL);
-               break;
-       case IMSG_SESSION_STALE:
-       case IMSG_SESSION_FLUSH:
-       case IMSG_SESSION_RESTARTED:
-               if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(aid)) {
-                       log_warnx("%s: wrong imsg len", __func__);
-                       break;
-               }
-               memcpy(&aid, imsg.data, sizeof(aid));
-               if (aid >= AID_MAX) {
-                       log_warnx("%s: bad AID", __func__);
-                       break;
-               }
-
-               switch (imsg.hdr.type) {
-               case IMSG_SESSION_STALE:
-                       peer_stale(peer, aid);
-                       break;
-               case IMSG_SESSION_FLUSH:
-                       peer_flush(peer, aid, peer->staletime[aid]);
-                       break;
-               case IMSG_SESSION_RESTARTED:
-                       if (peer->staletime[aid])
-                               peer_flush(peer, aid, peer->staletime[aid]);
-                       break;
-               }
-               break;
        case IMSG_REFRESH:
                if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(rr)) {
                        log_warnx("route refresh: wrong imsg len");
index 0160f6b..3508b07 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: rde.h,v 1.262 2022/08/03 08:56:23 claudio Exp $ */
+/*     $OpenBSD: rde.h,v 1.263 2022/08/26 14:10:52 claudio Exp $ */
 
 /*
  * Copyright (c) 2003, 2004 Claudio Jeker <claudio@openbsd.org> and
@@ -418,10 +418,10 @@ struct rde_peer   *peer_get(uint32_t);
 struct rde_peer *peer_match(struct ctl_neighbor *, uint32_t);
 struct rde_peer        *peer_add(uint32_t, struct peer_config *);
 
-int             peer_up(struct rde_peer *, struct session_up *);
+void            peer_up(struct rde_peer *, struct session_up *);
 void            peer_down(struct rde_peer *, void *);
 void            peer_flush(struct rde_peer *, uint8_t, time_t);
-void            peer_stale(struct rde_peer *, uint8_t);
+void            peer_stale(struct rde_peer *, uint8_t, int);
 void            peer_dump(struct rde_peer *, uint8_t);
 void            peer_begin_rrefresh(struct rde_peer *, uint8_t);
 
index 838d088..6df2516 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: rde_peer.c,v 1.21 2022/08/17 15:15:26 claudio Exp $ */
+/*     $OpenBSD: rde_peer.c,v 1.22 2022/08/26 14:10:52 claudio Exp $ */
 
 /*
  * Copyright (c) 2019 Claudio Jeker <claudio@openbsd.org>
@@ -104,9 +104,6 @@ peer_init(uint32_t hashsize)
        pc.id = PEER_ID_SELF;
 
        peerself = peer_add(PEER_ID_SELF, &pc);
-       if (peerself == NULL)
-               fatalx("peer_init add self");
-
        peerself->state = PEER_UP;
 }
 
@@ -194,7 +191,7 @@ peer_add(uint32_t id, struct peer_config *p_conf)
 
        if ((peer = peer_get(id))) {
                memcpy(&peer->conf, p_conf, sizeof(struct peer_config));
-               return (NULL);
+               return (peer);
        }
 
        peer = calloc(1, sizeof(struct rde_peer));
@@ -408,7 +405,7 @@ rde_up_dump_done(void *ptr, uint8_t aid)
 /*
  * Session got established, bring peer up, load RIBs do initial table dump.
  */
-int
+void
 peer_up(struct rde_peer *peer, struct session_up *sup)
 {
        uint8_t  i;
@@ -418,6 +415,8 @@ peer_up(struct rde_peer *peer, struct session_up *sup)
                 * There is a race condition when doing PEER_ERR -> PEER_DOWN.
                 * So just do a full reset of the peer here.
                 */
+               rib_dump_terminate(peer);
+               peer_imsg_flush(peer);
                if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
                    peer_adjout_clear_upcall, NULL, NULL) == -1)
                        fatal("%s: prefix_dump_new", __func__);
@@ -448,8 +447,6 @@ peer_up(struct rde_peer *peer, struct session_up *sup)
                if (peer->capa.mp[i])
                        peer_dump(peer, i);
        }
-
-       return (0);
 }
 
 /*
@@ -461,8 +458,12 @@ peer_down(struct rde_peer *peer, void *bula)
 {
        peer->remote_bgpid = 0;
        peer->state = PEER_DOWN;
-       /* stop all pending dumps which may depend on this peer */
+       /*
+        * stop all pending dumps which may depend on this peer
+        * and flush all pending imsg from the SE.
+        */
        rib_dump_terminate(peer);
+       peer_imsg_flush(peer);
 
        /* flush Adj-RIB-Out */
        if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
@@ -474,8 +475,6 @@ peer_down(struct rde_peer *peer, void *bula)
        peer->prefix_cnt = 0;
        peer->prefix_out_cnt = 0;
 
-       peer_imsg_flush(peer);
-
        LIST_REMOVE(peer, hash_l);
        LIST_REMOVE(peer, peer_l);
        free(peer);
@@ -511,7 +510,7 @@ peer_flush(struct rde_peer *peer, uint8_t aid, time_t staletime)
  * is set to the current timestamp for identifying stale routes in Adj-RIB-In.
  */
 void
-peer_stale(struct rde_peer *peer, uint8_t aid)
+peer_stale(struct rde_peer *peer, uint8_t aid, int flushall)
 {
        time_t now;
 
@@ -522,9 +521,19 @@ peer_stale(struct rde_peer *peer, uint8_t aid)
        peer->staletime[aid] = now = getmonotime();
        peer->state = PEER_DOWN;
 
+       /*
+        * stop all pending dumps which may depend on this peer
+        * and flush all pending imsg from the SE.
+        */
+       rib_dump_terminate(peer);
+       peer_imsg_flush(peer);
+
+       if (flushall)
+               peer_flush(peer, aid, 0);
+
        /* XXX this is not quite correct */
        /* mark Adj-RIB-Out stale for this peer */
-       if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
+       if (prefix_dump_new(peer, aid, 0, NULL,
            peer_adjout_stale_upcall, NULL, NULL) == -1)
                fatal("%s: prefix_dump_new", __func__);
 
index 40a9a70..204e64b 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: session.c,v 1.433 2022/08/17 15:15:26 claudio Exp $ */
+/*     $OpenBSD: session.c,v 1.434 2022/08/26 14:10:52 claudio Exp $ */
 
 /*
  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
@@ -1760,7 +1760,7 @@ session_graceful_restart(struct peer *p)
                            aid2str(i));
                        p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
                } else if (p->capa.neg.mp[i]) {
-                       if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
+                       if (imsg_rde(IMSG_SESSION_NOGRACE, p->conf.id,
                            &i, sizeof(i)) == -1)
                                return (-1);
                        log_peer_warnx(&p->conf,