From: dlg Date: Wed, 14 Jul 2010 00:42:57 +0000 (+0000) Subject: if we produce a lot of rtsock messages it is possible we will hit a X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=692f0c3bd1f1307d7fcf2e437d044f27864c38c0;p=openbsd if we produce a lot of rtsock messages it is possible we will hit a condition that prevents us from queuing it, which in turn means that processes listening on the routing socket for changes to the kernel state will get out of sync. currently this is handled by the following comment: /* should notify about lost packet */ this change introduces a new rtsock message called RTM_DESYNC that notifies about lost packets and uses it instead of this comment. when we detect loss we flush all the messages in the routing socket and attempt to queue an RTM_DESYNC message instead. to guarantee that we will enqueue DESYNC we keep trying it when an attempt to enqueue or dequeue any messages is made, and in the worst case a timeout tries to guarantee that desync is added to the socket. i've been running this in production for 2 or 3 weeks. tested by sthen@ ok sthen@ claudio@ deraadt@ code written by andrew sallaway at the university of queensland. --- diff --git a/sys/net/route.h b/sys/net/route.h index 5c97082cebd..08ee0b05350 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -1,4 +1,4 @@ -/* $OpenBSD: route.h,v 1.71 2010/07/09 15:44:20 claudio Exp $ */ +/* $OpenBSD: route.h,v 1.72 2010/07/14 00:42:57 dlg Exp $ */ /* $NetBSD: route.h,v 1.9 1996/02/13 22:00:49 christos Exp $ */ /* @@ -230,6 +230,7 @@ struct rt_msghdr { #define RTM_DELADDR 0xd /* address being removed from iface */ #define RTM_IFINFO 0xe /* iface going up/down etc. 
*/ #define RTM_IFANNOUNCE 0xf /* iface arrival/departure */ +#define RTM_DESYNC 0x10 /* route socket buffer overflow */ #define RTV_MTU 0x1 /* init or lock _mtu */ #define RTV_HOPCOUNT 0x2 /* init or lock _hopcount */ diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 53088937a4e..6308d190b99 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rtsock.c,v 1.103 2010/07/09 15:36:54 claudio Exp $ */ +/* $OpenBSD: rtsock.c,v 1.104 2010/07/14 00:42:57 dlg Exp $ */ /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ /* @@ -82,6 +82,8 @@ #endif #include +#include +#include struct sockaddr route_dst = { 2, PF_ROUTE, }; struct sockaddr route_src = { 2, PF_ROUTE, }; @@ -112,15 +114,30 @@ void rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); struct routecb { struct rawcb rcb; unsigned int msgfilter; + unsigned int flags; + struct timeout timeout; }; #define sotoroutecb(so) ((struct routecb *)(so)->so_pcb) +/* + * These flags and timeout are used for indicating to userland (via a + * RTM_DESYNC msg) when the route socket has overflowed and messages + * have been lost. 
+ */ +#define ROUTECB_FLAG_DESYNC 0x1 /* Route socket out of memory */ +#define ROUTECB_FLAG_FLUSH 0x2 /* Wait until socket is empty before + queueing more packets */ + +#define ROUTE_DESYNC_RESEND_TIMEOUT (hz / 5) /* In hz */ + +void rt_senddesync(void *); int route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, struct mbuf *control, struct proc *p) { struct rawcb *rp; + struct routecb *rop; int s, af; int error = 0; @@ -136,6 +153,8 @@ route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, */ rp = malloc(sizeof(struct routecb), M_PCB, M_WAITOK|M_ZERO); so->so_pcb = rp; + /* Init the timeout structure */ + timeout_set(&((struct routecb *)rp)->timeout, rt_senddesync, rp); /* * Don't call raw_usrreq() in the attach case, because * we want to allow non-privileged processes to listen @@ -165,8 +184,22 @@ route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, so->so_options |= SO_USELOOPBACK; break; + case PRU_RCVD: + rop = (struct routecb *)rp; + + /* + * If we are in a FLUSH state, check if the buffer is + * empty so that we can clear the flag. + */ + if (((rop->flags & ROUTECB_FLAG_FLUSH) != 0) && + ((sbspace(&rp->rcb_socket->so_rcv) == + rp->rcb_socket->so_rcv.sb_hiwat))) + rop->flags &= ~ROUTECB_FLAG_FLUSH; + break; + case PRU_DETACH: if (rp) { + timeout_del(&((struct routecb *)rp)->timeout); af = rp->rcb_proto.sp_protocol; if (af == AF_INET) route_cb.ip_count--; @@ -233,6 +266,35 @@ route_ctloutput(int op, struct socket *so, int level, int optname, return (error); } +void +rt_senddesync(void *data) +{ + struct rawcb *rp; + struct routecb *rop; + struct mbuf *desync_mbuf; + + rp = (struct rawcb *)data; + rop = (struct routecb *)rp; + + /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ + if ((rop->flags & ROUTECB_FLAG_DESYNC) != 0) { + /* + * If we fail to alloc memory or if sbappendaddr() + * fails, re-add timeout and try again. 
+ */ + desync_mbuf = rt_msg1(RTM_DESYNC, NULL); + if ((desync_mbuf != NULL) && + (sbappendaddr(&rp->rcb_socket->so_rcv, &route_src, + desync_mbuf, (struct mbuf *)0) != 0)) { + rop->flags &= ~ROUTECB_FLAG_DESYNC; + sorwakeup(rp->rcb_socket); + } else { + /* Re-add timeout to try sending msg again */ + timeout_add(&rop->timeout, ROUTE_DESYNC_RESEND_TIMEOUT); + } + } +} + void route_input(struct mbuf *m0, ...) { @@ -286,14 +348,29 @@ route_input(struct mbuf *m0, ...) mtod(m, struct rt_msghdr *)->rtm_type))) continue; + /* + * Check to see if the flush flag is set. If so, don't queue + * any more messages until the flag is cleared. + */ + if ((rop->flags & ROUTECB_FLAG_FLUSH) != 0) + continue; + if (last) { struct mbuf *n; if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { if (sbappendaddr(&last->so_rcv, sosrc, - n, (struct mbuf *)0) == 0) - /* should notify about lost packet */ + n, (struct mbuf *)0) == 0) { + /* + * Flag socket as desync'ed and + * flush required + */ + sotoroutecb(last)->flags |= + ROUTECB_FLAG_DESYNC | + ROUTECB_FLAG_FLUSH; + sbflush(&last->so_rcv); + rt_senddesync((void *) sotorawcb(last)); m_freem(n); - else { + } else { sorwakeup(last); sockets++; } @@ -303,9 +380,14 @@ route_input(struct mbuf *m0, ...) 
} if (last) { if (sbappendaddr(&last->so_rcv, sosrc, - m, (struct mbuf *)0) == 0) + m, (struct mbuf *)0) == 0) { + /* Flag socket as desync'ed and flush required */ + sotoroutecb(last)->flags |= + ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; + sbflush(&last->so_rcv); + rt_senddesync((void *) sotorawcb(last)); m_freem(m); - else { + } else { sorwakeup(last); sockets++; } @@ -1319,7 +1401,7 @@ sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, extern struct domain routedomain; /* or at least forward */ struct protosw routesw[] = { -{ SOCK_RAW, &routedomain, 0, PR_ATOMIC|PR_ADDR, +{ SOCK_RAW, &routedomain, 0, PR_ATOMIC|PR_ADDR|PR_WANTRCVD, route_input, route_output, raw_ctlinput, route_ctloutput, route_usrreq, raw_init, 0, 0, 0,