From: florian Date: Mon, 11 Sep 2023 06:00:23 +0000 (+0000) Subject: Improve handling of ENOBUFS by not running in a tight loop forever. X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=afda2c0320d498fdab6d54743c6adaa7208e5ff6;p=openbsd Improve handling of ENOBUFS by not running in a tight loop forever. This cherry-picks upstream git commit 0ee44ef384593ed0382d1ce6048d5a9c9440b45c. Issue reported by landry@, which could be traced back to a buggy WiFi driver that would constantly return ENOBUFS. This in turn lead unwind(8) enter a tight loop through poll / sendto without making any progress and burning 100% cpu. OK sthen --- diff --git a/sbin/unwind/resolver.c b/sbin/unwind/resolver.c index 98c5f472205..9cd19b5ee25 100644 --- a/sbin/unwind/resolver.c +++ b/sbin/unwind/resolver.c @@ -1,4 +1,4 @@ -/* $OpenBSD: resolver.c,v 1.160 2023/04/18 09:57:51 florian Exp $ */ +/* $OpenBSD: resolver.c,v 1.161 2023/09/11 06:00:23 florian Exp $ */ /* @@ -920,6 +920,8 @@ resolve_done(struct uw_resolver *res, void *arg, int rcode, uint8_t *p, *data; uint8_t answer_imsg[MAX_IMSGSIZE - IMSG_HEADER_SIZE]; + log_debug("%s: %d", __func__, rcode); + clock_gettime(CLOCK_MONOTONIC, &tp); query_imsg = (struct query_imsg *)arg; @@ -1074,14 +1076,17 @@ resolve_done(struct uw_resolver *res, void *arg, int rcode, goto out; servfail: + log_debug("%s: foo: 1", __func__); /* try_next_resolver() might free rq */ if (try_next_resolver(rq) != 0 && running_res == 0) { /* we are the last one, send SERVFAIL */ answer_header->srvfail = 1; + log_debug("%s: foo: 2", __func__); resolver_imsg_compose_frontend(IMSG_ANSWER, 0, answer_imsg, sizeof(*answer_header)); } out: + log_debug("%s: foo: 3", __func__); free(query_imsg); sldns_buffer_free(buf); regional_destroy(region); diff --git a/usr.sbin/unbound/doc/Changelog b/usr.sbin/unbound/doc/Changelog index a7c9c40026e..be48561d8f4 100644 --- a/usr.sbin/unbound/doc/Changelog +++ b/usr.sbin/unbound/doc/Changelog @@ -1,3 +1,8 @@ +8 September 2023: Wouter + - Fix send of udp retries when ENOBUFS is returned. It stops looping + and also waits for the condition to go away. Reported by Florian + Obser. + 25 August 2023: Wouter - Fix compile error on NetBSD in util/netevent.h. diff --git a/usr.sbin/unbound/util/netevent.c b/usr.sbin/unbound/util/netevent.c index 204e4883cf2..b9395a8998b 100644 --- a/usr.sbin/unbound/util/netevent.c +++ b/usr.sbin/unbound/util/netevent.c @@ -116,6 +116,8 @@ /** timeout in millisec to wait for write to unblock, packets dropped after.*/ #define SEND_BLOCKED_WAIT_TIMEOUT 200 +/** max number of times to wait for write to unblock, packets dropped after.*/ +#define SEND_BLOCKED_MAX_RETRY 5 /** Let's make timestamping code cleaner and redefine SO_TIMESTAMP* */ #ifndef SO_TIMESTAMP @@ -402,9 +404,10 @@ comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, WSAGetLastError() == WSAENOBUFS || WSAGetLastError() == WSAEWOULDBLOCK) { #endif + int retries = 0; /* if we set the fd blocking, other threads suddenly * have a blocking fd that they operate on */ - while(sent == -1 && ( + while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && ( #ifndef USE_WINSOCK errno == EAGAIN || errno == EINTR || # ifdef EWOULDBLOCK @@ -419,6 +422,13 @@ comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, #endif )) { #if defined(HAVE_POLL) || defined(USE_WINSOCK) + int send_nobufs = ( +#ifndef USE_WINSOCK + errno == ENOBUFS +#else + WSAGetLastError() == WSAENOBUFS +#endif + ); struct pollfd p; int pret; memset(&p, 0, sizeof(p)); @@ -457,8 +467,48 @@ comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, log_err("poll udp out failed: %s", sock_strerror(errno)); return 0; + } else if((pret < 0 && +#ifndef USE_WINSOCK + errno == ENOBUFS +#else + WSAGetLastError() == WSAENOBUFS +#endif + ) || (send_nobufs && retries > 0)) { + /* ENOBUFS, and poll returned without + * a timeout. Or the retried send call + * returned ENOBUFS. It is good to + * wait a bit for the error to clear. */ + /* The timeout is 20*(2^(retries+1)), + * it increases exponentially, starting + * at 40 msec. After 5 tries, 1240 msec + * have passed in total, when poll + * returned the error, and 1200 msec + * when send returned the errors. */ +#ifndef USE_WINSOCK + pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); +#else + pret = WSAPoll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); +#endif + if(pret < 0 && +#ifndef USE_WINSOCK + errno != EAGAIN && errno != EINTR && +# ifdef EWOULDBLOCK + errno != EWOULDBLOCK && +# endif + errno != ENOBUFS +#else + WSAGetLastError() != WSAEINPROGRESS && + WSAGetLastError() != WSAEINTR && + WSAGetLastError() != WSAENOBUFS && + WSAGetLastError() != WSAEWOULDBLOCK +#endif + ) { + log_err("poll udp out timer failed: %s", + sock_strerror(errno)); + } } #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ + retries++; if (!is_connected) { sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), sldns_buffer_remaining(packet), 0, @@ -665,7 +715,8 @@ comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, WSAGetLastError() == WSAENOBUFS || WSAGetLastError() == WSAEWOULDBLOCK) { #endif - while(sent == -1 && ( + int retries = 0; + while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && ( #ifndef USE_WINSOCK errno == EAGAIN || errno == EINTR || # ifdef EWOULDBLOCK @@ -680,6 +731,13 @@ comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, #endif )) { #if defined(HAVE_POLL) || defined(USE_WINSOCK) + int send_nobufs = ( +#ifndef USE_WINSOCK + errno == ENOBUFS +#else + WSAGetLastError() == WSAENOBUFS +#endif + ); struct pollfd p; int pret; memset(&p, 0, sizeof(p)); @@ -718,8 +776,48 @@ comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, log_err("poll udp out failed: %s", sock_strerror(errno)); return 0; + } else if((pret < 0 && +#ifndef USE_WINSOCK + errno == ENOBUFS +#else + WSAGetLastError() == WSAENOBUFS +#endif + ) || (send_nobufs && retries > 0)) { + /* ENOBUFS, and poll returned without + * a timeout. Or the retried send call + * returned ENOBUFS. It is good to + * wait a bit for the error to clear. */ + /* The timeout is 20*(2^(retries+1)), + * it increases exponentially, starting + * at 40 msec. After 5 tries, 1240 msec + * have passed in total, when poll + * returned the error, and 1200 msec + * when send returned the errors. */ +#ifndef USE_WINSOCK + pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); +#else + pret = WSAPoll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); +#endif + if(pret < 0 && +#ifndef USE_WINSOCK + errno != EAGAIN && errno != EINTR && +# ifdef EWOULDBLOCK + errno != EWOULDBLOCK && +# endif + errno != ENOBUFS +#else + WSAGetLastError() != WSAEINPROGRESS && + WSAGetLastError() != WSAEINTR && + WSAGetLastError() != WSAENOBUFS && + WSAGetLastError() != WSAEWOULDBLOCK +#endif + ) { + log_err("poll udp out timer failed: %s", + sock_strerror(errno)); + } } #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ + retries++; sent = sendmsg(c->fd, &msg, 0); } }