From cece5eebf1b537dde7c716dcd3197e0b31964d63 Mon Sep 17 00:00:00 2001 From: florian Date: Fri, 15 Sep 2023 05:32:00 +0000 Subject: [PATCH] Improve handling of ENOBUFS by not running in a tight loop forever. This cherry-picks upstream git commit 0ee44ef384593ed0382d1ce6048d5a9c9440b45c. Issue reported by landry@, which could be traced back to a buggy WiFi driver that would constantly return ENOBUFS. This in turn lead unwind(8) enter a tight loop through poll / sendto without making any progress and burning 100% cpu. OK sthen I missed to commit this file when commiting to unbound. Pointed out by tb, thanks! --- sbin/unwind/libunbound/util/netevent.c | 102 ++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 2 deletions(-) diff --git a/sbin/unwind/libunbound/util/netevent.c b/sbin/unwind/libunbound/util/netevent.c index 204e4883cf2..b9395a8998b 100644 --- a/sbin/unwind/libunbound/util/netevent.c +++ b/sbin/unwind/libunbound/util/netevent.c @@ -116,6 +116,8 @@ /** timeout in millisec to wait for write to unblock, packets dropped after.*/ #define SEND_BLOCKED_WAIT_TIMEOUT 200 +/** max number of times to wait for write to unblock, packets dropped after.*/ +#define SEND_BLOCKED_MAX_RETRY 5 /** Let's make timestamping code cleaner and redefine SO_TIMESTAMP* */ #ifndef SO_TIMESTAMP @@ -402,9 +404,10 @@ comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, WSAGetLastError() == WSAENOBUFS || WSAGetLastError() == WSAEWOULDBLOCK) { #endif + int retries = 0; /* if we set the fd blocking, other threads suddenly * have a blocking fd that they operate on */ - while(sent == -1 && ( + while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && ( #ifndef USE_WINSOCK errno == EAGAIN || errno == EINTR || # ifdef EWOULDBLOCK @@ -419,6 +422,13 @@ comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, #endif )) { #if defined(HAVE_POLL) || defined(USE_WINSOCK) + int send_nobufs = ( +#ifndef USE_WINSOCK + errno == ENOBUFS +#else + WSAGetLastError() == WSAENOBUFS +#endif + ); struct pollfd p; int pret; memset(&p, 0, sizeof(p)); @@ -457,8 +467,48 @@ comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet, log_err("poll udp out failed: %s", sock_strerror(errno)); return 0; + } else if((pret < 0 && +#ifndef USE_WINSOCK + errno == ENOBUFS +#else + WSAGetLastError() == WSAENOBUFS +#endif + ) || (send_nobufs && retries > 0)) { + /* ENOBUFS, and poll returned without + * a timeout. Or the retried send call + * returned ENOBUFS. It is good to + * wait a bit for the error to clear. */ + /* The timeout is 20*(2^(retries+1)), + * it increases exponentially, starting + * at 40 msec. After 5 tries, 1240 msec + * have passed in total, when poll + * returned the error, and 1200 msec + * when send returned the errors. */ +#ifndef USE_WINSOCK + pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); +#else + pret = WSAPoll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); +#endif + if(pret < 0 && +#ifndef USE_WINSOCK + errno != EAGAIN && errno != EINTR && +# ifdef EWOULDBLOCK + errno != EWOULDBLOCK && +# endif + errno != ENOBUFS +#else + WSAGetLastError() != WSAEINPROGRESS && + WSAGetLastError() != WSAEINTR && + WSAGetLastError() != WSAENOBUFS && + WSAGetLastError() != WSAEWOULDBLOCK +#endif + ) { + log_err("poll udp out timer failed: %s", + sock_strerror(errno)); + } } #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ + retries++; if (!is_connected) { sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), sldns_buffer_remaining(packet), 0, @@ -665,7 +715,8 @@ comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, WSAGetLastError() == WSAENOBUFS || WSAGetLastError() == WSAEWOULDBLOCK) { #endif - while(sent == -1 && ( + int retries = 0; + while(sent == -1 && retries < SEND_BLOCKED_MAX_RETRY && ( #ifndef USE_WINSOCK errno == EAGAIN || errno == EINTR || # ifdef EWOULDBLOCK @@ -680,6 +731,13 @@ comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, #endif )) { #if defined(HAVE_POLL) || defined(USE_WINSOCK) + int send_nobufs = ( +#ifndef USE_WINSOCK + errno == ENOBUFS +#else + WSAGetLastError() == WSAENOBUFS +#endif + ); struct pollfd p; int pret; memset(&p, 0, sizeof(p)); @@ -718,8 +776,48 @@ comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet, log_err("poll udp out failed: %s", sock_strerror(errno)); return 0; + } else if((pret < 0 && +#ifndef USE_WINSOCK + errno == ENOBUFS +#else + WSAGetLastError() == WSAENOBUFS +#endif + ) || (send_nobufs && retries > 0)) { + /* ENOBUFS, and poll returned without + * a timeout. Or the retried send call + * returned ENOBUFS. It is good to + * wait a bit for the error to clear. */ + /* The timeout is 20*(2^(retries+1)), + * it increases exponentially, starting + * at 40 msec. After 5 tries, 1240 msec + * have passed in total, when poll + * returned the error, and 1200 msec + * when send returned the errors. */ +#ifndef USE_WINSOCK + pret = poll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); +#else + pret = WSAPoll(NULL, 0, (SEND_BLOCKED_WAIT_TIMEOUT/10)<<(retries+1)); +#endif + if(pret < 0 && +#ifndef USE_WINSOCK + errno != EAGAIN && errno != EINTR && +# ifdef EWOULDBLOCK + errno != EWOULDBLOCK && +# endif + errno != ENOBUFS +#else + WSAGetLastError() != WSAEINPROGRESS && + WSAGetLastError() != WSAEINTR && + WSAGetLastError() != WSAENOBUFS && + WSAGetLastError() != WSAEWOULDBLOCK +#endif + ) { + log_err("poll udp out timer failed: %s", + sock_strerror(errno)); + } } #endif /* defined(HAVE_POLL) || defined(USE_WINSOCK) */ + retries++; sent = sendmsg(c->fd, &msg, 0); } } -- 2.20.1