From 3a651fdc829792d263a459fc6cf1a0bb62eca050 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tatuya=20JINMEI=20=E7=A5=9E=E6=98=8E=E9=81=94=E5=93=89?= Date: Sat, 8 Nov 2008 22:40:39 +0000 Subject: [PATCH] 2489. [port] solaris: Workaround Solaris's kernel bug about /dev/poll: http://bugs.opensolaris.org/view_bug.do?bug_id=6724237 Define ISC_SOCKET_USE_POLLWATCH at build time to enable this workaround. [RT #18870] --- CHANGES | 5 ++++ README | 4 +++ lib/isc/unix/socket.c | 68 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 76 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 172edb51be..c4b8a66349 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,8 @@ +2489. [port] solaris: Workaround Solaris's kernel bug about + /dev/poll: + http://bugs.opensolaris.org/view_bug.do?bug_id=6724237 + Define ISC_SOCKET_USE_POLLWATCH at build time to enable + this workaround. [RT #18870] --- 9.4.3rc1 released --- diff --git a/README b/README index 0b452c86bd..0a0bc9e86f 100644 --- a/README +++ b/README @@ -490,6 +490,10 @@ Building To disable the default check set. -DCHECK_SIBLING=0 named-checkzone checks out-of-zone addresses by default. To disable this default set. -DCHECK_LOCAL=0 + Enable workaround for Solaris kernel bug about /dev/poll + -DISC_SOCKET_USE_POLLWATCH=1 + The watch timeout is also configurable, e.g., + -DISC_SOCKET_POLLWATCH_TIMEOUT=20 LDFLAGS Linker flags. Defaults to empty string. diff --git a/lib/isc/unix/socket.c b/lib/isc/unix/socket.c index e9826d84c3..a2a535f9b2 100644 --- a/lib/isc/unix/socket.c +++ b/lib/isc/unix/socket.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: socket.c,v 1.237.18.54 2008/09/23 02:35:25 marka Exp $ */ +/* $Id: socket.c,v 1.237.18.55 2008/11/08 22:40:39 jinmei Exp $ */ /*! \file */ @@ -143,6 +143,35 @@ struct isc_socketwait { #endif /* __APPLE__ */ #endif /* USE_SELECT */ +#ifdef ISC_SOCKET_USE_POLLWATCH +/*% + * If this macro is defined, enable workaround for a Solaris /dev/poll kernel + * bug: DP_POLL ioctl could keep sleeping even if socket I/O is possible for + * some of the specified FD. The idea is based on the observation that it's + * likely for a busy server to keep receiving packets. It specifically works + * as follows: the socket watcher is first initialized with the state of + * "poll_idle". While it's in the idle state it keeps sleeping until a socket + * event occurs. When it wakes up for a socket I/O event, it moves to the + * poll_active state, and sets the poll timeout to a short period + * (ISC_SOCKET_POLLWATCH_TIMEOUT msec). If timeout occurs in this state, the + * watcher goes to the poll_checking state with the same timeout period. + * In this state, the watcher tries to detect whether this is a break + * during intermittent events or the kernel bug is triggered. If the next + * polling reports an event within the short period, the previous timeout is + * likely to be a kernel bug, and so the watcher goes back to the active state. + * Otherwise, it moves to the idle state again. + * + * It's not clear whether this is a thread-related bug, but since we've only + * seen this with threads, this workaround is used only when enabling threads. + */ + +typedef enum { poll_idle, poll_active, poll_checking } pollstate_t; + +#ifndef ISC_SOCKET_POLLWATCH_TIMEOUT +#define ISC_SOCKET_POLLWATCH_TIMEOUT 10 +#endif /* ISC_SOCKET_POLLWATCH_TIMEOUT */ +#endif /* ISC_SOCKET_USE_POLLWATCH */ + /*% * Size of per-FD lock buckets. */ @@ -3028,6 +3057,9 @@ watcher(void *uap) { int maxfd; #endif char strbuf[ISC_STRERRORSIZE]; +#ifdef ISC_SOCKET_USE_POLLWATCH + pollstate_t pollstate = poll_idle; +#endif /* * Get the control fd here. This will never change. @@ -3045,7 +3077,14 @@ watcher(void *uap) { #elif defined(USE_DEVPOLL) dvp.dp_fds = manager->events; dvp.dp_nfds = manager->nevents; +#ifndef ISC_SOCKET_USE_POLLWATCH dvp.dp_timeout = -1; +#else + if (pollstate == poll_idle) + dvp.dp_timeout = -1; + else + dvp.dp_timeout = ISC_SOCKET_POLLWATCH_TIMEOUT; +#endif /* ISC_SOCKET_USE_POLLWATCH */ cc = ioctl(manager->devpoll_fd, DP_POLL, &dvp); #elif defined(USE_SELECT) LOCK(&manager->lock); @@ -3069,6 +3108,33 @@ watcher(void *uap) { ISC_MSG_FAILED, "failed"), strbuf); } + +#if defined(USE_DEVPOLL) && defined(ISC_SOCKET_USE_POLLWATCH) + if (cc == 0) { + if (pollstate == poll_active) + pollstate = poll_checking; + else if (pollstate == poll_checking) + pollstate = poll_idle; + } else if (cc > 0) { + if (pollstate == poll_checking) { + /* + * XXX: We'd like to use a more + * verbose log level as it's actually an + * unexpected event, but the kernel bug + * reportedly happens pretty frequently + * (and it can also be a false positive) + * so it would be just too noisy. + */ + manager_log(manager, + ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_SOCKET, + ISC_LOG_DEBUG(1), + ISC_LOG_INFO, + "unexpected POLL timeout"); + } + pollstate = poll_active; + } +#endif } while (cc < 0); #if defined(USE_KQUEUE) || defined (USE_EPOLL) || defined (USE_DEVPOLL)