From b94ae785ba05cc8690d25c43c802931e799452ed Mon Sep 17 00:00:00 2001 From: dmiller Date: Fri, 11 Oct 2024 20:59:34 +0000 Subject: [PATCH] Fix handling of non-select()-able pcap handles 1. pcap_get_selectable_fd() may return -1 for some devices, even if the platform as a whole supports select() on these handles. Check for this condition throughout. 2. The various backend system calls (kevent, poll, epoll, etc.) do not sleep for the timeout period if no valid handles are registered, unlike select on UNIX. This leads to busy wait, looping continuously. Instead, we call usleep() in those cases. --- nsock/src/engine_epoll.c | 111 +++++++++++++++++----------- nsock/src/engine_iocp.c | 104 ++++++++++++++++---------- nsock/src/engine_kqueue.c | 143 +++++++++++++++++++++--------------- nsock/src/engine_poll.c | 132 ++++++++++++++++++++------------- nsock/src/engine_select.c | 150 +++++++++++++++++++------------------- nsock/src/nsock_core.c | 4 +- 6 files changed, 374 insertions(+), 270 deletions(-) diff --git a/nsock/src/engine_epoll.c b/nsock/src/engine_epoll.c index 8f1a6d19a..9b81a6f6b 100644 --- a/nsock/src/engine_epoll.c +++ b/nsock/src/engine_epoll.c @@ -109,9 +109,8 @@ void process_iod_events(struct npool *nsp, struct niod *nsi, int ev); void process_event(struct npool *nsp, gh_list_t *evlist, struct nevent *nse, int ev); void process_expired_events(struct npool *nsp); #if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT int pcap_read_on_nonselect(struct npool *nsp); -#endif +void iterate_through_pcap_events(struct npool *nsp); #endif /* defined in nsock_event.c */ @@ -131,6 +130,8 @@ struct epoll_engine_info { int evlen; /* list of epoll events, resized if necessary (when polling over large numbers of IODs) */ struct epoll_event *events; + /* Number of IODs incompatible with epoll */ + int num_pcap_nonselect; }; @@ -142,6 +143,7 @@ int epoll_init(struct npool *nsp) { einfo->epfd = epoll_create(10); /* argument is ignored */ einfo->evlen = INITIAL_EV_COUNT; einfo->events = (struct epoll_event *)safe_malloc(einfo->evlen * sizeof(struct epoll_event)); + einfo->num_pcap_nonselect = 0; nsp->engine_data = (void *)einfo; @@ -166,18 +168,26 @@ int epoll_iod_register(struct npool *nsp, struct niod *iod, struct nevent *nse, iod->watched_events = ev; - memset(&epev, 0x00, sizeof(struct epoll_event)); - epev.events = EPOLLET; - epev.data.ptr = (void *)iod; - - if (ev & EV_READ) - epev.events |= EPOLL_R_FLAGS; - if (ev & EV_WRITE) - epev.events |= EPOLL_W_FLAGS; - sd = nsock_iod_get_sd(iod); - if (epoll_ctl(einfo->epfd, EPOLL_CTL_ADD, sd, &epev) < 0) - fatal("Unable to register IOD #%lu: %s", iod->id, strerror(errno)); + if (sd == -1) { + if (iod->pcap) + einfo->num_pcap_nonselect++; + else + fatal("Unable to get descriptor for IOD #%lu", iod->id); + } + else { + memset(&epev, 0x00, sizeof(struct epoll_event)); + epev.events = EPOLLET; + epev.data.ptr = (void *)iod; + + if (ev & EV_READ) + epev.events |= EPOLL_R_FLAGS; + if (ev & EV_WRITE) + epev.events |= EPOLL_W_FLAGS; + + if (epoll_ctl(einfo->epfd, EPOLL_CTL_ADD, sd, &epev) < 0) + fatal("Unable to register IOD #%lu: %s", iod->id, strerror(errno)); + } IOD_PROPSET(iod, IOD_REGISTERED); return 1; @@ -193,7 +203,13 @@ int epoll_iod_unregister(struct npool *nsp, struct niod *iod) { int sd; sd = nsock_iod_get_sd(iod); - epoll_ctl(einfo->epfd, EPOLL_CTL_DEL, sd, NULL); + if (sd == -1) { + assert(iod->pcap); + einfo->num_pcap_nonselect--; + } + else { + epoll_ctl(einfo->epfd, EPOLL_CTL_DEL, sd, NULL); + } IOD_PROPCLR(iod, IOD_REGISTERED); } @@ -222,16 +238,17 @@ int epoll_iod_modify(struct npool *nsp, struct niod *iod, struct nevent *nse, in iod->watched_events = new_events; - /* regenerate the current set of events for this IOD */ - if (iod->watched_events & EV_READ) - epev.events |= EPOLL_R_FLAGS; - if (iod->watched_events & EV_WRITE) - epev.events |= EPOLL_W_FLAGS; - sd = nsock_iod_get_sd(iod); + if (sd != -1) { + /* regenerate the current set of events for this IOD */ + if (iod->watched_events & EV_READ) + epev.events |= EPOLL_R_FLAGS; + if (iod->watched_events & EV_WRITE) + epev.events |= EPOLL_W_FLAGS; - if (epoll_ctl(einfo->epfd, EPOLL_CTL_MOD, sd, &epev) < 0) - fatal("Unable to update events for IOD #%lu: %s", iod->id, strerror(errno)); + if (epoll_ctl(einfo->epfd, EPOLL_CTL_MOD, sd, &epev) < 0) + fatal("Unable to update events for IOD #%lu: %s", iod->id, strerror(errno)); + } return 1; } @@ -250,7 +267,7 @@ int epoll_loop(struct npool *nsp, int msec_timeout) { return 0; /* No need to wait on 0 events ... */ - iod_count = gh_list_count(&nsp->active_iods); + iod_count = gh_list_count(&nsp->active_iods) - einfo->num_pcap_nonselect; if (iod_count > einfo->evlen) { einfo->evlen = iod_count * 2; einfo->events = (struct epoll_event *)safe_realloc(einfo->events, einfo->evlen * sizeof(struct epoll_event)); @@ -260,6 +277,7 @@ int epoll_loop(struct npool *nsp, int msec_timeout) { struct nevent *nse; nsock_log_debug_all("wait for events"); + results_left = 0; nse = next_expirable_event(nsp); if (!nse) @@ -270,33 +288,38 @@ int epoll_loop(struct npool *nsp, int msec_timeout) { } #if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT - /* Force a low timeout when capturing packets on systems where - * the pcap descriptor is not select()able. */ - if (gh_list_count(&nsp->pcap_read_events) > 0) - if (event_msecs > PCAP_POLL_INTERVAL) - event_msecs = PCAP_POLL_INTERVAL; -#endif -#endif + if (einfo->num_pcap_nonselect > 0 && gh_list_count(&nsp->pcap_read_events) > 0) { + /* do non-blocking read on pcap devices that doesn't support select() + * If there is anything read, just leave this loop. */ + if (pcap_read_on_nonselect(nsp)) { + /* okay, something was read. */ + // Check all pcap events that won't be signaled + gettimeofday(&nsock_tod, NULL); + iterate_through_pcap_events(nsp); + // Make the system call non-blocking + event_msecs = 0; + } + /* Force a low timeout when capturing packets on systems where + * the pcap descriptor is not select()able. */ + else if (event_msecs > PCAP_POLL_INTERVAL) { + event_msecs = PCAP_POLL_INTERVAL; + } + } +#endif /* We cast to unsigned because we want -1 to be very high (since it means no * timeout) */ combined_msecs = MIN((unsigned)event_msecs, (unsigned)msec_timeout); -#if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT - /* do non-blocking read on pcap devices that doesn't support select() - * If there is anything read, just leave this loop. */ - if (pcap_read_on_nonselect(nsp)) { - /* okay, something was read. */ - // Make the epoll_wait call non-blocking - combined_msecs = 0; + if (iod_count > 0) { + results_left = epoll_wait(einfo->epfd, einfo->events, einfo->evlen, combined_msecs); + if (results_left == -1) + sock_err = socket_errno(); + } + else if (combined_msecs > 0) { + // No compatible IODs; sleep the remainder of the wait time. + usleep(combined_msecs * 1000); } -#endif -#endif - results_left = epoll_wait(einfo->epfd, einfo->events, einfo->evlen, combined_msecs); - if (results_left == -1) - sock_err = socket_errno(); gettimeofday(&nsock_tod, NULL); /* Due to epoll delay */ } while (results_left == -1 && sock_err == EINTR); /* repeat only if signal occurred */ diff --git a/nsock/src/engine_iocp.c b/nsock/src/engine_iocp.c index bab20b6cc..95cd3bf87 100644 --- a/nsock/src/engine_iocp.c +++ b/nsock/src/engine_iocp.c @@ -159,6 +159,8 @@ struct extended_overlapped { #define IOCP_NOT_FORCED 0 #define IOCP_FORCED 1 #define IOCP_FORCED_POSTED 2 + /* Number of IODs incompatible with IO completion ports */ + int num_pcap_nonselect; }; /* --- INTERNAL PROTOTYPES --- */ @@ -175,9 +177,7 @@ void process_iod_events(struct npool *nsp, struct niod *nsi, int ev); void process_event(struct npool *nsp, gh_list_t *evlist, struct nevent *nse, int ev); void process_expired_events(struct npool *nsp); #if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT int pcap_read_on_nonselect(struct npool *nsp); -#endif void iterate_through_pcap_events(struct npool *nsp); #endif @@ -200,6 +200,7 @@ int iocp_init(struct npool *nsp) { iinfo->eov = NULL; iinfo->entries_removed = 0; iinfo->eov_list = (OVERLAPPED_ENTRY *)safe_malloc(iinfo->capacity * sizeof(OVERLAPPED_ENTRY)); + iinfo->num_pcap_nonselect = 0; nsp->engine_data = (void *)iinfo; return 1; @@ -237,13 +238,24 @@ void iocp_destroy(struct npool *nsp) { } int iocp_iod_register(struct npool *nsp, struct niod *iod, struct nevent *nse, int ev) { + int sd; struct iocp_engine_info *iinfo = (struct iocp_engine_info *)nsp->engine_data; HANDLE result; assert(!IOD_PROPGET(iod, IOD_REGISTERED)); iod->watched_events = ev; - result = CreateIoCompletionPort((HANDLE)iod->sd, iinfo->iocp, NULL, 0); - assert(result); + + sd = nsock_iod_get_sd(iod); + if (sd == -1) { + if (iod->pcap) + iinfo->num_pcap_nonselect++; + else + fatal("Unable to get descriptor for IOD #%lu", iod->id); + } + else { + result = CreateIoCompletionPort((HANDLE)sd, iinfo->iocp, NULL, 0); + assert(result && result == iinfo->iocp); + } IOD_PROPSET(iod, IOD_REGISTERED); @@ -254,10 +266,20 @@ int iocp_iod_register(struct npool *nsp, struct niod *iod, struct nevent *nse, i /* Sadly a socket can't be unassociated with a completion port */ int iocp_iod_unregister(struct npool *nsp, struct niod *iod) { + int sd; + struct iocp_engine_info *iinfo = (struct iocp_engine_info *)nsp->engine_data; if (IOD_PROPGET(iod, IOD_REGISTERED)) { - /* Nuke all uncompleted operations on that iod */ - CancelIo((HANDLE)iod->sd); + sd = nsock_iod_get_sd(iod); + if (sd == -1) { + assert(iod->pcap); + iinfo->num_pcap_nonselect--; + } + else { + /* Nuke all uncompleted operations on that iod */ + CancelIo((HANDLE)iod->sd); + } + IOD_PROPCLR(iod, IOD_REGISTERED); } @@ -339,48 +361,52 @@ int iocp_loop(struct npool *nsp, int msec_timeout) { } #if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT - /* Force a low timeout when capturing packets on systems where - * the pcap descriptor is not select()able. */ - if (gh_list_count(&nsp->pcap_read_events) > 0) - if (event_msecs > PCAP_POLL_INTERVAL) - event_msecs = PCAP_POLL_INTERVAL; -#endif + if (iinfo->num_pcap_nonselect > 0 && gh_list_count(&nsp->pcap_read_events) > 0) { + + /* do non-blocking read on pcap devices that doesn't support select() + * If there is anything read, just leave this loop. */ + if (pcap_read_on_nonselect(nsp)) { + /* okay, something was read. */ + // Check all pcap events that won't be signaled + gettimeofday(&nsock_tod, NULL); + iterate_through_pcap_events(nsp); + // Make the system call non-blocking + event_msecs = 0; + } + /* Force a low timeout when capturing packets on systems where + * the pcap descriptor is not select()able. */ + else if (event_msecs > PCAP_POLL_INTERVAL) { + event_msecs = PCAP_POLL_INTERVAL; + } + } #endif /* We cast to unsigned because we want -1 to be very high (since it means no * timeout) */ combined_msecs = MIN((unsigned)event_msecs, (unsigned)msec_timeout); -#if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT - /* do non-blocking read on pcap devices that doesn't support select() - * If there is anything read, just leave this loop. */ - if (pcap_read_on_nonselect(nsp)) { - /* okay, something was read. */ - gettimeofday(&nsock_tod, NULL); - iterate_through_pcap_events(nsp); - // Only do a non-blocking check for completed IO on the non-pcap events - combined_msecs = 0; - } -#endif -#endif - /* It is mandatory these values are reset before calling GetQueuedCompletionStatusEx */ - iinfo->entries_removed = 0; - memset(iinfo->eov_list, 0, iinfo->capacity * sizeof(OVERLAPPED_ENTRY)); - bRet = GetQueuedCompletionStatusEx(iinfo->iocp, iinfo->eov_list, iinfo->capacity, &iinfo->entries_removed, combined_msecs, FALSE); + if (total_events > 0) { + /* It is mandatory these values are reset before calling GetQueuedCompletionStatusEx */ + iinfo->entries_removed = 0; + memset(iinfo->eov_list, 0, iinfo->capacity * sizeof(OVERLAPPED_ENTRY)); + bRet = GetQueuedCompletionStatusEx(iinfo->iocp, iinfo->eov_list, iinfo->capacity, &iinfo->entries_removed, combined_msecs, FALSE); - gettimeofday(&nsock_tod, NULL); /* Due to iocp delay */ - if (!bRet) { - sock_err = socket_errno(); - if (!iinfo->eov && sock_err != WAIT_TIMEOUT) { - nsock_log_error("nsock_loop error %d: %s", sock_err, socket_strerror(sock_err)); - nsp->errnum = sock_err; - return -1; + gettimeofday(&nsock_tod, NULL); /* Due to iocp delay */ + if (!bRet) { + sock_err = socket_errno(); + if (!iinfo->eov && sock_err != WAIT_TIMEOUT) { + nsock_log_error("nsock_loop error %d: %s", sock_err, socket_strerror(sock_err)); + nsp->errnum = sock_err; + return -1; + } + } + else { + iterate_through_event_lists(nsp); } } - else { - iterate_through_event_lists(nsp); + else if (combined_msecs > 0) { + // No compatible IODs; sleep the remainder of the wait time. + usleep(combined_msecs * 1000); } /* iterate through timers and expired events */ diff --git a/nsock/src/engine_kqueue.c b/nsock/src/engine_kqueue.c index e6dc417b7..5643b948f 100644 --- a/nsock/src/engine_kqueue.c +++ b/nsock/src/engine_kqueue.c @@ -102,9 +102,8 @@ void process_iod_events(struct npool *nsp, struct niod *nsi, int ev); void process_event(struct npool *nsp, gh_list_t *evlist, struct nevent *nse, int ev); void process_expired_events(struct npool *nsp); #if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT int pcap_read_on_nonselect(struct npool *nsp); -#endif +void iterate_through_pcap_events(struct npool *nsp); #endif /* defined in nsock_event.c */ @@ -119,9 +118,10 @@ extern struct timeval nsock_tod; */ struct kqueue_engine_info { int kqfd; - int maxfd; size_t evlen; struct kevent *events; + /* Number of IODs incompatible with kqueue */ + int num_pcap_nonselect; }; @@ -131,9 +131,9 @@ int kqueue_init(struct npool *nsp) { kinfo = (struct kqueue_engine_info *)safe_malloc(sizeof(struct kqueue_engine_info)); kinfo->kqfd = kqueue(); - kinfo->maxfd = -1; kinfo->evlen = INITIAL_EV_COUNT; kinfo->events = (struct kevent *)safe_malloc(INITIAL_EV_COUNT * sizeof(struct kevent)); + kinfo->num_pcap_nonselect = 0; nsp->engine_data = (void *)kinfo; @@ -150,6 +150,7 @@ void kqueue_destroy(struct npool *nsp) { } int kqueue_iod_register(struct npool *nsp, struct niod *iod, struct nevent *nse, int ev) { + int sd; struct kqueue_engine_info *kinfo = (struct kqueue_engine_info *)nsp->engine_data; assert(!IOD_PROPGET(iod, IOD_REGISTERED)); @@ -157,25 +158,40 @@ int kqueue_iod_register(struct npool *nsp, struct niod *iod, struct nevent *nse, IOD_PROPSET(iod, IOD_REGISTERED); iod->watched_events = EV_NONE; - kqueue_iod_modify(nsp, iod, nse, ev, EV_NONE); - - if (nsock_iod_get_sd(iod) > kinfo->maxfd) - kinfo->maxfd = nsock_iod_get_sd(iod); + sd = nsock_iod_get_sd(iod); + if (sd == -1) { + if (iod->pcap) + kinfo->num_pcap_nonselect++; + else + fatal("Unable to get descriptor for IOD #%lu", iod->id); + } + else { + kqueue_iod_modify(nsp, iod, nse, ev, EV_NONE); + } return 1; } int kqueue_iod_unregister(struct npool *nsp, struct niod *iod) { + int sd; struct kqueue_engine_info *kinfo = (struct kqueue_engine_info *)nsp->engine_data; /* some IODs can be unregistered here if they're associated to an event that was * immediately completed */ if (IOD_PROPGET(iod, IOD_REGISTERED)) { - kqueue_iod_modify(nsp, iod, NULL, EV_NONE, EV_READ|EV_WRITE); - IOD_PROPCLR(iod, IOD_REGISTERED); + struct kqueue_engine_info *kinfo = (struct kqueue_engine_info *)nsp->engine_data; + int sd; - if (nsock_iod_get_sd(iod) == kinfo->maxfd) - kinfo->maxfd--; + sd = nsock_iod_get_sd(iod); + if (sd == -1) { + assert(iod->pcap); + kinfo->num_pcap_nonselect--; + } + else { + kqueue_iod_modify(nsp, iod, NULL, EV_NONE, EV_READ|EV_WRITE); + } + + IOD_PROPCLR(iod, IOD_REGISTERED); } iod->watched_events = EV_NONE; return 1; @@ -185,7 +201,7 @@ int kqueue_iod_unregister(struct npool *nsp, struct niod *iod) { int kqueue_iod_modify(struct npool *nsp, struct niod *iod, struct nevent *nse, int ev_set, int ev_clr) { struct kevent kev[2]; - int new_events, i; + int new_events, i, sd; struct kqueue_engine_info *kinfo = (struct kqueue_engine_info *)nsp->engine_data; assert((ev_set & ev_clr) == 0); @@ -198,18 +214,22 @@ int kqueue_iod_modify(struct npool *nsp, struct niod *iod, struct nevent *nse, i if (new_events == iod->watched_events) return 1; /* nothing to do */ - i = 0; - if ((ev_set ^ ev_clr) & EV_READ) { - EV_SET(&kev[i], nsock_iod_get_sd(iod), EVFILT_READ, EV_SETFLAG(ev_set, EV_READ), 0, 0, (void *)iod); - i++; - } - if ((ev_set ^ ev_clr) & EV_WRITE) { - EV_SET(&kev[i], nsock_iod_get_sd(iod), EVFILT_WRITE, EV_SETFLAG(ev_set, EV_WRITE), 0, 0, (void *)iod); - i++; - } + sd = nsock_iod_get_sd(iod); + if (sd != -1) { - if (i > 0 && kevent(kinfo->kqfd, kev, i, NULL, 0, NULL) < 0) - fatal("Unable to update events for IOD #%lu: %s", iod->id, strerror(errno)); + i = 0; + if ((ev_set ^ ev_clr) & EV_READ) { + EV_SET(&kev[i], sd, EVFILT_READ, EV_SETFLAG(ev_set, EV_READ), 0, 0, (void *)iod); + i++; + } + if ((ev_set ^ ev_clr) & EV_WRITE) { + EV_SET(&kev[i], sd, EVFILT_WRITE, EV_SETFLAG(ev_set, EV_WRITE), 0, 0, (void *)iod); + i++; + } + + if (i > 0 && kevent(kinfo->kqfd, kev, i, NULL, 0, NULL) < 0) + fatal("Unable to update events for IOD #%lu: %s", iod->id, strerror(errno)); + } iod->watched_events = new_events; return 1; @@ -221,6 +241,7 @@ int kqueue_loop(struct npool *nsp, int msec_timeout) { int combined_msecs; struct timespec ts, *ts_p; int sock_err = 0; + unsigned int iod_count; struct kqueue_engine_info *kinfo = (struct kqueue_engine_info *)nsp->engine_data; assert(msec_timeout >= -1); @@ -229,8 +250,9 @@ int kqueue_loop(struct npool *nsp, int msec_timeout) { return 0; /* No need to wait on 0 events ... */ - if (gh_list_count(&nsp->active_iods) > kinfo->evlen) { - kinfo->evlen = gh_list_count(&nsp->active_iods) * 2; + iod_count = gh_list_count(&nsp->active_iods) - kinfo->num_pcap_nonselect; + if (iod_count > kinfo->evlen) { + kinfo->evlen = iod_count * 2; kinfo->events = (struct kevent *)safe_realloc(kinfo->events, kinfo->evlen * sizeof(struct kevent)); } @@ -238,6 +260,7 @@ int kqueue_loop(struct npool *nsp, int msec_timeout) { struct nevent *nse; nsock_log_debug_all("wait for events"); + results_left = 0; nse = next_expirable_event(nsp); if (!nse) @@ -248,43 +271,48 @@ int kqueue_loop(struct npool *nsp, int msec_timeout) { } #if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT - /* Force a low timeout when capturing packets on systems where - * the pcap descriptor is not select()able. */ - if (gh_list_count(&nsp->pcap_read_events) > 0) - if (event_msecs > PCAP_POLL_INTERVAL) - event_msecs = PCAP_POLL_INTERVAL; -#endif -#endif + if (kinfo->num_pcap_nonselect > 0 && gh_list_count(&nsp->pcap_read_events) > 0) { + /* do non-blocking read on pcap devices that doesn't support select() + * If there is anything read, just leave this loop. */ + if (pcap_read_on_nonselect(nsp)) { + /* okay, something was read. */ + // Check all pcap events that won't be signaled + gettimeofday(&nsock_tod, NULL); + iterate_through_pcap_events(nsp); + // Make the system call non-blocking + event_msecs = 0; + } + /* Force a low timeout when capturing packets on systems where + * the pcap descriptor is not select()able. */ + else if (event_msecs > PCAP_POLL_INTERVAL) { + event_msecs = PCAP_POLL_INTERVAL; + } + } +#endif /* We cast to unsigned because we want -1 to be very high (since it means no * timeout) */ combined_msecs = MIN((unsigned)event_msecs, (unsigned)msec_timeout); -#if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT - /* do non-blocking read on pcap devices that doesn't support select() - * If there is anything read, just leave this loop. */ - if (pcap_read_on_nonselect(nsp)) { - /* okay, something was read. */ - // Make the kevent call non-blocking - combined_msecs = 0; - } -#endif -#endif - /* Set up the timeval pointer we will give to kevent() */ - memset(&ts, 0, sizeof(struct timespec)); - if (combined_msecs >= 0) { - ts.tv_sec = combined_msecs / 1000; - ts.tv_nsec = (combined_msecs % 1000) * 1000000L; - ts_p = &ts; - } else { - ts_p = NULL; - } + if (iod_count > 0) { + /* Set up the timeval pointer we will give to kevent() */ + memset(&ts, 0, sizeof(struct timespec)); + if (combined_msecs >= 0) { + ts.tv_sec = combined_msecs / 1000; + ts.tv_nsec = (combined_msecs % 1000) * 1000000L; + ts_p = &ts; + } else { + ts_p = NULL; + } - results_left = kevent(kinfo->kqfd, NULL, 0, kinfo->events, kinfo->evlen, ts_p); - if (results_left == -1) - sock_err = socket_errno(); + results_left = kevent(kinfo->kqfd, NULL, 0, kinfo->events, kinfo->evlen, ts_p); + if (results_left == -1) + sock_err = socket_errno(); + } + else if (combined_msecs > 0) { + // No compatible IODs; sleep the remainder of the wait time. + usleep(combined_msecs * 1000); + } gettimeofday(&nsock_tod, NULL); /* Due to kevent delay */ } while (results_left == -1 && sock_err == EINTR); /* repeat only if signal occurred */ @@ -361,7 +389,6 @@ void iterate_through_event_lists(struct npool *nsp, int evcount) { } } } - /* iterate through timers and expired events */ process_expired_events(nsp); } diff --git a/nsock/src/engine_poll.c b/nsock/src/engine_poll.c index 8f2bb1978..e904cb5aa 100644 --- a/nsock/src/engine_poll.c +++ b/nsock/src/engine_poll.c @@ -134,9 +134,8 @@ void process_iod_events(struct npool *nsp, struct niod *nsi, int ev); void process_event(struct npool *nsp, gh_list_t *evlist, struct nevent *nse, int ev); void process_expired_events(struct npool *nsp); #if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT int pcap_read_on_nonselect(struct npool *nsp); -#endif +void iterate_through_pcap_events(struct npool *nsp); #endif /* defined in nsock_event.c */ @@ -154,6 +153,8 @@ struct poll_engine_info { int max_fd; /* index of the highest poll event */ POLLFD *events; + /* Number of IODs incompatible with poll */ + int num_pcap_nonselect; }; @@ -195,6 +196,7 @@ int poll_init(struct npool *nsp) { pinfo = (struct poll_engine_info *)safe_malloc(sizeof(struct poll_engine_info)); pinfo->capacity = 0; pinfo->max_fd = -1; + pinfo->num_pcap_nonselect = 0; evlist_grow(pinfo); nsp->engine_data = (void *)pinfo; @@ -219,23 +221,32 @@ int poll_iod_register(struct npool *nsp, struct niod *iod, struct nevent *nse, i iod->watched_events = ev; sd = nsock_iod_get_sd(iod); - while (pinfo->capacity < sd + 1) - evlist_grow(pinfo); - pinfo->events[sd].fd = sd; - pinfo->events[sd].events = 0; - pinfo->events[sd].revents = 0; + if (sd == -1) { + if (iod->pcap) + pinfo->num_pcap_nonselect++; + else + fatal("Unable to get descriptor for IOD #%lu", iod->id); + } + else { + while (pinfo->capacity < sd + 1) + evlist_grow(pinfo); - pinfo->max_fd = MAX(pinfo->max_fd, sd); + pinfo->events[sd].fd = sd; + pinfo->events[sd].events = 0; + pinfo->events[sd].revents = 0; - if (ev & EV_READ) - pinfo->events[sd].events |= POLL_R_FLAGS; - if (ev & EV_WRITE) - pinfo->events[sd].events |= POLL_W_FLAGS; + pinfo->max_fd = MAX(pinfo->max_fd, sd); + + if (ev & EV_READ) + pinfo->events[sd].events |= POLL_R_FLAGS; + if (ev & EV_WRITE) + pinfo->events[sd].events |= POLL_W_FLAGS; #ifndef WIN32 - if (ev & EV_EXCEPT) - pinfo->events[sd].events |= POLL_X_FLAGS; + if (ev & EV_EXCEPT) + pinfo->events[sd].events |= POLL_X_FLAGS; #endif + } IOD_PROPSET(iod, IOD_REGISTERED); return 1; @@ -251,14 +262,20 @@ int poll_iod_unregister(struct npool *nsp, struct niod *iod) { int sd; sd = nsock_iod_get_sd(iod); - pinfo->events[sd].fd = -1; - pinfo->events[sd].events = 0; - pinfo->events[sd].revents = 0; + if (sd == -1) { + assert(iod->pcap); + pinfo->num_pcap_nonselect--; + } + else { + pinfo->events[sd].fd = -1; + pinfo->events[sd].events = 0; + pinfo->events[sd].revents = 0; - if (pinfo->max_fd == sd) - lower_max_fd(pinfo); + if (pinfo->max_fd == sd) + lower_max_fd(pinfo); - IOD_PROPCLR(iod, IOD_REGISTERED); + IOD_PROPCLR(iod, IOD_REGISTERED); + } } return 1; } @@ -281,15 +298,17 @@ int poll_iod_modify(struct npool *nsp, struct niod *iod, struct nevent *nse, int iod->watched_events = new_events; sd = nsock_iod_get_sd(iod); + if (sd >= 0) { - pinfo->events[sd].fd = sd; - pinfo->events[sd].events = 0; + pinfo->events[sd].fd = sd; + pinfo->events[sd].events = 0; - /* regenerate the current set of events for this IOD */ - if (iod->watched_events & EV_READ) - pinfo->events[sd].events |= POLL_R_FLAGS; - if (iod->watched_events & EV_WRITE) - pinfo->events[sd].events |= POLL_W_FLAGS; + /* regenerate the current set of events for this IOD */ + if (iod->watched_events & EV_READ) + pinfo->events[sd].events |= POLL_R_FLAGS; + if (iod->watched_events & EV_WRITE) + pinfo->events[sd].events |= POLL_W_FLAGS; + } return 1; } @@ -299,6 +318,7 @@ int poll_loop(struct npool *nsp, int msec_timeout) { int event_msecs; /* msecs before an event goes off */ int combined_msecs; int sock_err = 0; + unsigned int iod_count; struct poll_engine_info *pinfo = (struct poll_engine_info *)nsp->engine_data; assert(msec_timeout >= -1); @@ -306,10 +326,12 @@ int poll_loop(struct npool *nsp, int msec_timeout) { if (nsp->events_pending == 0) return 0; /* No need to wait on 0 events ... */ + iod_count = gh_list_count(&nsp->active_iods) - pinfo->num_pcap_nonselect; do { struct nevent *nse; nsock_log_debug_all("wait for events"); + results_left = 0; nse = next_expirable_event(nsp); if (!nse) @@ -320,33 +342,37 @@ int poll_loop(struct npool *nsp, int msec_timeout) { } #if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT - /* Force a low timeout when capturing packets on systems where - * the pcap descriptor is not select()able. */ - if (gh_list_count(&nsp->pcap_read_events) > 0) - if (event_msecs > PCAP_POLL_INTERVAL) - event_msecs = PCAP_POLL_INTERVAL; -#endif -#endif + if (pinfo->num_pcap_nonselect > 0 && gh_list_count(&nsp->pcap_read_events) > 0) { + /* do non-blocking read on pcap devices that doesn't support select() + * If there is anything read, just leave this loop. */ + if (pcap_read_on_nonselect(nsp)) { + /* okay, something was read. */ + // poll engine's iterate_through_event_lists() also handles pcap iods. + // Make the system call non-blocking + event_msecs = 0; + } + /* Force a low timeout when capturing packets on systems where + * the pcap descriptor is not select()able. */ + else if (event_msecs > PCAP_POLL_INTERVAL) { + event_msecs = PCAP_POLL_INTERVAL; + } + } +#endif /* We cast to unsigned because we want -1 to be very high (since it means no * timeout) */ combined_msecs = MIN((unsigned)event_msecs, (unsigned)msec_timeout); -#if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT - /* do non-blocking read on pcap devices that doesn't support select() - * If there is anything read, just leave this loop. */ - if (pcap_read_on_nonselect(nsp)) { - /* okay, something was read. */ - // Make the Poll call non-blocking - combined_msecs = 0; + if (iod_count > 0) { + results_left = Poll(pinfo->events, pinfo->max_fd + 1, combined_msecs); + if (results_left == -1) + sock_err = socket_errno(); } -#endif -#endif - results_left = Poll(pinfo->events, pinfo->max_fd + 1, combined_msecs); - if (results_left == -1) - sock_err = socket_errno(); + else if (combined_msecs > 0) { + // No compatible IODs; sleep the remainder of the wait time. + usleep(combined_msecs * 1000); + } + gettimeofday(&nsock_tod, NULL); /* Due to poll delay */ } while (results_left == -1 && sock_err == EINTR); /* repeat only if signal occurred */ @@ -382,12 +408,14 @@ static inline int get_evmask(struct npool *nsp, struct niod *nsi) { && nsi->events_pending && IOD_PROPGET(nsi, IOD_REGISTERED)) { + sd = nsock_iod_get_sd(nsi); #if HAVE_PCAP - if (nsi->pcap) - sd = ((mspcap *)nsi->pcap)->pcap_desc; - else + // For incompatible pcap handles, we have to just try a non-blocking read. + if (sd == -1 && nsi->pcap) + return EV_READ; #endif - sd = nsi->sd; + + assert(sd >= 0); assert(sd < pinfo->capacity); pev = &pinfo->events[sd]; diff --git a/nsock/src/engine_select.c b/nsock/src/engine_select.c index e91a43b34..139421231 100644 --- a/nsock/src/engine_select.c +++ b/nsock/src/engine_select.c @@ -97,10 +97,8 @@ void process_iod_events(struct npool *nsp, struct niod *nsi, int ev); void process_expired_events(struct npool *nsp); #if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT int pcap_read_on_nonselect(struct npool *nsp); #endif -#endif /* defined in nsock_event.c */ void update_first_events(struct nevent *nse); @@ -128,6 +126,8 @@ struct select_engine_info { /* The highest sd we have set in any of our fd_set's (max_sd + 1 is used in * select() calls). Note that it can be -1, when there are no valid sockets */ int max_sd; + /* Number of IODs incompatible with select */ + int num_pcap_nonselect; }; @@ -141,6 +141,7 @@ int select_init(struct npool *nsp) { FD_ZERO(&sinfo->fds_master_x); sinfo->max_sd = -1; + sinfo->num_pcap_nonselect = 0; nsp->engine_data = (void *)sinfo; @@ -153,8 +154,12 @@ void select_destroy(struct npool *nsp) { } int select_iod_register(struct npool *nsp, struct niod *iod, struct nevent *nse, int ev) { + struct select_engine_info *sinfo = (struct select_engine_info *)nsp->engine_data; assert(!IOD_PROPGET(iod, IOD_REGISTERED)); + if (nsock_iod_get_sd(iod) == -1) { + sinfo->num_pcap_nonselect++; + } iod->watched_events = ev; select_iod_modify(nsp, iod, nse, ev, EV_NONE); IOD_PROPSET(iod, IOD_REGISTERED); @@ -169,26 +174,22 @@ int select_iod_unregister(struct npool *nsp, struct niod *iod) { /* some IODs can be unregistered here if they're associated to an event that was * immediately completed */ if (IOD_PROPGET(iod, IOD_REGISTERED)) { -#if HAVE_PCAP - if (iod->pcap) { - int sd = ((mspcap *)iod->pcap)->pcap_desc; - if (sd >= 0) { - checked_fd_clr(sd, &sinfo->fds_master_r); - checked_fd_clr(sd, &sinfo->fds_results_r); - } - } else -#endif - { - checked_fd_clr(iod->sd, &sinfo->fds_master_r); - checked_fd_clr(iod->sd, &sinfo->fds_master_w); - checked_fd_clr(iod->sd, &sinfo->fds_master_x); - checked_fd_clr(iod->sd, &sinfo->fds_results_r); - checked_fd_clr(iod->sd, &sinfo->fds_results_w); - checked_fd_clr(iod->sd, &sinfo->fds_results_x); + int sd = nsock_iod_get_sd(iod); + if (sd == -1) { + assert(iod->pcap); + sinfo->num_pcap_nonselect--; } + else { + checked_fd_clr(sd, &sinfo->fds_master_r); + checked_fd_clr(sd, &sinfo->fds_master_w); + checked_fd_clr(sd, &sinfo->fds_master_x); + checked_fd_clr(sd, &sinfo->fds_results_r); + checked_fd_clr(sd, &sinfo->fds_results_w); + checked_fd_clr(sd, &sinfo->fds_results_x); - if (sinfo->max_sd == iod->sd) - sinfo->max_sd--; + if (sinfo->max_sd == sd) + sinfo->max_sd--; + } IOD_PROPCLR(iod, IOD_REGISTERED); } @@ -246,6 +247,7 @@ int select_loop(struct npool *nsp, int msec_timeout) { int sock_err = 0; struct timeval select_tv; struct timeval *select_tv_p; + unsigned int iod_count; struct select_engine_info *sinfo = (struct select_engine_info *)nsp->engine_data; assert(msec_timeout >= -1); @@ -253,10 +255,13 @@ int select_loop(struct npool *nsp, int msec_timeout) { if (nsp->events_pending == 0) return 0; /* No need to wait on 0 events ... */ + iod_count = gh_list_count(&nsp->active_iods) - sinfo->num_pcap_nonselect; + do { struct nevent *nse; nsock_log_debug_all("wait for events"); + results_left = 0; nse = next_expirable_event(nsp); if (!nse) @@ -266,57 +271,60 @@ int select_loop(struct npool *nsp, int msec_timeout) { event_msecs = MAX(0, event_msecs); } - #if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT - /* Force a low timeout when capturing packets on systems where - * the pcap descriptor is not select()able. */ - if (gh_list_count(&nsp->pcap_read_events)) - if (event_msecs > PCAP_POLL_INTERVAL) + if (sinfo->num_pcap_nonselect > 0 && gh_list_count(&nsp->pcap_read_events) > 0) { + + /* do non-blocking read on pcap devices that doesn't support select() + * If there is anything read, just leave this loop. */ + if (pcap_read_on_nonselect(nsp)) { + /* okay, something was read. */ + // select engine's iterate_through_event_lists() also handles pcap iods. + // Make the system call non-blocking + event_msecs = 0; + } + /* Force a low timeout when capturing packets on systems where + * the pcap descriptor is not select()able. */ + else if (event_msecs > PCAP_POLL_INTERVAL) { event_msecs = PCAP_POLL_INTERVAL; -#endif + } + } #endif /* We cast to unsigned because we want -1 to be very high (since it means no * timeout) */ combined_msecs = MIN((unsigned)event_msecs, (unsigned)msec_timeout); -#if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT - /* do non-blocking read on pcap devices that doesn't support select() - * If there is anything read, just leave this loop. */ - if (pcap_read_on_nonselect(nsp)) { - /* okay, something was read. */ - // Make the select call non-blocking - combined_msecs = 0; + if (iod_count > 0) { + /* Set up the timeval pointer we will give to select() */ + memset(&select_tv, 0, sizeof(select_tv)); + if (combined_msecs > 0) { + select_tv.tv_sec = combined_msecs / 1000; + select_tv.tv_usec = (combined_msecs % 1000) * 1000; + select_tv_p = &select_tv; + } else if (combined_msecs == 0) { + /* we want the tv_sec and tv_usec to be zero but they already are from bzero */ + select_tv_p = &select_tv; + } else { + assert(combined_msecs == -1); + select_tv_p = NULL; + } + + /* Set up the descriptors for select */ + sinfo->fds_results_r = sinfo->fds_master_r; + sinfo->fds_results_w = sinfo->fds_master_w; + sinfo->fds_results_x = sinfo->fds_master_x; + + results_left = fselect(sinfo->max_sd + 1, &sinfo->fds_results_r, + &sinfo->fds_results_w, &sinfo->fds_results_x, select_tv_p); + + if (results_left == -1) + sock_err = socket_errno(); } -#endif -#endif - /* Set up the timeval pointer we will give to select() */ - memset(&select_tv, 0, sizeof(select_tv)); - if (combined_msecs > 0) { - select_tv.tv_sec = combined_msecs / 1000; - select_tv.tv_usec = (combined_msecs % 1000) * 1000; - select_tv_p = &select_tv; - } else if (combined_msecs == 0) { - /* we want the tv_sec and tv_usec to be zero but they already are from bzero */ - select_tv_p = &select_tv; - } else { - assert(combined_msecs == -1); - select_tv_p = NULL; + else if (combined_msecs > 0) { + // No compatible IODs; sleep the remainder of the wait time. + usleep(combined_msecs * 1000); } - /* Set up the descriptors for select */ - sinfo->fds_results_r = sinfo->fds_master_r; - sinfo->fds_results_w = sinfo->fds_master_w; - sinfo->fds_results_x = sinfo->fds_master_x; - - results_left = fselect(sinfo->max_sd + 1, &sinfo->fds_results_r, - &sinfo->fds_results_w, &sinfo->fds_results_x, select_tv_p); - - if (results_left == -1) - sock_err = socket_errno(); - gettimeofday(&nsock_tod, NULL); /* Due to select delay */ } while (results_left == -1 && sock_err == EINTR); /* repeat only if signal occurred */ @@ -334,29 +342,19 @@ int select_loop(struct npool *nsp, int msec_timeout) { /* ---- INTERNAL FUNCTIONS ---- */ -static inline int get_evmask(const struct npool *nsp, const struct niod *nsi) { +static inline int get_evmask(const struct npool *nsp, struct niod *nsi) { struct select_engine_info *sinfo = (struct select_engine_info *)nsp->engine_data; int sd, evmask; evmask = EV_NONE; + sd = nsock_iod_get_sd(nsi); #if HAVE_PCAP -#ifndef PCAP_CAN_DO_SELECT - if (nsi->pcap) { - /* Always assume readable for a non-blocking read. We can't check checked_fd_isset - because we don't have a pcap_desc. */ - evmask |= EV_READ; - return evmask; - } + /* Always assume readable for a non-blocking read. We can't check checked_fd_isset + because we don't have a pcap_desc. */ + if (sd == -1 && nsi->pcap) + return EV_READ; #endif -#endif - -#if HAVE_PCAP - if (nsi->pcap) - sd = ((mspcap *)nsi->pcap)->pcap_desc; - else -#endif - sd = nsi->sd; assert(sd >= 0); diff --git a/nsock/src/nsock_core.c b/nsock/src/nsock_core.c index 614235b63..12b2664dc 100644 --- a/nsock/src/nsock_core.c +++ b/nsock/src/nsock_core.c @@ -883,7 +883,9 @@ int pcap_read_on_nonselect(struct npool *nsp) { current != NULL; current = next) { nse = lnode_nevent2(current); - if (do_actual_pcap_read(nse) == 1) { + int sd = nsock_iod_get_sd(nse->iod); + // We only care about non-selectable handles + if (sd == -1 && do_actual_pcap_read(nse) == 1) { /* something received */ ret++; break;