From cbf33da826c1234a87a42f966fb805c5138e125c Mon Sep 17 00:00:00 2001 From: visa Date: Fri, 12 Nov 2021 04:34:22 +0000 Subject: [PATCH] Keep knotes between poll/select systems calls Reduce the time overhead of kqueue-based poll(2) and select(2) by keeping knotes registered between the system calls. It is expected that the set of monitored file descriptors is relatively unchanged between consecutive iterations of these system calls. By keeping the knotes, the system saves the effort of repeated knote unregistering and re-registering. To avoid receiving events from file descriptors that are no longer in the monitored set, each poll/select knote is assigned an increasing serial number. Every iteration of poll/select uses a previously unused range of serials for its knotes. In the setup stage, kqueue_register() updates the serials of any existing knotes in the currently monitored set. Function kqueue_scan() delivers only the events whose serials are recent enough; expired knotes are dropped. When the serial range is about to wrap around, all the knotes in the kqueue backend are dropped. This change is a space-time tradeoff. Memory usage is increased somewhat because of the retained knotes. The increase is limited by the number of open file descriptors and active threads. Idea from DragonFly BSD, initial patch by mpi@, kqueue_scan()-based approach by me. Tested by anton@ and mpi@ OK mpi@ --- sys/kern/kern_event.c | 70 ++++++++++++++++++++++++++++++++---------- sys/kern/sys_generic.c | 49 ++++++++++------------------- sys/sys/event.h | 6 ++-- 3 files changed, 73 insertions(+), 52 deletions(-) diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index ab663547142..f33373fbaf7 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_event.c,v 1.170 2021/11/06 05:48:47 visa Exp $ */ +/* $OpenBSD: kern_event.c,v 1.171 2021/11/12 04:34:22 visa Exp $ */ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon @@ -73,6 +73,7 @@ void kqueue_terminate(struct proc *p, struct kqueue *); void KQREF(struct kqueue *); void KQRELE(struct kqueue *); +void kqueue_purge(struct proc *, struct kqueue *); int kqueue_sleep(struct kqueue *, struct timespec *); int kqueue_read(struct file *, struct uio *, int); @@ -763,29 +764,55 @@ filter_process(struct knote *kn, struct kevent *kev) return (active); } +/* + * Initialize the current thread for poll/select system call. + * num indicates the number of serials that the system call may utilize. + * After this function, the valid range of serials is + * p_kq_serial <= x < p_kq_serial + num. + */ void -kqpoll_init(void) +kqpoll_init(unsigned int num) { struct proc *p = curproc; struct filedesc *fdp; - if (p->p_kq != NULL) { - /* - * Discard any badfd knotes that have been enqueued after - * previous scan. - * This prevents them from accumulating in case - * scan does not make progress for some reason. - */ - kqpoll_dequeue(p, 0); - return; + if (p->p_kq == NULL) { + p->p_kq = kqueue_alloc(p->p_fd); + p->p_kq_serial = arc4random(); + fdp = p->p_fd; + fdplock(fdp); + LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next); + fdpunlock(fdp); } - p->p_kq = kqueue_alloc(p->p_fd); - p->p_kq_serial = arc4random(); - fdp = p->p_fd; - fdplock(fdp); - LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next); - fdpunlock(fdp); + if (p->p_kq_serial + num < p->p_kq_serial) { + /* Serial is about to wrap. Clear all attached knotes. */ + kqueue_purge(p, p->p_kq); + p->p_kq_serial = 0; + } + + /* + * Discard any detached knotes that have been enqueued after + * previous scan. + * This prevents them from accumulating in case + * scan does not make progress for some reason. + */ + kqpoll_dequeue(p, 0); +} + +/* + * Finish poll/select system call. + * num must have the same value that was used with kqpoll_init(). + */ +void +kqpoll_done(unsigned int num) +{ + struct proc *p = curproc; + + KASSERT(p->p_kq != NULL); + KASSERT(p->p_kq_serial + num >= p->p_kq_serial); + + p->p_kq_serial += num; } void @@ -1383,6 +1410,15 @@ retry: mtx_leave(&kq->kq_lock); + /* Drop expired kqpoll knotes. */ + if (p->p_kq == kq && + p->p_kq_serial > (unsigned long)kn->kn_udata) { + filter_detach(kn); + knote_drop(kn, p); + mtx_enter(&kq->kq_lock); + continue; + } + memset(kevp, 0, sizeof(*kevp)); if (filter_process(kn, kevp) == 0) { mtx_enter(&kq->kq_lock); diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 9a10bcc0057..0af21d981fa 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_generic.c,v 1.139 2021/10/29 15:52:44 anton Exp $ */ +/* $OpenBSD: sys_generic.c,v 1.140 2021/11/12 04:34:22 visa Exp $ */ /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ /* @@ -633,7 +633,7 @@ dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, pobits[2] = (fd_set *)&bits[5]; } - kqpoll_init(); + kqpoll_init(nd); #define getbits(name, x) \ if (name && (error = copyin(name, pibits[x], ni))) \ @@ -730,8 +730,7 @@ done: if (pibits[0] != (fd_set *)&bits[0]) free(pibits[0], M_TEMP, 6 * ni); - kqueue_purge(p, p->p_kq); - p->p_kq_serial += nd; + kqpoll_done(nd); return (error); } @@ -759,7 +758,7 @@ pselregister(struct proc *p, fd_set *pibits[3], fd_set *pobits[3], int nfd, DPRINTFN(2, "select fd %d mask %d serial %lu\n", fd, msk, p->p_kq_serial); EV_SET(&kev, fd, evf[msk], - EV_ADD|EV_ENABLE|EV_ONESHOT|__EV_POLL, + EV_ADD|EV_ENABLE|__EV_POLL, evff[msk], 0, (void *)(p->p_kq_serial)); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) @@ -804,13 +803,10 @@ int pselcollect(struct proc *p, struct kevent *kevp, fd_set *pobits[3], int *ncollected) { - /* Filter out and lazily delete spurious events */ if ((unsigned long)kevp->udata != p->p_kq_serial) { - DPRINTFN(0, "select fd %u mismatched serial %lu\n", - (int)kevp->ident, p->p_kq_serial); - kevp->flags = EV_DISABLE|EV_DELETE; - kqueue_register(p->p_kq, kevp, p); - return (0); + panic("%s: spurious kevp %p fd %d udata 0x%lx serial 0x%lx", + __func__, kevp, (int)kevp->ident, + (unsigned long)kevp->udata, p->p_kq_serial); } if (kevp->flags & EV_ERROR) { @@ -1001,14 +997,14 @@ ppollregister(struct proc *p, struct pollfd *pl, int nfds, int *nregistered) kevp = kev; if (pl[i].events & (POLLIN | POLLRDNORM)) { EV_SET(kevp, pl[i].fd, EVFILT_READ, - EV_ADD|EV_ENABLE|EV_ONESHOT|__EV_POLL, 0, 0, + EV_ADD|EV_ENABLE|__EV_POLL, 0, 0, (void *)(p->p_kq_serial + i)); nkev++; kevp++; } if (pl[i].events & (POLLOUT | POLLWRNORM)) { EV_SET(kevp, pl[i].fd, EVFILT_WRITE, - EV_ADD|EV_ENABLE|EV_ONESHOT|__EV_POLL, 0, 0, + EV_ADD|EV_ENABLE|__EV_POLL, 0, 0, (void *)(p->p_kq_serial + i)); nkev++; kevp++; @@ -1017,7 +1013,7 @@ ppollregister(struct proc *p, struct pollfd *pl, int nfds, int *nregistered) int evff = forcehup ? 0 : NOTE_OOB; EV_SET(kevp, pl[i].fd, EVFILT_EXCEPT, - EV_ADD|EV_ENABLE|EV_ONESHOT|__EV_POLL, evff, 0, + EV_ADD|EV_ENABLE|__EV_POLL, evff, 0, (void *)(p->p_kq_serial + i)); nkev++; kevp++; @@ -1143,7 +1139,7 @@ doppoll(struct proc *p, struct pollfd *fds, u_int nfds, return (EINVAL); } - kqpoll_init(); + kqpoll_init(nfds); sz = nfds * sizeof(*pl); @@ -1230,8 +1226,7 @@ bad: if (pl != pfds) free(pl, M_TEMP, sz); - kqueue_purge(p, p->p_kq); - p->p_kq_serial += nfds; + kqpoll_done(nfds); return (error); } @@ -1248,25 +1243,15 @@ ppollcollect(struct proc *p, struct kevent *kevp, struct pollfd *pl, u_int nfds) /* Extract poll array index */ i = (unsigned long)kevp->udata - p->p_kq_serial; - /* - * Lazily delete spurious events. - * - * This should not happen as long as kqueue_purge() is called - * at the end of every syscall. It migh be interesting to do - * like DragonFlyBSD and not always allocated a new knote in - * kqueue_register() with that lazy removal makes sense. - */ if (i >= nfds) { - DPRINTFN(0, "poll get out of range udata %lu vs serial %lu\n", + panic("%s: spurious kevp %p nfds %u udata 0x%lx serial 0x%lx", + __func__, kevp, nfds, (unsigned long)kevp->udata, p->p_kq_serial); - kevp->flags = EV_DISABLE|EV_DELETE; - kqueue_register(p->p_kq, kevp, p); - return (0); } if ((int)kevp->ident != pl[i].fd) { - DPRINTFN(0, "poll get %lu/%d mismatch fd %u!=%d serial %lu\n", - i+1, nfds, (int)kevp->ident, pl[i].fd, p->p_kq_serial); - return (0); + panic("%s: kevp %p %lu/%d mismatch fd %d!=%d serial 0x%lx", + __func__, kevp, i + 1, nfds, (int)kevp->ident, pl[i].fd, + p->p_kq_serial); } /* diff --git a/sys/sys/event.h b/sys/sys/event.h index fe4b9f1560e..c67eb800bf7 100644 --- a/sys/sys/event.h +++ b/sys/sys/event.h @@ -1,4 +1,4 @@ -/* $OpenBSD: event.h,v 1.57 2021/10/24 07:02:47 visa Exp $ */ +/* $OpenBSD: event.h,v 1.58 2021/11/12 04:34:23 visa Exp $ */ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon @@ -287,7 +287,8 @@ extern const struct filterops sig_filtops; extern const struct filterops dead_filtops; extern const struct klistops socket_klistops; -extern void kqpoll_init(void); +extern void kqpoll_init(unsigned int); +extern void kqpoll_done(unsigned int); extern void kqpoll_exit(void); extern void knote(struct klist *list, long hint); extern void knote_fdclose(struct proc *p, int fd); @@ -302,7 +303,6 @@ extern int kqueue_scan(struct kqueue_scan_state *, int, struct kevent *, struct timespec *, struct proc *, int *); extern void kqueue_scan_setup(struct kqueue_scan_state *, struct kqueue *); extern void kqueue_scan_finish(struct kqueue_scan_state *); -extern void kqueue_purge(struct proc *, struct kqueue *); extern int filt_seltrue(struct knote *kn, long hint); extern int seltrue_kqfilter(dev_t, struct knote *); extern void klist_init(struct klist *, const struct klistops *, void *); -- 2.20.1