From 13ce2699ae7dee2b836e0a3a32b459922f3b6e20 Mon Sep 17 00:00:00 2001
From: visa
Date: Mon, 29 Nov 2021 15:54:04 +0000
Subject: [PATCH] kqueue: Revise badfd knote handling

When closing a file descriptor and converting the poll/select knotes
into badfd knotes, keep the knotes attached to the by-fd table. This
should prevent kqueue_purge() from returning before the kqueue has
become quiescent. This in turn should fix a
KASSERT(TAILQ_EMPTY(&kq->kq_head)) panic in KQRELE() that bluhm@ has
reported.

The badfd conversion is only needed while a poll/select scan is
ongoing. The system can skip the conversion if the knote is not part
of the active event set.

The code in this commit skips the conversion when the fd is closed by
the same thread that has done the fd polling. This can be improved but
should already cover typical fd usage patterns.

As badfd knotes now hold slots in the by-fd table, kqueue_register()
clears them. poll/select use kqueue_register() to set up a new scan,
so any fd close notification found at that point is a leftover from
the previous scan.

The new badfd handling should be free of accidental knote
accumulation. This obsoletes kqpoll_dequeue() and lowers kqpoll_init()
overhead.

Re-enable lazy removal of poll/select knotes now that the panic should
no longer happen.

OK mpi@
---
 sys/kern/kern_event.c | 118 +++++++++++-------------------------------
 sys/sys/event.h       |   4 +-
 2 files changed, 32 insertions(+), 90 deletions(-)
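[Illustration, not part of the patch: the scenario the badfd knotes
handle, seen from userspace. One thread blocks in poll(2) on a pipe
while a second thread closes the polled fd. With the badfd
notification, the sleeping poller should wake and see the fd flagged
rather than waiting out its timeout. This is only a sketch: the exact
wakeup timing and revents value are assumptions about the running
release, and the helper name closer() is invented for the demo.]

#include <poll.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static int fds[2];

static void *
closer(void *arg)
{
	sleep(1);		/* let the main thread enter poll() */
	close(fds[0]);		/* close the fd being polled */
	return NULL;
}

int
main(void)
{
	struct pollfd pfd;
	pthread_t t;
	int n;

	if (pipe(fds) == -1)
		return 1;
	if (pthread_create(&t, NULL, closer, NULL) != 0)
		return 1;

	pfd.fd = fds[0];
	pfd.events = POLLIN;
	n = poll(&pfd, 1, 5000);	/* should return early, not at 5s */
	printf("poll: %d, revents: 0x%x\n", n, pfd.revents);

	pthread_join(t, NULL);
	return 0;
}

[Build with cc -pthread demo.c; the demo.c name is arbitrary.]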
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index a62f40460b0..0cd03a729d1 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: kern_event.c,v 1.173 2021/11/15 15:48:54 visa Exp $	*/
+/*	$OpenBSD: kern_event.c,v 1.174 2021/11/29 15:54:04 visa Exp $	*/
 
 /*-
  * Copyright (c) 1999,2000,2001 Jonathan Lemon
@@ -93,8 +93,6 @@ void	kqueue_do_check(struct kqueue *kq, const char *func, int line);
 #define kqueue_check(kq)	do {} while (0)
 #endif
 
-void	kqpoll_dequeue(struct proc *p, int all);
-
 static int	filter_attach(struct knote *kn);
 static void	filter_detach(struct knote *kn);
 static int	filter_event(struct knote *kn, long hint);
@@ -790,14 +788,6 @@ kqpoll_init(unsigned int num)
 		kqueue_purge(p, p->p_kq);
 		p->p_kq_serial = 0;
 	}
-
-	/*
-	 * Discard any detached knotes that have been enqueued after
-	 * previous scan.
-	 * This prevents them from accumulating in case
-	 * scan does not make progress for some reason.
-	 */
-	kqpoll_dequeue(p, 0);
 }
 
 /*
@@ -813,9 +803,6 @@ kqpoll_done(unsigned int num)
 	KASSERT(p->p_kq_serial + num >= p->p_kq_serial);
 
 	p->p_kq_serial += num;
-
-	/* XXX Work around a race condition. */
-	kqueue_purge(p, p->p_kq);
 }
 
 void
@@ -827,70 +814,12 @@ kqpoll_exit(void)
 		return;
 
 	kqueue_purge(p, p->p_kq);
-	/* Clear any detached knotes that remain in the queue. */
-	kqpoll_dequeue(p, 1);
 	kqueue_terminate(p, p->p_kq);
 	KASSERT(p->p_kq->kq_refs == 1);
 	KQRELE(p->p_kq);
 	p->p_kq = NULL;
 }
 
-void
-kqpoll_dequeue(struct proc *p, int all)
-{
-	struct knote marker;
-	struct knote *kn;
-	struct kqueue *kq = p->p_kq;
-
-	/*
-	 * Bail out early without locking if the queue appears empty.
-	 *
-	 * This thread might not see the latest value of kq_count yet.
-	 * However, if there is any sustained increase in the queue size,
-	 * this thread will eventually observe that kq_count has become
-	 * non-zero.
-	 */
-	if (all == 0 && kq->kq_count == 0)
-		return;
-
-	memset(&marker, 0, sizeof(marker));
-	marker.kn_filter = EVFILT_MARKER;
-	marker.kn_status = KN_PROCESSING;
-
-	mtx_enter(&kq->kq_lock);
-	kn = TAILQ_FIRST(&kq->kq_head);
-	while (kn != NULL) {
-		/* This kqueue should not be scanned by other threads. */
-		KASSERT(kn->kn_filter != EVFILT_MARKER);
-
-		if (all == 0 && (kn->kn_status & KN_ATTACHED)) {
-			kn = TAILQ_NEXT(kn, kn_tqe);
-			continue;
-		}
-
-		TAILQ_INSERT_BEFORE(kn, &marker, kn_tqe);
-
-		if (!knote_acquire(kn, NULL, 0)) {
-			/* knote_acquire() has released kq_lock. */
-		} else {
-			kqueue_check(kq);
-			TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
-			kn->kn_status &= ~KN_QUEUED;
-			kq->kq_count--;
-			mtx_leave(&kq->kq_lock);
-
-			filter_detach(kn);
-			knote_drop(kn, p);
-		}
-
-		mtx_enter(&kq->kq_lock);
-		kqueue_check(kq);
-		kn = TAILQ_NEXT(&marker, kn_tqe);
-		TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
-	}
-	mtx_leave(&kq->kq_lock);
-}
-
 struct kqueue *
 kqueue_alloc(struct filedesc *fdp)
 {
@@ -1230,6 +1159,21 @@ again:
 		mtx_enter(&kq->kq_lock);
 		if (active)
 			knote_activate(kn);
+	} else if (kn->kn_fop == &badfd_filtops) {
+		/*
+		 * Nothing expects this badfd knote any longer.
+		 * Drop it to make room for the new knote and retry.
+		 */
+		KASSERT(kq == p->p_kq);
+		mtx_leave(&kq->kq_lock);
+		filter_detach(kn);
+		knote_drop(kn, p);
+
+		KASSERT(fp != NULL);
+		FRELE(fp, p);
+		fp = NULL;
+
+		goto again;
 	} else {
 		/*
 		 * The user may change some filter values after the
@@ -1445,7 +1389,6 @@ retry:
 			kn->kn_status |= KN_DISABLED;
 		if ((kn->kn_status & KN_QUEUED) == 0)
 			kn->kn_status &= ~KN_ACTIVE;
-		KASSERT(kn->kn_status & KN_ATTACHED);
 		knote_release(kn);
 	} else {
 		mtx_enter(&kq->kq_lock);
@@ -1455,7 +1398,6 @@ retry:
 			kn->kn_status |= KN_QUEUED;
 			TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
 		}
-		KASSERT(kn->kn_status & KN_ATTACHED);
 		knote_release(kn);
 	}
 	kqueue_check(kq);
@@ -1811,6 +1753,17 @@ knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list, int purge)
 
 	while ((kn = SLIST_FIRST(list)) != NULL) {
 		KASSERT(kn->kn_kq == kq);
+
+		if (!purge) {
+			/* Skip pending badfd knotes. */
+			while (kn->kn_fop == &badfd_filtops) {
+				kn = SLIST_NEXT(kn, kn_link);
+				if (kn == NULL)
+					return;
+				KASSERT(kn->kn_kq == kq);
+			}
+		}
+
 		if (!knote_acquire(kn, NULL, 0)) {
 			/* knote_acquire() has released kq_lock. */
 			mtx_enter(&kq->kq_lock);
@@ -1825,15 +1778,12 @@
 		 *
 		 * This reuses the original knote for delivering the
 		 * notification so as to avoid allocating memory.
-		 * The knote will be reachable only through the queue
-		 * of active knotes and is freed either by kqueue_scan()
-		 * or kqpoll_dequeue().
*/ - if (!purge && (kn->kn_flags & __EV_POLL) != 0) { + if (!purge && (kn->kn_flags & __EV_POLL) && + !(p->p_kq == kq && + p->p_kq_serial > (unsigned long)kn->kn_udata) && + kn->kn_fop != &badfd_filtops) { KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD); - mtx_enter(&kq->kq_lock); - knote_detach(kn); - mtx_leave(&kq->kq_lock); FRELE(kn->kn_fp, p); kn->kn_fp = NULL; @@ -1900,9 +1850,7 @@ knote_attach(struct knote *kn) MUTEX_ASSERT_LOCKED(&kq->kq_lock); KASSERT(kn->kn_status & KN_PROCESSING); - KASSERT((kn->kn_status & KN_ATTACHED) == 0); - kn->kn_status |= KN_ATTACHED; if (kn->kn_fop->f_flags & FILTEROP_ISFD) { KASSERT(kq->kq_knlistsize > kn->kn_id); list = &kq->kq_knlist[kn->kn_id]; @@ -1922,15 +1870,11 @@ knote_detach(struct knote *kn) MUTEX_ASSERT_LOCKED(&kq->kq_lock); KASSERT(kn->kn_status & KN_PROCESSING); - if ((kn->kn_status & KN_ATTACHED) == 0) - return; - if (kn->kn_fop->f_flags & FILTEROP_ISFD) list = &kq->kq_knlist[kn->kn_id]; else list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; SLIST_REMOVE(list, kn, knote, kn_link); - kn->kn_status &= ~KN_ATTACHED; } /* diff --git a/sys/sys/event.h b/sys/sys/event.h index c67eb800bf7..20a1fd33c82 100644 --- a/sys/sys/event.h +++ b/sys/sys/event.h @@ -1,4 +1,4 @@ -/* $OpenBSD: event.h,v 1.58 2021/11/12 04:34:23 visa Exp $ */ +/* $OpenBSD: event.h,v 1.59 2021/11/29 15:54:04 visa Exp $ */ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon @@ -251,8 +251,6 @@ struct knote { #define KN_DETACHED 0x0008 /* knote is detached */ #define KN_PROCESSING 0x0010 /* knote is being processed */ #define KN_WAITING 0x0020 /* waiting on processing */ -#define KN_ATTACHED 0x0040 /* knote is attached to - * a knlist of the kqueue */ #define kn_id kn_kevent.ident /* [I] */ #define kn_filter kn_kevent.filter /* [I] */ -- 2.20.1
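[Postscript, not part of the patch: the p_kq_serial comparison added
to knote_remove() encodes "is this knote part of the thread's active
poll/select scan?". Each scan tags its knotes with the serial it
stores in kn_udata, and kqpoll_done() advances p_kq_serial afterwards,
so a knote whose tag is below the live serial is a leftover from an
earlier scan and needs no badfd conversion. Below is a minimal sketch
of that idea; struct pollscan, struct fakenote, and in_active_scan()
are invented names, not kernel API.]

#include <stdbool.h>
#include <stdio.h>

struct pollscan {
	unsigned long serial;	/* plays the role of p->p_kq_serial */
};

struct fakenote {
	unsigned long tag;	/* plays the role of (unsigned long)kn->kn_udata */
};

/* A tag below the live serial marks a leftover from a finished scan. */
static bool
in_active_scan(const struct pollscan *ps, const struct fakenote *kn)
{
	return !(ps->serial > kn->tag);
}

int
main(void)
{
	struct pollscan ps = { .serial = 42 };
	struct fakenote stale = { .tag = 41 };	/* from a previous scan */
	struct fakenote live = { .tag = 42 };	/* from the current scan */

	printf("stale active: %d\n", in_active_scan(&ps, &stale));	/* 0 */
	printf("live active: %d\n", in_active_scan(&ps, &live));	/* 1 */
	return 0;
}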