From: visa
Date: Sat, 6 Nov 2021 05:48:47 +0000 (+0000)
Subject: Make kqread event filter MP-safe
X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=d0fd4be49d14ebcc7e4146a2ad4009619095c86f;p=openbsd

Make kqread event filter MP-safe

Use the monitored kqueue's kq_lock to serialize kqueue and knote access.

Typically, the "object lock" would also cover the klist, but that is not
possible with kqueues. knote_activate() needs the kq_lock of the
monitoring kqueue, which would create lock order trouble if kq_lock was
held when calling KNOTE(&kq->kq_sel.si_note). Avoid this by using a
separate klist lock for kqueues.

The new klist lock is system-wide. Each kqueue instance could have a
dedicated klist lock. However, the benefit of a dedicated lock over a
system-wide one is limited because the current implementation activates
kqueue knotes through a single thread.

OK mpi@
---

diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index 030b20eba84..ab663547142 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: kern_event.c,v 1.169 2021/07/24 09:16:51 mpi Exp $	*/
+/*	$OpenBSD: kern_event.c,v 1.170 2021/11/06 05:48:47 visa Exp $	*/
 
 /*-
  * Copyright (c) 1999,2000,2001 Jonathan Lemon
@@ -128,6 +128,9 @@ void	knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list,
 	    int purge);
 void	filt_kqdetach(struct knote *kn);
 int	filt_kqueue(struct knote *kn, long hint);
+int	filt_kqueuemodify(struct kevent *kev, struct knote *kn);
+int	filt_kqueueprocess(struct knote *kn, struct kevent *kev);
+int	filt_kqueue_common(struct knote *kn, struct kqueue *kq);
 int	filt_procattach(struct knote *kn);
 void	filt_procdetach(struct knote *kn);
 int	filt_proc(struct knote *kn, long hint);
@@ -140,10 +143,12 @@ int	filt_timerprocess(struct knote *kn, struct kevent *kev);
 void	filt_seltruedetach(struct knote *kn);
 
 const struct filterops kqread_filtops = {
-	.f_flags	= FILTEROP_ISFD,
+	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
 	.f_attach	= NULL,
 	.f_detach	= filt_kqdetach,
 	.f_event	= filt_kqueue,
+	.f_modify	= filt_kqueuemodify,
+	.f_process	= filt_kqueueprocess,
 };
 
 const struct filterops proc_filtops = {
@@ -171,6 +176,7 @@ const struct filterops timer_filtops = {
 
 struct	pool knote_pool;
 struct	pool kqueue_pool;
+struct	mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
 int kq_ntimeouts = 0;
 int kq_timeoutmax = (4 * 1024);
 
@@ -219,6 +225,7 @@ KQRELE(struct kqueue *kq)
 	free(kq->kq_knlist, M_KEVENT,
 	    kq->kq_knlistsize * sizeof(struct knlist));
 	hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT);
+	klist_free(&kq->kq_sel.si_note);
 	pool_put(&kqueue_pool, kq);
 }
 
@@ -254,7 +261,7 @@ kqueue_kqfilter(struct file *fp, struct knote *kn)
 		return (EINVAL);
 
 	kn->kn_fop = &kqread_filtops;
-	klist_insert_locked(&kq->kq_sel.si_note, kn);
+	klist_insert(&kq->kq_sel.si_note, kn);
 
 	return (0);
 }
@@ -263,18 +270,62 @@ filt_kqdetach(struct knote *kn)
 {
 	struct kqueue *kq = kn->kn_fp->f_data;
 
-	klist_remove_locked(&kq->kq_sel.si_note, kn);
+	klist_remove(&kq->kq_sel.si_note, kn);
+}
+
+int
+filt_kqueue_common(struct knote *kn, struct kqueue *kq)
+{
+	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
+
+	kn->kn_data = kq->kq_count;
+
+	return (kn->kn_data > 0);
 }
 
 int
 filt_kqueue(struct knote *kn, long hint)
 {
 	struct kqueue *kq = kn->kn_fp->f_data;
+	int active;
 
 	mtx_enter(&kq->kq_lock);
-	kn->kn_data = kq->kq_count;
+	active = filt_kqueue_common(kn, kq);
 	mtx_leave(&kq->kq_lock);
-	return (kn->kn_data > 0);
+
+	return (active);
+}
+
+int
+filt_kqueuemodify(struct kevent *kev, struct knote *kn)
+{
+	struct kqueue *kq = kn->kn_fp->f_data;
+	int active;
+
+	mtx_enter(&kq->kq_lock);
+	knote_modify(kev, kn);
+	active = filt_kqueue_common(kn, kq);
+	mtx_leave(&kq->kq_lock);
+
+	return (active);
+}
+
+int
+filt_kqueueprocess(struct knote *kn, struct kevent *kev)
+{
+	struct kqueue *kq = kn->kn_fp->f_data;
+	int active;
+
+	mtx_enter(&kq->kq_lock);
+	if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
+		active = 1;
+	else
+		active = filt_kqueue_common(kn, kq);
+	if (active)
+		knote_submit(kn, kev);
+	mtx_leave(&kq->kq_lock);
+
+	return (active);
 }
 
 int
@@ -821,6 +872,7 @@ kqueue_alloc(struct filedesc *fdp)
 	TAILQ_INIT(&kq->kq_head);
 	mtx_init(&kq->kq_lock, IPL_HIGH);
 	task_set(&kq->kq_task, kqueue_task, kq);
+	klist_init_mutex(&kq->kq_sel.si_note, &kqueue_klist_lock);
 
 	return (kq);
 }
@@ -1529,6 +1581,7 @@ kqueue_task(void *arg)
 	/* Kernel lock is needed inside selwakeup(). */
 	KERNEL_ASSERT_LOCKED();
 
+	mtx_enter(&kqueue_klist_lock);
 	mtx_enter(&kq->kq_lock);
 	if (kq->kq_state & KQ_SEL) {
 		kq->kq_state &= ~KQ_SEL;
@@ -1538,6 +1591,7 @@ kqueue_task(void *arg)
 		mtx_leave(&kq->kq_lock);
 		KNOTE(&kq->kq_sel.si_note, 0);
 	}
+	mtx_leave(&kqueue_klist_lock);
 
 	KQRELE(kq);
 }
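
The pattern above generalizes to other event sources. Below is a minimal
sketch for a hypothetical "foo" driver; every foo_* and filt_foo* name is
invented for illustration, while the klist and knote interfaces
(klist_init_mutex(), klist_remove(), knote_modify(), knote_submit(),
KNOTE(), FILTEROP_MPSAFE) are the ones used in the diff above. One mutex
serializes the object's state inside the filter callbacks, and a separate
mutex backs the klist, so KNOTE() is never called with the object lock
held:

/*
 * Hypothetical example; "foo" is not a real driver.  It mirrors the
 * scheme above: sc_mtx protects the object's state, while a separate
 * system-wide mutex backs the klist, as kqueue_klist_lock does.
 */

struct foo_softc {
	struct mutex	sc_mtx;		/* protects sc_count */
	struct klist	sc_note;	/* backed by foo_klist_lock */
	int		sc_count;	/* data available to readers */
};

struct mutex foo_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);

void
foo_attach(struct foo_softc *sc)
{
	mtx_init(&sc->sc_mtx, IPL_MPFLOOR);
	/* Tie the klist to the shared lock, as kqueue_alloc() does. */
	klist_init_mutex(&sc->sc_note, &foo_klist_lock);
}

/*
 * Wakeup path: only foo_klist_lock is held around KNOTE(), so
 * knote_activate() may take the monitoring kqueue's kq_lock
 * without creating a lock order against sc_mtx.
 */
void
foo_wakeup(struct foo_softc *sc)
{
	mtx_enter(&foo_klist_lock);
	KNOTE(&sc->sc_note, 0);
	mtx_leave(&foo_klist_lock);
}

int
filt_foo_common(struct knote *kn, struct foo_softc *sc)
{
	MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
	kn->kn_data = sc->sc_count;
	return (kn->kn_data > 0);
}

int
filt_foo(struct knote *kn, long hint)
{
	struct foo_softc *sc = kn->kn_hook;
	int active;

	mtx_enter(&sc->sc_mtx);
	active = filt_foo_common(kn, sc);
	mtx_leave(&sc->sc_mtx);
	return (active);
}

int
filt_foomodify(struct kevent *kev, struct knote *kn)
{
	struct foo_softc *sc = kn->kn_hook;
	int active;

	mtx_enter(&sc->sc_mtx);
	knote_modify(kev, kn);		/* apply the new kevent */
	active = filt_foo_common(kn, sc);
	mtx_leave(&sc->sc_mtx);
	return (active);
}

int
filt_fooprocess(struct knote *kn, struct kevent *kev)
{
	struct foo_softc *sc = kn->kn_hook;
	int active;

	mtx_enter(&sc->sc_mtx);
	/* Oneshot shortcut, as in filt_kqueueprocess() above. */
	if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
		active = 1;
	else
		active = filt_foo_common(kn, sc);
	if (active)
		knote_submit(kn, kev);	/* report the event */
	mtx_leave(&sc->sc_mtx);
	return (active);
}

void
filt_foodetach(struct knote *kn)
{
	struct foo_softc *sc = kn->kn_hook;

	/* klist_remove() takes foo_klist_lock internally. */
	klist_remove(&sc->sc_note, kn);
}

const struct filterops foo_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_detach	= filt_foodetach,
	.f_event	= filt_foo,
	.f_modify	= filt_foomodify,
	.f_process	= filt_fooprocess,
};

The kqfilter entry point (not shown) would set kn->kn_fop = &foo_filtops
and kn->kn_hook = sc, then call klist_insert(&sc->sc_note, kn); like
klist_remove(), klist_insert() takes the klist's backing mutex internally.
Whether the klist lock is shared or per-instance is a granularity
trade-off; as the commit message notes for kqueue_klist_lock, a dedicated
lock buys little while knotes are activated through a single thread.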