From 6d57c564656ee8c1c46a869a92c0b1fe7532302b Mon Sep 17 00:00:00 2001
From: visa
Date: Wed, 24 Feb 2021 14:59:52 +0000
Subject: [PATCH] kqueue: Revise filterops interface

Extend kqueue's filterops interface with new callbacks so that it
becomes easier to use with fine-grained locking. The new interface
delegates the serialization of kn_event access to event sources. Now
kqueue uses filterops callbacks to read or write kn_event. This hides
event sources' locking patterns from kqueue, and allows clean
implementation of atomic read-and-clear for EV_CLEAR, for instance.

There are so many existing filterops instances that converting all of
them in one go is tricky. This patch adds a wrapper mechanism that
kqueue uses when the new callbacks are missing.

The new filterops interface has been influenced by XNU's kqueue.

OK mpi@ semarie@
---
 sys/kern/kern_event.c | 229 ++++++++++++++++++++++++++++++++++--------
 sys/sys/event.h       |  80 +++++++++++++--
 2 files changed, 257 insertions(+), 52 deletions(-)

diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index 5471b2913d6..caa263a71bc 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: kern_event.c,v 1.160 2021/01/27 02:58:03 visa Exp $	*/
+/*	$OpenBSD: kern_event.c,v 1.161 2021/02/24 14:59:52 visa Exp $	*/
 
 /*-
  * Copyright (c) 1999,2000,2001 Jonathan Lemon
@@ -95,6 +95,11 @@ void	kqueue_do_check(struct kqueue *kq, const char *func, int line);
 
 void	kqpoll_dequeue(struct proc *p);
 
+static int	filter_attach(struct knote *kn);
+static void	filter_detach(struct knote *kn);
+static int	filter_event(struct knote *kn, long hint);
+static int	filter_modify(struct kevent *kev, struct knote *kn);
+static int	filter_process(struct knote *kn, struct kevent *kev);
 static void	kqueue_expand_hash(struct kqueue *kq);
 static void	kqueue_expand_list(struct kqueue *kq, int fd);
 static void	kqueue_task(void *);
@@ -374,7 +379,7 @@ filt_proc(struct knote *kn, long hint)
 		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
 		kev.fflags = kn->kn_sfflags;
 		kev.data = kn->kn_id;			/* parent */
-		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
+		kev.udata = kn->kn_udata;		/* preserve udata */
 		error = kqueue_register(kn->kn_kq, &kev, NULL);
 		if (error)
 			kn->kn_fflags |= NOTE_TRACKERR;
@@ -469,6 +474,20 @@ filt_seltrue(struct knote *kn, long hint)
 	return (1);
 }
 
+int
+filt_seltruemodify(struct kevent *kev, struct knote *kn)
+{
+	knote_modify(kev, kn);
+	return (1);
+}
+
+int
+filt_seltrueprocess(struct knote *kn, struct kevent *kev)
+{
+	knote_submit(kn, kev);
+	return (1);
+}
+
 /*
  * This provides full kqfilter entry for device switch tables, which
  * has same effect as filter using filt_seltrue() as filter method.
@@ -480,10 +499,12 @@ filt_seltruedetach(struct knote *kn)
 }
 
 const struct filterops seltrue_filtops = {
-	.f_flags	= FILTEROP_ISFD,
+	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
 	.f_attach	= NULL,
 	.f_detach	= filt_seltruedetach,
 	.f_event	= filt_seltrue,
+	.f_modify	= filt_seltruemodify,
+	.f_process	= filt_seltrueprocess,
 };
 
 int
@@ -519,10 +540,12 @@ filt_deaddetach(struct knote *kn)
 }
 
 const struct filterops dead_filtops = {
-	.f_flags	= FILTEROP_ISFD,
+	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
 	.f_attach	= NULL,
 	.f_detach	= filt_deaddetach,
 	.f_event	= filt_dead,
+	.f_modify	= filt_seltruemodify,
+	.f_process	= filt_seltrueprocess,
 };
 
 static int
@@ -535,12 +558,104 @@ filt_badfd(struct knote *kn, long hint)
 
 /* For use with kqpoll. */
 const struct filterops badfd_filtops = {
-	.f_flags	= FILTEROP_ISFD,
+	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
 	.f_attach	= NULL,
 	.f_detach	= filt_deaddetach,
 	.f_event	= filt_badfd,
+	.f_modify	= filt_seltruemodify,
+	.f_process	= filt_seltrueprocess,
 };
 
+static int
+filter_attach(struct knote *kn)
+{
+	int error;
+
+	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
+		error = kn->kn_fop->f_attach(kn);
+	} else {
+		KERNEL_LOCK();
+		error = kn->kn_fop->f_attach(kn);
+		KERNEL_UNLOCK();
+	}
+	return (error);
+}
+
+static void
+filter_detach(struct knote *kn)
+{
+	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
+		kn->kn_fop->f_detach(kn);
+	} else {
+		KERNEL_LOCK();
+		kn->kn_fop->f_detach(kn);
+		KERNEL_UNLOCK();
+	}
+}
+
+static int
+filter_event(struct knote *kn, long hint)
+{
+	if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0)
+		KERNEL_ASSERT_LOCKED();
+
+	return (kn->kn_fop->f_event(kn, hint));
+}
+
+static int
+filter_modify(struct kevent *kev, struct knote *kn)
+{
+	int active, s;
+
+	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
+		active = kn->kn_fop->f_modify(kev, kn);
+	} else {
+		KERNEL_LOCK();
+		if (kn->kn_fop->f_modify != NULL) {
+			active = kn->kn_fop->f_modify(kev, kn);
+		} else {
+			/* Emulate f_modify using f_event. */
+			s = splhigh();
+			knote_modify(kev, kn);
+			active = kn->kn_fop->f_event(kn, 0);
+			splx(s);
+		}
+		KERNEL_UNLOCK();
+	}
+	return (active);
+}
+
+static int
+filter_process(struct knote *kn, struct kevent *kev)
+{
+	int active, s;
+
+	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
+		active = kn->kn_fop->f_process(kn, kev);
+	} else {
+		KERNEL_LOCK();
+		if (kn->kn_fop->f_process != NULL) {
+			active = kn->kn_fop->f_process(kn, kev);
+		} else {
+			/* Emulate f_process using f_event. */
+			s = splhigh();
+			/*
+			 * If called from kqueue_scan(), skip f_event
+			 * when EV_ONESHOT is set, to preserve old behaviour.
+			 */
+			if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
+				active = 1;
+			else
+				active = kn->kn_fop->f_event(kn, 0);
+			if (active)
+				knote_submit(kn, kev);
+			splx(s);
+		}
+		KERNEL_UNLOCK();
+	}
+	return (active);
+}
+
 void
 kqpoll_init(void)
 {
@@ -918,7 +1033,8 @@ again:
 			kn->kn_kevent = *kev;
 
 			knote_attach(kn);
-			if ((error = fops->f_attach(kn)) != 0) {
+			error = filter_attach(kn);
+			if (error != 0) {
 				knote_drop(kn, p);
 				goto done;
 			}
@@ -937,28 +1053,29 @@ again:
 				 * seen it. This corresponds to the insert
 				 * happening in full before the close.
 				 */
-				kn->kn_fop->f_detach(kn);
+				filter_detach(kn);
 				knote_drop(kn, p);
 				goto done;
 			}
+
+			/* Check if there is a pending event. */
+			if (filter_process(kn, NULL))
+				knote_activate(kn);
 		} else {
 			/*
 			 * The user may change some filter values after the
 			 * initial EV_ADD, but doing so will not reset any
 			 * filters which have already been triggered.
 			 */
-			kn->kn_sfflags = kev->fflags;
-			kn->kn_sdata = kev->data;
-			kn->kn_kevent.udata = kev->udata;
+			if (filter_modify(kev, kn))
+				knote_activate(kn);
+			if (kev->flags & EV_ERROR) {
+				error = kev->data;
+				goto release;
+			}
 		}
-
-		s = splhigh();
-		if (kn->kn_fop->f_event(kn, 0))
-			knote_activate(kn);
-		splx(s);
-
 	} else if (kev->flags & EV_DELETE) {
-		kn->kn_fop->f_detach(kn);
+		filter_detach(kn);
 		knote_drop(kn, p);
 		goto done;
 	}
@@ -973,14 +1090,13 @@ again:
 	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
 		s = splhigh();
 		kn->kn_status &= ~KN_DISABLED;
-		if (kn->kn_fop->f_event(kn, 0))
-			kn->kn_status |= KN_ACTIVE;
-		if ((kn->kn_status & KN_ACTIVE) &&
-		    ((kn->kn_status & KN_QUEUED) == 0))
-			knote_enqueue(kn);
 		splx(s);
+		/* Check if there is a pending event. */
+		if (filter_process(kn, NULL))
+			knote_activate(kn);
 	}
 
+release:
 	s = splhigh();
 	knote_release(kn);
 	splx(s);
@@ -1110,39 +1226,36 @@ retry:
 			knote_release(kn);
 			continue;
 		}
-		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
-		    kn->kn_fop->f_event(kn, 0) == 0) {
+
+		splx(s);
+
+		memset(kevp, 0, sizeof(*kevp));
+		if (filter_process(kn, kevp) == 0) {
+			s = splhigh();
 			if ((kn->kn_status & KN_QUEUED) == 0)
 				kn->kn_status &= ~KN_ACTIVE;
 			knote_release(kn);
 			kqueue_check(kq);
 			continue;
 		}
-		*kevp = kn->kn_kevent;
-		kevp++;
-		nkev++;
-		scan->kqs_nevent++;
 
 		/*
 		 * Post-event action on the note
 		 */
-		if (kn->kn_flags & EV_ONESHOT) {
-			splx(s);
-			kn->kn_fop->f_detach(kn);
+		if (kevp->flags & EV_ONESHOT) {
+			filter_detach(kn);
 			knote_drop(kn, p);
 			s = splhigh();
-		} else if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) {
-			if (kn->kn_flags & EV_CLEAR) {
-				kn->kn_data = 0;
-				kn->kn_fflags = 0;
-			}
-			if (kn->kn_flags & EV_DISPATCH)
+		} else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) {
+			s = splhigh();
+			if (kevp->flags & EV_DISPATCH)
 				kn->kn_status |= KN_DISABLED;
 			if ((kn->kn_status & KN_QUEUED) == 0)
 				kn->kn_status &= ~KN_ACTIVE;
 			KASSERT(kn->kn_status & KN_ATTACHED);
 			knote_release(kn);
 		} else {
+			s = splhigh();
 			if ((kn->kn_status & KN_QUEUED) == 0) {
 				kqueue_check(kq);
 				kq->kq_count++;
@@ -1153,6 +1266,10 @@ retry:
 			knote_release(kn);
 		}
 		kqueue_check(kq);
+
+		kevp++;
+		nkev++;
+		scan->kqs_nevent++;
 	}
 	TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
 	splx(s);
@@ -1449,7 +1566,7 @@ knote(struct klist *list, long hint)
 	KLIST_ASSERT_LOCKED(list);
 
 	SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0)
-		if (kn->kn_fop->f_event(kn, hint))
+		if (filter_event(kn, hint))
 			knote_activate(kn);
 }
 
@@ -1469,7 +1586,7 @@ knote_remove(struct proc *p, struct knlist *list, int purge)
 			continue;
 		}
 		splx(s);
-		kn->kn_fop->f_detach(kn);
+		filter_detach(kn);
 
 		/*
 		 * Notify poll(2) and select(2) when a monitored
@@ -1655,6 +1772,36 @@ knote_dequeue(struct knote *kn)
 	kqueue_check(kq);
 }
 
+/*
+ * Modify the knote's parameters.
+ *
+ * The knote's object lock must be held.
+ */
+void
+knote_modify(const struct kevent *kev, struct knote *kn)
+{
+	kn->kn_sfflags = kev->fflags;
+	kn->kn_sdata = kev->data;
+	kn->kn_udata = kev->udata;
+}
+
+/*
+ * Submit the knote's event for delivery.
+ *
+ * The knote's object lock must be held.
+ */
+void
+knote_submit(struct knote *kn, struct kevent *kev)
+{
+	if (kev != NULL) {
+		*kev = kn->kn_kevent;
+		if (kn->kn_flags & EV_CLEAR) {
+			kn->kn_fflags = 0;
+			kn->kn_data = 0;
+		}
+	}
+}
+
 void
 klist_init(struct klist *klist, const struct klistops *ops, void *arg)
 {
@@ -1737,10 +1884,10 @@ klist_invalidate(struct klist *list)
 		}
 		klist_unlock(list, ls);
 		splx(s);
-		kn->kn_fop->f_detach(kn);
+		filter_detach(kn);
 		if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
 			kn->kn_fop = &dead_filtops;
-			kn->kn_fop->f_event(kn, 0);
+			filter_event(kn, 0);
 			knote_activate(kn);
 			s = splhigh();
 			knote_release(kn);
diff --git a/sys/sys/event.h b/sys/sys/event.h
index b01bfde3780..105f7e5376f 100644
--- a/sys/sys/event.h
+++ b/sys/sys/event.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: event.h,v 1.53 2021/01/17 05:56:32 visa Exp $	*/
+/*	$OpenBSD: event.h,v 1.54 2021/02/24 14:59:52 visa Exp $	*/
 
 /*-
  * Copyright (c) 1999,2000,2001 Jonathan Lemon
@@ -165,30 +165,85 @@ struct klist {
  */
 #define NOTE_SIGNAL	0x08000000
 
+/*
+ * = Event filter interface
+ *
+ * == .f_flags
+ *
+ * Defines properties of the event filter:
+ *
+ * - FILTEROP_ISFD	Each knote of this filter is associated
+ *			with a file descriptor.
+ *
+ * - FILTEROP_MPSAFE	The kqueue subsystem can invoke .f_attach(),
+ *			.f_detach(), .f_modify() and .f_process() without
+ *			the kernel lock.
+ *
+ * == .f_attach()
+ *
+ * Attaches the knote to the object.
+ *
+ * == .f_detach()
+ *
+ * Detaches the knote from the object. The object must not use this knote
+ * for delivering events after this callback has returned.
+ *
+ * == .f_event()
+ *
+ * Notifies the filter about an event. Called through knote().
+ *
+ * == .f_modify()
+ *
+ * Modifies the knote with new state from the user.
+ *
+ * Returns non-zero if the knote has become active.
+ *
+ * == .f_process()
+ *
+ * Checks if the event is active and returns non-zero if the event should be
+ * returned to the user.
+ *
+ * If kev is non-NULL and the event is active, the callback should store
+ * the event's state in kev for delivery to the user.
+ *
+ * == Concurrency control
+ *
+ * The kqueue subsystem serializes calls of .f_attach(), .f_detach(),
+ * .f_modify() and .f_process().
+ */
+
 #define FILTEROP_ISFD		0x00000001	/* ident == filedescriptor */
+#define FILTEROP_MPSAFE		0x00000002	/* safe without kernel lock */
 
 struct filterops {
 	int	f_flags;
 	int	(*f_attach)(struct knote *kn);
 	void	(*f_detach)(struct knote *kn);
 	int	(*f_event)(struct knote *kn, long hint);
+	int	(*f_modify)(struct kevent *kev, struct knote *kn);
+	int	(*f_process)(struct knote *kn, struct kevent *kev);
 };
 
+/*
+ * Locking:
+ *	I	immutable after creation
+ *	o	object lock
+ */
 struct knote {
 	SLIST_ENTRY(knote)	kn_link;	/* for fd */
 	SLIST_ENTRY(knote)	kn_selnext;	/* for struct selinfo */
 	TAILQ_ENTRY(knote)	kn_tqe;
-	struct kqueue		*kn_kq;		/* which queue we are on */
+	struct kqueue		*kn_kq;		/* [I] which queue we are on */
 	struct kevent		kn_kevent;
 	int			kn_status;
-	int			kn_sfflags;	/* saved filter flags */
-	__int64_t		kn_sdata;	/* saved data field */
+	int			kn_sfflags;	/* [o] saved filter flags */
+	__int64_t		kn_sdata;	/* [o] saved data field */
 	union {
 		struct file	*p_fp;		/* file data pointer */
 		struct process	*p_process;	/* process pointer */
 	} kn_ptr;
 	const struct filterops	*kn_fop;
-	void			*kn_hook;
+	void			*kn_hook;	/* [o] */
 #define KN_ACTIVE	0x0001			/* event has been triggered */
 #define KN_QUEUED	0x0002			/* event is on queue */
 #define KN_DISABLED	0x0004			/* event is disabled */
@@ -198,12 +253,13 @@ struct knote {
 #define KN_ATTACHED	0x0040			/* knote is attached to
 						 * a knlist of the kqueue
 						 */
-#define kn_id		kn_kevent.ident
-#define kn_filter	kn_kevent.filter
-#define kn_flags	kn_kevent.flags
-#define kn_fflags	kn_kevent.fflags
-#define kn_data		kn_kevent.data
-#define kn_fp		kn_ptr.p_fp
+#define kn_id		kn_kevent.ident		/* [I] */
+#define kn_filter	kn_kevent.filter	/* [I] */
+#define kn_flags	kn_kevent.flags		/* [o] */
+#define kn_fflags	kn_kevent.fflags	/* [o] */
+#define kn_data		kn_kevent.data		/* [o] */
+#define kn_udata	kn_kevent.udata		/* [o] */
+#define kn_fp		kn_ptr.p_fp		/* [o] */
 };
 
 struct klistops {
@@ -234,6 +290,8 @@ extern void	kqpoll_exit(void);
 extern void	knote(struct klist *list, long hint);
 extern void	knote_fdclose(struct proc *p, int fd);
 extern void	knote_processexit(struct proc *);
+extern void	knote_modify(const struct kevent *, struct knote *);
+extern void	knote_submit(struct knote *, struct kevent *);
 extern int	kqueue_register(struct kqueue *kq,
 		    struct kevent *kev, struct proc *p);
 extern int	kqueue_scan(struct kqueue_scan_state *, int, struct kevent *,
-- 
2.20.1
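
Editor's sketch, not part of the patch: the code below shows how a driver-side
read filter could adopt FILTEROP_MPSAFE with the new f_modify/f_process
callbacks instead of falling back on the kernel-locked emulation in
filter_modify()/filter_process(). All mydev* names, the softc layout, sc_mtx,
sc_klist and sc_nready are hypothetical and exist only for illustration; the
driver's klist is assumed to be locked by sc_mtx so that knote() runs f_event
with the mutex held. Only knote_modify(), knote_submit(), klist_remove() and
the mutex API are taken from the tree.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/event.h>
#include <sys/mutex.h>

struct mydev_softc {				/* hypothetical driver state */
	struct mutex	sc_mtx;			/* serializes knote state */
	struct klist	sc_klist;		/* assumed locked by sc_mtx */
	uint64_t	sc_nready;		/* bytes ready, under sc_mtx */
};

void
filt_mydevrdetach(struct knote *kn)
{
	struct mydev_softc *sc = kn->kn_hook;

	klist_remove(&sc->sc_klist, kn);
}

int
filt_mydevread(struct knote *kn, long hint)
{
	struct mydev_softc *sc = kn->kn_hook;

	/* Runs with sc_mtx held, via knote() or the wrappers below. */
	MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
	kn->kn_data = sc->sc_nready;
	return (kn->kn_data > 0);
}

int
filt_mydevrmodify(struct kevent *kev, struct knote *kn)
{
	struct mydev_softc *sc = kn->kn_hook;
	int active;

	mtx_enter(&sc->sc_mtx);
	knote_modify(kev, kn);		/* take kn_sfflags/kn_sdata/kn_udata */
	active = filt_mydevread(kn, 0);
	mtx_leave(&sc->sc_mtx);

	return (active);
}

int
filt_mydevrprocess(struct knote *kn, struct kevent *kev)
{
	struct mydev_softc *sc = kn->kn_hook;
	int active;

	mtx_enter(&sc->sc_mtx);
	active = filt_mydevread(kn, 0);
	if (active)
		knote_submit(kn, kev);	/* copy out event, clear for EV_CLEAR */
	mtx_leave(&sc->sc_mtx);

	return (active);
}

const struct filterops mydevread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,		/* knote set up by the device's kqfilter hook */
	.f_detach	= filt_mydevrdetach,
	.f_event	= filt_mydevread,
	.f_modify	= filt_mydevrmodify,
	.f_process	= filt_mydevrprocess,
};

With a filter like this, the wrapper functions added by the patch skip
KERNEL_LOCK(), and knote_submit() provides the atomic read-and-clear that the
commit message mentions for EV_CLEAR, since the copy into the user kevent and
the clearing of kn_data/kn_fflags happen inside the same sc_mtx critical
section.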