From: claudio Date: Tue, 6 Aug 2024 08:44:54 +0000 (+0000) Subject: Stop using KERNEL_LOCK to protect the per process kqueue list X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=0747e3d20bf138958765ed50a0a2854bbcb6704f;p=openbsd Stop using KERNEL_LOCK to protect the per process kqueue list Instead of the KERNEL_LOCK use the ps_mtx for most operations. If the ps_klist is modified an additional global rwlock (kqueue_ps_list_lock) is required. This includes the knotes with NOTE_FORK and NOTE_EXIT since in either cases a ps_klist is changed. In the NOTE_FORK | NOTE_TRACK case the call to kqueue_register() can sleep this is why a global rwlock is used. Adjust the reaper() to call knote_processexit() without KERNEL_LOCK. Double lock idea from visa@ OK mvs@ --- diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index e2c99fe1e78..a319241400c 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_event.c,v 1.199 2024/07/29 12:42:53 claudio Exp $ */ +/* $OpenBSD: kern_event.c,v 1.200 2024/08/06 08:44:54 claudio Exp $ */ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon @@ -124,6 +124,8 @@ int filt_kqueue_common(struct knote *kn, struct kqueue *kq); int filt_procattach(struct knote *kn); void filt_procdetach(struct knote *kn); int filt_proc(struct knote *kn, long hint); +int filt_procmodify(struct kevent *kev, struct knote *kn); +int filt_procprocess(struct knote *kn, struct kevent *kev); int filt_sigattach(struct knote *kn); void filt_sigdetach(struct knote *kn); int filt_signal(struct knote *kn, long hint); @@ -145,17 +147,21 @@ const struct filterops kqread_filtops = { }; const struct filterops proc_filtops = { - .f_flags = 0, + .f_flags = FILTEROP_MPSAFE, .f_attach = filt_procattach, .f_detach = filt_procdetach, .f_event = filt_proc, + .f_modify = filt_procmodify, + .f_process = filt_procprocess, }; const struct filterops sig_filtops = { - .f_flags = 0, + .f_flags = FILTEROP_MPSAFE, .f_attach = filt_sigattach, .f_detach = filt_sigdetach, .f_event = filt_signal, + .f_modify = filt_procmodify, + .f_process = filt_procprocess, }; const struct filterops file_filtops = { @@ -177,6 +183,7 @@ const struct filterops timer_filtops = { struct pool knote_pool; struct pool kqueue_pool; struct mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR); +struct rwlock kqueue_ps_list_lock = RWLOCK_INITIALIZER("kqpsl"); int kq_ntimeouts = 0; int kq_timeoutmax = (4 * 1024); @@ -333,7 +340,7 @@ int filt_procattach(struct knote *kn) { struct process *pr; - int s; + int nolock; if ((curproc->p_p->ps_flags & PS_PLEDGE) && (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0) @@ -342,13 +349,14 @@ filt_procattach(struct knote *kn) if (kn->kn_id > PID_MAX) return ESRCH; + KERNEL_LOCK(); pr = prfind(kn->kn_id); if (pr == NULL) - return (ESRCH); + goto fail; /* exiting processes can't be specified */ if (pr->ps_flags & PS_EXITING) - return (ESRCH); + goto fail; kn->kn_ptr.p_process = pr; kn->kn_flags |= EV_CLEAR; /* automatically set */ @@ -360,13 +368,26 @@ filt_procattach(struct knote *kn) kn->kn_data = kn->kn_sdata; /* ppid */ kn->kn_fflags = NOTE_CHILD; kn->kn_flags &= ~EV_FLAG1; + rw_assert_wrlock(&kqueue_ps_list_lock); } - s = splhigh(); + /* this needs both the ps_mtx and exclusive kqueue_ps_list_lock. */ + nolock = (rw_status(&kqueue_ps_list_lock) == RW_WRITE); + if (!nolock) + rw_enter_write(&kqueue_ps_list_lock); + mtx_enter(&pr->ps_mtx); klist_insert_locked(&pr->ps_klist, kn); - splx(s); + mtx_leave(&pr->ps_mtx); + if (!nolock) + rw_exit_write(&kqueue_ps_list_lock); + + KERNEL_UNLOCK(); return (0); + +fail: + KERNEL_UNLOCK(); + return (ESRCH); } /* @@ -380,25 +401,25 @@ filt_procattach(struct knote *kn) void filt_procdetach(struct knote *kn) { - struct kqueue *kq = kn->kn_kq; struct process *pr = kn->kn_ptr.p_process; - int s, status; + int status; - mtx_enter(&kq->kq_lock); + /* this needs both the ps_mtx and exclusive kqueue_ps_list_lock. */ + rw_enter_write(&kqueue_ps_list_lock); + mtx_enter(&pr->ps_mtx); status = kn->kn_status; - mtx_leave(&kq->kq_lock); - if (status & KN_DETACHED) - return; + if ((status & KN_DETACHED) == 0) + klist_remove_locked(&pr->ps_klist, kn); - s = splhigh(); - klist_remove_locked(&pr->ps_klist, kn); - splx(s); + mtx_leave(&pr->ps_mtx); + rw_exit_write(&kqueue_ps_list_lock); } int filt_proc(struct knote *kn, long hint) { + struct process *pr = kn->kn_ptr.p_process; struct kqueue *kq = kn->kn_kq; u_int event; @@ -419,17 +440,14 @@ filt_proc(struct knote *kn, long hint) */ if (event == NOTE_EXIT) { struct process *pr = kn->kn_ptr.p_process; - int s; mtx_enter(&kq->kq_lock); kn->kn_status |= KN_DETACHED; mtx_leave(&kq->kq_lock); - s = splhigh(); kn->kn_flags |= (EV_EOF | EV_ONESHOT); kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig); klist_remove_locked(&pr->ps_klist, kn); - splx(s); return (1); } @@ -452,7 +470,12 @@ filt_proc(struct knote *kn, long hint) kev.fflags = kn->kn_sfflags; kev.data = kn->kn_id; /* parent */ kev.udata = kn->kn_udata; /* preserve udata */ + + rw_assert_wrlock(&kqueue_ps_list_lock); + mtx_leave(&pr->ps_mtx); error = kqueue_register(kq, &kev, 0, NULL); + mtx_enter(&pr->ps_mtx); + if (error) kn->kn_fflags |= NOTE_TRACKERR; } @@ -460,6 +483,37 @@ filt_proc(struct knote *kn, long hint) return (kn->kn_fflags != 0); } +int +filt_procmodify(struct kevent *kev, struct knote *kn) +{ + struct process *pr = kn->kn_ptr.p_process; + int active; + + mtx_enter(&pr->ps_mtx); + active = knote_modify(kev, kn); + mtx_leave(&pr->ps_mtx); + + return (active); +} + +/* + * By default only grab the mutex here. If the event requires extra protection + * because it alters the klist (NOTE_EXIT, NOTE_FORK the caller of the knote + * needs to grab the rwlock first. + */ +int +filt_procprocess(struct knote *kn, struct kevent *kev) +{ + struct process *pr = kn->kn_ptr.p_process; + int active; + + mtx_enter(&pr->ps_mtx); + active = knote_process(kn, kev); + mtx_leave(&pr->ps_mtx); + + return (active); +} + /* * signal knotes are shared with proc knotes, so we apply a mask to * the hint in order to differentiate them from process hints. This @@ -470,7 +524,6 @@ int filt_sigattach(struct knote *kn) { struct process *pr = curproc->p_p; - int s; if (kn->kn_id >= NSIG) return EINVAL; @@ -478,9 +531,12 @@ filt_sigattach(struct knote *kn) kn->kn_ptr.p_process = pr; kn->kn_flags |= EV_CLEAR; /* automatically set */ - s = splhigh(); + /* this needs both the ps_mtx and exclusive kqueue_ps_list_lock. */ + rw_enter_write(&kqueue_ps_list_lock); + mtx_enter(&pr->ps_mtx); klist_insert_locked(&pr->ps_klist, kn); - splx(s); + mtx_leave(&pr->ps_mtx); + rw_exit_write(&kqueue_ps_list_lock); return (0); } @@ -489,17 +545,17 @@ void filt_sigdetach(struct knote *kn) { struct process *pr = kn->kn_ptr.p_process; - int s; - s = splhigh(); + rw_enter_write(&kqueue_ps_list_lock); + mtx_enter(&pr->ps_mtx); klist_remove_locked(&pr->ps_klist, kn); - splx(s); + mtx_leave(&pr->ps_mtx); + rw_exit_write(&kqueue_ps_list_lock); } int filt_signal(struct knote *kn, long hint) { - if (hint & NOTE_SIGNAL) { hint &= ~NOTE_SIGNAL; @@ -2002,14 +2058,28 @@ knote_fdclose(struct proc *p, int fd) void knote_processexit(struct process *pr) { - KERNEL_ASSERT_LOCKED(); - + /* this needs both the ps_mtx and exclusive kqueue_ps_list_lock. */ + rw_enter_write(&kqueue_ps_list_lock); + mtx_enter(&pr->ps_mtx); knote_locked(&pr->ps_klist, NOTE_EXIT); + mtx_leave(&pr->ps_mtx); + rw_exit_write(&kqueue_ps_list_lock); /* remove other knotes hanging off the process */ klist_invalidate(&pr->ps_klist); } +void +knote_processfork(struct process *pr, pid_t pid) +{ + /* this needs both the ps_mtx and exclusive kqueue_ps_list_lock. */ + rw_enter_write(&kqueue_ps_list_lock); + mtx_enter(&pr->ps_mtx); + knote_locked(&pr->ps_klist, NOTE_FORK | pid); + mtx_leave(&pr->ps_mtx); + rw_exit_write(&kqueue_ps_list_lock); +} + void knote_attach(struct knote *kn) { diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index df61b2c5716..189596c534c 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_exec.c,v 1.256 2024/07/08 13:17:12 claudio Exp $ */ +/* $OpenBSD: kern_exec.c,v 1.257 2024/08/06 08:44:54 claudio Exp $ */ /* $NetBSD: kern_exec.c,v 1.75 1996/02/09 18:59:28 christos Exp $ */ /*- @@ -711,7 +711,7 @@ sys_execve(struct proc *p, void *v, register_t *retval) /* * notify others that we exec'd */ - knote_locked(&pr->ps_klist, NOTE_EXEC); + knote(&pr->ps_klist, NOTE_EXEC); /* map the process's timekeep page, needs to be before exec_elf_fixup */ if (exec_timekeep_map(pr)) diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 4882de63652..db989e29957 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_exit.c,v 1.228 2024/07/29 09:49:49 claudio Exp $ */ +/* $OpenBSD: kern_exit.c,v 1.229 2024/08/06 08:44:54 claudio Exp $ */ /* $NetBSD: kern_exit.c,v 1.39 1996/04/22 01:38:25 christos Exp $ */ /* @@ -477,16 +477,13 @@ reaper(void *arg) /* Release the rest of the process's vmspace */ uvm_exit(pr); + /* Notify listeners of our demise and clean up. */ + knote_processexit(pr); + KERNEL_LOCK(); if ((pr->ps_flags & PS_NOZOMBIE) == 0) { /* Process is now a true zombie. */ atomic_setbits_int(&pr->ps_flags, PS_ZOMBIE); - } - - /* Notify listeners of our demise and clean up. */ - knote_processexit(pr); - - if (pr->ps_flags & PS_ZOMBIE) { /* Post SIGCHLD and wake up parent. */ prsignal(pr->ps_pptr, SIGCHLD); wakeup(pr->ps_pptr); diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 35632bbc936..52a7c2cd1b6 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_fork.c,v 1.260 2024/06/03 12:48:25 claudio Exp $ */ +/* $OpenBSD: kern_fork.c,v 1.261 2024/08/06 08:44:54 claudio Exp $ */ /* $NetBSD: kern_fork.c,v 1.29 1996/02/09 18:59:34 christos Exp $ */ /* @@ -199,6 +199,7 @@ process_initialize(struct process *pr, struct proc *p) rw_init(&pr->ps_lock, "pslock"); mtx_init(&pr->ps_mtx, IPL_HIGH); + klist_init_mutex(&pr->ps_klist, &pr->ps_mtx); timeout_set_flags(&pr->ps_realit_to, realitexpire, pr, KCLOCK_UPTIME, 0); @@ -484,7 +485,7 @@ fork1(struct proc *curp, int flags, void (*func)(void *), void *arg, /* * Notify any interested parties about the new process. */ - knote_locked(&curpr->ps_klist, NOTE_FORK | pr->ps_pid); + knote_processfork(curpr, pr->ps_pid); /* * Update stats now that we know the fork was successful. diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 2eafc58109e..211675a6749 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_sig.c,v 1.336 2024/07/29 12:42:53 claudio Exp $ */ +/* $OpenBSD: kern_sig.c,v 1.337 2024/08/06 08:44:54 claudio Exp $ */ /* $NetBSD: kern_sig.c,v 1.54 1996/04/22 01:38:32 christos Exp $ */ /* @@ -985,7 +985,7 @@ ptsignal(struct proc *p, int signum, enum signal_type type) } if (type != SPROPAGATED) - knote_locked(&pr->ps_klist, NOTE_SIGNAL | signum); + knote(&pr->ps_klist, NOTE_SIGNAL | signum); prop = sigprop[signum]; diff --git a/sys/sys/event.h b/sys/sys/event.h index 20cca20bcad..ca530a2bddd 100644 --- a/sys/sys/event.h +++ b/sys/sys/event.h @@ -1,4 +1,4 @@ -/* $OpenBSD: event.h,v 1.72 2024/07/29 12:42:53 claudio Exp $ */ +/* $OpenBSD: event.h,v 1.73 2024/08/06 08:44:54 claudio Exp $ */ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon @@ -294,6 +294,7 @@ extern void knote(struct klist *list, long hint); extern void knote_locked(struct klist *list, long hint); extern void knote_fdclose(struct proc *p, int fd); extern void knote_processexit(struct process *); +extern void knote_processfork(struct process *, pid_t); extern void knote_assign(const struct kevent *, struct knote *); extern void knote_submit(struct knote *, struct kevent *); extern void kqueue_init(void); diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 8843c792684..8527bb1f623 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: proc.h,v 1.366 2024/07/29 09:49:49 claudio Exp $ */ +/* $OpenBSD: proc.h,v 1.367 2024/08/06 08:44:54 claudio Exp $ */ /* $NetBSD: proc.h,v 1.44 1996/04/22 01:23:21 christos Exp $ */ /*- @@ -136,6 +136,7 @@ struct pinsyscall { * K kernel lock * m this process' `ps_mtx' * p this process' `ps_lock' + * Q kqueue_ps_list_lock * R rlimit_lock * S scheduler lock * T itimer_mtx @@ -181,7 +182,7 @@ struct process { /* The following fields are all zeroed upon creation in process_new. */ #define ps_startzero ps_klist - struct klist ps_klist; /* knotes attached to this process */ + struct klist ps_klist; /* [Q,m] knotes attached to process */ u_int ps_flags; /* [a] PS_* flags. */ int ps_siglist; /* Signals pending for the process. */