From: cheloha
Date: Sun, 16 Apr 2023 21:19:26 +0000 (+0000)
Subject: clockintr: add shadow copy of running clock interrupt to clockintr_queue
X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=58898dd51592a3a2b52be0c4486b758e5f9f9cc8;p=openbsd

clockintr: add shadow copy of running clock interrupt to clockintr_queue

cq_shadow is a private copy of the running clock interrupt passed to
cl_func() during the dispatch loop.  It resembles the real clockintr
object, though the two are distinct (hence "shadow").  A private copy
is useful for two reasons:

1. Scheduling operations performed on cq_shadow (advance, cancel,
   schedule) are recorded as requests with the CLST_SHADOW_PENDING
   flag and are normally performed on the real clockintr when
   cl_func() returns.

   However, if an outside thread performs a scheduling operation on
   the real clockintr while cl_func() is running, the
   CLST_IGNORE_SHADOW flag is set and any scheduling operations
   requested by the running clock interrupt are ignored.

   The upshot of this arrangement is that outside scheduling
   operations have priority over those requested by the running clock
   interrupt.  Because there is no race, periodic clock interrupts can
   now be stopped safely without the serialization mechanisms needed
   to stop periodic timeouts or tasks.

2. &cq->cq_shadow is a unique address, so most clockintr_* API calls
   made while cl_func() is running no longer need to enter and leave
   cq_mtx: the API can recognize when it is being called in the midst
   of clockintr_dispatch().

Tested by mlarkin@.  With input from dlg@.

In particular, dlg@ expressed some design concerns but then stopped
responding.  I have changes planned to address some of the concerns.
I think that if we hit a wall with the current clockintr design, we
could change the allocation scheme without too much suffering.  I
don't anticipate there being more than ~20 distinct clock interrupts.
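To make the new calling convention concrete, here is a minimal sketch of a
periodic cl_func() handler that reschedules itself through the shadow copy.
It is not taken from this commit: the handler name, the 10 ms period, and
the surrounding comments are hypothetical; clockintr_advance() and the
cl_func() signature are as in the diff below.

#include <sys/types.h>
#include <sys/clockintr.h>

/*
 * Hypothetical periodic clock interrupt handler.  During
 * clockintr_dispatch() the "cl" argument is &cq->cq_shadow, so the
 * clockintr_advance() call below takes the new lock-free path: it
 * advances cl_expiration and sets CLST_SHADOW_PENDING.  The reschedule
 * is applied to the real clockintr after this function returns, unless
 * an outside thread rescheduled or cancelled it in the meantime
 * (CLST_IGNORE_SHADOW).
 */
void
example_periodic_intr(struct clockintr *cl, void *frame)
{
	uint64_t count;

	/* Reschedule ourselves; "count" is how many 10 ms periods elapsed. */
	count = clockintr_advance(cl, 10 * 1000 * 1000);

	/* ... perform "count" iterations of periodic work with "frame" ... */
	(void)count;
	(void)frame;
}

Because the handler only ever touches &cq->cq_shadow, the call above never
enters cq_mtx; the request becomes visible to clockintr_dispatch(), which
applies or drops it under the mutex once the handler returns.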
---
diff --git a/sys/kern/kern_clockintr.c b/sys/kern/kern_clockintr.c
index eb1351c37bb..d683e65ce9e 100644
--- a/sys/kern/kern_clockintr.c
+++ b/sys/kern/kern_clockintr.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: kern_clockintr.c,v 1.9 2023/04/05 00:23:06 cheloha Exp $ */
+/*	$OpenBSD: kern_clockintr.c,v 1.10 2023/04/16 21:19:26 cheloha Exp $ */
 /*
  * Copyright (c) 2003 Dale Rahn
  * Copyright (c) 2020 Mark Kettenis
@@ -114,6 +114,7 @@ clockintr_cpu_init(const struct intrclock *ic)
 	KASSERT(ISSET(clockintr_flags, CL_INIT));
 
 	if (!ISSET(cq->cq_flags, CL_CPU_INIT)) {
+		cq->cq_shadow.cl_queue = cq;
 		mtx_init(&cq->cq_mtx, IPL_CLOCK);
 		TAILQ_INIT(&cq->cq_est);
 		TAILQ_INIT(&cq->cq_pend);
@@ -239,13 +240,23 @@ clockintr_dispatch(void *frame)
 			break;
 		}
 		clockintr_cancel_locked(cl);
+		cq->cq_shadow.cl_expiration = cl->cl_expiration;
 		cq->cq_running = cl;
 		mtx_leave(&cq->cq_mtx);
 
-		cl->cl_func(cl, frame);
+		cl->cl_func(&cq->cq_shadow, frame);
 
 		mtx_enter(&cq->cq_mtx);
 		cq->cq_running = NULL;
+		if (ISSET(cl->cl_flags, CLST_IGNORE_SHADOW)) {
+			CLR(cl->cl_flags, CLST_IGNORE_SHADOW);
+			CLR(cq->cq_shadow.cl_flags, CLST_SHADOW_PENDING);
+		}
+		if (ISSET(cq->cq_shadow.cl_flags, CLST_SHADOW_PENDING)) {
+			CLR(cq->cq_shadow.cl_flags, CLST_SHADOW_PENDING);
+			clockintr_schedule_locked(cl,
+			    cq->cq_shadow.cl_expiration);
+		}
 		run++;
 	}
 
@@ -293,12 +304,20 @@ clockintr_advance(struct clockintr *cl, uint64_t period)
 	uint64_t count, expiration;
 	struct clockintr_queue *cq = cl->cl_queue;
 
+	if (cl == &cq->cq_shadow) {
+		count = nsec_advance(&cl->cl_expiration, period, cq->cq_uptime);
+		SET(cl->cl_flags, CLST_SHADOW_PENDING);
+		return count;
+	}
+
 	mtx_enter(&cq->cq_mtx);
 	expiration = cl->cl_expiration;
 	count = nsec_advance(&expiration, period, cq->cq_uptime);
 	if (ISSET(cl->cl_flags, CLST_PENDING))
 		clockintr_cancel_locked(cl);
 	clockintr_schedule_locked(cl, expiration);
+	if (cl == cq->cq_running)
+		SET(cl->cl_flags, CLST_IGNORE_SHADOW);
 	mtx_leave(&cq->cq_mtx);
 	return count;
 }
@@ -308,9 +327,16 @@ clockintr_cancel(struct clockintr *cl)
 {
 	struct clockintr_queue *cq = cl->cl_queue;
 
+	if (cl == &cq->cq_shadow) {
+		CLR(cl->cl_flags, CLST_SHADOW_PENDING);
+		return;
+	}
+
 	mtx_enter(&cq->cq_mtx);
 	if (ISSET(cl->cl_flags, CLST_PENDING))
 		clockintr_cancel_locked(cl);
+	if (cl == cq->cq_running)
+		SET(cl->cl_flags, CLST_IGNORE_SHADOW);
 	mtx_leave(&cq->cq_mtx);
 }
 
@@ -350,6 +376,9 @@ clockintr_expiration(const struct clockintr *cl)
 	uint64_t expiration;
 	struct clockintr_queue *cq = cl->cl_queue;
 
+	if (cl == &cq->cq_shadow)
+		return cl->cl_expiration;
+
 	mtx_enter(&cq->cq_mtx);
 	expiration = cl->cl_expiration;
 	mtx_leave(&cq->cq_mtx);
@@ -361,10 +390,18 @@ clockintr_schedule(struct clockintr *cl, uint64_t expiration)
 {
 	struct clockintr_queue *cq = cl->cl_queue;
 
+	if (cl == &cq->cq_shadow) {
+		cl->cl_expiration = expiration;
+		SET(cl->cl_flags, CLST_SHADOW_PENDING);
+		return;
+	}
+
 	mtx_enter(&cq->cq_mtx);
 	if (ISSET(cl->cl_flags, CLST_PENDING))
 		clockintr_cancel_locked(cl);
 	clockintr_schedule_locked(cl, expiration);
+	if (cl == cq->cq_running)
+		SET(cl->cl_flags, CLST_IGNORE_SHADOW);
 	mtx_leave(&cq->cq_mtx);
 }
 
@@ -456,6 +493,7 @@ clockintr_setstatclockrate(int freq)
 uint64_t
 clockintr_nsecuptime(const struct clockintr *cl)
 {
+	KASSERT(cl == &cl->cl_queue->cq_shadow);
 	return cl->cl_queue->cq_uptime;
 }
 
diff --git a/sys/sys/clockintr.h b/sys/sys/clockintr.h
index df81cab5ae3..d3c431c07b5 100644
--- a/sys/sys/clockintr.h
+++ b/sys/sys/clockintr.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: clockintr.h,v 1.4 2023/04/03 00:20:24 cheloha Exp $ */
+/*	$OpenBSD: clockintr.h,v 1.5 2023/04/16 21:19:26 cheloha Exp $ */
 /*
  * Copyright (c) 2020-2022 Scott Cheloha
  *
@@ -75,7 +75,9 @@ struct clockintr {
 	u_int cl_flags;				/* [m] CLST_* flags */
 };
 
-#define CLST_PENDING	0x00000001	/* scheduled to run */
+#define CLST_PENDING		0x00000001	/* scheduled to run */
+#define CLST_SHADOW_PENDING	0x00000002	/* shadow is scheduled to run */
+#define CLST_IGNORE_SHADOW	0x00000004	/* ignore shadow copy */
 
 /*
  * Per-CPU clock interrupt state.
@@ -88,6 +90,7 @@ struct clockintr {
  *	o	Owned by a single CPU.
  */
 struct clockintr_queue {
+	struct clockintr cq_shadow;	/* [o] copy of running clockintr */
 	struct mutex cq_mtx;		/* [a] per-queue mutex */
 	uint64_t cq_uptime;		/* [o] cached uptime */
 	TAILQ_HEAD(, clockintr) cq_est;	/* [m] established clockintr list */
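The other side of the priority rule, as seen by code outside the dispatch
loop, is sketched below.  The function names are hypothetical; the calls
are the clockintr_schedule()/clockintr_cancel() entry points changed above.

#include <sys/types.h>
#include <sys/clockintr.h>

/*
 * Hypothetical caller: retargeting a clock interrupt from another
 * thread.  Outside clockintr_dispatch() "cl" is the real clockintr, so
 * clockintr_schedule() takes cq_mtx.  If cl_func() is running
 * concurrently, CLST_IGNORE_SHADOW is set and whatever the handler
 * requested on its shadow copy is dropped: this outside schedule wins.
 */
void
example_retarget_intr(struct clockintr *cl, uint64_t abs_ns)
{
	/* One-shot: expire at "abs_ns" nanoseconds of system uptime. */
	clockintr_schedule(cl, abs_ns);
}

/*
 * Stopping a periodic clock interrupt follows the same pattern: the
 * cancel takes priority over any self-advance the running handler
 * requests, so no barrier (cf. timeout_del_barrier(9) for timeouts)
 * is needed afterwards.
 */
void
example_stop_intr(struct clockintr *cl)
{
	clockintr_cancel(cl);
}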