./usr/include/sys/cdefs.h
./usr/include/sys/cdio.h
./usr/include/sys/chio.h
+./usr/include/sys/clockintr.h
./usr/include/sys/conf.h
./usr/include/sys/core.h
./usr/include/sys/ctf.h
./usr/share/man/man9/bufq_init.9
./usr/share/man/man9/bus_dma.9
./usr/share/man/man9/bus_space.9
+./usr/share/man/man9/clockintr.9
./usr/share/man/man9/cond_init.9
./usr/share/man/man9/config_attach.9
./usr/share/man/man9/config_defer.9
-# $OpenBSD: Makefile,v 1.307 2022/03/10 15:19:01 bluhm Exp $
+# $OpenBSD: Makefile,v 1.308 2022/11/05 19:29:45 cheloha Exp $
# $NetBSD: Makefile,v 1.4 1996/01/09 03:23:01 thorpej Exp $
# Makefile for section 9 (kernel function and variable) manual pages.
audio.9 autoconf.9 \
bemtoh32.9 bio_register.9 bintimeadd.9 boot.9 bpf_mtap.9 buffercache.9 \
bufq_init.9 bus_dma.9 bus_space.9 \
+ clockintr.9 \
copy.9 cond_init.9 config_attach.9 config_defer.9 counters_alloc.9 \
cpumem_get.9 crypto.9 \
delay.9 disk.9 disklabel.9 dma_alloc.9 dohooks.9 \
--- /dev/null
+.\" $OpenBSD: clockintr.9,v 1.1 2022/11/05 19:29:45 cheloha Exp $
+.\"
+.\" Copyright (c) 2020-2022 Scott Cheloha <cheloha@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: November 5 2022 $
+.Dt CLOCKINTR 9
+.Os
+.Sh NAME
+.Nm clockintr_cpu_init ,
+.Nm clockintr_dispatch ,
+.Nm clockintr_init ,
+.Nm clockintr_setstatclockrate ,
+.Nm clockintr_trigger
+.Nd clock interrupt scheduler
+.Sh SYNOPSIS
+.In sys/clockintr.h
+.Ft void
+.Fo clockintr_init
+.Fa "u_int flags"
+.Fc
+.Ft void
+.Fo clockintr_cpu_init
+.Fa "struct intrclock *ic"
+.Fc
+.Ft int
+.Fo clockintr_dispatch
+.Fa "void *frame"
+.Fc
+.Ft void
+.Fo clockintr_setstatclockrate
+.Fa "int freq"
+.Fc
+.Ft void
+.Fo clockintr_trigger
+.Fa "void"
+.Fc
+.In sys/kernel.h
+.Vt extern int hz;
+.Vt extern int stathz;
+.Vt extern int profhz;
+.In sys/sched.h
+.Vt extern int schedhz;
+.Sh DESCRIPTION
+The
+.Nm
+subsystem maintains a schedule of events,
+dispatches expired events,
+and rearms the local interrupt clock for each CPU in the system.
+.Pp
+The
+.Fn clockintr_init
+function initializes the subsystem as follows:
+.Bl -dash
+.It
+.Xr hardclock 9
+is configured to run
+.Xr hz 9
+times per second on each CPU.
+It is an error if
+.Vt hz
+is less than one or greater than one billion.
+.It
+.Fn statclock
+is configured to run
+.Vt stathz
+times per second on each CPU.
+It is an error if
+.Vt stathz
+is less than one or greater than one billion.
+.It
+When profiling is active,
+.Fn statclock
+will be reconfigured to run
+.Vt profhz
+times per second on each CPU.
+.Vt profhz
+must be a non-zero integer multiple of
+.Vt stathz .
+It is an error if
+.Vt profhz
+is less than
+.Vt stathz
+or greater than one billion.
+.It
+If
+.Vt schedhz
+is non-zero,
+.Fn schedclock
+is configured to run
+.Vt schedhz
+times per second on each CPU.
+It is an error if
+.Vt schedhz
+is less than zero or greater than one billion.
+.El
+.Pp
+The event schedule has a resolution of one nanosecond, and event periods
+are computed using integer division.
+If
+.Vt hz ,
+.Vt stathz ,
+.Vt profhz ,
+or
+.Vt schedhz
+does not divide evenly into one billion,
+the corresponding event will not be dispatched at the specified frequency.
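+For example,
+if
+.Vt hz
+is 100, the hardclock period is exactly 10000000 nanoseconds,
+but if
+.Vt hz
+is 1024, the period is truncated to 976562 nanoseconds and
+.Xr hardclock 9
+effectively runs slightly more than 1024 times per second.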
+.Pp
+The
+.Fn clockintr_init
+function accepts the bitwise OR of zero or more of the following
+.Fa flags :
+.Bl -tag -width CL_RNDSTAT
+.It Dv CL_RNDSTAT
+Randomize the
+.Fn statclock .
+Instead of using a fixed period,
+the subsystem will select pseudorandom intervals in a range such that
+the average
+.Fn statclock
+period is equal to the inverse of
+.Vt stathz .
+.El
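+.Pp
+To illustrate
+.Dv CL_RNDSTAT
+with concrete numbers:
+if
+.Vt stathz
+is 128, the average period is 7812500 nanoseconds;
+the current implementation draws each period from a window roughly
+two milliseconds wide centered on that average,
+so individual periods vary from about 6.8 to 8.9 milliseconds while
+the long-run average remains 7812500 nanoseconds.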
+.Pp
+The
+.Fn clockintr_init
+function must be called exactly once and only by the primary CPU.
+It should be called after all timecounters are installed with
+.Xr tc_init 9 .
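+.Pp
+A typical call from a machine-dependent
+.Fn cpu_initclocks
+routine that wants the randomized statclock looks like this:
+.Pp
+.Dl clockintr_init(CL_RNDSTAT);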
+.Pp
+The
+.Fn clockintr_cpu_init
+function prepares the calling CPU for
+.Fn clockintr_dispatch .
+The first time it is called on a given CPU,
+if
+.Fa ic
+is not
+.Dv NULL ,
+the caller is configured to use the given
+.Fa intrclock
+during
+.Fn clockintr_dispatch ;
+otherwise the caller is responsible for rearming its own interrupt
+clock after each
+.Fn clockintr_dispatch .
+Subsequent calls ignore
+.Fa ic :
+instead,
+the caller's event schedule is advanced past any expired events
+without dispatching those events.
+It is an error to call this function before the subsystem is initialized with
+.Fn clockintr_init .
+All CPUs should call
+.Fn clockintr_cpu_init
+during each system resume after the system time is updated with
+.Xr inittodr 9 ;
+otherwise they will needlessly dispatch every event that expired while
+the system was suspended.
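+The resume path in
+.Pa sys/kern/subr_suspend.c
+does exactly this for the resuming CPU:
+.Bd -literal -offset indent
+inittodr(gettime());
+#ifdef __HAVE_CLOCKINTR
+ clockintr_cpu_init(NULL);
+ clockintr_trigger();
+#endif
+.Ed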
+.Pp
+The
+.Fn clockintr_dispatch
+function executes all expired events on the caller's event schedule and,
+if configured,
+rearms the caller's interrupt clock to fire when the next event is scheduled
+to expire.
+The
+.Fa frame
+argument must point to the caller's
+.Vt clockframe
+structure.
+The
+.Fn clockintr_dispatch
+function should only be called from a clock interrupt handler at
+.Dv IPL_CLOCK
+.Pq see Xr spl 9 .
+It is an error to call this function on a given CPU before
+.Fn clockintr_cpu_init .
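+For example, a machine-dependent clock interrupt handler is typically
+a thin wrapper around this function
+.Pq the handler name here is hypothetical :
+.Bd -literal -offset indent
+int
+myclock_intr(void *frame)
+{
+ /* Acknowledge the interrupt if the hardware requires it. */
+ clockintr_dispatch(frame);
+ return 1;
+}
+.Ed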
+.Pp
+The
+.Fn clockintr_setstatclockrate
+function changes the effective dispatch frequency for
+.Fn statclock
+to
+.Fa freq .
+It should be called from the machine-dependent
+.Fn setstatclockrate
+function after performing any needed hardware reconfiguration.
+It is an error if
+.Fa freq
+is not equal to
+.Vt stathz
+or
+.Vt profhz .
+It is an error to call this function before the subsystem is initialized with
+.Fn clockintr_init .
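+On hardware that needs no reprogramming to change the statclock
+frequency, the machine-dependent
+.Fn setstatclockrate
+function may be a sketch as simple as this:
+.Bd -literal -offset indent
+void
+setstatclockrate(int newhz)
+{
+ clockintr_setstatclockrate(newhz);
+}
+.Ed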
+.Pp
+The
+.Fn clockintr_trigger
+function causes the
+.Fn clockintr_dispatch
+function to run in the appropriate context as soon as possible if
+the caller was configured with an
+.Fa intrclock
+when
+.Fn clockintr_cpu_init
+was first called.
+If the caller was not configured with an
+.Fa intrclock ,
+the function does nothing.
+It is an error to call this function on a given CPU before
+.Fn clockintr_cpu_init .
+.Pp
+The
+.Fa ic
+argument to
+.Fn clockintr_cpu_init
+points to an
+.Fa intrclock
+structure:
+.Bd -literal -offset indent
+struct intrclock {
+ void *ic_cookie;
+ void (*ic_rearm)(void *cookie, uint64_t nsecs);
+ void (*ic_trigger)(void *cookie);
+};
+.Ed
+.Pp
+The
+.Fa intrclock
+structure provides the
+.Nm
+subsystem with a uniform interface for manipulating an interrupt clock.
+It has the following members:
+.Bl -tag -width XXXXXXXXXX
+.It Fa ic_cookie
+May point to any resources needed during
+.Fa ic_rearm
+or
+.Fa ic_trigger
+to arm the underlying interrupt clock
+.Pq see below .
+.It Fa ic_rearm
+Should cause
+.Fn clockintr_dispatch
+to run on the calling CPU in the appropriate context after at least
+.Fa nsecs
+nanoseconds have elapsed.
+The first argument,
+.Fa cookie ,
+is the
+.Fa ic_cookie
+member of the parent structure.
+The second argument,
+.Fa nsecs ,
+is a non-zero count of nanoseconds.
+.It Fa ic_trigger
+Should cause
+.Fn clockintr_dispatch
+to run on the calling CPU in the appropriate context as soon as possible.
+The first argument,
+.Fa cookie ,
+is the
+.Fa ic_cookie
+member of the parent structure.
+.El
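+.Pp
+A hypothetical driver for a per-CPU one-shot timer might glue itself
+to the subsystem as follows
+.Pq all Dq mytimer names are illustrative :
+.Bd -literal -offset indent
+struct mytimer_softc {
+ /* ... timer registers, etc. ... */
+} mytimer_sc;
+
+void
+mytimer_rearm(void *cookie, uint64_t nsecs)
+{
+ struct mytimer_softc *sc = cookie;
+
+ /* Program sc's timer to fire once, nsecs nanoseconds from now. */
+}
+
+void
+mytimer_trigger(void *cookie)
+{
+ struct mytimer_softc *sc = cookie;
+
+ /* Force sc's timer to fire as soon as possible. */
+}
+
+struct intrclock mytimer_intrclock = {
+ .ic_cookie = &mytimer_sc,
+ .ic_rearm = mytimer_rearm,
+ .ic_trigger = mytimer_trigger,
+};
+
+/* Once per CPU, e.g. from cpu_initclocks(): */
+clockintr_cpu_init(&mytimer_intrclock);
+.Ed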
+.Sh CONTEXT
+The
+.Fn clockintr_init ,
+.Fn clockintr_cpu_init ,
+and
+.Fn clockintr_trigger
+functions may be called during autoconf.
+.Pp
+The
+.Fn clockintr_dispatch
+function may be called from interrupt context at
+.Dv IPL_CLOCK .
+.Pp
+The
+.Fn clockintr_setstatclockrate
+function may be called during autoconf,
+from process context,
+or from interrupt context.
+.Sh RETURN VALUES
+The
+.Fn clockintr_dispatch
+function returns non-zero if at least one event was dispatched,
+otherwise it returns zero.
+.Sh CODE REFERENCES
+.Pa sys/kern/kern_clockintr.c
+.Sh SEE ALSO
+.Xr hardclock 9 ,
+.Xr hz 9 ,
+.Xr inittodr 9 ,
+.Xr nanouptime 9 ,
+.Xr spl 9 ,
+.Xr tc_init 9 ,
+.Xr timeout 9
+.Rs
+.%A Steven McCanne
+.%A Chris Torek
+.%T A Randomized Sampling Clock for CPU Utilization Estimation and Code Profiling
+.%B Proceedings of the Winter 1993 USENIX Conference
+.%D 1993
+.%P pp. 387\(en394
+.%I USENIX Association
+.Re
+.Rs
+.%A Richard McDougall
+.%A Jim Mauro
+.%B Solaris Internals: Solaris 10 and OpenSolaris Kernel Architecture
+.%I Prentice Hall
+.%I Sun Microsystems Press
+.%D 2nd Edition, 2007
+.%P pp. 912\(en925
+.Re
+.Sh HISTORY
+The
+.Nm
+subsystem first appeared in
+.Ox 7.3 .
-# $OpenBSD: files,v 1.716 2022/07/29 17:47:11 semarie Exp $
+# $OpenBSD: files,v 1.717 2022/11/05 19:29:45 cheloha Exp $
# $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93
file kern/kern_acct.c accounting
file kern/kern_bufq.c
file kern/kern_clock.c
+file kern/kern_clockintr.c
file kern/kern_descrip.c
file kern/kern_event.c
file kern/kern_exec.c
-/* $OpenBSD: db_command.c,v 1.96 2022/07/29 17:47:11 semarie Exp $ */
+/* $OpenBSD: db_command.c,v 1.97 2022/11/05 19:29:45 cheloha Exp $ */
/* $NetBSD: db_command.c,v 1.20 1996/03/30 22:30:05 christos Exp $ */
/*
const struct db_command db_show_all_cmds[] = {
{ "procs", db_show_all_procs, 0, NULL },
{ "callout", db_show_callout, 0, NULL },
+#ifdef __HAVE_CLOCKINTR
+ { "clockintr", db_show_all_clockintr, 0, NULL },
+#endif
{ "pools", db_show_all_pools, 0, NULL },
{ "mounts", db_show_all_mounts, 0, NULL },
{ "vnodes", db_show_all_vnodes, 0, NULL },
-/* $OpenBSD: db_interface.h,v 1.24 2022/07/29 17:47:11 semarie Exp $ */
+/* $OpenBSD: db_interface.h,v 1.25 2022/11/05 19:29:45 cheloha Exp $ */
/* $NetBSD: db_interface.h,v 1.1 1996/02/05 01:57:03 christos Exp $ */
/*
void db_kill_cmd(db_expr_t, int, db_expr_t, char *);
void db_show_all_procs(db_expr_t, int, db_expr_t, char *);
+/* kern/kern_clockintr.c */
+#ifdef __HAVE_CLOCKINTR
+void db_show_all_clockintr(db_expr_t, int, db_expr_t, char *);
+#endif
+
/* kern/kern_timeout.c */
void db_show_callout(db_expr_t, int, db_expr_t, char *);
--- /dev/null
+/* $OpenBSD: kern_clockintr.c,v 1.1 2022/11/05 19:29:46 cheloha Exp $ */
+/*
+ * Copyright (c) 2003 Dale Rahn <drahn@openbsd.org>
+ * Copyright (c) 2020 Mark Kettenis <kettenis@openbsd.org>
+ * Copyright (c) 2020-2022 Scott Cheloha <cheloha@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/atomic.h>
+#include <sys/clockintr.h>
+#include <sys/kernel.h>
+#include <sys/mutex.h>
+#include <sys/stdint.h>
+#include <sys/sysctl.h>
+#include <sys/time.h>
+
+#ifdef __HAVE_CLOCKINTR
+
+/*
+ * Protection for global variables in this file:
+ *
+ * C Global clockintr configuration mutex (clockintr_mtx).
+ * I Immutable after initialization.
+ */
+struct mutex clockintr_mtx = MUTEX_INITIALIZER(IPL_CLOCK);
+
+u_int clockintr_flags; /* [I] global state + behavior flags */
+uint32_t hardclock_period; /* [I] hardclock period (ns) */
+uint32_t schedclock_period; /* [I] schedclock period (ns) */
+volatile u_int statclock_gen = 1; /* [C] statclock update generation */
+volatile uint32_t statclock_avg; /* [C] average statclock period (ns) */
+uint32_t statclock_min; /* [C] minimum statclock period (ns) */
+uint32_t statclock_mask; /* [C] set of allowed offsets */
+uint32_t stat_avg; /* [I] average stathz period (ns) */
+uint32_t stat_min; /* [I] minimum stathz period (ns) */
+uint32_t stat_mask; /* [I] set of allowed offsets */
+uint32_t prof_avg; /* [I] average profhz period (ns) */
+uint32_t prof_min; /* [I] minimum profhz period (ns) */
+uint32_t prof_mask; /* [I] set of allowed offsets */
+
+void clockintr_statvar_init(int, uint32_t *, uint32_t *, uint32_t *);
+uint64_t nsec_advance(uint64_t *, uint64_t, uint64_t);
+
+/*
+ * Initialize global state. Set flags and compute intervals.
+ */
+void
+clockintr_init(u_int flags)
+{
+ KASSERT(CPU_IS_PRIMARY(curcpu()));
+ KASSERT(clockintr_flags == 0);
+ KASSERT(!ISSET(flags, ~CL_FLAG_MASK));
+
+ KASSERT(hz > 0 && hz <= 1000000000);
+ hardclock_period = 1000000000 / hz;
+
+ KASSERT(stathz >= 1 && stathz <= 1000000000);
+ KASSERT(profhz >= stathz && profhz <= 1000000000);
+ KASSERT(profhz % stathz == 0);
+ clockintr_statvar_init(stathz, &stat_avg, &stat_min, &stat_mask);
+ clockintr_statvar_init(profhz, &prof_avg, &prof_min, &prof_mask);
+ SET(clockintr_flags, CL_STATCLOCK);
+ clockintr_setstatclockrate(stathz);
+
+ KASSERT(schedhz >= 0 && schedhz <= 1000000000);
+ if (schedhz != 0) {
+ schedclock_period = 1000000000 / schedhz;
+ SET(clockintr_flags, CL_SCHEDCLOCK);
+ }
+
+ SET(clockintr_flags, flags | CL_INIT);
+}
+
+/*
+ * Ready the calling CPU for clockintr_dispatch(). If this is our
+ * first time here, install the intrclock, if any, and set necessary
+ * flags. Advance the schedule as needed.
+ */
+void
+clockintr_cpu_init(const struct intrclock *ic)
+{
+ uint64_t multiplier, now;
+ struct cpu_info *ci = curcpu();
+ struct clockintr_queue *cq = &ci->ci_queue;
+
+ KASSERT(ISSET(clockintr_flags, CL_INIT));
+
+ if (!ISSET(cq->cq_flags, CL_CPU_INIT)) {
+ if (ic != NULL) {
+ cq->cq_intrclock = *ic;
+ SET(cq->cq_flags, CL_CPU_INTRCLOCK);
+ }
+ cq->cq_gen = 1;
+ }
+
+ /*
+ * Until we understand scheduler lock contention better, stagger
+ * the hardclock and statclock so they don't all happen at once.
+ * If we have no intrclock it doesn't matter; we have no control
+ * anyway. The primary CPU's starting offset is always zero, so
+ * set multiplier to zero.
+ */
+ if (!CPU_IS_PRIMARY(ci) && ISSET(cq->cq_flags, CL_CPU_INTRCLOCK))
+ multiplier = CPU_INFO_UNIT(ci);
+ else
+ multiplier = 0;
+
+ now = nsecuptime();
+
+ /*
+ * The first time we do this, the primary CPU cannot skip any
+ * hardclocks. We can skip hardclocks on subsequent calls because
+ * the global tick value is advanced during inittodr(9) on our
+ * behalf.
+ */
+ if (!CPU_IS_PRIMARY(ci) || ISSET(cq->cq_flags, CL_CPU_INIT)) {
+ cq->cq_next_hardclock = hardclock_period / ncpus * multiplier;
+ nsec_advance(&cq->cq_next_hardclock, hardclock_period, now);
+ }
+
+ /*
+ * We can always advance the statclock and schedclock.
+ */
+ cq->cq_next_statclock = stat_avg / ncpus * multiplier;
+ nsec_advance(&cq->cq_next_statclock, stat_avg, now);
+ if (ISSET(clockintr_flags, CL_SCHEDCLOCK)) {
+ cq->cq_next_schedclock = schedclock_period / ncpus * multiplier;
+ nsec_advance(&cq->cq_next_schedclock, schedclock_period, now);
+ }
+
+ SET(cq->cq_flags, CL_CPU_INIT);
+}
+
+/*
+ * If we have an intrclock, trigger it to start the dispatch cycle.
+ */
+void
+clockintr_trigger(void)
+{
+ struct clockintr_queue *cq = &curcpu()->ci_queue;
+
+ KASSERT(ISSET(cq->cq_flags, CL_CPU_INIT));
+
+ if (ISSET(cq->cq_flags, CL_CPU_INTRCLOCK))
+ intrclock_trigger(&cq->cq_intrclock);
+}
+
+/*
+ * Run all expired events scheduled on the calling CPU.
+ */
+int
+clockintr_dispatch(void *frame)
+{
+ uint64_t count, i, lateness, now, run = 0, start;
+ struct cpu_info *ci = curcpu();
+ struct clockintr_queue *cq = &ci->ci_queue;
+ struct proc *p = curproc;
+ uint32_t mask, min, off;
+ u_int gen, ogen;
+
+ if (cq->cq_dispatch != 0)
+ panic("%s: recursive dispatch", __func__);
+ cq->cq_dispatch = 1;
+
+ splassert(IPL_CLOCK);
+ KASSERT(ISSET(cq->cq_flags, CL_CPU_INIT));
+
+ /*
+ * If we arrived too early we have nothing to do.
+ */
+ start = nsecuptime();
+ now = start;
+ if (now < cq->cq_next)
+ goto done;
+ lateness = now - cq->cq_next;
+
+ /*
+ * Dispatch expired events.
+ */
+again:
+ /* hardclock */
+ count = nsec_advance(&cq->cq_next_hardclock, hardclock_period, now);
+ for (i = 0; i < count; i++)
+ hardclock(frame);
+ run += count;
+
+ /* statclock */
+ if (ISSET(clockintr_flags, CL_RNDSTAT)) {
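+ /*
+ * Locklessly read the statclock variables. Spin until the
+ * generation number is stable and non-zero: it is zero
+ * while clockintr_setstatclockrate() is updating them.
+ */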
+ do {
+ gen = statclock_gen;
+ membar_consumer();
+ min = statclock_min;
+ mask = statclock_mask;
+ membar_consumer();
+ } while (gen == 0 || gen != statclock_gen);
+ count = 0;
+ while (cq->cq_next_statclock <= now) {
+ while ((off = (random() & mask)) == 0)
+ continue;
+ cq->cq_next_statclock += min + off;
+ count++;
+ }
+ } else {
+ count = nsec_advance(&cq->cq_next_statclock, statclock_avg,
+ now);
+ }
+ for (i = 0; i < count; i++)
+ statclock(frame);
+ run += count;
+
+ /* schedclock */
+ if (ISSET(clockintr_flags, CL_SCHEDCLOCK)) {
+ count = nsec_advance(&cq->cq_next_schedclock,
+ schedclock_period, now);
+ if (p != NULL) {
+ for (i = 0; i < count; i++)
+ schedclock(p);
+ }
+ run += count;
+ }
+
+ /* Run the dispatch again if the next event has already expired. */
+ cq->cq_next = cq->cq_next_hardclock;
+ if (cq->cq_next_statclock < cq->cq_next)
+ cq->cq_next = cq->cq_next_statclock;
+ if (ISSET(clockintr_flags, CL_SCHEDCLOCK)) {
+ if (cq->cq_next_schedclock < cq->cq_next)
+ cq->cq_next = cq->cq_next_schedclock;
+ }
+ now = nsecuptime();
+ if (cq->cq_next <= now)
+ goto again;
+
+ /*
+ * Dispatch complete.
+ */
+done:
+ /* Rearm the interrupt clock if we have one. */
+ if (ISSET(cq->cq_flags, CL_CPU_INTRCLOCK))
+ intrclock_rearm(&cq->cq_intrclock, cq->cq_next - now);
+
+ /* Update our stats. */
+ ogen = cq->cq_gen;
+ cq->cq_gen = 0;
+ membar_producer();
+ cq->cq_stat.cs_dispatched += now - start;
+ if (run > 0) {
+ cq->cq_stat.cs_lateness += lateness;
+ cq->cq_stat.cs_prompt++;
+ cq->cq_stat.cs_run += run;
+ } else {
+ cq->cq_stat.cs_early++;
+ cq->cq_stat.cs_earliness += cq->cq_next - now;
+ }
+ membar_producer();
+ cq->cq_gen = MAX(1, ogen + 1);
+
+ if (cq->cq_dispatch != 1)
+ panic("%s: unexpected value: %u", __func__, cq->cq_dispatch);
+ cq->cq_dispatch = 0;
+
+ return run > 0;
+}
+
+/*
+ * Compute the period (avg) for the given frequency and a range around
+ * that period. The range is [min + 1, min + mask]. The range is used
+ * during dispatch to choose a new pseudorandom deadline for each statclock
+ * event.
+ */
+void
+clockintr_statvar_init(int freq, uint32_t *avg, uint32_t *min, uint32_t *mask)
+{
+ uint32_t half_avg, var;
+
+ KASSERT(!ISSET(clockintr_flags, CL_INIT | CL_STATCLOCK));
+ KASSERT(freq > 0 && freq <= 1000000000);
+
+ /* Compute avg, the average period. */
+ *avg = 1000000000 / freq;
+
+ /* Find var, the largest power of two such that var <= avg / 2. */
+ half_avg = *avg / 2;
+ for (var = 1U << 31; var > half_avg; var /= 2)
+ continue;
+
+ /* Using avg and var, set a lower bound for the range. */
+ *min = *avg - (var / 2);
+
+ /* The mask is just (var - 1). */
+ *mask = var - 1;
+}
+
+/*
+ * Update the statclock_* variables according to the given frequency.
+ * Must only be called after clockintr_statvar_init() initializes both
+ * the stat_* and prof_* variables.
+ */
+void
+clockintr_setstatclockrate(int freq)
+{
+ u_int ogen;
+
+ KASSERT(ISSET(clockintr_flags, CL_STATCLOCK));
+
+ mtx_enter(&clockintr_mtx);
+
+ ogen = statclock_gen;
+ statclock_gen = 0;
+ membar_producer();
+ if (freq == stathz) {
+ statclock_avg = stat_avg;
+ statclock_min = stat_min;
+ statclock_mask = stat_mask;
+ } else if (freq == profhz) {
+ statclock_avg = prof_avg;
+ statclock_min = prof_min;
+ statclock_mask = prof_mask;
+ } else {
+ panic("%s: frequency is not stathz (%d) or profhz (%d): %d",
+ __func__, stathz, profhz, freq);
+ }
+ membar_producer();
+ statclock_gen = MAX(1, ogen + 1);
+
+ mtx_leave(&clockintr_mtx);
+}
+
+/*
+ * Advance *next in increments of period until it exceeds now.
+ * Returns the number of increments *next was advanced.
+ *
+ * We check the common cases first to avoid division if possible.
+ * This does no overflow checking.
+ */
+uint64_t
+nsec_advance(uint64_t *next, uint64_t period, uint64_t now)
+{
+ uint64_t elapsed;
+
+ if (now < *next)
+ return 0;
+
+ if (now < *next + period) {
+ *next += period;
+ return 1;
+ }
+
+ elapsed = (now - *next) / period + 1;
+ *next += period * elapsed;
+ return elapsed;
+}
+
+int
+sysctl_clockintr(int *name, u_int namelen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen)
+{
+ struct clockintr_stat sum = { 0 }, tmp;
+ struct clockintr_queue *cq;
+ struct cpu_info *ci;
+ CPU_INFO_ITERATOR cii;
+ u_int gen;
+
+ if (namelen != 1)
+ return ENOTDIR;
+
+ switch (name[0]) {
+ case KERN_CLOCKINTR_STATS:
+ CPU_INFO_FOREACH(cii, ci) {
+ cq = &ci->ci_queue;
+ if (!ISSET(cq->cq_flags, CL_CPU_INIT))
+ continue;
+ do {
+ gen = cq->cq_gen;
+ membar_consumer();
+ tmp = cq->cq_stat;
+ membar_consumer();
+ } while (gen == 0 || gen != cq->cq_gen);
+ sum.cs_dispatched += tmp.cs_dispatched;
+ sum.cs_early += tmp.cs_early;
+ sum.cs_earliness += tmp.cs_earliness;
+ sum.cs_lateness += tmp.cs_lateness;
+ sum.cs_prompt += tmp.cs_prompt;
+ sum.cs_run += tmp.cs_run;
+ }
+ return sysctl_rdstruct(oldp, oldlenp, newp, &sum, sizeof sum);
+ default:
+ break;
+ }
+
+ return EINVAL;
+}
+
+#ifdef DDB
+
+#include <machine/db_machdep.h>
+
+#include <ddb/db_interface.h>
+#include <ddb/db_output.h>
+#include <ddb/db_sym.h>
+
+void db_show_clockintr(uint64_t, u_int, const char *);
+void db_show_clockintr_cpu(struct cpu_info *);
+
+void
+db_show_all_clockintr(db_expr_t addr, int haddr, db_expr_t count, char *modif)
+{
+ struct timespec now;
+ struct cpu_info *ci;
+ CPU_INFO_ITERATOR cii;
+
+ nanouptime(&now);
+ db_printf("%20s\n", "UPTIME");
+ db_printf("%10lld.%09ld\n", now.tv_sec, now.tv_nsec);
+ db_printf("\n");
+ db_printf("%20s %3s %s\n", "EXPIRATION", "CPU", "NAME");
+ CPU_INFO_FOREACH(cii, ci) {
+ if (ISSET(ci->ci_queue.cq_flags, CL_CPU_INIT))
+ db_show_clockintr_cpu(ci);
+ }
+}
+
+void
+db_show_clockintr_cpu(struct cpu_info *ci)
+{
+ struct clockintr_queue *cq = &ci->ci_queue;
+ u_int cpu = CPU_INFO_UNIT(ci);
+
+ db_show_clockintr(cq->cq_next_hardclock, cpu, "hardclock");
+ db_show_clockintr(cq->cq_next_statclock, cpu, "statclock");
+ if (ISSET(clockintr_flags, CL_SCHEDCLOCK))
+ db_show_clockintr(cq->cq_next_schedclock, cpu, "schedclock");
+}
+
+void
+db_show_clockintr(uint64_t expiration, u_int cpu, const char *name)
+{
+ struct timespec ts;
+
+ NSEC_TO_TIMESPEC(expiration, &ts);
+ db_printf("%10lld.%09ld %3u %s\n", ts.tv_sec, ts.tv_nsec, cpu, name);
+}
+
+#endif /* DDB */
+#endif /* __HAVE_CLOCKINTR */
-/* $OpenBSD: kern_sysctl.c,v 1.406 2022/08/16 13:29:52 visa Exp $ */
+/* $OpenBSD: kern_sysctl.c,v 1.407 2022/11/05 19:29:46 cheloha Exp $ */
/* $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */
/*-
#include <sys/vnode.h>
#include <sys/unistd.h>
#include <sys/buf.h>
+#include <sys/clockintr.h>
#include <sys/tty.h>
#include <sys/disklabel.h>
#include <sys/disk.h>
case KERN_CPUSTATS:
return (sysctl_cpustats(name, namelen, oldp, oldlenp,
newp, newlen));
+#ifdef __HAVE_CLOCKINTR
+ case KERN_CLOCKINTR:
+ return sysctl_clockintr(name + 1, namelen - 1, oldp, oldlenp,
+ newp, newlen);
+#endif
default:
return (ENOTDIR); /* overloaded */
}
-/* $OpenBSD: subr_suspend.c,v 1.12 2022/09/03 18:05:10 kettenis Exp $ */
+/* $OpenBSD: subr_suspend.c,v 1.13 2022/11/05 19:29:46 cheloha Exp $ */
/*
* Copyright (c) 2005 Thorsten Lockert <tholo@sigmasoft.com>
* Copyright (c) 2005 Jordan Hargrave <jordan@openbsd.org>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
+#include <sys/clockintr.h>
#include <sys/reboot.h>
#include <sys/sensors.h>
#include <sys/sysctl.h>
splx(s);
inittodr(gettime());
+#ifdef __HAVE_CLOCKINTR
+ clockintr_cpu_init(NULL);
+ clockintr_trigger();
+#endif
sleep_resume(v);
resume_randomness(rndbuf, rndbuflen);
#ifdef MULTIPROCESSOR
--- /dev/null
+/* $OpenBSD: clockintr.h,v 1.1 2022/11/05 19:29:46 cheloha Exp $ */
+/*
+ * Copyright (c) 2020-2022 Scott Cheloha <cheloha@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _SYS_CLOCKINTR_H_
+#define _SYS_CLOCKINTR_H_
+
+#include <sys/stdint.h>
+
+struct clockintr_stat {
+ uint64_t cs_dispatched; /* total time in dispatch (ns) */
+ uint64_t cs_early; /* number of early dispatch calls */
+ uint64_t cs_earliness; /* total earliness (ns) */
+ uint64_t cs_lateness; /* total lateness (ns) */
+ uint64_t cs_prompt; /* number of prompt dispatch calls */
+ uint64_t cs_run; /* number of events dispatched */
+};
+
+#ifdef _KERNEL
+
+/*
+ * Platform API
+ */
+
+struct intrclock {
+ void *ic_cookie;
+ void (*ic_rearm)(void *, uint64_t);
+ void (*ic_trigger)(void *);
+};
+
+static inline void
+intrclock_rearm(struct intrclock *ic, uint64_t nsecs)
+{
+ ic->ic_rearm(ic->ic_cookie, nsecs);
+}
+
+static inline void
+intrclock_trigger(struct intrclock *ic)
+{
+ ic->ic_trigger(ic->ic_cookie);
+}
+
+/*
+ * Per-CPU clock interrupt state.
+ *
+ * Struct member protections:
+ *
+ * I Immutable after initialization.
+ * o Owned by a single CPU.
+ */
+struct clockintr_queue {
+ uint64_t cq_next; /* [o] next event expiration */
+ uint64_t cq_next_hardclock; /* [o] next hardclock expiration */
+ uint64_t cq_next_schedclock; /* [o] next schedclock expiration */
+ uint64_t cq_next_statclock; /* [o] next statclock expiration */
+ struct intrclock cq_intrclock; /* [I] local interrupt clock */
+ struct clockintr_stat cq_stat; /* [o] dispatch statistics */
+ volatile u_int cq_gen; /* [o] cq_stat update generation */
+ volatile u_int cq_dispatch; /* [o] dispatch is running */
+ u_int cq_flags; /* [I] local state flags */
+};
+
+/* Global state flags. */
+#define CL_INIT 0x00000001 /* global init done */
+#define CL_STATCLOCK 0x00000002 /* statclock variables set */
+#define CL_SCHEDCLOCK 0x00000004 /* run separate schedclock */
+#define CL_STATE_MASK 0x00000007
+
+/* Global behavior flags. */
+#define CL_RNDSTAT 0x80000000 /* randomized statclock */
+#define CL_FLAG_MASK 0x80000000
+
+/* Per-CPU state flags. */
+#define CL_CPU_INIT 0x00000001 /* CPU is ready for dispatch */
+#define CL_CPU_INTRCLOCK 0x00000002 /* CPU has intrclock */
+#define CL_CPU_STATE_MASK 0x00000003
+
+void clockintr_cpu_init(const struct intrclock *);
+int clockintr_dispatch(void *);
+void clockintr_init(u_int);
+void clockintr_setstatclockrate(int);
+void clockintr_trigger(void);
+
+/*
+ * Kernel API
+ */
+
+int sysctl_clockintr(int *, u_int, void *, size_t *, void *, size_t);
+
+#endif /* _KERNEL */
+
+#endif /* !_SYS_CLOCKINTR_H_ */
-/* $OpenBSD: sysctl.h,v 1.229 2022/08/16 13:29:53 visa Exp $ */
+/* $OpenBSD: sysctl.h,v 1.230 2022/11/05 19:29:46 cheloha Exp $ */
/* $NetBSD: sysctl.h,v 1.16 1996/04/09 20:55:36 cgd Exp $ */
/*
#define KERN_TIMEOUT_STATS 87 /* struct: timeout status and stats */
#define KERN_UTC_OFFSET 88 /* int: adjust RTC time to UTC */
#define KERN_VIDEO 89 /* struct: video properties */
-#define KERN_MAXID 90 /* number of valid kern ids */
+#define KERN_CLOCKINTR 90 /* node: clockintr */
+#define KERN_MAXID 91 /* number of valid kern ids */
#define CTL_KERN_NAMES { \
{ 0, 0 }, \
{ "timeout_stats", CTLTYPE_STRUCT }, \
{ "utc_offset", CTLTYPE_INT }, \
{ "video", CTLTYPE_STRUCT }, \
+ { "clockintr", CTLTYPE_NODE }, \
}
/*
{ "choice", CTLTYPE_STRING }, \
}
+/*
+ * KERN_CLOCKINTR
+ */
+#define KERN_CLOCKINTR_STATS 1 /* struct: stats */
+#define KERN_CLOCKINTR_MAXID 2
+
+#define CTL_KERN_CLOCKINTR_NAMES { \
+ { 0, 0 }, \
+ { "stats", CTLTYPE_STRUCT }, \
+}
+
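+/*
+ * Illustrative userland usage (not part of this header):
+ *
+ *     struct clockintr_stat cs;
+ *     int mib[3] = { CTL_KERN, KERN_CLOCKINTR, KERN_CLOCKINTR_STATS };
+ *     size_t len = sizeof(cs);
+ *
+ *     if (sysctl(mib, 3, &cs, &len, NULL, 0) == -1)
+ *             err(1, "sysctl");
+ */
+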
/*
* CTL_FS identifiers
*/