From 9e761879e36ccabc9a1c69d0234941809e96d51d Mon Sep 17 00:00:00 2001 From: cheloha Date: Tue, 8 Nov 2022 17:34:12 +0000 Subject: [PATCH] amd64: switch to clockintr(9) Switch amd64 to the clockintr(9) subsystem. There are lots of little changes, but the big ones are listed here. When using the local apic timer: - Run the timer in one-shot mode. - lapic_delay() is gone. We can't use it to delay(9) when running the timer in one-shot mode. - Add a randomized statclock(); stathz = hz. - Add support for switching to profhz when profiling is enabled; profhz = stathz * 10. When using the i8254/mc146818: - i8254's clockintr() no longer has a monopoly on hardclock(). - mc146818's rtcintr() no longer has a monopoly on statclock(). - In profiling mode, the statclock() will drift very slightly because (profhz = 1024) does not divide evenly into one billion. We could avoid this by setting (profhz = 512) instead and programming the RTC to run at that rate. Early revisions reviewed by mlarkin@. Extensively tested by mlarkin@ on a variety of physical and virtual hardware. Additional testing from dv@ and jmc@. 
Link: https://marc.info/?l=openbsd-tech&m=166776339203279&w=2 ok kettenis@ mlarkin@ --- sys/arch/amd64/amd64/acpi_machdep.c | 5 +- sys/arch/amd64/amd64/cpu.c | 5 +- sys/arch/amd64/amd64/lapic.c | 121 +++++++++++----------------- sys/arch/amd64/include/_types.h | 4 +- sys/arch/amd64/include/cpu.h | 5 +- sys/arch/amd64/isa/clock.c | 29 ++++--- 6 files changed, 74 insertions(+), 95 deletions(-) diff --git a/sys/arch/amd64/amd64/acpi_machdep.c b/sys/arch/amd64/amd64/acpi_machdep.c index 1d979315716..502bd70a7ed 100644 --- a/sys/arch/amd64/amd64/acpi_machdep.c +++ b/sys/arch/amd64/amd64/acpi_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: acpi_machdep.c,v 1.105 2022/09/20 14:28:27 robert Exp $ */ +/* $OpenBSD: acpi_machdep.c,v 1.106 2022/11/08 17:34:12 cheloha Exp $ */ /* * Copyright (c) 2005 Thorsten Lockert * @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -469,8 +470,6 @@ acpi_resume_cpu(struct acpi_softc *sc, int state) #if NLAPIC > 0 lapic_enable(); - if (initclock_func == lapic_initclocks) - lapic_startclock(); lapic_set_lvt(); #endif diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c index 22736b3fc3e..288a31704af 100644 --- a/sys/arch/amd64/amd64/cpu.c +++ b/sys/arch/amd64/amd64/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.c,v 1.161 2022/09/22 04:36:37 robert Exp $ */ +/* $OpenBSD: cpu.c,v 1.162 2022/11/08 17:34:13 cheloha Exp $ */ /* $NetBSD: cpu.c,v 1.1 2003/04/26 18:39:26 fvdl Exp $ */ /*- @@ -937,7 +937,6 @@ cpu_hatch(void *v) atomic_setbits_int(&ci->ci_flags, CPUF_PRESENT); lapic_enable(); - lapic_startclock(); cpu_ucode_apply(ci); cpu_tsx_disable(ci); @@ -1004,6 +1003,8 @@ cpu_hatch(void *v) nanouptime(&ci->ci_schedstate.spc_runtime); splx(s); + lapic_startclock(); + SCHED_LOCK(s); cpu_switchto(NULL, sched_chooseproc()); } diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c index 3e850b66aab..3b8c9eaa15a 100644 --- a/sys/arch/amd64/amd64/lapic.c +++ b/sys/arch/amd64/amd64/lapic.c @@ -1,4 +1,4 @@ -/* 
$OpenBSD: lapic.c,v 1.63 2022/09/10 01:30:14 cheloha Exp $ */ +/* $OpenBSD: lapic.c,v 1.64 2022/11/08 17:34:13 cheloha Exp $ */ /* $NetBSD: lapic.c,v 1.2 2003/05/08 01:04:35 fvdl Exp $ */ /*- @@ -34,6 +34,8 @@ #include #include +#include +#include #include #include @@ -72,7 +74,6 @@ struct evcount clk_count; struct evcount ipi_count; #endif -void lapic_delay(int); static u_int32_t lapic_gettick(void); void lapic_clockintr(void *, struct intrframe); void lapic_initclocks(void); @@ -400,19 +401,43 @@ lapic_gettick(void) #include /* for hz */ -u_int32_t lapic_tval; - /* * this gets us up to a 4GHz busclock.... */ u_int32_t lapic_per_second = 0; -u_int32_t lapic_frac_usec_per_cycle; -u_int64_t lapic_frac_cycle_per_usec; -u_int32_t lapic_delaytab[26]; +uint64_t lapic_timer_nsec_cycle_ratio; +uint64_t lapic_timer_nsec_max; + +void lapic_timer_rearm(void *, uint64_t); +void lapic_timer_trigger(void *); + +const struct intrclock lapic_timer_intrclock = { + .ic_rearm = lapic_timer_rearm, + .ic_trigger = lapic_timer_trigger +}; void lapic_timer_oneshot(uint32_t, uint32_t); void lapic_timer_periodic(uint32_t, uint32_t); +void +lapic_timer_rearm(void *unused, uint64_t nsecs) +{ + uint32_t cycles; + + if (nsecs > lapic_timer_nsec_max) + nsecs = lapic_timer_nsec_max; + cycles = (nsecs * lapic_timer_nsec_cycle_ratio) >> 32; + if (cycles == 0) + cycles = 1; + lapic_timer_oneshot(0, cycles); +} + +void +lapic_timer_trigger(void *unused) +{ + lapic_timer_oneshot(0, 1); +} + /* * Start the local apic countdown timer. 
* @@ -448,7 +473,7 @@ lapic_clockintr(void *arg, struct intrframe frame) floor = ci->ci_handled_intr_level; ci->ci_handled_intr_level = ci->ci_ilevel; - hardclock((struct clockframe *)&frame); + clockintr_dispatch(&frame); ci->ci_handled_intr_level = floor; clk_count.ec_count++; @@ -457,13 +482,19 @@ lapic_clockintr(void *arg, struct intrframe frame) void lapic_startclock(void) { - lapic_timer_periodic(0, lapic_tval); + clockintr_cpu_init(&lapic_timer_intrclock); + clockintr_trigger(); } void lapic_initclocks(void) { i8254_inittimecounter_simple(); + + stathz = hz; + profhz = stathz * 10; + clockintr_init(CL_RNDSTAT); + lapic_startclock(); } @@ -555,74 +586,14 @@ skip_calibration: printf("%s: apic clock running at %dMHz\n", ci->ci_dev->dv_xname, lapic_per_second / (1000 * 1000)); - if (lapic_per_second != 0) { - /* - * reprogram the apic timer to run in periodic mode. - * XXX need to program timer on other cpu's, too. - */ - lapic_tval = (lapic_per_second * 2) / hz; - lapic_tval = (lapic_tval / 2) + (lapic_tval & 0x1); - - lapic_timer_periodic(LAPIC_LVTT_M, lapic_tval); - - /* - * Compute fixed-point ratios between cycles and - * microseconds to avoid having to do any division - * in lapic_delay. - */ - - tmp = (1000000 * (u_int64_t)1 << 32) / lapic_per_second; - lapic_frac_usec_per_cycle = tmp; - - tmp = (lapic_per_second * (u_int64_t)1 << 32) / 1000000; - - lapic_frac_cycle_per_usec = tmp; - - /* - * Compute delay in cycles for likely short delays in usec. - */ - for (i = 0; i < 26; i++) - lapic_delaytab[i] = (lapic_frac_cycle_per_usec * i) >> - 32; - - /* - * Now that the timer's calibrated, use the apic timer routines - * for all our timing needs.. - */ - delay_init(lapic_delay, 3000); - initclock_func = lapic_initclocks; - } -} - -/* - * delay for N usec. - */ - -void -lapic_delay(int usec) -{ - int32_t tick, otick; - int64_t deltat; /* XXX may want to be 64bit */ - - otick = lapic_gettick(); - - if (usec <= 0) + /* XXX What do we do if this is zero? 
*/ + if (lapic_per_second == 0) return; - if (usec <= 25) - deltat = lapic_delaytab[usec]; - else - deltat = (lapic_frac_cycle_per_usec * usec) >> 32; - while (deltat > 0) { - tick = lapic_gettick(); - if (tick > otick) - deltat -= lapic_tval - (tick - otick); - else - deltat -= otick - tick; - otick = tick; - - CPU_BUSY_CYCLE(); - } + lapic_timer_nsec_cycle_ratio = + lapic_per_second * (1ULL << 32) / 1000000000; + lapic_timer_nsec_max = UINT64_MAX / lapic_timer_nsec_cycle_ratio; + initclock_func = lapic_initclocks; } /* diff --git a/sys/arch/amd64/include/_types.h b/sys/arch/amd64/include/_types.h index 67c67a1472f..a9445bb4d93 100644 --- a/sys/arch/amd64/include/_types.h +++ b/sys/arch/amd64/include/_types.h @@ -1,4 +1,4 @@ -/* $OpenBSD: _types.h,v 1.17 2018/03/05 01:15:25 deraadt Exp $ */ +/* $OpenBSD: _types.h,v 1.18 2022/11/08 17:34:13 cheloha Exp $ */ /*- * Copyright (c) 1990, 1993 @@ -35,6 +35,8 @@ #ifndef _MACHINE__TYPES_H_ #define _MACHINE__TYPES_H_ +#define __HAVE_CLOCKINTR + /* * _ALIGN(p) rounds p (pointer or byte index) up to a correctly-aligned * value for all data types (int, long, ...). 
The result is an diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h index d5ce2f5d717..7dcbc82be11 100644 --- a/sys/arch/amd64/include/cpu.h +++ b/sys/arch/amd64/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.152 2022/11/08 14:46:51 cheloha Exp $ */ +/* $OpenBSD: cpu.h,v 1.153 2022/11/08 17:34:13 cheloha Exp $ */ /* $NetBSD: cpu.h,v 1.1 2003/04/26 18:39:39 fvdl Exp $ */ /*- @@ -47,6 +47,7 @@ #include #endif /* _KERNEL */ +#include #include #include #include @@ -221,6 +222,8 @@ struct cpu_info { paddr_t ci_vmcs_pa; struct rwlock ci_vmcs_lock; + + struct clockintr_queue ci_queue; }; #define CPUF_BSP 0x0001 /* CPU is the original BSP */ diff --git a/sys/arch/amd64/isa/clock.c b/sys/arch/amd64/isa/clock.c index 294fe7483f9..f36ecde0087 100644 --- a/sys/arch/amd64/isa/clock.c +++ b/sys/arch/amd64/isa/clock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: clock.c,v 1.37 2022/11/01 13:59:00 kettenis Exp $ */ +/* $OpenBSD: clock.c,v 1.38 2022/11/08 17:34:13 cheloha Exp $ */ /* $NetBSD: clock.c,v 1.1 2003/04/26 18:39:50 fvdl Exp $ */ /*- @@ -90,6 +90,7 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include #include +#include #include #include #include @@ -169,10 +170,8 @@ startclocks(void) } int -clockintr(void *arg) +clockintr(void *frame) { - struct clockframe *frame = arg; - if (timecounter->tc_get_timecount == i8254_get_timecount) { if (i8254_ticked) { i8254_ticked = 0; @@ -182,25 +181,25 @@ clockintr(void *arg) } } - hardclock(frame); + clockintr_dispatch(frame); return 1; } int -rtcintr(void *arg) +rtcintr(void *frame) { - struct clockframe *frame = arg; u_int stat = 0; /* * If rtcintr is 'late', next intr may happen immediately. * Get them all. (Also, see comment in cpu_initclocks().) 
*/ - while (mc146818_read(NULL, MC_REGC) & MC_REGC_PF) { - statclock(frame); + while (mc146818_read(NULL, MC_REGC) & MC_REGC_PF) stat = 1; - } + + if (stat) + clockintr_dispatch(frame); return (stat); } @@ -281,8 +280,13 @@ rtcdrain(void *v) void i8254_initclocks(void) { + i8254_inittimecounter(); /* hook the interrupt-based i8254 tc */ + stathz = 128; - profhz = 1024; + profhz = 1024; /* XXX does not divide into 1 billion */ + clockintr_init(0); + + clockintr_cpu_init(NULL); /* * While the clock interrupt handler isn't really MPSAFE, the @@ -294,8 +298,6 @@ i8254_initclocks(void) rtcintr, 0, "rtc"); rtcstart(); /* start the mc146818 clock */ - - i8254_inittimecounter(); /* hook the interrupt-based i8254 tc */ } void @@ -518,6 +520,7 @@ setstatclockrate(int arg) mc146818_write(NULL, MC_REGA, MC_BASE_32_KHz | MC_RATE_1024_Hz); } + clockintr_setstatclockrate(arg); } void -- 2.20.1