From c4936e802f99eef18a897df99eca700ff0f88540 Mon Sep 17 00:00:00 2001 From: kettenis Date: Thu, 13 Jul 2023 08:33:36 +0000 Subject: [PATCH] Use the deep idle state available on Apple M1/M2 cores in the idle loop and for suspend. This state makes the CPU lose some of its register state so we need to save these registers before putting the core to sleep and restore them when we wake up. This deep idle state has a higher wakeup latency than the normal WFI idle state. Use similar logic as acpicpu(4) to decide which idle state to pick. If some cores of a cluster are in this deep idle state, turbo states become available to the cores that remain active. So stop skipping these states. This improves single-core performance a little bit. The main win is in power savings when running in a state with a high clock frequency. My M2 Pro mini goes from 14W to 6.5W when idle at the maximum clock frequency. But even at the lowest clock frequency there are small but significant power savings. ok deraadt@, tobhe@ --- sys/arch/arm64/arm64/cpu.c | 15 +++++++-- sys/arch/arm64/arm64/cpufunc_asm.S | 51 +++++++++++++++++++++++++++++- sys/arch/arm64/arm64/machdep.c | 7 ++-- sys/arch/arm64/dev/aplcpu.c | 39 +++++++++++++++++++---- sys/arch/arm64/include/cpu.h | 10 +++++- 5 files changed, 107 insertions(+), 15 deletions(-) diff --git a/sys/arch/arm64/arm64/cpu.c b/sys/arch/arm64/arm64/cpu.c index 3a59d5d33bb..813d19e03e7 100644 --- a/sys/arch/arm64/arm64/cpu.c +++ b/sys/arch/arm64/arm64/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.c,v 1.95 2023/06/15 22:18:07 cheloha Exp $ */ +/* $OpenBSD: cpu.c,v 1.96 2023/07/13 08:33:36 kettenis Exp $ */ /* * Copyright (c) 2016 Dale Rahn @@ -1010,6 +1010,8 @@ void cpu_boot_secondary(struct cpu_info *ci); void cpu_hatch_secondary(void); void cpu_hatch_secondary_spin(void); +void cpu_suspend_cycle(void); + void cpu_boot_secondary_processors(void) { @@ -1224,7 +1226,7 @@ cpu_halt(void) ci->ci_psci_suspend_param = 0; } else #endif - __asm volatile("wfi"); + 
cpu_suspend_cycle(); count++; } @@ -1266,9 +1268,16 @@ cpu_unidle(struct cpu_info *ci) void cpu_hatch_primary(void); +void (*cpu_suspend_cycle_fcn)(void) = cpu_wfi; label_t cpu_suspend_jmpbuf; int cpu_suspended; +void +cpu_suspend_cycle(void) +{ + cpu_suspend_cycle_fcn(); +} + void cpu_init_primary(void) { @@ -1342,7 +1351,7 @@ cpu_suspend_primary(void) ci->ci_psci_suspend_param = 0; } else #endif - __asm volatile("wfi"); + cpu_suspend_cycle(); count++; } diff --git a/sys/arch/arm64/arm64/cpufunc_asm.S b/sys/arch/arm64/arm64/cpufunc_asm.S index c243f6141fd..d73e296f68c 100644 --- a/sys/arch/arm64/arm64/cpufunc_asm.S +++ b/sys/arch/arm64/arm64/cpufunc_asm.S @@ -1,4 +1,4 @@ -/* $OpenBSD: cpufunc_asm.S,v 1.7 2020/11/20 21:48:33 patrick Exp $ */ +/* $OpenBSD: cpufunc_asm.S,v 1.8 2023/07/13 08:33:36 kettenis Exp $ */ /*- * Copyright (c) 2014 Robin Randhawa * Copyright (c) 2015 The FreeBSD Foundation @@ -185,3 +185,52 @@ ENTRY(cpu_icache_sync_range) RETGUARD_CHECK(cpu_icache_sync_range, x15) ret END(cpu_icache_sync_range) + +ENTRY(cpu_wfi) + RETGUARD_SETUP(cpu_wfi, x15) + dsb sy + wfi + RETGUARD_CHECK(cpu_wfi, x15) + ret +END(cpu_wfi) + +ENTRY(aplcpu_deep_wfi) + RETGUARD_SETUP(aplcpu_deep_wfi, x15) + + stp x30, x15, [sp, #-16]! + stp x28, x29, [sp, #-16]! + stp x26, x27, [sp, #-16]! + stp x24, x25, [sp, #-16]! + stp x22, x23, [sp, #-16]! + stp x20, x21, [sp, #-16]! + stp x18, x19, [sp, #-16]! + + mrs x0, daif + str x0, [sp, #-16]! 
+ msr daifset, #3 + + mrs x0, s3_5_c15_c5_0 + orr x0, x0, #(3 << 24) + msr s3_5_c15_c5_0, x0 + + dsb sy + wfi + + mrs x0, s3_5_c15_c5_0 + bic x0, x0, #(1 << 24) + msr s3_5_c15_c5_0, x0 + + ldr x0, [sp], #16 + msr daif, x0 + + ldp x18, x19, [sp], #16 + ldp x20, x21, [sp], #16 + ldp x22, x23, [sp], #16 + ldp x24, x25, [sp], #16 + ldp x26, x27, [sp], #16 + ldp x28, x29, [sp], #16 + ldp x30, x15, [sp], #16 + + RETGUARD_CHECK(aplcpu_deep_wfi, x15) + ret +END(aplcpu_deep_wfi) diff --git a/sys/arch/arm64/arm64/machdep.c b/sys/arch/arm64/arm64/machdep.c index 1a582fa2727..6b2dcba9c05 100644 --- a/sys/arch/arm64/arm64/machdep.c +++ b/sys/arch/arm64/arm64/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.82 2023/06/10 19:30:48 kettenis Exp $ */ +/* $OpenBSD: machdep.c,v 1.83 2023/07/13 08:33:36 kettenis Exp $ */ /* * Copyright (c) 2014 Patrick Wildt * Copyright (c) 2021 Mark Kettenis @@ -211,12 +211,13 @@ cpu_idle_enter(void) { } +void (*cpu_idle_cycle_fcn)(void) = cpu_wfi; + void cpu_idle_cycle(void) { enable_irq_daif(); - __asm volatile("dsb sy" ::: "memory"); - __asm volatile("wfi"); + cpu_idle_cycle_fcn(); } void diff --git a/sys/arch/arm64/dev/aplcpu.c b/sys/arch/arm64/dev/aplcpu.c index 9e4c89daa9c..3f3d69fcd1b 100644 --- a/sys/arch/arm64/dev/aplcpu.c +++ b/sys/arch/arm64/dev/aplcpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: aplcpu.c,v 1.7 2023/05/09 10:13:23 kettenis Exp $ */ +/* $OpenBSD: aplcpu.c,v 1.8 2023/07/13 08:33:36 kettenis Exp $ */ /* * Copyright (c) 2022 Mark Kettenis * @@ -42,6 +42,8 @@ #define DVFS_T8112_STATUS_CUR_PS_MASK (0x1f << 5) #define DVFS_T8112_STATUS_CUR_PS_SHIFT 5 +#define APLCPU_DEEP_WFI_LATENCY 10 /* microseconds */ + struct opp { uint64_t opp_hz; uint32_t opp_level; @@ -97,6 +99,8 @@ uint32_t aplcpu_opp_level(struct aplcpu_softc *, int); int aplcpu_clockspeed(int *); void aplcpu_setperf(int level); void aplcpu_refresh_sensors(void *); +void aplcpu_idle_cycle(); +void aplcpu_deep_wfi(void); int aplcpu_match(struct device *parent, void *match, void 
*aux) @@ -171,6 +175,8 @@ aplcpu_attach(struct device *parent, struct device *self, void *aux) sensordev_install(&sc->sc_sensordev); sensor_task_register(sc, aplcpu_refresh_sensors, 1); + cpu_idle_cycle_fcn = aplcpu_idle_cycle; + cpu_suspend_cycle_fcn = aplcpu_deep_wfi; cpu_cpuspeed = aplcpu_clockspeed; cpu_setperf = aplcpu_setperf; return; @@ -223,11 +229,8 @@ aplcpu_opp_init(struct aplcpu_softc *sc, int node) return; count = 0; - for (child = OF_child(node); child != 0; child = OF_peer(child)) { - if (OF_getproplen(child, "turbo-mode") == 0) - continue; + for (child = OF_child(node); child != 0; child = OF_peer(child)) count++; - } if (count == 0) return; @@ -239,8 +242,6 @@ aplcpu_opp_init(struct aplcpu_softc *sc, int node) count = 0; for (child = OF_child(node); child != 0; child = OF_peer(child)) { - if (OF_getproplen(child, "turbo-mode") == 0) - continue; opp_hz = OF_getpropint64(child, "opp-hz", 0); opp_level = OF_getpropint(child, "opp-level", 0); @@ -430,3 +431,27 @@ aplcpu_refresh_sensors(void *arg) } } } + +void +aplcpu_idle_cycle(void) +{ + struct cpu_info *ci = curcpu(); + struct timeval start, stop; + u_long itime; + + microuptime(&start); + + if (ci->ci_prev_sleep > 3 * APLCPU_DEEP_WFI_LATENCY) + aplcpu_deep_wfi(); + else + cpu_wfi(); + + microuptime(&stop); + timersub(&stop, &start, &stop); + itime = stop.tv_sec * 1000000 + stop.tv_usec; + + ci->ci_last_itime = itime; + itime >>= 1; + ci->ci_prev_sleep = (ci->ci_prev_sleep + (ci->ci_prev_sleep >> 1) + + itime) >> 1; +} diff --git a/sys/arch/arm64/include/cpu.h b/sys/arch/arm64/include/cpu.h index 9ca13c88223..36d2d40ac8c 100644 --- a/sys/arch/arm64/include/cpu.h +++ b/sys/arch/arm64/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.36 2023/06/10 19:30:48 kettenis Exp $ */ +/* $OpenBSD: cpu.h,v 1.37 2023/07/13 08:33:36 kettenis Exp $ */ /* * Copyright (c) 2016 Dale Rahn * @@ -154,6 +154,9 @@ struct cpu_info { volatile int ci_opp_max; uint32_t ci_cpu_supply; + u_long ci_prev_sleep; + u_long 
ci_last_itime; + #ifdef MULTIPROCESSOR struct srp_hazard ci_srp_hazards[SRP_HAZARD_NUM]; volatile int ci_flags; @@ -344,6 +347,11 @@ void cpu_startclock(void); int cpu_suspend_primary(void); void cpu_resume_secondary(struct cpu_info *); +extern void (*cpu_idle_cycle_fcn)(void); +extern void (*cpu_suspend_cycle_fcn)(void); + +void cpu_wfi(void); + void delay (unsigned); #define DELAY(x) delay(x) -- 2.20.1