Add support for deep(er) idle states that can be entered using PSCI. For
author kettenis <kettenis@openbsd.org>
Sun, 19 Feb 2023 17:16:13 +0000 (17:16 +0000)
committer kettenis <kettenis@openbsd.org>
Sun, 19 Feb 2023 17:16:13 +0000 (17:16 +0000)
now this only supports states advertised in device trees, but ACPI support
could be added as well.  The parsing of the idle states as well as the
heuristic to pick the deepest one is probably a bit too simple, but more
complex cases can be added later.  Worst case cores will use WFI and use
more power in suspend.

ok phessler@

sys/arch/arm64/arm64/cpu.c
sys/arch/arm64/include/cpu.h
sys/dev/fdt/psci.c
sys/dev/fdt/pscivar.h

index 1fffc89..e43b09e 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: cpu.c,v 1.84 2023/02/09 23:35:06 jsg Exp $    */
+/*     $OpenBSD: cpu.c,v 1.85 2023/02/19 17:16:13 kettenis Exp $       */
 
 /*
  * Copyright (c) 2016 Dale Rahn <drahn@dalerahn.com>
@@ -240,6 +240,7 @@ struct cfdriver cpu_cd = {
 };
 
 void   cpu_opp_init(struct cpu_info *, uint32_t);
+void   cpu_psci_init(struct cpu_info *);
 
 void   cpu_flush_bp_noop(void);
 void   cpu_flush_bp_psci(void);
@@ -879,6 +880,8 @@ cpu_attach(struct device *parent, struct device *dev, void *aux)
        if (opp)
                cpu_opp_init(ci, opp);
 
+       cpu_psci_init(ci);
+
        printf("\n");
 }
 
@@ -1112,12 +1115,19 @@ void
 cpu_halt(void)
 {
        struct cpu_info *ci = curcpu();
+       vaddr_t start_va;
+       paddr_t ci_pa, start_pa;
        int count = 0;
        u_long psw;
+       int32_t status;
 
        KERNEL_ASSERT_UNLOCKED();
        SCHED_ASSERT_UNLOCKED();
 
+       start_va = (vaddr_t)cpu_hatch_secondary;
+       pmap_extract(pmap_kernel(), start_va, &start_pa);
+       pmap_extract(pmap_kernel(), (vaddr_t)ci, &ci_pa);
+
        psw = intr_disable();
 
        atomic_clearbits_int(&ci->ci_flags,
@@ -1141,7 +1151,15 @@ cpu_halt(void)
            READ_SPECIALREG(cntv_ctl_el0) | CNTV_CTL_IMASK);
 
        while ((ci->ci_flags & CPUF_GO) == 0) {
-               __asm volatile("wfi");
+#if NPSCI > 0
+               if (ci->ci_psci_suspend_param) {
+                       status = psci_cpu_suspend(ci->ci_psci_suspend_param,
+                           start_pa, ci_pa);
+                       if (status != PSCI_SUCCESS)
+                               ci->ci_psci_suspend_param = 0;
+               } else
+#endif
+                       __asm volatile("wfi");
                count++;
        }
 
@@ -1193,7 +1211,6 @@ cpu_init_primary(void)
 
        cpu_startclock();
 
-       cpu_suspended = 1;
        longjmp(&cpu_suspend_jmpbuf);
 }
 
@@ -1204,68 +1221,76 @@ cpu_suspend_primary(void)
        vaddr_t start_va;
        paddr_t ci_pa, start_pa;
        uint64_t ttbr1;
+       int32_t status;
+       int count = 0;
 
-       if (!psci_can_suspend()) {
-               int count = 0;
+       __asm("mrs %x0, ttbr1_el1": "=r"(ttbr1));
+       ci->ci_ttbr1 = ttbr1;
+       cpu_dcache_wb_range((vaddr_t)ci, sizeof(*ci));
 
-               /*
-                * If PSCI doesn't support SYSTEM_SUSPEND, spin in a
-                * low power state waiting for an interrupt that wakes
-                * us up again.
-                */
+       start_va = (vaddr_t)cpu_hatch_primary;
+       pmap_extract(pmap_kernel(), start_va, &start_pa);
+       pmap_extract(pmap_kernel(), (vaddr_t)ci, &ci_pa);
 
-               /* Mask clock interrupts. */
-               WRITE_SPECIALREG(cntv_ctl_el0,
-                   READ_SPECIALREG(cntv_ctl_el0) | CNTV_CTL_IMASK);
+#if NPSCI > 0
+       if (psci_can_suspend()) {
+               if (setjmp(&cpu_suspend_jmpbuf)) {
+                       /* XXX wait for debug output on Allwinner A64 */
+                       delay(200000);
+                       return 0;
+               }
 
-               /*
-                * All non-wakeup interrupts should be masked at this
-                * point; re-enable interrupts such that wakeup
-                * interrupts actually wake us up.  Set a flag such
-                * that drivers can tell we're suspended and change
-                * their behaviour accordingly.  They can wake us up
-                * by clearing the flag.
-                */
-               cpu_suspended = 1;
-               intr_enable_wakeup();
+               psci_system_suspend(start_pa, ci_pa);
 
-               while (cpu_suspended) {
-                       __asm volatile("wfi");
-                       count++;
-               }
+               return EOPNOTSUPP;
+       }
+#endif
 
-               intr_disable_wakeup();
+       if (setjmp(&cpu_suspend_jmpbuf))
+               goto resume;
 
-               /* Unmask clock interrupts. */
-               WRITE_SPECIALREG(cntv_ctl_el0,
-                   READ_SPECIALREG(cntv_ctl_el0) & ~CNTV_CTL_IMASK);
+       /*
+        * If PSCI doesn't support SYSTEM_SUSPEND, spin in a low power
+        * state waiting for an interrupt that wakes us up again.
+        */
 
-               printf("%s: %d wakeup events\n", ci->ci_dev->dv_xname, count);
+       /* Mask clock interrupts. */
+       WRITE_SPECIALREG(cntv_ctl_el0,
+           READ_SPECIALREG(cntv_ctl_el0) | CNTV_CTL_IMASK);
 
-               return 0;
-       }
+       /*
+        * All non-wakeup interrupts should be masked at this point;
+        * re-enable interrupts such that wakeup interrupts actually
+        * wake us up.  Set a flag such that drivers can tell we're
+        * suspended and change their behaviour accordingly.  They can
+        * wake us up by clearing the flag.
+        */
+       cpu_suspended = 1;
+       intr_enable_wakeup();
 
-       cpu_suspended = 0;
-       setjmp(&cpu_suspend_jmpbuf);
-       if (cpu_suspended) {
-               /* XXX wait for debug output from SCP on Allwinner A64 */
-               delay(200000);
-               return 0;
+       while (cpu_suspended) {
+#if NPSCI > 0
+               if (ci->ci_psci_suspend_param) {
+                       status = psci_cpu_suspend(ci->ci_psci_suspend_param,
+                           start_pa, ci_pa);
+                       if (status != PSCI_SUCCESS)
+                               ci->ci_psci_suspend_param = 0;
+               } else
+#endif
+                       __asm volatile("wfi");
+               count++;
        }
 
-       __asm("mrs %x0, ttbr1_el1": "=r"(ttbr1));
-       ci->ci_ttbr1 = ttbr1;
-       cpu_dcache_wb_range((vaddr_t)ci, sizeof(*ci));
+resume:
+       intr_disable_wakeup();
 
-       start_va = (vaddr_t)cpu_hatch_primary;
-       pmap_extract(pmap_kernel(), start_va, &start_pa);
-       pmap_extract(pmap_kernel(), (vaddr_t)ci, &ci_pa);
+       /* Unmask clock interrupts. */
+       WRITE_SPECIALREG(cntv_ctl_el0,
+           READ_SPECIALREG(cntv_ctl_el0) & ~CNTV_CTL_IMASK);
 
-#if NPSCI > 0
-       psci_system_suspend(start_pa, ci_pa);
-#endif
+       printf("%s: %d wakeup events\n", ci->ci_dev->dv_xname, count);
 
-       return EOPNOTSUPP;
+       return 0;
 }
 
 #ifdef MULTIPROCESSOR
@@ -1608,3 +1633,109 @@ cpu_opp_set_cooling_level(void *cookie, uint32_t *cells, uint32_t level)
                task_add(systq, &cpu_opp_task);
        }
 }
+
+
+/*
+ * Find the deepest PSCI idle state described for this CPU in the device
+ * tree and record its suspend parameter in ci->ci_psci_suspend_param.
+ * Bails out early, keeping whatever has been recorded so far, as soon
+ * as a required node or property is missing.
+ */
+void
+cpu_psci_init(struct cpu_info *ci)
+{
+       uint32_t *domains, *states;
+       uint32_t ncells, cluster;
+       size_t pos, nwords;
+       int idx, len, node;
+
+       /*
+        * This requires traversing quite a few nodes in the device tree.
+        * The first step is to look up the "psci" power domain for this CPU.
+        */
+       idx = OF_getindex(ci->ci_node, "psci", "power-domain-names");
+       if (idx < 0)
+               return;
+
+       len = OF_getproplen(ci->ci_node, "power-domains");
+       if (len < (int)sizeof(uint32_t))
+               return;
+
+       domains = malloc(len, M_TEMP, M_WAITOK);
+       OF_getpropintarray(ci->ci_node, "power-domains", domains, len);
+
+       /* Skip the specifiers listed ahead of the "psci" power domain. */
+       nwords = len / sizeof(uint32_t);
+       for (pos = 0; idx > 0 && pos < nwords; idx--) {
+               node = OF_getnodebyphandle(domains[pos]);
+               if (node == 0)
+                       break;
+
+               ncells = OF_getpropint(node, "#power-domain-cells", 0);
+               pos += (size_t)ncells + 1;
+       }
+
+       /* Only use the entry if it lies within the property. */
+       node = (idx == 0 && pos < nwords) ?
+           OF_getnodebyphandle(domains[pos]) : 0;
+       free(domains, M_TEMP, len);
+       if (node == 0)
+               return;
+
+       /*
+        * We found the "psci" power domain.  If this power domain has
+        * a parent power domain, stash its phandle away for later.
+        */
+       cluster = OF_getpropint(node, "power-domains", 0);
+
+       /*
+        * Get the deepest idle state for the CPU; this should be the
+        * last one that is listed.
+        */
+       len = OF_getproplen(node, "domain-idle-states");
+       if (len < (int)sizeof(uint32_t))
+               return;
+
+       states = malloc(len, M_TEMP, M_WAITOK);
+       OF_getpropintarray(node, "domain-idle-states", states, len);
+
+       node = OF_getnodebyphandle(states[len / sizeof(uint32_t) - 1]);
+       free(states, M_TEMP, len);
+       if (node == 0)
+               return;
+
+       ci->ci_psci_suspend_param =
+               OF_getpropint(node, "arm,psci-suspend-param", 0);
+
+       /*
+        * Qualcomm Snapdragon always seem to operate in OS Initiated
+        * mode.  This means that the last CPU to suspend can pick the
+        * idle state that powers off the entire cluster.  In our case
+        * that will always be the primary CPU.
+        */
+       if (ci->ci_flags & CPUF_AP)
+               return;
+
+       node = OF_getnodebyphandle(cluster);
+       if (node == 0)
+               return;
+
+       /*
+        * Get the deepest idle state for the cluster, which has its own
+        * "domain-idle-states" property; it should be the last one listed.
+        */
+       len = OF_getproplen(node, "domain-idle-states");
+       if (len < (int)sizeof(uint32_t))
+               return;
+
+       states = malloc(len, M_TEMP, M_WAITOK);
+       OF_getpropintarray(node, "domain-idle-states", states, len);
+
+       node = OF_getnodebyphandle(states[len / sizeof(uint32_t) - 1]);
+       free(states, M_TEMP, len);
+       if (node == 0)
+               return;
+
+       ci->ci_psci_suspend_param =
+               OF_getpropint(node, "arm,psci-suspend-param", 0);
+}
index 3ab8a14..2ddaec5 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.34 2022/12/31 00:30:21 patrick Exp $ */
+/* $OpenBSD: cpu.h,v 1.35 2023/02/19 17:16:13 kettenis Exp $ */
 /*
  * Copyright (c) 2016 Dale Rahn <drahn@dalerahn.com>
  *
@@ -142,6 +142,8 @@ struct cpu_info {
        uint64_t                ci_ttbr1;
        vaddr_t                 ci_el1_stkend;
 
+       uint32_t                ci_psci_suspend_param;
+
        struct opp_table        *ci_opp_table;
        volatile int            ci_opp_idx;
        volatile int            ci_opp_max;
index 469cf1f..675a97d 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: psci.c,v 1.13 2023/02/13 19:26:15 kettenis Exp $      */
+/*     $OpenBSD: psci.c,v 1.14 2023/02/19 17:16:13 kettenis Exp $      */
 
 /*
  * Copyright (c) 2016 Jonathan Gray <jsg@openbsd.org>
@@ -37,6 +37,11 @@ extern void (*powerdownfn)(void);
 #define SMCCC_ARCH_WORKAROUND_3        0x80003fff
 
 #define PSCI_VERSION           0x84000000
+#ifdef __LP64__
+#define CPU_SUSPEND            0xc4000001
+#else
+#define CPU_SUSPEND            0x84000001
+#endif
 #define CPU_OFF                        0x84000002
 #ifdef __LP64__
 #define CPU_ON                 0xc4000003
@@ -56,12 +61,13 @@ struct psci_softc {
        struct device    sc_dev;
        register_t       (*sc_callfn)(register_t, register_t, register_t,
                             register_t);
-       uint32_t         sc_psci_version; 
+       uint32_t         sc_psci_version;
        uint32_t         sc_system_off;
        uint32_t         sc_system_reset;
        uint32_t         sc_system_suspend;
        uint32_t         sc_cpu_on;
        uint32_t         sc_cpu_off;
+       uint32_t         sc_cpu_suspend;
 
        uint32_t         sc_smccc_version;
        uint32_t         sc_method;
@@ -131,6 +137,7 @@ psci_attach(struct device *parent, struct device *self, void *aux)
                sc->sc_system_reset = SYSTEM_RESET;
                sc->sc_cpu_on = CPU_ON;
                sc->sc_cpu_off = CPU_OFF;
+               sc->sc_cpu_suspend = CPU_SUSPEND;
        } else if (OF_is_compatible(faa->fa_node, "arm,psci")) {
                sc->sc_system_off = OF_getpropint(faa->fa_node,
                    "system_off", 0);
@@ -138,6 +145,8 @@ psci_attach(struct device *parent, struct device *self, void *aux)
                    "system_reset", 0);
                sc->sc_cpu_on = OF_getpropint(faa->fa_node, "cpu_on", 0);
                sc->sc_cpu_off = OF_getpropint(faa->fa_node, "cpu_off", 0);
+               sc->sc_cpu_suspend = OF_getpropint(faa->fa_node,
+                   "cpu_suspend", 0);
        }
 
        psci_sc = sc;
@@ -332,6 +341,19 @@ psci_cpu_on(register_t target_cpu, register_t entry_point_address,
        return PSCI_NOT_SUPPORTED;
 }
 
+/* Invoke PSCI CPU_SUSPEND; PSCI_NOT_SUPPORTED if it cannot be issued. */
+int32_t
+psci_cpu_suspend(register_t power_state, register_t entry_point_address,
+    register_t context_id)
+{
+       struct psci_softc *sc = psci_sc;
+
+       if (!sc || !sc->sc_callfn || sc->sc_cpu_suspend == 0)
+               return PSCI_NOT_SUPPORTED;
+       return sc->sc_callfn(sc->sc_cpu_suspend, power_state,
+           entry_point_address, context_id);
+}
+
 int32_t
 psci_features(uint32_t psci_func_id)
 {
index 9a40029..04e418f 100644 (file)
@@ -15,6 +15,7 @@ int   psci_can_suspend(void);
 int32_t        psci_system_suspend(register_t, register_t);
 int32_t        psci_cpu_on(register_t, register_t, register_t);
 int32_t        psci_cpu_off(void);
+int32_t        psci_cpu_suspend(register_t, register_t, register_t);
 void   psci_flush_bp(void);
 int    psci_flush_bp_has_bhb(void);
 int    psci_method(void);