Spin up secondary CPUs and add IPI infrastructure. Heavily based on diffs
author kettenis <kettenis@openbsd.org>
Sun, 28 Jan 2018 13:17:45 +0000 (13:17 +0000)
committer kettenis <kettenis@openbsd.org>
Sun, 28 Jan 2018 13:17:45 +0000 (13:17 +0000)
from drahn@.

ok patrick@

sys/arch/arm64/arm64/cpu.c
sys/arch/arm64/arm64/genassym.cf
sys/arch/arm64/arm64/intr.c
sys/arch/arm64/arm64/locore.S
sys/arch/arm64/arm64/machdep.c
sys/arch/arm64/include/cpu.h
sys/arch/arm64/include/intr.h

index e216c70..864ee70 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: cpu.c,v 1.11 2018/01/17 10:22:25 kettenis Exp $       */
+/*     $OpenBSD: cpu.c,v 1.12 2018/01/28 13:17:45 kettenis Exp $       */
 
 /*
  * Copyright (c) 2016 Dale Rahn <drahn@dalerahn.com>
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
 #include <sys/device.h>
 #include <sys/sysctl.h>
 
+#include <uvm/uvm.h>
+
 #include <machine/fdt.h>
 
 #include <dev/ofw/openfirm.h>
 #include <dev/ofw/ofw_clock.h>
 #include <dev/ofw/fdt.h>
 
+#include <machine/cpufunc.h>
+#include <machine/fdt.h>
+
 #include "psci.h"
 #if NPSCI > 0
 #include <dev/fdt/pscivar.h>
@@ -191,6 +198,7 @@ cpu_identify(struct cpu_info *ci)
        }
 }
 
+int    cpu_hatch_secondary(struct cpu_info *ci, int, uint64_t);
 int    cpu_clockspeed(int *);
 
 int
@@ -217,19 +225,80 @@ cpu_attach(struct device *parent, struct device *dev, void *aux)
 
        if (faa->fa_reg[0].addr == (mpidr & MPIDR_AFF)) {
                ci = &cpu_info_primary;
-               ci->ci_cpuid = dev->dv_unit;
-               ci->ci_dev = dev;
+#ifdef MULTIPROCESSOR
+               ci->ci_flags |= CPUF_RUNNING | CPUF_PRESENT | CPUF_PRIMARY;
+#endif
+       }
+#ifdef MULTIPROCESSOR
+       else {
+               ncpusfound++;
+               ci = malloc(sizeof(*ci), M_DEVBUF, M_WAITOK | M_ZERO);
+               cpu_info[dev->dv_unit] = ci;
+               ci->ci_next = cpu_info_list->ci_next;
+               cpu_info_list->ci_next = ci;
+               ci->ci_flags |= CPUF_AP;
+       }
+#else
+       else {
+               ncpusfound++;
+               printf(" mpidr %llx not configured\n",
+                   faa->fa_reg[0].addr);
+               return;
+       }
+#endif
+
+       ci->ci_dev = dev;
+       ci->ci_cpuid = dev->dv_unit;
+       ci->ci_mpidr = faa->fa_reg[0].addr;
+       ci->ci_node = faa->fa_node;
+       ci->ci_self = ci;
 
-               printf(":");
+       printf(" mpidr %llx:", ci->ci_mpidr);
+
+#ifdef MULTIPROCESSOR
+       if (ci->ci_flags & CPUF_AP) {
+               char buf[32];
+               uint64_t spinup_data = 0;
+               int spinup_method = 0;
+               int timeout = 10000;
+               int len;
+
+               len = OF_getprop(ci->ci_node, "enable-method",
+                   buf, sizeof(buf));
+               if (strcmp(buf, "psci") == 0) {
+                       spinup_method = 1;
+               } else if (strcmp(buf, "spin-table") == 0) {
+                       spinup_method = 2;
+                       spinup_data = OF_getpropint64(ci->ci_node,
+                           "cpu-release-addr", 0);
+               }
+
+               if (cpu_hatch_secondary(ci, spinup_method, spinup_data)) {
+                       atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY);
+                       __asm volatile("dsb sy; sev");
+
+                       while ((ci->ci_flags & CPUF_IDENTIFIED) == 0 &&
+                           --timeout)
+                               delay(1000);
+                       if (timeout == 0) {
+                               printf(" failed to identify");
+                               ci->ci_flags = 0;
+                       }
+               } else {
+                       printf(" failed to spin up");
+                       ci->ci_flags = 0;
+               }
+       } else {
+#endif
                cpu_identify(ci);
-               
-               if (OF_getproplen(faa->fa_node, "clocks") > 0) {
-                       cpu_node = faa->fa_node;
+
+               if (OF_getproplen(ci->ci_node, "clocks") > 0) {
+                       cpu_node = ci->ci_node;
                        cpu_cpuspeed = cpu_clockspeed;
                }
-       } else {
-               printf(": not configured");
+#ifdef MULTIPROCESSOR
        }
+#endif
 
        printf("\n");
 }
@@ -257,8 +326,163 @@ cpu_clockspeed(int *freq)
 int    (*cpu_on_fn)(register_t, register_t);
 
 #ifdef MULTIPROCESSOR
+
+void cpu_boot_secondary(struct cpu_info *ci);
+void cpu_hatch(void);
+
+/*
+ * Walk the list of known CPUs and release every one that was attached as
+ * an application processor (CPUF_AP) and is not the primary CPU.  Each such
+ * CPU gets a random seed and scheduler state before cpu_boot_secondary()
+ * is called for it.
+ */
 void
 cpu_boot_secondary_processors(void)
 {
+       CPU_INFO_ITERATOR cii;
+       struct cpu_info *ci;
+
+       CPU_INFO_FOREACH(cii, ci) {
+               /* skip anything that is not a secondary waiting to be started */
+               if ((ci->ci_flags & CPUF_AP) == 0 ||
+                   (ci->ci_flags & CPUF_PRIMARY))
+                       continue;
+
+               ci->ci_randseed = (arc4random() & 0x7fffffff) + 1;
+               sched_init_cpu(ci);
+               cpu_boot_secondary(ci);
+       }
+}
+
+/*
+ * Start a CPU that uses the "spin-table" enable method by writing an entry
+ * address to its release word.
+ *
+ * ci:    CPU being released; only ci_cpuid is used here, to pick a scratch
+ *        virtual page.
+ * start: value stored in the release word (cpu_hatch_secondary passes
+ *        (vaddr_t)cpu_hatch + pmap_avail_kvo).
+ * data:  address of the release word, treated as a physical address
+ *        (cpu_attach passes the "cpu-release-addr" property).
+ */
+void
+cpu_hatch_spin_table(struct cpu_info *ci, uint64_t start, uint64_t data)
+{
+       /* this reuses the zero page for the core */
+       vaddr_t start_pg = zero_page + (PAGE_SIZE * ci->ci_cpuid);
+       paddr_t pa = trunc_page(data);
+       uint64_t offset = data - pa;
+       uint64_t *startvec = (uint64_t *)(start_pg + offset);
+
+       /* temporarily map the page that contains the release word */
+       pmap_kenter_cache(start_pg, pa, PROT_READ|PROT_WRITE, PMAP_CACHE_CI);
+
+       /* store the entry address, then signal an event to CPUs in wfe */
+       *startvec = start;
+       __asm volatile("dsb sy; sev");
+
+       /* remove the temporary mapping again */
+       pmap_kremove(start_pg, PAGE_SIZE);
+}
+
+/*
+ * Prepare the state a secondary CPU needs in cpu_hatch and request that
+ * the CPU starts executing there.
+ *
+ * ci:     cpu_info of the CPU to start.
+ * method: 1 starts the CPU through cpu_on_fn (PSCI), 2 uses the spin-table
+ *         release address in data; any other value means there is no way
+ *         to start the CPU and ci_flags is cleared.
+ * data:   spin-table release address; only used for method 2.
+ *
+ * Returns non-zero when the start request was issued (for method 1 this
+ * means cpu_on_fn is set and returned 0), otherwise 0.
+ */
+int
+cpu_hatch_secondary(struct cpu_info *ci, int method, uint64_t data)
+{
+       extern uint64_t pmap_avail_kvo;
+       extern paddr_t cpu_hatch_ci;
+       paddr_t startaddr;
+       void *kstack;
+       uint64_t ttbr1;
+       int rc = 0;
+
+       /* stack for the new CPU; cpu_hatch loads ci_el1_stkend into sp */
+       kstack = km_alloc(USPACE, &kv_any, &kp_zero, &kd_waitok);
+       ci->ci_el1_stkend = (vaddr_t)kstack + USPACE - 16;
+
+       /* cpu_hatch_ci receives the physical address of ci for cpu_hatch */
+       pmap_extract(pmap_kernel(), (vaddr_t)ci, &cpu_hatch_ci);
+
+       /* hand this CPU's ttbr1_el1 value to the new CPU via ci_ttbr1 */
+       __asm("mrs %x0, ttbr1_el1": "=r"(ttbr1));
+       ci->ci_ttbr1 = ttbr1;
+
+       /*
+        * Write both objects back from the data cache; presumably needed
+        * because cpu_hatch reads them before it has enabled the MMU.
+        */
+       cpu_dcache_wb_range((vaddr_t)&cpu_hatch_ci, sizeof(paddr_t));
+       cpu_dcache_wb_range((vaddr_t)ci, sizeof(*ci));
+
+       /* NOTE(review): presumably the physical address of cpu_hatch */
+       startaddr = (vaddr_t)cpu_hatch + pmap_avail_kvo;
+
+       switch (method) {
+       case 1:
+               /* psci  */
+               if (cpu_on_fn != 0)
+                       rc = !cpu_on_fn(ci->ci_mpidr, startaddr);
+               break;
+       case 2:
+               /* spin-table */
+               cpu_hatch_spin_table(ci, startaddr, data);
+               rc = 1;
+               break;
+       default:
+               /* no method to spin up CPU */
+               ci->ci_flags = 0;       /* mark cpu as not AP */
+       }
+
+       return rc;
+}
+
+/*
+ * Let one secondary CPU continue past its CPUF_GO wait in
+ * cpu_start_secondary() and wait, without a timeout, until that CPU has
+ * set CPUF_RUNNING.
+ */
+void
+cpu_boot_secondary(struct cpu_info *ci)
+{
+       /* set the flag, then signal an event for a CPU waiting in wfe */
+       atomic_setbits_int(&ci->ci_flags, CPUF_GO);
+       __asm volatile("dsb sy; sev");
+
+       /* cpu_start_secondary() issues sev after setting CPUF_RUNNING */
+       while ((ci->ci_flags & CPUF_RUNNING) == 0)
+               __asm volatile("wfe");
 }
+
+/*
+ * C entry point of a secondary CPU; cpu_hatch branches here after it has
+ * enabled the MMU and switched to the stack at ci_el1_stkend.
+ *
+ * The function steps through a flag handshake with the primary CPU:
+ *   - sets CPUF_PRESENT, then waits for CPUF_IDENTIFY (set by cpu_attach);
+ *   - runs cpu_identify() and sets CPUF_IDENTIFIED;
+ *   - waits for CPUF_GO (set by cpu_boot_secondary);
+ *   - programs tcr_el1, raises the spl and sets CPUF_RUNNING.
+ *
+ * While the "notyet" sections are disabled the CPU then idles in wfe
+ * forever, so this function does not return.
+ */
+void
+cpu_start_secondary(struct cpu_info *ci)
+{
+       uint64_t tcr;
+       int s;
+
+       ncpus++;
+       /*
+        * Use the atomic helper here: the primary CPU may be setting
+        * CPUF_IDENTIFY in ci_flags at the same time, and a plain
+        * read-modify-write could overwrite that bit and leave this CPU
+        * waiting below forever.
+        */
+       atomic_setbits_int(&ci->ci_flags, CPUF_PRESENT);
+       __asm volatile("dsb sy");
+
+       /* wait until the primary CPU asks us to identify ourselves */
+       while ((ci->ci_flags & CPUF_IDENTIFY) == 0)
+               __asm volatile("wfe");
+
+       cpu_identify(ci);
+       atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFIED);
+       __asm volatile("dsb sy");
+
+       /* wait until cpu_boot_secondary() releases us */
+       while ((ci->ci_flags & CPUF_GO) == 0)
+               __asm volatile("wfe");
+
+       /* set T0SZ for USER_SPACE_BITS of user address space, and TCR_A1 */
+       tcr = READ_SPECIALREG(tcr_el1);
+       tcr &= ~TCR_T0SZ(0x3f);
+       tcr |= TCR_T0SZ(64 - USER_SPACE_BITS);
+       tcr |= TCR_A1;
+       WRITE_SPECIALREG(tcr_el1, tcr);
+
+       s = splhigh();
+#ifdef notyet
+       arm_intr_cpu_enable();
+       cpu_startclock();
+#endif
+
+       nanouptime(&ci->ci_schedstate.spc_runtime);
+
+       /* report success; sev wakes the primary CPU waiting in wfe */
+       atomic_setbits_int(&ci->ci_flags, CPUF_RUNNING);
+       __asm volatile("dsb sy; sev");
+
+#ifdef notyet
+       spllower(IPL_NONE);
+
+       SCHED_LOCK(s);
+       cpu_switchto(NULL, sched_chooseproc());
+#else
+       /* scheduling on secondary CPUs is not enabled yet; park here */
+       for (;;)
+               __asm volatile("wfe");
+#endif
+}
+
+/*
+ * Send a no-op IPI to ci so that it enters the kernel.  Nothing is sent
+ * when ci is the CPU this code is running on.
+ */
+void
+cpu_kick(struct cpu_info *ci)
+{
+       if (ci == curcpu())
+               return;
+
+       /* force cpu to enter kernel */
+       arm_send_ipi(ci, ARM_IPI_NOP);
+}
+
+/*
+ * Send a no-op IPI to ci unless ci is the CPU this code is running on.
+ */
+void
+cpu_unidle(struct cpu_info *ci)
+{
+       if (ci == curcpu())
+               return;
+
+       /*
+        * This could send IPI or SEV depending on if the other
+        * processor is sleeping (WFI or WFE), in userland, or if the
+        * cpu is in other possible wait states?
+        */
+       arm_send_ipi(ci, ARM_IPI_NOP);
+}
+
 #endif
index 9fb9eaf..df96bca 100644 (file)
@@ -1,4 +1,4 @@
-#      $OpenBSD: genassym.cf,v 1.4 2018/01/28 12:41:39 kettenis Exp $
+#      $OpenBSD: genassym.cf,v 1.5 2018/01/28 13:17:45 kettenis Exp $
 #      $NetBSD: genassym.cf,v 1.27 2003/11/04 10:33:16 dsl Exp$
 
 # Copyright (c) 1982, 1990 The Regents of the University of California.
@@ -55,6 +55,11 @@ member       sf_sc
 
 struct cpu_info
 member ci_curproc
+ifdef MULTIPROCESSOR
+member ci_el1_stkend
+member ci_ttbr1
+member ci_self
+endif
 
 struct proc
 member p_addr
index e34e4e1..14d722a 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: intr.c,v 1.8 2017/09/08 05:36:51 deraadt Exp $ */
+/* $OpenBSD: intr.c,v 1.9 2018/01/28 13:17:45 kettenis Exp $ */
 /*
  * Copyright (c) 2011 Dale Rahn <drahn@openbsd.org>
  *
@@ -721,3 +721,22 @@ intr_barrier(void *ih)
 {
        sched_barrier(NULL);
 }
+
+/*
+ * IPI implementation
+ */
+
+void arm_no_send_ipi(struct cpu_info *ci, int id);
+
+/*
+ * Default IPI backend: panics, because no IPI function has been
+ * installed in intr_send_ipi_func.
+ */
+void
+arm_no_send_ipi(struct cpu_info *ci, int id)
+{
+       panic("arm_send_ipi() called: no ipi function");
+}
+
+/* Hook through which IPIs are sent; starts out as the panicking stub. */
+void (*intr_send_ipi_func)(struct cpu_info *, int) = arm_no_send_ipi;
+
+/*
+ * Send IPI number id to ci through the currently installed hook.
+ */
+void
+arm_send_ipi(struct cpu_info *ci, int id)
+{
+       intr_send_ipi_func(ci, id);
+}
index 5657caf..379366f 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.S,v 1.21 2018/01/21 22:30:41 kettenis Exp $ */
+/* $OpenBSD: locore.S,v 1.22 2018/01/28 13:17:45 kettenis Exp $ */
 /*-
  * Copyright (c) 2012-2014 Andrew Turner
  * All rights reserved.
@@ -352,3 +352,53 @@ sigfillsiz:
        .word   esigfill - sigfill
 
        .text
+
+#ifdef MULTIPROCESSOR
+/*
+ * cpu_hatch: first code executed by a secondary CPU.  Its address (plus
+ * pmap_avail_kvo) is what cpu_hatch_secondary() hands to PSCI or writes
+ * to the spin-table release word.  It looks up the cpu_info prepared for
+ * this CPU through cpu_hatch_ci, enables the MMU, switches to the stack
+ * at ci_el1_stkend and calls cpu_start_secondary().
+ */
+       .globl cpu_hatch
+cpu_hatch:
+       /* Drop to EL1 */
+       bl      drop_to_el1
+
+       /* Get the virt -> phys offset */
+       /* NOTE(review): presumably get_virt_delta returns it in x29 */
+       bl      get_virt_delta
+
+       /* Set up CPU info */
+       /*
+        * x0 = address of cpu_hatch_ci adjusted by x29, then its contents
+        * (stored by cpu_hatch_secondary() via pmap_extract()); tpidr_el1
+        * gets ci_self, which cpu_attach() set to the cpu_info pointer.
+        */
+       adr     x0, .Lcpu_hatch_ci
+       ldr     x0, [x0]
+       sub     x0, x0, x29
+       ldr     x0, [x0]
+       ldr     x1, [x0, #CI_SELF]
+       msr     tpidr_el1, x1
+
+       /* Enable the mmu */
+       /*
+        * x27 = pagetable_l0_ttbr0 adjusted by x29, x26 = ci_ttbr1;
+        * presumably these are the inputs start_mmu expects.
+        */
+       adr     x27, .Lpagetable_l0_ttbr0
+       ldr     x27, [x27] 
+       sub     x27, x27, x29
+       ldr     x26, [x0, #CI_TTBR1]
+       bl      start_mmu
+
+       /* Switch to the stack allocated by cpu_hatch_secondary() */
+       mrs     x0, tpidr_el1
+       ldr     x1, [x0, #CI_EL1_STKEND]
+       mov     sp, x1
+
+       /* Call cpu_start_secondary(ci) with x0 = ci; spin if it returns */
+       adr     x1, .Lcpu_start_secondary
+       ldr     x1, [x1]
+       blr     x1
+       b       .
+
+       /* Literal pool holding the addresses loaded through adr above */
+       .align 3
+.Lcpu_start_secondary:
+       .xword  cpu_start_secondary
+.Lpagetable_l0_ttbr0:
+       .xword  pagetable_l0_ttbr0
+.Lcpu_hatch_ci:
+       .xword  cpu_hatch_ci
+
+       /* Written by cpu_hatch_secondary() before the CPU is started */
+       .data
+       .align 3
+       .global cpu_hatch_ci
+cpu_hatch_ci:    
+       .xword   0
+
+       .text
+#endif
index 702f12e..9635124 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.26 2018/01/12 14:52:55 kettenis Exp $ */
+/* $OpenBSD: machdep.c,v 1.27 2018/01/28 13:17:45 kettenis Exp $ */
 /*
  * Copyright (c) 2014 Patrick Wildt <patrick@blueri.se>
  *
@@ -416,7 +416,7 @@ need_resched(struct cpu_info *ci)
        /* There's a risk we'll be called before the idle threads start */
        if (ci->ci_curproc) {
                aston(ci->ci_curproc);
-               //cpu_kick(ci); /* multiprocessor only ?? */
+               cpu_kick(ci);
        }
 }
 
index 8329fc5..8c27403 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.5 2018/01/17 10:22:25 kettenis Exp $ */
+/* $OpenBSD: cpu.h,v 1.6 2018/01/28 13:17:45 kettenis Exp $ */
 /*
  * Copyright (c) 2016 Dale Rahn <drahn@dalerahn.com>
  *
@@ -81,10 +81,14 @@ struct cpu_info {
        struct cpu_info         *ci_next;
        struct schedstate_percpu ci_schedstate; /* scheduler state */
 
+       u_int32_t               ci_cpuid;
+       uint64_t                ci_mpidr;
+       int                     ci_node;
+       struct cpu_info         *ci_self;
+
        struct proc             *ci_curproc;
        struct pmap             *ci_curpm;
        struct proc             *ci_fpuproc;
-       u_int32_t                ci_cpuid;
        u_int32_t               ci_randseed;
 
        struct pcb              *ci_curpcb;
@@ -104,6 +108,9 @@ struct cpu_info {
 
 #ifdef MULTIPROCESSOR
        struct srp_hazard       ci_srp_hazards[SRP_HAZARD_NUM];
+       volatile int            ci_flags;
+       uint64_t                ci_ttbr1;
+       vaddr_t                 ci_el1_stkend;
 #endif
 
 #ifdef GPROF
@@ -111,6 +118,14 @@ struct cpu_info {
 #endif
 };
 
+#define CPUF_PRIMARY           (1<<0)
+#define CPUF_AP                        (1<<1)
+#define CPUF_IDENTIFY          (1<<2)
+#define CPUF_IDENTIFIED                (1<<3)
+#define CPUF_PRESENT           (1<<4)
+#define CPUF_GO                        (1<<5)
+#define CPUF_RUNNING           (1<<6)
+
 static inline struct cpu_info *
 curcpu(void)
 {
@@ -137,10 +152,8 @@ extern struct cpu_info *cpu_info_list;
 #define CPU_INFO_ITERATOR              int
 #define CPU_INFO_FOREACH(cii, ci)      for (cii = 0, ci = cpu_info_list; \
                                            ci != NULL; ci = ci->ci_next)
-
 #define CPU_INFO_UNIT(ci)      ((ci)->ci_dev ? (ci)->ci_dev->dv_unit : 0)
 #define MAXCPUS        8
-#define cpu_unidle(ci)
 
 extern struct cpu_info *cpu_info[MAXCPUS];
 
@@ -162,7 +175,15 @@ void cpu_boot_secondary_processors(void);
  * process as soon as possible.
  */
 
+#ifdef MULTIPROCESSOR
+void cpu_unidle(struct cpu_info *ci);
+#define signotify(p)            (aston(p), cpu_unidle((p)->p_cpu))
+void cpu_kick(struct cpu_info *);
+#else
+#define cpu_kick(ci)
+#define cpu_unidle(ci)
 #define signotify(p)            setsoftast()
+#endif
 
 /*
  * Preempt the current process if in interrupt from user mode,
@@ -264,6 +285,8 @@ intr_restore(u_long daif)
        restore_daif(daif);
 }
 
+void   cpu_startclock(void);
+
 void   delay (unsigned);
 #define        DELAY(x)        delay(x)
 
index 2bab7cd..b8c7dee 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: intr.h,v 1.7 2018/01/13 15:18:11 mpi Exp $ */
+/*     $OpenBSD: intr.h,v 1.8 2018/01/28 13:17:45 kettenis Exp $ */
 
 /*
  * Copyright (c) 2001-2004 Opsycon AB  (www.opsycon.se / www.opsycon.com)
@@ -164,6 +164,12 @@ void       *arm_intr_parent_establish_fdt(void *, int *, int,
            int (*)(void *), void *, char *);
 void    arm_intr_parent_disestablish_fdt(void *);
 
+void    arm_send_ipi(struct cpu_info *, int);
+extern void (*intr_send_ipi_func)(struct cpu_info *, int);
+
+#define ARM_IPI_NOP    0
+#define ARM_IPI_DDB    1
+
 #ifdef DIAGNOSTIC
 /*
  * Although this function is implemented in MI code, it must be in this MD