Postpone calling the EFI SetVirtualAddressMap() interface until efi(4)
authorkettenis <kettenis@openbsd.org>
Wed, 27 Jul 2022 21:01:38 +0000 (21:01 +0000)
committerkettenis <kettenis@openbsd.org>
Wed, 27 Jul 2022 21:01:38 +0000 (21:01 +0000)
attaches.  This prevents us from having to map the various EFI memory regions
early on.  These early mappings use 1G blocks.  On Apple M1 systems, these
blocks may overlap memory that isn't accessible.  The CPU may speculatively
access this inaccessible memory which will result in SError exceptions.

ok deraadt@

sys/arch/arm64/arm64/machdep.c
sys/arch/arm64/dev/efi.c

index 6453043..785d3d4 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: machdep.c,v 1.69 2022/03/23 23:36:35 kettenis Exp $ */
+/* $OpenBSD: machdep.c,v 1.70 2022/07/27 21:01:38 kettenis Exp $ */
 /*
  * Copyright (c) 2014 Patrick Wildt <patrick@blueri.se>
  * Copyright (c) 2021 Mark Kettenis <kettenis@openbsd.org>
@@ -771,8 +771,6 @@ uint32_t mmap_desc_ver;
 
 EFI_MEMORY_DESCRIPTOR *mmap;
 
-void   remap_efi_runtime(EFI_PHYSICAL_ADDRESS);
-
 void   collect_kernel_args(const char *);
 void   process_kernel_args(void);
 
@@ -950,10 +948,6 @@ initarm(struct arm64_bootparams *abp)
 
        arm64_bs_tag._space_map = map_func_save;
 
-       /* Remap EFI runtime. */
-       if (mmap_start != 0 && system_table != 0)
-               remap_efi_runtime(system_table);
-
        pmap_avail_fixup();
 
        uvmexp.pagesize = PAGE_SIZE;
@@ -1038,94 +1032,6 @@ initarm(struct arm64_bootparams *abp)
        splraise(IPL_IPI);
 }
 
-void
-remap_efi_runtime(EFI_PHYSICAL_ADDRESS system_table)
-{
-       EFI_SYSTEM_TABLE *st = (EFI_SYSTEM_TABLE *)system_table;
-       EFI_RUNTIME_SERVICES *rs;
-       EFI_STATUS status;
-       EFI_MEMORY_DESCRIPTOR *src;
-       EFI_MEMORY_DESCRIPTOR *dst;
-       EFI_PHYSICAL_ADDRESS phys_start = ~0ULL;
-       EFI_PHYSICAL_ADDRESS phys_end = 0;
-       EFI_VIRTUAL_ADDRESS virt_start;
-       vsize_t space;
-       int i, count = 0;
-       paddr_t pa;
-
-       /*
-        * Pick a random address somewhere in the lower half of the
-        * usable virtual address space.
-        */
-       space = 3 * (VM_MAX_ADDRESS - VM_MIN_ADDRESS) / 4;
-       virt_start = VM_MIN_ADDRESS +
-           ((vsize_t)arc4random_uniform(space >> PAGE_SHIFT) << PAGE_SHIFT);
-
-       /* Make sure the EFI system table is mapped. */
-       pmap_map_early(system_table, sizeof(EFI_SYSTEM_TABLE));
-       rs = st->RuntimeServices;
-
-       /*
-        * Make sure memory for EFI runtime services is mapped.  We
-        * only map normal memory at this point and pray that the
-        * SetVirtualAddressMap call doesn't need anything else.
-        */
-       src = mmap;
-       for (i = 0; i < mmap_size / mmap_desc_size; i++) {
-               if (src->Attribute & EFI_MEMORY_RUNTIME) {
-                       if (src->Attribute & EFI_MEMORY_WB) {
-                               pmap_map_early(src->PhysicalStart,
-                                   src->NumberOfPages * PAGE_SIZE);
-                               phys_start = MIN(phys_start,
-                                   src->PhysicalStart);
-                               phys_end = MAX(phys_end, src->PhysicalStart +
-                                   src->NumberOfPages * PAGE_SIZE);
-                       }
-                       count++;
-               }
-               src = NextMemoryDescriptor(src, mmap_desc_size);
-       }
-
-       /* Allocate memory descriptors for new mappings. */
-       pa = pmap_steal_avail(count * mmap_desc_size,
-           mmap_desc_size, NULL);
-       memset((void *)pa, 0, count * mmap_desc_size);
-
-       /*
-        * Establish new mappings.  Apparently some EFI code relies on
-        * the offset between code and data remaining the same so pick
-        * virtual addresses for normal memory that meet that
-        * constraint.  Other mappings are simply tagged to the end of
-        * the last normal memory mapping.
-        */
-       src = mmap;
-       dst = (EFI_MEMORY_DESCRIPTOR *)pa;
-       for (i = 0; i < mmap_size / mmap_desc_size; i++) {
-               if (src->Attribute & EFI_MEMORY_RUNTIME) {
-                       if (src->Attribute & EFI_MEMORY_WB) {
-                               src->VirtualStart = virt_start +
-                                   (src->PhysicalStart - phys_start);
-                       } else {
-                               src->VirtualStart = virt_start +
-                                    (phys_end - phys_start);
-                               phys_end += src->NumberOfPages * PAGE_SIZE;
-                       }
-                       /* Mask address to make sure it fits in our pmap. */
-                       src->VirtualStart &= ((1ULL << USER_SPACE_BITS) - 1);
-                       memcpy(dst, src, mmap_desc_size);
-                       dst = NextMemoryDescriptor(dst, mmap_desc_size);
-               }
-               src = NextMemoryDescriptor(src, mmap_desc_size);
-       }
-
-       /* Install new mappings. */
-       dst = (EFI_MEMORY_DESCRIPTOR *)pa;
-       status = rs->SetVirtualAddressMap(count * mmap_desc_size,
-           mmap_desc_size, mmap_desc_ver, dst);
-       if (status != EFI_SUCCESS)
-               printf("SetVirtualAddressMap failed: %lu\n", status);
-}
-
 char bootargs[256];
 
 void
index a4b41b0..d4de157 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: efi.c,v 1.12 2022/01/01 18:52:37 kettenis Exp $       */
+/*     $OpenBSD: efi.c,v 1.13 2022/07/27 21:01:38 kettenis Exp $       */
 
 /*
  * Copyright (c) 2017 Mark Kettenis <kettenis@openbsd.org>
 
 #include <dev/clock_subr.h>
 
+/*
+ * We need a large address space to allow identity mapping of physical
+ * memory on some machines.
+ */
+#define EFI_SPACE_BITS 48
+
 extern todr_chip_handle_t todr_handle;
 
 extern uint32_t mmap_size;
@@ -65,6 +71,7 @@ struct cfdriver efi_cd = {
        NULL, "efi", DV_DULL
 };
 
+void   efi_remap_runtime(struct efi_softc *);
 void   efi_enter(struct efi_softc *);
 void   efi_leave(struct efi_softc *);
 int    efi_gettime(struct todr_chip_handle *, struct timeval *);
@@ -86,8 +93,6 @@ efi_attach(struct device *parent, struct device *self, void *aux)
        uint64_t system_table;
        bus_space_handle_t ioh;
        EFI_SYSTEM_TABLE *st;
-       EFI_RUNTIME_SERVICES *rs;
-       EFI_MEMORY_DESCRIPTOR *desc;
        EFI_TIME time;
        EFI_STATUS status;
        uint16_t major, minor;
@@ -100,13 +105,13 @@ efi_attach(struct device *parent, struct device *self, void *aux)
        KASSERT(system_table);
 
        if (bus_space_map(faa->fa_iot, system_table, sizeof(EFI_SYSTEM_TABLE),
-           BUS_SPACE_MAP_LINEAR | BUS_SPACE_MAP_PREFETCHABLE, &ioh)) {
+           BUS_SPACE_MAP_LINEAR | BUS_SPACE_MAP_CACHEABLE, &ioh)) {
                printf(": can't map system table\n");
                return;
        }
 
        st = bus_space_vaddr(faa->fa_iot, ioh);
-       rs = st->RuntimeServices;
+       sc->sc_rs = st->RuntimeServices;
 
        major = st->Hdr.Revision >> 16;
        minor = st->Hdr.Revision & 0xffff;
@@ -115,61 +120,8 @@ efi_attach(struct device *parent, struct device *self, void *aux)
                printf(".%d", minor % 10);
        printf("\n");
 
-       /*
-        * We don't really want some random executable non-OpenBSD
-        * code lying around in kernel space.  So create a separate
-        * pmap and only activate it when we call runtime services.
-        */
-       sc->sc_pm = pmap_create();
-       sc->sc_pm->pm_privileged = 1;
-
-       desc = mmap;
-       for (i = 0; i < mmap_size / mmap_desc_size; i++) {
-               if (desc->Attribute & EFI_MEMORY_RUNTIME) {
-                       vaddr_t va = desc->VirtualStart;
-                       paddr_t pa = desc->PhysicalStart;
-                       int npages = desc->NumberOfPages;
-                       vm_prot_t prot = PROT_READ | PROT_WRITE;
-
-#ifdef EFI_DEBUG
-                       printf("type 0x%x pa 0x%llx va 0x%llx pages 0x%llx attr 0x%llx\n",
-                           desc->Type, desc->PhysicalStart,
-                           desc->VirtualStart, desc->NumberOfPages,
-                           desc->Attribute);
-#endif
-
-                       /*
-                        * Normal memory is expected to be "write
-                        * back" cacheable.  Everything else is mapped
-                        * as device memory.
-                        */
-                       if ((desc->Attribute & EFI_MEMORY_WB) == 0)
-                               pa |= PMAP_DEVICE;
-
-                       /*
-                        * Only make pages marked as runtime service code
-                        * executable.  This violates the standard but it
-                        * seems we can get away with it.
-                        */
-                       if (desc->Type == EfiRuntimeServicesCode)
-                               prot |= PROT_EXEC;
-
-                       if (desc->Attribute & EFI_MEMORY_RP)
-                               prot &= ~PROT_READ;
-                       if (desc->Attribute & EFI_MEMORY_XP)
-                               prot &= ~PROT_EXEC;
-                       if (desc->Attribute & EFI_MEMORY_RO)
-                               prot &= ~PROT_WRITE;
-
-                       while (npages--) {
-                               pmap_enter(sc->sc_pm, va, pa, prot,
-                                  prot | PMAP_WIRED);
-                               va += PAGE_SIZE;
-                               pa += PAGE_SIZE;
-                       }
-               }
-               desc = NextMemoryDescriptor(desc, mmap_desc_size);
-       }
+       efi_remap_runtime(sc);
+       sc->sc_rs = st->RuntimeServices;
 
        /*
         * The FirmwareVendor and ConfigurationTable fields have been
@@ -206,30 +158,236 @@ efi_attach(struct device *parent, struct device *self, void *aux)
                config_found(self, &fa, NULL);
        }
        
-       if (rs == NULL)
-               return;
-
        efi_enter(sc);
-       status = rs->GetTime(&time, NULL);
+       status = sc->sc_rs->GetTime(&time, NULL);
        efi_leave(sc);
        if (status != EFI_SUCCESS)
                return;
 
-       sc->sc_rs = rs;
        sc->sc_todr.cookie = sc;
        sc->sc_todr.todr_gettime = efi_gettime;
        sc->sc_todr.todr_settime = efi_settime;
        todr_handle = &sc->sc_todr;
 }
 
+void
+efi_remap_runtime(struct efi_softc *sc)
+{
+       EFI_MEMORY_DESCRIPTOR *src;
+       EFI_MEMORY_DESCRIPTOR *dst;
+       EFI_MEMORY_DESCRIPTOR *vmap;
+       EFI_PHYSICAL_ADDRESS phys_start = ~0ULL;
+       EFI_PHYSICAL_ADDRESS phys_end = 0;
+       EFI_VIRTUAL_ADDRESS virt_start;
+       EFI_STATUS status;
+       vsize_t space;
+       int count = 0;
+       int i;
+
+       /*
+        * We don't really want some random executable non-OpenBSD
+        * code lying around in kernel space.  So create a separate
+        * pmap and only activate it when we call runtime services.
+        */
+       sc->sc_pm = pmap_create();
+       sc->sc_pm->pm_privileged = 1;
+       sc->sc_pm->have_4_level_pt = 1;
+
+       /*
+        * We need to provide identity mappings for the complete
+        * memory map for the first SetVirtualAddressMap() call.
+        */
+       src = mmap;
+       for (i = 0; i < mmap_size / mmap_desc_size; i++) {
+               if (src->Type != EfiConventionalMemory) {
+                       vaddr_t va = src->PhysicalStart;
+                       paddr_t pa = src->PhysicalStart;
+                       int npages = src->NumberOfPages;
+                       vm_prot_t prot = PROT_READ | PROT_WRITE | PROT_EXEC;
+
+#ifdef EFI_DEBUG
+                       printf("type 0x%x pa 0x%llx va 0x%llx pages 0x%llx attr 0x%llx\n",
+                           src->Type, src->PhysicalStart,
+                           src->VirtualStart, src->NumberOfPages,
+                           src->Attribute);
+#endif
+
+                       /*
+                        * Normal memory is expected to be "write
+                        * back" cacheable.  Everything else is mapped
+                        * as device memory.
+                        */
+                       if ((src->Attribute & EFI_MEMORY_WB) == 0)
+                               pa |= PMAP_DEVICE;
+
+                       if (src->Attribute & EFI_MEMORY_RP)
+                               prot &= ~PROT_READ;
+                       if (src->Attribute & EFI_MEMORY_XP)
+                               prot &= ~PROT_EXEC;
+                       if (src->Attribute & EFI_MEMORY_RO)
+                               prot &= ~PROT_WRITE;
+
+                       while (npages--) {
+                               pmap_enter(sc->sc_pm, va, pa, prot,
+                                  prot | PMAP_WIRED);
+                               va += PAGE_SIZE;
+                               pa += PAGE_SIZE;
+                       }
+               }
+
+               if (src->Attribute & EFI_MEMORY_RUNTIME) {
+                       if (src->Attribute & EFI_MEMORY_WB) {
+                               phys_start = MIN(phys_start,
+                                   src->PhysicalStart);
+                               phys_end = MAX(phys_end, src->PhysicalStart +
+                                   src->NumberOfPages * PAGE_SIZE);
+                       }
+                       count++;
+               }
+
+               src = NextMemoryDescriptor(src, mmap_desc_size);
+       }
+
+       /* Allocate memory descriptors for new mappings. */
+       vmap = km_alloc(round_page(count * mmap_desc_size),
+           &kv_any, &kp_zero, &kd_waitok);
+
+       /*
+        * Pick a random address somewhere in the lower half of the
+        * usable virtual address space.
+        */
+       space = 3 * (VM_MAX_ADDRESS - VM_MIN_ADDRESS) / 4;
+       virt_start = VM_MIN_ADDRESS +
+           ((vsize_t)arc4random_uniform(space >> PAGE_SHIFT) << PAGE_SHIFT);
+
+       /*
+        * Establish new mappings.  Apparently some EFI code relies on
+        * the offset between code and data remaining the same so pick
+        * virtual addresses for normal memory that meets that
+        * constraint.  Other mappings are simply tagged on to the end
+        * of the last normal memory mapping.
+        */
+       src = mmap;
+       dst = vmap;
+       for (i = 0; i < mmap_size / mmap_desc_size; i++) {
+               if (src->Attribute & EFI_MEMORY_RUNTIME) {
+                       memcpy(dst, src, mmap_desc_size);
+                       if (dst->Attribute & EFI_MEMORY_WB) {
+                               dst->VirtualStart = virt_start +
+                                   (dst->PhysicalStart - phys_start);
+                       } else {
+                               dst->VirtualStart = virt_start +
+                                    (phys_end - phys_start);
+                               phys_end += dst->NumberOfPages * PAGE_SIZE;
+                       }
+                       /* Mask address to make sure it fits in our pmap. */
+                       dst->VirtualStart &= ((1ULL << EFI_SPACE_BITS) - 1);
+                       dst = NextMemoryDescriptor(dst, mmap_desc_size);
+               }
+
+               src = NextMemoryDescriptor(src, mmap_desc_size);
+       }
+
+       efi_enter(sc);
+       status = sc->sc_rs->SetVirtualAddressMap(count * mmap_desc_size,
+           mmap_desc_size, mmap_desc_ver, vmap);
+       efi_leave(sc);
+
+       /*
+        * If remapping fails, undo the translations.
+        */
+       if (status != EFI_SUCCESS) {
+               src = mmap;
+               dst = vmap;
+               for (i = 0; i < mmap_size / mmap_desc_size; i++) {
+                       if (src->Attribute & EFI_MEMORY_RUNTIME) {
+                               dst->VirtualStart = src->PhysicalStart;
+                               dst = NextMemoryDescriptor(dst, mmap_desc_size);
+                       }
+                       src = NextMemoryDescriptor(src, mmap_desc_size);
+               }
+       }
+
+       /*
+        * Remove all mappings from the pmap.
+        */
+       src = mmap;
+       for (i = 0; i < mmap_size / mmap_desc_size; i++) {
+               if (src->Type != EfiConventionalMemory) {
+                       pmap_remove(sc->sc_pm, src->PhysicalStart,
+                           src->PhysicalStart + src->NumberOfPages * PAGE_SIZE);
+               }
+               src = NextMemoryDescriptor(src, mmap_desc_size);
+       }
+
+       /*
+        * Add back the (translated) runtime mappings.
+        */
+       src = vmap;
+       for (i = 0; i < count; i++) {
+               if (src->Attribute & EFI_MEMORY_RUNTIME) {
+                       vaddr_t va = src->VirtualStart;
+                       paddr_t pa = src->PhysicalStart;
+                       int npages = src->NumberOfPages;
+                       vm_prot_t prot = PROT_READ | PROT_WRITE;
+
+#ifdef EFI_DEBUG
+                       printf("type 0x%x pa 0x%llx va 0x%llx pages 0x%llx attr 0x%llx\n",
+                           src->Type, src->PhysicalStart,
+                           src->VirtualStart, src->NumberOfPages,
+                           src->Attribute);
+#endif
+
+                       /*
+                        * Normal memory is expected to be "write
+                        * back" cacheable.  Everything else is mapped
+                        * as device memory.
+                        */
+                       if ((src->Attribute & EFI_MEMORY_WB) == 0)
+                               pa |= PMAP_DEVICE;
+
+                       /*
+                        * Only make pages marked as runtime service code
+                        * executable.  This violates the standard but it
+                        * seems we can get away with it.
+                        */
+                       if (src->Type == EfiRuntimeServicesCode)
+                               prot |= PROT_EXEC;
+
+                       if (src->Attribute & EFI_MEMORY_RP)
+                               prot &= ~PROT_READ;
+                       if (src->Attribute & EFI_MEMORY_XP)
+                               prot &= ~PROT_EXEC;
+                       if (src->Attribute & EFI_MEMORY_RO)
+                               prot &= ~PROT_WRITE;
+
+                       while (npages--) {
+                               pmap_enter(sc->sc_pm, va, pa, prot,
+                                  prot | PMAP_WIRED);
+                               va += PAGE_SIZE;
+                               pa += PAGE_SIZE;
+                       }
+               }
+
+               src = NextMemoryDescriptor(src, mmap_desc_size);
+       }
+
+       km_free(vmap, round_page(count * mmap_desc_size), &kv_any, &kp_zero);
+}
+
 void
 efi_enter(struct efi_softc *sc)
 {
        struct pmap *pm = sc->sc_pm;
+       uint64_t tcr;
 
        sc->sc_psw = intr_disable();
        WRITE_SPECIALREG(ttbr0_el1, pmap_kernel()->pm_pt0pa);
        __asm volatile("isb");
+       tcr = READ_SPECIALREG(tcr_el1);
+       tcr &= ~TCR_T0SZ(0x3f);
+       tcr |= TCR_T0SZ(64 - EFI_SPACE_BITS);
+       WRITE_SPECIALREG(tcr_el1, tcr);
        cpu_setttb(pm->pm_asid, pm->pm_pt0pa);
 
        fpu_kernel_enter();
@@ -239,11 +397,16 @@ void
 efi_leave(struct efi_softc *sc)
 {
        struct pmap *pm = curcpu()->ci_curpm;
+       uint64_t tcr;
 
        fpu_kernel_exit();
 
        WRITE_SPECIALREG(ttbr0_el1, pmap_kernel()->pm_pt0pa);
        __asm volatile("isb");
+       tcr = READ_SPECIALREG(tcr_el1);
+       tcr &= ~TCR_T0SZ(0x3f);
+       tcr |= TCR_T0SZ(64 - USER_SPACE_BITS);
+       WRITE_SPECIALREG(tcr_el1, tcr);
        cpu_setttb(pm->pm_asid, pm->pm_pt0pa);
        intr_restore(sc->sc_psw);
 }