Add smmu(4), a driver for the ARM System MMU.

author		patrick <patrick@openbsd.org>
		Sun, 28 Feb 2021 21:39:31 +0000 (21:39 +0000)
committer	patrick <patrick@openbsd.org>
		Sun, 28 Feb 2021 21:39:31 +0000 (21:39 +0000)

This IOMMU is basically a regular ARM CPU MMU re-used for I/O devices.
Implementations can have a mix of stage-2 only and stage-1/stage-2
context banks (domains).  The IOMMU allows different ways of grouping
devices into a single domain.

This implementation only supports SMMUv2, since there is basically
no relevant SMMUv1 hardware.  It also only supports AArch64
pagetables, the same as our pmap.  Hence lots of code was taken from
there.  There is no support for 32-bit pagetables, which would have
also been needed for SMMUv1 support.  I have not yet seen any
machines with SMMUv3, which will probably need a new driver.

There is some work to be done, but the code works and it's about
time it hits the tree.

ok kettenis@

sys/arch/arm64/dev/smmu.c [new file with mode: 0644]
sys/arch/arm64/dev/smmu_acpi.c [new file with mode: 0644]
sys/arch/arm64/dev/smmu_fdt.c [new file with mode: 0644]
sys/arch/arm64/dev/smmureg.h [new file with mode: 0644]
sys/arch/arm64/dev/smmuvar.h [new file with mode: 0644]

diff --git a/sys/arch/arm64/dev/smmu.c b/sys/arch/arm64/dev/smmu.c
new file mode 100644 (file)
index 0000000..175af79
--- /dev/null
@@ -0,0 +1,1404 @@
+/* $OpenBSD: smmu.c,v 1.1 2021/02/28 21:39:31 patrick Exp $ */
+/*
+ * Copyright (c) 2008-2009,2014-2016 Dale Rahn <drahn@dalerahn.com>
+ * Copyright (c) 2021 Patrick Wildt <patrick@blueri.se>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/device.h>
+#include <sys/pool.h>
+#include <sys/atomic.h>
+
+#include <machine/bus.h>
+
+#include <uvm/uvm_extern.h>
+#include <arm64/vmparam.h>
+#include <arm64/pmap.h>
+
+#include <dev/pci/pcivar.h>
+#include <arm64/dev/smmuvar.h>
+#include <arm64/dev/smmureg.h>
+
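+/*
+ * The smmuvp{0,1,2,3} structures shadow the four levels of an
+ * AArch64 translation table.  Each level pairs the hardware page
+ * table entries (l0..l3) with kernel virtual pointers to the
+ * next-level tables, so the driver can walk its tree without
+ * having to translate physical addresses back.
+ */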
+struct smmuvp0 {
+       uint64_t l0[VP_IDX0_CNT];
+       struct smmuvp1 *vp[VP_IDX0_CNT];
+};
+
+struct smmuvp1 {
+       uint64_t l1[VP_IDX1_CNT];
+       struct smmuvp2 *vp[VP_IDX1_CNT];
+};
+
+struct smmuvp2 {
+       uint64_t l2[VP_IDX2_CNT];
+       struct smmuvp3 *vp[VP_IDX2_CNT];
+};
+
+struct smmuvp3 {
+       uint64_t l3[VP_IDX3_CNT];
+       struct pte_desc *vp[VP_IDX3_CNT];
+};
+
+CTASSERT(sizeof(struct smmuvp0) == sizeof(struct smmuvp1));
+CTASSERT(sizeof(struct smmuvp0) == sizeof(struct smmuvp2));
+CTASSERT(sizeof(struct smmuvp0) == sizeof(struct smmuvp3));
+
+struct pte_desc {
+       LIST_ENTRY(pte_desc) pted_pv_list;
+       uint64_t pted_pte;
+       struct smmu_domain *pted_dom;
+       vaddr_t pted_va;
+};
+
+uint32_t smmu_gr0_read_4(struct smmu_softc *, bus_size_t);
+void smmu_gr0_write_4(struct smmu_softc *, bus_size_t, uint32_t);
+uint32_t smmu_gr1_read_4(struct smmu_softc *, bus_size_t);
+void smmu_gr1_write_4(struct smmu_softc *, bus_size_t, uint32_t);
+uint32_t smmu_cb_read_4(struct smmu_softc *, int, bus_size_t);
+void smmu_cb_write_4(struct smmu_softc *, int, bus_size_t, uint32_t);
+uint64_t smmu_cb_read_8(struct smmu_softc *, int, bus_size_t);
+void smmu_cb_write_8(struct smmu_softc *, int, bus_size_t, uint64_t);
+
+void smmu_tlb_sync_global(struct smmu_softc *);
+void smmu_tlb_sync_context(struct smmu_domain *);
+
+struct smmu_domain *smmu_domain_lookup(struct smmu_softc *, uint32_t);
+struct smmu_domain *smmu_domain_create(struct smmu_softc *, uint32_t);
+
+void smmu_set_l1(struct smmu_domain *, uint64_t, struct smmuvp1 *);
+void smmu_set_l2(struct smmu_domain *, uint64_t, struct smmuvp1 *,
+    struct smmuvp2 *);
+void smmu_set_l3(struct smmu_domain *, uint64_t, struct smmuvp2 *,
+    struct smmuvp3 *);
+
+struct pte_desc *smmu_vp_lookup(struct smmu_domain *, vaddr_t, uint64_t **);
+int smmu_vp_enter(struct smmu_domain *, vaddr_t, struct pte_desc *, int);
+
+void smmu_fill_pte(struct smmu_domain *, vaddr_t, paddr_t, struct pte_desc *,
+    vm_prot_t, int, int);
+void smmu_pte_update(struct pte_desc *, uint64_t *);
+void smmu_pte_insert(struct pte_desc *);
+void smmu_pte_remove(struct pte_desc *);
+
+int smmu_prepare(struct smmu_domain *, vaddr_t, paddr_t, vm_prot_t, int, int);
+int smmu_enter(struct smmu_domain *, vaddr_t, paddr_t, vm_prot_t, int, int);
+void smmu_remove(struct smmu_domain *, vaddr_t);
+
+int smmu_load_map(struct smmu_domain *, bus_dmamap_t);
+void smmu_unload_map(struct smmu_domain *, bus_dmamap_t);
+
+int smmu_dmamap_create(bus_dma_tag_t, bus_size_t, int,
+     bus_size_t, bus_size_t, int, bus_dmamap_t *);
+void smmu_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
+int smmu_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *,
+     bus_size_t, struct proc *, int);
+int smmu_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t,
+     struct mbuf *, int);
+int smmu_dmamap_load_uio(bus_dma_tag_t, bus_dmamap_t,
+     struct uio *, int);
+int smmu_dmamap_load_raw(bus_dma_tag_t, bus_dmamap_t,
+     bus_dma_segment_t *, int, bus_size_t, int);
+void smmu_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
+void smmu_dmamap_sync(bus_dma_tag_t, bus_dmamap_t,
+     bus_addr_t, bus_size_t, int);
+
+int smmu_dmamem_alloc(bus_dma_tag_t, bus_size_t, bus_size_t,
+     bus_size_t, bus_dma_segment_t *, int, int *, int);
+void smmu_dmamem_free(bus_dma_tag_t, bus_dma_segment_t *, int);
+int smmu_dmamem_map(bus_dma_tag_t, bus_dma_segment_t *,
+     int, size_t, caddr_t *, int);
+void smmu_dmamem_unmap(bus_dma_tag_t, caddr_t, size_t);
+paddr_t smmu_dmamem_mmap(bus_dma_tag_t, bus_dma_segment_t *,
+     int, off_t, int, int);
+
+struct cfdriver smmu_cd = {
+       NULL, "smmu", DV_DULL
+};
+
+int
+smmu_attach(struct smmu_softc *sc)
+{
+       uint32_t reg;
+       int i;
+
+       printf("\n");
+
+       SIMPLEQ_INIT(&sc->sc_domains);
+
+       pool_init(&sc->sc_pted_pool, sizeof(struct pte_desc), 0, IPL_VM, 0,
+           "smmu_pted", NULL);
+       pool_setlowat(&sc->sc_pted_pool, 20);
+       pool_init(&sc->sc_vp_pool, sizeof(struct smmuvp0), PAGE_SIZE, IPL_VM, 0,
+           "smmu_vp", NULL);
+       pool_setlowat(&sc->sc_vp_pool, 20);
+
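+       /*
+        * Probe the ID registers: IDR0 describes stage-1/stage-2
+        * support and the stream model, IDR1 the page size and the
+        * number of context banks, IDR2 the supported address sizes.
+        */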
+       reg = smmu_gr0_read_4(sc, SMMU_IDR0);
+       if (reg & SMMU_IDR0_S1TS)
+               sc->sc_has_s1 = 1;
+       /*
+        * Marvell's 8040 does not support 64-bit writes, hence it
+        * is not possible to invalidate stage-2, because the ASID
+        * is part of the upper 32-bits and they'd be ignored.
+        */
+       if (sc->sc_is_ap806)
+               sc->sc_has_s1 = 0;
+       if (reg & SMMU_IDR0_S2TS)
+               sc->sc_has_s2 = 1;
+       if (!sc->sc_has_s1 && !sc->sc_has_s2)
+               return 1;
+       if (reg & SMMU_IDR0_EXIDS)
+               sc->sc_has_exids = 1;
+
+       sc->sc_num_streams = 1 << SMMU_IDR0_NUMSIDB(reg);
+       if (sc->sc_has_exids)
+               sc->sc_num_streams = 1 << 16;
+       sc->sc_stream_mask = sc->sc_num_streams - 1;
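+       /*
+        * With stream matching (SMS) the usable streams are bounded
+        * by the number of stream match register groups; without it,
+        * stream IDs index the S2CRs directly.
+        */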
+       if (reg & SMMU_IDR0_SMS) {
+               sc->sc_num_streams = SMMU_IDR0_NUMSMRG(reg);
+               if (sc->sc_num_streams == 0)
+                       return 1;
+               sc->sc_smr = mallocarray(sc->sc_num_streams,
+                   sizeof(*sc->sc_smr), M_DEVBUF, M_WAITOK | M_ZERO);
+       }
+
+       reg = smmu_gr0_read_4(sc, SMMU_IDR1);
+       sc->sc_pagesize = 4 * 1024;
+       if (reg & SMMU_IDR1_PAGESIZE_64K)
+               sc->sc_pagesize = 64 * 1024;
+       sc->sc_numpage = 1 << (SMMU_IDR1_NUMPAGENDXB(reg) + 1);
+
+       /* 0 to NUMS2CB == stage-2, NUMS2CB to NUMCB == stage-1 */
+       sc->sc_num_context_banks = SMMU_IDR1_NUMCB(reg);
+       sc->sc_num_s2_context_banks = SMMU_IDR1_NUMS2CB(reg);
+       if (sc->sc_num_s2_context_banks > sc->sc_num_context_banks)
+               return 1;
+       sc->sc_cb = mallocarray(sc->sc_num_context_banks,
+           sizeof(*sc->sc_cb), M_DEVBUF, M_WAITOK | M_ZERO);
+
+       reg = smmu_gr0_read_4(sc, SMMU_IDR2);
+       if (reg & SMMU_IDR2_VMID16S)
+               sc->sc_has_vmid16s = 1;
+
+       switch (SMMU_IDR2_IAS(reg)) {
+       case SMMU_IDR2_IAS_32BIT:
+               sc->sc_ipa_bits = 32;
+               break;
+       case SMMU_IDR2_IAS_36BIT:
+               sc->sc_ipa_bits = 36;
+               break;
+       case SMMU_IDR2_IAS_40BIT:
+               sc->sc_ipa_bits = 40;
+               break;
+       case SMMU_IDR2_IAS_42BIT:
+               sc->sc_ipa_bits = 42;
+               break;
+       case SMMU_IDR2_IAS_44BIT:
+               sc->sc_ipa_bits = 44;
+               break;
+       case SMMU_IDR2_IAS_48BIT:
+       default:
+               sc->sc_ipa_bits = 48;
+               break;
+       }
+       switch (SMMU_IDR2_OAS(reg)) {
+       case SMMU_IDR2_OAS_32BIT:
+               sc->sc_pa_bits = 32;
+               break;
+       case SMMU_IDR2_OAS_36BIT:
+               sc->sc_pa_bits = 36;
+               break;
+       case SMMU_IDR2_OAS_40BIT:
+               sc->sc_pa_bits = 40;
+               break;
+       case SMMU_IDR2_OAS_42BIT:
+               sc->sc_pa_bits = 42;
+               break;
+       case SMMU_IDR2_OAS_44BIT:
+               sc->sc_pa_bits = 44;
+               break;
+       case SMMU_IDR2_OAS_48BIT:
+       default:
+               sc->sc_pa_bits = 48;
+               break;
+       }
+       switch (SMMU_IDR2_UBS(reg)) {
+       case SMMU_IDR2_UBS_32BIT:
+               sc->sc_va_bits = 32;
+               break;
+       case SMMU_IDR2_UBS_36BIT:
+               sc->sc_va_bits = 36;
+               break;
+       case SMMU_IDR2_UBS_40BIT:
+               sc->sc_va_bits = 40;
+               break;
+       case SMMU_IDR2_UBS_42BIT:
+               sc->sc_va_bits = 42;
+               break;
+       case SMMU_IDR2_UBS_44BIT:
+               sc->sc_va_bits = 44;
+               break;
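+       /* A 49-bit UBS is clamped to the 48-bit VA this driver supports. */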
+       case SMMU_IDR2_UBS_49BIT:
+       default:
+               sc->sc_va_bits = 48;
+               break;
+       }
+
+#if 1
+       reg = smmu_gr0_read_4(sc, SMMU_IDR0);
+       printf("%s: idr0 0x%08x\n", __func__, reg);
+       reg = smmu_gr0_read_4(sc, SMMU_IDR1);
+       printf("%s: idr1 0x%08x\n", __func__, reg);
+       reg = smmu_gr0_read_4(sc, SMMU_IDR2);
+       printf("%s: idr2 0x%08x\n", __func__, reg);
+
+       printf("%s: pagesize %zu numpage %u\n", __func__, sc->sc_pagesize,
+           sc->sc_numpage);
+       printf("%s: total cb %u stage2-only cb %u\n", __func__,
+           sc->sc_num_context_banks, sc->sc_num_s2_context_banks);
+#endif
+
+       /* Clear Global Fault Status Register */
+       smmu_gr0_write_4(sc, SMMU_SGFSR, smmu_gr0_read_4(sc, SMMU_SGFSR));
+
+       for (i = 0; i < sc->sc_num_streams; i++) {
+#if 1
+               /* Setup all streams to fault by default */
+               smmu_gr0_write_4(sc, SMMU_S2CR(i), SMMU_S2CR_TYPE_FAULT);
+#else
+               /* For stream indexing, USFCFG bypass isn't enough! */
+               smmu_gr0_write_4(sc, SMMU_S2CR(i), SMMU_S2CR_TYPE_BYPASS);
+#endif
+               /*  Disable all stream map registers */
+               if (sc->sc_smr)
+                       smmu_gr0_write_4(sc, SMMU_SMR(i), 0);
+       }
+
+       for (i = 0; i < sc->sc_num_context_banks; i++) {
+               /* Disable Context Bank */
+               smmu_cb_write_4(sc, i, SMMU_CB_SCTLR, 0);
+               /* Clear Context Bank Fault Status Register */
+               smmu_cb_write_4(sc, i, SMMU_CB_FSR, SMMU_CB_FSR_MASK);
+       }
+
+       /* Invalidate TLB */
+       smmu_gr0_write_4(sc, SMMU_TLBIALLH, ~0);
+       smmu_gr0_write_4(sc, SMMU_TLBIALLNSNH, ~0);
+
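+       /*
+        * On the MMU-500, enable TLB maintenance for SMR/S2CR updates
+        * and turn off the next-page prefetcher (CPRE), which is
+        * affected by errata.
+        */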
+       if (sc->sc_is_mmu500) {
+               reg = smmu_gr0_read_4(sc, SMMU_SACR);
+               if (SMMU_IDR7_MAJOR(smmu_gr0_read_4(sc, SMMU_IDR7)) >= 2)
+                       reg &= ~SMMU_SACR_MMU500_CACHE_LOCK;
+               reg |= SMMU_SACR_MMU500_SMTNMB_TLBEN |
+                   SMMU_SACR_MMU500_S2CRB_TLBEN;
+               smmu_gr0_write_4(sc, SMMU_SACR, reg);
+               for (i = 0; i < sc->sc_num_context_banks; i++) {
+                       reg = smmu_cb_read_4(sc, i, SMMU_CB_ACTLR);
+                       reg &= ~SMMU_CB_ACTLR_CPRE;
+                       smmu_cb_write_4(sc, i, SMMU_CB_ACTLR, reg);
+               }
+       }
+
+       /* Enable SMMU */
+       reg = smmu_gr0_read_4(sc, SMMU_SCR0);
+       reg &= ~(SMMU_SCR0_CLIENTPD |
+           SMMU_SCR0_FB | SMMU_SCR0_BSU_MASK);
+#if 1
+       /* Disable bypass for unknown streams */
+       reg |= SMMU_SCR0_USFCFG;
+#else
+       /* Enable bypass for unknown streams */
+       reg &= ~SMMU_SCR0_USFCFG;
+#endif
+       reg |= SMMU_SCR0_GFRE | SMMU_SCR0_GFIE |
+           SMMU_SCR0_GCFGFRE | SMMU_SCR0_GCFGFIE |
+           SMMU_SCR0_VMIDPNE | SMMU_SCR0_PTM;
+       if (sc->sc_has_exids)
+               reg |= SMMU_SCR0_EXIDENABLE;
+       if (sc->sc_has_vmid16s)
+               reg |= SMMU_SCR0_VMID16EN;
+
+       smmu_tlb_sync_global(sc);
+       smmu_gr0_write_4(sc, SMMU_SCR0, reg);
+
+       return 0;
+}
+
+int
+smmu_global_irq(void *cookie)
+{
+       struct smmu_softc *sc = cookie;
+       uint32_t reg;
+
+       reg = smmu_gr0_read_4(sc, SMMU_SGFSR);
+       if (reg == 0)
+               return 0;
+
+       printf("%s:%d: SGFSR 0x%08x SGFSYNR0 0x%08x SGFSYNR1 0x%08x "
+           "SGFSYNR2 0x%08x\n", __func__, __LINE__, reg,
+           smmu_gr0_read_4(sc, SMMU_SGFSYNR0),
+           smmu_gr0_read_4(sc, SMMU_SGFSYNR1),
+           smmu_gr0_read_4(sc, SMMU_SGFSYNR2));
+
+       smmu_gr0_write_4(sc, SMMU_SGFSR, reg);
+
+       return 1;
+}
+
+int
+smmu_context_irq(void *cookie)
+{
+       struct smmu_cb_irq *cbi = cookie;
+       struct smmu_softc *sc = cbi->cbi_sc;
+       uint32_t reg;
+
+       reg = smmu_cb_read_4(sc, cbi->cbi_idx, SMMU_CB_FSR);
+       if ((reg & SMMU_CB_FSR_MASK) == 0)
+               return 0;
+
+       printf("%s:%d: FSR 0x%08x FSYNR0 0x%08x FAR 0x%llx "
+           "CBFRSYNRA 0x%08x\n", __func__, __LINE__, reg,
+           smmu_cb_read_4(sc, cbi->cbi_idx, SMMU_CB_FSYNR0),
+           smmu_cb_read_8(sc, cbi->cbi_idx, SMMU_CB_FAR),
+           smmu_gr1_read_4(sc, SMMU_CBFRSYNRA(cbi->cbi_idx)));
+
+       smmu_cb_write_4(sc, cbi->cbi_idx, SMMU_CB_FSR, reg);
+
+       return 1;
+}
+
+void
+smmu_tlb_sync_global(struct smmu_softc *sc)
+{
+       int i;
+
+       smmu_gr0_write_4(sc, SMMU_STLBGSYNC, ~0);
+       for (i = 1000; i > 0; i--) {
+               if ((smmu_gr0_read_4(sc, SMMU_STLBGSTATUS) &
+                   SMMU_STLBGSTATUS_GSACTIVE) == 0)
+                       return;
+               delay(1000);
+       }
+
+       printf("%s: global TLB sync timeout\n",
+           sc->sc_dev.dv_xname);
+}
+
+void
+smmu_tlb_sync_context(struct smmu_domain *dom)
+{
+       struct smmu_softc *sc = dom->sd_sc;
+       int i;
+
+       smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_TLBSYNC, ~0);
+       for (i = 1000; i > 0; i--) {
+               if ((smmu_cb_read_4(sc, dom->sd_cb_idx, SMMU_CB_TLBSTATUS) &
+                   SMMU_CB_TLBSTATUS_SACTIVE) == 0)
+                       return;
+               delay(1000);
+       }
+
+       printf("%s: context TLB sync timeout\n",
+           sc->sc_dev.dv_xname);
+}
+
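+/*
+ * Register accessors.  The SMMU register file is organized in
+ * pages of sc_pagesize bytes: page 0 is global register space 0,
+ * page 1 is global register space 1, and the context banks start
+ * at SMMU_CB_BASE, sc_numpage pages in.
+ */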
+uint32_t
+smmu_gr0_read_4(struct smmu_softc *sc, bus_size_t off)
+{
+       uint32_t base = 0 * sc->sc_pagesize;
+
+       return bus_space_read_4(sc->sc_iot, sc->sc_ioh, base + off);
+}
+
+void
+smmu_gr0_write_4(struct smmu_softc *sc, bus_size_t off, uint32_t val)
+{
+       uint32_t base = 0 * sc->sc_pagesize;
+
+       bus_space_write_4(sc->sc_iot, sc->sc_ioh, base + off, val);
+}
+
+uint32_t
+smmu_gr1_read_4(struct smmu_softc *sc, bus_size_t off)
+{
+       uint32_t base = 1 * sc->sc_pagesize;
+
+       return bus_space_read_4(sc->sc_iot, sc->sc_ioh, base + off);
+}
+
+void
+smmu_gr1_write_4(struct smmu_softc *sc, bus_size_t off, uint32_t val)
+{
+       uint32_t base = 1 * sc->sc_pagesize;
+
+       bus_space_write_4(sc->sc_iot, sc->sc_ioh, base + off, val);
+}
+
+uint32_t
+smmu_cb_read_4(struct smmu_softc *sc, int idx, bus_size_t off)
+{
+       uint32_t base;
+
+       base = sc->sc_numpage * sc->sc_pagesize; /* SMMU_CB_BASE */
+       base += idx * sc->sc_pagesize; /* SMMU_CBn_BASE */
+
+       return bus_space_read_4(sc->sc_iot, sc->sc_ioh, base + off);
+}
+
+void
+smmu_cb_write_4(struct smmu_softc *sc, int idx, bus_size_t off, uint32_t val)
+{
+       uint32_t base;
+
+       base = sc->sc_numpage * sc->sc_pagesize; /* SMMU_CB_BASE */
+       base += idx * sc->sc_pagesize; /* SMMU_CBn_BASE */
+
+       bus_space_write_4(sc->sc_iot, sc->sc_ioh, base + off, val);
+}
+
+uint64_t
+smmu_cb_read_8(struct smmu_softc *sc, int idx, bus_size_t off)
+{
+       uint64_t reg;
+       uint32_t base;
+
+       base = sc->sc_numpage * sc->sc_pagesize; /* SMMU_CB_BASE */
+       base += idx * sc->sc_pagesize; /* SMMU_CBn_BASE */
+
+       if (sc->sc_is_ap806) {
+               reg = bus_space_read_4(sc->sc_iot, sc->sc_ioh, base + off + 4);
+               reg <<= 32;
+               reg |= bus_space_read_4(sc->sc_iot, sc->sc_ioh, base + off + 0);
+               return reg;
+       }
+
+       return bus_space_read_8(sc->sc_iot, sc->sc_ioh, base + off);
+}
+
+void
+smmu_cb_write_8(struct smmu_softc *sc, int idx, bus_size_t off, uint64_t val)
+{
+       uint32_t base;
+
+       base = sc->sc_numpage * sc->sc_pagesize; /* SMMU_CB_BASE */
+       base += idx * sc->sc_pagesize; /* SMMU_CBn_BASE */
+
+       if (sc->sc_is_ap806) {
+               bus_space_write_4(sc->sc_iot, sc->sc_ioh, base + off + 4,
+                   val >> 32);
+               bus_space_write_4(sc->sc_iot, sc->sc_ioh, base + off + 0,
+                   val & 0xffffffff);
+               return;
+       }
+
+       bus_space_write_8(sc->sc_iot, sc->sc_ioh, base + off, val);
+}
+
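+/*
+ * Hook a device into its SMMU domain: clone the parent bus_dma tag
+ * and interpose our dmamap functions, so every map loaded through
+ * the returned tag is also entered into the domain's page tables.
+ */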
+bus_dma_tag_t
+smmu_device_hook(struct smmu_softc *sc, uint32_t sid, bus_dma_tag_t dmat)
+{
+       struct smmu_domain *dom;
+
+       dom = smmu_domain_lookup(sc, sid);
+       if (dom == NULL)
+               return dmat;
+       if (dom->sd_parent_dmat == NULL)
+               dom->sd_parent_dmat = dmat;
+       if (dom->sd_parent_dmat != dmat &&
+           memcmp(dom->sd_parent_dmat, dmat, sizeof(*dmat)) != 0) {
+               printf("%s: different dma tag for same stream\n",
+                   sc->sc_dev.dv_xname);
+               return dmat;
+       }
+
+       if (dom->sd_device_dmat == NULL) {
+               dom->sd_device_dmat = malloc(sizeof(*dom->sd_device_dmat),
+                   M_DEVBUF, M_WAITOK);
+               memcpy(dom->sd_device_dmat, dom->sd_parent_dmat,
+                   sizeof(*dom->sd_device_dmat));
+               dom->sd_device_dmat->_cookie = dom;
+               dom->sd_device_dmat->_dmamap_create = smmu_dmamap_create;
+               dom->sd_device_dmat->_dmamap_destroy = smmu_dmamap_destroy;
+               dom->sd_device_dmat->_dmamap_load = smmu_dmamap_load;
+               dom->sd_device_dmat->_dmamap_load_mbuf = smmu_dmamap_load_mbuf;
+               dom->sd_device_dmat->_dmamap_load_uio = smmu_dmamap_load_uio;
+               dom->sd_device_dmat->_dmamap_load_raw = smmu_dmamap_load_raw;
+               dom->sd_device_dmat->_dmamap_unload = smmu_dmamap_unload;
+               dom->sd_device_dmat->_dmamap_sync = smmu_dmamap_sync;
+               dom->sd_device_dmat->_dmamem_map = smmu_dmamem_map;
+       }
+
+       return dom->sd_device_dmat;
+}
+
+void
+smmu_pci_device_hook(void *cookie, uint32_t rid, struct pci_attach_args *pa)
+{
+       struct smmu_softc *sc = cookie;
+
+       printf("%s: rid %x\n", sc->sc_dev.dv_xname, rid);
+       pa->pa_dmat = smmu_device_hook(sc, rid, pa->pa_dmat);
+}
+
+struct smmu_domain *
+smmu_domain_lookup(struct smmu_softc *sc, uint32_t sid)
+{
+       struct smmu_domain *dom;
+
+       printf("%s: looking for %x\n", sc->sc_dev.dv_xname, sid);
+       SIMPLEQ_FOREACH(dom, &sc->sc_domains, sd_list) {
+               if (dom->sd_sid == sid)
+                       return dom;
+       }
+
+       return smmu_domain_create(sc, sid);
+}
+
+struct smmu_domain *
+smmu_domain_create(struct smmu_softc *sc, uint32_t sid)
+{
+       struct smmu_domain *dom;
+       uint32_t iovabits, reg;
+       paddr_t pa;
+       vaddr_t l0va;
+       int i, start, end;
+
+       printf("%s: creating for %x\n", sc->sc_dev.dv_xname, sid);
+       dom = malloc(sizeof(*dom), M_DEVBUF, M_WAITOK | M_ZERO);
+       mtx_init(&dom->sd_mtx, IPL_TTY);
+       dom->sd_sc = sc;
+       dom->sd_sid = sid;
+
+       /* Prefer stage 1 if possible! */
+       if (sc->sc_has_s1) {
+               start = sc->sc_num_s2_context_banks;
+               end = sc->sc_num_context_banks;
+               dom->sd_stage = 1;
+       } else {
+               start = 0;
+               end = sc->sc_num_context_banks;
+               dom->sd_stage = 2;
+       }
+
+       for (i = start; i < end; i++) {
+               if (sc->sc_cb[i] != NULL)
+                       continue;
+               sc->sc_cb[i] = malloc(sizeof(struct smmu_cb),
+                   M_DEVBUF, M_WAITOK | M_ZERO);
+               dom->sd_cb_idx = i;
+               break;
+       }
+       if (i >= end) {
+               free(dom, M_DEVBUF, sizeof(*dom));
+               return NULL;
+       }
+
+       /* Stream indexing is easy */
+       dom->sd_smr_idx = sid;
+
+       /* Stream mapping is a bit more effort */
+       if (sc->sc_smr) {
+               for (i = 0; i < sc->sc_num_streams; i++) {
+                       if (sc->sc_smr[i] != NULL)
+                               continue;
+                       sc->sc_smr[i] = malloc(sizeof(struct smmu_smr),
+                           M_DEVBUF, M_WAITOK | M_ZERO);
+                       dom->sd_smr_idx = i;
+                       break;
+               }
+
+               if (i >= sc->sc_num_streams) {
+                       free(sc->sc_cb[dom->sd_cb_idx], M_DEVBUF,
+                           sizeof(struct smmu_cb));
+                       sc->sc_cb[dom->sd_cb_idx] = NULL;
+                       free(dom, M_DEVBUF, sizeof(*dom));
+                       return NULL;
+               }
+       }
+
+       reg = SMMU_CBA2R_VA64;
+       if (sc->sc_has_vmid16s)
+               reg |= (dom->sd_cb_idx + 1) << SMMU_CBA2R_VMID16_SHIFT;
+       smmu_gr1_write_4(sc, SMMU_CBA2R(dom->sd_cb_idx), reg);
+
+       if (dom->sd_stage == 1) {
+               reg = SMMU_CBAR_TYPE_S1_TRANS_S2_BYPASS |
+                   SMMU_CBAR_BPSHCFG_NSH | SMMU_CBAR_MEMATTR_WB;
+       } else {
+               reg = SMMU_CBAR_TYPE_S2_TRANS;
+               if (!sc->sc_has_vmid16s)
+                       reg |= (dom->sd_cb_idx + 1) << SMMU_CBAR_VMID_SHIFT;
+       }
+       smmu_gr1_write_4(sc, SMMU_CBAR(dom->sd_cb_idx), reg);
+
+       if (dom->sd_stage == 1) {
+               reg = SMMU_CB_TCR2_AS | SMMU_CB_TCR2_SEP_UPSTREAM;
+               switch (sc->sc_ipa_bits) {
+               case 32:
+                       reg |= SMMU_CB_TCR2_PASIZE_32BIT;
+                       break;
+               case 36:
+                       reg |= SMMU_CB_TCR2_PASIZE_36BIT;
+                       break;
+               case 40:
+                       reg |= SMMU_CB_TCR2_PASIZE_40BIT;
+                       break;
+               case 42:
+                       reg |= SMMU_CB_TCR2_PASIZE_42BIT;
+                       break;
+               case 44:
+                       reg |= SMMU_CB_TCR2_PASIZE_44BIT;
+                       break;
+               case 48:
+                       reg |= SMMU_CB_TCR2_PASIZE_48BIT;
+                       break;
+               }
+               smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_TCR2, reg);
+       }
+
+       if (dom->sd_stage == 1)
+               iovabits = sc->sc_va_bits;
+       else
+               iovabits = sc->sc_ipa_bits;
+       /*
+        * Marvell's 8040 does not support 64-bit writes, hence we
+        * can only address 44-bits of VA space for TLB invalidation.
+        */
+       if (sc->sc_is_ap806)
+               iovabits = min(44, iovabits);
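+       /*
+        * With a 4KB granule, levels 1-3 resolve 39 bits of IOVA;
+        * anything larger needs a level-0 table and a 4-level walk.
+        */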
+       if (iovabits >= 40)
+               dom->sd_4level = 1;
+
+       reg = SMMU_CB_TCR_TG0_4KB | SMMU_CB_TCR_T0SZ(64 - iovabits);
+       if (dom->sd_stage == 1) {
+               reg |= SMMU_CB_TCR_EPD1;
+       } else {
+               if (dom->sd_4level)
+                       reg |= SMMU_CB_TCR_S2_SL0_4KB_L0;
+               else
+                       reg |= SMMU_CB_TCR_S2_SL0_4KB_L1;
+               switch (sc->sc_pa_bits) {
+               case 32:
+                       reg |= SMMU_CB_TCR_S2_PASIZE_32BIT;
+                       break;
+               case 36:
+                       reg |= SMMU_CB_TCR_S2_PASIZE_36BIT;
+                       break;
+               case 40:
+                       reg |= SMMU_CB_TCR_S2_PASIZE_40BIT;
+                       break;
+               case 42:
+                       reg |= SMMU_CB_TCR_S2_PASIZE_42BIT;
+                       break;
+               case 44:
+                       reg |= SMMU_CB_TCR_S2_PASIZE_44BIT;
+                       break;
+               case 48:
+                       reg |= SMMU_CB_TCR_S2_PASIZE_48BIT;
+                       break;
+               }
+       }
+       if (sc->sc_coherent)
+               reg |= SMMU_CB_TCR_IRGN0_WBWA | SMMU_CB_TCR_ORGN0_WBWA |
+                   SMMU_CB_TCR_SH0_ISH;
+       else
+               reg |= SMMU_CB_TCR_IRGN0_NC | SMMU_CB_TCR_ORGN0_NC |
+                   SMMU_CB_TCR_SH0_OSH;
+       smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_TCR, reg);
+
+       if (dom->sd_4level) {
+               while (dom->sd_vp.l0 == NULL) {
+                       dom->sd_vp.l0 = pool_get(&sc->sc_vp_pool,
+                           PR_WAITOK | PR_ZERO);
+               }
+               l0va = (vaddr_t)dom->sd_vp.l0->l0; /* top level is l0 */
+       } else {
+               while (dom->sd_vp.l1 == NULL) {
+                       dom->sd_vp.l1 = pool_get(&sc->sc_vp_pool,
+                           PR_WAITOK | PR_ZERO);
+               }
+               l0va = (vaddr_t)dom->sd_vp.l1->l1; /* top level is l1 */
+       }
+       pmap_extract(pmap_kernel(), l0va, &pa);
+
+       if (dom->sd_stage == 1) {
+               smmu_cb_write_8(sc, dom->sd_cb_idx, SMMU_CB_TTBR0,
+                   (uint64_t)dom->sd_cb_idx << SMMU_CB_TTBR_ASID_SHIFT | pa);
+               smmu_cb_write_8(sc, dom->sd_cb_idx, SMMU_CB_TTBR1,
+                   (uint64_t)dom->sd_cb_idx << SMMU_CB_TTBR_ASID_SHIFT);
+       } else
+               smmu_cb_write_8(sc, dom->sd_cb_idx, SMMU_CB_TTBR0, pa);
+
+       if (dom->sd_stage == 1) {
+               smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_MAIR0,
+                   SMMU_CB_MAIR_MAIR_ATTR(SMMU_CB_MAIR_DEVICE_nGnRnE, 0) |
+                   SMMU_CB_MAIR_MAIR_ATTR(SMMU_CB_MAIR_DEVICE_nGnRE, 1) |
+                   SMMU_CB_MAIR_MAIR_ATTR(SMMU_CB_MAIR_DEVICE_NC, 2) |
+                   SMMU_CB_MAIR_MAIR_ATTR(SMMU_CB_MAIR_DEVICE_WB, 3));
+               smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_MAIR1,
+                   SMMU_CB_MAIR_MAIR_ATTR(SMMU_CB_MAIR_DEVICE_WT, 0));
+       }
+
+       reg = SMMU_CB_SCTLR_M | SMMU_CB_SCTLR_TRE | SMMU_CB_SCTLR_AFE |
+           SMMU_CB_SCTLR_CFRE | SMMU_CB_SCTLR_CFIE;
+       if (dom->sd_stage == 1)
+               reg |= SMMU_CB_SCTLR_ASIDPNE;
+       smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_SCTLR, reg);
+
+       /* Point stream to context block */
+       reg = SMMU_S2CR_TYPE_TRANS | dom->sd_cb_idx;
+       if (sc->sc_has_exids && sc->sc_smr)
+               reg |= SMMU_S2CR_EXIDVALID;
+       smmu_gr0_write_4(sc, SMMU_S2CR(dom->sd_smr_idx), reg);
+
+       /* Map stream idx to S2CR idx */
+       if (sc->sc_smr) {
+               reg = sid;
+               if (!sc->sc_has_exids)
+                       reg |= SMMU_SMR_VALID;
+               smmu_gr0_write_4(sc, SMMU_SMR(dom->sd_smr_idx), reg);
+       }
+
+       snprintf(dom->sd_exname, sizeof(dom->sd_exname), "%s:%x",
+           sc->sc_dev.dv_xname, sid);
+       dom->sd_iovamap = extent_create(dom->sd_exname, 0, (1LL << iovabits)-1,
+           M_DEVBUF, NULL, 0, EX_WAITOK | EX_NOCOALESCE);
+
+#if 0
+       /* FIXME MSI map on */
+       {
+               paddr_t msi_pa = 0x78020000; /* Ampere */
+               paddr_t msi_pa = 0x6020000; /* LX2K */
+               size_t msi_len = 0x20000;
+               paddr_t msi_pa = 0x280000; /* 8040 */
+               size_t msi_len = 0x4000;
+               while (msi_len) {
+                       smmu_enter(dom, msi_pa, msi_pa, PROT_READ | PROT_WRITE,
+                           PROT_READ | PROT_WRITE, PMAP_CACHE_WB);
+                       msi_pa += PAGE_SIZE;
+                       msi_len -= PAGE_SIZE;
+               }
+       }
+#endif
+
+       SIMPLEQ_INSERT_TAIL(&sc->sc_domains, dom, sd_list);
+       return dom;
+}
+
+/* basically pmap follows */
+
+/* virtual to physical helpers */
+static inline int
+VP_IDX0(vaddr_t va)
+{
+       return (va >> VP_IDX0_POS) & VP_IDX0_MASK;
+}
+
+static inline int
+VP_IDX1(vaddr_t va)
+{
+       return (va >> VP_IDX1_POS) & VP_IDX1_MASK;
+}
+
+static inline int
+VP_IDX2(vaddr_t va)
+{
+       return (va >> VP_IDX2_POS) & VP_IDX2_MASK;
+}
+
+static inline int
+VP_IDX3(vaddr_t va)
+{
+       return (va >> VP_IDX3_POS) & VP_IDX3_MASK;
+}
+
+static inline uint64_t
+VP_Lx(paddr_t pa)
+{
+       /*
+        * This function takes the pa address given and manipulates it
+        * into the form that should be inserted into the VM table.
+        */
+       return pa | Lx_TYPE_PT;
+}
+
+void
+smmu_set_l1(struct smmu_domain *dom, uint64_t va, struct smmuvp1 *l1_va)
+{
+       uint64_t pg_entry;
+       paddr_t l1_pa;
+       int idx0;
+
+       if (pmap_extract(pmap_kernel(), (vaddr_t)l1_va, &l1_pa) == 0)
+               panic("unable to find vp pa mapping %p\n", l1_va);
+
+       if (l1_pa & (Lx_TABLE_ALIGN-1))
+               panic("misaligned L2 table\n");
+
+       pg_entry = VP_Lx(l1_pa);
+
+       idx0 = VP_IDX0(va);
+       dom->sd_vp.l0->vp[idx0] = l1_va;
+       dom->sd_vp.l0->l0[idx0] = pg_entry;
+}
+
+void
+smmu_set_l2(struct smmu_domain *dom, uint64_t va, struct smmuvp1 *vp1,
+    struct smmuvp2 *l2_va)
+{
+       uint64_t pg_entry;
+       paddr_t l2_pa;
+       int idx1;
+
+       if (pmap_extract(pmap_kernel(), (vaddr_t)l2_va, &l2_pa) == 0)
+               panic("unable to find vp pa mapping %p\n", l2_va);
+
+       if (l2_pa & (Lx_TABLE_ALIGN-1))
+               panic("misaligned L2 table\n");
+
+       pg_entry = VP_Lx(l2_pa);
+
+       idx1 = VP_IDX1(va);
+       vp1->vp[idx1] = l2_va;
+       vp1->l1[idx1] = pg_entry;
+}
+
+void
+smmu_set_l3(struct smmu_domain *dom, uint64_t va, struct smmuvp2 *vp2,
+    struct smmuvp3 *l3_va)
+{
+       uint64_t pg_entry;
+       paddr_t l3_pa;
+       int idx2;
+
+       if (pmap_extract(pmap_kernel(), (vaddr_t)l3_va, &l3_pa) == 0)
+               panic("unable to find vp pa mapping %p\n", l3_va);
+
+       if (l3_pa & (Lx_TABLE_ALIGN-1))
+               panic("misaligned L2 table\n");
+
+       pg_entry = VP_Lx(l3_pa);
+
+       idx2 = VP_IDX2(va);
+       vp2->vp[idx2] = l3_va;
+       vp2->l2[idx2] = pg_entry;
+}
+
+struct pte_desc *
+smmu_vp_lookup(struct smmu_domain *dom, vaddr_t va, uint64_t **pl3entry)
+{
+       struct smmuvp1 *vp1;
+       struct smmuvp2 *vp2;
+       struct smmuvp3 *vp3;
+       struct pte_desc *pted;
+
+       if (dom->sd_4level) {
+               if (dom->sd_vp.l0 == NULL) {
+                       return NULL;
+               }
+               vp1 = dom->sd_vp.l0->vp[VP_IDX0(va)];
+       } else {
+               vp1 = dom->sd_vp.l1;
+       }
+       if (vp1 == NULL) {
+               return NULL;
+       }
+
+       vp2 = vp1->vp[VP_IDX1(va)];
+       if (vp2 == NULL) {
+               return NULL;
+       }
+
+       vp3 = vp2->vp[VP_IDX2(va)];
+       if (vp3 == NULL) {
+               return NULL;
+       }
+
+       pted = vp3->vp[VP_IDX3(va)];
+       if (pl3entry != NULL)
+               *pl3entry = &(vp3->l3[VP_IDX3(va)]);
+
+       return pted;
+}
+
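+/*
+ * Insert a pte descriptor, allocating missing intermediate levels
+ * of the shadow table on the way down.
+ */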
+int
+smmu_vp_enter(struct smmu_domain *dom, vaddr_t va, struct pte_desc *pted,
+    int flags)
+{
+       struct smmu_softc *sc = dom->sd_sc;
+       struct smmuvp1 *vp1;
+       struct smmuvp2 *vp2;
+       struct smmuvp3 *vp3;
+
+       if (dom->sd_4level) {
+               vp1 = dom->sd_vp.l0->vp[VP_IDX0(va)];
+               if (vp1 == NULL) {
+                       vp1 = pool_get(&sc->sc_vp_pool, PR_NOWAIT | PR_ZERO);
+                       if (vp1 == NULL) {
+                               if ((flags & PMAP_CANFAIL) == 0)
+                                       panic("%s: unable to allocate L1",
+                                           __func__);
+                               return ENOMEM;
+                       }
+                       smmu_set_l1(dom, va, vp1);
+               }
+       } else {
+               vp1 = dom->sd_vp.l1;
+       }
+
+       vp2 = vp1->vp[VP_IDX1(va)];
+       if (vp2 == NULL) {
+               vp2 = pool_get(&sc->sc_vp_pool, PR_NOWAIT | PR_ZERO);
+               if (vp2 == NULL) {
+                       if ((flags & PMAP_CANFAIL) == 0)
+                               panic("%s: unable to allocate L2", __func__);
+                       return ENOMEM;
+               }
+               smmu_set_l2(dom, va, vp1, vp2);
+       }
+
+       vp3 = vp2->vp[VP_IDX2(va)];
+       if (vp3 == NULL) {
+               vp3 = pool_get(&sc->sc_vp_pool, PR_NOWAIT | PR_ZERO);
+               if (vp3 == NULL) {
+                       if ((flags & PMAP_CANFAIL) == 0)
+                               panic("%s: unable to allocate L3", __func__);
+                       return ENOMEM;
+               }
+               smmu_set_l3(dom, va, vp2, vp3);
+       }
+
+       vp3->vp[VP_IDX3(va)] = pted;
+       return 0;
+}
+
+void
+smmu_fill_pte(struct smmu_domain *dom, vaddr_t va, paddr_t pa,
+    struct pte_desc *pted, vm_prot_t prot, int flags, int cache)
+{
+       pted->pted_va = va;
+       pted->pted_dom = dom;
+
+       switch (cache) {
+       case PMAP_CACHE_WB:
+               break;
+       case PMAP_CACHE_WT:
+               break;
+       case PMAP_CACHE_CI:
+               break;
+       case PMAP_CACHE_DEV_NGNRNE:
+               break;
+       case PMAP_CACHE_DEV_NGNRE:
+               break;
+       default:
+               panic("%s: invalid cache mode", __func__);
+       }
+       pted->pted_va |= cache;
+
+       pted->pted_va |= prot & (PROT_READ|PROT_WRITE|PROT_EXEC);
+
+       pted->pted_pte = pa & PTE_RPGN;
+       pted->pted_pte |= flags & (PROT_READ|PROT_WRITE|PROT_EXEC);
+}
+
+void
+smmu_pte_update(struct pte_desc *pted, uint64_t *pl3)
+{
+       struct smmu_domain *dom = pted->pted_dom;
+       uint64_t pte, access_bits;
+       uint64_t attr = 0;
+
+       /* see mair in locore.S */
+       switch (pted->pted_va & PMAP_CACHE_BITS) {
+       case PMAP_CACHE_WB:
+               /* inner and outer writeback */
+               if (dom->sd_stage == 1)
+                       attr |= ATTR_IDX(PTE_ATTR_WB);
+               else
+                       attr |= ATTR_IDX(PTE_MEMATTR_WB);
+               attr |= ATTR_SH(SH_INNER);
+               break;
+       case PMAP_CACHE_WT:
+               /* inner and outer writethrough */
+               if (dom->sd_stage == 1)
+                       attr |= ATTR_IDX(PTE_ATTR_WT);
+               else
+                       attr |= ATTR_IDX(PTE_MEMATTR_WT);
+               attr |= ATTR_SH(SH_INNER);
+               break;
+       case PMAP_CACHE_CI:
+               if (dom->sd_stage == 1)
+                       attr |= ATTR_IDX(PTE_ATTR_CI);
+               else
+                       attr |= ATTR_IDX(PTE_MEMATTR_CI);
+               attr |= ATTR_SH(SH_INNER);
+               break;
+       case PMAP_CACHE_DEV_NGNRNE:
+               if (dom->sd_stage == 1)
+                       attr |= ATTR_IDX(PTE_ATTR_DEV_NGNRNE);
+               else
+                       attr |= ATTR_IDX(PTE_MEMATTR_DEV_NGNRNE);
+               attr |= ATTR_SH(SH_INNER);
+               break;
+       case PMAP_CACHE_DEV_NGNRE:
+               if (dom->sd_stage == 1)
+                       attr |= ATTR_IDX(PTE_ATTR_DEV_NGNRE);
+               else
+                       attr |= ATTR_IDX(PTE_MEMATTR_DEV_NGNRE);
+               attr |= ATTR_SH(SH_INNER);
+               break;
+       default:
+               panic("%s: invalid cache mode", __func__);
+       }
+
+       access_bits = ATTR_PXN | ATTR_AF;
+       if (dom->sd_stage == 1) {
+               attr |= ATTR_nG;
+               access_bits |= ATTR_AP(1);
+               if ((pted->pted_pte & PROT_READ) &&
+                   !(pted->pted_pte & PROT_WRITE))
+                       access_bits |= ATTR_AP(2);
+       } else {
+               if (pted->pted_pte & PROT_READ)
+                       access_bits |= ATTR_AP(1);
+               if (pted->pted_pte & PROT_WRITE)
+                       access_bits |= ATTR_AP(2);
+       }
+
+       pte = (pted->pted_pte & PTE_RPGN) | attr | access_bits | L3_P;
+       *pl3 = pte;
+}
+
+void
+smmu_pte_insert(struct pte_desc *pted)
+{
+       struct smmu_domain *dom = pted->pted_dom;
+       uint64_t *pl3;
+
+       if (smmu_vp_lookup(dom, pted->pted_va, &pl3) == NULL) {
+               panic("%s: have a pted, but missing a vp"
+                   " for %lx va domain %p", __func__, pted->pted_va, dom);
+       }
+
+       smmu_pte_update(pted, pl3);
+       membar_producer(); /* XXX bus dma sync? */
+}
+
+void
+smmu_pte_remove(struct pte_desc *pted)
+{
+       /* put entry into table */
+       /* need to deal with ref/change here */
+       struct smmuvp1 *vp1;
+       struct smmuvp2 *vp2;
+       struct smmuvp3 *vp3;
+       struct smmu_domain *dom = pted->pted_dom;
+
+       if (dom->sd_4level)
+               vp1 = dom->sd_vp.l0->vp[VP_IDX0(pted->pted_va)];
+       else
+               vp1 = dom->sd_vp.l1;
+       if (vp1 == NULL) {
+               panic("have a pted, but missing the l1 for %lx va domain %p",
+                   pted->pted_va, dom);
+       }
+       vp2 = vp1->vp[VP_IDX1(pted->pted_va)];
+       if (vp2 == NULL) {
+               panic("have a pted, but missing the l2 for %lx va domain %p",
+                   pted->pted_va, dom);
+       }
+       vp3 = vp2->vp[VP_IDX2(pted->pted_va)];
+       if (vp3 == NULL) {
+               panic("have a pted, but missing the l3 for %lx va domain %p",
+                   pted->pted_va, dom);
+       }
+       vp3->l3[VP_IDX3(pted->pted_va)] = 0;
+       vp3->vp[VP_IDX3(pted->pted_va)] = NULL;
+}
+
+int
+smmu_prepare(struct smmu_domain *dom, vaddr_t va, paddr_t pa, vm_prot_t prot,
+    int flags, int cache)
+{
+       struct smmu_softc *sc = dom->sd_sc;
+       struct pte_desc *pted;
+       int error;
+
+       /* printf("%s: 0x%lx -> 0x%lx\n", __func__, va, pa); */
+
+       pted = smmu_vp_lookup(dom, va, NULL);
+       if (pted == NULL) {
+               pted = pool_get(&sc->sc_pted_pool, PR_NOWAIT | PR_ZERO);
+               if (pted == NULL) {
+                       if ((flags & PMAP_CANFAIL) == 0)
+                               panic("%s: failed to allocate pted", __func__);
+                       error = ENOMEM;
+                       goto out;
+               }
+               if (smmu_vp_enter(dom, va, pted, flags)) {
+                       if ((flags & PMAP_CANFAIL) == 0)
+                               panic("%s: failed to allocate L2/L3", __func__);
+                       error = ENOMEM;
+                       pool_put(&sc->sc_pted_pool, pted);
+                       goto out;
+               }
+       }
+
+       smmu_fill_pte(dom, va, pa, pted, prot, flags, cache);
+
+       error = 0;
+out:
+       return error;
+}
+
+int
+smmu_enter(struct smmu_domain *dom, vaddr_t va, paddr_t pa, vm_prot_t prot,
+    int flags, int cache)
+{
+       struct pte_desc *pted;
+       int error;
+
+       /* printf("%s: 0x%lx -> 0x%lx\n", __func__, va, pa); */
+
+       pted = smmu_vp_lookup(dom, va, NULL);
+       if (pted == NULL) {
+               error = smmu_prepare(dom, va, pa, prot, PROT_NONE, cache);
+               if (error)
+                       goto out;
+               pted = smmu_vp_lookup(dom, va, NULL);
+               KASSERT(pted != NULL);
+       }
+
+       pted->pted_pte |=
+           (pted->pted_va & (PROT_READ|PROT_WRITE|PROT_EXEC));
+       smmu_pte_insert(pted);
+
+       error = 0;
+out:
+       return error;
+}
+
+void
+smmu_remove(struct smmu_domain *dom, vaddr_t va)
+{
+       struct smmu_softc *sc = dom->sd_sc;
+       struct pte_desc *pted;
+
+       /* printf("%s: 0x%lx\n", __func__, va); */
+
+       pted = smmu_vp_lookup(dom, va, NULL);
+       if (pted == NULL) /* XXX really? */
+               return;
+
+       smmu_pte_remove(pted);
+       pted->pted_pte = 0;
+       pted->pted_va = 0;
+       pool_put(&sc->sc_pted_pool, pted);
+
+       membar_producer(); /* XXX bus dma sync? */
+       if (dom->sd_stage == 1)
+               smmu_cb_write_8(sc, dom->sd_cb_idx, SMMU_CB_TLBIVAL,
+                   (uint64_t)dom->sd_cb_idx << 48 | va >> PAGE_SHIFT);
+       else
+               smmu_cb_write_8(sc, dom->sd_cb_idx, SMMU_CB_TLBIIPAS2L,
+                   va >> PAGE_SHIFT);
+}
+
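+/*
+ * The bus_dma hooks below establish a 1:1 mapping for each loaded
+ * segment, i.e. the device-visible addresses equal the physical
+ * addresses, so the segments in the map need no rewriting.
+ */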
+int
+smmu_load_map(struct smmu_domain *dom, bus_dmamap_t map)
+{
+       bus_addr_t addr, end;
+       int seg;
+
+       mtx_enter(&dom->sd_mtx); /* XXX necessary ? */
+       for (seg = 0; seg < map->dm_nsegs; seg++) {
+               addr = trunc_page(map->dm_segs[seg].ds_addr);
+               end = round_page(map->dm_segs[seg].ds_addr +
+                   map->dm_segs[seg].ds_len);
+               while (addr < end) {
+                       smmu_enter(dom, addr, addr, PROT_READ | PROT_WRITE,
+                           PROT_READ | PROT_WRITE, PMAP_CACHE_WB);
+                       addr += PAGE_SIZE;
+               }
+       }
+       mtx_leave(&dom->sd_mtx);
+
+       return 0;
+}
+
+void
+smmu_unload_map(struct smmu_domain *dom, bus_dmamap_t map)
+{
+       bus_addr_t addr, end;
+       int curseg;
+
+       mtx_enter(&dom->sd_mtx); /* XXX necessary ? */
+       for (curseg = 0; curseg < map->dm_nsegs; curseg++) {
+               addr = trunc_page(map->dm_segs[curseg].ds_addr);
+               end = round_page(map->dm_segs[curseg].ds_addr +
+                   map->dm_segs[curseg].ds_len);
+               while (addr < end) {
+                       smmu_remove(dom, addr);
+                       addr += PAGE_SIZE;
+               }
+       }
+       mtx_leave(&dom->sd_mtx);
+
+       smmu_tlb_sync_context(dom);
+}
+
+int
+smmu_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
+    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
+{
+       struct smmu_domain *dom = t->_cookie;
+
+       return dom->sd_parent_dmat->_dmamap_create(dom->sd_parent_dmat, size,
+           nsegments, maxsegsz, boundary, flags, dmamp);
+}
+
+void
+smmu_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
+{
+       struct smmu_domain *dom = t->_cookie;
+
+       dom->sd_parent_dmat->_dmamap_destroy(dom->sd_parent_dmat, map);
+}
+
+int
+smmu_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
+    bus_size_t buflen, struct proc *p, int flags)
+{
+       struct smmu_domain *dom = t->_cookie;
+       int error;
+
+       error = dom->sd_parent_dmat->_dmamap_load(dom->sd_parent_dmat, map,
+           buf, buflen, p, flags);
+       if (error)
+               return error;
+
+       error = smmu_load_map(dom, map);
+       if (error)
+               dom->sd_parent_dmat->_dmamap_unload(dom->sd_parent_dmat, map);
+
+       return error;
+}
+
+int
+smmu_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map, struct mbuf *m0,
+    int flags)
+{
+       struct smmu_domain *dom = t->_cookie;
+       int error;
+
+       error = dom->sd_parent_dmat->_dmamap_load_mbuf(dom->sd_parent_dmat, map,
+           m0, flags);
+       if (error)
+               return error;
+
+       error = smmu_load_map(dom, map);
+       if (error)
+               dom->sd_parent_dmat->_dmamap_unload(dom->sd_parent_dmat, map);
+
+       return error;
+}
+
+int
+smmu_dmamap_load_uio(bus_dma_tag_t t, bus_dmamap_t map, struct uio *uio,
+    int flags)
+{
+       struct smmu_domain *dom = t->_cookie;
+       int error;
+
+       error = dom->sd_parent_dmat->_dmamap_load_uio(dom->sd_parent_dmat, map,
+           uio, flags);
+       if (error)
+               return error;
+
+       error = smmu_load_map(dom, map);
+       if (error)
+               dom->sd_parent_dmat->_dmamap_unload(dom->sd_parent_dmat, map);
+
+       return error;
+}
+
+int
+smmu_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map, bus_dma_segment_t *segs,
+    int nsegs, bus_size_t size, int flags)
+{
+       struct smmu_domain *dom = t->_cookie;
+       int error;
+
+       error = dom->sd_parent_dmat->_dmamap_load_raw(dom->sd_parent_dmat, map,
+           segs, nsegs, size, flags);
+       if (error)
+               return error;
+
+       error = smmu_load_map(dom, map);
+       if (error)
+               dom->sd_parent_dmat->_dmamap_unload(dom->sd_parent_dmat, map);
+
+       return error;
+}
+
+void
+smmu_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
+{
+       struct smmu_domain *dom = t->_cookie;
+
+       smmu_unload_map(dom, map);
+       dom->sd_parent_dmat->_dmamap_unload(dom->sd_parent_dmat, map);
+}
+
+void
+smmu_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t addr,
+    bus_size_t size, int op)
+{
+       struct smmu_domain *dom = t->_cookie;
+       dom->sd_parent_dmat->_dmamap_sync(dom->sd_parent_dmat, map,
+           addr, size, op);
+}
+
+int
+smmu_dmamem_map(bus_dma_tag_t t, bus_dma_segment_t *segs, int nsegs,
+    size_t size, caddr_t *kvap, int flags)
+{
+       struct smmu_domain *dom = t->_cookie;
+       bus_addr_t addr, end;
+       int cache, seg, error;
+
+       error = dom->sd_parent_dmat->_dmamem_map(dom->sd_parent_dmat, segs,
+           nsegs, size, kvap, flags);
+       if (error)
+               return error;
+
+       cache = PMAP_CACHE_WB;
+       if (((t->_flags & BUS_DMA_COHERENT) == 0 &&
+          (flags & BUS_DMA_COHERENT)) || (flags & BUS_DMA_NOCACHE))
+               cache = PMAP_CACHE_CI;
+       mtx_enter(&dom->sd_mtx); /* XXX necessary ? */
+       for (seg = 0; seg < nsegs; seg++) {
+               addr = trunc_page(segs[seg].ds_addr);
+               end = round_page(segs[seg].ds_addr + segs[seg].ds_len);
+               while (addr < end) {
+                       smmu_prepare(dom, addr, addr, PROT_READ | PROT_WRITE,
+                           PROT_NONE, cache);
+                       addr += PAGE_SIZE;
+               }
+       }
+       mtx_leave(&dom->sd_mtx);
+
+       return error;
+}
diff --git a/sys/arch/arm64/dev/smmu_acpi.c b/sys/arch/arm64/dev/smmu_acpi.c
new file mode 100644 (file)
index 0000000..9947383
--- /dev/null
@@ -0,0 +1,130 @@
+/* $OpenBSD: smmu_acpi.c,v 1.1 2021/02/28 21:39:31 patrick Exp $ */
+/*
+ * Copyright (c) 2021 Patrick Wildt <patrick@blueri.se>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/device.h>
+#include <sys/pool.h>
+
+#include <dev/acpi/acpireg.h>
+#include <dev/acpi/acpivar.h>
+#include <dev/acpi/dsdt.h>
+
+#include <dev/pci/pcivar.h>
+#include <arm64/dev/acpiiort.h>
+#include <arm64/dev/smmuvar.h>
+#include <arm64/dev/smmureg.h>
+
+struct smmu_acpi_softc {
+       struct smmu_softc        sc_smmu;
+       void                    *sc_gih;
+};
+
+int smmu_acpi_match(struct device *, void *, void *);
+void smmu_acpi_attach(struct device *, struct device *, void *);
+
+struct cfattach smmu_acpi_ca = {
+       sizeof(struct smmu_acpi_softc), smmu_acpi_match, smmu_acpi_attach
+};
+
+int
+smmu_acpi_match(struct device *parent, void *match, void *aux)
+{
+       struct acpiiort_attach_args *aia = aux;
+       struct acpi_iort_node *node = aia->aia_node;
+
+       if (node->type != ACPI_IORT_SMMU)
+               return 0;
+
+       return 1;
+}
+
+void
+smmu_acpi_attach(struct device *parent, struct device *self, void *aux)
+{
+       struct smmu_acpi_softc *asc = (struct smmu_acpi_softc *)self;
+       struct smmu_softc *sc = &asc->sc_smmu;
+       struct acpiiort_attach_args *aia = aux;
+       struct acpi_iort_node *node = aia->aia_node;
+       struct acpi_iort_smmu_node *smmu;
+       struct acpi_iort_smmu_global_interrupt *girq;
+       struct acpi_iort_smmu_context_interrupt *cirq;
+       struct acpiiort_smmu *as;
+       int i;
+
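+       /* The SMMU-specific data immediately follows the IORT node header. */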
+       smmu = (struct acpi_iort_smmu_node *)&node[1];
+
+       printf(" addr 0x%llx/0x%llx", smmu->base_address, smmu->span);
+
+       sc->sc_dmat = aia->aia_dmat;
+       sc->sc_iot = aia->aia_memt;
+       if (bus_space_map(sc->sc_iot, smmu->base_address, smmu->span,
+           0, &sc->sc_ioh)) {
+               printf(": can't map registers\n");
+               return;
+       }
+
+       switch (smmu->model) {
+       case ACPI_IORT_SMMU_V1:
+       case ACPI_IORT_SMMU_CORELINK_MMU400:
+       case ACPI_IORT_SMMU_CORELINK_MMU401:
+               printf(": SMMUv1 is unsupported\n");
+               break;
+       case ACPI_IORT_SMMU_CORELINK_MMU500:
+               sc->sc_is_mmu500 = 1;
+       case ACPI_IORT_SMMU_V2:
+       case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
+               break;
+       default:
+               printf(": unknown model %u\n", smmu->model);
+               return;
+       }
+
+       if (smmu->flags & ACPI_IORT_SMMU_COHERENT)
+               sc->sc_coherent = 1;
+
+       if (smmu_attach(sc) != 0)
+               return;
+
+       girq = (struct acpi_iort_smmu_global_interrupt *)
+           ((char *)node + smmu->global_interrupt_offset);
+       asc->sc_gih = acpi_intr_establish(girq->nsgirpt_gsiv,
+           girq->nsgirpt_flags & ACPI_IORT_SMMU_INTR_EDGE ?
+           LR_EXTIRQ_MODE : 0, IPL_TTY, smmu_global_irq,
+           sc, sc->sc_dev.dv_xname);
+       if (asc->sc_gih == NULL)
+               return;
+
+       cirq = (struct acpi_iort_smmu_context_interrupt *)
+           ((char *)node + smmu->context_interrupt_offset);
+       for (i = 0; i < smmu->number_of_context_interrupts; i++) {
+               struct smmu_cb_irq *cbi = malloc(sizeof(*cbi),
+                   M_DEVBUF, M_WAITOK);
+               cbi->cbi_sc = sc;
+               cbi->cbi_idx = i;
+               acpi_intr_establish(cirq[i].gsiv,
+                   cirq[i].flags & ACPI_IORT_SMMU_INTR_EDGE ?
+                   LR_EXTIRQ_MODE : 0, IPL_TTY, smmu_context_irq,
+                   cbi, sc->sc_dev.dv_xname);
+       }
+
+       as = malloc(sizeof(*as), M_DEVBUF, M_WAITOK | M_ZERO);
+       as->as_node = node;
+       as->as_cookie = sc;
+       as->as_hook = smmu_pci_device_hook;
+       acpiiort_smmu_register(as);
+}
diff --git a/sys/arch/arm64/dev/smmu_fdt.c b/sys/arch/arm64/dev/smmu_fdt.c
new file mode 100644 (file)
index 0000000..d11b0d2
--- /dev/null
@@ -0,0 +1,120 @@
+/* $OpenBSD: smmu_fdt.c,v 1.1 2021/02/28 21:39:31 patrick Exp $ */
+/*
+ * Copyright (c) 2021 Patrick Wildt <patrick@blueri.se>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/device.h>
+#include <sys/pool.h>
+
+#include <machine/bus.h>
+#include <machine/fdt.h>
+#include <machine/intr.h>
+
+#include <dev/ofw/openfirm.h>
+#include <dev/ofw/ofw_misc.h>
+#include <dev/ofw/fdt.h>
+
+#include <dev/pci/pcivar.h>
+#include <arm64/dev/smmuvar.h>
+#include <arm64/dev/smmureg.h>
+
+struct smmu_fdt_softc {
+       struct smmu_softc        sc_smmu;
+
+       struct iommu_device      sc_id;
+};
+
+int smmu_fdt_match(struct device *, void *, void *);
+void smmu_fdt_attach(struct device *, struct device *, void *);
+
+bus_dma_tag_t smmu_fdt_map(void *, uint32_t *, bus_dma_tag_t);
+
+struct cfattach smmu_fdt_ca = {
+       sizeof(struct smmu_fdt_softc), smmu_fdt_match, smmu_fdt_attach
+};
+
+int
+smmu_fdt_match(struct device *parent, void *match, void *aux)
+{
+       struct fdt_attach_args *faa = aux;
+
+       return (OF_is_compatible(faa->fa_node, "arm,smmu-v2") ||
+           OF_is_compatible(faa->fa_node, "arm,mmu-500"));
+}
+
+void
+smmu_fdt_attach(struct device *parent, struct device *self, void *aux)
+{
+       struct smmu_fdt_softc *fsc = (struct smmu_fdt_softc *)self;
+       struct smmu_softc *sc = &fsc->sc_smmu;
+       struct fdt_attach_args *faa = aux;
+       uint32_t ngirq;
+       int i;
+
+       if (faa->fa_nreg < 1) {
+               printf(": no registers\n");
+               return;
+       }
+
+       sc->sc_iot = faa->fa_iot;
+       if (bus_space_map(sc->sc_iot, faa->fa_reg[0].addr,
+           faa->fa_reg[0].size, 0, &sc->sc_ioh)) {
+               printf(": can't map registers\n");
+               return;
+       }
+
+       if (OF_is_compatible(faa->fa_node, "arm,mmu-500"))
+               sc->sc_is_mmu500 = 1;
+       if (OF_is_compatible(faa->fa_node, "marvell,ap806-smmu-500"))
+               sc->sc_is_ap806 = 1;
+       if (OF_getproplen(faa->fa_node, "dma-coherent") == 0)
+               sc->sc_coherent = 1;
+
+       if (smmu_attach(sc) != 0)
+               return;
+
+       ngirq = OF_getpropint(faa->fa_node, "#global-interrupts", 1);
+       for (i = 0; i < ngirq; i++) {
+               fdt_intr_establish_idx(faa->fa_node, i, IPL_TTY,
+                   smmu_global_irq, sc, sc->sc_dev.dv_xname);
+       }
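+       /*
+        * Context bank interrupts follow the global interrupts in the
+        * "interrupts" property; keep establishing them until we run
+        * out.
+        */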
+       for (i = ngirq; ; i++) {
+               struct smmu_cb_irq *cbi = malloc(sizeof(*cbi),
+                   M_DEVBUF, M_WAITOK);
+               cbi->cbi_sc = sc;
+               cbi->cbi_idx = i - ngirq;
+               if (fdt_intr_establish_idx(faa->fa_node, i, IPL_TTY,
+                   smmu_context_irq, cbi, sc->sc_dev.dv_xname) == NULL) {
+                       free(cbi, M_DEVBUF, sizeof(*cbi));
+                       break;
+               }
+       }
+
+       fsc->sc_id.id_node = faa->fa_node;
+       fsc->sc_id.id_cookie = fsc;
+       fsc->sc_id.id_map = smmu_fdt_map;
+       iommu_device_register(&fsc->sc_id);
+}
+
+bus_dma_tag_t
+smmu_fdt_map(void *cookie, uint32_t *cells, bus_dma_tag_t dmat)
+{
+       struct smmu_fdt_softc *fsc = (struct smmu_fdt_softc *)cookie;
+       struct smmu_softc *sc = &fsc->sc_smmu;
+
+       return smmu_device_hook(sc, cells[0], dmat);
+}
diff --git a/sys/arch/arm64/dev/smmureg.h b/sys/arch/arm64/dev/smmureg.h
new file mode 100644 (file)
index 0000000..926d2d9
--- /dev/null
@@ -0,0 +1,271 @@
+/* $OpenBSD: smmureg.h,v 1.1 2021/02/28 21:39:31 patrick Exp $ */
+/*
+ * Copyright (c) 2021 Patrick Wildt <patrick@blueri.se>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* Global Register Space 0 */
+#define SMMU_SCR0                      0x000
+#define  SMMU_SCR0_CLIENTPD                    (1 << 0)
+#define  SMMU_SCR0_GFRE                                (1 << 1)
+#define  SMMU_SCR0_GFIE                                (1 << 2)
+#define  SMMU_SCR0_EXIDENABLE                  (1 << 3)
+#define  SMMU_SCR0_GCFGFRE                     (1 << 4)
+#define  SMMU_SCR0_GCFGFIE                     (1 << 5)
+#define  SMMU_SCR0_USFCFG                      (1 << 10)
+#define  SMMU_SCR0_VMIDPNE                     (1 << 11)
+#define  SMMU_SCR0_PTM                         (1 << 12)
+#define  SMMU_SCR0_FB                          (1 << 13)
+#define  SMMU_SCR0_BSU_MASK                    (0x3 << 14)
+#define  SMMU_SCR0_VMID16EN                    (1U << 31)
+#define SMMU_SCR1                      0x004
+#define SMMU_SCR2                      0x008
+#define SMMU_SACR                      0x010
+#define  SMMU_SACR_MMU500_SMTNMB_TLBEN         (1 << 8)
+#define  SMMU_SACR_MMU500_S2CRB_TLBEN          (1 << 10)
+#define  SMMU_SACR_MMU500_CACHE_LOCK           (1 << 26)
+#define SMMU_IDR0                      0x020
+#define  SMMU_IDR0_NUMSMRG(x)                  (((x) >> 0) & 0xff)
+#define  SMMU_IDR0_EXIDS                       (1 << 8)
+#define  SMMU_IDR0_NUMSIDB(x)                  (((x) >> 9) & 0xf)
+#define  SMMU_IDR0_BTM                         (1 << 13)
+#define  SMMU_IDR0_CCTM                                (1 << 14)
+#define  SMMU_IDR0_EXSMRGS                     (1 << 15)
+#define  SMMU_IDR0_NUMIRPT(x)                  (((x) >> 16) & 0xff)
+#define  SMMU_IDR0_PTFS(x)                     (((x) >> 24) & 0x3)
+#define  SMMU_IDR0_PTFS_AARCH32_SHORT_AND_LONG 0x0
+#define  SMMU_IDR0_PTFS_AARCH32_ONLY_LONG      0x1
+#define  SMMU_IDR0_PTFS_AARCH32_NO             0x2
+#define  SMMU_IDR0_PTFS_AARCH32_RES            0x3
+#define  SMMU_IDR0_ATOSNS                      (1 << 26)
+#define  SMMU_IDR0_SMS                         (1 << 27)
+#define  SMMU_IDR0_NTS                         (1 << 28)
+#define  SMMU_IDR0_S2TS                                (1 << 29)
+#define  SMMU_IDR0_S1TS                                (1 << 30)
+#define  SMMU_IDR0_SES                         (1U << 31)
+#define SMMU_IDR1                      0x024
+#define  SMMU_IDR1_NUMCB(x)                    (((x) >> 0) & 0xff)
+#define  SMMU_IDR1_NUMSSDNDXB(x)               (((x) >> 8) & 0xf)
+#define  SMMU_IDR1_SSDTP(x)                    (((x) >> 12) & 0x3)
+#define  SMMU_IDR1_SSDTP_UNK                   0x0
+#define  SMMU_IDR1_SSDTP_IDX_NUMSSDNDXB                0x1
+#define  SMMU_IDR1_SSDTP_RES                   0x2
+#define  SMMU_IDR1_SSDTP_IDX_16BIT             0x3
+#define  SMMU_IDR1_SMCD                                (1 << 15)
+#define  SMMU_IDR1_NUMS2CB(x)                  (((x) >> 16) & 0xff)
+#define  SMMU_IDR1_HAFDBS(x)                   (((x) >> 24) & 0x3)
+#define  SMMU_IDR1_HAFDBS_NO                   0x0
+#define  SMMU_IDR1_HAFDBS_AF                   0x1
+#define  SMMU_IDR1_HAFDBS_RES                  0x2
+#define  SMMU_IDR1_HAFDBS_AFDB                 0x3
+#define  SMMU_IDR1_NUMPAGENDXB(x)              (((x) >> 28) & 0x7)
+#define  SMMU_IDR1_PAGESIZE_4K                 (0U << 31)
+#define  SMMU_IDR1_PAGESIZE_64K                        (1U << 31)
+#define SMMU_IDR2                      0x028
+#define  SMMU_IDR2_IAS(x)                      (((x) >> 0) & 0xf)
+#define  SMMU_IDR2_IAS_32BIT                   0x0
+#define  SMMU_IDR2_IAS_36BIT                   0x1
+#define  SMMU_IDR2_IAS_40BIT                   0x2
+#define  SMMU_IDR2_IAS_42BIT                   0x3
+#define  SMMU_IDR2_IAS_44BIT                   0x4
+#define  SMMU_IDR2_IAS_48BIT                   0x5
+#define  SMMU_IDR2_OAS(x)                      (((x) >> 4) & 0xf)
+#define  SMMU_IDR2_OAS_32BIT                   0x0
+#define  SMMU_IDR2_OAS_36BIT                   0x1
+#define  SMMU_IDR2_OAS_40BIT                   0x2
+#define  SMMU_IDR2_OAS_42BIT                   0x3
+#define  SMMU_IDR2_OAS_44BIT                   0x4
+#define  SMMU_IDR2_OAS_48BIT                   0x5
+#define  SMMU_IDR2_UBS(x)                      (((x) >> 8) & 0xf)
+#define  SMMU_IDR2_UBS_32BIT                   0x0
+#define  SMMU_IDR2_UBS_36BIT                   0x1
+#define  SMMU_IDR2_UBS_40BIT                   0x2
+#define  SMMU_IDR2_UBS_42BIT                   0x3
+#define  SMMU_IDR2_UBS_44BIT                   0x4
+#define  SMMU_IDR2_UBS_49BIT                   0x5
+#define  SMMU_IDR2_UBS_64BIT                   0xf
+#define  SMMU_IDR2_PTFSV8_4KB                  (1 << 12)
+#define  SMMU_IDR2_PTFSV8_16KB                 (1 << 13)
+#define  SMMU_IDR2_PTFSV8_64KB                 (1 << 14)
+#define  SMMU_IDR2_VMID16S                     (1 << 15)
+#define  SMMU_IDR2_EXNUMSMRG(x)                        (((x) >> 16) & 0x7ff)
+#define  SMMU_IDR2_E2HS                                (1 << 27)
+#define  SMMU_IDR2_HADS                                (1 << 28)
+#define  SMMU_IDR2_COMPINDEXS                  (1 << 29)
+#define  SMMU_IDR2_DIPANS                      (1 << 30)
+#define SMMU_IDR3                      0x02c
+#define SMMU_IDR4                      0x030
+#define SMMU_IDR5                      0x034
+#define SMMU_IDR6                      0x038
+#define SMMU_IDR7                      0x03c
+#define  SMMU_IDR7_MINOR(x)                    (((x) >> 0) & 0xf)
+#define  SMMU_IDR7_MAJOR(x)                    (((x) >> 4) & 0xf)
+#define SMMU_SGFSR                     0x048
+#define SMMU_SGFSYNR0                  0x050
+#define SMMU_SGFSYNR1                  0x054
+#define SMMU_SGFSYNR2                  0x058
+#define SMMU_TLBIVMID                  0x064
+#define SMMU_TLBIALLNSNH               0x068
+#define SMMU_TLBIALLH                  0x06c
+#define SMMU_STLBGSYNC                 0x070
+#define SMMU_STLBGSTATUS               0x074
+#define  SMMU_STLBGSTATUS_GSACTIVE             (1 << 0)
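+/*
+ * A Stream Match Register holds an ID/mask pair; a transaction whose
+ * stream ID matches is steered by the S2CR with the same index to a
+ * context bank, to bypass, or to fault.
+ */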
+#define SMMU_SMR(x)                    (0x800 + (x) * 0x4) /* 0 - 127 */
+#define  SMMU_SMR_VALID                                (1U << 31)
+#define  SMMU_SMR_MASK                         (0x7fff << 16)
+#define SMMU_S2CR(x)                   (0xc00 + (x) * 0x4) /* 0 - 127 */
+#define  SMMU_S2CR_EXIDVALID                   (1 << 10)
+#define  SMMU_S2CR_TYPE_TRANS                  (0 << 16)
+#define  SMMU_S2CR_TYPE_BYPASS                 (1 << 16)
+#define  SMMU_S2CR_TYPE_FAULT                  (2 << 16)
+#define  SMMU_S2CR_TYPE_MASK                   (0x3 << 16)
+
+/* Global Register Space 1 */
+#define SMMU_CBAR(x)                   (0x000 + (x) * 0x4)
+#define  SMMU_CBAR_VMID_SHIFT                  0
+#define  SMMU_CBAR_BPSHCFG_RES                 (0x0 << 8)
+#define  SMMU_CBAR_BPSHCFG_OSH                 (0x1 << 8)
+#define  SMMU_CBAR_BPSHCFG_ISH                 (0x2 << 8)
+#define  SMMU_CBAR_BPSHCFG_NSH                 (0x3 << 8)
+#define  SMMU_CBAR_MEMATTR_WB                  (0xf << 12)
+#define  SMMU_CBAR_TYPE_S2_TRANS               (0x0 << 16)
+#define  SMMU_CBAR_TYPE_S1_TRANS_S2_BYPASS     (0x1 << 16)
+#define  SMMU_CBAR_TYPE_S1_TRANS_S2_FAULT      (0x2 << 16)
+#define  SMMU_CBAR_TYPE_S1_TRANS_S2_TRANS      (0x3 << 16)
+#define  SMMU_CBAR_TYPE_MASK                   (0x3 << 16)
+#define  SMMU_CBAR_IRPTNDX_SHIFT               24
+#define SMMU_CBFRSYNRA(x)              (0x400 + (x) * 0x4)
+#define SMMU_CBA2R(x)                  (0x800 + (x) * 0x4)
+#define  SMMU_CBA2R_VA64                       (1 << 0)
+#define  SMMU_CBA2R_MONC                       (1 << 1)
+#define  SMMU_CBA2R_VMID16_SHIFT               16
+
+/* Context Bank Format */
+#define SMMU_CB_SCTLR                  0x000
+#define  SMMU_CB_SCTLR_M                       (1 << 0)
+#define  SMMU_CB_SCTLR_TRE                     (1 << 1)
+#define  SMMU_CB_SCTLR_AFE                     (1 << 2)
+#define  SMMU_CB_SCTLR_CFRE                    (1 << 5)
+#define  SMMU_CB_SCTLR_CFIE                    (1 << 6)
+#define  SMMU_CB_SCTLR_ASIDPNE                 (1 << 12)
+#define SMMU_CB_ACTLR                  0x004
+#define  SMMU_CB_ACTLR_CPRE                    (1 << 1)
+#define SMMU_CB_TCR2                   0x010
+#define  SMMU_CB_TCR2_PASIZE_32BIT             (0x0 << 0)
+#define  SMMU_CB_TCR2_PASIZE_36BIT             (0x1 << 0)
+#define  SMMU_CB_TCR2_PASIZE_40BIT             (0x2 << 0)
+#define  SMMU_CB_TCR2_PASIZE_42BIT             (0x3 << 0)
+#define  SMMU_CB_TCR2_PASIZE_44BIT             (0x4 << 0)
+#define  SMMU_CB_TCR2_PASIZE_48BIT             (0x5 << 0)
+#define  SMMU_CB_TCR2_PASIZE_MASK              (0x7 << 0)
+#define  SMMU_CB_TCR2_AS                       (1 << 4)
+#define  SMMU_CB_TCR2_SEP_UPSTREAM             (0x7 << 15)
+#define SMMU_CB_TTBR0                  0x020
+#define SMMU_CB_TTBR1                  0x028
+#define  SMMU_CB_TTBR_ASID_SHIFT               48
+#define SMMU_CB_TCR                    0x030
+#define  SMMU_CB_TCR_T0SZ(x)                   ((x) << 0)
+#define  SMMU_CB_TCR_EPD0                      (1 << 7)
+#define  SMMU_CB_TCR_IRGN0_NC                  (0x0 << 8)
+#define  SMMU_CB_TCR_IRGN0_WBWA                        (0x1 << 8)
+#define  SMMU_CB_TCR_IRGN0_WT                  (0x2 << 8)
+#define  SMMU_CB_TCR_IRGN0_WB                  (0x3 << 8)
+#define  SMMU_CB_TCR_ORGN0_NC                  (0x0 << 10)
+#define  SMMU_CB_TCR_ORGN0_WBWA                        (0x1 << 10)
+#define  SMMU_CB_TCR_ORGN0_WT                  (0x2 << 10)
+#define  SMMU_CB_TCR_ORGN0_WB                  (0x3 << 10)
+#define  SMMU_CB_TCR_SH0_NSH                   (0x0 << 12)
+#define  SMMU_CB_TCR_SH0_OSH                   (0x2 << 12)
+#define  SMMU_CB_TCR_SH0_ISH                   (0x3 << 12)
+#define  SMMU_CB_TCR_TG0_4KB                   (0x0 << 14)
+#define  SMMU_CB_TCR_TG0_64KB                  (0x1 << 14)
+#define  SMMU_CB_TCR_TG0_16KB                  (0x2 << 14)
+#define  SMMU_CB_TCR_TG0_MASK                  (0x3 << 14)
+#define  SMMU_CB_TCR_T1SZ(x)                   ((x) << 16)
+#define  SMMU_CB_TCR_EPD1                      (1 << 23)
+#define  SMMU_CB_TCR_IRGN1_NC                  (0x0 << 24)
+#define  SMMU_CB_TCR_IRGN1_WBWA                        (0x1 << 24)
+#define  SMMU_CB_TCR_IRGN1_WT                  (0x2 << 24)
+#define  SMMU_CB_TCR_IRGN1_WB                  (0x3 << 24)
+#define  SMMU_CB_TCR_ORGN1_NC                  (0x0 << 26)
+#define  SMMU_CB_TCR_ORGN1_WBWA                        (0x1 << 26)
+#define  SMMU_CB_TCR_ORGN1_WT                  (0x2 << 26)
+#define  SMMU_CB_TCR_ORGN1_WB                  (0x3 << 26)
+#define  SMMU_CB_TCR_SH1_NSH                   (0x0 << 28)
+#define  SMMU_CB_TCR_SH1_OSH                   (0x2 << 28)
+#define  SMMU_CB_TCR_SH1_ISH                   (0x3 << 28)
+#define  SMMU_CB_TCR_TG1_16KB                  (0x1 << 30)
+#define  SMMU_CB_TCR_TG1_4KB                   (0x2 << 30)
+#define  SMMU_CB_TCR_TG1_64KB                  (0x3 << 30)
+#define  SMMU_CB_TCR_TG1_MASK                  (0x3 << 30)
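+/* In stage-2 context banks, SMMU_CB_TCR takes the VTCR layout instead: */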
+#define  SMMU_CB_TCR_S2_SL0_4KB_L2             (0x0 << 6)
+#define  SMMU_CB_TCR_S2_SL0_4KB_L1             (0x1 << 6)
+#define  SMMU_CB_TCR_S2_SL0_4KB_L0             (0x2 << 6)
+#define  SMMU_CB_TCR_S2_SL0_16KB_L3            (0x0 << 6)
+#define  SMMU_CB_TCR_S2_SL0_16KB_L2            (0x1 << 6)
+#define  SMMU_CB_TCR_S2_SL0_16KB_L1            (0x2 << 6)
+#define  SMMU_CB_TCR_S2_SL0_64KB_L3            (0x0 << 6)
+#define  SMMU_CB_TCR_S2_SL0_64KB_L2            (0x1 << 6)
+#define  SMMU_CB_TCR_S2_SL0_64KB_L1            (0x2 << 6)
+#define  SMMU_CB_TCR_S2_SL0_MASK               (0x3 << 6)
+#define  SMMU_CB_TCR_S2_PASIZE_32BIT           (0x0 << 16)
+#define  SMMU_CB_TCR_S2_PASIZE_36BIT           (0x1 << 16)
+#define  SMMU_CB_TCR_S2_PASIZE_40BIT           (0x2 << 16)
+#define  SMMU_CB_TCR_S2_PASIZE_42BIT           (0x3 << 16)
+#define  SMMU_CB_TCR_S2_PASIZE_44BIT           (0x4 << 16)
+#define  SMMU_CB_TCR_S2_PASIZE_48BIT           (0x5 << 16)
+#define  SMMU_CB_TCR_S2_PASIZE_MASK            (0x7 << 16)
+#define SMMU_CB_MAIR0                  0x038
+#define SMMU_CB_MAIR1                  0x03c
+#define  SMMU_CB_MAIR_MAIR_ATTR(attr, idx)     ((attr) << ((idx) * 8))
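+/* 0x00 and 0x04 encode Device memory; NC, WB and WT encode Normal memory. */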
+#define  SMMU_CB_MAIR_DEVICE_nGnRnE            0x00
+#define  SMMU_CB_MAIR_DEVICE_nGnRE             0x04
+#define  SMMU_CB_MAIR_DEVICE_NC                        0x44
+#define  SMMU_CB_MAIR_DEVICE_WB                        0xff
+#define  SMMU_CB_MAIR_DEVICE_WT                        0x88
+#define SMMU_CB_FSR                    0x058
+#define  SMMU_CB_FSR_TF                                (1 << 1)
+#define  SMMU_CB_FSR_AFF                       (1 << 2)
+#define  SMMU_CB_FSR_PF                                (1 << 3)
+#define  SMMU_CB_FSR_EF                                (1 << 4)
+#define  SMMU_CB_FSR_TLBMCF                    (1 << 5)
+#define  SMMU_CB_FSR_TLBLKF                    (1 << 6)
+#define  SMMU_CB_FSR_ASF                       (1 << 7)
+#define  SMMU_CB_FSR_UUT                       (1 << 8)
+#define  SMMU_CB_FSR_SS                                (1 << 30)
+#define  SMMU_CB_FSR_MULTI                     (1U << 31)
+#define  SMMU_CB_FSR_MASK                      (SMMU_CB_FSR_TF | \
+                                                SMMU_CB_FSR_AFF | \
+                                                SMMU_CB_FSR_PF | \
+                                                SMMU_CB_FSR_EF | \
+                                                SMMU_CB_FSR_TLBMCF | \
+                                                SMMU_CB_FSR_TLBLKF | \
+                                                SMMU_CB_FSR_ASF | \
+                                                SMMU_CB_FSR_UUT | \
+                                                SMMU_CB_FSR_SS | \
+                                                SMMU_CB_FSR_MULTI)
+#define SMMU_CB_FAR                    0x060
+#define SMMU_CB_FSYNR0                 0x068
+#define SMMU_CB_IPAFAR                 0x070
+#define SMMU_CB_TLBIVA                 0x600
+#define SMMU_CB_TLBIVAA                        0x608
+#define SMMU_CB_TLBIASID               0x610
+#define SMMU_CB_TLBIALL                        0x618
+#define SMMU_CB_TLBIVAL                        0x620
+#define SMMU_CB_TLBIVAAL               0x628
+#define SMMU_CB_TLBIIPAS2              0x630
+#define SMMU_CB_TLBIIPAS2L             0x638
+#define SMMU_CB_TLBSYNC                        0x7f0
+#define SMMU_CB_TLBSTATUS              0x7f4
+#define  SMMU_CB_TLBSTATUS_SACTIVE             (1 << 0)
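
As a usage sketch, here is how the ID-register accessors above compose
when sizing the global register file.  Illustrative only, not lifted from
smmu.c, but the macros and softc fields are the ones this commit defines:

	uint32_t idr1 = smmu_gr0_read_4(sc, SMMU_IDR1);

	sc->sc_num_context_banks = SMMU_IDR1_NUMCB(idr1);
	sc->sc_num_s2_context_banks = SMMU_IDR1_NUMS2CB(idr1);

	/* Register file: 2^(NUMPAGENDXB + 1) pages of 4K or 64K each. */
	sc->sc_pagesize = (idr1 & SMMU_IDR1_PAGESIZE_64K) ? 65536 : 4096;
	sc->sc_numpage = 1 << (SMMU_IDR1_NUMPAGENDXB(idr1) + 1);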
diff --git a/sys/arch/arm64/dev/smmuvar.h b/sys/arch/arm64/dev/smmuvar.h
new file mode 100644 (file)
index 0000000..b2a0c59
--- /dev/null
@@ -0,0 +1,83 @@
+/* $OpenBSD: smmuvar.h,v 1.1 2021/02/28 21:39:31 patrick Exp $ */
+/*
+ * Copyright (c) 2021 Patrick Wildt <patrick@blueri.se>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+struct smmu_softc;
+struct smmu_domain {
+	struct smmu_softc		*sd_sc;
+	uint32_t			 sd_sid;	/* stream ID */
+	bus_dma_tag_t			 sd_parent_dmat;
+	bus_dma_tag_t			 sd_device_dmat;
+	int				 sd_cb_idx;	/* context bank index */
+	int				 sd_smr_idx;	/* stream match index */
+	int				 sd_stage;	/* translation stage(s) */
+	int				 sd_4level;	/* 4-level pagetable */
+	char				 sd_exname[32];	/* extent name */
+	struct extent			*sd_iovamap;	/* IOVA allocator */
+	union {
+		struct smmuvp0 *l0;	/* virtual to physical table 4 lvl */
+		struct smmuvp1 *l1;	/* virtual to physical table 3 lvl */
+	} sd_vp;
+	struct mutex			 sd_mtx;
+	SIMPLEQ_ENTRY(smmu_domain)	 sd_list;
+};
+
+struct smmu_cb {
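+	/* Empty for now: an allocated entry marks its context bank in use. */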
+};
+
+struct smmu_cb_irq {
+       struct smmu_softc               *cbi_sc;
+       int                              cbi_idx;
+};
+
+struct smmu_smr {
+       uint16_t mask;
+       uint16_t id;
+};
+
+struct smmu_softc {
+       struct device             sc_dev;
+       bus_space_tag_t           sc_iot;
+       bus_space_handle_t        sc_ioh;
+       bus_dma_tag_t             sc_dmat;
+       int                       sc_is_mmu500;
+       int                       sc_is_ap806;
+       size_t                    sc_pagesize;
+       int                       sc_numpage;
+       int                       sc_num_context_banks;
+       int                       sc_num_s2_context_banks;
+       int                       sc_has_s1;
+       int                       sc_has_s2;
+       int                       sc_has_exids;
+       int                       sc_has_vmid16s;
+       int                       sc_num_streams;
+       uint16_t                  sc_stream_mask;
+       int                       sc_ipa_bits;
+       int                       sc_pa_bits;
+       int                       sc_va_bits;
+       struct smmu_smr         **sc_smr;
+       struct smmu_cb          **sc_cb;
+       int                       sc_coherent;
+       struct pool               sc_pted_pool;
+       struct pool               sc_vp_pool;
+       SIMPLEQ_HEAD(, smmu_domain) sc_domains;
+};
+
+int smmu_attach(struct smmu_softc *);
+int smmu_global_irq(void *);
+int smmu_context_irq(void *);
+bus_dma_tag_t smmu_device_hook(struct smmu_softc *, uint32_t, bus_dma_tag_t);
+void smmu_pci_device_hook(void *, uint32_t, struct pci_attach_args *);
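
Finally, a sketch of the address split the sd_vp union implies for the
AArch64 4 KB-granule tables this driver uses; the index macros here are
hypothetical stand-ins for the VP_IDX* constants defined in smmu.c:

	/* 9 translation bits per level, 12 page-offset bits. */
	#define SKETCH_VP_IDX0(va)	(((va) >> 39) & 0x1ff)	/* smmuvp0 */
	#define SKETCH_VP_IDX1(va)	(((va) >> 30) & 0x1ff)	/* smmuvp1 */
	#define SKETCH_VP_IDX2(va)	(((va) >> 21) & 0x1ff)	/* smmuvp2 */
	#define SKETCH_VP_IDX3(va)	(((va) >> 12) & 0x1ff)	/* smmuvp3 */

A domain with sd_4level set walks all four levels starting at sd_vp.l0
and covers 48-bit IOVAs; otherwise the walk starts at sd_vp.l1 and
covers 39 bits.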