From 88ef38baf6cc60975ef2af24204774e91793f1c0 Mon Sep 17 00:00:00 2001 From: kettenis Date: Mon, 25 Sep 2023 19:23:34 +0000 Subject: [PATCH] Implement support for stream IDs. ok tobhe@, patrick@ --- sys/arch/arm64/dev/apldart.c | 288 +++++++++++++++++++++-------------- 1 file changed, 175 insertions(+), 113 deletions(-) diff --git a/sys/arch/arm64/dev/apldart.c b/sys/arch/arm64/dev/apldart.c index cc8c4b83e3a..4520f9a707a 100644 --- a/sys/arch/arm64/dev/apldart.c +++ b/sys/arch/arm64/dev/apldart.c @@ -1,4 +1,4 @@ -/* $OpenBSD: apldart.c,v 1.17 2023/07/23 11:47:20 kettenis Exp $ */ +/* $OpenBSD: apldart.c,v 1.18 2023/09/25 19:23:34 kettenis Exp $ */ /* * Copyright (c) 2021 Mark Kettenis * @@ -64,6 +64,7 @@ #define DART_T8110_TLB_CMD 0x0080 #define DART_T8110_TLB_CMD_BUSY (1U << 31) #define DART_T8110_TLB_CMD_FLUSH_ALL (0 << 8) +#define DART_T8110_TLB_CMD_FLUSH_SID (1 << 8) #define DART_T8110_ERROR 0x0100 #define DART_T8110_ERROR_MASK 0x0104 #define DART_T8110_ERROR_ADDR_LO 0x0170 @@ -90,7 +91,6 @@ #define DART_ALL_STREAMS(sc) ((1U << (sc)->sc_nsid) - 1) - /* * Some hardware (e.g. bge(4)) will always use (aligned) 64-bit memory * access. To make sure this doesn't fault, round the subpage limits @@ -134,6 +134,18 @@ apldart_trunc_offset(psize_t off) #define HWRITE4(sc, reg, val) \ bus_space_write_4((sc)->sc_iot, (sc)->sc_ioh, (reg), (val)) +struct apldart_stream { + struct apldart_softc *as_sc; + int as_sid; + + struct extent *as_dvamap; + struct mutex as_dvamap_mtx; + struct apldart_dmamem *as_l1; + struct apldart_dmamem **as_l2; + + struct machine_bus_dma_tag as_dmat; +}; + struct apldart_softc { struct device sc_dev; bus_space_tag_t sc_iot; @@ -143,24 +155,19 @@ struct apldart_softc { int sc_nsid; int sc_nttbr; + int sc_shift; bus_addr_t sc_sid_enable_base; bus_addr_t sc_tcr_base; uint32_t sc_tcr_translate_enable; uint32_t sc_tcr_bypass; bus_addr_t sc_ttbr_base; uint32_t sc_ttbr_valid; - void (*sc_flush_tlb)(struct apldart_softc *); + void (*sc_flush_tlb)(struct apldart_softc *, int); bus_addr_t sc_dvabase; bus_addr_t sc_dvaend; - struct extent *sc_dvamap; - struct mutex sc_dvamap_mtx; - int sc_shift; - struct apldart_dmamem *sc_l1; - struct apldart_dmamem **sc_l2; - - struct machine_bus_dma_tag sc_bus_dmat; + struct apldart_stream **sc_as; struct iommu_device sc_id; int sc_do_suspend; @@ -206,10 +213,10 @@ void apldart_reserve(void *, uint32_t *, bus_addr_t, bus_size_t); int apldart_t8020_intr(void *); int apldart_t8110_intr(void *); -void apldart_t8020_flush_tlb(struct apldart_softc *); -void apldart_t8110_flush_tlb(struct apldart_softc *); -int apldart_load_map(struct apldart_softc *, bus_dmamap_t); -void apldart_unload_map(struct apldart_softc *, bus_dmamap_t); +void apldart_t8020_flush_tlb(struct apldart_softc *, int); +void apldart_t8110_flush_tlb(struct apldart_softc *, int); +int apldart_load_map(struct apldart_stream *, bus_dmamap_t); +void apldart_unload_map(struct apldart_stream *, bus_dmamap_t); int apldart_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t, bus_size_t boundary, int, bus_dmamap_t *); @@ -241,9 +248,6 @@ apldart_attach(struct device *parent, struct device *self, void *aux) { struct apldart_softc *sc = (struct apldart_softc *)self; struct fdt_attach_args *faa = aux; - paddr_t pa; - volatile uint64_t *l1; - int ntte, nl1, nl2; uint32_t config, params2, params4, tcr, ttbr; int sid, idx; @@ -367,49 +371,7 @@ apldart_attach(struct device *parent, struct device *self, void *aux) for (idx = 0; idx < sc->sc_nttbr; idx++) HWRITE4(sc, DART_TTBR(sc, sid, idx), 0); } - sc->sc_flush_tlb(sc); - - /* - * Build translation tables. We pre-allocate the translation - * tables for the entire aperture such that we don't have to - * worry about growing them in an mpsafe manner later. - */ - - ntte = howmany(sc->sc_dvaend, DART_PAGE_SIZE); - nl2 = howmany(ntte, DART_PAGE_SIZE / sizeof(uint64_t)); - nl1 = howmany(nl2, DART_PAGE_SIZE / sizeof(uint64_t)); - - sc->sc_l1 = apldart_dmamem_alloc(sc->sc_dmat, - nl1 * DART_PAGE_SIZE, DART_PAGE_SIZE); - sc->sc_l2 = mallocarray(nl2, sizeof(*sc->sc_l2), - M_DEVBUF, M_WAITOK | M_ZERO); - - l1 = APLDART_DMA_KVA(sc->sc_l1); - for (idx = 0; idx < nl2; idx++) { - sc->sc_l2[idx] = apldart_dmamem_alloc(sc->sc_dmat, - DART_PAGE_SIZE, DART_PAGE_SIZE); - pa = APLDART_DMA_DVA(sc->sc_l2[idx]); - l1[idx] = (pa >> sc->sc_shift) | DART_L1_TABLE; - } - - /* Install page tables. */ - for (sid = 0; sid < sc->sc_nsid; sid++) { - pa = APLDART_DMA_DVA(sc->sc_l1); - for (idx = 0; idx < nl1; idx++) { - HWRITE4(sc, DART_TTBR(sc, sid, idx), - (pa >> DART_TTBR_SHIFT) | sc->sc_ttbr_valid); - pa += DART_PAGE_SIZE; - } - } - sc->sc_flush_tlb(sc); - - /* Enable all streams. */ - for (idx = 0; idx < howmany(sc->sc_nsid, 32); idx++) - HWRITE4(sc, DART_SID_ENABLE(sc, idx), ~0); - - /* Enable translations. */ - for (sid = 0; sid < sc->sc_nsid; sid++) - HWRITE4(sc, DART_TCR(sc, sid), sc->sc_tcr_translate_enable); + sc->sc_flush_tlb(sc, -1); if (OF_is_compatible(sc->sc_node, "apple,t8110-dart")) { HWRITE4(sc, DART_T8110_ERROR, HREAD4(sc, DART_T8110_ERROR)); @@ -422,21 +384,8 @@ apldart_attach(struct device *parent, struct device *self, void *aux) sc, sc->sc_dev.dv_xname); } - sc->sc_dvamap = extent_create(sc->sc_dev.dv_xname, - sc->sc_dvabase, sc->sc_dvaend, M_DEVBUF, - NULL, 0, EX_NOCOALESCE); - mtx_init(&sc->sc_dvamap_mtx, IPL_HIGH); - - memcpy(&sc->sc_bus_dmat, sc->sc_dmat, sizeof(sc->sc_bus_dmat)); - sc->sc_bus_dmat._cookie = sc; - sc->sc_bus_dmat._dmamap_create = apldart_dmamap_create; - sc->sc_bus_dmat._dmamap_destroy = apldart_dmamap_destroy; - sc->sc_bus_dmat._dmamap_load = apldart_dmamap_load; - sc->sc_bus_dmat._dmamap_load_mbuf = apldart_dmamap_load_mbuf; - sc->sc_bus_dmat._dmamap_load_uio = apldart_dmamap_load_uio; - sc->sc_bus_dmat._dmamap_load_raw = apldart_dmamap_load_raw; - sc->sc_bus_dmat._dmamap_unload = apldart_dmamap_unload; - sc->sc_bus_dmat._flags |= BUS_DMA_COHERENT; + sc->sc_as = mallocarray(sc->sc_nsid, sizeof(*sc->sc_as), + M_DEVBUF, M_WAITOK | M_ZERO); sc->sc_id.id_node = faa->fa_node; sc->sc_id.id_cookie = sc; @@ -460,6 +409,7 @@ apldart_resume(struct apldart_softc *sc) paddr_t pa; int ntte, nl1, nl2; uint32_t params2; + uint32_t mask; int sid, idx; if (!sc->sc_do_suspend) @@ -480,22 +430,32 @@ apldart_resume(struct apldart_softc *sc) /* Install page tables. */ for (sid = 0; sid < sc->sc_nsid; sid++) { - pa = APLDART_DMA_DVA(sc->sc_l1); + if (sc->sc_as[sid] == NULL) + continue; + pa = APLDART_DMA_DVA(sc->sc_as[sid]->as_l1); for (idx = 0; idx < nl1; idx++) { HWRITE4(sc, DART_TTBR(sc, sid, idx), (pa >> DART_TTBR_SHIFT) | sc->sc_ttbr_valid); pa += DART_PAGE_SIZE; } } - sc->sc_flush_tlb(sc); + sc->sc_flush_tlb(sc, -1); - /* Enable all streams. */ - for (idx = 0; idx < howmany(sc->sc_nsid, 32); idx++) - HWRITE4(sc, DART_SID_ENABLE(sc, idx), ~0); + /* Enable all active streams. */ + for (sid = 0; sid < sc->sc_nsid; sid++) { + if (sc->sc_as[sid] == NULL) + continue; + mask = HREAD4(sc, DART_SID_ENABLE(sc, sid / 32)); + mask |= (1U << (sid % 32)); + HWRITE4(sc, DART_SID_ENABLE(sc, sid / 32), mask); + } /* Enable translations. */ - for (sid = 0; sid < sc->sc_nsid; sid++) + for (sid = 0; sid < sc->sc_nsid; sid++) { + if (sc->sc_as[sid] == NULL) + continue; HWRITE4(sc, DART_TCR(sc, sid), sc->sc_tcr_translate_enable); + } if (OF_is_compatible(sc->sc_node, "apple,t8110-dart")) { HWRITE4(sc, DART_T8110_ERROR, HREAD4(sc, DART_T8110_ERROR)); @@ -522,12 +482,91 @@ apldart_activate(struct device *self, int act) return 0; } +struct apldart_stream * +apldart_alloc_stream(struct apldart_softc *sc, int sid) +{ + struct apldart_stream *as; + paddr_t pa; + volatile uint64_t *l1; + int idx, ntte, nl1, nl2; + uint32_t mask; + + as = malloc(sizeof(*as), M_DEVBUF, M_WAITOK | M_ZERO); + + as->as_sc = sc; + as->as_sid = sid; + + as->as_dvamap = extent_create(sc->sc_dev.dv_xname, + sc->sc_dvabase, sc->sc_dvaend, M_DEVBUF, + NULL, 0, EX_NOCOALESCE); + mtx_init(&as->as_dvamap_mtx, IPL_HIGH); + + /* + * Build translation tables. We pre-allocate the translation + * tables for the entire aperture such that we don't have to + * worry about growing them in an mpsafe manner later. + */ + + ntte = howmany(sc->sc_dvaend, DART_PAGE_SIZE); + nl2 = howmany(ntte, DART_PAGE_SIZE / sizeof(uint64_t)); + nl1 = howmany(nl2, DART_PAGE_SIZE / sizeof(uint64_t)); + + as->as_l1 = apldart_dmamem_alloc(sc->sc_dmat, + nl1 * DART_PAGE_SIZE, DART_PAGE_SIZE); + as->as_l2 = mallocarray(nl2, sizeof(*as->as_l2), + M_DEVBUF, M_WAITOK | M_ZERO); + + l1 = APLDART_DMA_KVA(as->as_l1); + for (idx = 0; idx < nl2; idx++) { + as->as_l2[idx] = apldart_dmamem_alloc(sc->sc_dmat, + DART_PAGE_SIZE, DART_PAGE_SIZE); + pa = APLDART_DMA_DVA(as->as_l2[idx]); + l1[idx] = (pa >> sc->sc_shift) | DART_L1_TABLE; + } + + /* Install page tables. */ + pa = APLDART_DMA_DVA(as->as_l1); + for (idx = 0; idx < nl1; idx++) { + HWRITE4(sc, DART_TTBR(sc, sid, idx), + (pa >> DART_TTBR_SHIFT) | sc->sc_ttbr_valid); + pa += DART_PAGE_SIZE; + } + sc->sc_flush_tlb(sc, sid); + + /* Enable this stream. */ + mask = HREAD4(sc, DART_SID_ENABLE(sc, sid / 32)); + mask |= (1U << (sid % 32)); + HWRITE4(sc, DART_SID_ENABLE(sc, sid / 32), mask); + + /* Enable translations. */ + HWRITE4(sc, DART_TCR(sc, sid), sc->sc_tcr_translate_enable); + + memcpy(&as->as_dmat, sc->sc_dmat, sizeof(*sc->sc_dmat)); + as->as_dmat._cookie = as; + as->as_dmat._dmamap_create = apldart_dmamap_create; + as->as_dmat._dmamap_destroy = apldart_dmamap_destroy; + as->as_dmat._dmamap_load = apldart_dmamap_load; + as->as_dmat._dmamap_load_mbuf = apldart_dmamap_load_mbuf; + as->as_dmat._dmamap_load_uio = apldart_dmamap_load_uio; + as->as_dmat._dmamap_load_raw = apldart_dmamap_load_raw; + as->as_dmat._dmamap_unload = apldart_dmamap_unload; + as->as_dmat._flags |= BUS_DMA_COHERENT; + + return as; +} + bus_dma_tag_t apldart_map(void *cookie, uint32_t *cells, bus_dma_tag_t dmat) { struct apldart_softc *sc = cookie; + uint32_t sid = cells[0]; + + KASSERT(sid < sc->sc_nsid); - return &sc->sc_bus_dmat; + if (sc->sc_as[sid] == NULL) + sc->sc_as[sid] = apldart_alloc_stream(sc, sid); + + return &sc->sc_as[sid]->as_dmat; } void @@ -558,41 +597,56 @@ apldart_t8110_intr(void *arg) } void -apldart_t8020_flush_tlb(struct apldart_softc *sc) +apldart_t8020_flush_tlb(struct apldart_softc *sc, int sid) { + uint32_t mask; + __asm volatile ("dsb sy" ::: "memory"); - HWRITE4(sc, DART_T8020_TLB_SIDMASK, DART_ALL_STREAMS(sc)); + if (sid == -1) + mask = DART_ALL_STREAMS(sc); + else + mask = (1U << sid); + + HWRITE4(sc, DART_T8020_TLB_SIDMASK, mask); HWRITE4(sc, DART_T8020_TLB_CMD, DART_T8020_TLB_CMD_FLUSH); while (HREAD4(sc, DART_T8020_TLB_CMD) & DART_T8020_TLB_CMD_BUSY) CPU_BUSY_CYCLE(); } void -apldart_t8110_flush_tlb(struct apldart_softc *sc) +apldart_t8110_flush_tlb(struct apldart_softc *sc, int sid) { + uint32_t cmd; + __asm volatile ("dsb sy" ::: "memory"); - HWRITE4(sc, DART_T8110_TLB_CMD, DART_T8110_TLB_CMD_FLUSH_ALL); + if (sid == -1) + cmd = DART_T8110_TLB_CMD_FLUSH_ALL; + else + cmd = DART_T8110_TLB_CMD_FLUSH_SID | sid; + + HWRITE4(sc, DART_T8110_TLB_CMD, cmd); while (HREAD4(sc, DART_T8110_TLB_CMD) & DART_T8110_TLB_CMD_BUSY) CPU_BUSY_CYCLE(); } volatile uint64_t * -apldart_lookup_tte(struct apldart_softc *sc, bus_addr_t dva) +apldart_lookup_tte(struct apldart_stream *as, bus_addr_t dva) { int idx = dva / DART_PAGE_SIZE; int l2_idx = idx / (DART_PAGE_SIZE / sizeof(uint64_t)); int tte_idx = idx % (DART_PAGE_SIZE / sizeof(uint64_t)); volatile uint64_t *l2; - l2 = APLDART_DMA_KVA(sc->sc_l2[l2_idx]); + l2 = APLDART_DMA_KVA(as->as_l2[l2_idx]); return &l2[tte_idx]; } int -apldart_load_map(struct apldart_softc *sc, bus_dmamap_t map) +apldart_load_map(struct apldart_stream *as, bus_dmamap_t map) { + struct apldart_softc *sc = as->as_sc; struct apldart_map_state *ams = map->_dm_cookie; volatile uint64_t *tte; int seg, error; @@ -606,12 +660,12 @@ apldart_load_map(struct apldart_softc *sc, bus_dmamap_t map) len = apldart_round_page(map->dm_segs[seg].ds_len + off); - mtx_enter(&sc->sc_dvamap_mtx); - error = extent_alloc_with_descr(sc->sc_dvamap, len, + mtx_enter(&as->as_dvamap_mtx); + error = extent_alloc_with_descr(as->as_dvamap, len, DART_PAGE_SIZE, 0, 0, EX_NOWAIT, &ams[seg].ams_er, &dva); - mtx_leave(&sc->sc_dvamap_mtx); + mtx_leave(&as->as_dvamap_mtx); if (error) { - apldart_unload_map(sc, map); + apldart_unload_map(as, map); return error; } @@ -627,7 +681,7 @@ apldart_load_map(struct apldart_softc *sc, bus_dmamap_t map) if (len < DART_PAGE_SIZE) end = apldart_round_offset(len) - 1; - tte = apldart_lookup_tte(sc, dva); + tte = apldart_lookup_tte(as, dva); *tte = (pa >> sc->sc_shift) | DART_L2_VALID | DART_L2_START(start) | DART_L2_END(end); @@ -638,14 +692,15 @@ apldart_load_map(struct apldart_softc *sc, bus_dmamap_t map) } } - sc->sc_flush_tlb(sc); + sc->sc_flush_tlb(sc, as->as_sid); return 0; } void -apldart_unload_map(struct apldart_softc *sc, bus_dmamap_t map) +apldart_unload_map(struct apldart_stream *as, bus_dmamap_t map) { + struct apldart_softc *sc = as->as_sc; struct apldart_map_state *ams = map->_dm_cookie; volatile uint64_t *tte; int seg, error; @@ -661,17 +716,17 @@ apldart_unload_map(struct apldart_softc *sc, bus_dmamap_t map) len = ams[seg].ams_len; while (len > 0) { - tte = apldart_lookup_tte(sc, dva); + tte = apldart_lookup_tte(as, dva); *tte = DART_L2_INVAL; dva += DART_PAGE_SIZE; len -= DART_PAGE_SIZE; } - mtx_enter(&sc->sc_dvamap_mtx); - error = extent_free(sc->sc_dvamap, ams[seg].ams_dva, + mtx_enter(&as->as_dvamap_mtx); + error = extent_free(as->as_dvamap, ams[seg].ams_dva, ams[seg].ams_len, EX_NOWAIT); - mtx_leave(&sc->sc_dvamap_mtx); + mtx_leave(&as->as_dvamap_mtx); KASSERT(error == 0); @@ -679,14 +734,15 @@ apldart_unload_map(struct apldart_softc *sc, bus_dmamap_t map) ams[seg].ams_len = 0; } - sc->sc_flush_tlb(sc); + sc->sc_flush_tlb(sc, as->as_sid); } int apldart_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments, bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamap) { - struct apldart_softc *sc = t->_cookie; + struct apldart_stream *as = t->_cookie; + struct apldart_softc *sc = as->as_sc; struct apldart_map_state *ams; bus_dmamap_t map; int error; @@ -711,7 +767,8 @@ apldart_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments, void apldart_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map) { - struct apldart_softc *sc = t->_cookie; + struct apldart_stream *as = t->_cookie; + struct apldart_softc *sc = as->as_sc; struct apldart_map_state *ams = map->_dm_cookie; if (map->dm_nsegs) @@ -725,7 +782,8 @@ int apldart_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf, size_t buflen, struct proc *p, int flags) { - struct apldart_softc *sc = t->_cookie; + struct apldart_stream *as = t->_cookie; + struct apldart_softc *sc = as->as_sc; int error; error = sc->sc_dmat->_dmamap_load(sc->sc_dmat, map, @@ -733,7 +791,7 @@ apldart_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf, if (error) return error; - error = apldart_load_map(sc, map); + error = apldart_load_map(as, map); if (error) sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map); @@ -744,7 +802,8 @@ int apldart_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map, struct mbuf *m, int flags) { - struct apldart_softc *sc = t->_cookie; + struct apldart_stream *as = t->_cookie; + struct apldart_softc *sc = as->as_sc; int error; error = sc->sc_dmat->_dmamap_load_mbuf(sc->sc_dmat, map, @@ -752,7 +811,7 @@ apldart_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map, if (error) return error; - error = apldart_load_map(sc, map); + error = apldart_load_map(as, map); if (error) sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map); @@ -763,7 +822,8 @@ int apldart_dmamap_load_uio(bus_dma_tag_t t, bus_dmamap_t map, struct uio *uio, int flags) { - struct apldart_softc *sc = t->_cookie; + struct apldart_stream *as = t->_cookie; + struct apldart_softc *sc = as->as_sc; int error; error = sc->sc_dmat->_dmamap_load_uio(sc->sc_dmat, map, @@ -771,7 +831,7 @@ apldart_dmamap_load_uio(bus_dma_tag_t t, bus_dmamap_t map, if (error) return error; - error = apldart_load_map(sc, map); + error = apldart_load_map(as, map); if (error) sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map); @@ -782,7 +842,8 @@ int apldart_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags) { - struct apldart_softc *sc = t->_cookie; + struct apldart_stream *as = t->_cookie; + struct apldart_softc *sc = as->as_sc; int error; error = sc->sc_dmat->_dmamap_load_raw(sc->sc_dmat, map, @@ -790,7 +851,7 @@ apldart_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map, if (error) return error; - error = apldart_load_map(sc, map); + error = apldart_load_map(as, map); if (error) sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map); @@ -800,9 +861,10 @@ apldart_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map, void apldart_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map) { - struct apldart_softc *sc = t->_cookie; + struct apldart_stream *as = t->_cookie; + struct apldart_softc *sc = as->as_sc; - apldart_unload_map(sc, map); + apldart_unload_map(as, map); sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map); } -- 2.20.1