From c4f21f07e9a891ad3b6dfe433d8847ae6615a538 Mon Sep 17 00:00:00 2001 From: jsg Date: Mon, 22 Aug 2016 01:41:59 +0000 Subject: [PATCH] Before pmap7.c rev 1.35 and pmap.h rev 1.44 DMA'able memory with the BUS_DMA_COHERENT flag was mapped as device memory, which does not use the store buffer. It is now mapped as normal inner and outer non-cacheable, which does. While we drain the CPU store buffer for this case, on Cortex-A9 systems we also need to explicitly drain the PL310 L2's store buffer. PL310 revisions r3p2 and later drain an entry automatically once it has been present in the store buffer for 256 cycles. On i.MX6 the PL310 is revision r3p1, which does not have this behaviour. This issue is i.MX6 erratum ERR055199 and PL310 erratum 769419. This change restores I/O performance with a USB flash drive attached to my cubox. Raw reads go from 3 MB/s to 19 MB/s, for example. Based on code written by patrick@ some time ago. ok kettenis@ patrick@ --- sys/arch/arm/arm/bus_dma.c | 3 ++- sys/arch/arm/arm/cpufunc.c | 4 +++- sys/arch/arm/cortex/arml2cc.c | 14 +++++++++++++- sys/arch/arm/include/cpufunc.h | 4 +++- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/sys/arch/arm/arm/bus_dma.c b/sys/arch/arm/arm/bus_dma.c index 6f696146a71..bfdbf4c9772 100644 --- a/sys/arch/arm/arm/bus_dma.c +++ b/sys/arch/arm/arm/bus_dma.c @@ -1,4 +1,4 @@ -/* $OpenBSD: bus_dma.c,v 1.31 2016/08/14 10:32:17 kettenis Exp $ */ +/* $OpenBSD: bus_dma.c,v 1.32 2016/08/22 01:41:59 jsg Exp $ */ /* $NetBSD: bus_dma.c,v 1.38 2003/10/30 08:44:13 scw Exp $ */ /*- @@ -615,6 +615,7 @@ _bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t offset, if (map->_dm_flags & ARM32_DMAMAP_COHERENT) { /* Drain the write buffer. 
*/ cpu_drain_writebuf(); + cpu_sdcache_drain_writebuf(); return; } diff --git a/sys/arch/arm/arm/cpufunc.c b/sys/arch/arm/arm/cpufunc.c index c1fc7e91338..fbf9c82f25c 100644 --- a/sys/arch/arm/arm/cpufunc.c +++ b/sys/arch/arm/arm/cpufunc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpufunc.c,v 1.46 2016/08/14 11:30:54 jsg Exp $ */ +/* $OpenBSD: cpufunc.c,v 1.47 2016/08/22 01:41:59 jsg Exp $ */ /* $NetBSD: cpufunc.c,v 1.65 2003/11/05 12:53:15 scw Exp $ */ /* @@ -122,6 +122,7 @@ struct cpu_functions armv7_cpufuncs = { (void *)cpufunc_nullop, /* sdcache_wbinv_range */ (void *)cpufunc_nullop, /* sdcache_inv_range */ (void *)cpufunc_nullop, /* sdcache_wb_range */ + (void *)cpufunc_nullop, /* sdcache_drain_writebuf */ /* Other functions */ @@ -180,6 +181,7 @@ struct cpu_functions xscale_cpufuncs = { (void *)cpufunc_nullop, /* sdcache_wbinv_range */ (void *)cpufunc_nullop, /* sdcache_inv_range */ (void *)cpufunc_nullop, /* sdcache_wb_range */ + (void *)cpufunc_nullop, /* sdcache_drain_writebuf */ /* Other functions */ diff --git a/sys/arch/arm/cortex/arml2cc.c b/sys/arch/arm/cortex/arml2cc.c index 1ce135469d4..8c75cbba511 100644 --- a/sys/arch/arm/cortex/arml2cc.c +++ b/sys/arch/arm/cortex/arml2cc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: arml2cc.c,v 1.4 2015/05/20 00:39:16 jsg Exp $ */ +/* $OpenBSD: arml2cc.c,v 1.5 2016/08/22 01:42:00 jsg Exp $ */ /* * Copyright (c) 2013 Patrick Wildt * @@ -114,6 +114,7 @@ void arml2cc_cache_range_op(paddr_t, psize_t, bus_size_t); void arml2cc_cache_way_op(struct arml2cc_softc *, bus_size_t, uint32_t); void arml2cc_cache_op(struct arml2cc_softc *, bus_size_t, uint32_t); void arml2cc_cache_sync(struct arml2cc_softc *); +void arml2cc_sdcache_drain_writebuf(void); struct cfattach armliicc_ca = { sizeof (struct arml2cc_softc), arml2cc_match, arml2cc_attach @@ -165,6 +166,7 @@ arml2cc_attach(struct device *parent, struct device *self, void *args) cpufuncs.cf_sdcache_wbinv_range = arml2cc_sdcache_wbinv_range; cpufuncs.cf_sdcache_inv_range = 
arml2cc_sdcache_inv_range; cpufuncs.cf_sdcache_wb_range = arml2cc_sdcache_wb_range; + cpufuncs.cf_sdcache_drain_writebuf = arml2cc_sdcache_drain_writebuf; } void @@ -221,6 +223,16 @@ arml2cc_cache_sync(struct arml2cc_softc *sc) bus_space_write_4(sc->sc_iot, sc->sc_ioh, 0x740, 0xffffffff); } +void +arml2cc_sdcache_drain_writebuf(void) +{ + struct arml2cc_softc * const sc = arml2cc_sc; + if (sc == NULL || !sc->sc_enabled) + return; + + arml2cc_cache_sync(sc); +} + void arml2cc_cache_range_op(paddr_t pa, psize_t len, bus_size_t cache_op) { diff --git a/sys/arch/arm/include/cpufunc.h b/sys/arch/arm/include/cpufunc.h index 5e38d49e270..c8910187b78 100644 --- a/sys/arch/arm/include/cpufunc.h +++ b/sys/arch/arm/include/cpufunc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpufunc.h,v 1.26 2016/08/14 11:30:54 jsg Exp $ */ +/* $OpenBSD: cpufunc.h,v 1.27 2016/08/22 01:42:00 jsg Exp $ */ /* $NetBSD: cpufunc.h,v 1.29 2003/09/06 09:08:35 rearnsha Exp $ */ /* @@ -140,6 +140,7 @@ struct cpu_functions { void (*cf_sdcache_wbinv_range) (vaddr_t, paddr_t, vsize_t); void (*cf_sdcache_inv_range) (vaddr_t, paddr_t, vsize_t); void (*cf_sdcache_wb_range) (vaddr_t, paddr_t, vsize_t); + void (*cf_sdcache_drain_writebuf) (void); /* Other functions */ @@ -191,6 +192,7 @@ extern u_int cputype; #define cpu_sdcache_wbinv_range(va, pa, s) cpufuncs.cf_sdcache_wbinv_range((va), (pa), (s)) #define cpu_sdcache_inv_range(va, pa, s) cpufuncs.cf_sdcache_inv_range((va), (pa), (s)) #define cpu_sdcache_wb_range(va, pa, s) cpufuncs.cf_sdcache_wb_range((va), (pa), (s)) +#define cpu_sdcache_drain_writebuf() cpufuncs.cf_sdcache_drain_writebuf() #define cpu_flush_prefetchbuf() cpufuncs.cf_flush_prefetchbuf() #define cpu_drain_writebuf() cpufuncs.cf_drain_writebuf() -- 2.20.1