Move bge rxeof and txeof outside the kernel lock.
authorjmatthew <jmatthew@openbsd.org>
Mon, 19 Oct 2015 05:31:25 +0000 (05:31 +0000)
committerjmatthew <jmatthew@openbsd.org>
Mon, 19 Oct 2015 05:31:25 +0000 (05:31 +0000)
To make rxeof safe, use a separate ring refill timeout for each ring.
We activate the refill timeout for a ring when it's too empty to receive
packets, which ensures we won't attempt to refill it from interrupt context.

To make txeof safe, remove the list of dma maps and just allocate maps based on
the ring slots occupied by the packet, and use atomic operations to adjust
bge_txcnt.  Rework some parts of the txeof and start loops so that we only
adjust bge_txcnt after exiting the loop, and only take actions such as setting
or clearing OACTIVE based on the final value.

tested on 5703, 5714, 5721 by me, 5753 by semarie@, 5761 by naddy@, and
also in snapshots for a while
ok mpi@, dlg@

sys/dev/pci/if_bge.c
sys/dev/pci/if_bgereg.h

index f75592d..d94aaf3 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: if_bge.c,v 1.369 2015/07/19 06:28:12 yuo Exp $        */
+/*     $OpenBSD: if_bge.c,v 1.370 2015/10/19 05:31:25 jmatthew Exp $   */
 
 /*
  * Copyright (c) 2001 Wind River Systems
@@ -141,7 +141,7 @@ void bge_tick(void *);
 void bge_stats_update(struct bge_softc *);
 void bge_stats_update_regs(struct bge_softc *);
 int bge_cksum_pad(struct mbuf *);
-int bge_encap(struct bge_softc *, struct mbuf *, u_int32_t *);
+int bge_encap(struct bge_softc *, struct mbuf *, int *);
 int bge_compact_dma_runt(struct mbuf *);
 
 int bge_intr(void *);
@@ -1262,20 +1262,31 @@ uncreate:
        return (1);
 }
 
+/*
+ * When the refill timeout for a ring is active, that ring is so empty
+ * that no more packets can be received on it, so the interrupt handler
+ * will not attempt to refill it, meaning we don't need to protect against
+ * interrupts here.
+ */
+
 void
 bge_rxtick(void *arg)
 {
        struct bge_softc *sc = arg;
-       int s;
 
-       s = splnet();
        if (ISSET(sc->bge_flags, BGE_RXRING_VALID) &&
            if_rxr_inuse(&sc->bge_std_ring) <= 8)
                bge_fill_rx_ring_std(sc);
+}
+
+void
+bge_rxtick_jumbo(void *arg)
+{
+       struct bge_softc *sc = arg;
+
        if (ISSET(sc->bge_flags, BGE_JUMBO_RXRING_VALID) &&
            if_rxr_inuse(&sc->bge_jumbo_ring) <= 8)
                bge_fill_rx_ring_jumbo(sc);
-       splx(s);
 }
 
 void
@@ -1410,7 +1421,7 @@ bge_fill_rx_ring_jumbo(struct bge_softc *sc)
         * that now, then try again later.
         */
        if (if_rxr_inuse(&sc->bge_jumbo_ring) <= 8)
-               timeout_add(&sc->bge_rxtimeout, 1);
+               timeout_add(&sc->bge_rxtimeout_jumbo, 1);
 }
 
 void
@@ -1446,7 +1457,6 @@ void
 bge_free_tx_ring(struct bge_softc *sc)
 {
        int i;
-       struct txdmamap_pool_entry *dma;
 
        if (!(sc->bge_flags & BGE_TXRING_VALID))
                return;
@@ -1455,18 +1465,12 @@ bge_free_tx_ring(struct bge_softc *sc)
                if (sc->bge_cdata.bge_tx_chain[i] != NULL) {
                        m_freem(sc->bge_cdata.bge_tx_chain[i]);
                        sc->bge_cdata.bge_tx_chain[i] = NULL;
-                       SLIST_INSERT_HEAD(&sc->txdma_list, sc->txdma[i],
-                                           link);
-                       sc->txdma[i] = 0;
+                       sc->bge_cdata.bge_tx_map[i] = NULL;
                }
                bzero(&sc->bge_rdata->bge_tx_ring[i],
                    sizeof(struct bge_tx_bd));
-       }
 
-       while ((dma = SLIST_FIRST(&sc->txdma_list))) {
-               SLIST_REMOVE_HEAD(&sc->txdma_list, link);
-               bus_dmamap_destroy(sc->bge_dmatag, dma->dmamap);
-               free(dma, M_DEVBUF, 0);
+               bus_dmamap_destroy(sc->bge_dmatag, sc->bge_txdma[i]);
        }
 
        sc->bge_flags &= ~BGE_TXRING_VALID;
@@ -1476,9 +1480,7 @@ int
 bge_init_tx_ring(struct bge_softc *sc)
 {
        int i;
-       bus_dmamap_t dmamap;
        bus_size_t txsegsz, txmaxsegsz;
-       struct txdmamap_pool_entry *dma;
 
        if (sc->bge_flags & BGE_TXRING_VALID)
                return (0);
@@ -1505,22 +1507,10 @@ bge_init_tx_ring(struct bge_softc *sc)
                txmaxsegsz = MCLBYTES;
        }
 
-       SLIST_INIT(&sc->txdma_list);
        for (i = 0; i < BGE_TX_RING_CNT; i++) {
                if (bus_dmamap_create(sc->bge_dmatag, txmaxsegsz,
-                   BGE_NTXSEG, txsegsz, 0, BUS_DMA_NOWAIT, &dmamap))
+                   BGE_NTXSEG, txsegsz, 0, BUS_DMA_NOWAIT, &sc->bge_txdma[i]))
                        return (ENOBUFS);
-               if (dmamap == NULL)
-                       panic("dmamap NULL in bge_init_tx_ring");
-               dma = malloc(sizeof(*dma), M_DEVBUF, M_NOWAIT);
-               if (dma == NULL) {
-                       printf("%s: can't alloc txdmamap_pool_entry\n",
-                           sc->bge_dev.dv_xname);
-                       bus_dmamap_destroy(sc->bge_dmatag, dmamap);
-                       return (ENOMEM);
-               }
-               dma->dmamap = dmamap;
-               SLIST_INSERT_HEAD(&sc->txdma_list, dma, link);
        }
 
        sc->bge_flags |= BGE_TXRING_VALID;
@@ -3081,8 +3071,8 @@ bge_attach(struct device *parent, struct device *self, void *aux)
 
        /* Hookup IRQ last. */
        DPRINTFN(5, ("pci_intr_establish\n"));
-       sc->bge_intrhand = pci_intr_establish(pc, ih, IPL_NET, bge_intr, sc,
-           sc->bge_dev.dv_xname);
+       sc->bge_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
+           bge_intr, sc, sc->bge_dev.dv_xname);
        if (sc->bge_intrhand == NULL) {
                printf(": couldn't establish interrupt");
                if (intrstr != NULL)
@@ -3139,6 +3129,7 @@ bge_attach(struct device *parent, struct device *self, void *aux)
 
        timeout_set(&sc->bge_timeout, bge_tick, sc);
        timeout_set(&sc->bge_rxtimeout, bge_rxtick, sc);
+       timeout_set(&sc->bge_rxtimeout_jumbo, bge_rxtick_jumbo, sc);
        return;
 
 fail_6:
@@ -3578,15 +3569,17 @@ bge_txeof(struct bge_softc *sc)
 {
        struct bge_tx_bd *cur_tx = NULL;
        struct ifnet *ifp;
-       struct txdmamap_pool_entry *dma;
+       bus_dmamap_t dmamap;
        bus_addr_t offset, toff;
        bus_size_t tlen;
-       int tosync;
+       int tosync, freed, txcnt;
+       u_int32_t cons, newcons;
        struct mbuf *m;
 
        /* Nothing to do */
-       if (sc->bge_tx_saved_considx ==
-           sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx)
+       cons = sc->bge_tx_saved_considx; 
+       newcons = sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx;
+       if (cons == newcons)
                return;
 
        ifp = &sc->arpcom.ac_if;
@@ -3597,14 +3590,12 @@ bge_txeof(struct bge_softc *sc)
            BUS_DMASYNC_POSTREAD);
 
        offset = offsetof(struct bge_ring_data, bge_tx_ring);
-       tosync = sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx -
-           sc->bge_tx_saved_considx;
+       tosync = newcons - cons;
 
-       toff = offset + (sc->bge_tx_saved_considx * sizeof (struct bge_tx_bd));
+       toff = offset + (cons * sizeof (struct bge_tx_bd));
 
        if (tosync < 0) {
-               tlen = (BGE_TX_RING_CNT - sc->bge_tx_saved_considx) *
-                   sizeof (struct bge_tx_bd);
+               tlen = (BGE_TX_RING_CNT - cons) * sizeof (struct bge_tx_bd);
                bus_dmamap_sync(sc->bge_dmatag, sc->bge_ring_map,
                    toff, tlen, BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
                tosync = -tosync;
@@ -3618,34 +3609,35 @@ bge_txeof(struct bge_softc *sc)
         * Go through our tx ring and free mbufs for those
         * frames that have been sent.
         */
-       while (sc->bge_tx_saved_considx !=
-           sc->bge_rdata->bge_status_block.bge_idx[0].bge_tx_cons_idx) {
-               u_int32_t               idx = 0;
-
-               idx = sc->bge_tx_saved_considx;
-               cur_tx = &sc->bge_rdata->bge_tx_ring[idx];
+       freed = 0;
+       while (cons != newcons) {
+               cur_tx = &sc->bge_rdata->bge_tx_ring[cons];
                if (cur_tx->bge_flags & BGE_TXBDFLAG_END)
                        ifp->if_opackets++;
-               m = sc->bge_cdata.bge_tx_chain[idx];
+               m = sc->bge_cdata.bge_tx_chain[cons];
                if (m != NULL) {
-                       sc->bge_cdata.bge_tx_chain[idx] = NULL;
-                       dma = sc->txdma[idx];
-                       bus_dmamap_sync(sc->bge_dmatag, dma->dmamap, 0,
-                           dma->dmamap->dm_mapsize, BUS_DMASYNC_POSTWRITE);
-                       bus_dmamap_unload(sc->bge_dmatag, dma->dmamap);
-                       SLIST_INSERT_HEAD(&sc->txdma_list, dma, link);
-                       sc->txdma[idx] = NULL;
+                       dmamap = sc->bge_cdata.bge_tx_map[cons];
+
+                       sc->bge_cdata.bge_tx_chain[cons] = NULL;
+                       sc->bge_cdata.bge_tx_map[cons] = NULL;
+                       bus_dmamap_sync(sc->bge_dmatag, dmamap, 0,
+                           dmamap->dm_mapsize, BUS_DMASYNC_POSTWRITE);
+                       bus_dmamap_unload(sc->bge_dmatag, dmamap);
 
                        m_freem(m);
                }
-               sc->bge_txcnt--;
-               BGE_INC(sc->bge_tx_saved_considx, BGE_TX_RING_CNT);
+               freed++;
+               BGE_INC(cons, BGE_TX_RING_CNT);
        }
 
-       if (sc->bge_txcnt < BGE_TX_RING_CNT - 16)
+       txcnt = atomic_sub_int_nv(&sc->bge_txcnt, freed);
+
+       if (txcnt < BGE_TX_RING_CNT - 16)
                ifp->if_flags &= ~IFF_OACTIVE;
-       if (sc->bge_txcnt == 0)
+       if (txcnt == 0)
                ifp->if_timer = 0;
+
+       sc->bge_tx_saved_considx = cons;
 }
 
 int
@@ -3693,8 +3685,11 @@ bge_intr(void *xsc)
 
        if (BGE_ASICREV(sc->bge_chipid) == BGE_ASICREV_BCM5700 ||
            statusword & BGE_STATFLAG_LINKSTATE_CHANGED ||
-           BGE_STS_BIT(sc, BGE_STS_LINK_EVT))
+           BGE_STS_BIT(sc, BGE_STS_LINK_EVT)) {
+               KERNEL_LOCK();
                bge_link_upd(sc);
+               KERNEL_UNLOCK();
+       }
 
        /* Re-enable interrupts. */
        bge_writembx(sc, BGE_MBX_IRQ0_LO, statustag);
@@ -3706,8 +3701,11 @@ bge_intr(void *xsc)
                /* Check TX ring producer/consumer */
                bge_txeof(sc);
 
-               if (!IFQ_IS_EMPTY(&ifp->if_snd))
+               if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
+                       KERNEL_LOCK();
                        bge_start(ifp);
+                       KERNEL_UNLOCK();
+               }
        }
 
        return (1);
@@ -3987,16 +3985,15 @@ bge_cksum_pad(struct mbuf *m)
  * pointers to descriptors.
  */
 int
-bge_encap(struct bge_softc *sc, struct mbuf *m_head, u_int32_t *txidx)
+bge_encap(struct bge_softc *sc, struct mbuf *m_head, int *txinc)
 {
        struct bge_tx_bd        *f = NULL;
        u_int32_t               frag, cur;
        u_int16_t               csum_flags = 0;
-       struct txdmamap_pool_entry *dma;
-       bus_dmamap_t dmamap;
+       bus_dmamap_t            dmamap;
        int                     i = 0;
 
-       cur = frag = *txidx;
+       cur = frag = (sc->bge_tx_prodidx + *txinc) % BGE_TX_RING_CNT;
 
        if (m_head->m_pkthdr.csum_flags) {
                if (m_head->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
@@ -4026,10 +4023,7 @@ bge_encap(struct bge_softc *sc, struct mbuf *m_head, u_int32_t *txidx)
                return (ENOBUFS);
 
 doit:
-       dma = SLIST_FIRST(&sc->txdma_list);
-       if (dma == NULL)
-               return (ENOBUFS);
-       dmamap = dma->dmamap;
+       dmamap = sc->bge_txdma[cur];
 
        /*
         * Start packing the mbufs in this chain into
@@ -4052,7 +4046,7 @@ doit:
        }
 
        /* Check if we have enough free send BDs. */
-       if (sc->bge_txcnt + dmamap->dm_nsegs >= BGE_TX_RING_CNT)
+       if (sc->bge_txcnt + *txinc + dmamap->dm_nsegs >= BGE_TX_RING_CNT)
                goto fail_unload;
 
        for (i = 0; i < dmamap->dm_nsegs; i++) {
@@ -4084,11 +4078,9 @@ doit:
 
        sc->bge_rdata->bge_tx_ring[cur].bge_flags |= BGE_TXBDFLAG_END;
        sc->bge_cdata.bge_tx_chain[cur] = m_head;
-       SLIST_REMOVE_HEAD(&sc->txdma_list, link);
-       sc->txdma[cur] = dma;
-       sc->bge_txcnt += dmamap->dm_nsegs;
-
-       *txidx = frag;
+       sc->bge_cdata.bge_tx_map[cur] = dmamap;
+       
+       *txinc += dmamap->dm_nsegs;
 
        return (0);
 
@@ -4107,8 +4099,7 @@ bge_start(struct ifnet *ifp)
 {
        struct bge_softc *sc;
        struct mbuf *m_head;
-       u_int32_t prodidx;
-       int pkts;
+       int txinc;
 
        sc = ifp->if_softc;
 
@@ -4117,55 +4108,44 @@ bge_start(struct ifnet *ifp)
        if (!BGE_STS_BIT(sc, BGE_STS_LINK))
                return;
 
-       prodidx = sc->bge_tx_prodidx;
-
-       for (pkts = 0; !IFQ_IS_EMPTY(&ifp->if_snd);) {
-               if (sc->bge_txcnt > BGE_TX_RING_CNT - 16) {
-                       ifp->if_flags |= IFF_OACTIVE;
-                       break;
-               }
-
+       txinc = 0;
+       while (1) {
                IFQ_POLL(&ifp->if_snd, m_head);
                if (m_head == NULL)
                        break;
 
-               /*
-                * Pack the data into the transmit ring. If we
-                * don't have room, set the OACTIVE flag and wait
-                * for the NIC to drain the ring.
-                */
-               if (bge_encap(sc, m_head, &prodidx)) {
-                       ifp->if_flags |= IFF_OACTIVE;
+               if (bge_encap(sc, m_head, &txinc))
                        break;
-               }
 
                /* now we are committed to transmit the packet */
                IFQ_DEQUEUE(&ifp->if_snd, m_head);
-               pkts++;
 
 #if NBPFILTER > 0
-               /*
-                * If there's a BPF listener, bounce a copy of this frame
-                * to him.
-                */
                if (ifp->if_bpf)
                        bpf_mtap_ether(ifp->if_bpf, m_head, BPF_DIRECTION_OUT);
 #endif
        }
-       if (pkts == 0)
-               return;
 
-       /* Transmit */
-       bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, prodidx);
-       if (BGE_CHIPREV(sc->bge_chipid) == BGE_CHIPREV_5700_BX)
-               bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, prodidx);
+       if (txinc != 0) {
+               int txcnt;
 
-       sc->bge_tx_prodidx = prodidx;
+               /* Transmit */
+               sc->bge_tx_prodidx = (sc->bge_tx_prodidx + txinc) %
+                   BGE_TX_RING_CNT;
+               bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, sc->bge_tx_prodidx);
+               if (BGE_CHIPREV(sc->bge_chipid) == BGE_CHIPREV_5700_BX)
+                       bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO,
+                           sc->bge_tx_prodidx);
 
-       /*
-        * Set a timeout in case the chip goes out to lunch.
-        */
-       ifp->if_timer = 5;
+               txcnt = atomic_add_int_nv(&sc->bge_txcnt, txinc);
+               if (txcnt > BGE_TX_RING_CNT - 16)
+                       ifp->if_flags |= IFF_OACTIVE;
+
+               /*
+                * Set a timeout in case the chip goes out to lunch.
+                */
+               ifp->if_timer = 5;
+       }
 }
 
 void
@@ -4580,6 +4560,7 @@ bge_stop(struct bge_softc *sc)
 
        timeout_del(&sc->bge_timeout);
        timeout_del(&sc->bge_rxtimeout);
+       timeout_del(&sc->bge_rxtimeout_jumbo);
 
        ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
 
@@ -4640,6 +4621,8 @@ bge_stop(struct bge_softc *sc)
         */
        BGE_CLRBIT(sc, BGE_MODE_CTL, BGE_MODECTL_STACKUP);
 
+       intr_barrier(sc->bge_intrhand);
+
        /* Free the RX lists. */
        bge_free_rx_ring_std(sc);
 
index 6d8f9ab..6b18108 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: if_bgereg.h,v 1.127 2015/09/11 13:02:28 stsp Exp $    */
+/*     $OpenBSD: if_bgereg.h,v 1.128 2015/10/19 05:31:25 jmatthew Exp $        */
 
 /*
  * Copyright (c) 2001 Wind River Systems
@@ -2830,11 +2830,6 @@ struct bge_type {
 #define        BGE_TIMEOUT             100000
 #define        BGE_TXCONS_UNSET                0xFFFF  /* impossible value */
 
-struct txdmamap_pool_entry {
-       bus_dmamap_t dmamap;
-       SLIST_ENTRY(txdmamap_pool_entry) link;
-};
-
 #define        ASF_ENABLE              1
 #define        ASF_NEW_HANDSHAKE       2
 #define        ASF_STACKUP             4
@@ -2934,11 +2929,11 @@ struct bge_softc {
        int                     bge_txcnt;
        struct timeout          bge_timeout;
        struct timeout          bge_rxtimeout;
+       struct timeout          bge_rxtimeout_jumbo;
        u_int32_t               bge_rx_discards;
        u_int32_t               bge_tx_discards;
        u_int32_t               bge_rx_inerrors;
        u_int32_t               bge_rx_overruns;
        u_int32_t               bge_tx_collisions;
-       SLIST_HEAD(, txdmamap_pool_entry) txdma_list;
-       struct txdmamap_pool_entry *txdma[BGE_TX_RING_CNT];
+       bus_dmamap_t            bge_txdma[BGE_TX_RING_CNT];
 };