From: sashan Date: Thu, 4 Feb 2021 00:55:41 +0000 (+0000) Subject: make if_pfsync.c a better friend with PF_LOCK X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=2d1fcc79dc4d10fc9b63bb9bf17c5622eda82c67;p=openbsd make if_pfsync.c a better friend with PF_LOCK The code delivered in this change is currently disabled. Brave souls may enable the code by adding -DWITH_PF_LOCK when building customized kernel. Big thanks goes to Hrvoje@ for providing test equipment and testing. As soon as we enter the next release cycle, the WITH_PF_LOCK will be defined as default option for MP kernels. OK dlg@ --- diff --git a/sys/arch/amd64/conf/GENERIC.MP b/sys/arch/amd64/conf/GENERIC.MP index 7f195a53092..5575ffea81b 100644 --- a/sys/arch/amd64/conf/GENERIC.MP +++ b/sys/arch/amd64/conf/GENERIC.MP @@ -1,7 +1,8 @@ -# $OpenBSD: GENERIC.MP,v 1.14 2018/07/13 05:25:24 tb Exp $ +# $OpenBSD: GENERIC.MP,v 1.15 2021/02/04 00:55:41 sashan Exp $ include "arch/amd64/conf/GENERIC" +#option WITH_PF_LOCK option MULTIPROCESSOR #option MP_LOCKDEBUG #option WITNESS diff --git a/sys/net/if_pfsync.c b/sys/net/if_pfsync.c index 2b094dd855f..b5fe9fb3607 100644 --- a/sys/net/if_pfsync.c +++ b/sys/net/if_pfsync.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pfsync.c,v 1.282 2021/02/01 00:31:05 dlg Exp $ */ +/* $OpenBSD: if_pfsync.c,v 1.283 2021/02/04 00:55:41 sashan Exp $ */ /* * Copyright (c) 2002 Michael Shalayeff @@ -210,9 +210,11 @@ struct pfsync_softc { struct ip sc_template; struct pf_state_queue sc_qs[PFSYNC_S_COUNT]; + struct mutex sc_mtx[PFSYNC_S_COUNT]; size_t sc_len; struct pfsync_upd_reqs sc_upd_req_list; + struct mutex sc_upd_req_mtx; int sc_initial_bulk; int sc_link_demoted; @@ -220,6 +222,7 @@ struct pfsync_softc { int sc_defer; struct pfsync_deferrals sc_deferrals; u_int sc_deferred; + struct mutex sc_deferrals_mtx; void *sc_plus; size_t sc_pluslen; @@ -234,6 +237,7 @@ struct pfsync_softc { struct timeout sc_bulk_tmo; TAILQ_HEAD(, tdb) sc_tdb_q; + struct mutex sc_tdb_mtx; struct task sc_ltask; struct task sc_dtask; @@ -241,6 +245,16 @@ struct pfsync_softc { struct timeout sc_tmo; }; +struct pfsync_snapshot { + struct pfsync_softc *sn_sc; + struct pf_state_queue sn_qs[PFSYNC_S_COUNT]; + struct pfsync_upd_reqs sn_upd_req_list; + TAILQ_HEAD(, tdb) sn_tdb_q; + size_t sn_len; + void *sn_plus; + size_t sn_pluslen; +}; + struct pfsync_softc *pfsyncif = NULL; struct cpumem *pfsynccounters; @@ -276,6 +290,10 @@ void pfsync_bulk_start(void); void pfsync_bulk_status(u_int8_t); void pfsync_bulk_update(void *); void pfsync_bulk_fail(void *); + +void pfsync_grab_snapshot(struct pfsync_snapshot *, struct pfsync_softc *); +void pfsync_drop_snapshot(struct pfsync_snapshot *); + #ifdef WITH_PF_LOCK void pfsync_send_dispatch(void *); void pfsync_send_pkt(struct mbuf *); @@ -307,6 +325,13 @@ pfsync_clone_create(struct if_clone *ifc, int unit) struct pfsync_softc *sc; struct ifnet *ifp; int q; + static const char *mtx_names[] = { + "iack_mtx", + "upd_c_mtx", + "del_mtx", + "ins_mtx", + "upd_mtx", + "" }; if (unit != 0) return (EINVAL); @@ -314,18 +339,23 @@ pfsync_clone_create(struct if_clone *ifc, int unit) pfsync_sync_ok = 1; sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK|M_ZERO); - for (q = 0; q < PFSYNC_S_COUNT; q++) + for (q = 0; q < PFSYNC_S_COUNT; q++) { TAILQ_INIT(&sc->sc_qs[q]); + mtx_init_flags(&sc->sc_mtx[q], IPL_SOFTNET, mtx_names[q], 0); + } pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_SOFTNET, 0, "pfsync", NULL); TAILQ_INIT(&sc->sc_upd_req_list); + mtx_init(&sc->sc_upd_req_mtx, IPL_SOFTNET); TAILQ_INIT(&sc->sc_deferrals); + mtx_init(&sc->sc_deferrals_mtx, IPL_SOFTNET); task_set(&sc->sc_ltask, pfsync_syncdev_state, sc); task_set(&sc->sc_dtask, pfsync_ifdetach, sc); sc->sc_deferred = 0; TAILQ_INIT(&sc->sc_tdb_q); + mtx_init(&sc->sc_tdb_mtx, IPL_SOFTNET); sc->sc_len = PFSYNC_MINPKT; sc->sc_maxupdates = 128; @@ -370,6 +400,7 @@ pfsync_clone_destroy(struct ifnet *ifp) struct pfsync_softc *sc = ifp->if_softc; struct ifnet *ifp0; struct pfsync_deferral *pd; + struct pfsync_deferrals deferrals; NET_LOCK(); @@ -392,10 +423,18 @@ pfsync_clone_destroy(struct ifnet *ifp) pfsync_drop(sc); - while (sc->sc_deferred > 0) { - pd = TAILQ_FIRST(&sc->sc_deferrals); - timeout_del(&pd->pd_tmo); - pfsync_undefer(pd, 0); + if (sc->sc_deferred > 0) { + TAILQ_INIT(&deferrals); + mtx_enter(&sc->sc_deferrals_mtx); + TAILQ_CONCAT(&deferrals, &sc->sc_deferrals, pd_entry); + sc->sc_deferred = 0; + mtx_leave(&sc->sc_deferrals_mtx); + + while (!TAILQ_EMPTY(&deferrals)) { + pd = TAILQ_FIRST(&deferrals); + TAILQ_REMOVE(&deferrals, pd, pd_entry); + pfsync_undefer(pd, 0); + } } pfsyncif = NULL; @@ -786,10 +825,8 @@ pfsync_input(struct mbuf **mp, int *offp, int proto, int af) return IPPROTO_DONE; } - PF_LOCK(); e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count, flags); - PF_UNLOCK(); if (e != 0) goto done; @@ -810,6 +847,7 @@ pfsync_in_clr(caddr_t buf, int len, int count, int flags) u_int32_t creatorid; int i; + PF_LOCK(); for (i = 0; i < count; i++) { clr = (struct pfsync_clr *)buf + len * i; kif = NULL; @@ -818,6 +856,7 @@ pfsync_in_clr(caddr_t buf, int len, int count, int flags) (kif = pfi_kif_find(clr->ifname)) == NULL) continue; + PF_STATE_ENTER_WRITE(); for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) { nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); if (st->creatorid == creatorid && @@ -826,7 +865,9 @@ pfsync_in_clr(caddr_t buf, int len, int count, int flags) pf_remove_state(st); } } + PF_STATE_EXIT_WRITE(); } + PF_UNLOCK(); return (0); } @@ -838,6 +879,7 @@ pfsync_in_ins(caddr_t buf, int len, int count, int flags) sa_family_t af1, af2; int i; + PF_LOCK(); for (i = 0; i < count; i++) { sp = (struct pfsync_state *)(buf + len * i); af1 = sp->key[0].af; @@ -863,6 +905,7 @@ pfsync_in_ins(caddr_t buf, int len, int count, int flags) break; } } + PF_UNLOCK(); return (0); } @@ -881,12 +924,17 @@ pfsync_in_iack(caddr_t buf, int len, int count, int flags) id_key.id = ia->id; id_key.creatorid = ia->creatorid; + PF_STATE_ENTER_READ(); st = pf_find_state_byid(&id_key); + pf_state_ref(st); + PF_STATE_EXIT_READ(); if (st == NULL) continue; if (ISSET(st->state_flags, PFSTATE_ACK)) pfsync_deferred(st, 0); + + pf_state_unref(st); } return (0); @@ -930,8 +978,7 @@ pfsync_in_upd(caddr_t buf, int len, int count, int flags) struct pfsync_state *sp; struct pf_state_cmp id_key; struct pf_state *st; - int sync; - + int sync, error; int i; for (i = 0; i < count; i++) { @@ -950,11 +997,17 @@ pfsync_in_upd(caddr_t buf, int len, int count, int flags) id_key.id = sp->id; id_key.creatorid = sp->creatorid; + PF_STATE_ENTER_READ(); st = pf_find_state_byid(&id_key); + pf_state_ref(st); + PF_STATE_EXIT_READ(); if (st == NULL) { /* insert the update */ - if (pfsync_state_import(sp, flags)) + PF_LOCK(); + error = pfsync_state_import(sp, flags); + if (error) pfsyncstat_inc(pfsyncs_badstate); + PF_UNLOCK(); continue; } @@ -992,9 +1045,11 @@ pfsync_in_upd(caddr_t buf, int len, int count, int flags) if (sync) { pfsyncstat_inc(pfsyncs_stale); - pfsync_update_state_locked(st); + pfsync_update_state(st); schednetisr(NETISR_PFSYNC); } + + pf_state_unref(st); } return (0); @@ -1027,7 +1082,10 @@ pfsync_in_upd_c(caddr_t buf, int len, int count, int flags) id_key.id = up->id; id_key.creatorid = up->creatorid; + PF_STATE_ENTER_READ(); st = pf_find_state_byid(&id_key); + pf_state_ref(st); + PF_STATE_EXIT_READ(); if (st == NULL) { /* We don't have this state. Ask for it. */ pfsync_request_update(id_key.creatorid, id_key.id); @@ -1066,9 +1124,11 @@ pfsync_in_upd_c(caddr_t buf, int len, int count, int flags) if (sync) { pfsyncstat_inc(pfsyncs_stale); - pfsync_update_state_locked(st); + pfsync_update_state(st); schednetisr(NETISR_PFSYNC); } + + pf_state_unref(st); } return (0); @@ -1092,15 +1152,21 @@ pfsync_in_ureq(caddr_t buf, int len, int count, int flags) if (id_key.id == 0 && id_key.creatorid == 0) pfsync_bulk_start(); else { + PF_STATE_ENTER_READ(); st = pf_find_state_byid(&id_key); + pf_state_ref(st); + PF_STATE_EXIT_READ(); if (st == NULL) { pfsyncstat_inc(pfsyncs_badstate); continue; } - if (ISSET(st->state_flags, PFSTATE_NOSYNC)) + if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { + pf_state_unref(st); continue; + } pfsync_update_state_req(st); + pf_state_unref(st); } } @@ -1115,6 +1181,7 @@ pfsync_in_del(caddr_t buf, int len, int count, int flags) struct pf_state *st; int i; + PF_STATE_ENTER_WRITE(); for (i = 0; i < count; i++) { sp = (struct pfsync_state *)(buf + len * i); @@ -1129,6 +1196,7 @@ pfsync_in_del(caddr_t buf, int len, int count, int flags) SET(st->state_flags, PFSTATE_NOSYNC); pf_remove_state(st); } + PF_STATE_EXIT_WRITE(); return (0); } @@ -1141,6 +1209,8 @@ pfsync_in_del_c(caddr_t buf, int len, int count, int flags) struct pf_state *st; int i; + PF_LOCK(); + PF_STATE_ENTER_WRITE(); for (i = 0; i < count; i++) { sp = (struct pfsync_del_c *)(buf + len * i); @@ -1156,6 +1226,8 @@ pfsync_in_del_c(caddr_t buf, int len, int count, int flags) SET(st->state_flags, PFSTATE_NOSYNC); pf_remove_state(st); } + PF_STATE_EXIT_WRITE(); + PF_UNLOCK(); return (0); } @@ -1505,19 +1577,59 @@ pfsync_out_del(struct pf_state *st, void *buf) } void -pfsync_drop(struct pfsync_softc *sc) +pfsync_grab_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc *sc) +{ + int q; + + sn->sn_sc = sc; + + for (q = 0; q < PFSYNC_S_COUNT; q++) + mtx_enter(&sc->sc_mtx[q]); + + mtx_enter(&sc->sc_upd_req_mtx); + mtx_enter(&sc->sc_tdb_mtx); + + for (q = 0; q < PFSYNC_S_COUNT; q++) { + TAILQ_INIT(&sn->sn_qs[q]); + TAILQ_CONCAT(&sn->sn_qs[q], &sc->sc_qs[q], sync_list); + } + + TAILQ_INIT(&sn->sn_upd_req_list); + TAILQ_CONCAT(&sn->sn_upd_req_list, &sc->sc_upd_req_list, ur_entry); + + TAILQ_INIT(&sn->sn_tdb_q); + TAILQ_CONCAT(&sn->sn_tdb_q, &sc->sc_tdb_q, tdb_sync_entry); + + sn->sn_len = sc->sc_len; + sc->sc_len = PFSYNC_MINPKT; + + sn->sn_plus = sc->sc_plus; + sc->sc_plus = NULL; + sn->sn_pluslen = sc->sc_pluslen; + sc->sc_pluslen = 0; + + mtx_leave(&sc->sc_tdb_mtx); + mtx_leave(&sc->sc_upd_req_mtx); + + for (q = (PFSYNC_S_COUNT - 1); q >= 0; q--) + mtx_leave(&sc->sc_mtx[q]); +} + +void +pfsync_drop_snapshot(struct pfsync_snapshot *sn) { struct pf_state *st; struct pfsync_upd_req_item *ur; struct tdb *t; int q; + for (q = 0; q < PFSYNC_S_COUNT; q++) { - if (TAILQ_EMPTY(&sc->sc_qs[q])) + if (TAILQ_EMPTY(&sn->sn_qs[q])) continue; - while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) { - TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); + while ((st = TAILQ_FIRST(&sn->sn_qs[q])) != NULL) { + TAILQ_REMOVE(&sn->sn_qs[q], st, sync_list); #ifdef PFSYNC_DEBUG KASSERT(st->sync_state == q); #endif @@ -1526,19 +1638,42 @@ pfsync_drop(struct pfsync_softc *sc) } } - while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { - TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); - pool_put(&sc->sc_pool, ur); + while ((ur = TAILQ_FIRST(&sn->sn_upd_req_list)) != NULL) { + TAILQ_REMOVE(&sn->sn_upd_req_list, ur, ur_entry); + pool_put(&sn->sn_sc->sc_pool, ur); } - sc->sc_plus = NULL; - - while ((t = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) { - TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry); + while ((t = TAILQ_FIRST(&sn->sn_tdb_q)) != NULL) { + TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_entry); CLR(t->tdb_flags, TDBF_PFSYNC); } +} - sc->sc_len = PFSYNC_MINPKT; +int +pfsync_is_snapshot_empty(struct pfsync_snapshot *sn) +{ + int q; + + for (q = 0; q < PFSYNC_S_COUNT; q++) + if (!TAILQ_EMPTY(&sn->sn_qs[q])) + return (0); + + if (!TAILQ_EMPTY(&sn->sn_upd_req_list)) + return (0); + + if (!TAILQ_EMPTY(&sn->sn_tdb_q)) + return (0); + + return (sn->sn_plus == NULL); +} + +void +pfsync_drop(struct pfsync_softc *sc) +{ + struct pfsync_snapshot sn; + + pfsync_grab_snapshot(&sn, sc); + pfsync_drop_snapshot(&sn); } #ifdef WITH_PF_LOCK @@ -1591,6 +1726,7 @@ pfsync_send_pkt(struct mbuf *m) void pfsync_sendout(void) { + struct pfsync_snapshot sn; struct pfsync_softc *sc = pfsyncif; #if NBPFILTER > 0 struct ifnet *ifp = &sc->sc_if; @@ -1602,12 +1738,9 @@ pfsync_sendout(void) struct pf_state *st; struct pfsync_upd_req_item *ur; struct tdb *t; - int offset; int q, count = 0; - PF_ASSERT_LOCKED(); - if (sc == NULL || sc->sc_len == PFSYNC_MINPKT) return; @@ -1621,26 +1754,35 @@ pfsync_sendout(void) return; } + pfsync_grab_snapshot(&sn, sc); + + /* + * Check below is sufficient to prevent us from sending empty packets, + * but it does not stop us from sending short packets. + */ + if (pfsync_is_snapshot_empty(&sn)) + return; + MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) { sc->sc_if.if_oerrors++; pfsyncstat_inc(pfsyncs_onomem); - pfsync_drop(sc); + pfsync_drop_snapshot(&sn); return; } - if (max_linkhdr + sc->sc_len > MHLEN) { - MCLGETL(m, M_DONTWAIT, max_linkhdr + sc->sc_len); + if (max_linkhdr + sn.sn_len > MHLEN) { + MCLGETL(m, M_DONTWAIT, max_linkhdr + sn.sn_len); if (!ISSET(m->m_flags, M_EXT)) { m_free(m); sc->sc_if.if_oerrors++; pfsyncstat_inc(pfsyncs_onomem); - pfsync_drop(sc); + pfsync_drop_snapshot(&sn); return; } } m->m_data += max_linkhdr; - m->m_len = m->m_pkthdr.len = sc->sc_len; + m->m_len = m->m_pkthdr.len = sn.sn_len; /* build the ip header */ ip = mtod(m, struct ip *); @@ -1656,16 +1798,16 @@ pfsync_sendout(void) offset += sizeof(*ph); ph->version = PFSYNC_VERSION; - ph->len = htons(sc->sc_len - sizeof(*ip)); + ph->len = htons(sn.sn_len - sizeof(*ip)); bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); - if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { + if (!TAILQ_EMPTY(&sn.sn_upd_req_list)) { subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; - while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { - TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); + while ((ur = TAILQ_FIRST(&sn.sn_upd_req_list)) != NULL) { + TAILQ_REMOVE(&sn.sn_upd_req_list, ur, ur_entry); bcopy(&ur->ur_msg, m->m_data + offset, sizeof(ur->ur_msg)); @@ -1683,20 +1825,19 @@ pfsync_sendout(void) } /* has someone built a custom region for us to add? */ - if (sc->sc_plus != NULL) { - bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); - offset += sc->sc_pluslen; - - sc->sc_plus = NULL; + if (sn.sn_plus != NULL) { + bcopy(sn.sn_plus, m->m_data + offset, sn.sn_pluslen); + offset += sn.sn_pluslen; + sn.sn_plus = NULL; /* XXX memory leak ? */ } - if (!TAILQ_EMPTY(&sc->sc_tdb_q)) { + if (!TAILQ_EMPTY(&sn.sn_tdb_q)) { subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; - while ((t = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) { - TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry); + while ((t = TAILQ_FIRST(&sn.sn_tdb_q)) != NULL) { + TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_entry); pfsync_out_tdb(t, m->m_data + offset); offset += sizeof(struct pfsync_tdb); CLR(t->tdb_flags, TDBF_PFSYNC); @@ -1711,15 +1852,15 @@ pfsync_sendout(void) /* walk the queues */ for (q = 0; q < PFSYNC_S_COUNT; q++) { - if (TAILQ_EMPTY(&sc->sc_qs[q])) + if (TAILQ_EMPTY(&sn.sn_qs[q])) continue; subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; - while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) { - TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); + while ((st = TAILQ_FIRST(&sn.sn_qs[q])) != NULL) { + TAILQ_REMOVE(&sn.sn_qs[q], st, sync_list); #ifdef PFSYNC_DEBUG KASSERT(st->sync_state == q); #endif @@ -1741,10 +1882,10 @@ pfsync_sendout(void) #if NBPFILTER > 0 if (ifp->if_bpf) { m->m_data += sizeof(*ip); - m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); + m->m_len = m->m_pkthdr.len = sn.sn_len - sizeof(*ip); bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); m->m_data -= sizeof(*ip); - m->m_len = m->m_pkthdr.len = sc->sc_len; + m->m_len = m->m_pkthdr.len = sn.sn_len; } if (sc->sc_sync_ifidx == 0) { @@ -1754,9 +1895,6 @@ pfsync_sendout(void) } #endif - /* start again */ - sc->sc_len = PFSYNC_MINPKT; - sc->sc_if.if_opackets++; sc->sc_if.if_obytes += m->m_pkthdr.len; @@ -1815,7 +1953,11 @@ pfsync_defer(struct pf_state *st, struct mbuf *m) return (0); if (sc->sc_deferred >= 128) { + mtx_enter(&sc->sc_deferrals_mtx); pd = TAILQ_FIRST(&sc->sc_deferrals); + TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); + sc->sc_deferred--; + mtx_leave(&sc->sc_deferrals_mtx); if (timeout_del(&pd->pd_tmo)) pfsync_undefer(pd, 0); } @@ -1830,8 +1972,10 @@ pfsync_defer(struct pf_state *st, struct mbuf *m) pd->pd_st = pf_state_ref(st); pd->pd_m = m; + mtx_enter(&sc->sc_deferrals_mtx); sc->sc_deferred++; TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); + mtx_leave(&sc->sc_deferrals_mtx); timeout_set_proc(&pd->pd_tmo, pfsync_defer_tmo, pd); timeout_add_msec(&pd->pd_tmo, 20); @@ -1842,71 +1986,93 @@ pfsync_defer(struct pf_state *st, struct mbuf *m) } void -pfsync_undefer(struct pfsync_deferral *pd, int drop) +pfsync_undefer_notify(struct pfsync_deferral *pd) { - struct pfsync_softc *sc = pfsyncif; struct pf_pdesc pdesc; struct pf_state *st = pd->pd_st; - NET_ASSERT_LOCKED(); - - if (sc == NULL) - return; - - TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); - sc->sc_deferred--; - - CLR(st->state_flags, PFSTATE_ACK); - if (drop) - m_freem(pd->pd_m); - else { - if (st->rt == PF_ROUTETO) { - if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af, - st->direction, st->kif, pd->pd_m, NULL) != - PF_PASS) { - m_freem(pd->pd_m); - goto out; - } - switch (st->key[PF_SK_WIRE]->af) { - case AF_INET: - pf_route(&pdesc, st); - break; + if (st->rule.ptr->rt == PF_ROUTETO) { + if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af, + st->direction, st->kif, pd->pd_m, NULL) != PF_PASS) { + m_freem(pd->pd_m); + return; + } + switch (st->key[PF_SK_WIRE]->af) { + case AF_INET: + pf_route(&pdesc, st); + break; #ifdef INET6 - case AF_INET6: - pf_route6(&pdesc, st); - break; + case AF_INET6: + pf_route6(&pdesc, st); + break; #endif /* INET6 */ - default: - unhandled_af(st->key[PF_SK_WIRE]->af); - } - pd->pd_m = pdesc.m; - } else { - switch (st->key[PF_SK_WIRE]->af) { - case AF_INET: - ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, - 0); - break; + default: + unhandled_af(st->key[PF_SK_WIRE]->af); + } + pd->pd_m = pdesc.m; + } else { + switch (st->key[PF_SK_WIRE]->af) { + case AF_INET: + ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, + 0); + break; #ifdef INET6 - case AF_INET6: - ip6_output(pd->pd_m, NULL, NULL, 0, - NULL, NULL); - break; + case AF_INET6: + ip6_output(pd->pd_m, NULL, NULL, 0, + NULL, NULL); + break; #endif /* INET6 */ - default: - unhandled_af(st->key[PF_SK_WIRE]->af); - } + default: + unhandled_af(st->key[PF_SK_WIRE]->af); } + + pd->pd_m = NULL; } - out: - pf_state_unref(st); +} + +void +pfsync_free_deferral(struct pfsync_deferral *pd) +{ + struct pfsync_softc *sc = pfsyncif; + + pf_state_unref(pd->pd_st); + if (pd->pd_m != NULL) + m_freem(pd->pd_m); pool_put(&sc->sc_pool, pd); } +void +pfsync_undefer(struct pfsync_deferral *pd, int drop) +{ + struct pfsync_softc *sc = pfsyncif; + + NET_ASSERT_LOCKED(); + + if (sc == NULL) + return; + + CLR(pd->pd_st->state_flags, PFSTATE_ACK); + if (drop) { + m_freem(pd->pd_m); + pd->pd_m = NULL; + } else + pfsync_undefer_notify(pd); + + pfsync_free_deferral(pd); +} + void pfsync_defer_tmo(void *arg) { + struct pfsync_softc *sc = pfsyncif; + struct pfsync_deferral *pd = arg; + + mtx_enter(&sc->sc_deferrals_mtx); + TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); + sc->sc_deferred--; + mtx_leave(&sc->sc_deferrals_mtx); NET_LOCK(); - pfsync_undefer(arg, 0); + pfsync_undefer(pd, 0); NET_UNLOCK(); } @@ -1918,25 +2084,29 @@ pfsync_deferred(struct pf_state *st, int drop) NET_ASSERT_LOCKED(); + mtx_enter(&sc->sc_deferrals_mtx); TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { if (pd->pd_st == st) { - if (timeout_del(&pd->pd_tmo)) + if (timeout_del(&pd->pd_tmo)) { + TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); + sc->sc_deferred--; + mtx_leave(&sc->sc_deferrals_mtx); pfsync_undefer(pd, drop); + } else + mtx_leave(&sc->sc_deferrals_mtx); return; } } - - panic("pfsync_deferred: unable to find deferred state"); + mtx_leave(&sc->sc_deferrals_mtx); } void -pfsync_update_state_locked(struct pf_state *st) +pfsync_update_state(struct pf_state *st) { struct pfsync_softc *sc = pfsyncif; int sync = 0; NET_ASSERT_LOCKED(); - PF_ASSERT_LOCKED(); if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) return; @@ -1981,22 +2151,6 @@ pfsync_update_state_locked(struct pf_state *st) schednetisr(NETISR_PFSYNC); } -void -pfsync_update_state(struct pf_state *st, int *have_pf_lock) -{ - struct pfsync_softc *sc = pfsyncif; - - if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) - return; - - if (*have_pf_lock == 0) { - PF_LOCK(); - *have_pf_lock = 1; - } - - pfsync_update_state_locked(st); -} - void pfsync_cancel_full_update(struct pfsync_softc *sc) { @@ -2049,7 +2203,7 @@ pfsync_request_update(u_int32_t creatorid, u_int64_t id) { struct pfsync_softc *sc = pfsyncif; struct pfsync_upd_req_item *item; - size_t nlen = sizeof(struct pfsync_upd_req); + size_t nlen, sc_len; /* * this code does nothing to prevent multiple update requests for the @@ -2065,18 +2219,24 @@ pfsync_request_update(u_int32_t creatorid, u_int64_t id) item->ur_msg.id = id; item->ur_msg.creatorid = creatorid; - if (TAILQ_EMPTY(&sc->sc_upd_req_list)) - nlen += sizeof(struct pfsync_subheader); + do { + mtx_enter(&sc->sc_upd_req_mtx); - if (sc->sc_len + nlen > sc->sc_if.if_mtu) { - pfsync_sendout(); + nlen = sizeof(struct pfsync_upd_req); + if (TAILQ_EMPTY(&sc->sc_upd_req_list)) + nlen += sizeof(struct pfsync_subheader); - nlen = sizeof(struct pfsync_subheader) + - sizeof(struct pfsync_upd_req); - } + sc_len = atomic_add_long_nv(&sc->sc_len, nlen); + if (sc_len > sc->sc_if.if_mtu) { + atomic_sub_long(&sc->sc_len, nlen); + mtx_leave(&sc->sc_upd_req_mtx); + pfsync_sendout(); + continue; + } - TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); - sc->sc_len += nlen; + TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); + mtx_leave(&sc->sc_upd_req_mtx); + } while (0); schednetisr(NETISR_PFSYNC); } @@ -2202,7 +2362,7 @@ void pfsync_q_ins(struct pf_state *st, int q) { struct pfsync_softc *sc = pfsyncif; - size_t nlen = pfsync_qs[q].len; + size_t nlen, sc_len; KASSERT(st->sync_state == PFSYNC_S_NONE); @@ -2210,19 +2370,37 @@ pfsync_q_ins(struct pf_state *st, int q) if (sc->sc_len < PFSYNC_MINPKT) panic("pfsync pkt len is too low %zd", sc->sc_len); #endif - if (TAILQ_EMPTY(&sc->sc_qs[q])) - nlen += sizeof(struct pfsync_subheader); + do { + mtx_enter(&sc->sc_mtx[q]); - if (sc->sc_len + nlen > sc->sc_if.if_mtu) { - pfsync_sendout(); + /* + * If two threads are competing to insert the same state, then + * there must be just single winner. + */ + if (st->sync_state != PFSYNC_S_NONE) { + mtx_leave(&sc->sc_mtx[q]); + break; + } - nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; - } + nlen = pfsync_qs[q].len; - sc->sc_len += nlen; - pf_state_ref(st); - TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); - st->sync_state = q; + if (TAILQ_EMPTY(&sc->sc_qs[q])) + nlen += sizeof(struct pfsync_subheader); + + sc_len = atomic_add_long_nv(&sc->sc_len, nlen); + if (sc_len > sc->sc_if.if_mtu) { + atomic_sub_long(&sc->sc_len, nlen); + mtx_leave(&sc->sc_mtx[q]); + pfsync_sendout(); + continue; + } + + pf_state_ref(st); + + TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); + st->sync_state = q; + mtx_leave(&sc->sc_mtx[q]); + } while (0); } void @@ -2233,39 +2411,48 @@ pfsync_q_del(struct pf_state *st) KASSERT(st->sync_state != PFSYNC_S_NONE); - sc->sc_len -= pfsync_qs[q].len; + mtx_enter(&sc->sc_mtx[q]); + atomic_sub_long(&sc->sc_len, pfsync_qs[q].len); TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); + if (TAILQ_EMPTY(&sc->sc_qs[q])) + atomic_sub_long(&sc->sc_len, sizeof (struct pfsync_subheader)); + mtx_leave(&sc->sc_mtx[q]); + st->sync_state = PFSYNC_S_NONE; pf_state_unref(st); - - if (TAILQ_EMPTY(&sc->sc_qs[q])) - sc->sc_len -= sizeof(struct pfsync_subheader); } void pfsync_update_tdb(struct tdb *t, int output) { struct pfsync_softc *sc = pfsyncif; - size_t nlen = sizeof(struct pfsync_tdb); + size_t nlen, sc_len; if (sc == NULL) return; if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) { - if (TAILQ_EMPTY(&sc->sc_tdb_q)) - nlen += sizeof(struct pfsync_subheader); - - if (sc->sc_len + nlen > sc->sc_if.if_mtu) { - pfsync_sendout(); + do { + mtx_enter(&sc->sc_tdb_mtx); + nlen = sizeof(struct pfsync_tdb); + + if (TAILQ_EMPTY(&sc->sc_tdb_q)) + nlen += sizeof(struct pfsync_subheader); + + sc_len = atomic_add_long_nv(&sc->sc_len, nlen); + if (sc_len > sc->sc_if.if_mtu) { + atomic_sub_long(&sc->sc_len, nlen); + mtx_leave(&sc->sc_tdb_mtx); + pfsync_sendout(); + continue; + } - nlen = sizeof(struct pfsync_subheader) + - sizeof(struct pfsync_tdb); - } + TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry); + mtx_leave(&sc->sc_tdb_mtx); - sc->sc_len += nlen; - TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry); - SET(t->tdb_flags, TDBF_PFSYNC); - t->tdb_updates = 0; + SET(t->tdb_flags, TDBF_PFSYNC); + t->tdb_updates = 0; + } while (0); } else { if (++t->tdb_updates >= sc->sc_maxupdates) schednetisr(NETISR_PFSYNC); @@ -2281,16 +2468,22 @@ void pfsync_delete_tdb(struct tdb *t) { struct pfsync_softc *sc = pfsyncif; + size_t nlen; if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC)) return; - sc->sc_len -= sizeof(struct pfsync_tdb); + mtx_enter(&sc->sc_tdb_mtx); + TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry); CLR(t->tdb_flags, TDBF_PFSYNC); + nlen = sizeof(struct pfsync_tdb); if (TAILQ_EMPTY(&sc->sc_tdb_q)) - sc->sc_len -= sizeof(struct pfsync_subheader); + nlen += sizeof(struct pfsync_subheader); + atomic_sub_long(&sc->sc_len, nlen); + + mtx_leave(&sc->sc_tdb_mtx); } void @@ -2338,9 +2531,14 @@ pfsync_bulk_start(void) else { sc->sc_ureq_received = getuptime(); - if (sc->sc_bulk_next == NULL) + if (sc->sc_bulk_next == NULL) { + PF_STATE_ENTER_READ(); sc->sc_bulk_next = TAILQ_FIRST(&state_list); + pf_state_ref(sc->sc_bulk_next); + PF_STATE_EXIT_READ(); + } sc->sc_bulk_last = sc->sc_bulk_next; + pf_state_ref(sc->sc_bulk_last); pfsync_bulk_status(PFSYNC_BUS_START); timeout_add(&sc->sc_bulk_tmo, 0); @@ -2351,7 +2549,7 @@ void pfsync_bulk_update(void *arg) { struct pfsync_softc *sc; - struct pf_state *st; + struct pf_state *st, *st_next; int i = 0; NET_LOCK(); @@ -2359,6 +2557,7 @@ pfsync_bulk_update(void *arg) if (sc == NULL) goto out; st = sc->sc_bulk_next; + sc->sc_bulk_next = NULL; for (;;) { if (st->sync_state == PFSYNC_S_NONE && @@ -2368,13 +2567,23 @@ pfsync_bulk_update(void *arg) i++; } - st = TAILQ_NEXT(st, entry_list); + /* + * I wonder how we prevent infinite bulk update. IMO it can + * happen when sc_bulk_last state expires before we iterate + * through the whole list. + */ + PF_STATE_ENTER_READ(); + st_next = TAILQ_NEXT(st, entry_list); + pf_state_unref(st); + st = st_next; if (st == NULL) st = TAILQ_FIRST(&state_list); + pf_state_ref(st); + PF_STATE_EXIT_READ(); - if (st == sc->sc_bulk_last) { + if ((st == NULL) || (st == sc->sc_bulk_last)) { /* we're done */ - sc->sc_bulk_next = NULL; + pf_state_unref(sc->sc_bulk_last); sc->sc_bulk_last = NULL; pfsync_bulk_status(PFSYNC_BUS_END); break; @@ -2497,9 +2706,7 @@ void pfsync_timeout(void *arg) { NET_LOCK(); - PF_LOCK(); pfsync_sendout(); - PF_UNLOCK(); NET_UNLOCK(); } @@ -2507,9 +2714,7 @@ pfsync_timeout(void *arg) void pfsyncintr(void) { - PF_LOCK(); pfsync_sendout(); - PF_UNLOCK(); } int diff --git a/sys/net/if_pfsync.h b/sys/net/if_pfsync.h index 3c3b7dd90f6..2c9bedcc920 100644 --- a/sys/net/if_pfsync.h +++ b/sys/net/if_pfsync.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pfsync.h,v 1.54 2018/09/11 07:53:38 sashan Exp $ */ +/* $OpenBSD: if_pfsync.h,v 1.55 2021/02/04 00:55:41 sashan Exp $ */ /* * Copyright (c) 2001 Michael Shalayeff @@ -328,7 +328,7 @@ void pfsync_state_export(struct pfsync_state *, struct pf_state *); void pfsync_insert_state(struct pf_state *); -void pfsync_update_state(struct pf_state *, int *); +void pfsync_update_state(struct pf_state *); void pfsync_delete_state(struct pf_state *); void pfsync_clear_states(u_int32_t, const char *); diff --git a/sys/net/pf.c b/sys/net/pf.c index 44e0e17f757..74ec0aa6278 100644 --- a/sys/net/pf.c +++ b/sys/net/pf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.1107 2021/02/03 07:41:12 dlg Exp $ */ +/* $OpenBSD: pf.c,v 1.1108 2021/02/04 00:55:41 sashan Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -6935,7 +6935,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) PF_STATE_EXIT_READ(); if (action == PF_PASS || action == PF_AFRT) { #if NPFSYNC > 0 - pfsync_update_state(s, &have_pf_lock); + pfsync_update_state(s); #endif /* NPFSYNC > 0 */ r = s->rule.ptr; a = s->anchor.ptr; @@ -6967,7 +6967,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) PF_STATE_EXIT_READ(); if (action == PF_PASS || action == PF_AFRT) { #if NPFSYNC > 0 - pfsync_update_state(s, &have_pf_lock); + pfsync_update_state(s); #endif /* NPFSYNC > 0 */ r = s->rule.ptr; a = s->anchor.ptr; @@ -7043,7 +7043,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) if (action == PF_PASS || action == PF_AFRT) { #if NPFSYNC > 0 - pfsync_update_state(s, &have_pf_lock); + pfsync_update_state(s); #endif /* NPFSYNC > 0 */ r = s->rule.ptr; a = s->anchor.ptr;