-/* $OpenBSD: if_pfsync.c,v 1.317 2023/06/05 08:45:20 sashan Exp $ */
+/* $OpenBSD: if_pfsync.c,v 1.318 2023/07/06 04:55:05 dlg Exp $ */
/*
* Copyright (c) 2002 Michael Shalayeff
*/
/*
- * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
+ * Copyright (c) 2009, 2022, 2023 David Gwynne <dlg@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+#include "bpfilter.h"
+#include "pfsync.h"
+#include "kstat.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/sysctl.h>
#include <sys/pool.h>
#include <sys/syslog.h>
+#include <sys/tree.h>
+#include <sys/smr.h>
+#include <sys/percpu.h>
+#include <sys/refcnt.h>
+#include <sys/kstat.h>
+#include <sys/stdarg.h>
#include <net/if.h>
#include <net/if_types.h>
#include <netinet/ip_carp.h>
#endif
-#define PF_DEBUGNAME "pfsync: "
#include <net/pfvar.h>
#include <net/pfvar_priv.h>
#include <net/if_pfsync.h>
-#include "bpfilter.h"
-#include "pfsync.h"
-
-#define PFSYNC_DEFER_NSEC 20000000ULL
-
#define PFSYNC_MINPKT ( \
sizeof(struct ip) + \
sizeof(struct pfsync_header))
-int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
- struct pfsync_state_peer *);
-
-int pfsync_in_clr(caddr_t, int, int, int);
-int pfsync_in_iack(caddr_t, int, int, int);
-int pfsync_in_upd_c(caddr_t, int, int, int);
-int pfsync_in_ureq(caddr_t, int, int, int);
-int pfsync_in_del(caddr_t, int, int, int);
-int pfsync_in_del_c(caddr_t, int, int, int);
-int pfsync_in_bus(caddr_t, int, int, int);
-int pfsync_in_tdb(caddr_t, int, int, int);
-int pfsync_in_ins(caddr_t, int, int, int);
-int pfsync_in_upd(caddr_t, int, int, int);
-int pfsync_in_eof(caddr_t, int, int, int);
-
-int pfsync_in_error(caddr_t, int, int, int);
-
-void pfsync_update_state_locked(struct pf_state *);
-
-const struct {
- int (*in)(caddr_t, int, int, int);
- size_t len;
-} pfsync_acts[] = {
- /* PFSYNC_ACT_CLR */
- { pfsync_in_clr, sizeof(struct pfsync_clr) },
- /* PFSYNC_ACT_OINS */
- { pfsync_in_error, 0 },
- /* PFSYNC_ACT_INS_ACK */
- { pfsync_in_iack, sizeof(struct pfsync_ins_ack) },
- /* PFSYNC_ACT_OUPD */
- { pfsync_in_error, 0 },
- /* PFSYNC_ACT_UPD_C */
- { pfsync_in_upd_c, sizeof(struct pfsync_upd_c) },
- /* PFSYNC_ACT_UPD_REQ */
- { pfsync_in_ureq, sizeof(struct pfsync_upd_req) },
- /* PFSYNC_ACT_DEL */
- { pfsync_in_del, sizeof(struct pfsync_state) },
- /* PFSYNC_ACT_DEL_C */
- { pfsync_in_del_c, sizeof(struct pfsync_del_c) },
- /* PFSYNC_ACT_INS_F */
- { pfsync_in_error, 0 },
- /* PFSYNC_ACT_DEL_F */
- { pfsync_in_error, 0 },
- /* PFSYNC_ACT_BUS */
- { pfsync_in_bus, sizeof(struct pfsync_bus) },
- /* PFSYNC_ACT_OTDB */
- { pfsync_in_error, 0 },
- /* PFSYNC_ACT_EOF */
- { pfsync_in_error, 0 },
- /* PFSYNC_ACT_INS */
- { pfsync_in_ins, sizeof(struct pfsync_state) },
- /* PFSYNC_ACT_UPD */
- { pfsync_in_upd, sizeof(struct pfsync_state) },
- /* PFSYNC_ACT_TDB */
- { pfsync_in_tdb, sizeof(struct pfsync_tdb) },
-};
+struct pfsync_softc;
-struct pfsync_q {
- void (*write)(struct pf_state *, void *);
- size_t len;
- u_int8_t action;
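+/*
+ * a deferral holds a state-creating packet (pd_m) until the peer
+ * acks the new state or pd_deadline passes, whichever comes first.
+ */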
+struct pfsync_deferral {
+ TAILQ_ENTRY(pfsync_deferral) pd_entry;
+ struct pf_state *pd_st;
+ struct mbuf *pd_m;
+ uint64_t pd_deadline;
};
+TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
-/* we have one of these for every PFSYNC_S_ */
-void pfsync_out_state(struct pf_state *, void *);
-void pfsync_out_iack(struct pf_state *, void *);
-void pfsync_out_upd_c(struct pf_state *, void *);
-void pfsync_out_del(struct pf_state *, void *);
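+/*
+ * deferred packets are held for at most PFSYNC_DEFER_NSEC (20ms),
+ * with at most PFSYNC_DEFER_LIMIT deferrals outstanding per slice.
+ * bulk sends are paced at one packet per PFSYNC_BULK_SND_IVAL_MS.
+ */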
+#define PFSYNC_DEFER_NSEC 20000000ULL
+#define PFSYNC_DEFER_LIMIT 128
+#define PFSYNC_BULK_SND_IVAL_MS 20
-struct pfsync_q pfsync_qs[] = {
- { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
- { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C },
- { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C },
- { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS },
- { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }
+static struct pool pfsync_deferrals_pool;
+
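+/*
+ * states for the machine that requests a bulk update from a peer
+ * when pfsync comes up or the sync interface regains link.
+ */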
+enum pfsync_bulk_req_state {
+ PFSYNC_BREQ_S_NONE,
+ PFSYNC_BREQ_S_START,
+ PFSYNC_BREQ_S_SENT,
+ PFSYNC_BREQ_S_BULK,
+ PFSYNC_BREQ_S_DONE,
};
-void pfsync_q_ins(struct pf_state *, int);
-void pfsync_q_del(struct pf_state *);
+static const char *pfsync_bulk_req_state_names[] = {
+ [PFSYNC_BREQ_S_NONE] = "none",
+ [PFSYNC_BREQ_S_START] = "start",
+ [PFSYNC_BREQ_S_SENT] = "sent",
+ [PFSYNC_BREQ_S_BULK] = "bulk",
+ [PFSYNC_BREQ_S_DONE] = "done",
+};
-struct pfsync_upd_req_item {
- TAILQ_ENTRY(pfsync_upd_req_item) ur_entry;
- TAILQ_ENTRY(pfsync_upd_req_item) ur_snap;
- struct pfsync_upd_req ur_msg;
+enum pfsync_bulk_req_event {
+ PFSYNC_BREQ_EVT_UP,
+ PFSYNC_BREQ_EVT_DOWN,
+ PFSYNC_BREQ_EVT_TMO,
+ PFSYNC_BREQ_EVT_LINK,
+ PFSYNC_BREQ_EVT_BUS_START,
+ PFSYNC_BREQ_EVT_BUS_END,
};
-TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);
-struct pfsync_deferral {
- TAILQ_ENTRY(pfsync_deferral) pd_entry;
- struct pf_state *pd_st;
- struct mbuf *pd_m;
- uint64_t pd_deadline;
+static const char *pfsync_bulk_req_event_names[] = {
+ [PFSYNC_BREQ_EVT_UP] = "up",
+ [PFSYNC_BREQ_EVT_DOWN] = "down",
+ [PFSYNC_BREQ_EVT_TMO] = "timeout",
+ [PFSYNC_BREQ_EVT_LINK] = "link",
+ [PFSYNC_BREQ_EVT_BUS_START] = "bus-start",
+ [PFSYNC_BREQ_EVT_BUS_END] = "bus-end",
};
-TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
-#define PFSYNC_PLSIZE MAX(sizeof(struct pfsync_upd_req_item), \
- sizeof(struct pfsync_deferral))
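+/*
+ * pfsync work is fanned out over PFSYNC_NSLICES slices. a state
+ * maps to a slice via its key hash, so all updates for that state
+ * are serialised by the slice mutex and run on the slice taskq.
+ */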
+struct pfsync_slice {
+ struct pfsync_softc *s_pfsync;
+ struct mutex s_mtx;
-void pfsync_out_tdb(struct tdb *, void *);
+ struct pf_state_queue s_qs[PFSYNC_S_COUNT];
+ TAILQ_HEAD(, tdb) s_tdb_q;
+ size_t s_len;
+ struct mbuf_list s_ml;
+
+ struct taskq *s_softnet;
+ struct task s_task;
+ struct timeout s_tmo;
+
+ struct mbuf_queue s_sendq;
+ struct task s_send;
+
+ struct pfsync_deferrals s_deferrals;
+ unsigned int s_deferred;
+ struct task s_deferrals_task;
+ struct timeout s_deferrals_tmo;
+
+ uint64_t s_stat_locks;
+ uint64_t s_stat_contended;
+ uint64_t s_stat_write_nop;
+ uint64_t s_stat_task_add;
+ uint64_t s_stat_task_run;
+ uint64_t s_stat_enqueue;
+ uint64_t s_stat_dequeue;
+
+ uint64_t s_stat_defer_add;
+ uint64_t s_stat_defer_ack;
+ uint64_t s_stat_defer_run;
+ uint64_t s_stat_defer_overlimit;
+
+ struct kstat *s_kstat;
+} __aligned(CACHELINESIZE);
+
+#define PFSYNC_SLICE_BITS 1
+#define PFSYNC_NSLICES (1 << PFSYNC_SLICE_BITS)
struct pfsync_softc {
struct ifnet sc_if;
+ unsigned int sc_dead;
+ unsigned int sc_up;
+ struct refcnt sc_refs;
+
+ /* config */
+ struct in_addr sc_syncpeer;
+ unsigned int sc_maxupdates;
+ unsigned int sc_defer;
+
+ /* operation */
unsigned int sc_sync_ifidx;
+ unsigned int sc_sync_if_down;
+ void *sc_inm;
+ struct task sc_ltask;
+ struct task sc_dtask;
+ struct ip sc_template;
- struct pool sc_pool;
+ struct pfsync_slice sc_slices[PFSYNC_NSLICES];
- struct ip_moptions sc_imo;
+ struct {
+ struct rwlock req_lock;
+ struct timeout req_tmo;
+ enum pfsync_bulk_req_state req_state;
+ unsigned int req_tries;
+ unsigned int req_demoted;
+ } sc_bulk_req;
- struct in_addr sc_sync_peer;
- u_int8_t sc_maxupdates;
+ struct {
+ struct rwlock snd_lock;
+ struct timeout snd_tmo;
+ time_t snd_requested;
+
+ struct pf_state *snd_next;
+ struct pf_state *snd_tail;
+ unsigned int snd_again;
+ } sc_bulk_snd;
+};
- struct ip sc_template;
+static struct pfsync_softc *pfsyncif = NULL;
+static struct cpumem *pfsynccounters;
- struct pf_state_queue sc_qs[PFSYNC_S_COUNT];
- struct mutex sc_st_mtx;
- size_t sc_len;
+static inline void
+pfsyncstat_inc(enum pfsync_counters c)
+{
+ counters_inc(pfsynccounters, c);
+}
- struct pfsync_upd_reqs sc_upd_req_list;
- struct mutex sc_upd_req_mtx;
+static int pfsync_clone_create(struct if_clone *, int);
+static int pfsync_clone_destroy(struct ifnet *);
- int sc_initial_bulk;
- int sc_link_demoted;
+static int pfsync_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct rtentry *);
+static void pfsync_start(struct ifqueue *);
- int sc_defer;
- struct pfsync_deferrals sc_deferrals;
- u_int sc_deferred;
- struct mutex sc_deferrals_mtx;
- struct timeout sc_deferrals_tmo;
+static int pfsync_ioctl(struct ifnet *, u_long, caddr_t);
+static int pfsync_up(struct pfsync_softc *);
+static int pfsync_down(struct pfsync_softc *);
- void *sc_plus;
- size_t sc_pluslen;
+static int pfsync_set_mtu(struct pfsync_softc *, unsigned int);
+static int pfsync_set_parent(struct pfsync_softc *,
+ const struct if_parent *);
+static int pfsync_get_parent(struct pfsync_softc *, struct if_parent *);
+static int pfsync_del_parent(struct pfsync_softc *);
- u_int32_t sc_ureq_sent;
- int sc_bulk_tries;
- struct timeout sc_bulkfail_tmo;
+static int pfsync_get_ioc(struct pfsync_softc *, struct ifreq *);
+static int pfsync_set_ioc(struct pfsync_softc *, struct ifreq *);
- u_int32_t sc_ureq_received;
- struct pf_state *sc_bulk_next;
- struct pf_state *sc_bulk_last;
- struct timeout sc_bulk_tmo;
+static void pfsync_syncif_link(void *);
+static void pfsync_syncif_detach(void *);
- TAILQ_HEAD(, tdb) sc_tdb_q;
- struct mutex sc_tdb_mtx;
+static void pfsync_sendout(struct pfsync_softc *, struct mbuf *);
+static void pfsync_slice_drop(struct pfsync_softc *, struct pfsync_slice *);
- struct task sc_ltask;
- struct task sc_dtask;
+static void pfsync_slice_tmo(void *);
+static void pfsync_slice_task(void *);
+static void pfsync_slice_sendq(void *);
+
+static void pfsync_deferrals_tmo(void *);
+static void pfsync_deferrals_task(void *);
+static void pfsync_defer_output(struct pfsync_deferral *);
+
+static void pfsync_bulk_req_evt(struct pfsync_softc *,
+ enum pfsync_bulk_req_event);
+static void pfsync_bulk_req_tmo(void *);
+
+static void pfsync_bulk_snd_tmo(void *);
+
+#if NKSTAT > 0
+struct pfsync_kstat_data {
+ struct kstat_kv pd_locks;
+ struct kstat_kv pd_contended;
+ struct kstat_kv pd_write_nop;
+ struct kstat_kv pd_task_add;
+ struct kstat_kv pd_task_run;
+ struct kstat_kv pd_enqueue;
+ struct kstat_kv pd_dequeue;
+ struct kstat_kv pd_qdrop;
- struct timeout sc_tmo;
+ struct kstat_kv pd_defer_len;
+ struct kstat_kv pd_defer_add;
+ struct kstat_kv pd_defer_ack;
+ struct kstat_kv pd_defer_run;
+ struct kstat_kv pd_defer_overlimit;
};
-struct pfsync_snapshot {
- struct pfsync_softc *sn_sc;
- struct pf_state_queue sn_qs[PFSYNC_S_COUNT];
- struct pfsync_upd_reqs sn_upd_req_list;
- TAILQ_HEAD(, tdb) sn_tdb_q;
- size_t sn_len;
- void *sn_plus;
- size_t sn_pluslen;
+static const struct pfsync_kstat_data pfsync_kstat_tpl = {
+ KSTAT_KV_INITIALIZER("locks", KSTAT_KV_T_COUNTER64),
+ KSTAT_KV_INITIALIZER("contended", KSTAT_KV_T_COUNTER64),
+ KSTAT_KV_INITIALIZER("write-nops", KSTAT_KV_T_COUNTER64),
+ KSTAT_KV_INITIALIZER("send-sched", KSTAT_KV_T_COUNTER64),
+ KSTAT_KV_INITIALIZER("send-run", KSTAT_KV_T_COUNTER64),
+ KSTAT_KV_INITIALIZER("enqueues", KSTAT_KV_T_COUNTER64),
+ KSTAT_KV_INITIALIZER("dequeues", KSTAT_KV_T_COUNTER64),
+ KSTAT_KV_UNIT_INITIALIZER("qdrops",
+ KSTAT_KV_T_COUNTER32, KSTAT_KV_U_PACKETS),
+
+ KSTAT_KV_UNIT_INITIALIZER("defer-len",
+ KSTAT_KV_T_COUNTER32, KSTAT_KV_U_PACKETS),
+ KSTAT_KV_INITIALIZER("defer-add", KSTAT_KV_T_COUNTER64),
+ KSTAT_KV_INITIALIZER("defer-ack", KSTAT_KV_T_COUNTER64),
+ KSTAT_KV_INITIALIZER("defer-run", KSTAT_KV_T_COUNTER64),
+ KSTAT_KV_INITIALIZER("defer-over", KSTAT_KV_T_COUNTER64),
};
-struct pfsync_softc *pfsyncif = NULL;
-struct cpumem *pfsynccounters;
-
-void pfsyncattach(int);
-int pfsync_clone_create(struct if_clone *, int);
-int pfsync_clone_destroy(struct ifnet *);
-void pfsync_update_net_tdb(struct pfsync_tdb *);
-int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
- struct rtentry *);
-int pfsyncioctl(struct ifnet *, u_long, caddr_t);
-void pfsyncstart(struct ifqueue *);
-void pfsync_syncdev_state(void *);
-void pfsync_ifdetach(void *);
-
-void pfsync_deferred(struct pf_state *, int);
-void pfsync_undefer(struct pfsync_deferral *, int);
-void pfsync_deferrals_tmo(void *);
-
-void pfsync_cancel_full_update(struct pfsync_softc *);
-void pfsync_request_full_update(struct pfsync_softc *);
-void pfsync_request_update(u_int32_t, u_int64_t);
-void pfsync_update_state_req(struct pf_state *);
-
-void pfsync_drop(struct pfsync_softc *);
-void pfsync_sendout(void);
-void pfsync_send_plus(void *, size_t);
-void pfsync_timeout(void *);
-void pfsync_tdb_timeout(void *);
-
-void pfsync_bulk_start(void);
-void pfsync_bulk_status(u_int8_t);
-void pfsync_bulk_update(void *);
-void pfsync_bulk_fail(void *);
-
-void pfsync_grab_snapshot(struct pfsync_snapshot *, struct pfsync_softc *);
-void pfsync_drop_snapshot(struct pfsync_snapshot *);
-
-void pfsync_send_dispatch(void *);
-void pfsync_send_pkt(struct mbuf *);
-
-static struct mbuf_queue pfsync_mq;
-static struct task pfsync_task =
- TASK_INITIALIZER(pfsync_send_dispatch, &pfsync_mq);
+static int
+pfsync_kstat_copy(struct kstat *ks, void *dst)
+{
+ struct pfsync_slice *s = ks->ks_softc;
+ struct pfsync_kstat_data *pd = dst;
+
+ *pd = pfsync_kstat_tpl;
+ kstat_kv_u64(&pd->pd_locks) = s->s_stat_locks;
+ kstat_kv_u64(&pd->pd_contended) = s->s_stat_contended;
+ kstat_kv_u64(&pd->pd_write_nop) = s->s_stat_write_nop;
+ kstat_kv_u64(&pd->pd_task_add) = s->s_stat_task_add;
+ kstat_kv_u64(&pd->pd_task_run) = s->s_stat_task_run;
+ kstat_kv_u64(&pd->pd_enqueue) = s->s_stat_enqueue;
+ kstat_kv_u64(&pd->pd_dequeue) = s->s_stat_dequeue;
+ kstat_kv_u32(&pd->pd_qdrop) = mq_drops(&s->s_sendq);
+
+ kstat_kv_u32(&pd->pd_defer_len) = s->s_deferred;
+ kstat_kv_u64(&pd->pd_defer_add) = s->s_stat_defer_add;
+ kstat_kv_u64(&pd->pd_defer_ack) = s->s_stat_defer_ack;
+ kstat_kv_u64(&pd->pd_defer_run) = s->s_stat_defer_run;
+ kstat_kv_u64(&pd->pd_defer_overlimit) = s->s_stat_defer_overlimit;
+
+ return (0);
+}
+#endif /* NKSTAT > 0 */
#define PFSYNC_MAX_BULKTRIES 12
-int pfsync_sync_ok;
struct if_clone pfsync_cloner =
IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
void
pfsyncattach(int npfsync)
{
- if_clone_attach(&pfsync_cloner);
pfsynccounters = counters_alloc(pfsyncs_ncounters);
- mq_init(&pfsync_mq, 4096, IPL_MPFLOOR);
+ if_clone_attach(&pfsync_cloner);
}
-int
+static int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
struct pfsync_softc *sc;
struct ifnet *ifp;
- int q;
+ size_t i, q;
if (unit != 0)
- return (EINVAL);
-
- pfsync_sync_ok = 1;
+ return (ENXIO);
- sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK|M_ZERO);
- for (q = 0; q < PFSYNC_S_COUNT; q++)
- TAILQ_INIT(&sc->sc_qs[q]);
- mtx_init(&sc->sc_st_mtx, IPL_MPFLOOR);
+ if (pfsync_deferrals_pool.pr_size == 0) {
+ pool_init(&pfsync_deferrals_pool,
+ sizeof(struct pfsync_deferral), 0,
+ IPL_MPFLOOR, 0, "pfdefer", NULL);
+ /* pool_cache_init(&pfsync_deferrals_pool); */
+ }
- pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_MPFLOOR, 0, "pfsync",
- NULL);
- TAILQ_INIT(&sc->sc_upd_req_list);
- mtx_init(&sc->sc_upd_req_mtx, IPL_MPFLOOR);
- TAILQ_INIT(&sc->sc_deferrals);
- mtx_init(&sc->sc_deferrals_mtx, IPL_MPFLOOR);
- timeout_set_proc(&sc->sc_deferrals_tmo, pfsync_deferrals_tmo, sc);
- task_set(&sc->sc_ltask, pfsync_syncdev_state, sc);
- task_set(&sc->sc_dtask, pfsync_ifdetach, sc);
- sc->sc_deferred = 0;
+ sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
+ if (sc == NULL)
+ return (ENOMEM);
- TAILQ_INIT(&sc->sc_tdb_q);
- mtx_init(&sc->sc_tdb_mtx, IPL_MPFLOOR);
+ /* sc_refs is "owned" by IFF_RUNNING */
- sc->sc_len = PFSYNC_MINPKT;
+ sc->sc_syncpeer.s_addr = INADDR_PFSYNC_GROUP;
sc->sc_maxupdates = 128;
+ sc->sc_defer = 0;
+
+ task_set(&sc->sc_ltask, pfsync_syncif_link, sc);
+ task_set(&sc->sc_dtask, pfsync_syncif_detach, sc);
+
+ rw_init(&sc->sc_bulk_req.req_lock, "pfsyncbreq");
+ /* need process context to take net lock to call ip_output */
+ timeout_set_proc(&sc->sc_bulk_req.req_tmo, pfsync_bulk_req_tmo, sc);
- sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS,
- sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO);
- sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
+ rw_init(&sc->sc_bulk_snd.snd_lock, "pfsyncbsnd");
+ /* need process context to take net lock to call ip_output */
+ timeout_set_proc(&sc->sc_bulk_snd.snd_tmo, pfsync_bulk_snd_tmo, sc);
ifp = &sc->sc_if;
- snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
+ snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d",
+ ifc->ifc_name, unit);
ifp->if_softc = sc;
- ifp->if_ioctl = pfsyncioctl;
- ifp->if_output = pfsyncoutput;
- ifp->if_qstart = pfsyncstart;
+ ifp->if_ioctl = pfsync_ioctl;
+ ifp->if_output = pfsync_output;
+ ifp->if_qstart = pfsync_start;
ifp->if_type = IFT_PFSYNC;
ifp->if_hdrlen = sizeof(struct pfsync_header);
ifp->if_mtu = ETHERMTU;
ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
- timeout_set_proc(&sc->sc_tmo, pfsync_timeout, NULL);
- timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, NULL);
- timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, NULL);
+ for (i = 0; i < nitems(sc->sc_slices); i++) {
+ struct pfsync_slice *s = &sc->sc_slices[i];
+
+ s->s_pfsync = sc;
+
+ mtx_init_flags(&s->s_mtx, IPL_SOFTNET, "pfslice", 0);
+ s->s_softnet = net_tq(i);
+ timeout_set(&s->s_tmo, pfsync_slice_tmo, s);
+ task_set(&s->s_task, pfsync_slice_task, s);
+
+ mq_init(&s->s_sendq, 16, IPL_SOFTNET);
+ task_set(&s->s_send, pfsync_slice_sendq, s);
+
+ s->s_len = PFSYNC_MINPKT;
+ ml_init(&s->s_ml);
+
+ for (q = 0; q < nitems(s->s_qs); q++)
+ TAILQ_INIT(&s->s_qs[q]);
+ TAILQ_INIT(&s->s_tdb_q);
+
+ /* stupid NET_LOCK */
+ timeout_set(&s->s_deferrals_tmo, pfsync_deferrals_tmo, s);
+ task_set(&s->s_deferrals_task, pfsync_deferrals_task, s);
+ TAILQ_INIT(&s->s_deferrals);
+
+#if NKSTAT > 0
+ s->s_kstat = kstat_create(ifp->if_xname, 0, "pfsync-slice", i,
+ KSTAT_T_KV, 0);
+
+ kstat_set_mutex(s->s_kstat, &s->s_mtx);
+ s->s_kstat->ks_softc = s;
+ s->s_kstat->ks_datalen = sizeof(pfsync_kstat_tpl);
+ s->s_kstat->ks_copy = pfsync_kstat_copy;
+ kstat_install(s->s_kstat);
+#endif
+ }
+
+ if_counters_alloc(ifp);
if_attach(ifp);
if_alloc_sadl(ifp);
bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif
- pfsyncif = sc;
-
return (0);
}
-int
+static int
pfsync_clone_destroy(struct ifnet *ifp)
{
struct pfsync_softc *sc = ifp->if_softc;
- struct ifnet *ifp0;
- struct pfsync_deferral *pd;
- struct pfsync_deferrals deferrals;
+#if NKSTAT > 0
+ size_t i;
+#endif
NET_LOCK();
+ sc->sc_dead = 1;
-#if NCARP > 0
- if (!pfsync_sync_ok)
- carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
- if (sc->sc_link_demoted)
- carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
-#endif
- if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) {
- if_linkstatehook_del(ifp0, &sc->sc_ltask);
- if_detachhook_del(ifp0, &sc->sc_dtask);
- }
- if_put(ifp0);
-
- /* XXXSMP breaks atomicity */
+ if (ISSET(ifp->if_flags, IFF_RUNNING))
+ pfsync_down(sc);
NET_UNLOCK();
- if_detach(ifp);
- NET_LOCK();
- pfsync_drop(sc);
+ if_detach(ifp);
- if (sc->sc_deferred > 0) {
- TAILQ_INIT(&deferrals);
- mtx_enter(&sc->sc_deferrals_mtx);
- TAILQ_CONCAT(&deferrals, &sc->sc_deferrals, pd_entry);
- sc->sc_deferred = 0;
- mtx_leave(&sc->sc_deferrals_mtx);
+#if NKSTAT > 0
+ for (i = 0; i < nitems(sc->sc_slices); i++) {
+ struct pfsync_slice *s = &sc->sc_slices[i];
- while ((pd = TAILQ_FIRST(&deferrals)) != NULL) {
- TAILQ_REMOVE(&deferrals, pd, pd_entry);
- pfsync_undefer(pd, 0);
- }
+ kstat_destroy(s->s_kstat);
}
+#endif
- pfsyncif = NULL;
- timeout_del(&sc->sc_bulkfail_tmo);
- timeout_del(&sc->sc_bulk_tmo);
- timeout_del(&sc->sc_tmo);
-
- NET_UNLOCK();
-
- pool_destroy(&sc->sc_pool);
- free(sc->sc_imo.imo_membership, M_IPMOPTS,
- sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *));
free(sc, M_DEVBUF, sizeof(*sc));
return (0);
}
-/*
- * Start output on the pfsync interface.
- */
-void
-pfsyncstart(struct ifqueue *ifq)
+static void
+pfsync_dprintf(struct pfsync_softc *sc, const char *fmt, ...)
{
- ifq_purge(ifq);
+ struct ifnet *ifp = &sc->sc_if;
+ va_list ap;
+
+ if (!ISSET(ifp->if_flags, IFF_DEBUG))
+ return;
+
+ printf("%s: ", ifp->if_xname);
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+ printf("\n");
}
-void
-pfsync_syncdev_state(void *arg)
+static void
+pfsync_syncif_link(void *arg)
{
struct pfsync_softc *sc = arg;
- struct ifnet *ifp;
+ struct ifnet *ifp0;
+ unsigned int sync_if_down = 1;
- if ((sc->sc_if.if_flags & IFF_UP) == 0)
- return;
- if ((ifp = if_get(sc->sc_sync_ifidx)) == NULL)
- return;
+ ifp0 = if_get(sc->sc_sync_ifidx);
+ if (ifp0 != NULL && LINK_STATE_IS_UP(ifp0->if_link_state)) {
+ pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_LINK);
+ sync_if_down = 0;
+ }
+ if_put(ifp0);
- if (ifp->if_link_state == LINK_STATE_DOWN) {
- sc->sc_if.if_flags &= ~IFF_RUNNING;
- if (!sc->sc_link_demoted) {
#if NCARP > 0
- carp_group_demote_adj(&sc->sc_if, 1,
- "pfsync link state down");
-#endif
- sc->sc_link_demoted = 1;
- }
-
- /* drop everything */
- timeout_del(&sc->sc_tmo);
- pfsync_drop(sc);
-
- pfsync_cancel_full_update(sc);
- } else if (sc->sc_link_demoted) {
- sc->sc_if.if_flags |= IFF_RUNNING;
-
- pfsync_request_full_update(sc);
+ if (sc->sc_sync_if_down != sync_if_down) {
+ carp_group_demote_adj(&sc->sc_if,
+ sync_if_down ? 1 : -1, "pfsync link");
}
+#endif
- if_put(ifp);
+ sc->sc_sync_if_down = sync_if_down;
}
-void
-pfsync_ifdetach(void *arg)
+static void
+pfsync_syncif_detach(void *arg)
{
struct pfsync_softc *sc = arg;
- struct ifnet *ifp;
+ struct ifnet *ifp = &sc->sc_if;
- if ((ifp = if_get(sc->sc_sync_ifidx)) != NULL) {
- if_linkstatehook_del(ifp, &sc->sc_ltask);
- if_detachhook_del(ifp, &sc->sc_dtask);
+ if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+ pfsync_down(sc);
+ if_down(ifp);
}
- if_put(ifp);
sc->sc_sync_ifidx = 0;
}
-int
-pfsync_input(struct mbuf **mp, int *offp, int proto, int af)
+static int
+pfsync_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+ struct rtentry *rt)
{
- struct mbuf *n, *m = *mp;
- struct pfsync_softc *sc = pfsyncif;
- struct ip *ip = mtod(m, struct ip *);
- struct pfsync_header *ph;
- struct pfsync_subheader subh;
- int offset, noff, len, count, mlen, flags = 0;
- int e;
-
- NET_ASSERT_LOCKED();
+ m_freem(m); /* drop packet */
+ return (EAFNOSUPPORT);
+}
- pfsyncstat_inc(pfsyncs_ipackets);
+static int
+pfsync_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct pfsync_softc *sc = ifp->if_softc;
+ struct ifreq *ifr = (struct ifreq *)data;
+ int error = ENOTTY;
- /* verify that we have a sync interface configured */
- if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
- sc->sc_sync_ifidx == 0 || !pf_status.running)
- goto done;
+ switch (cmd) {
+ case SIOCSIFADDR:
+ error = EOPNOTSUPP;
+ break;
- /* verify that the packet came in on the right interface */
- if (sc->sc_sync_ifidx != m->m_pkthdr.ph_ifidx) {
- pfsyncstat_inc(pfsyncs_badif);
- goto done;
- }
+ case SIOCSIFFLAGS:
+ if (ISSET(ifp->if_flags, IFF_UP)) {
+ if (!ISSET(ifp->if_flags, IFF_RUNNING))
+ error = pfsync_up(sc);
+ else
+ error = ENETRESET;
+ } else {
+ if (ISSET(ifp->if_flags, IFF_RUNNING))
+ error = pfsync_down(sc);
+ }
+ break;
- sc->sc_if.if_ipackets++;
- sc->sc_if.if_ibytes += m->m_pkthdr.len;
+ case SIOCSIFMTU:
+ error = pfsync_set_mtu(sc, ifr->ifr_mtu);
+ break;
- /* verify that the IP TTL is 255. */
- if (ip->ip_ttl != PFSYNC_DFLTTL) {
- pfsyncstat_inc(pfsyncs_badttl);
- goto done;
- }
+ case SIOCSIFPARENT:
+ error = pfsync_set_parent(sc, (struct if_parent *)data);
+ break;
+ case SIOCGIFPARENT:
+ error = pfsync_get_parent(sc, (struct if_parent *)data);
+ break;
+ case SIOCDIFPARENT:
+ error = pfsync_del_parent(sc);
+ break;
- offset = ip->ip_hl << 2;
- n = m_pulldown(m, offset, sizeof(*ph), &noff);
- if (n == NULL) {
- pfsyncstat_inc(pfsyncs_hdrops);
- return IPPROTO_DONE;
- }
- ph = (struct pfsync_header *)(n->m_data + noff);
+ case SIOCSETPFSYNC:
+ error = pfsync_set_ioc(sc, ifr);
+ break;
+ case SIOCGETPFSYNC:
+ error = pfsync_get_ioc(sc, ifr);
+ break;
- /* verify the version */
- if (ph->version != PFSYNC_VERSION) {
- pfsyncstat_inc(pfsyncs_badver);
- goto done;
- }
- len = ntohs(ph->len) + offset;
- if (m->m_pkthdr.len < len) {
- pfsyncstat_inc(pfsyncs_badlen);
- goto done;
+ default:
+ break;
}
- if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
- flags = PFSYNC_SI_CKSUM;
-
- offset += sizeof(*ph);
- while (offset <= len - sizeof(subh)) {
- m_copydata(m, offset, sizeof(subh), &subh);
- offset += sizeof(subh);
-
- mlen = subh.len << 2;
- count = ntohs(subh.count);
-
- if (subh.action >= PFSYNC_ACT_MAX ||
- subh.action >= nitems(pfsync_acts) ||
- mlen < pfsync_acts[subh.action].len) {
- /*
- * subheaders are always followed by at least one
- * message, so if the peer is new
- * enough to tell us how big its messages are then we
- * know enough to skip them.
- */
- if (count > 0 && mlen > 0) {
- offset += count * mlen;
- continue;
- }
- pfsyncstat_inc(pfsyncs_badact);
- goto done;
- }
-
- n = m_pulldown(m, offset, mlen * count, &noff);
- if (n == NULL) {
- pfsyncstat_inc(pfsyncs_badlen);
- return IPPROTO_DONE;
- }
-
- e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count,
- flags);
- if (e != 0)
- goto done;
-
- offset += mlen * count;
- }
+ if (error == ENETRESET)
+ error = 0;
-done:
- m_freem(m);
- return IPPROTO_DONE;
+ return (error);
}
-int
-pfsync_in_clr(caddr_t buf, int len, int count, int flags)
+static int
+pfsync_set_mtu(struct pfsync_softc *sc, unsigned int mtu)
{
- struct pfsync_clr *clr;
- struct pf_state *st, *nexts;
- struct pfi_kif *kif;
- u_int32_t creatorid;
- int i;
+ struct ifnet *ifp = &sc->sc_if;
+ struct ifnet *ifp0;
+ int error = 0;
- PF_LOCK();
- for (i = 0; i < count; i++) {
- clr = (struct pfsync_clr *)buf + len * i;
- kif = NULL;
- creatorid = clr->creatorid;
- if (strlen(clr->ifname) &&
- (kif = pfi_kif_find(clr->ifname)) == NULL)
- continue;
+ ifp0 = if_get(sc->sc_sync_ifidx);
+ if (ifp0 == NULL)
+ return (EINVAL);
- PF_STATE_ENTER_WRITE();
- RBT_FOREACH_SAFE(st, pf_state_tree_id, &tree_id, nexts) {
- if (st->creatorid == creatorid &&
- ((kif && st->kif == kif) || !kif)) {
- SET(st->state_flags, PFSTATE_NOSYNC);
- pf_remove_state(st);
- }
- }
- PF_STATE_EXIT_WRITE();
+ if (mtu <= PFSYNC_MINPKT || mtu > ifp0->if_mtu) {
+ error = EINVAL;
+ goto put;
}
- PF_UNLOCK();
- return (0);
+ /* commit */
+ ifp->if_mtu = mtu;
+
+put:
+ if_put(ifp0);
+ return (error);
}
-int
-pfsync_in_ins(caddr_t buf, int len, int count, int flags)
+static int
+pfsync_set_parent(struct pfsync_softc *sc, const struct if_parent *p)
{
- struct pfsync_state *sp;
- sa_family_t af1, af2;
- int i;
+ struct ifnet *ifp = &sc->sc_if;
+ struct ifnet *ifp0;
+ int error = 0;
- PF_LOCK();
- for (i = 0; i < count; i++) {
- sp = (struct pfsync_state *)(buf + len * i);
- af1 = sp->key[0].af;
- af2 = sp->key[1].af;
+ ifp0 = if_unit(p->ifp_parent);
+ if (ifp0 == NULL)
+ return (ENXIO);
- /* check for invalid values */
- if (sp->timeout >= PFTM_MAX ||
- sp->src.state > PF_TCPS_PROXY_DST ||
- sp->dst.state > PF_TCPS_PROXY_DST ||
- sp->direction > PF_OUT ||
- (((af1 || af2) &&
- ((af1 != AF_INET && af1 != AF_INET6) ||
- (af2 != AF_INET && af2 != AF_INET6))) ||
- (sp->af != AF_INET && sp->af != AF_INET6))) {
- DPFPRINTF(LOG_NOTICE,
- "pfsync_input: PFSYNC5_ACT_INS: invalid value");
- pfsyncstat_inc(pfsyncs_badval);
- continue;
- }
+ if (ifp0->if_index == sc->sc_sync_ifidx)
+ goto put;
- if (pf_state_import(sp, flags) == ENOMEM) {
- /* drop out, but process the rest of the actions */
- break;
- }
+ if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+ error = EBUSY;
+ goto put;
}
- PF_UNLOCK();
- return (0);
+ /* commit */
+ sc->sc_sync_ifidx = ifp0->if_index;
+
+put:
+ if_put(ifp0);
+ return (error);
}
-int
-pfsync_in_iack(caddr_t buf, int len, int count, int flags)
+static int
+pfsync_get_parent(struct pfsync_softc *sc, struct if_parent *p)
{
- struct pfsync_ins_ack *ia;
- struct pf_state_cmp id_key;
- struct pf_state *st;
- int i;
+ struct ifnet *ifp0;
+ int error = 0;
- for (i = 0; i < count; i++) {
- ia = (struct pfsync_ins_ack *)(buf + len * i);
+ ifp0 = if_get(sc->sc_sync_ifidx);
+ if (ifp0 == NULL)
+ error = EADDRNOTAVAIL;
+ else
+ strlcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent));
+ if_put(ifp0);
- id_key.id = ia->id;
- id_key.creatorid = ia->creatorid;
+ return (error);
+}
- PF_STATE_ENTER_READ();
- st = pf_find_state_byid(&id_key);
- pf_state_ref(st);
- PF_STATE_EXIT_READ();
- if (st == NULL)
- continue;
+static int
+pfsync_del_parent(struct pfsync_softc *sc)
+{
+ struct ifnet *ifp = &sc->sc_if;
- if (ISSET(st->state_flags, PFSTATE_ACK))
- pfsync_deferred(st, 0);
+ if (ISSET(ifp->if_flags, IFF_RUNNING))
+ return (EBUSY);
- pf_state_unref(st);
- }
+ /* commit */
+ sc->sc_sync_ifidx = 0;
return (0);
}
-int
-pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
- struct pfsync_state_peer *dst)
+static int
+pfsync_get_ioc(struct pfsync_softc *sc, struct ifreq *ifr)
{
- int sync = 0;
-
- /*
- * The state should never go backwards except
- * for syn-proxy states. Neither should the
- * sequence window slide backwards.
- */
- if ((st->src.state > src->state &&
- (st->src.state < PF_TCPS_PROXY_SRC ||
- src->state >= PF_TCPS_PROXY_SRC)) ||
+ struct pfsyncreq pfsyncr;
+ struct ifnet *ifp0;
- (st->src.state == src->state &&
- SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
- sync++;
- else
- pf_state_peer_ntoh(src, &st->src);
+ memset(&pfsyncr, 0, sizeof(pfsyncr));
- if ((st->dst.state > dst->state) ||
+ ifp0 = if_get(sc->sc_sync_ifidx);
+ if (ifp0 != NULL) {
+ strlcpy(pfsyncr.pfsyncr_syncdev, ifp0->if_xname,
+ sizeof(pfsyncr.pfsyncr_syncdev));
+ }
+ if_put(ifp0);
- (st->dst.state >= TCPS_SYN_SENT &&
- SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
- sync++;
- else
- pf_state_peer_ntoh(dst, &st->dst);
+ pfsyncr.pfsyncr_syncpeer = sc->sc_syncpeer;
+ pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
+ pfsyncr.pfsyncr_defer = sc->sc_defer;
- return (sync);
+ return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
}
-int
-pfsync_in_upd(caddr_t buf, int len, int count, int flags)
+static int
+pfsync_set_ioc(struct pfsync_softc *sc, struct ifreq *ifr)
{
- struct pfsync_state *sp;
- struct pf_state_cmp id_key;
- struct pf_state *st;
- int sync, error;
- int i;
-
- for (i = 0; i < count; i++) {
- sp = (struct pfsync_state *)(buf + len * i);
-
- /* check for invalid values */
- if (sp->timeout >= PFTM_MAX ||
- sp->src.state > PF_TCPS_PROXY_DST ||
- sp->dst.state > PF_TCPS_PROXY_DST) {
- DPFPRINTF(LOG_NOTICE,
- "pfsync_input: PFSYNC_ACT_UPD: invalid value");
- pfsyncstat_inc(pfsyncs_badval);
- continue;
- }
-
- id_key.id = sp->id;
- id_key.creatorid = sp->creatorid;
+ struct ifnet *ifp = &sc->sc_if;
+ struct pfsyncreq pfsyncr;
+ unsigned int sync_ifidx = sc->sc_sync_ifidx;
+ int wantdown = 0;
+ int error;
- PF_STATE_ENTER_READ();
- st = pf_find_state_byid(&id_key);
- pf_state_ref(st);
- PF_STATE_EXIT_READ();
- if (st == NULL) {
- /* insert the update */
- PF_LOCK();
- error = pf_state_import(sp, flags);
- if (error)
- pfsyncstat_inc(pfsyncs_badstate);
- PF_UNLOCK();
- continue;
- }
+ error = suser(curproc);
+ if (error != 0)
+ return (error);
- if (ISSET(st->state_flags, PFSTATE_ACK))
- pfsync_deferred(st, 1);
+ error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr));
+ if (error != 0)
+ return (error);
- if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
- sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
- else {
- sync = 0;
+ if (pfsyncr.pfsyncr_maxupdates > 255)
+ return (EINVAL);
- /*
- * Non-TCP protocol state machine always go
- * forwards
- */
- if (st->src.state > sp->src.state)
- sync++;
- else
- pf_state_peer_ntoh(&sp->src, &st->src);
+ if (pfsyncr.pfsyncr_syncdev[0] != '\0') { /* set */
+ struct ifnet *ifp0 = if_unit(pfsyncr.pfsyncr_syncdev);
+ if (ifp0 == NULL)
+ return (ENXIO);
- if (st->dst.state > sp->dst.state)
- sync++;
- else
- pf_state_peer_ntoh(&sp->dst, &st->dst);
- }
+ if (ifp0->if_index != sync_ifidx)
+ wantdown = 1;
- if (sync < 2) {
- pf_state_alloc_scrub_memory(&sp->dst, &st->dst);
- pf_state_peer_ntoh(&sp->dst, &st->dst);
- st->expire = getuptime();
- st->timeout = sp->timeout;
- }
- st->pfsync_time = getuptime();
+ sync_ifidx = ifp0->if_index;
+ if_put(ifp0);
+ } else { /* del */
+ wantdown = 1;
+ sync_ifidx = 0;
+ }
- if (sync) {
- pfsyncstat_inc(pfsyncs_stale);
+ if (pfsyncr.pfsyncr_syncpeer.s_addr == INADDR_ANY)
+ pfsyncr.pfsyncr_syncpeer.s_addr = INADDR_PFSYNC_GROUP;
+ if (pfsyncr.pfsyncr_syncpeer.s_addr != sc->sc_syncpeer.s_addr)
+ wantdown = 1;
- pfsync_update_state(st);
- schednetisr(NETISR_PFSYNC);
- }
+ if (wantdown && ISSET(ifp->if_flags, IFF_RUNNING))
+ return (EBUSY);
- pf_state_unref(st);
- }
+ /* commit */
+ sc->sc_sync_ifidx = sync_ifidx;
+ sc->sc_syncpeer = pfsyncr.pfsyncr_syncpeer;
+ sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
+ sc->sc_defer = pfsyncr.pfsyncr_defer;
return (0);
}
-int
-pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
+static int
+pfsync_up(struct pfsync_softc *sc)
{
- struct pfsync_upd_c *up;
- struct pf_state_cmp id_key;
- struct pf_state *st;
-
- int sync;
-
- int i;
-
- for (i = 0; i < count; i++) {
- up = (struct pfsync_upd_c *)(buf + len * i);
+ struct ifnet *ifp = &sc->sc_if;
+ struct ifnet *ifp0;
+ void *inm = NULL;
+ int error = 0;
+ struct ip *ip;
- /* check for invalid values */
- if (up->timeout >= PFTM_MAX ||
- up->src.state > PF_TCPS_PROXY_DST ||
- up->dst.state > PF_TCPS_PROXY_DST) {
- DPFPRINTF(LOG_NOTICE,
- "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
- pfsyncstat_inc(pfsyncs_badval);
- continue;
- }
+ NET_ASSERT_LOCKED();
+ KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING));
- id_key.id = up->id;
- id_key.creatorid = up->creatorid;
+ if (sc->sc_dead)
+ return (ENXIO);
- PF_STATE_ENTER_READ();
- st = pf_find_state_byid(&id_key);
- pf_state_ref(st);
- PF_STATE_EXIT_READ();
- if (st == NULL) {
- /* We don't have this state. Ask for it. */
- pfsync_request_update(id_key.creatorid, id_key.id);
- continue;
- }
+ /*
+ * coordinate with pfsync_down(). if sc_up is still up and
+ * we're here then something else is tearing pfsync down.
+ */
+ if (sc->sc_up)
+ return (EBUSY);
- if (ISSET(st->state_flags, PFSTATE_ACK))
- pfsync_deferred(st, 1);
+ if (sc->sc_syncpeer.s_addr == INADDR_ANY ||
+ sc->sc_syncpeer.s_addr == INADDR_BROADCAST)
+ return (EDESTADDRREQ);
- if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
- sync = pfsync_upd_tcp(st, &up->src, &up->dst);
- else {
- sync = 0;
- /*
- * Non-TCP protocol state machine always go
- * forwards
- */
- if (st->src.state > up->src.state)
- sync++;
- else
- pf_state_peer_ntoh(&up->src, &st->src);
+ ifp0 = if_get(sc->sc_sync_ifidx);
+ if (ifp0 == NULL)
+ return (ENXIO);
- if (st->dst.state > up->dst.state)
- sync++;
- else
- pf_state_peer_ntoh(&up->dst, &st->dst);
+ if (IN_MULTICAST(sc->sc_syncpeer.s_addr)) {
+ if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
+ error = ENODEV;
+ goto put;
}
- if (sync < 2) {
- pf_state_alloc_scrub_memory(&up->dst, &st->dst);
- pf_state_peer_ntoh(&up->dst, &st->dst);
- st->expire = getuptime();
- st->timeout = up->timeout;
+ inm = in_addmulti(&sc->sc_syncpeer, ifp0);
+ if (inm == NULL) {
+ error = ECONNABORTED;
+ goto put;
}
- st->pfsync_time = getuptime();
-
- if (sync) {
- pfsyncstat_inc(pfsyncs_stale);
+ }
- pfsync_update_state(st);
- schednetisr(NETISR_PFSYNC);
- }
+ sc->sc_up = 1;
- pf_state_unref(st);
- }
+ ip = &sc->sc_template;
+ memset(ip, 0, sizeof(*ip));
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = sizeof(*ip) >> 2;
+ ip->ip_tos = IPTOS_LOWDELAY;
+ /* len and id are set later */
+ ip->ip_off = htons(IP_DF);
+ ip->ip_ttl = PFSYNC_DFLTTL;
+ ip->ip_p = IPPROTO_PFSYNC;
+ ip->ip_src.s_addr = INADDR_ANY;
+ ip->ip_dst.s_addr = sc->sc_syncpeer.s_addr;
- return (0);
-}
+ /* commit */
+ refcnt_init(&sc->sc_refs); /* IFF_RUNNING kind of owns this */
-int
-pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
-{
- struct pfsync_upd_req *ur;
- int i;
+#if NCARP > 0
+ sc->sc_sync_if_down = 1;
+ carp_group_demote_adj(&sc->sc_if, 1, "pfsync up");
+#endif
- struct pf_state_cmp id_key;
- struct pf_state *st;
+ if_linkstatehook_add(ifp0, &sc->sc_ltask);
+ if_detachhook_add(ifp0, &sc->sc_dtask);
- for (i = 0; i < count; i++) {
- ur = (struct pfsync_upd_req *)(buf + len * i);
+ sc->sc_inm = inm;
+ SET(ifp->if_flags, IFF_RUNNING);
- id_key.id = ur->id;
- id_key.creatorid = ur->creatorid;
+ pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_UP);
- if (id_key.id == 0 && id_key.creatorid == 0)
- pfsync_bulk_start();
- else {
- PF_STATE_ENTER_READ();
- st = pf_find_state_byid(&id_key);
- pf_state_ref(st);
- PF_STATE_EXIT_READ();
- if (st == NULL) {
- pfsyncstat_inc(pfsyncs_badstate);
- continue;
- }
- if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
- pf_state_unref(st);
- continue;
- }
+ refcnt_take(&sc->sc_refs); /* give one to SMR */
+ SMR_PTR_SET_LOCKED(&pfsyncif, sc);
- pfsync_update_state_req(st);
- pf_state_unref(st);
- }
- }
+	pfsync_syncif_link(sc);	/* try to push the bulk req state forward */
- return (0);
+put:
+ if_put(ifp0);
+ return (error);
}
-int
-pfsync_in_del(caddr_t buf, int len, int count, int flags)
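+/*
+ * prepend the ip and pfsync headers from sc_template onto m.
+ * the pfsync checksum is left as zero.
+ */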
+static struct mbuf *
+pfsync_encap(struct pfsync_softc *sc, struct mbuf *m)
{
- struct pfsync_state *sp;
- struct pf_state_cmp id_key;
- struct pf_state *st;
- int i;
+ struct {
+ struct ip ip;
+ struct pfsync_header ph;
+ } __packed __aligned(4) *h;
+ unsigned int mlen = m->m_pkthdr.len;
- PF_STATE_ENTER_WRITE();
- for (i = 0; i < count; i++) {
- sp = (struct pfsync_state *)(buf + len * i);
+ m = m_prepend(m, sizeof(*h), M_DONTWAIT);
+ if (m == NULL)
+ return (NULL);
- id_key.id = sp->id;
- id_key.creatorid = sp->creatorid;
+ h = mtod(m, void *);
+ memset(h, 0, sizeof(*h));
- st = pf_find_state_byid(&id_key);
- if (st == NULL) {
- pfsyncstat_inc(pfsyncs_badstate);
- continue;
- }
- SET(st->state_flags, PFSTATE_NOSYNC);
- pf_remove_state(st);
- }
- PF_STATE_EXIT_WRITE();
+ mlen += sizeof(h->ph);
+ h->ph.version = PFSYNC_VERSION;
+ h->ph.len = htons(mlen);
+ /* h->ph.pfcksum */
- return (0);
+ mlen += sizeof(h->ip);
+ h->ip = sc->sc_template;
+ h->ip.ip_len = htons(mlen);
+ h->ip.ip_id = htons(ip_randomid());
+
+ return (m);
}
-int
-pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
+static void
+pfsync_bulk_req_send(struct pfsync_softc *sc)
{
- struct pfsync_del_c *sp;
- struct pf_state_cmp id_key;
- struct pf_state *st;
- int i;
-
- PF_LOCK();
- PF_STATE_ENTER_WRITE();
- for (i = 0; i < count; i++) {
- sp = (struct pfsync_del_c *)(buf + len * i);
-
- id_key.id = sp->id;
- id_key.creatorid = sp->creatorid;
+ struct {
+ struct pfsync_subheader subh;
+ struct pfsync_upd_req ur;
+ } __packed __aligned(4) *h;
+ unsigned mlen = max_linkhdr +
+ sizeof(struct ip) + sizeof(struct pfsync_header) + sizeof(*h);
+ struct mbuf *m;
- st = pf_find_state_byid(&id_key);
- if (st == NULL) {
- pfsyncstat_inc(pfsyncs_badstate);
- continue;
- }
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m == NULL)
+ goto fail;
- SET(st->state_flags, PFSTATE_NOSYNC);
- pf_remove_state(st);
+ if (mlen > MHLEN) {
+ MCLGETL(m, M_DONTWAIT, mlen);
+ if (!ISSET(m->m_flags, M_EXT))
+ goto drop;
}
- PF_STATE_EXIT_WRITE();
- PF_UNLOCK();
- return (0);
-}
+ m_align(m, sizeof(*h));
+ m->m_len = m->m_pkthdr.len = sizeof(*h);
-int
-pfsync_in_bus(caddr_t buf, int len, int count, int flags)
-{
- struct pfsync_softc *sc = pfsyncif;
- struct pfsync_bus *bus;
+ h = mtod(m, void *);
+ memset(h, 0, sizeof(*h));
- /* If we're not waiting for a bulk update, who cares. */
- if (sc->sc_ureq_sent == 0)
- return (0);
+ h->subh.action = PFSYNC_ACT_UPD_REQ;
+ h->subh.len = sizeof(h->ur) >> 2;
+ h->subh.count = htons(1);
- bus = (struct pfsync_bus *)buf;
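+	/* an id and creatorid of zero asks the peer for a full bulk update */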
+ h->ur.id = htobe64(0);
+ h->ur.creatorid = htobe32(0);
- switch (bus->status) {
- case PFSYNC_BUS_START:
- PF_LOCK();
- timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
- pf_pool_limits[PF_LIMIT_STATES].limit /
- ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
- sizeof(struct pfsync_state)));
- PF_UNLOCK();
- DPFPRINTF(LOG_INFO, "received bulk update start");
- break;
+ m = pfsync_encap(sc, m);
+ if (m == NULL)
+ goto fail;
- case PFSYNC_BUS_END:
- if (getuptime() - ntohl(bus->endtime) >=
- sc->sc_ureq_sent) {
- /* that's it, we're happy */
- sc->sc_ureq_sent = 0;
- sc->sc_bulk_tries = 0;
- timeout_del(&sc->sc_bulkfail_tmo);
-#if NCARP > 0
- if (!pfsync_sync_ok)
- carp_group_demote_adj(&sc->sc_if, -1,
- sc->sc_link_demoted ?
- "pfsync link state up" :
- "pfsync bulk done");
- if (sc->sc_initial_bulk) {
- carp_group_demote_adj(&sc->sc_if, -32,
- "pfsync init");
- sc->sc_initial_bulk = 0;
- }
-#endif
- pfsync_sync_ok = 1;
- sc->sc_link_demoted = 0;
- DPFPRINTF(LOG_INFO, "received valid bulk update end");
- } else {
- DPFPRINTF(LOG_WARNING, "received invalid "
- "bulk update end: bad timestamp");
- }
- break;
- }
+ pfsync_sendout(sc, m);
+ return;
- return (0);
+drop:
+ m_freem(m);
+fail:
+ printf("%s: unable to request bulk update\n", sc->sc_if.if_xname);
}
-int
-pfsync_in_tdb(caddr_t buf, int len, int count, int flags)
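+/*
+ * move the bulk request state machine to nstate. a positive
+ * seconds value (re)arms the request timeout, zero disarms it.
+ */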
+static void
+pfsync_bulk_req_nstate(struct pfsync_softc *sc,
+ enum pfsync_bulk_req_state nstate, int seconds)
{
-#if defined(IPSEC)
- struct pfsync_tdb *tp;
- int i;
-
- for (i = 0; i < count; i++) {
- tp = (struct pfsync_tdb *)(buf + len * i);
- pfsync_update_net_tdb(tp);
- }
-#endif
-
- return (0);
+ sc->sc_bulk_req.req_state = nstate;
+ if (seconds > 0)
+ timeout_add_sec(&sc->sc_bulk_req.req_tmo, seconds);
+ else
+ timeout_del(&sc->sc_bulk_req.req_tmo);
}
-#if defined(IPSEC)
-/* Update an in-kernel tdb. Silently fail if no tdb is found. */
-void
-pfsync_update_net_tdb(struct pfsync_tdb *pt)
+static void
+pfsync_bulk_req_invstate(struct pfsync_softc *sc,
+ enum pfsync_bulk_req_event evt)
{
- struct tdb *tdb;
-
- NET_ASSERT_LOCKED();
-
- /* check for invalid values */
- if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
- (pt->dst.sa.sa_family != AF_INET &&
- pt->dst.sa.sa_family != AF_INET6))
- goto bad;
+ panic("%s: unexpected event %s in state %s", sc->sc_if.if_xname,
+ pfsync_bulk_req_event_names[evt],
+ pfsync_bulk_req_state_names[sc->sc_bulk_req.req_state]);
+}
- tdb = gettdb(ntohs(pt->rdomain), pt->spi,
- (union sockaddr_union *)&pt->dst, pt->sproto);
- if (tdb) {
- pt->rpl = betoh64(pt->rpl);
- pt->cur_bytes = betoh64(pt->cur_bytes);
+static void
+pfsync_bulk_req_nstate_bulk(struct pfsync_softc *sc)
+{
+ /* calculate the number of packets we expect */
+ int t = pf_pool_limits[PF_LIMIT_STATES].limit /
+ ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
+ sizeof(struct pfsync_state));
- /* Neither replay nor byte counter should ever decrease. */
- if (pt->rpl < tdb->tdb_rpl ||
- pt->cur_bytes < tdb->tdb_cur_bytes) {
- tdb_unref(tdb);
- goto bad;
- }
+ /* turn it into seconds */
+ t /= 1000 / PFSYNC_BULK_SND_IVAL_MS;
- tdb->tdb_rpl = pt->rpl;
- tdb->tdb_cur_bytes = pt->cur_bytes;
- tdb_unref(tdb);
- }
- return;
+ if (t == 0)
+ t = 1;
- bad:
- DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
- "invalid value");
- pfsyncstat_inc(pfsyncs_badstate);
- return;
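+	/* allow four times the expected transfer time before giving up */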
+ pfsync_bulk_req_nstate(sc, PFSYNC_BREQ_S_BULK, t * 4);
}
-#endif
-
-int
-pfsync_in_eof(caddr_t buf, int len, int count, int flags)
+static inline void
+pfsync_bulk_req_nstate_done(struct pfsync_softc *sc)
{
- if (len > 0 || count > 0)
- pfsyncstat_inc(pfsyncs_badact);
+ pfsync_bulk_req_nstate(sc, PFSYNC_BREQ_S_DONE, 0);
- /* we're done. let the caller return */
- return (1);
-}
+ KASSERT(sc->sc_bulk_req.req_demoted == 1);
+ sc->sc_bulk_req.req_demoted = 0;
-int
-pfsync_in_error(caddr_t buf, int len, int count, int flags)
-{
- pfsyncstat_inc(pfsyncs_badact);
- return (-1);
+#if NCARP > 0
+ carp_group_demote_adj(&sc->sc_if, -32, "pfsync done");
+#endif
}
-int
-pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
- struct rtentry *rt)
+static void
+pfsync_bulk_req_evt(struct pfsync_softc *sc, enum pfsync_bulk_req_event evt)
{
- m_freem(m); /* drop packet */
- return (EAFNOSUPPORT);
-}
+ struct ifnet *ifp = &sc->sc_if;
-int
-pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct proc *p = curproc;
- struct pfsync_softc *sc = ifp->if_softc;
- struct ifreq *ifr = (struct ifreq *)data;
- struct ip_moptions *imo = &sc->sc_imo;
- struct pfsyncreq pfsyncr;
- struct ifnet *ifp0, *sifp;
- struct ip *ip;
- int error;
+ rw_enter_write(&sc->sc_bulk_req.req_lock);
+ pfsync_dprintf(sc, "%s state %s evt %s", __func__,
+ pfsync_bulk_req_state_names[sc->sc_bulk_req.req_state],
+ pfsync_bulk_req_event_names[evt]);
- switch (cmd) {
- case SIOCSIFFLAGS:
- if ((ifp->if_flags & IFF_RUNNING) == 0 &&
- (ifp->if_flags & IFF_UP)) {
- ifp->if_flags |= IFF_RUNNING;
+ if (evt == PFSYNC_BREQ_EVT_DOWN) {
+ /* unconditionally move down */
+ sc->sc_bulk_req.req_tries = 0;
+ pfsync_bulk_req_nstate(sc, PFSYNC_BREQ_S_NONE, 0);
+ if (sc->sc_bulk_req.req_demoted) {
+ sc->sc_bulk_req.req_demoted = 0;
#if NCARP > 0
- sc->sc_initial_bulk = 1;
- carp_group_demote_adj(&sc->sc_if, 32, "pfsync init");
+ carp_group_demote_adj(&sc->sc_if, -32,
+ "pfsync down");
#endif
-
- pfsync_request_full_update(sc);
}
- if ((ifp->if_flags & IFF_RUNNING) &&
- (ifp->if_flags & IFF_UP) == 0) {
- ifp->if_flags &= ~IFF_RUNNING;
-
- /* drop everything */
- timeout_del(&sc->sc_tmo);
- pfsync_drop(sc);
-
- pfsync_cancel_full_update(sc);
- }
- break;
- case SIOCSIFMTU:
- if ((ifp0 = if_get(sc->sc_sync_ifidx)) == NULL)
- return (EINVAL);
- error = 0;
- if (ifr->ifr_mtu <= PFSYNC_MINPKT ||
- ifr->ifr_mtu > ifp0->if_mtu) {
- error = EINVAL;
+ } else switch (sc->sc_bulk_req.req_state) {
+ case PFSYNC_BREQ_S_NONE:
+ switch (evt) {
+ case PFSYNC_BREQ_EVT_UP:
+ KASSERT(sc->sc_bulk_req.req_demoted == 0);
+ sc->sc_bulk_req.req_demoted = 1;
+#if NCARP > 0
+ carp_group_demote_adj(&sc->sc_if, 32,
+ "pfsync start");
+#endif
+ pfsync_bulk_req_nstate(sc, PFSYNC_BREQ_S_START, 30);
+ break;
+ default:
+ pfsync_bulk_req_invstate(sc, evt);
}
- if_put(ifp0);
- if (error)
- return error;
- if (ifr->ifr_mtu < ifp->if_mtu)
- pfsync_sendout();
- ifp->if_mtu = ifr->ifr_mtu;
+
break;
- case SIOCGETPFSYNC:
- bzero(&pfsyncr, sizeof(pfsyncr));
- if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) {
- strlcpy(pfsyncr.pfsyncr_syncdev,
- ifp0->if_xname, IFNAMSIZ);
+
+ case PFSYNC_BREQ_S_START:
+ switch (evt) {
+ case PFSYNC_BREQ_EVT_LINK:
+ pfsync_bulk_req_send(sc);
+ pfsync_bulk_req_nstate(sc, PFSYNC_BREQ_S_SENT, 2);
+ break;
+ case PFSYNC_BREQ_EVT_TMO:
+ pfsync_dprintf(sc, "timeout waiting for link");
+ pfsync_bulk_req_nstate_done(sc);
+ break;
+ case PFSYNC_BREQ_EVT_BUS_START:
+ pfsync_bulk_req_nstate_bulk(sc);
+ break;
+ case PFSYNC_BREQ_EVT_BUS_END:
+ /* ignore this */
+ break;
+ default:
+ pfsync_bulk_req_invstate(sc, evt);
}
- if_put(ifp0);
- pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
- pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
- pfsyncr.pfsyncr_defer = sc->sc_defer;
- return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
+ break;
- case SIOCSETPFSYNC:
- if ((error = suser(p)) != 0)
- return (error);
- if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
- return (error);
+ case PFSYNC_BREQ_S_SENT:
+ switch (evt) {
+ case PFSYNC_BREQ_EVT_BUS_START:
+ pfsync_bulk_req_nstate_bulk(sc);
+ break;
+ case PFSYNC_BREQ_EVT_BUS_END:
+ case PFSYNC_BREQ_EVT_LINK:
+ /* ignore this */
+ break;
+ case PFSYNC_BREQ_EVT_TMO:
+ if (++sc->sc_bulk_req.req_tries <
+ PFSYNC_MAX_BULKTRIES) {
+ pfsync_bulk_req_send(sc);
+ pfsync_bulk_req_nstate(sc,
+ PFSYNC_BREQ_S_SENT, 2);
+ break;
+ }
- if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
- sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
- else
- sc->sc_sync_peer.s_addr =
- pfsyncr.pfsyncr_syncpeer.s_addr;
+ pfsync_dprintf(sc,
+ "timeout waiting for bulk transfer start");
+ pfsync_bulk_req_nstate_done(sc);
+ break;
+ default:
+ pfsync_bulk_req_invstate(sc, evt);
+ }
+ break;
- if (pfsyncr.pfsyncr_maxupdates > 255)
- return (EINVAL);
- sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
+ case PFSYNC_BREQ_S_BULK:
+ switch (evt) {
+ case PFSYNC_BREQ_EVT_BUS_START:
+ case PFSYNC_BREQ_EVT_LINK:
+ /* ignore this */
+ break;
+ case PFSYNC_BREQ_EVT_BUS_END:
+ pfsync_bulk_req_nstate_done(sc);
+ break;
+ case PFSYNC_BREQ_EVT_TMO:
+ if (++sc->sc_bulk_req.req_tries <
+ PFSYNC_MAX_BULKTRIES) {
+ pfsync_bulk_req_send(sc);
+ pfsync_bulk_req_nstate(sc,
+ PFSYNC_BREQ_S_SENT, 2);
+				break;
+			}
- sc->sc_defer = pfsyncr.pfsyncr_defer;
+ pfsync_dprintf(sc,
+ "timeout waiting for bulk transfer end");
+ pfsync_bulk_req_nstate_done(sc);
+ break;
+ default:
+ pfsync_bulk_req_invstate(sc, evt);
+ }
+ break;
- if (pfsyncr.pfsyncr_syncdev[0] == 0) {
- if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) {
- if_linkstatehook_del(ifp0, &sc->sc_ltask);
- if_detachhook_del(ifp0, &sc->sc_dtask);
- }
- if_put(ifp0);
- sc->sc_sync_ifidx = 0;
- if (imo->imo_num_memberships > 0) {
- in_delmulti(imo->imo_membership[
- --imo->imo_num_memberships]);
- imo->imo_ifidx = 0;
- }
+ case PFSYNC_BREQ_S_DONE: /* pfsync is up and running */
+ switch (evt) {
+ case PFSYNC_BREQ_EVT_BUS_START:
+ case PFSYNC_BREQ_EVT_BUS_END:
+ case PFSYNC_BREQ_EVT_LINK:
+ /* nops */
break;
+ default:
+ pfsync_bulk_req_invstate(sc, evt);
}
+ break;
- if ((sifp = if_unit(pfsyncr.pfsyncr_syncdev)) == NULL)
- return (EINVAL);
+ default:
+ panic("%s: unknown event %d", ifp->if_xname, evt);
+ /* NOTREACHED */
+ }
+ rw_exit_write(&sc->sc_bulk_req.req_lock);
+}
- ifp0 = if_get(sc->sc_sync_ifidx);
+static void
+pfsync_bulk_req_tmo(void *arg)
+{
+ struct pfsync_softc *sc = arg;
- if (sifp->if_mtu < sc->sc_if.if_mtu || (ifp0 != NULL &&
- sifp->if_mtu < ifp0->if_mtu) ||
- sifp->if_mtu < MCLBYTES - sizeof(struct ip))
- pfsync_sendout();
+ NET_LOCK();
+ pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_TMO);
+ NET_UNLOCK();
+}
- if (ifp0) {
- if_linkstatehook_del(ifp0, &sc->sc_ltask);
- if_detachhook_del(ifp0, &sc->sc_dtask);
- }
- if_put(ifp0);
- sc->sc_sync_ifidx = sifp->if_index;
+static int
+pfsync_down(struct pfsync_softc *sc)
+{
+ struct ifnet *ifp = &sc->sc_if;
+ struct ifnet *ifp0;
+ struct smr_entry smr;
+ size_t i;
+ void *inm = NULL;
+ unsigned int sndbar = 0;
+ struct pfsync_deferrals pds = TAILQ_HEAD_INITIALIZER(pds);
+ struct pfsync_deferral *pd;
- if (imo->imo_num_memberships > 0) {
- in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
- imo->imo_ifidx = 0;
- }
+ NET_ASSERT_LOCKED();
+ KASSERT(ISSET(ifp->if_flags, IFF_RUNNING));
- if (sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
- struct in_addr addr;
+ /*
+ * tearing down pfsync involves waiting for pfsync to stop
+ * running in various contexts including softnet taskqs.
+ * this thread cannot hold netlock while waiting for a
+ * barrier in softnet because softnet might be waiting for
+ * the netlock. sc->sc_up is used to coordinate with
+ * pfsync_up.
+ */
- if (!(sifp->if_flags & IFF_MULTICAST)) {
- sc->sc_sync_ifidx = 0;
- if_put(sifp);
- return (EADDRNOTAVAIL);
- }
+ CLR(ifp->if_flags, IFF_RUNNING);
- addr.s_addr = INADDR_PFSYNC_GROUP;
+ ifp0 = if_get(sc->sc_sync_ifidx);
+ if (ifp0 != NULL) {
+ if_linkstatehook_del(ifp0, &sc->sc_ltask);
+ if_detachhook_del(ifp0, &sc->sc_dtask);
+ }
+ if_put(ifp0);
- if ((imo->imo_membership[0] =
- in_addmulti(&addr, sifp)) == NULL) {
- sc->sc_sync_ifidx = 0;
- if_put(sifp);
- return (ENOBUFS);
- }
- imo->imo_num_memberships++;
- imo->imo_ifidx = sc->sc_sync_ifidx;
- imo->imo_ttl = PFSYNC_DFLTTL;
- imo->imo_loop = 0;
- }
+#if NCARP > 0
+ if (sc->sc_sync_if_down)
+ carp_group_demote_adj(&sc->sc_if, -1, "pfsync down");
+#endif
- ip = &sc->sc_template;
- bzero(ip, sizeof(*ip));
- ip->ip_v = IPVERSION;
- ip->ip_hl = sizeof(sc->sc_template) >> 2;
- ip->ip_tos = IPTOS_LOWDELAY;
- /* len and id are set later */
- ip->ip_off = htons(IP_DF);
- ip->ip_ttl = PFSYNC_DFLTTL;
- ip->ip_p = IPPROTO_PFSYNC;
- ip->ip_src.s_addr = INADDR_ANY;
- ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
+ NET_UNLOCK();
- if_linkstatehook_add(sifp, &sc->sc_ltask);
- if_detachhook_add(sifp, &sc->sc_dtask);
- if_put(sifp);
+ KASSERTMSG(SMR_PTR_GET_LOCKED(&pfsyncif) == sc,
+ "pfsyncif %p != sc %p", pfsyncif, sc);
+ SMR_PTR_SET_LOCKED(&pfsyncif, NULL);
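+	/* release the SMR reference once all current readers are done */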
+ smr_init(&smr);
+ smr_call(&smr, (void (*)(void *))refcnt_rele_wake, &sc->sc_refs);
- pfsync_request_full_update(sc);
+ /* stop pf producing work before cleaning up the timeouts and tasks */
+ refcnt_finalize(&sc->sc_refs, "pfsyncfini");
- break;
+ pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_DOWN);
- default:
- return (ENOTTY);
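+	/* cancel any bulk send that is still in progress */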
+ rw_enter_read(&pf_state_list.pfs_rwl);
+ rw_enter_write(&sc->sc_bulk_snd.snd_lock);
+ if (sc->sc_bulk_snd.snd_tail != NULL) {
+ sndbar = !timeout_del(&sc->sc_bulk_snd.snd_tmo);
+
+ sc->sc_bulk_snd.snd_again = 0;
+ sc->sc_bulk_snd.snd_next = NULL;
+ sc->sc_bulk_snd.snd_tail = NULL;
+ }
+ rw_exit_write(&sc->sc_bulk_snd.snd_lock);
+ rw_exit_read(&pf_state_list.pfs_rwl);
+
+ /*
+ * do a single barrier for all the timeouts. because the
+ * timeouts in each slice are configured the same way, the
+ * barrier for one will work for all of them.
+ */
+ for (i = 0; i < nitems(sc->sc_slices); i++) {
+ struct pfsync_slice *s = &sc->sc_slices[i];
+
+ timeout_del(&s->s_tmo);
+ task_del(s->s_softnet, &s->s_task);
+ task_del(s->s_softnet, &s->s_send);
+
+ timeout_del(&s->s_deferrals_tmo);
+ task_del(s->s_softnet, &s->s_deferrals_task);
+ }
+ timeout_barrier(&sc->sc_slices[0].s_tmo);
+ timeout_barrier(&sc->sc_bulk_req.req_tmo); /* XXX proc */
+ if (sndbar) {
+ /* technically the preceding barrier does the same job */
+ timeout_barrier(&sc->sc_bulk_snd.snd_tmo);
+ }
+ net_tq_barriers("pfsyncbar");
+
+ /* pfsync is no longer running */
+
+ if (sc->sc_inm != NULL) {
+ inm = sc->sc_inm;
+ sc->sc_inm = NULL;
+ }
+
+ for (i = 0; i < nitems(sc->sc_slices); i++) {
+ struct pfsync_slice *s = &sc->sc_slices[i];
+ struct pf_state *st;
+
+ pfsync_slice_drop(sc, s);
+ mq_purge(&s->s_sendq);
+
+ while ((pd = TAILQ_FIRST(&s->s_deferrals)) != NULL) {
+ TAILQ_REMOVE(&s->s_deferrals, pd, pd_entry);
+
+ st = pd->pd_st;
+ st->sync_defer = NULL;
+
+ TAILQ_INSERT_TAIL(&pds, pd, pd_entry);
+ }
+ s->s_deferred = 0;
+ }
+
+ NET_LOCK();
+ sc->sc_up = 0;
+
+ if (inm != NULL)
+ in_delmulti(inm);
+
+ while ((pd = TAILQ_FIRST(&pds)) != NULL) {
+ TAILQ_REMOVE(&pds, pd, pd_entry);
+
+ pfsync_defer_output(pd);
}
return (0);
}
-void
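+/* lockless check (via SMR) of whether a pfsync interface is running */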
+int
+pfsync_is_up(void)
+{
+ int rv;
+
+ smr_read_enter();
+ rv = SMR_PTR_GET(&pfsyncif) != NULL;
+ smr_read_leave();
+
+ return (rv);
+}
+
+static void
+pfsync_start(struct ifqueue *ifq)
+{
+ ifq_purge(ifq);
+}
+
+struct pfsync_q {
+ void (*write)(struct pf_state *, void *);
+ size_t len;
+ u_int8_t action;
+};
+
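+/*
+ * map a state to its slice by key hash and take the slice mutex,
+ * counting contended acquisitions for the kstats.
+ */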
+static struct pfsync_slice *
+pfsync_slice_enter(struct pfsync_softc *sc, const struct pf_state *st)
+{
+ unsigned int idx = st->key[0]->hash % nitems(sc->sc_slices);
+ struct pfsync_slice *s = &sc->sc_slices[idx];
+
+ if (!mtx_enter_try(&s->s_mtx)) {
+ mtx_enter(&s->s_mtx);
+ s->s_stat_contended++;
+ }
+ s->s_stat_locks++;
+
+ return (s);
+}
+
+static void
+pfsync_slice_leave(struct pfsync_softc *sc, struct pfsync_slice *s)
+{
+ mtx_leave(&s->s_mtx);
+}
+
+/* we have one of these for every PFSYNC_S_ */
+static void pfsync_out_state(struct pf_state *, void *);
+static void pfsync_out_iack(struct pf_state *, void *);
+static void pfsync_out_upd_c(struct pf_state *, void *);
+static void pfsync_out_del(struct pf_state *, void *);
+#if defined(IPSEC)
+static void pfsync_out_tdb(struct tdb *, void *);
+#endif
+
+static const struct pfsync_q pfsync_qs[] = {
+ { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
+ { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C },
+ { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C },
+ { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS },
+ { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }
+};
+
+static void
pfsync_out_state(struct pf_state *st, void *buf)
{
struct pfsync_state *sp = buf;
+ mtx_enter(&st->mtx);
pf_state_export(sp, st);
+ mtx_leave(&st->mtx);
}
-void
+static void
pfsync_out_iack(struct pf_state *st, void *buf)
{
struct pfsync_ins_ack *iack = buf;
iack->creatorid = st->creatorid;
}
-void
+static void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
struct pfsync_upd_c *up = buf;
- bzero(up, sizeof(*up));
+ memset(up, 0, sizeof(*up));
up->id = st->id;
+ up->creatorid = st->creatorid;
+
+ mtx_enter(&st->mtx);
pf_state_peer_hton(&st->src, &up->src);
pf_state_peer_hton(&st->dst, &up->dst);
- up->creatorid = st->creatorid;
up->timeout = st->timeout;
+ mtx_leave(&st->mtx);
}
-void
+static void
pfsync_out_del(struct pf_state *st, void *buf)
{
struct pfsync_del_c *dp = buf;
dp->id = st->id;
dp->creatorid = st->creatorid;
- SET(st->state_flags, PFSTATE_NOSYNC);
+ st->sync_state = PFSYNC_S_DEAD;
}
-void
-pfsync_grab_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc *sc)
-{
- int q;
- struct pf_state *st;
- struct pfsync_upd_req_item *ur;
-#if defined(IPSEC)
- struct tdb *tdb;
-#endif
-
- sn->sn_sc = sc;
-
- mtx_enter(&sc->sc_st_mtx);
- mtx_enter(&sc->sc_upd_req_mtx);
- mtx_enter(&sc->sc_tdb_mtx);
-
- for (q = 0; q < PFSYNC_S_COUNT; q++) {
- TAILQ_INIT(&sn->sn_qs[q]);
-
- while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) {
- TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
- mtx_enter(&st->mtx);
- if (st->snapped == 0) {
- TAILQ_INSERT_TAIL(&sn->sn_qs[q], st, sync_snap);
- st->snapped = 1;
- mtx_leave(&st->mtx);
- } else {
- /*
- * item is on snapshot list already, so we can
- * skip it now.
- */
- mtx_leave(&st->mtx);
- pf_state_unref(st);
- }
- }
- }
-
- TAILQ_INIT(&sn->sn_upd_req_list);
- while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
- TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
- TAILQ_INSERT_TAIL(&sn->sn_upd_req_list, ur, ur_snap);
- }
-
- TAILQ_INIT(&sn->sn_tdb_q);
#if defined(IPSEC)
- while ((tdb = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) {
- TAILQ_REMOVE(&sc->sc_tdb_q, tdb, tdb_sync_entry);
- TAILQ_INSERT_TAIL(&sn->sn_tdb_q, tdb, tdb_sync_snap);
-
- mtx_enter(&tdb->tdb_mtx);
- KASSERT(!ISSET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED));
- SET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED);
- mtx_leave(&tdb->tdb_mtx);
- }
-#endif
-
- sn->sn_len = sc->sc_len;
- sc->sc_len = PFSYNC_MINPKT;
-
- sn->sn_plus = sc->sc_plus;
- sc->sc_plus = NULL;
- sn->sn_pluslen = sc->sc_pluslen;
- sc->sc_pluslen = 0;
+static inline void
+pfsync_tdb_enter(struct tdb *tdb)
+{
+ mtx_enter(&tdb->tdb_mtx);
+}
- mtx_leave(&sc->sc_tdb_mtx);
- mtx_leave(&sc->sc_upd_req_mtx);
- mtx_leave(&sc->sc_st_mtx);
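+/*
+ * pairs with the msleep in pfsync_delete_tdb(): wake up a tdb_free()
+ * that is waiting for pfsync to let go of a snapped tdb.
+ */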
+static inline void
+pfsync_tdb_leave(struct tdb *tdb)
+{
+ unsigned int snapped = ISSET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED);
+ mtx_leave(&tdb->tdb_mtx);
+ if (snapped)
+ wakeup_one(&tdb->tdb_updates);
}
+#endif /* defined(IPSEC) */
-void
-pfsync_drop_snapshot(struct pfsync_snapshot *sn)
+static void
+pfsync_slice_drop(struct pfsync_softc *sc, struct pfsync_slice *s)
{
struct pf_state *st;
- struct pfsync_upd_req_item *ur;
+ int q;
#if defined(IPSEC)
- struct tdb *t;
+ struct tdb *tdb;
#endif
- int q;
- for (q = 0; q < PFSYNC_S_COUNT; q++) {
- if (TAILQ_EMPTY(&sn->sn_qs[q]))
+ for (q = 0; q < nitems(s->s_qs); q++) {
+ if (TAILQ_EMPTY(&s->s_qs[q]))
continue;
- while ((st = TAILQ_FIRST(&sn->sn_qs[q])) != NULL) {
- mtx_enter(&st->mtx);
+ while ((st = TAILQ_FIRST(&s->s_qs[q])) != NULL) {
+ TAILQ_REMOVE(&s->s_qs[q], st, sync_list);
+#ifdef PFSYNC_DEBUG
KASSERT(st->sync_state == q);
- KASSERT(st->snapped == 1);
- TAILQ_REMOVE(&sn->sn_qs[q], st, sync_snap);
+#endif
st->sync_state = PFSYNC_S_NONE;
- st->snapped = 0;
- mtx_leave(&st->mtx);
pf_state_unref(st);
}
}
- while ((ur = TAILQ_FIRST(&sn->sn_upd_req_list)) != NULL) {
- TAILQ_REMOVE(&sn->sn_upd_req_list, ur, ur_snap);
- pool_put(&sn->sn_sc->sc_pool, ur);
- }
-
#if defined(IPSEC)
- while ((t = TAILQ_FIRST(&sn->sn_tdb_q)) != NULL) {
- TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_snap);
- mtx_enter(&t->tdb_mtx);
- KASSERT(ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED));
- CLR(t->tdb_flags, TDBF_PFSYNC_SNAPPED);
- CLR(t->tdb_flags, TDBF_PFSYNC);
- mtx_leave(&t->tdb_mtx);
- }
-#endif
-}
-
-int
-pfsync_is_snapshot_empty(struct pfsync_snapshot *sn)
-{
- int q;
-
- for (q = 0; q < PFSYNC_S_COUNT; q++)
- if (!TAILQ_EMPTY(&sn->sn_qs[q]))
- return (0);
-
- if (!TAILQ_EMPTY(&sn->sn_upd_req_list))
- return (0);
-
- if (!TAILQ_EMPTY(&sn->sn_tdb_q))
- return (0);
-
- return (sn->sn_plus == NULL);
-}
-
-void
-pfsync_drop(struct pfsync_softc *sc)
-{
- struct pfsync_snapshot sn;
-
- pfsync_grab_snapshot(&sn, sc);
- pfsync_drop_snapshot(&sn);
-}
-
-void
-pfsync_send_dispatch(void *xmq)
-{
- struct mbuf_queue *mq = xmq;
- struct pfsync_softc *sc;
- struct mbuf *m;
- struct mbuf_list ml;
- int error;
-
- mq_delist(mq, &ml);
- if (ml_empty(&ml))
- return;
-
- NET_LOCK();
- sc = pfsyncif;
- if (sc == NULL) {
- ml_purge(&ml);
- goto done;
- }
+ while ((tdb = TAILQ_FIRST(&s->s_tdb_q)) != NULL) {
+ TAILQ_REMOVE(&s->s_tdb_q, tdb, tdb_sync_entry);
- while ((m = ml_dequeue(&ml)) != NULL) {
- if ((error = ip_output(m, NULL, NULL, IP_RAWOUTPUT,
- &sc->sc_imo, NULL, 0)) == 0)
- pfsyncstat_inc(pfsyncs_opackets);
- else {
- DPFPRINTF(LOG_DEBUG,
- "ip_output() @ %s failed (%d)\n", __func__, error);
- pfsyncstat_inc(pfsyncs_oerrors);
- }
+ pfsync_tdb_enter(tdb);
+ KASSERT(ISSET(tdb->tdb_flags, TDBF_PFSYNC));
+ CLR(tdb->tdb_flags, TDBF_PFSYNC);
+ pfsync_tdb_leave(tdb);
}
-done:
- NET_UNLOCK();
-}
+#endif /* defined(IPSEC) */
-void
-pfsync_send_pkt(struct mbuf *m)
-{
- if (mq_enqueue(&pfsync_mq, m) != 0) {
- pfsyncstat_inc(pfsyncs_oerrors);
- DPFPRINTF(LOG_DEBUG, "mq_enqueue() @ %s failed, queue full\n",
- __func__);
- } else
- task_add(net_tq(0), &pfsync_task);
+ timeout_del(&s->s_tmo);
+ s->s_len = PFSYNC_MINPKT;
}
-void
-pfsync_sendout(void)
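+/*
+ * turn the states and tdbs queued on a slice into a single pfsync
+ * packet, or return NULL if there is nothing to send.
+ */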
+static struct mbuf *
+pfsync_slice_write(struct pfsync_slice *s)
{
- struct pfsync_snapshot sn;
- struct pfsync_softc *sc = pfsyncif;
-#if NBPFILTER > 0
- struct ifnet *ifp = &sc->sc_if;
-#endif
+ struct pfsync_softc *sc = s->s_pfsync;
struct mbuf *m;
+
struct ip *ip;
struct pfsync_header *ph;
struct pfsync_subheader *subh;
- struct pf_state *st;
- struct pfsync_upd_req_item *ur;
- int offset;
- int q, count = 0;
- if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
- return;
+ unsigned int mlen = max_linkhdr + s->s_len;
+ unsigned int q, count;
+ caddr_t ptr;
+ size_t off;
- if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
-#if NBPFILTER > 0
- (ifp->if_bpf == NULL && sc->sc_sync_ifidx == 0)) {
-#else
- sc->sc_sync_ifidx == 0) {
-#endif
- pfsync_drop(sc);
- return;
+ MUTEX_ASSERT_LOCKED(&s->s_mtx);
+ if (s->s_len == PFSYNC_MINPKT) {
+ s->s_stat_write_nop++;
+ return (NULL);
}
- pfsync_grab_snapshot(&sn, sc);
+ task_del(s->s_softnet, &s->s_task);
- /*
- * Check below is sufficient to prevent us from sending empty packets,
- * but it does not stop us from sending short packets.
- */
- if (pfsync_is_snapshot_empty(&sn))
- return;
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m == NULL)
+ goto drop;
- MGETHDR(m, M_DONTWAIT, MT_DATA);
- if (m == NULL) {
- sc->sc_if.if_oerrors++;
- pfsyncstat_inc(pfsyncs_onomem);
- pfsync_drop_snapshot(&sn);
- return;
+ if (mlen > MHLEN) {
+ MCLGETL(m, M_DONTWAIT, mlen);
+ if (!ISSET(m->m_flags, M_EXT))
+ goto drop;
}
- if (max_linkhdr + sn.sn_len > MHLEN) {
- MCLGETL(m, M_DONTWAIT, max_linkhdr + sn.sn_len);
- if (!ISSET(m->m_flags, M_EXT)) {
- m_free(m);
- sc->sc_if.if_oerrors++;
- pfsyncstat_inc(pfsyncs_onomem);
- pfsync_drop_snapshot(&sn);
- return;
- }
- }
- m->m_data += max_linkhdr;
- m->m_len = m->m_pkthdr.len = sn.sn_len;
+ m_align(m, s->s_len);
+ m->m_len = m->m_pkthdr.len = s->s_len;
- /* build the ip header */
- ip = mtod(m, struct ip *);
- bcopy(&sc->sc_template, ip, sizeof(*ip));
- offset = sizeof(*ip);
+ ptr = mtod(m, caddr_t);
+ off = 0;
+ ip = (struct ip *)(ptr + off);
+ off += sizeof(*ip);
+ *ip = sc->sc_template;
ip->ip_len = htons(m->m_pkthdr.len);
ip->ip_id = htons(ip_randomid());
- /* build the pfsync header */
- ph = (struct pfsync_header *)(m->m_data + offset);
- bzero(ph, sizeof(*ph));
- offset += sizeof(*ph);
-
+ ph = (struct pfsync_header *)(ptr + off);
+ off += sizeof(*ph);
+ memset(ph, 0, sizeof(*ph));
ph->version = PFSYNC_VERSION;
- ph->len = htons(sn.sn_len - sizeof(*ip));
- bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
+ ph->len = htons(m->m_pkthdr.len - sizeof(*ip));
+
+ for (q = 0; q < nitems(s->s_qs); q++) {
+ struct pf_state_queue *psq = &s->s_qs[q];
+ struct pf_state *st;
- if (!TAILQ_EMPTY(&sn.sn_upd_req_list)) {
- subh = (struct pfsync_subheader *)(m->m_data + offset);
- offset += sizeof(*subh);
+ if (TAILQ_EMPTY(psq))
+ continue;
+
+ subh = (struct pfsync_subheader *)(ptr + off);
+ off += sizeof(*subh);
count = 0;
- while ((ur = TAILQ_FIRST(&sn.sn_upd_req_list)) != NULL) {
- TAILQ_REMOVE(&sn.sn_upd_req_list, ur, ur_snap);
+ while ((st = TAILQ_FIRST(psq)) != NULL) {
+ TAILQ_REMOVE(psq, st, sync_list);
+ count++;
- bcopy(&ur->ur_msg, m->m_data + offset,
- sizeof(ur->ur_msg));
- offset += sizeof(ur->ur_msg);
+ KASSERT(st->sync_state == q);
+ /* the write handler below may override this */
+ st->sync_state = PFSYNC_S_NONE;
- pool_put(&sc->sc_pool, ur);
+ pfsync_qs[q].write(st, ptr + off);
+ off += pfsync_qs[q].len;
- count++;
+ pf_state_unref(st);
}
- bzero(subh, sizeof(*subh));
- subh->len = sizeof(ur->ur_msg) >> 2;
- subh->action = PFSYNC_ACT_UPD_REQ;
+ subh->action = pfsync_qs[q].action;
+ subh->len = pfsync_qs[q].len >> 2;
subh->count = htons(count);
}
- /* has someone built a custom region for us to add? */
- if (sn.sn_plus != NULL) {
- bcopy(sn.sn_plus, m->m_data + offset, sn.sn_pluslen);
- offset += sn.sn_pluslen;
- sn.sn_plus = NULL; /* XXX memory leak ? */
- }
-
#if defined(IPSEC)
- if (!TAILQ_EMPTY(&sn.sn_tdb_q)) {
- struct tdb *t;
+ if (!TAILQ_EMPTY(&s->s_tdb_q)) {
+ struct tdb *tdb;
- subh = (struct pfsync_subheader *)(m->m_data + offset);
- offset += sizeof(*subh);
+ subh = (struct pfsync_subheader *)(ptr + off);
+ off += sizeof(*subh);
count = 0;
- while ((t = TAILQ_FIRST(&sn.sn_tdb_q)) != NULL) {
- TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_snap);
- pfsync_out_tdb(t, m->m_data + offset);
- offset += sizeof(struct pfsync_tdb);
- mtx_enter(&t->tdb_mtx);
- KASSERT(ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED));
- CLR(t->tdb_flags, TDBF_PFSYNC_SNAPPED);
- CLR(t->tdb_flags, TDBF_PFSYNC);
- mtx_leave(&t->tdb_mtx);
- tdb_unref(t);
+ while ((tdb = TAILQ_FIRST(&s->s_tdb_q)) != NULL) {
+ TAILQ_REMOVE(&s->s_tdb_q, tdb, tdb_sync_entry);
count++;
+
+ pfsync_tdb_enter(tdb);
+ KASSERT(ISSET(tdb->tdb_flags, TDBF_PFSYNC));
+
+ /* get a consistent view of the counters */
+ pfsync_out_tdb(tdb, ptr + off);
+
+ CLR(tdb->tdb_flags, TDBF_PFSYNC);
+ pfsync_tdb_leave(tdb);
+
+ off += sizeof(struct pfsync_tdb);
}
- bzero(subh, sizeof(*subh));
subh->action = PFSYNC_ACT_TDB;
subh->len = sizeof(struct pfsync_tdb) >> 2;
subh->count = htons(count);
}
#endif
- /* walk the queues */
- for (q = 0; q < PFSYNC_S_COUNT; q++) {
- if (TAILQ_EMPTY(&sn.sn_qs[q]))
- continue;
-
- subh = (struct pfsync_subheader *)(m->m_data + offset);
- offset += sizeof(*subh);
-
- count = 0;
- while ((st = TAILQ_FIRST(&sn.sn_qs[q])) != NULL) {
- mtx_enter(&st->mtx);
- TAILQ_REMOVE(&sn.sn_qs[q], st, sync_snap);
- KASSERT(st->sync_state == q);
- KASSERT(st->snapped == 1);
- st->sync_state = PFSYNC_S_NONE;
- st->snapped = 0;
- pfsync_qs[q].write(st, m->m_data + offset);
- offset += pfsync_qs[q].len;
- mtx_leave(&st->mtx);
+ timeout_del(&s->s_tmo);
+ s->s_len = PFSYNC_MINPKT;
- pf_state_unref(st);
- count++;
- }
+ return (m);
+drop:
+ m_freem(m);
+ pfsyncstat_inc(pfsyncs_onomem);
+ pfsync_slice_drop(sc, s);
+ return (NULL);
+}
- bzero(subh, sizeof(*subh));
- subh->action = pfsync_qs[q].action;
- subh->len = pfsync_qs[q].len >> 2;
- subh->count = htons(count);
- }
+static void
+pfsync_sendout(struct pfsync_softc *sc, struct mbuf *m)
+{
+ struct ip_moptions imo;
+ unsigned int len = m->m_pkthdr.len;
+#if NBPFILTER > 0
+ caddr_t if_bpf = sc->sc_if.if_bpf;
+ if (if_bpf)
+ bpf_mtap(if_bpf, m, BPF_DIRECTION_OUT);
+#endif
- /* we're done, let's put it on the wire */
-#if NBPFILTER > 0
- if (ifp->if_bpf) {
- m->m_data += sizeof(*ip);
- m->m_len = m->m_pkthdr.len = sn.sn_len - sizeof(*ip);
- bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
- m->m_data -= sizeof(*ip);
- m->m_len = m->m_pkthdr.len = sn.sn_len;
- }
+ imo.imo_ifidx = sc->sc_sync_ifidx;
+ imo.imo_ttl = PFSYNC_DFLTTL;
+ imo.imo_loop = 0;
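+	/* steer the packet out the syncdev with the fixed pfsync ttl */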
- if (sc->sc_sync_ifidx == 0) {
- sc->sc_len = PFSYNC_MINPKT;
- m_freem(m);
- return;
+ if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) == 0) {
+ counters_pkt(sc->sc_if.if_counters, ifc_opackets,
+ ifc_obytes, len);
+ pfsyncstat_inc(pfsyncs_opackets);
+ } else {
+ counters_inc(sc->sc_if.if_counters, ifc_oerrors);
+ pfsyncstat_inc(pfsyncs_oerrors);
}
-#endif
+}
- sc->sc_if.if_opackets++;
- sc->sc_if.if_obytes += m->m_pkthdr.len;
+static void
+pfsync_slice_tmo(void *arg)
+{
+ struct pfsync_slice *s = arg;
- m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
+ task_add(s->s_softnet, &s->s_task);
+}
- pfsync_send_pkt(m);
+static void
+pfsync_slice_sched(struct pfsync_slice *s)
+{
+ s->s_stat_task_add++;
+ task_add(s->s_softnet, &s->s_task);
}
-void
-pfsync_insert_state(struct pf_state *st)
+static void
+pfsync_slice_task(void *arg)
{
- struct pfsync_softc *sc = pfsyncif;
+ struct pfsync_slice *s = arg;
+ struct mbuf *m;
- NET_ASSERT_LOCKED();
+ mtx_enter(&s->s_mtx);
+ s->s_stat_task_run++;
- if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
- st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
- SET(st->state_flags, PFSTATE_NOSYNC);
- return;
+ m = pfsync_slice_write(s);
+ mtx_leave(&s->s_mtx);
+ if (m != NULL) {
+ NET_LOCK();
+ pfsync_sendout(s->s_pfsync, m);
+ NET_UNLOCK();
}
+}
- if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
- ISSET(st->state_flags, PFSTATE_NOSYNC))
- return;
+static void
+pfsync_slice_sendq(void *arg)
+{
+ struct pfsync_slice *s = arg;
+ struct mbuf_list ml;
+ struct mbuf *m;
- if (sc->sc_len == PFSYNC_MINPKT)
- timeout_add_sec(&sc->sc_tmo, 1);
+ mq_delist(&s->s_sendq, &ml);
+ if (ml_empty(&ml))
+ return;
- pfsync_q_ins(st, PFSYNC_S_INS);
+ mtx_enter(&s->s_mtx);
+ s->s_stat_dequeue++;
+ mtx_leave(&s->s_mtx);
- st->sync_updates = 0;
+ NET_LOCK();
+ while ((m = ml_dequeue(&ml)) != NULL)
+ pfsync_sendout(s->s_pfsync, m);
+ NET_UNLOCK();
}
-int
-pfsync_defer(struct pf_state *st, struct mbuf *m, struct pfsync_deferral **ppd)
+static void
+pfsync_q_ins(struct pfsync_slice *s, struct pf_state *st, unsigned int q)
{
- struct pfsync_softc *sc = pfsyncif;
- struct pfsync_deferral *pd;
- unsigned int sched;
-
- NET_ASSERT_LOCKED();
+ size_t nlen = pfsync_qs[q].len;
+ struct mbuf *m = NULL;
- if (!sc->sc_defer ||
- ISSET(st->state_flags, PFSTATE_NOSYNC) ||
- m->m_flags & (M_BCAST|M_MCAST))
- return (0);
+ MUTEX_ASSERT_LOCKED(&s->s_mtx);
+ KASSERT(st->sync_state == PFSYNC_S_NONE);
+ KASSERT(s->s_len >= PFSYNC_MINPKT);
- pd = pool_get(&sc->sc_pool, M_NOWAIT);
- if (pd == NULL)
- return (0);
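+	/* the first entry on a queue also needs a subheader */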
+ if (TAILQ_EMPTY(&s->s_qs[q]))
+ nlen += sizeof(struct pfsync_subheader);
- /*
- * deferral queue grows faster, than timeout can consume,
- * we have to ask packet (caller) to help timer and dispatch
- * one deferral for us.
- *
- * We wish to call pfsync_undefer() here. Unfortunately we can't,
- * because pfsync_undefer() will be calling to ip_output(),
- * which in turn will call to pf_test(), which would then attempt
- * to grab PF_LOCK() we currently hold.
- */
- if (sc->sc_deferred >= 128) {
- mtx_enter(&sc->sc_deferrals_mtx);
- *ppd = TAILQ_FIRST(&sc->sc_deferrals);
- if (*ppd != NULL) {
- TAILQ_REMOVE(&sc->sc_deferrals, *ppd, pd_entry);
- sc->sc_deferred--;
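+	/*
+	 * if this entry won't fit in the current packet, write the
+	 * packet out and queue it for transmission before starting
+	 * a fresh one.
+	 */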
+ if (s->s_len + nlen > s->s_pfsync->sc_if.if_mtu) {
+ m = pfsync_slice_write(s);
+ if (m != NULL) {
+ s->s_stat_enqueue++;
+ if (mq_enqueue(&s->s_sendq, m) == 0)
+ task_add(s->s_softnet, &s->s_send);
}
- mtx_leave(&sc->sc_deferrals_mtx);
- } else
- *ppd = NULL;
-
- m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
- SET(st->state_flags, PFSTATE_ACK);
- pd->pd_st = pf_state_ref(st);
- pd->pd_m = m;
+ nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
+ }
- pd->pd_deadline = getnsecuptime() + PFSYNC_DEFER_NSEC;
+ s->s_len += nlen;
+ pf_state_ref(st);
+ TAILQ_INSERT_TAIL(&s->s_qs[q], st, sync_list);
+ st->sync_state = q;
- mtx_enter(&sc->sc_deferrals_mtx);
- sched = TAILQ_EMPTY(&sc->sc_deferrals);
+ if (!timeout_pending(&s->s_tmo))
+ timeout_add_sec(&s->s_tmo, 1);
+}
- TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
- sc->sc_deferred++;
- mtx_leave(&sc->sc_deferrals_mtx);
+static void
+pfsync_q_del(struct pfsync_slice *s, struct pf_state *st)
+{
+ unsigned int q = st->sync_state;
- if (sched)
- timeout_add_nsec(&sc->sc_deferrals_tmo, PFSYNC_DEFER_NSEC);
+ MUTEX_ASSERT_LOCKED(&s->s_mtx);
+ KASSERT(st->sync_state < PFSYNC_S_NONE);
- schednetisr(NETISR_PFSYNC);
+ st->sync_state = PFSYNC_S_NONE;
+ TAILQ_REMOVE(&s->s_qs[q], st, sync_list);
+ pf_state_unref(st);
+ s->s_len -= pfsync_qs[q].len;
- return (1);
+ if (TAILQ_EMPTY(&s->s_qs[q]))
+ s->s_len -= sizeof(struct pfsync_subheader);
}
+/*
+ * the pfsync hooks that pf calls
+ */
+
void
-pfsync_undefer_notify(struct pfsync_deferral *pd)
+pfsync_init_state(struct pf_state *st, const struct pf_state_key *skw,
+ const struct pf_state_key *sks, int flags)
{
- struct pf_pdesc pdesc;
- struct pf_state *st = pd->pd_st;
+ /* this is called before pf_state_insert */
- /*
- * pf_remove_state removes the state keys and sets st->timeout
- * to PFTM_UNLINKED. this is done under NET_LOCK which should
- * be held here, so we can use PFTM_UNLINKED as a test for
- * whether the state keys are set for the address family
- * lookup.
- */
+ if (skw->proto == IPPROTO_PFSYNC)
+ SET(st->state_flags, PFSTATE_NOSYNC);
- if (st->timeout == PFTM_UNLINKED)
+ if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
+ st->sync_state = PFSYNC_S_DEAD;
return;
+ }
- if (st->rt == PF_ROUTETO) {
- if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af,
- st->direction, st->kif, pd->pd_m, NULL) != PF_PASS)
- return;
- switch (st->key[PF_SK_WIRE]->af) {
- case AF_INET:
- pf_route(&pdesc, st);
- break;
-#ifdef INET6
- case AF_INET6:
- pf_route6(&pdesc, st);
- break;
-#endif /* INET6 */
- default:
- unhandled_af(st->key[PF_SK_WIRE]->af);
- }
- pd->pd_m = pdesc.m;
- } else {
- switch (st->key[PF_SK_WIRE]->af) {
- case AF_INET:
- ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, 0);
- break;
-#ifdef INET6
- case AF_INET6:
- ip6_output(pd->pd_m, NULL, NULL, 0, NULL, NULL);
- break;
-#endif /* INET6 */
- default:
- unhandled_af(st->key[PF_SK_WIRE]->af);
- }
-
- pd->pd_m = NULL;
+ if (ISSET(flags, PFSYNC_SI_IOCTL)) {
+ /* all good */
+ return;
}
-}
-void
-pfsync_free_deferral(struct pfsync_deferral *pd)
-{
- struct pfsync_softc *sc = pfsyncif;
+ /* state came off the wire */
+ if (ISSET(st->state_flags, PFSTATE_ACK)) {
+ CLR(st->state_flags, PFSTATE_ACK);
- pf_state_unref(pd->pd_st);
- m_freem(pd->pd_m);
- pool_put(&sc->sc_pool, pd);
+ /* peer wants an iack, not an insert */
+ st->sync_state = PFSYNC_S_SYNC;
+ }
}
void
-pfsync_undefer(struct pfsync_deferral *pd, int drop)
+pfsync_insert_state(struct pf_state *st)
{
- struct pfsync_softc *sc = pfsyncif;
+ struct pfsync_softc *sc;
- NET_ASSERT_LOCKED();
+ MUTEX_ASSERT_UNLOCKED(&st->mtx);
- if (sc == NULL)
+ if (ISSET(st->state_flags, PFSTATE_NOSYNC) ||
+ st->sync_state == PFSYNC_S_DEAD)
return;
- CLR(pd->pd_st->state_flags, PFSTATE_ACK);
- if (!drop)
- pfsync_undefer_notify(pd);
+ smr_read_enter();
+ sc = SMR_PTR_GET(&pfsyncif);
+ if (sc != NULL) {
+ struct pfsync_slice *s = pfsync_slice_enter(sc, st);
+
+ switch (st->sync_state) {
+ case PFSYNC_S_UPD_C:
+ /* we must have lost a race after insert */
+ pfsync_q_del(s, st);
+ /* FALLTHROUGH */
+ case PFSYNC_S_NONE:
+ pfsync_q_ins(s, st, PFSYNC_S_INS);
+ break;
+ case PFSYNC_S_SYNC:
+ st->sync_state = PFSYNC_S_NONE; /* gross */
+ pfsync_q_ins(s, st, PFSYNC_S_IACK);
+ pfsync_slice_sched(s); /* the peer is waiting */
+ break;
+ default:
+ panic("%s: state %p unexpected sync_state %d",
+ __func__, st, st->sync_state);
+ /* NOTREACHED */
+ }
- pfsync_free_deferral(pd);
+ pfsync_slice_leave(sc, s);
+ }
+ smr_read_leave();
}
void
-pfsync_deferrals_tmo(void *arg)
+pfsync_update_state(struct pf_state *st)
{
- struct pfsync_softc *sc = arg;
- struct pfsync_deferral *pd;
- uint64_t now, nsec = 0;
- struct pfsync_deferrals pds = TAILQ_HEAD_INITIALIZER(pds);
+ struct pfsync_softc *sc;
- now = getnsecuptime();
+ MUTEX_ASSERT_UNLOCKED(&st->mtx);
- mtx_enter(&sc->sc_deferrals_mtx);
- for (;;) {
- pd = TAILQ_FIRST(&sc->sc_deferrals);
- if (pd == NULL)
+ if (ISSET(st->state_flags, PFSTATE_NOSYNC) ||
+ st->sync_state == PFSYNC_S_DEAD)
+ return;
+
+ smr_read_enter();
+ sc = SMR_PTR_GET(&pfsyncif);
+ if (sc != NULL) {
+ struct pfsync_slice *s = pfsync_slice_enter(sc, st);
+ int sync = 0;
+
+ switch (st->sync_state) {
+ case PFSYNC_S_UPD_C:
+ case PFSYNC_S_UPD:
+ /* we're already handling it */
+ if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
+ st->sync_updates++;
+ if (st->sync_updates >= sc->sc_maxupdates)
+ sync = 1;
+ }
+ /* FALLTHROUGH */
+ case PFSYNC_S_INS:
+ case PFSYNC_S_DEL:
+ case PFSYNC_S_DEAD:
break;
- if (now < pd->pd_deadline) {
- nsec = pd->pd_deadline - now;
+ case PFSYNC_S_IACK:
+ pfsync_q_del(s, st);
+ /* FALLTHROUGH */
+ case PFSYNC_S_NONE:
+ pfsync_q_ins(s, st, PFSYNC_S_UPD_C);
+ st->sync_updates = 0;
break;
+ default:
+ panic("%s: state %p unexpected sync_state %d",
+ __func__, st, st->sync_state);
+ /* NOTREACHED */
}
- TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
- sc->sc_deferred--;
- TAILQ_INSERT_TAIL(&pds, pd, pd_entry);
- }
- mtx_leave(&sc->sc_deferrals_mtx);
-
- if (nsec > 0) {
- /* we were looking at a pd, but it wasn't old enough */
- timeout_add_nsec(&sc->sc_deferrals_tmo, nsec);
- }
-
- if (TAILQ_EMPTY(&pds))
- return;
-
- NET_LOCK();
- while ((pd = TAILQ_FIRST(&pds)) != NULL) {
- TAILQ_REMOVE(&pds, pd, pd_entry);
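+		/*
+		 * also send promptly if the state changed within the
+		 * last two seconds, so updates reach the peer quickly.
+		 */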
+ if (!sync && (getuptime() - st->pfsync_time) < 2)
+ sync = 1;
- pfsync_undefer(pd, 0);
+ if (sync)
+ pfsync_slice_sched(s);
+ pfsync_slice_leave(sc, s);
}
- NET_UNLOCK();
+ smr_read_leave();
}
void
-pfsync_deferred(struct pf_state *st, int drop)
+pfsync_delete_state(struct pf_state *st)
{
- struct pfsync_softc *sc = pfsyncif;
- struct pfsync_deferral *pd;
+ struct pfsync_softc *sc;
- NET_ASSERT_LOCKED();
+ MUTEX_ASSERT_UNLOCKED(&st->mtx);
+
+ if (ISSET(st->state_flags, PFSTATE_NOSYNC) ||
+ st->sync_state == PFSYNC_S_DEAD)
+ return;
+
+ smr_read_enter();
+ sc = SMR_PTR_GET(&pfsyncif);
+ if (sc != NULL) {
+ struct pfsync_slice *s = pfsync_slice_enter(sc, st);
+
+ switch (st->sync_state) {
+ case PFSYNC_S_INS:
+ /* let's pretend this never happened */
+ pfsync_q_del(s, st);
+ break;
- mtx_enter(&sc->sc_deferrals_mtx);
- TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
- if (pd->pd_st == st) {
- TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
- sc->sc_deferred--;
+ case PFSYNC_S_UPD_C:
+ case PFSYNC_S_UPD:
+ case PFSYNC_S_IACK:
+ pfsync_q_del(s, st);
+ /* FALLTHROUGH */
+ case PFSYNC_S_NONE:
+ pfsync_q_ins(s, st, PFSYNC_S_DEL);
+ st->sync_updates = 0;
break;
+ case PFSYNC_S_DEL:
+ case PFSYNC_S_DEAD:
+ /* XXX we should count this */
+ break;
+ default:
+ panic("%s: state %p unexpected sync_state %d",
+ __func__, st, st->sync_state);
+ /* NOTREACHED */
}
- }
- mtx_leave(&sc->sc_deferrals_mtx);
- if (pd != NULL)
- pfsync_undefer(pd, drop);
+ pfsync_slice_leave(sc, s);
+ }
+ smr_read_leave();
}
+struct pfsync_subh_clr {
+ struct pfsync_subheader subh;
+ struct pfsync_clr clr;
+} __packed __aligned(4);
+
void
-pfsync_update_state(struct pf_state *st)
+pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
- struct pfsync_softc *sc = pfsyncif;
- int sync = 0;
+ struct pfsync_softc *sc;
+ struct pfsync_subh_clr *h;
+ struct mbuf *m;
+ unsigned int hlen, mlen;
- NET_ASSERT_LOCKED();
+ smr_read_enter();
+ sc = SMR_PTR_GET(&pfsyncif);
+ if (sc != NULL)
+ refcnt_take(&sc->sc_refs);
+ smr_read_leave();
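+
+	/* the refcnt keeps sc alive now that the smr section has ended */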
- if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
+ if (sc == NULL)
return;
- if (ISSET(st->state_flags, PFSTATE_ACK))
- pfsync_deferred(st, 0);
- if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
- if (st->sync_state != PFSYNC_S_NONE)
- pfsync_q_del(st);
- return;
- }
+ hlen = sizeof(sc->sc_template) +
+ sizeof(struct pfsync_header) +
+ sizeof(*h);
- if (sc->sc_len == PFSYNC_MINPKT)
- timeout_add_sec(&sc->sc_tmo, 1);
+ mlen = max_linkhdr + hlen;
- switch (st->sync_state) {
- case PFSYNC_S_UPD_C:
- case PFSYNC_S_UPD:
- case PFSYNC_S_INS:
- /* we're already handling it */
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m == NULL) {
+ /* count error */
+ goto leave;
+ }
- if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
- st->sync_updates++;
- if (st->sync_updates >= sc->sc_maxupdates)
- sync = 1;
+ if (mlen > MHLEN) {
+ MCLGETL(m, M_DONTWAIT, mlen);
+ if (!ISSET(m->m_flags, M_EXT)) {
+ m_freem(m);
+ goto leave;
}
- break;
+ }
- case PFSYNC_S_IACK:
- pfsync_q_del(st);
- case PFSYNC_S_NONE:
- pfsync_q_ins(st, PFSYNC_S_UPD_C);
- st->sync_updates = 0;
- break;
+ m_align(m, sizeof(*h));
+ h = mtod(m, struct pfsync_subh_clr *);
- case PFSYNC_S_DEL:
- case PFSYNC_S_COUNT:
- case PFSYNC_S_DEFER:
- break;
+ h->subh.action = PFSYNC_ACT_CLR;
+ h->subh.len = sizeof(h->clr) >> 2;
+ h->subh.count = htons(1);
- default:
- panic("pfsync_update_state: unexpected sync state %d",
- st->sync_state);
- }
+ strlcpy(h->clr.ifname, ifname, sizeof(h->clr.ifname));
+ h->clr.creatorid = creatorid;
- if (sync || (getuptime() - st->pfsync_time) < 2)
- schednetisr(NETISR_PFSYNC);
-}
+ m->m_pkthdr.len = m->m_len = sizeof(*h);
+ m = pfsync_encap(sc, m);
+ if (m == NULL)
+ goto leave;
-void
-pfsync_cancel_full_update(struct pfsync_softc *sc)
-{
- if (timeout_pending(&sc->sc_bulkfail_tmo) ||
- timeout_pending(&sc->sc_bulk_tmo)) {
-#if NCARP > 0
- if (!pfsync_sync_ok)
- carp_group_demote_adj(&sc->sc_if, -1,
- "pfsync bulk cancelled");
- if (sc->sc_initial_bulk) {
- carp_group_demote_adj(&sc->sc_if, -32,
- "pfsync init");
- sc->sc_initial_bulk = 0;
- }
-#endif
- pfsync_sync_ok = 1;
- DPFPRINTF(LOG_INFO, "cancelling bulk update");
- }
- timeout_del(&sc->sc_bulkfail_tmo);
- timeout_del(&sc->sc_bulk_tmo);
- sc->sc_bulk_next = NULL;
- sc->sc_bulk_last = NULL;
- sc->sc_ureq_sent = 0;
- sc->sc_bulk_tries = 0;
+ pfsync_sendout(sc, m);
+leave:
+ refcnt_rele_wake(&sc->sc_refs);
}
-void
-pfsync_request_full_update(struct pfsync_softc *sc)
+int
+pfsync_state_in_use(struct pf_state *st)
{
- if (sc->sc_sync_ifidx != 0 && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
- /* Request a full state table update. */
- sc->sc_ureq_sent = getuptime();
-#if NCARP > 0
- if (!sc->sc_link_demoted && pfsync_sync_ok)
- carp_group_demote_adj(&sc->sc_if, 1,
- "pfsync bulk start");
-#endif
- pfsync_sync_ok = 0;
- DPFPRINTF(LOG_INFO, "requesting bulk update");
- PF_LOCK();
- timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
- pf_pool_limits[PF_LIMIT_STATES].limit /
- ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
- sizeof(struct pfsync_state)));
- PF_UNLOCK();
- pfsync_request_update(0, 0);
+ struct pfsync_softc *sc;
+ int rv = 0;
+
+ smr_read_enter();
+ sc = SMR_PTR_GET(&pfsyncif);
+ if (sc != NULL) {
+ /*
+ * pfsync bulk sends run inside
+ * rw_enter_read(&pf_state_list.pfs_rwl), and this
+ * code (pfsync_state_in_use) is only called from the
+ * purge code inside
+ * rw_enter_write(&pf_state_list.pfs_rwl). therefore,
+ * those two sections are exclusive so we can safely
+ * look at the bulk send pointers.
+ */
+ /* rw_assert_wrlock(&pf_state_list.pfs_rwl); */
+ if (sc->sc_bulk_snd.snd_next == st ||
+ sc->sc_bulk_snd.snd_tail == st)
+ rv = 1;
}
+ smr_read_leave();
+
+ return (rv);
}
-void
-pfsync_request_update(u_int32_t creatorid, u_int64_t id)
+int
+pfsync_defer(struct pf_state *st, struct mbuf *m)
{
- struct pfsync_softc *sc = pfsyncif;
- struct pfsync_upd_req_item *item;
- size_t nlen, sclen;
- int retry;
-
- /*
- * this code does nothing to prevent multiple update requests for the
- * same state being generated.
- */
+ struct pfsync_softc *sc;
+ struct pfsync_slice *s;
+ struct pfsync_deferral *pd;
+ int sched = 0;
+ int rv = 0;
- item = pool_get(&sc->sc_pool, PR_NOWAIT);
- if (item == NULL) {
- /* XXX stats */
- return;
- }
+ if (ISSET(st->state_flags, PFSTATE_NOSYNC) ||
+ ISSET(m->m_flags, M_BCAST|M_MCAST))
+ return (0);
- item->ur_msg.id = id;
- item->ur_msg.creatorid = creatorid;
+ smr_read_enter();
+ sc = SMR_PTR_GET(&pfsyncif);
+ if (sc == NULL || !sc->sc_defer)
+ goto leave;
- for (;;) {
- mtx_enter(&sc->sc_upd_req_mtx);
+	pd = pool_get(&pfsync_deferrals_pool, PR_NOWAIT);
+	if (pd == NULL)
+		goto leave;
- nlen = sizeof(struct pfsync_upd_req);
- if (TAILQ_EMPTY(&sc->sc_upd_req_list))
- nlen += sizeof(struct pfsync_subheader);
+ s = pfsync_slice_enter(sc, st);
+ s->s_stat_defer_add++;
- sclen = atomic_add_long_nv(&sc->sc_len, nlen);
- retry = (sclen > sc->sc_if.if_mtu);
- if (retry)
- atomic_sub_long(&sc->sc_len, nlen);
- else
- TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
+ pd->pd_st = pf_state_ref(st);
+ pd->pd_m = m;
+ pd->pd_deadline = getnsecuptime() + PFSYNC_DEFER_NSEC;
- mtx_leave(&sc->sc_upd_req_mtx);
+ m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
+ st->sync_defer = pd;
- if (!retry)
- break;
+ sched = s->s_deferred++;
+ TAILQ_INSERT_TAIL(&s->s_deferrals, pd, pd_entry);
- pfsync_sendout();
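+	/*
+	 * the first deferral arms the timeout; once the queue grows
+	 * past the limit, have the task drain it immediately instead.
+	 */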
+ if (sched == 0)
+ timeout_add_nsec(&s->s_deferrals_tmo, PFSYNC_DEFER_NSEC);
+ else if (sched >= PFSYNC_DEFER_LIMIT) {
+ s->s_stat_defer_overlimit++;
+ timeout_del(&s->s_deferrals_tmo);
+ task_add(s->s_softnet, &s->s_deferrals_task);
}
- schednetisr(NETISR_PFSYNC);
+ pfsync_slice_sched(s);
+ pfsync_slice_leave(sc, s);
+ rv = 1;
+leave:
+ smr_read_leave();
+
+ return (rv);
}
-void
-pfsync_update_state_req(struct pf_state *st)
+static void
+pfsync_deferred(struct pfsync_softc *sc, struct pf_state *st)
{
- struct pfsync_softc *sc = pfsyncif;
-
- if (sc == NULL)
- panic("pfsync_update_state_req: nonexistent instance");
+ struct pfsync_slice *s;
+ struct pfsync_deferral *pd;
- if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
- if (st->sync_state != PFSYNC_S_NONE)
- pfsync_q_del(st);
- return;
- }
+ s = pfsync_slice_enter(sc, st);
- switch (st->sync_state) {
- case PFSYNC_S_UPD_C:
- case PFSYNC_S_IACK:
- pfsync_q_del(st);
- case PFSYNC_S_NONE:
- pfsync_q_ins(st, PFSYNC_S_UPD);
- schednetisr(NETISR_PFSYNC);
- return;
+ pd = st->sync_defer;
+ if (pd != NULL) {
+ s->s_stat_defer_ack++;
- case PFSYNC_S_INS:
- case PFSYNC_S_UPD:
- case PFSYNC_S_DEL:
- /* we're already handling it */
- return;
+ TAILQ_REMOVE(&s->s_deferrals, pd, pd_entry);
+ s->s_deferred--;
- default:
- panic("pfsync_update_state_req: unexpected sync state %d",
- st->sync_state);
+ st = pd->pd_st;
+ st->sync_defer = NULL;
}
+ pfsync_slice_leave(sc, s);
+
+ if (pd != NULL)
+ pfsync_defer_output(pd);
}
-void
-pfsync_delete_state(struct pf_state *st)
+static void
+pfsync_deferrals_tmo(void *arg)
{
- struct pfsync_softc *sc = pfsyncif;
+ struct pfsync_slice *s = arg;
- NET_ASSERT_LOCKED();
+ if (READ_ONCE(s->s_deferred) > 0)
+ task_add(s->s_softnet, &s->s_deferrals_task);
+}
- if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
- return;
+static void
+pfsync_deferrals_task(void *arg)
+{
+ struct pfsync_slice *s = arg;
+ struct pfsync_deferral *pd;
+ struct pf_state *st;
+ uint64_t now, nsec = 0;
+ struct pfsync_deferrals pds = TAILQ_HEAD_INITIALIZER(pds);
- if (ISSET(st->state_flags, PFSTATE_ACK))
- pfsync_deferred(st, 1);
- if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
- if (st->sync_state != PFSYNC_S_NONE)
- pfsync_q_del(st);
- return;
- }
+ now = getnsecuptime();
+
+ mtx_enter(&s->s_mtx);
+ s->s_stat_defer_run++; /* maybe move this into the loop */
+ for (;;) {
+ pd = TAILQ_FIRST(&s->s_deferrals);
+ if (pd == NULL)
+ break;
- if (sc->sc_len == PFSYNC_MINPKT)
- timeout_add_sec(&sc->sc_tmo, 1);
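+		/*
+		 * keep draining while we're over the limit, otherwise
+		 * stop at the first deferral that hasn't expired yet.
+		 */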
+ if (s->s_deferred < PFSYNC_DEFER_LIMIT &&
+ now < pd->pd_deadline) {
+ nsec = pd->pd_deadline - now;
+ break;
+ }
- switch (st->sync_state) {
- case PFSYNC_S_INS:
- /* we never got to tell the world so just forget about it */
- pfsync_q_del(st);
- return;
+ TAILQ_REMOVE(&s->s_deferrals, pd, pd_entry);
+ s->s_deferred--;
- case PFSYNC_S_UPD_C:
- case PFSYNC_S_UPD:
- case PFSYNC_S_IACK:
- pfsync_q_del(st);
/*
- * FALLTHROUGH to putting it on the del list
- * Note on reference count bookkeeping:
- * pfsync_q_del() drops reference for queue
- * ownership. But the st entry survives, because
- * our caller still holds a reference.
+ * detach the pd from the state. the pd still refers
+ * to the state though.
*/
+ st = pd->pd_st;
+ st->sync_defer = NULL;
- case PFSYNC_S_NONE:
- /*
- * We either fall through here, or there is no reference to
- * st owned by pfsync queues at this point.
- *
- * Calling pfsync_q_ins() puts st to del queue. The pfsync_q_ins()
- * grabs a reference for delete queue.
- */
- pfsync_q_ins(st, PFSYNC_S_DEL);
+ TAILQ_INSERT_TAIL(&pds, pd, pd_entry);
+ }
+ mtx_leave(&s->s_mtx);
+
+ if (nsec > 0) {
+ /* we were looking at a pd, but it wasn't old enough */
+ timeout_add_nsec(&s->s_deferrals_tmo, nsec);
+ }
+
+ if (TAILQ_EMPTY(&pds))
return;
- default:
- panic("pfsync_delete_state: unexpected sync state %d",
- st->sync_state);
+ NET_LOCK();
+ while ((pd = TAILQ_FIRST(&pds)) != NULL) {
+ TAILQ_REMOVE(&pds, pd, pd_entry);
+
+ pfsync_defer_output(pd);
}
+ NET_UNLOCK();
}
-void
-pfsync_clear_states(u_int32_t creatorid, const char *ifname)
+static void
+pfsync_defer_output(struct pfsync_deferral *pd)
{
- struct pfsync_softc *sc = pfsyncif;
- struct {
- struct pfsync_subheader subh;
- struct pfsync_clr clr;
- } __packed r;
+ struct pf_pdesc pdesc;
+ struct pf_state *st = pd->pd_st;
- NET_ASSERT_LOCKED();
+ if (st->rt == PF_ROUTETO) {
+ if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af,
+ st->direction, NULL, pd->pd_m, NULL) != PF_PASS)
+ return;
+ switch (st->key[PF_SK_WIRE]->af) {
+ case AF_INET:
+ pf_route(&pdesc, st);
+ break;
+#ifdef INET6
+ case AF_INET6:
+ pf_route6(&pdesc, st);
+ break;
+#endif /* INET6 */
+ default:
+ unhandled_af(st->key[PF_SK_WIRE]->af);
+ }
+ pd->pd_m = pdesc.m;
+ } else {
+ switch (st->key[PF_SK_WIRE]->af) {
+ case AF_INET:
+ ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, 0);
+ break;
+#ifdef INET6
+ case AF_INET6:
+ ip6_output(pd->pd_m, NULL, NULL, 0, NULL, NULL);
+ break;
+#endif /* INET6 */
+ default:
+ unhandled_af(st->key[PF_SK_WIRE]->af);
+ }
- if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
- return;
+ pd->pd_m = NULL;
+ }
- bzero(&r, sizeof(r));
+ pf_state_unref(st);
+ m_freem(pd->pd_m);
+ pool_put(&pfsync_deferrals_pool, pd);
+}
- r.subh.action = PFSYNC_ACT_CLR;
- r.subh.len = sizeof(struct pfsync_clr) >> 2;
- r.subh.count = htons(1);
+struct pfsync_subh_bus {
+ struct pfsync_subheader subh;
+ struct pfsync_bus bus;
+} __packed __aligned(4);
- strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
- r.clr.creatorid = creatorid;
+static unsigned int
+pfsync_bulk_snd_bus(struct pfsync_softc *sc,
+ struct mbuf *m, const unsigned int space,
+ uint32_t endtime, uint8_t status)
+{
+ struct pfsync_subh_bus *h;
+ unsigned int nlen;
- pfsync_send_plus(&r, sizeof(r));
-}
+ nlen = m->m_len + sizeof(*h);
+ if (space < nlen)
+ return (0);
-void
-pfsync_iack(struct pf_state *st)
-{
- pfsync_q_ins(st, PFSYNC_S_IACK);
- schednetisr(NETISR_PFSYNC);
+ h = (struct pfsync_subh_bus *)(mtod(m, caddr_t) + m->m_len);
+ memset(h, 0, sizeof(*h));
+
+ h->subh.action = PFSYNC_ACT_BUS;
+ h->subh.len = sizeof(h->bus) >> 2;
+ h->subh.count = htons(1);
+
+ h->bus.creatorid = pf_status.hostid;
+ h->bus.endtime = htonl(endtime);
+ h->bus.status = status;
+
+ m->m_len = nlen;
+
+ return (1);
}
-void
-pfsync_q_ins(struct pf_state *st, int q)
+static unsigned int
+pfsync_bulk_snd_states(struct pfsync_softc *sc,
+ struct mbuf *m, const unsigned int space, unsigned int len)
{
- struct pfsync_softc *sc = pfsyncif;
- size_t nlen, sclen;
+ struct pf_state *st;
+ struct pfsync_state *sp;
+ unsigned int nlen;
+ unsigned int count = 0;
- if (sc->sc_len < PFSYNC_MINPKT)
- panic("pfsync pkt len is too low %zd", sc->sc_len);
- do {
- mtx_enter(&sc->sc_st_mtx);
- mtx_enter(&st->mtx);
+ st = sc->sc_bulk_snd.snd_next;
- /*
- * There are either two threads trying to update the
- * the same state, or the state is just being processed
- * (is on snapshot queue).
- */
- if (st->sync_state != PFSYNC_S_NONE) {
- mtx_leave(&st->mtx);
- mtx_leave(&sc->sc_st_mtx);
+ for (;;) {
+ nlen = len + sizeof(*sp);
+ sp = (struct pfsync_state *)(mtod(m, caddr_t) + len);
+ if (space < nlen)
break;
- }
- nlen = pfsync_qs[q].len;
+ mtx_enter(&st->mtx);
+ pf_state_export(sp, st);
+ mtx_leave(&st->mtx);
+
+ /* commit */
+ count++;
+ m->m_len = len = nlen;
- if (TAILQ_EMPTY(&sc->sc_qs[q]))
- nlen += sizeof(struct pfsync_subheader);
+ if (st == sc->sc_bulk_snd.snd_tail) {
+ if (pfsync_bulk_snd_bus(sc, m, space,
+ 0, PFSYNC_BUS_END) == 0) {
+ /* couldn't fit the BUS */
+ st = NULL;
+ break;
+ }
- sclen = atomic_add_long_nv(&sc->sc_len, nlen);
- if (sclen > sc->sc_if.if_mtu) {
- atomic_sub_long(&sc->sc_len, nlen);
- mtx_leave(&st->mtx);
- mtx_leave(&sc->sc_st_mtx);
- pfsync_sendout();
- continue;
+ /* this BUS is done */
+ pfsync_dprintf(sc, "bulk send done (%s)", __func__);
+ sc->sc_bulk_snd.snd_again = 0; /* XXX */
+ sc->sc_bulk_snd.snd_next = NULL;
+ sc->sc_bulk_snd.snd_tail = NULL;
+ return (count);
}
- pf_state_ref(st);
+ st = TAILQ_NEXT(st, entry_list);
+ }
- TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
- st->sync_state = q;
- mtx_leave(&st->mtx);
- mtx_leave(&sc->sc_st_mtx);
- } while (0);
+ /* there's still work to do */
+ sc->sc_bulk_snd.snd_next = st;
+ timeout_add_msec(&sc->sc_bulk_snd.snd_tmo, PFSYNC_BULK_SND_IVAL_MS);
+
+ return (count);
}
-void
-pfsync_q_del(struct pf_state *st)
+static unsigned int
+pfsync_bulk_snd_sub(struct pfsync_softc *sc,
+ struct mbuf *m, const unsigned int space)
{
- struct pfsync_softc *sc = pfsyncif;
- int q;
+ struct pfsync_subheader *subh;
+ unsigned int count;
+ unsigned int len, nlen;
+
+ len = m->m_len;
+ nlen = len + sizeof(*subh);
+ if (nlen > space)
+ return (0);
+
+ subh = (struct pfsync_subheader *)(mtod(m, caddr_t) + len);
- mtx_enter(&sc->sc_st_mtx);
- mtx_enter(&st->mtx);
- q = st->sync_state;
/*
- * re-check under mutex
- * if state is snapped already, then just bail out, because we came
- * too late, the state is being just processed/dispatched to peer.
+	 * pfsync_bulk_snd_states only updates m->m_len once it has
+	 * filled in a state at the offset we gave it.
*/
- if ((q == PFSYNC_S_NONE) || (st->snapped)) {
- mtx_leave(&st->mtx);
- mtx_leave(&sc->sc_st_mtx);
- return;
- }
- atomic_sub_long(&sc->sc_len, pfsync_qs[q].len);
- TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
- if (TAILQ_EMPTY(&sc->sc_qs[q]))
- atomic_sub_long(&sc->sc_len, sizeof (struct pfsync_subheader));
- st->sync_state = PFSYNC_S_NONE;
- mtx_leave(&st->mtx);
- mtx_leave(&sc->sc_st_mtx);
+ count = pfsync_bulk_snd_states(sc, m, space, nlen);
+ if (count == 0)
+ return (0);
- pf_state_unref(st);
+ subh->action = PFSYNC_ACT_UPD;
+ subh->len = sizeof(struct pfsync_state) >> 2;
+ subh->count = htons(count);
+
+ return (count);
}
-#if defined(IPSEC)
-void
-pfsync_update_tdb(struct tdb *t, int output)
+static void
+pfsync_bulk_snd_start(struct pfsync_softc *sc)
{
- struct pfsync_softc *sc = pfsyncif;
- size_t nlen, sclen;
+ const unsigned int space = sc->sc_if.if_mtu -
+ (sizeof(struct ip) + sizeof(struct pfsync_header));
+ struct mbuf *m;
- if (sc == NULL)
- return;
+ rw_enter_read(&pf_state_list.pfs_rwl);
- if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
- do {
- mtx_enter(&sc->sc_tdb_mtx);
- nlen = sizeof(struct pfsync_tdb);
+ rw_enter_write(&sc->sc_bulk_snd.snd_lock);
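+	/* is a bulk send already running? remember to go again */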
+ if (sc->sc_bulk_snd.snd_next != NULL) {
+ sc->sc_bulk_snd.snd_again = 1;
+ goto leave;
+ }
- mtx_enter(&t->tdb_mtx);
- if (ISSET(t->tdb_flags, TDBF_PFSYNC)) {
- /* we've lost race, no action for us then */
- mtx_leave(&t->tdb_mtx);
- mtx_leave(&sc->sc_tdb_mtx);
- break;
- }
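+	/* get a consistent view of the head and tail of the list */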
+ mtx_enter(&pf_state_list.pfs_mtx);
+ sc->sc_bulk_snd.snd_next = TAILQ_FIRST(&pf_state_list.pfs_list);
+ sc->sc_bulk_snd.snd_tail = TAILQ_LAST(&pf_state_list.pfs_list,
+ pf_state_queue);
+ mtx_leave(&pf_state_list.pfs_mtx);
- if (TAILQ_EMPTY(&sc->sc_tdb_q))
- nlen += sizeof(struct pfsync_subheader);
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m == NULL)
+ goto leave;
- sclen = atomic_add_long_nv(&sc->sc_len, nlen);
- if (sclen > sc->sc_if.if_mtu) {
- atomic_sub_long(&sc->sc_len, nlen);
- mtx_leave(&t->tdb_mtx);
- mtx_leave(&sc->sc_tdb_mtx);
- pfsync_sendout();
- continue;
- }
+ MCLGETL(m, M_DONTWAIT, max_linkhdr + sc->sc_if.if_mtu);
+ if (!ISSET(m->m_flags, M_EXT)) {
+ /* some error++ */
+ m_freem(m); /* drop */
+ goto leave;
+ }
- TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
- tdb_ref(t);
- SET(t->tdb_flags, TDBF_PFSYNC);
- mtx_leave(&t->tdb_mtx);
+ m_align(m, space);
+ m->m_len = 0;
- mtx_leave(&sc->sc_tdb_mtx);
- t->tdb_updates = 0;
- } while (0);
- } else {
- if (++t->tdb_updates >= sc->sc_maxupdates)
- schednetisr(NETISR_PFSYNC);
+ if (sc->sc_bulk_snd.snd_tail == NULL) {
+ pfsync_dprintf(sc, "bulk send empty (%s)", __func__);
+
+ /* list is empty */
+ if (pfsync_bulk_snd_bus(sc, m, space, 0, PFSYNC_BUS_END) == 0)
+ panic("%s: mtu is too low", __func__);
+ goto encap;
}
- mtx_enter(&t->tdb_mtx);
- if (output)
- SET(t->tdb_flags, TDBF_PFSYNC_RPL);
- else
- CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
- mtx_leave(&t->tdb_mtx);
+ pfsync_dprintf(sc, "bulk send start (%s)", __func__);
+
+ /* start a bulk update. */
+ if (pfsync_bulk_snd_bus(sc, m, space, 0, PFSYNC_BUS_START) == 0)
+ panic("%s: mtu is too low", __func__);
+
+ /* fill it up with state updates. */
+ pfsync_bulk_snd_sub(sc, m, space);
+
+encap:
+ m->m_pkthdr.len = m->m_len;
+ m = pfsync_encap(sc, m);
+ if (m == NULL)
+ goto leave;
+
+ pfsync_sendout(sc, m);
+
+leave:
+ rw_exit_write(&sc->sc_bulk_snd.snd_lock);
+
+ rw_exit_read(&pf_state_list.pfs_rwl);
}
-#endif
-#if defined(IPSEC)
-void
-pfsync_delete_tdb(struct tdb *t)
+static void
+pfsync_bulk_snd_tmo(void *arg)
{
- struct pfsync_softc *sc = pfsyncif;
- size_t nlen;
+ struct pfsync_softc *sc = arg;
+ const unsigned int space = sc->sc_if.if_mtu -
+ (sizeof(struct ip) + sizeof(struct pfsync_header));
+ struct mbuf *m;
- if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m == NULL) {
+ /* some error++ */
+ /* retry later */
+ timeout_add_msec(&sc->sc_bulk_snd.snd_tmo,
+ PFSYNC_BULK_SND_IVAL_MS);
return;
+ }
- mtx_enter(&sc->sc_tdb_mtx);
-
- /*
- * if tdb entry is just being processed (found in snapshot),
- * then it can not be deleted. we just came too late
- */
- if (ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED)) {
- mtx_leave(&sc->sc_tdb_mtx);
+ MCLGETL(m, M_DONTWAIT, max_linkhdr + sc->sc_if.if_mtu);
+ if (!ISSET(m->m_flags, M_EXT)) {
+ /* some error++ */
+ m_freem(m);
+ /* retry later */
+ timeout_add_msec(&sc->sc_bulk_snd.snd_tmo,
+ PFSYNC_BULK_SND_IVAL_MS);
return;
}
- TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
+ m_align(m, space);
+ m->m_len = 0;
+
+ rw_enter_read(&pf_state_list.pfs_rwl);
+ rw_enter_write(&sc->sc_bulk_snd.snd_lock);
- mtx_enter(&t->tdb_mtx);
- CLR(t->tdb_flags, TDBF_PFSYNC);
- mtx_leave(&t->tdb_mtx);
+ if (sc->sc_bulk_snd.snd_next == NULL) {
+ /* there was no space in the previous packet for a BUS END */
- nlen = sizeof(struct pfsync_tdb);
- if (TAILQ_EMPTY(&sc->sc_tdb_q))
- nlen += sizeof(struct pfsync_subheader);
- atomic_sub_long(&sc->sc_len, nlen);
+ if (pfsync_bulk_snd_bus(sc, m, space, 0, PFSYNC_BUS_END) == 0)
+ panic("%s: mtu is too low", __func__);
- mtx_leave(&sc->sc_tdb_mtx);
+ /* this bulk is done */
+ pfsync_dprintf(sc, "bulk send done (%s)", __func__);
+ sc->sc_bulk_snd.snd_again = 0; /* XXX */
+ sc->sc_bulk_snd.snd_tail = NULL;
+ } else {
+ pfsync_dprintf(sc, "bulk send again (%s)", __func__);
+
+ /* fill it up with state updates. */
+ pfsync_bulk_snd_sub(sc, m, space);
+ }
+
+ m->m_pkthdr.len = m->m_len;
+ m = pfsync_encap(sc, m);
- tdb_unref(t);
+ rw_exit_write(&sc->sc_bulk_snd.snd_lock);
+ rw_exit_read(&pf_state_list.pfs_rwl);
+
+ if (m != NULL) {
+ NET_LOCK();
+ pfsync_sendout(sc, m);
+ NET_UNLOCK();
+ }
}
-#endif
-void
-pfsync_out_tdb(struct tdb *t, void *buf)
+static void
+pfsync_update_state_req(struct pfsync_softc *sc, struct pf_state *st)
+{
+ struct pfsync_slice *s = pfsync_slice_enter(sc, st);
+
+ switch (st->sync_state) {
+ case PFSYNC_S_UPD_C:
+ case PFSYNC_S_IACK:
+ pfsync_q_del(s, st);
+ /* FALLTHROUGH */
+ case PFSYNC_S_NONE:
+ pfsync_q_ins(s, st, PFSYNC_S_UPD);
+ break;
+
+ case PFSYNC_S_INS:
+ case PFSYNC_S_UPD:
+ case PFSYNC_S_DEL:
+ /* we're already handling it */
+ break;
+ default:
+ panic("%s: state %p unexpected sync_state %d",
+ __func__, st, st->sync_state);
+ }
+
+ pfsync_slice_sched(s);
+ pfsync_slice_leave(sc, s);
+}
+
+#if defined(IPSEC)
+static void
+pfsync_out_tdb(struct tdb *tdb, void *buf)
{
struct pfsync_tdb *ut = buf;
- bzero(ut, sizeof(*ut));
- ut->spi = t->tdb_spi;
- bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
+ memset(ut, 0, sizeof(*ut));
+ ut->spi = tdb->tdb_spi;
+ memcpy(&ut->dst, &tdb->tdb_dst, sizeof(ut->dst));
/*
* When a failover happens, the master's rpl is probably above
* what we see here (we may be up to a second late), so
	 * increase it a bit for outbound tdbs to cover
	 * this edge case.
*/
#define RPL_INCR 16384
- ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
- RPL_INCR : 0));
- ut->cur_bytes = htobe64(t->tdb_cur_bytes);
- ut->sproto = t->tdb_sproto;
- ut->rdomain = htons(t->tdb_rdomain);
+ ut->rpl = htobe64(tdb->tdb_rpl +
+ (ISSET(tdb->tdb_flags, TDBF_PFSYNC_RPL) ? RPL_INCR : 0));
+ ut->cur_bytes = htobe64(tdb->tdb_cur_bytes);
+ ut->sproto = tdb->tdb_sproto;
+ ut->rdomain = htons(tdb->tdb_rdomain);
}
-void
-pfsync_bulk_start(void)
+static struct pfsync_slice *
+pfsync_slice_enter_tdb(struct pfsync_softc *sc, const struct tdb *t)
{
- struct pfsync_softc *sc = pfsyncif;
-
- NET_ASSERT_LOCKED();
-
/*
- * pf gc via pfsync_state_in_use reads sc_bulk_next and
- * sc_bulk_last while exclusively holding the pf_state_list
- * rwlock. make sure it can't race with us setting these
- * pointers. they basically act as hazards, and borrow the
- * lists state reference count.
+ * just use the first slice for all ipsec (for now) until
+ * it's more obvious what property (eg, spi) we can distribute
+ * tdbs over slices with.
*/
- rw_enter_read(&pf_state_list.pfs_rwl);
-
- /* get a consistent view of the list pointers */
- mtx_enter(&pf_state_list.pfs_mtx);
- if (sc->sc_bulk_next == NULL)
- sc->sc_bulk_next = TAILQ_FIRST(&pf_state_list.pfs_list);
-
- sc->sc_bulk_last = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
- mtx_leave(&pf_state_list.pfs_mtx);
-
- rw_exit_read(&pf_state_list.pfs_rwl);
-
- DPFPRINTF(LOG_INFO, "received bulk update request");
+ struct pfsync_slice *s = &sc->sc_slices[0];
- if (sc->sc_bulk_last == NULL)
- pfsync_bulk_status(PFSYNC_BUS_END);
- else {
- sc->sc_ureq_received = getuptime();
-
- pfsync_bulk_status(PFSYNC_BUS_START);
- timeout_add(&sc->sc_bulk_tmo, 0);
+ if (!mtx_enter_try(&s->s_mtx)) {
+ mtx_enter(&s->s_mtx);
+ s->s_stat_contended++;
}
+ s->s_stat_locks++;
+
+ return (s);
}
-void
-pfsync_bulk_update(void *arg)
+static void
+pfsync_tdb_ins(struct pfsync_slice *s, struct tdb *tdb)
{
- struct pfsync_softc *sc;
- struct pf_state *st;
- int i = 0;
-
- NET_LOCK();
- sc = pfsyncif;
- if (sc == NULL)
- goto out;
+ size_t nlen = sizeof(struct pfsync_tdb);
+ struct mbuf *m = NULL;
- rw_enter_read(&pf_state_list.pfs_rwl);
- st = sc->sc_bulk_next;
- sc->sc_bulk_next = NULL;
+ KASSERT(s->s_len >= PFSYNC_MINPKT);
- if (st == NULL) {
- rw_exit_read(&pf_state_list.pfs_rwl);
- goto out;
- }
+ MUTEX_ASSERT_LOCKED(&s->s_mtx);
+ MUTEX_ASSERT_UNLOCKED(&tdb->tdb_mtx);
- for (;;) {
- if (st->sync_state == PFSYNC_S_NONE &&
- st->timeout < PFTM_MAX &&
- st->pfsync_time <= sc->sc_ureq_received) {
- pfsync_update_state_req(st);
- i++;
- }
+ if (TAILQ_EMPTY(&s->s_tdb_q))
+ nlen += sizeof(struct pfsync_subheader);
- st = TAILQ_NEXT(st, entry_list);
- if ((st == NULL) || (st == sc->sc_bulk_last)) {
- /* we're done */
- sc->sc_bulk_last = NULL;
- pfsync_bulk_status(PFSYNC_BUS_END);
- break;
+ if (s->s_len + nlen > s->s_pfsync->sc_if.if_mtu) {
+ m = pfsync_slice_write(s);
+ if (m != NULL) {
+ s->s_stat_enqueue++;
+ if (mq_enqueue(&s->s_sendq, m) == 0)
+ task_add(s->s_softnet, &s->s_send);
}
- if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
- sizeof(struct pfsync_state)) {
- /* we've filled a packet */
- sc->sc_bulk_next = st;
- timeout_add(&sc->sc_bulk_tmo, 1);
- break;
- }
+ nlen = sizeof(struct pfsync_subheader) +
+ sizeof(struct pfsync_tdb);
}
- rw_exit_read(&pf_state_list.pfs_rwl);
- out:
- NET_UNLOCK();
+ s->s_len += nlen;
+ TAILQ_INSERT_TAIL(&s->s_tdb_q, tdb, tdb_sync_entry);
+ tdb->tdb_updates = 0;
+
+ if (!timeout_pending(&s->s_tmo))
+ timeout_add_sec(&s->s_tmo, 1);
}
-void
-pfsync_bulk_status(u_int8_t status)
+static void
+pfsync_tdb_del(struct pfsync_slice *s, struct tdb *tdb)
{
- struct {
- struct pfsync_subheader subh;
- struct pfsync_bus bus;
- } __packed r;
-
- struct pfsync_softc *sc = pfsyncif;
+ MUTEX_ASSERT_LOCKED(&s->s_mtx);
+ MUTEX_ASSERT_UNLOCKED(&tdb->tdb_mtx);
- bzero(&r, sizeof(r));
+ TAILQ_REMOVE(&s->s_tdb_q, tdb, tdb_sync_entry);
- r.subh.action = PFSYNC_ACT_BUS;
- r.subh.len = sizeof(struct pfsync_bus) >> 2;
- r.subh.count = htons(1);
-
- r.bus.creatorid = pf_status.hostid;
- r.bus.endtime = htonl(getuptime() - sc->sc_ureq_received);
- r.bus.status = status;
-
- pfsync_send_plus(&r, sizeof(r));
+ s->s_len -= sizeof(struct pfsync_tdb);
+ if (TAILQ_EMPTY(&s->s_tdb_q))
+ s->s_len -= sizeof(struct pfsync_subheader);
}
+/*
+ * the reference that pfsync has to a tdb is accounted for by the
+ * TDBF_PFSYNC flag, not by tdb_ref/tdb_unref. tdb_delete_tdb() is
+ * called after all other references to a tdb are dropped (with
+ * tdb_unref) as part of the tdb_free().
+ *
+ * tdb_free() needs to wait for pfsync to let go of the tdb though,
+ * which would be best handled by a reference count, but tdb_free
+ * needs the NET_LOCK which pfsync is already fighting with. instead
+ * use the TDBF_PFSYNC_SNAPPED flag to coordinate the pfsync write/drop
+ * with tdb_free.
+ */
+
void
-pfsync_bulk_fail(void *arg)
+pfsync_update_tdb(struct tdb *tdb, int output)
{
struct pfsync_softc *sc;
- NET_LOCK();
- sc = pfsyncif;
- if (sc == NULL)
- goto out;
- if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
- /* Try again */
- timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
- pfsync_request_update(0, 0);
- } else {
- /* Pretend like the transfer was ok */
- sc->sc_ureq_sent = 0;
- sc->sc_bulk_tries = 0;
-#if NCARP > 0
- if (!pfsync_sync_ok)
- carp_group_demote_adj(&sc->sc_if, -1,
- sc->sc_link_demoted ?
- "pfsync link state up" :
- "pfsync bulk fail");
- if (sc->sc_initial_bulk) {
- carp_group_demote_adj(&sc->sc_if, -32,
- "pfsync init");
- sc->sc_initial_bulk = 0;
- }
-#endif
- pfsync_sync_ok = 1;
- sc->sc_link_demoted = 0;
- DPFPRINTF(LOG_ERR, "failed to receive bulk update");
+ MUTEX_ASSERT_UNLOCKED(&tdb->tdb_mtx);
+
+ smr_read_enter();
+ sc = SMR_PTR_GET(&pfsyncif);
+ if (sc != NULL) {
+ struct pfsync_slice *s = pfsync_slice_enter_tdb(sc, tdb);
+
+ /* TDBF_PFSYNC is only changed while the slice mtx is held */
+ if (!ISSET(tdb->tdb_flags, TDBF_PFSYNC)) {
+ mtx_enter(&tdb->tdb_mtx);
+ SET(tdb->tdb_flags, TDBF_PFSYNC);
+ mtx_leave(&tdb->tdb_mtx);
+
+ pfsync_tdb_ins(s, tdb);
+ } else if (++tdb->tdb_updates >= sc->sc_maxupdates)
+ pfsync_slice_sched(s);
+
+ /* XXX no sync timestamp on tdbs to check */
+
+ pfsync_slice_leave(sc, s);
}
- out:
- NET_UNLOCK();
+ smr_read_leave();
}
void
-pfsync_send_plus(void *plus, size_t pluslen)
+pfsync_delete_tdb(struct tdb *tdb)
{
- struct pfsync_softc *sc = pfsyncif;
+ struct pfsync_softc *sc;
- if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
- pfsync_sendout();
+ MUTEX_ASSERT_UNLOCKED(&tdb->tdb_mtx);
- sc->sc_plus = plus;
- sc->sc_pluslen = pluslen;
- atomic_add_long(&sc->sc_len, pluslen);
+ smr_read_enter();
+ sc = SMR_PTR_GET(&pfsyncif);
+ if (sc != NULL) {
+ struct pfsync_slice *s = pfsync_slice_enter_tdb(sc, tdb);
- pfsync_sendout();
-}
+ /* TDBF_PFSYNC is only changed while the slice mtx is held */
+ if (ISSET(tdb->tdb_flags, TDBF_PFSYNC)) {
+ pfsync_tdb_del(s, tdb);
-int
-pfsync_is_up(void)
-{
- struct pfsync_softc *sc = pfsyncif;
+ mtx_enter(&tdb->tdb_mtx);
+ CLR(tdb->tdb_flags, TDBF_PFSYNC);
+ mtx_leave(&tdb->tdb_mtx);
+ }
- if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
- return (0);
+ pfsync_slice_leave(sc, s);
+ }
+ smr_read_leave();
- return (1);
+ /*
+	 * handle pfsync_slice_drop being called from pfsync_down,
+	 * in which case the smr/slice access above can't clear
+	 * TDBF_PFSYNC for us.
+ */
+
+ mtx_enter(&tdb->tdb_mtx);
+ SET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED); /* like a thanos snap */
+ while (ISSET(tdb->tdb_flags, TDBF_PFSYNC)) {
+ msleep_nsec(&tdb->tdb_updates, &tdb->tdb_mtx, PWAIT,
+ "tdbfree", INFSLP);
+ }
+ mtx_leave(&tdb->tdb_mtx);
}
+#endif /* defined(IPSEC) */
-int
-pfsync_state_in_use(struct pf_state *st)
+struct pfsync_act {
+ void (*in)(struct pfsync_softc *, const caddr_t,
+ unsigned int, unsigned int);
+ size_t len;
+};
+
+static void pfsync_in_clr(struct pfsync_softc *,
+ const caddr_t, unsigned int, unsigned int);
+static void pfsync_in_iack(struct pfsync_softc *,
+ const caddr_t, unsigned int, unsigned int);
+static void pfsync_in_upd_c(struct pfsync_softc *,
+ const caddr_t, unsigned int, unsigned int);
+static void pfsync_in_ureq(struct pfsync_softc *,
+ const caddr_t, unsigned int, unsigned int);
+static void pfsync_in_del(struct pfsync_softc *,
+ const caddr_t, unsigned int, unsigned int);
+static void pfsync_in_del_c(struct pfsync_softc *,
+ const caddr_t, unsigned int, unsigned int);
+static void pfsync_in_bus(struct pfsync_softc *,
+ const caddr_t, unsigned int, unsigned int);
+static void pfsync_in_tdb(struct pfsync_softc *,
+ const caddr_t, unsigned int, unsigned int);
+static void pfsync_in_ins(struct pfsync_softc *,
+ const caddr_t, unsigned int, unsigned int);
+static void pfsync_in_upd(struct pfsync_softc *,
+ const caddr_t, unsigned int, unsigned int);
+
+static const struct pfsync_act pfsync_acts[] = {
+ [PFSYNC_ACT_CLR] =
+ { pfsync_in_clr, sizeof(struct pfsync_clr) },
+ [PFSYNC_ACT_INS_ACK] =
+ { pfsync_in_iack, sizeof(struct pfsync_ins_ack) },
+ [PFSYNC_ACT_UPD_C] =
+ { pfsync_in_upd_c, sizeof(struct pfsync_upd_c) },
+ [PFSYNC_ACT_UPD_REQ] =
+ { pfsync_in_ureq, sizeof(struct pfsync_upd_req) },
+ [PFSYNC_ACT_DEL] =
+ { pfsync_in_del, sizeof(struct pfsync_state) },
+ [PFSYNC_ACT_DEL_C] =
+ { pfsync_in_del_c, sizeof(struct pfsync_del_c) },
+ [PFSYNC_ACT_BUS] =
+ { pfsync_in_bus, sizeof(struct pfsync_bus) },
+ [PFSYNC_ACT_INS] =
+ { pfsync_in_ins, sizeof(struct pfsync_state) },
+ [PFSYNC_ACT_UPD] =
+ { pfsync_in_upd, sizeof(struct pfsync_state) },
+ [PFSYNC_ACT_TDB] =
+ { pfsync_in_tdb, sizeof(struct pfsync_tdb) },
+};
+
+static void
+pfsync_in_skip(struct pfsync_softc *sc,
+ const caddr_t buf, unsigned int mlen, unsigned int count)
{
- struct pfsync_softc *sc = pfsyncif;
+ /* nop */
+}
+static struct mbuf *
+pfsync_input(struct mbuf *m, uint8_t ttl, unsigned int hlen)
+{
+ struct pfsync_softc *sc;
+ struct pfsync_header *ph;
+ struct pfsync_subheader *subh;
+ unsigned int len;
+ void (*in)(struct pfsync_softc *,
+ const caddr_t, unsigned int, unsigned int);
+#if NBPFILTER > 0
+ caddr_t if_bpf;
+#endif
+
+ pfsyncstat_inc(pfsyncs_ipackets);
+
+ if (!pf_status.running)
+ return (m);
+
+ /*
+ * pfsyncif is only set if it is up and running correctly.
+ */
+ smr_read_enter();
+ sc = SMR_PTR_GET(&pfsyncif);
if (sc == NULL)
- return (0);
+ goto leave;
- rw_assert_wrlock(&pf_state_list.pfs_rwl);
+ if (sc->sc_sync_ifidx != m->m_pkthdr.ph_ifidx) {
+ pfsyncstat_inc(pfsyncs_badif);
+ goto leave;
+ }
- if (st->sync_state != PFSYNC_S_NONE ||
- st == sc->sc_bulk_next ||
- st == sc->sc_bulk_last)
- return (1);
+#if NBPFILTER > 0
+	/* tap received packets for bpf(4) listeners, as on output */
+	if_bpf = sc->sc_if.if_bpf;
+	if (if_bpf)
+		bpf_mtap(if_bpf, m, BPF_DIRECTION_IN);
+#endif
- return (0);
+ /* verify that the IP TTL is 255. */
+ if (ttl != PFSYNC_DFLTTL) {
+ pfsyncstat_inc(pfsyncs_badttl);
+ goto leave;
+ }
+
+ m_adj(m, hlen);
+
+ if (m->m_pkthdr.len < sizeof(*ph)) {
+ pfsyncstat_inc(pfsyncs_hdrops);
+ goto leave;
+ }
+ if (m->m_len < sizeof(*ph)) {
+ m = m_pullup(m, sizeof(*ph));
+ if (m == NULL)
+ goto leave;
+ }
+
+ ph = mtod(m, struct pfsync_header *);
+ if (ph->version != PFSYNC_VERSION) {
+ pfsyncstat_inc(pfsyncs_badver);
+ goto leave;
+ }
+
+ len = ntohs(ph->len);
+ if (m->m_pkthdr.len < len) {
+ pfsyncstat_inc(pfsyncs_badlen);
+ goto leave;
+ }
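+	/* ignore anything beyond the length the pfsync header claims */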
+ if (m->m_pkthdr.len > len)
+ m->m_pkthdr.len = len;
+
+ /* ok, it's serious now */
+ refcnt_take(&sc->sc_refs);
+ smr_read_leave();
+
+ counters_pkt(sc->sc_if.if_counters, ifc_ipackets, ifc_ibytes, len);
+
+ m_adj(m, sizeof(*ph));
+
+ while (m->m_pkthdr.len >= sizeof(*subh)) {
+ unsigned int action, mlen, count;
+
+ if (m->m_len < sizeof(*subh)) {
+ m = m_pullup(m, sizeof(*subh));
+ if (m == NULL)
+ goto rele;
+ }
+ subh = mtod(m, struct pfsync_subheader *);
+
+ action = subh->action;
+ mlen = subh->len << 2;
+ count = ntohs(subh->count);
+
+ if (action >= PFSYNC_ACT_MAX ||
+ action >= nitems(pfsync_acts) ||
+		    mlen < pfsync_acts[action].len) {
+ /*
+ * subheaders are always followed by at least one
+			 * message, so if the peer is new enough to
+			 * tell us how big its messages are then we
+ * know enough to skip them.
+ */
+ if (count == 0 || mlen == 0) {
+ pfsyncstat_inc(pfsyncs_badact);
+ goto rele;
+ }
+
+ in = pfsync_in_skip;
+ } else {
+ in = pfsync_acts[action].in;
+ if (in == NULL)
+ in = pfsync_in_skip;
+ }
+
+ m_adj(m, sizeof(*subh));
+ len = mlen * count;
+ if (len > m->m_pkthdr.len) {
+ pfsyncstat_inc(pfsyncs_badlen);
+ goto rele;
+ }
+ if (m->m_len < len) {
+ m = m_pullup(m, len);
+ if (m == NULL)
+ goto rele;
+ }
+
+ (*in)(sc, mtod(m, caddr_t), mlen, count);
+ m_adj(m, len);
+ }
+
+rele:
+ refcnt_rele_wake(&sc->sc_refs);
+ return (m);
+
+leave:
+ smr_read_leave();
+ return (m);
}
-void
-pfsync_timeout(void *arg)
+static void
+pfsync_in_clr(struct pfsync_softc *sc,
+ const caddr_t buf, unsigned int mlen, unsigned int count)
{
- NET_LOCK();
- pfsync_sendout();
- NET_UNLOCK();
+ const struct pfsync_clr *clr;
+ struct pf_state *head, *tail, *st, *next;
+ struct pfi_kif *kif;
+ uint32_t creatorid;
+ unsigned int i;
+
+ rw_enter_read(&pf_state_list.pfs_rwl);
+
+ /* get a view of the state list */
+ mtx_enter(&pf_state_list.pfs_mtx);
+ head = TAILQ_FIRST(&pf_state_list.pfs_list);
+ tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
+ mtx_leave(&pf_state_list.pfs_mtx);
+
+ PF_LOCK();
+ for (i = 0; i < count; i++) {
+ clr = (struct pfsync_clr *)(buf + i * mlen);
+
+ creatorid = clr->creatorid;
+ if (clr->ifname[0] == '\0')
+ kif = NULL;
+ else {
+ kif = pfi_kif_find(clr->ifname);
+ if (kif == NULL)
+ continue;
+ }
+
+ st = NULL;
+ next = head;
+
+ PF_STATE_ENTER_WRITE();
+ while (st != tail) {
+ st = next;
+ next = TAILQ_NEXT(st, entry_list);
+
+ if (creatorid != st->creatorid)
+ continue;
+ if (kif != NULL && kif != st->kif)
+ continue;
+
+ mtx_enter(&st->mtx);
+ SET(st->state_flags, PFSTATE_NOSYNC);
+ mtx_leave(&st->mtx);
+ pf_remove_state(st);
+ }
+ PF_STATE_EXIT_WRITE();
+ }
+ PF_UNLOCK();
+
+ rw_exit_read(&pf_state_list.pfs_rwl);
}
-/* this is a softnet/netisr handler */
-void
-pfsyncintr(void)
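+/*
+ * PFSYNC_ACT_INS: full state inserts.  Each message is a complete
+ * struct pfsync_state; validate it before handing it to
+ * pf_state_import() under the pf lock.
+ */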
+static void
+pfsync_in_ins(struct pfsync_softc *sc,
+ const caddr_t buf, unsigned int mlen, unsigned int count)
+{
+ const struct pfsync_state *sp;
+ sa_family_t af1, af2;
+ unsigned int i;
+
+ PF_LOCK();
+ for (i = 0; i < count; i++) {
+ sp = (struct pfsync_state *)(buf + mlen * i);
+ af1 = sp->key[0].af;
+ af2 = sp->key[1].af;
+
+ /* check for invalid values */
+ if (sp->timeout >= PFTM_MAX ||
+ sp->src.state > PF_TCPS_PROXY_DST ||
+ sp->dst.state > PF_TCPS_PROXY_DST ||
+ sp->direction > PF_OUT ||
+ (((af1 || af2) &&
+ ((af1 != AF_INET && af1 != AF_INET6) ||
+ (af2 != AF_INET && af2 != AF_INET6))) ||
+ (sp->af != AF_INET && sp->af != AF_INET6))) {
+ pfsyncstat_inc(pfsyncs_badval);
+ continue;
+ }
+
+ if (pf_state_import(sp, 0) == ENOMEM) {
+ /* drop out, but process the rest of the actions */
+ break;
+ }
+ }
+ PF_UNLOCK();
+}
+
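+/*
+ * PFSYNC_ACT_INS_ACK: the peer has acked states we inserted.  Any
+ * packet that was deferred while waiting for the ack can now be
+ * released via pfsync_deferred().
+ */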
+static void
+pfsync_in_iack(struct pfsync_softc *sc,
+ const caddr_t buf, unsigned int mlen, unsigned int count)
+{
+ const struct pfsync_ins_ack *ia;
+ struct pf_state_cmp id_key;
+ struct pf_state *st;
+ unsigned int i;
+
+ for (i = 0; i < count; i++) {
+ ia = (struct pfsync_ins_ack *)(buf + mlen * i);
+
+ id_key.id = ia->id;
+ id_key.creatorid = ia->creatorid;
+
+ PF_STATE_ENTER_READ();
+ st = pf_find_state_byid(&id_key);
+ pf_state_ref(st);
+ PF_STATE_EXIT_READ();
+ if (st == NULL)
+ continue;
+
+ if (READ_ONCE(st->sync_defer) != NULL)
+ pfsync_deferred(sc, st);
+
+ pf_state_unref(st);
+ }
+}
+
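+/*
+ * Merge the src/dst peers from a TCP state update into the local
+ * state.  The return value counts the local peers that are ahead of
+ * the update, i.e. how stale the peer's copy is.
+ */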
+static int
+pfsync_upd_tcp(struct pf_state *st, const struct pfsync_state_peer *src,
+ const struct pfsync_state_peer *dst)
+{
+ int sync = 0;
+
+ /*
+ * The state should never go backwards except
+ * for syn-proxy states. Neither should the
+ * sequence window slide backwards.
+ */
+ if ((st->src.state > src->state &&
+ (st->src.state < PF_TCPS_PROXY_SRC ||
+ src->state >= PF_TCPS_PROXY_SRC)) ||
+
+ (st->src.state == src->state &&
+ SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
+ sync++;
+ else
+ pf_state_peer_ntoh(src, &st->src);
+
+ if ((st->dst.state > dst->state) ||
+
+ (st->dst.state >= TCPS_SYN_SENT &&
+ SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
+ sync++;
+ else
+ pf_state_peer_ntoh(dst, &st->dst);
+
+ return (sync);
+}
+
+static void
+pfsync_in_updates(struct pfsync_softc *sc, struct pf_state *st,
+ const struct pfsync_state_peer *src, const struct pfsync_state_peer *dst,
+ uint8_t timeout)
{
- pfsync_sendout();
+ struct pf_state_scrub *sscrub = NULL;
+ struct pf_state_scrub *dscrub = NULL;
+ int sync;
+
+ if (src->scrub.scrub_flag && st->src.scrub == NULL) {
+ sscrub = pf_state_scrub_get();
+ if (sscrub == NULL) {
+ /* inc error? */
+ goto out;
+ }
+ }
+ if (dst->scrub.scrub_flag && st->dst.scrub == NULL) {
+ dscrub = pf_state_scrub_get();
+ if (dscrub == NULL) {
+ /* inc error? */
+ goto out;
+ }
+ }
+
+ if (READ_ONCE(st->sync_defer) != NULL)
+ pfsync_deferred(sc, st);
+
+ mtx_enter(&st->mtx);
+
+ /* attach the scrub memory if needed */
+ if (sscrub != NULL && st->src.scrub == NULL) {
+ st->src.scrub = sscrub;
+ sscrub = NULL;
+ }
+ if (dscrub != NULL && st->dst.scrub == NULL) {
+ st->dst.scrub = dscrub;
+ dscrub = NULL;
+ }
+
+ if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
+ sync = pfsync_upd_tcp(st, src, dst);
+ else {
+ sync = 0;
+
+		/*
+		 * Non-TCP protocol state machines always go
+		 * forwards.
+		 */
+ if (st->src.state > src->state)
+ sync++;
+ else
+ pf_state_peer_ntoh(src, &st->src);
+
+ if (st->dst.state > dst->state)
+ sync++;
+ else
+ pf_state_peer_ntoh(dst, &st->dst);
+ }
+
+ st->pfsync_time = getuptime();
+ if (sync < 2) {
+ st->expire = st->pfsync_time;
+ st->timeout = timeout;
+ }
+
+ mtx_leave(&st->mtx);
+
+ if (sync) {
+ pfsyncstat_inc(pfsyncs_stale);
+ pfsync_update_state(st);
+ }
+
+out:
+ if (sscrub != NULL)
+ pf_state_scrub_put(sscrub);
+ if (dscrub != NULL)
+ pf_state_scrub_put(dscrub);
+}
+
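+/*
+ * PFSYNC_ACT_UPD: full state updates.  An update for a state we do
+ * not have is treated as an insert; otherwise the peers are merged
+ * by pfsync_in_updates().
+ */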
+static void
+pfsync_in_upd(struct pfsync_softc *sc,
+ const caddr_t buf, unsigned int mlen, unsigned int count)
+{
+ const struct pfsync_state *sp;
+ struct pf_state_cmp id_key;
+ struct pf_state *st;
+ int error;
+ unsigned int i;
+
+ for (i = 0; i < count; i++) {
+ sp = (struct pfsync_state *)(buf + mlen * i);
+
+ /* check for invalid values */
+ if (sp->timeout >= PFTM_MAX ||
+ sp->src.state > PF_TCPS_PROXY_DST ||
+ sp->dst.state > PF_TCPS_PROXY_DST) {
+ pfsyncstat_inc(pfsyncs_badval);
+ continue;
+ }
+
+ id_key.id = sp->id;
+ id_key.creatorid = sp->creatorid;
+
+ PF_STATE_ENTER_READ();
+ st = pf_find_state_byid(&id_key);
+ pf_state_ref(st);
+ PF_STATE_EXIT_READ();
+ if (st == NULL) {
+ /* insert the update */
+ PF_LOCK();
+ error = pf_state_import(sp, 0);
+ if (error)
+ pfsyncstat_inc(pfsyncs_badstate);
+ PF_UNLOCK();
+ continue;
+ }
+
+ pfsync_in_updates(sc, st, &sp->src, &sp->dst, sp->timeout);
+
+ pf_state_unref(st);
+ }
+}
+
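+/*
+ * Allocate an mbuf big enough for a batch of update requests; it is
+ * filled backwards with m_prepend() as requests are added.
+ */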
+static struct mbuf *
+pfsync_upd_req_init(struct pfsync_softc *sc, unsigned int count)
+{
+ struct mbuf *m;
+ unsigned int mlen;
+
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m == NULL) {
+ pfsyncstat_inc(pfsyncs_onomem);
+ return (NULL);
+ }
+
+ mlen = max_linkhdr + sizeof(sc->sc_template) +
+ sizeof(struct pfsync_header) +
+ sizeof(struct pfsync_subheader) +
+ sizeof(struct pfsync_upd_req) * count;
+
+ if (mlen > MHLEN) {
+ MCLGETL(m, M_DONTWAIT, mlen);
+ if (!ISSET(m->m_flags, M_EXT)) {
+ m_freem(m);
+ return (NULL);
+ }
+ }
+
+	/* start at the tail; requests are added with m_prepend() */
+	m_align(m, 0);
+	m->m_len = 0;
+
+ return (m);
+}
+
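+/*
+ * PFSYNC_ACT_UPD_C: compressed updates keyed by id/creatorid alone.
+ * Updates for unknown states are batched into a PFSYNC_ACT_UPD_REQ
+ * packet asking the peer for the full versions.
+ */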
+static void
+pfsync_in_upd_c(struct pfsync_softc *sc,
+ const caddr_t buf, unsigned int mlen, unsigned int count)
+{
+ const struct pfsync_upd_c *up;
+ struct pf_state_cmp id_key;
+ struct pf_state *st;
+ unsigned int i;
+ struct mbuf *m = NULL;
+ unsigned int rcount = 0;
+
+ for (i = 0; i < count; i++) {
+ up = (struct pfsync_upd_c *)(buf + mlen * i);
+
+ /* check for invalid values */
+ if (up->timeout >= PFTM_MAX ||
+ up->src.state > PF_TCPS_PROXY_DST ||
+ up->dst.state > PF_TCPS_PROXY_DST) {
+ pfsyncstat_inc(pfsyncs_badval);
+ continue;
+ }
+
+ id_key.id = up->id;
+ id_key.creatorid = up->creatorid;
+
+ PF_STATE_ENTER_READ();
+ st = pf_find_state_byid(&id_key);
+ pf_state_ref(st);
+ PF_STATE_EXIT_READ();
+ if (st == NULL) {
+ /* We don't have this state. Ask for it. */
+ struct pfsync_upd_req *ur;
+
+ if (m == NULL) {
+ m = pfsync_upd_req_init(sc, count);
+ if (m == NULL) {
+ pfsyncstat_inc(pfsyncs_onomem);
+ continue;
+ }
+ }
+
+			m = m_prepend(m, sizeof(*ur), M_DONTWAIT);
+			if (m == NULL) {
+				/* the chain and its requests were freed */
+				rcount = 0;
+				pfsyncstat_inc(pfsyncs_onomem);
+				continue;
+			}
+
+ ur = mtod(m, struct pfsync_upd_req *);
+ ur->id = up->id;
+ ur->creatorid = up->creatorid;
+ rcount++;
+
+ continue;
+ }
+
+ pfsync_in_updates(sc, st, &up->src, &up->dst, up->timeout);
+
+ pf_state_unref(st);
+ }
+
+ if (m != NULL) {
+ struct pfsync_subheader *subh;
+
+ m = m_prepend(m, sizeof(*subh), M_DONTWAIT);
+ if (m == NULL) {
+ pfsyncstat_inc(pfsyncs_onomem);
+ return;
+ }
+
+ subh = mtod(m, struct pfsync_subheader *);
+ subh->action = PFSYNC_ACT_UPD_REQ;
+ subh->len = sizeof(struct pfsync_upd_req) >> 2;
+ subh->count = htons(rcount);
+
+ m = pfsync_encap(sc, m);
+ if (m == NULL) {
+ pfsyncstat_inc(pfsyncs_onomem);
+ return;
+ }
+
+ pfsync_sendout(sc, m);
+ }
+}
+
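+/*
+ * PFSYNC_ACT_UPD_REQ: the peer wants full copies of specific states,
+ * or, if both id and creatorid are zero, a full bulk send.
+ */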
+static void
+pfsync_in_ureq(struct pfsync_softc *sc,
+ const caddr_t buf, unsigned int mlen, unsigned int count)
+{
+ const struct pfsync_upd_req *ur;
+ struct pf_state_cmp id_key;
+ struct pf_state *st;
+ unsigned int i;
+
+ for (i = 0; i < count; i++) {
+ ur = (struct pfsync_upd_req *)(buf + mlen * i);
+
+ id_key.id = ur->id;
+ id_key.creatorid = ur->creatorid;
+
+ if (id_key.id == 0 && id_key.creatorid == 0) {
+ pfsync_bulk_snd_start(sc);
+ continue;
+ }
+
+ PF_STATE_ENTER_READ();
+ st = pf_find_state_byid(&id_key);
+ if (st != NULL && st->timeout < PFTM_MAX &&
+ !ISSET(st->state_flags, PFSTATE_NOSYNC))
+ pf_state_ref(st);
+ else
+ st = NULL;
+ PF_STATE_EXIT_READ();
+ if (st == NULL) {
+ pfsyncstat_inc(pfsyncs_badstate);
+ continue;
+ }
+
+ pfsync_update_state_req(sc, st);
+
+ pf_state_unref(st);
+ }
+}
+
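+/*
+ * PFSYNC_ACT_DEL and PFSYNC_ACT_DEL_C: the peer removed these
+ * states.  Flag them NOSYNC before pf_remove_state() so the removal
+ * is not announced back to the peer.
+ */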
+static void
+pfsync_in_del(struct pfsync_softc *sc,
+ const caddr_t buf, unsigned int mlen, unsigned int count)
+{
+ const struct pfsync_state *sp;
+ struct pf_state_cmp id_key;
+ struct pf_state *st;
+ unsigned int i;
+
+ PF_LOCK();
+ PF_STATE_ENTER_WRITE();
+ for (i = 0; i < count; i++) {
+ sp = (struct pfsync_state *)(buf + mlen * i);
+
+ id_key.id = sp->id;
+ id_key.creatorid = sp->creatorid;
+
+ st = pf_find_state_byid(&id_key);
+ if (st == NULL) {
+ pfsyncstat_inc(pfsyncs_badstate);
+ continue;
+ }
+
+ mtx_enter(&st->mtx);
+ SET(st->state_flags, PFSTATE_NOSYNC);
+ mtx_leave(&st->mtx);
+ pf_remove_state(st);
+ }
+ PF_STATE_EXIT_WRITE();
+ PF_UNLOCK();
+}
+
+static void
+pfsync_in_del_c(struct pfsync_softc *sc,
+ const caddr_t buf, unsigned int mlen, unsigned int count)
+{
+ const struct pfsync_del_c *sp;
+ struct pf_state_cmp id_key;
+ struct pf_state *st;
+ unsigned int i;
+
+ PF_LOCK();
+ PF_STATE_ENTER_WRITE();
+ for (i = 0; i < count; i++) {
+ sp = (struct pfsync_del_c *)(buf + mlen * i);
+
+ id_key.id = sp->id;
+ id_key.creatorid = sp->creatorid;
+
+ st = pf_find_state_byid(&id_key);
+ if (st == NULL) {
+ pfsyncstat_inc(pfsyncs_badstate);
+ continue;
+ }
+
+ mtx_enter(&st->mtx);
+ SET(st->state_flags, PFSTATE_NOSYNC);
+ mtx_leave(&st->mtx);
+ pf_remove_state(st);
+ }
+ PF_STATE_EXIT_WRITE();
+ PF_UNLOCK();
+}
+
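+/*
+ * PFSYNC_ACT_BUS: bulk update status messages, used to drive the
+ * bulk request state machine on this side.
+ */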
+static void
+pfsync_in_bus(struct pfsync_softc *sc,
+ const caddr_t buf, unsigned int len, unsigned int count)
+{
+ const struct pfsync_bus *bus = (struct pfsync_bus *)buf;
+
+ switch (bus->status) {
+ case PFSYNC_BUS_START:
+ pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_BUS_START);
+ break;
+
+ case PFSYNC_BUS_END:
+ pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_BUS_END);
+ break;
+ }
+}
+
+#if defined(IPSEC)
+/* Update an in-kernel tdb. Silently fail if no tdb is found. */
+static void
+pfsync_update_net_tdb(const struct pfsync_tdb *pt)
+{
+ struct tdb *tdb;
+
+ NET_ASSERT_LOCKED();
+
+ /* check for invalid values */
+ if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
+ (pt->dst.sa.sa_family != AF_INET &&
+ pt->dst.sa.sa_family != AF_INET6))
+ goto bad;
+
+ tdb = gettdb(ntohs(pt->rdomain), pt->spi,
+ (union sockaddr_union *)&pt->dst, pt->sproto);
+ if (tdb) {
+ uint64_t rpl = betoh64(pt->rpl);
+ uint64_t cur_bytes = betoh64(pt->cur_bytes);
+
+ /* Neither replay nor byte counter should ever decrease. */
+ mtx_enter(&tdb->tdb_mtx);
+ if (rpl >= tdb->tdb_rpl &&
+ cur_bytes >= tdb->tdb_cur_bytes) {
+ tdb->tdb_rpl = rpl;
+ tdb->tdb_cur_bytes = cur_bytes;
+ }
+ mtx_leave(&tdb->tdb_mtx);
+
+ tdb_unref(tdb);
+ }
+ return;
+
+ bad:
+	DPFPRINTF(LOG_WARNING, "pfsync_update_net_tdb: PFSYNC_ACT_TDB: "
+	    "invalid value");
+ pfsyncstat_inc(pfsyncs_badstate);
+ return;
+}
+#endif
+
+static void
+pfsync_in_tdb(struct pfsync_softc *sc,
+ const caddr_t buf, unsigned int len, unsigned int count)
+{
+#if defined(IPSEC)
+ const struct pfsync_tdb *tp;
+ unsigned int i;
+
+ for (i = 0; i < count; i++) {
+ tp = (const struct pfsync_tdb *)(buf + len * i);
+ pfsync_update_net_tdb(tp);
+ }
+#endif
+}
+
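+/*
+ * pr_input handler for pfsync over IPv4: strip the IP header and
+ * hand the payload to pfsync_input() along with the TTL for its
+ * PFSYNC_DFLTTL check.
+ */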
+int
+pfsync_input4(struct mbuf **mp, int *offp, int proto, int af)
+{
+ struct mbuf *m = *mp;
+ struct ip *ip;
+
+ ip = mtod(m, struct ip *);
+
+ m = pfsync_input(m, ip->ip_ttl, ip->ip_hl << 2);
+
+ m_freem(m);
+ *mp = NULL;
+
+ return (IPPROTO_DONE);
}

int
-pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
- size_t newlen)
+pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen)
{
/* All sysctl names at this level are terminal. */
if (namelen != 1)