From: bluhm Date: Fri, 25 Nov 2022 20:27:53 +0000 (+0000) Subject: revert pf.c r1.1152 again: move pf_purge out from under the kernel lock X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=af09f97befaf183c8fe005c6f3670aeed4f82daa;p=openbsd revert pf.c r1.1152 again: move pf_purge out from under the kernel lock Using systqmp for pf_purge creates a deadlock between pf_purge() and ixgbe_stop() and possibly other drivers. On systqmp pf(4) needs netlock which the interface ioctl(2) is holding. ix(4) waits in sched_barrier() which is also scheduled on the systqmp task queue. Removing the netlock from pf_purge() as a quick fix caused other problems. backout suggested by deraadt@ --- diff --git a/sys/net/pf.c b/sys/net/pf.c index 33f030f9aa5..593f08ed4e2 100644 --- a/sys/net/pf.c +++ b/sys/net/pf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.1155 2022/11/25 18:03:53 kettenis Exp $ */ +/* $OpenBSD: pf.c,v 1.1156 2022/11/25 20:27:53 bluhm Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -120,6 +120,10 @@ u_char pf_tcp_secret[16]; int pf_tcp_secret_init; int pf_tcp_iss_off; +int pf_npurge; +struct task pf_purge_task = TASK_INITIALIZER(pf_purge, &pf_npurge); +struct timeout pf_purge_to = TIMEOUT_INITIALIZER(pf_purge_timeout, NULL); + enum pf_test_status { PF_TEST_FAIL = -1, PF_TEST_OK, @@ -1516,110 +1520,47 @@ pf_state_import(const struct pfsync_state *sp, int flags) /* END state table stuff */ -void pf_purge_states(void *); -struct task pf_purge_states_task = - TASK_INITIALIZER(pf_purge_states, NULL); - -void pf_purge_states_tick(void *); -struct timeout pf_purge_states_to = - TIMEOUT_INITIALIZER(pf_purge_states_tick, NULL); - -unsigned int pf_purge_expired_states(unsigned int, unsigned int); - -/* - * how many states to scan this interval. - * - * this is set when the timeout fires, and reduced by the task. the - * task will reschedule itself until the limit is reduced to zero, - * and then it adds the timeout again. - */ -unsigned int pf_purge_states_limit; - -/* - * limit how many states are processed with locks held per run of - * the state purge task. - */ -unsigned int pf_purge_states_collect = 64; - -void -pf_purge_states_tick(void *null) -{ - unsigned int limit = pf_status.states; - unsigned int interval = pf_default_rule.timeout[PFTM_INTERVAL]; - - if (limit == 0) { - timeout_add_sec(&pf_purge_states_to, 1); - return; - } - - /* - * process a fraction of the state table every second - */ - - if (interval > 1) - limit /= interval; - - pf_purge_states_limit = limit; - task_add(systqmp, &pf_purge_states_task); -} - void -pf_purge_states(void *null) +pf_purge_timeout(void *unused) { - unsigned int limit; - unsigned int scanned; - - limit = pf_purge_states_limit; - if (limit < pf_purge_states_collect) - limit = pf_purge_states_collect; - - scanned = pf_purge_expired_states(limit, pf_purge_states_collect); - if (scanned >= pf_purge_states_limit) { - /* we've run out of states to scan this "interval" */ - timeout_add_sec(&pf_purge_states_to, 1); - return; - } - - pf_purge_states_limit -= scanned; - task_add(systqmp, &pf_purge_states_task); + /* XXX move to systqmp to avoid KERNEL_LOCK */ + task_add(systq, &pf_purge_task); } -void pf_purge_tick(void *); -struct timeout pf_purge_to = - TIMEOUT_INITIALIZER(pf_purge_tick, NULL); - -void pf_purge(void *); -struct task pf_purge_task = - TASK_INITIALIZER(pf_purge, NULL); - void -pf_purge_tick(void *null) +pf_purge(void *xnloops) { - task_add(systqmp, &pf_purge_task); -} + int *nloops = xnloops; -void -pf_purge(void *null) -{ - unsigned int interval = max(1, pf_default_rule.timeout[PFTM_INTERVAL]); + /* + * process a fraction of the state table every second + * Note: + * we no longer need PF_LOCK() here, because + * pf_purge_expired_states() uses pf_state_lock to maintain + * consistency. + */ + if (pf_default_rule.timeout[PFTM_INTERVAL] > 0) + pf_purge_expired_states(1 + (pf_status.states + / pf_default_rule.timeout[PFTM_INTERVAL])); - /* XXX is NET_LOCK necessary? */ NET_LOCK(); PF_LOCK(); - - pf_purge_expired_src_nodes(); - + /* purge other expired types every PFTM_INTERVAL seconds */ + if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) + pf_purge_expired_src_nodes(); PF_UNLOCK(); /* * Fragments don't require PF_LOCK(), they use their own lock. */ - pf_purge_expired_fragments(); + if ((*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) { + pf_purge_expired_fragments(); + *nloops = 0; + } NET_UNLOCK(); - /* interpret the interval as idle time between runs */ - timeout_add_sec(&pf_purge_to, interval); + timeout_add_sec(&pf_purge_to, 1); } int32_t @@ -1819,8 +1760,8 @@ pf_free_state(struct pf_state *cur) pf_status.states--; } -unsigned int -pf_purge_expired_states(const unsigned int limit, const unsigned int collect) +void +pf_purge_expired_states(u_int32_t maxcheck) { /* * this task/thread/context/whatever is the only thing that @@ -1834,8 +1775,6 @@ pf_purge_expired_states(const unsigned int limit, const unsigned int collect) struct pf_state *st; SLIST_HEAD(pf_state_gcl, pf_state) gcl = SLIST_HEAD_INITIALIZER(gcl); time_t now; - unsigned int scanned; - unsigned int collected = 0; PF_ASSERT_UNLOCKED(); @@ -1849,7 +1788,7 @@ pf_purge_expired_states(const unsigned int limit, const unsigned int collect) if (head == NULL) { /* the list is empty */ rw_exit_read(&pf_state_list.pfs_rwl); - return (limit); + return; } /* (re)start at the front of the list */ @@ -1858,38 +1797,28 @@ pf_purge_expired_states(const unsigned int limit, const unsigned int collect) now = getuptime(); - for (scanned = 0; scanned < limit; scanned++) { + do { uint8_t stimeout = cur->timeout; - unsigned int limited = 0; if ((stimeout == PFTM_UNLINKED) || (pf_state_expires(cur, stimeout) <= now)) { st = pf_state_ref(cur); SLIST_INSERT_HEAD(&gcl, st, gc_list); - - if (++collected >= collect) - limited = 1; } /* don't iterate past the end of our view of the list */ if (cur == tail) { - scanned = limit; cur = NULL; break; } cur = TAILQ_NEXT(cur, entry_list); - - /* don't spend too much time here. */ - if (ISSET(READ_ONCE(curcpu()->ci_schedstate.spc_schedflags), - SPCF_SHOULDYIELD) || limited) - break; - } + } while (maxcheck--); rw_exit_read(&pf_state_list.pfs_rwl); if (SLIST_EMPTY(&gcl)) - return (scanned); + return; NET_LOCK(); rw_enter_write(&pf_state_list.pfs_rwl); @@ -1910,8 +1839,6 @@ pf_purge_expired_states(const unsigned int limit, const unsigned int collect) SLIST_REMOVE_HEAD(&gcl, gc_list); pf_state_unref(st); } - - return (scanned); } int diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c index e8e7ccd9027..a8cf59e0aae 100644 --- a/sys/net/pf_ioctl.c +++ b/sys/net/pf_ioctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_ioctl.c,v 1.391 2022/11/11 16:12:08 dlg Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.392 2022/11/25 20:27:53 bluhm Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -1145,7 +1145,6 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pf_status.stateid = gettime(); pf_status.stateid = pf_status.stateid << 32; } - timeout_add_sec(&pf_purge_states_to, 1); timeout_add_sec(&pf_purge_to, 1); pf_create_queues(); DPFPRINTF(LOG_NOTICE, "pf: started"); @@ -2741,9 +2740,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) pf_default_rule.timeout[i] = pf_default_rule_new.timeout[i]; if (pf_default_rule.timeout[i] == PFTM_INTERVAL && - pf_default_rule.timeout[i] < old && - timeout_del(&pf_purge_to)) - task_add(systqmp, &pf_purge_task); + pf_default_rule.timeout[i] < old) + task_add(net_tq(0), &pf_purge_task); } pfi_xcommit(); pf_trans_set_commit(); diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index deabd17741f..1023966b087 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pfvar.h,v 1.520 2022/11/11 16:12:08 dlg Exp $ */ +/* $OpenBSD: pfvar.h,v 1.521 2022/11/25 20:27:53 bluhm Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -1633,6 +1633,7 @@ extern void pf_tbladdr_remove(struct pf_addr_wrap *); extern void pf_tbladdr_copyout(struct pf_addr_wrap *); extern void pf_calc_skip_steps(struct pf_rulequeue *); extern void pf_purge_expired_src_nodes(void); +extern void pf_purge_expired_states(u_int32_t); extern void pf_purge_expired_rules(void); extern void pf_remove_state(struct pf_state *); extern void pf_remove_divert_state(struct pf_state_key *); diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h index ec08d6c0aba..28d98e4c2e1 100644 --- a/sys/net/pfvar_priv.h +++ b/sys/net/pfvar_priv.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pfvar_priv.h,v 1.22 2022/11/24 00:04:32 mvs Exp $ */ +/* $OpenBSD: pfvar_priv.h,v 1.23 2022/11/25 20:27:53 bluhm Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -267,7 +267,6 @@ struct pf_pdesc { } hdr; }; -extern struct timeout pf_purge_states_to; extern struct task pf_purge_task; extern struct timeout pf_purge_to; @@ -320,6 +319,9 @@ extern struct rwlock pf_state_lock; rw_status(&pf_state_lock), __func__);\ } while (0) +extern void pf_purge_timeout(void *); +extern void pf_purge(void *); + /* for copies to/from network byte order */ void pf_state_peer_hton(const struct pf_state_peer *, struct pfsync_state_peer *);