From: sashan
Date: Fri, 6 Jan 2023 17:44:33 +0000 (+0000)
Subject: PF_ANCHOR_STACK_MAX is insufficient protection against stack overflow.
X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=acafb14ed9a415a9d14f4ce03118e42089440453;p=openbsd

PF_ANCHOR_STACK_MAX is insufficient protection against stack overflow.

On amd64 the stack overflows for an anchor rule at depth ~30. The tricky
thing is that the 'safe' depth varies with the kind of packet processed by
pf_match_rule(). For example, for a local outbound TCP packet the stack
overflows once recursion in pf_match_rule() reaches depth 24.

Instead of lowering PF_ANCHOR_STACK_MAX to 20 and hoping it will be enough
on all platforms and for all packets, I'd like to stop calling
pf_match_rule() recursively. This commit brings back the
pf_anchor_stackframe array we used to have back in 2017. It also revives
patrick@'s idea to pre-allocate the stack frame arrays from per-CPU memory.

OK kn@
---

diff --git a/sbin/pfctl/pfctl.c b/sbin/pfctl/pfctl.c
index 3aa81b88bc7..8cbd9d77b4f 100644
--- a/sbin/pfctl/pfctl.c
+++ b/sbin/pfctl/pfctl.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pfctl.c,v 1.389 2022/11/19 14:01:51 kn Exp $ */
+/* $OpenBSD: pfctl.c,v 1.390 2023/01/06 17:44:33 sashan Exp $ */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
@@ -841,6 +841,11 @@ pfctl_show_rules(int dev, char *path, int opts, enum pfctl_show format,
 	int len = strlen(path), ret = 0;
 	char *npath, *p;
 
+	if (depth > PF_ANCHOR_STACK_MAX) {
+		warnx("%s: max stack depth exceeded for %s", __func__, path);
+		return (-1);
+	}
+
 	/*
 	 * Truncate a trailing / and * on an anchorname before searching for
 	 * the ruleset, this is syntactic sugar that doesn't actually make it
diff --git a/sys/net/pf.c b/sys/net/pf.c
index 7619413e65b..4e638f61dc1 100644
--- a/sys/net/pf.c
+++ b/sys/net/pf.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pf.c,v 1.1168 2023/01/05 23:44:35 dlg Exp $ */
+/* $OpenBSD: pf.c,v 1.1169 2023/01/06 17:44:34 sashan Exp $ */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
@@ -132,7 +132,6 @@ enum pf_test_status {
 };
 
 struct pf_test_ctx {
-	enum pf_test_status test_status;
 	struct pf_pdesc *pd;
 	struct pf_rule_actions act;
 	u_int8_t icmpcode;
@@ -152,11 +151,8 @@ struct pf_test_ctx {
 	struct pf_ruleset *arsm;
 	struct pf_ruleset *aruleset;
 	struct tcphdr *th;
-	int depth;
 };
 
-#define PF_ANCHOR_STACK_MAX 64
-
 struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
 struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl;
 struct pool pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl;
@@ -3514,48 +3510,112 @@ pf_tag_packet(struct mbuf *m, int tag, int rtableid)
 		m->m_pkthdr.ph_rtableid = (u_int)rtableid;
 }
 
-enum pf_test_status
-pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r)
+void
+pf_anchor_stack_init(void)
 {
-	int rv;
+	struct pf_anchor_stackframe *stack;
 
-	if (ctx->depth >= PF_ANCHOR_STACK_MAX) {
-		log(LOG_ERR, "pf_step_into_anchor: stack overflow\n");
-		return (PF_TEST_FAIL);
-	}
+	stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
+	stack[PF_ANCHOR_STACK_MAX].sf_stack_top = &stack[0];
+	cpumem_leave(pf_anchor_stack, stack);
+}
 
-	ctx->depth++;
+int
+pf_anchor_stack_is_full(struct pf_anchor_stackframe *sf)
+{
+	struct pf_anchor_stackframe *stack;
+	int rv;
 
-	if (r->anchor_wildcard) {
-		struct pf_anchor *child;
-		rv = PF_TEST_OK;
-		RB_FOREACH(child, pf_anchor_node, &r->anchor->children) {
-			rv = pf_match_rule(ctx, &child->ruleset);
-			if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
-				/*
-				 * we either hit a rule with quick action
-				 * (more likely), or hit some runtime
-				 * error (e.g. pool_get() failure).
-				 */
-				break;
-			}
-		}
-	} else {
-		rv = pf_match_rule(ctx, &r->anchor->ruleset);
-		/*
-		 * Unless errors occurred, stop iff any rule matched
-		 * within quick anchors.
-		 */
-		if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK &&
-		    *ctx->am == r)
-			rv = PF_TEST_QUICK;
-	}
+	stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
+	rv = (sf == &stack[PF_ANCHOR_STACK_MAX]);
+	cpumem_leave(pf_anchor_stack, stack);
 
-	ctx->depth--;
+	return (rv);
+}
+
+int
+pf_anchor_stack_is_empty(struct pf_anchor_stackframe *sf)
+{
+	struct pf_anchor_stackframe *stack;
+	int rv;
+
+	stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
+	rv = (sf == &stack[0]);
+	cpumem_leave(pf_anchor_stack, stack);
 
 	return (rv);
 }
 
+struct pf_anchor_stackframe *
+pf_anchor_stack_top(void)
+{
+	struct pf_anchor_stackframe *stack;
+	struct pf_anchor_stackframe *top_sf;
+
+	stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
+	top_sf = stack[PF_ANCHOR_STACK_MAX].sf_stack_top;
+	cpumem_leave(pf_anchor_stack, stack);
+
+	return (top_sf);
+}
+
+int
+pf_anchor_stack_push(struct pf_ruleset *rs, struct pf_rule *r,
+    struct pf_anchor *child, int jump_target)
+{
+	struct pf_anchor_stackframe *stack;
+	struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top();
+
+	top_sf++;
+	if (pf_anchor_stack_is_full(top_sf))
+		return (-1);
+
+	top_sf->sf_rs = rs;
+	top_sf->sf_r = r;
+	top_sf->sf_child = child;
+	top_sf->sf_jump_target = jump_target;
+
+	stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
+
+	if ((top_sf <= &stack[0]) || (top_sf >= &stack[PF_ANCHOR_STACK_MAX]))
+		panic("%s: top frame outside of anchor stack range", __func__);
+
+	stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf;
+	cpumem_leave(pf_anchor_stack, stack);
+
+	return (0);
+}
+
+int
+pf_anchor_stack_pop(struct pf_ruleset **rs, struct pf_rule **r,
+    struct pf_anchor **child, int *jump_target)
+{
+	struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top();
+	struct pf_anchor_stackframe *stack;
+	int on_top;
+
+	stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
+	if (pf_anchor_stack_is_empty(top_sf)) {
+		on_top = -1;
+	} else {
+		if ((top_sf <= &stack[0]) ||
+		    (top_sf >= &stack[PF_ANCHOR_STACK_MAX]))
+			panic("%s: top frame outside of anchor stack range",
+			    __func__);
+
+		*rs = top_sf->sf_rs;
+		*r = top_sf->sf_r;
+		*child = top_sf->sf_child;
+		*jump_target = top_sf->sf_jump_target;
+		top_sf--;
+		stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf;
+		on_top = 0;
+	}
+	cpumem_leave(pf_anchor_stack, stack);
+
+	return (on_top);
+}
+
 void
 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, struct pf_addr *rmask,
     struct pf_addr *saddr, sa_family_t af)
@@ -3917,11 +3977,12 @@ pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a)
 enum pf_test_status
 pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset)
 {
-	struct pf_rule *r;
-	struct pf_rule *save_a;
-	struct pf_ruleset *save_aruleset;
+	struct pf_rule *r;
+	struct pf_anchor *child = NULL;
+	int target;
 
-retry:
+	pf_anchor_stack_init();
+enter_ruleset:
 	r = TAILQ_FIRST(ruleset->rules.active.ptr);
 	while (r != NULL) {
 		PF_TEST_ATTRIB(r->rule_flag & PFRULE_EXPIRED,
@@ -4057,18 +4118,19 @@ retry:
 			rule_flag = r->rule_flag;
 			if (((rule_flag & PFRULE_EXPIRED) == 0) &&
 			    atomic_cas_uint(&r->rule_flag, rule_flag,
-				rule_flag | PFRULE_EXPIRED) == rule_flag)
+				rule_flag | PFRULE_EXPIRED) == rule_flag) {
 				r->exptime = gettime();
-			else
-				goto retry;
+			} else {
+				r = TAILQ_NEXT(r, entries);
+				continue;
+			}
 		}
 
 		if (r->action == PF_MATCH) {
 			if ((ctx->ri = pool_get(&pf_rule_item_pl,
 			    PR_NOWAIT)) == NULL) {
 				REASON_SET(&ctx->reason, PFRES_MEMORY);
-				ctx->test_status = PF_TEST_FAIL;
-				break;
+				return (PF_TEST_FAIL);
 			}
 			ctx->ri->r = r;
 			/* order is irrelevant */
@@ -4081,8 +4143,7 @@ retry:
 			    &ctx->nr) == -1) {
 				REASON_SET(&ctx->reason,
 				    PFRES_TRANSLATE);
-				ctx->test_status = PF_TEST_FAIL;
-				break;
+				return (PF_TEST_FAIL);
 			}
 #if NPFLOG > 0
 			if (r->log) {
@@ -4116,28 +4177,50 @@ retry:
 				    &ctx->rules);
 #endif /* NPFLOG > 0 */
 
-			if (r->quick) {
-				ctx->test_status = PF_TEST_QUICK;
-				break;
-			}
+			if (r->quick)
+				return (PF_TEST_QUICK);
 		} else {
-			save_a = ctx->a;
-			save_aruleset = ctx->aruleset;
-			ctx->a = r; /* remember anchor */
-			ctx->aruleset = ruleset; /* and its ruleset */
-			/*
-			 * Note: we don't need to restore if we are not going
-			 * to continue with ruleset evaluation.
-			 */
-			if (pf_step_into_anchor(ctx, r) != PF_TEST_OK)
-				break;
-			ctx->a = save_a;
-			ctx->aruleset = save_aruleset;
+			ctx->a = r;
+			ctx->aruleset = &r->anchor->ruleset;
+			if (r->anchor_wildcard) {
+				RB_FOREACH(child, pf_anchor_node,
+				    &r->anchor->children) {
+					if (pf_anchor_stack_push(ruleset, r, child,
+					    PF_NEXT_CHILD) != 0)
+						return (PF_TEST_FAIL);
+
+					ruleset = &child->ruleset;
+					goto enter_ruleset;
+next_child:
+					continue; /* with RB_FOREACH() */
+				}
+			} else {
+				if (pf_anchor_stack_push(ruleset, r, child,
+				    PF_NEXT_RULE) != 0)
+					return (PF_TEST_FAIL);
+
+				ruleset = &r->anchor->ruleset;
+				child = NULL;
+				goto enter_ruleset;
+next_rule:
+				;
+			}
 		}
 		r = TAILQ_NEXT(r, entries);
 	}
 
-	return (ctx->test_status);
+	if (pf_anchor_stack_pop(&ruleset, &r, &child, &target) == 0) {
+		switch (target) {
+		case PF_NEXT_CHILD:
+			goto next_child;
+		case PF_NEXT_RULE:
+			goto next_rule;
+		default:
+			panic("%s: unknown jump target", __func__);
+		}
+	}
+
+	return (PF_TEST_OK);
 }
 
 int
diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c
index a6316954ace..1141069dcf6 100644
--- a/sys/net/pf_ioctl.c
+++ b/sys/net/pf_ioctl.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pf_ioctl.c,v 1.396 2023/01/05 23:44:35 dlg Exp $ */
+/* $OpenBSD: pf_ioctl.c,v 1.397 2023/01/06 17:44:34 sashan Exp $ */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
@@ -156,6 +156,8 @@ struct rwlock pf_lock = RWLOCK_INITIALIZER("pf_lock");
 struct rwlock pf_state_lock = RWLOCK_INITIALIZER("pf_state_lock");
 struct rwlock pfioctl_rw = RWLOCK_INITIALIZER("pfioctl_rw");
 
+struct cpumem *pf_anchor_stack;
+
 #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
 #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
 #endif
@@ -171,6 +173,8 @@ void
 pfattach(int num)
 {
 	u_int32_t *timeout = pf_default_rule.timeout;
+	struct pf_anchor_stackframe *sf;
+	struct cpumem_iter cmi;
 
 	pool_init(&pf_rule_pl, sizeof(struct pf_rule), 0,
 	    IPL_SOFTNET, 0, "pfrule", NULL);
@@ -261,6 +265,18 @@ pfattach(int num)
 	pf_status.hostid = arc4random();
 
 	pf_default_rule_new = pf_default_rule;
+
+	/*
+	 * we waste two stack frames as meta-data.
+	 * frame[0] always presents a top, which can not be used for data
+	 * frame[PF_ANCHOR_STACK_MAX] denotes a bottom of the stack and keeps
+	 * the pointer to currently used stack frame.
+	 */
+	pf_anchor_stack = cpumem_malloc(
+	    sizeof(struct pf_anchor_stackframe) * (PF_ANCHOR_STACK_MAX + 2),
+	    M_WAITOK|M_ZERO);
+	CPUMEM_FOREACH(sf, &cmi, pf_anchor_stack)
+		sf[PF_ANCHOR_STACK_MAX].sf_stack_top = &sf[0];
 }
 
 int
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index f05e19436dc..1453bc35c04 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pfvar.h,v 1.527 2023/01/04 10:31:55 dlg Exp $ */
+/* $OpenBSD: pfvar.h,v 1.528 2023/01/06 17:44:34 sashan Exp $ */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
@@ -474,6 +474,7 @@ union pf_rule_ptr {
 	u_int32_t nr;
 };
 
+#define PF_ANCHOR_STACK_MAX 64
 #define PF_ANCHOR_NAME_SIZE 64
 #define PF_ANCHOR_MAXPATH (PATH_MAX - PF_ANCHOR_NAME_SIZE - 1)
 #define PF_ANCHOR_HIWAT 512
diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h
index 207d20d77cf..38fff6473aa 100644
--- a/sys/net/pfvar_priv.h
+++ b/sys/net/pfvar_priv.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: pfvar_priv.h,v 1.29 2023/01/04 10:31:55 dlg Exp $ */
+/* $OpenBSD: pfvar_priv.h,v 1.30 2023/01/06 17:44:34 sashan Exp $ */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
@@ -39,6 +39,7 @@
 #include
 #include
+#include
 
 struct pf_state_item {
 	TAILQ_ENTRY(pf_state_item)
@@ -303,6 +304,24 @@ struct pf_pdesc {
 	} hdr;
 };
 
+struct pf_anchor_stackframe {
+	struct pf_ruleset *sf_rs;
+	union {
+		struct pf_rule *u_r;
+		struct pf_anchor_stackframe *u_stack_top;
+	} u;
+	struct pf_anchor *sf_child;
+	int sf_jump_target;
+};
+#define sf_r u.u_r
+#define sf_stack_top u.u_stack_top
+enum {
+	PF_NEXT_RULE,
+	PF_NEXT_CHILD
+};
+
+extern struct cpumem *pf_anchor_stack;
+
 extern struct task pf_purge_task;
 extern struct timeout pf_purge_to;
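
The following is a minimal, standalone sketch (not part of the commit) of the
technique the diff applies to pf_match_rule(): walking nested rulesets with an
explicit, bounded frame stack instead of recursion, so nesting depth no longer
consumes kernel stack. All names below (ruleset, rule, frame, walk, STACK_MAX)
are invented stand-ins for the pf types and constants; the real code also keeps
its frames in per-CPU memory (cpumem) and reserves sentinel frames, which this
sketch omits.

#include <stdio.h>
#include <stddef.h>

#define STACK_MAX 64

struct ruleset;

struct rule {
	const char	*name;
	struct ruleset	*anchor;	/* non-NULL: descend into this child ruleset */
};

struct ruleset {
	struct rule	*rules;
	size_t		 nrules;
};

struct frame {
	struct ruleset	*rs;	/* ruleset we were walking */
	size_t		 next;	/* index of the rule to resume at */
};

/* Visit every rule in 'rs' and in all anchored children, without recursion. */
static int
walk(struct ruleset *rs)
{
	struct frame	 stack[STACK_MAX];
	size_t		 depth = 0, i = 0;

	for (;;) {
		if (i < rs->nrules) {
			struct rule *r = &rs->rules[i++];

			printf("rule %s\n", r->name);
			if (r->anchor != NULL) {
				if (depth == STACK_MAX)
					return (-1);	/* refuse instead of overflowing */
				/* push the resume point, then descend into the child */
				stack[depth].rs = rs;
				stack[depth].next = i;
				depth++;
				rs = r->anchor;
				i = 0;
			}
			continue;
		}
		if (depth == 0)
			break;			/* finished the top-level ruleset */
		/* pop: resume the parent where we left off */
		depth--;
		rs = stack[depth].rs;
		i = stack[depth].next;
	}
	return (0);
}

int
main(void)
{
	struct rule child_rules[] = { { "child-1", NULL } };
	struct ruleset child = { child_rules, 1 };
	struct rule top_rules[] = {
		{ "top-1", NULL }, { "anchor", &child }, { "top-2", NULL },
	};
	struct ruleset top = { top_rules, 3 };

	return (walk(&top) != 0);
}

Built with any C compiler, this prints the rules of the top ruleset and of the
anchored child in evaluation order, and returns an error instead of overflowing
once nesting exceeds STACK_MAX, which is the behaviour the commit gives
pf_match_rule().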