PF_ANCHOR_STACK_MAX is insufficient protection against stack overflow.
author: sashan <sashan@openbsd.org>
Fri, 6 Jan 2023 17:44:33 +0000 (17:44 +0000)
committer: sashan <sashan@openbsd.org>
Fri, 6 Jan 2023 17:44:33 +0000 (17:44 +0000)
On amd64 the stack overflows for an anchor rule with depth ~30. The tricky
thing is that the 'safe' depth varies depending on the kind of packet processed
by pf_match_rule(). For example, for a local outbound TCP packet the stack
overflows when the recursion in pf_match_rule() reaches depth 24.

Instead of lowering PF_ANCHOR_STACK_MAX to 20 and hoping it will
be enough on all platforms and for all packets, I'd like to stop
calling pf_match_rule() recursively. This commit brings back the
pf_anchor_stackframe array we used to have back in 2017. It also
revives patrick@'s idea to pre-allocate the stack frame arrays
from per-cpu memory.

OK kn@

sbin/pfctl/pfctl.c
sys/net/pf.c
sys/net/pf_ioctl.c
sys/net/pfvar.h
sys/net/pfvar_priv.h

index 3aa81b8..8cbd9d7 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: pfctl.c,v 1.389 2022/11/19 14:01:51 kn Exp $ */
+/*     $OpenBSD: pfctl.c,v 1.390 2023/01/06 17:44:33 sashan Exp $ */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
@@ -841,6 +841,11 @@ pfctl_show_rules(int dev, char *path, int opts, enum pfctl_show format,
        int len = strlen(path), ret = 0;
        char *npath, *p;
 
+       if (depth > PF_ANCHOR_STACK_MAX) {
+               warnx("%s: max stack depth exceeded for %s", __func__, path);
+               return (-1);
+       }
+
        /*
         * Truncate a trailing / and * on an anchorname before searching for
         * the ruleset, this is syntactic sugar that doesn't actually make it
index 7619413..4e638f6 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: pf.c,v 1.1168 2023/01/05 23:44:35 dlg Exp $ */
+/*     $OpenBSD: pf.c,v 1.1169 2023/01/06 17:44:34 sashan Exp $ */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
@@ -132,7 +132,6 @@ enum pf_test_status {
 };
 
 struct pf_test_ctx {
-       enum pf_test_status       test_status;
        struct pf_pdesc          *pd;
        struct pf_rule_actions    act;
        u_int8_t                  icmpcode;
@@ -152,11 +151,8 @@ struct pf_test_ctx {
        struct pf_ruleset        *arsm;
        struct pf_ruleset        *aruleset;
        struct tcphdr            *th;
-       int                       depth;
 };
 
-#define        PF_ANCHOR_STACK_MAX     64
-
 struct pool             pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
 struct pool             pf_state_pl, pf_state_key_pl, pf_state_item_pl;
 struct pool             pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl;
@@ -3514,48 +3510,112 @@ pf_tag_packet(struct mbuf *m, int tag, int rtableid)
                m->m_pkthdr.ph_rtableid = (u_int)rtableid;
 }
 
-enum pf_test_status
-pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r)
+void
+pf_anchor_stack_init(void)
 {
-       int     rv;
+       struct pf_anchor_stackframe *stack;
 
-       if (ctx->depth >= PF_ANCHOR_STACK_MAX) {
-               log(LOG_ERR, "pf_step_into_anchor: stack overflow\n");
-               return (PF_TEST_FAIL);
-       }
+       stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
+       stack[PF_ANCHOR_STACK_MAX].sf_stack_top = &stack[0];
+       cpumem_leave(pf_anchor_stack, stack);
+}
 
-       ctx->depth++;
+int
+pf_anchor_stack_is_full(struct pf_anchor_stackframe *sf)
+{
+       struct pf_anchor_stackframe *stack;
+       int rv;
 
-       if (r->anchor_wildcard) {
-               struct pf_anchor        *child;
-               rv = PF_TEST_OK;
-               RB_FOREACH(child, pf_anchor_node, &r->anchor->children) {
-                       rv = pf_match_rule(ctx, &child->ruleset);
-                       if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
-                               /*
-                                * we either hit a rule with quick action
-                                * (more likely), or hit some runtime
-                                * error (e.g. pool_get() failure).
-                                */
-                               break;
-                       }
-               }
-       } else {
-               rv = pf_match_rule(ctx, &r->anchor->ruleset);
-               /*
-                * Unless errors occurred, stop iff any rule matched
-                * within quick anchors.
-                */
-               if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK &&
-                   *ctx->am == r)
-                       rv = PF_TEST_QUICK;
-       }
+       stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
+       rv = (sf == &stack[PF_ANCHOR_STACK_MAX]);
+       cpumem_leave(pf_anchor_stack, stack);
 
-       ctx->depth--;
+       return (rv);
+}
+
+int
+pf_anchor_stack_is_empty(struct pf_anchor_stackframe *sf)
+{
+       struct pf_anchor_stackframe *stack;
+       int rv;
+
+       stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
+       rv = (sf == &stack[0]);
+       cpumem_leave(pf_anchor_stack, stack);
 
        return (rv);
 }
 
+struct pf_anchor_stackframe *
+pf_anchor_stack_top(void)
+{
+       struct pf_anchor_stackframe *stack;
+       struct pf_anchor_stackframe *top_sf;
+
+       stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
+       top_sf = stack[PF_ANCHOR_STACK_MAX].sf_stack_top;
+       cpumem_leave(pf_anchor_stack, stack);
+
+       return (top_sf);
+}
+
+int
+pf_anchor_stack_push(struct pf_ruleset *rs, struct pf_rule *r,
+    struct pf_anchor *child, int jump_target)
+{
+       struct pf_anchor_stackframe *stack;
+       struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top();
+
+       top_sf++;
+       if (pf_anchor_stack_is_full(top_sf))
+               return (-1);
+
+       top_sf->sf_rs = rs;
+       top_sf->sf_r = r;
+       top_sf->sf_child = child;
+       top_sf->sf_jump_target = jump_target;
+
+       stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
+
+       if ((top_sf <= &stack[0]) || (top_sf >= &stack[PF_ANCHOR_STACK_MAX]))
+               panic("%s: top frame outside of anchor stack range", __func__);
+
+       stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf;
+       cpumem_leave(pf_anchor_stack, stack);
+
+       return (0);
+}
+
+int
+pf_anchor_stack_pop(struct pf_ruleset **rs, struct pf_rule **r,
+    struct pf_anchor **child, int *jump_target)
+{
+       struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top();
+       struct pf_anchor_stackframe *stack;
+       int on_top;
+
+       stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
+       if (pf_anchor_stack_is_empty(top_sf)) {
+               on_top = -1;
+       } else {
+               if ((top_sf <= &stack[0]) ||
+                   (top_sf >= &stack[PF_ANCHOR_STACK_MAX]))
+                       panic("%s: top frame outside of anchor stack range",
+                           __func__);
+
+               *rs = top_sf->sf_rs;
+               *r = top_sf->sf_r;
+               *child = top_sf->sf_child;
+               *jump_target = top_sf->sf_jump_target;
+               top_sf--;
+               stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf;
+               on_top = 0;
+       }
+       cpumem_leave(pf_anchor_stack, stack);
+
+       return (on_top);
+}
+
 void
 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
     struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
@@ -3917,11 +3977,12 @@ pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a)
 enum pf_test_status
 pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset)
 {
-       struct pf_rule  *r;
-       struct pf_rule  *save_a;
-       struct pf_ruleset       *save_aruleset;
+       struct pf_rule *r;
+       struct pf_anchor *child = NULL;
+       int target;
 
-retry:
+       pf_anchor_stack_init();
+enter_ruleset:
        r = TAILQ_FIRST(ruleset->rules.active.ptr);
        while (r != NULL) {
                PF_TEST_ATTRIB(r->rule_flag & PFRULE_EXPIRED,
@@ -4057,18 +4118,19 @@ retry:
                                rule_flag = r->rule_flag;
                                if (((rule_flag & PFRULE_EXPIRED) == 0) &&
                                    atomic_cas_uint(&r->rule_flag, rule_flag,
-                                   rule_flag | PFRULE_EXPIRED) == rule_flag)
+                                   rule_flag | PFRULE_EXPIRED) == rule_flag) {
                                        r->exptime = gettime();
-                               else
-                                       goto retry;
+                               } else {
+                                       r = TAILQ_NEXT(r, entries);
+                                       continue;
+                               }
                        }
 
                        if (r->action == PF_MATCH) {
                                if ((ctx->ri = pool_get(&pf_rule_item_pl,
                                    PR_NOWAIT)) == NULL) {
                                        REASON_SET(&ctx->reason, PFRES_MEMORY);
-                                       ctx->test_status = PF_TEST_FAIL;
-                                       break;
+                                       return (PF_TEST_FAIL);
                                }
                                ctx->ri->r = r;
                                /* order is irrelevant */
@@ -4081,8 +4143,7 @@ retry:
                                    &ctx->nr) == -1) {
                                        REASON_SET(&ctx->reason,
                                            PFRES_TRANSLATE);
-                                       ctx->test_status = PF_TEST_FAIL;
-                                       break;
+                                       return (PF_TEST_FAIL);
                                }
 #if NPFLOG > 0
                                if (r->log) {
@@ -4116,28 +4177,50 @@ retry:
                                    &ctx->rules);
 #endif /* NPFLOG > 0 */
 
-                       if (r->quick) {
-                               ctx->test_status = PF_TEST_QUICK;
-                               break;
-                       }
+                       if (r->quick)
+                               return (PF_TEST_QUICK);
                } else {
-                       save_a = ctx->a;
-                       save_aruleset = ctx->aruleset;
-                       ctx->a = r;             /* remember anchor */
-                       ctx->aruleset = ruleset;        /* and its ruleset */
-                       /*
-                        * Note: we don't need to restore if we are not going
-                        * to continue with ruleset evaluation.
-                        */
-                       if (pf_step_into_anchor(ctx, r) != PF_TEST_OK)
-                               break;
-                       ctx->a = save_a;
-                       ctx->aruleset = save_aruleset;
+                       ctx->a = r;
+                       ctx->aruleset = &r->anchor->ruleset;
+                       if (r->anchor_wildcard) {
+                               RB_FOREACH(child, pf_anchor_node,
+                                   &r->anchor->children) {
+                                       if (pf_anchor_stack_push(ruleset, r, child,
+                                           PF_NEXT_CHILD) != 0)
+                                               return (PF_TEST_FAIL);
+
+                                       ruleset = &child->ruleset;
+                                       goto enter_ruleset;
+next_child:
+                                       continue;       /* with RB_FOREACH() */
+                               }
+                       } else {
+                               if (pf_anchor_stack_push(ruleset, r, child,
+                                   PF_NEXT_RULE) != 0)
+                                       return (PF_TEST_FAIL);
+
+                               ruleset = &r->anchor->ruleset;
+                               child = NULL;
+                               goto enter_ruleset;
+next_rule:
+                               ;
+                       }
                }
                r = TAILQ_NEXT(r, entries);
        }
 
-       return (ctx->test_status);
+       if (pf_anchor_stack_pop(&ruleset, &r, &child, &target) == 0) {
+               switch (target) {
+               case PF_NEXT_CHILD:
+                       goto next_child;
+               case PF_NEXT_RULE:
+                       goto next_rule;
+               default:
+                       panic("%s: unknown jump target", __func__);
+               }
+       }
+
+       return (PF_TEST_OK);
 }
 
 int
index a631695..1141069 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: pf_ioctl.c,v 1.396 2023/01/05 23:44:35 dlg Exp $ */
+/*     $OpenBSD: pf_ioctl.c,v 1.397 2023/01/06 17:44:34 sashan Exp $ */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
@@ -156,6 +156,8 @@ struct rwlock                pf_lock = RWLOCK_INITIALIZER("pf_lock");
 struct rwlock           pf_state_lock = RWLOCK_INITIALIZER("pf_state_lock");
 struct rwlock           pfioctl_rw = RWLOCK_INITIALIZER("pfioctl_rw");
 
+struct cpumem *pf_anchor_stack;
+
 #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
 #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
 #endif
@@ -171,6 +173,8 @@ void
 pfattach(int num)
 {
        u_int32_t *timeout = pf_default_rule.timeout;
+       struct pf_anchor_stackframe *sf;
+       struct cpumem_iter cmi;
 
        pool_init(&pf_rule_pl, sizeof(struct pf_rule), 0,
            IPL_SOFTNET, 0, "pfrule", NULL);
@@ -261,6 +265,18 @@ pfattach(int num)
        pf_status.hostid = arc4random();
 
        pf_default_rule_new = pf_default_rule;
+
+       /*
+        * we waste two stack frames as meta-data.
+        * frame[0] always presents a top, which can not be used for data
+        * frame[PF_ANCHOR_STACK_MAX] denotes a bottom of the stack and keeps
+        * the pointer to currently used stack frame.
+        */
+       pf_anchor_stack = cpumem_malloc(
+           sizeof(struct pf_anchor_stackframe) * (PF_ANCHOR_STACK_MAX + 2),
+           M_WAITOK|M_ZERO);
+       CPUMEM_FOREACH(sf, &cmi, pf_anchor_stack)
+               sf[PF_ANCHOR_STACK_MAX].sf_stack_top = &sf[0];
 }
 
 int
index f05e194..1453bc3 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: pfvar.h,v 1.527 2023/01/04 10:31:55 dlg Exp $ */
+/*     $OpenBSD: pfvar.h,v 1.528 2023/01/06 17:44:34 sashan Exp $ */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
@@ -474,6 +474,7 @@ union pf_rule_ptr {
        u_int32_t                nr;
 };
 
+#define        PF_ANCHOR_STACK_MAX      64
 #define        PF_ANCHOR_NAME_SIZE      64
 #define        PF_ANCHOR_MAXPATH       (PATH_MAX - PF_ANCHOR_NAME_SIZE - 1)
 #define        PF_ANCHOR_HIWAT          512
index 207d20d..38fff64 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: pfvar_priv.h,v 1.29 2023/01/04 10:31:55 dlg Exp $     */
+/*     $OpenBSD: pfvar_priv.h,v 1.30 2023/01/06 17:44:34 sashan Exp $  */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
@@ -39,6 +39,7 @@
 
 #include <sys/rwlock.h>
 #include <sys/mutex.h>
+#include <sys/percpu.h>
 
 struct pf_state_item {
        TAILQ_ENTRY(pf_state_item)
@@ -303,6 +304,24 @@ struct pf_pdesc {
        } hdr;
 };
 
+struct pf_anchor_stackframe {
+       struct pf_ruleset       *sf_rs;
+       union {
+               struct pf_rule                  *u_r;
+               struct pf_anchor_stackframe     *u_stack_top;
+       } u;
+       struct pf_anchor        *sf_child;
+       int                      sf_jump_target;
+};
+#define sf_r           u.u_r
+#define sf_stack_top   u.u_stack_top
+enum {
+       PF_NEXT_RULE,
+       PF_NEXT_CHILD
+};
+
+extern struct cpumem *pf_anchor_stack;
+
 extern struct task     pf_purge_task;
 extern struct timeout  pf_purge_to;