From: dlg Date: Fri, 16 Dec 2022 02:05:44 +0000 (+0000) Subject: always keep pf_state_keys attached to pf_states. X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=437c6c3f437b1cb182e10d7cb7a6528e67c89fe2;p=openbsd always keep pf_state_keys attached to pf_states. pf_state structures don't contain ip addresses, protocols, ports, etc. that information is stored in a pf_state_key struct, which is used to wire a state into the state table. when things like pfsync or the pf state ioctls want to export information about a state, particularly the addresses on it, they needs the pf_state_key struct to read from. before this diff the code assumed that when a state was removed from the state tables it could throw the pf_state_key structs away as part of that removal. this code changes it so once pf_state_insert succeeds, a pf_state will keep its references to the pf_state_key structs until the pf_state struct itself is being destroyed. this allows anything that holds a reference to a pf_state to also look at the pf_state_key structs because they're now effectively an immutable part of the pf_state struct. this is by far the simplest and most straightforward fix for pfsync crashing on pf_state_key dereferences we've come up with so far. it has been made possible by the addition of reference counts to pf_state and pf_state_key structs, which allows us to properly account for this adjusted lifecycle for pf_state_keys on pf_state structs. sashan@ and i have been kicking this diff around for a couple of weeks now. ok sashan@ jmatthew@ --- diff --git a/sys/net/pf.c b/sys/net/pf.c index 593f08ed4e2..7203589d4bd 100644 --- a/sys/net/pf.c +++ b/sys/net/pf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.1156 2022/11/25 20:27:53 bluhm Exp $ */ +/* $OpenBSD: pf.c,v 1.1157 2022/12/16 02:05:44 dlg Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -185,6 +185,8 @@ int pf_translate_icmp_af(struct pf_pdesc*, int, void *); void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int, sa_family_t, struct pf_rule *, u_int); void pf_detach_state(struct pf_state *); +struct pf_state_key *pf_state_key_attach(struct pf_state_key *, + struct pf_state *, int); void pf_state_key_detach(struct pf_state *, int); u_int32_t pf_tcp_iss(struct pf_pdesc *); void pf_rule_to_actions(struct pf_rule *, @@ -379,7 +381,7 @@ pf_set_protostate(struct pf_state *s, int which, u_int8_t newstate) if (s->src.state == newstate) return; - if (s->creatorid == pf_status.hostid && s->key[PF_SK_STACK] != NULL && + if (s->creatorid == pf_status.hostid && s->key[PF_SK_STACK]->proto == IPPROTO_TCP && !(TCPS_HAVEESTABLISHED(s->src.state) || s->src.state == TCPS_CLOSED) && @@ -720,17 +722,26 @@ pf_state_compare_id(struct pf_state *a, struct pf_state *b) return (0); } -int +/* + * on failure, pf_state_key_attach() releases the pf_state_key + * reference and returns NULL. + */ +struct pf_state_key * pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) { struct pf_state_item *si; struct pf_state_key *cur; struct pf_state *olds = NULL; + PF_ASSERT_LOCKED(); + KASSERT(s->key[idx] == NULL); - if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) { + sk->removed = 0; + cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk); + if (cur != NULL) { + sk->removed = 1; /* key exists. check for same kif, if none, add to key */ - TAILQ_FOREACH(si, &cur->states, entry) + TAILQ_FOREACH(si, &cur->states, entry) { if (si->s->kif == s->kif && ((si->s->key[PF_SK_WIRE]->af == sk->af && si->s->direction == s->direction) || @@ -766,44 +777,53 @@ pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) /* remove late or sks can go away */ olds = si->s; } else { - pool_put(&pf_state_key_pl, sk); - return (-1); /* collision! */ + pf_state_key_unref(sk); + return (NULL); /* collision! */ } } - pool_put(&pf_state_key_pl, sk); - s->key[idx] = cur; - } else - s->key[idx] = sk; + } + + /* reuse the existing state key */ + pf_state_key_unref(sk); + sk = cur; + } if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { - pf_state_key_detach(s, idx); - return (-1); + if (TAILQ_EMPTY(&sk->states)) { + KASSERT(cur == NULL); + RB_REMOVE(pf_state_tree, &pf_statetbl, sk); + sk->removed = 1; + pf_state_key_unref(sk); + } + + return (NULL); } - si->s = s; + + s->key[idx] = pf_state_key_ref(sk); /* give a ref to state */ + si->s = pf_state_ref(s); /* list is sorted, if-bound states before floating */ if (s->kif == pfi_all) - TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry); + TAILQ_INSERT_TAIL(&sk->states, si, entry); else - TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry); + TAILQ_INSERT_HEAD(&sk->states, si, entry); if (olds) pf_remove_state(olds); - return (0); + /* caller owns the pf_state ref, which owns a pf_state_key ref now */ + return (sk); } void pf_detach_state(struct pf_state *s) { - if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) - s->key[PF_SK_WIRE] = NULL; + KASSERT(s->key[PF_SK_WIRE] != NULL); + pf_state_key_detach(s, PF_SK_WIRE); - if (s->key[PF_SK_STACK] != NULL) + KASSERT(s->key[PF_SK_STACK] != NULL); + if (s->key[PF_SK_STACK] != s->key[PF_SK_WIRE]) pf_state_key_detach(s, PF_SK_STACK); - - if (s->key[PF_SK_WIRE] != NULL) - pf_state_key_detach(s, PF_SK_WIRE); } void @@ -812,20 +832,22 @@ pf_state_key_detach(struct pf_state *s, int idx) struct pf_state_item *si; struct pf_state_key *sk; - if (s->key[idx] == NULL) - return; + PF_ASSERT_LOCKED(); - si = TAILQ_FIRST(&s->key[idx]->states); - while (si && si->s != s) - si = TAILQ_NEXT(si, entry); + sk = s->key[idx]; + if (sk == NULL) + return; - if (si) { - TAILQ_REMOVE(&s->key[idx]->states, si, entry); - pool_put(&pf_state_item_pl, si); + TAILQ_FOREACH(si, &sk->states, entry) { + if (si->s == s) + break; } + if (si == NULL) + return; - sk = s->key[idx]; - s->key[idx] = NULL; + TAILQ_REMOVE(&sk->states, si, entry); + pool_put(&pf_state_item_pl, si); + if (TAILQ_EMPTY(&sk->states)) { RB_REMOVE(pf_state_tree, &pf_statetbl, sk); sk->removed = 1; @@ -833,6 +855,8 @@ pf_state_key_detach(struct pf_state *s, int idx) pf_state_key_unlink_inpcb(sk); pf_state_key_unref(sk); } + + pf_state_unref(s); } struct pf_state_key * @@ -842,7 +866,10 @@ pf_alloc_state_key(int pool_flags) if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL) return (NULL); + + PF_REF_INIT(sk->refcnt); TAILQ_INIT(&sk->states); + sk->removed = 1; return (sk); } @@ -916,8 +943,6 @@ pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw, sk1->proto = pd->proto; sk1->af = pd->af; sk1->rdomain = pd->rdomain; - PF_REF_INIT(sk1->refcnt); - sk1->removed = 0; if (rtableid >= 0) wrdom = rtable_l2(rtableid); @@ -926,7 +951,7 @@ pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw, pd->nsport != pd->osport || pd->ndport != pd->odport || wrdom != pd->rdomain || afto) { /* NAT/NAT64 */ if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) { - pool_put(&pf_state_key_pl, sk1); + pf_state_key_unref(sk1); return (ENOMEM); } pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx, @@ -949,10 +974,8 @@ pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw, sk2->proto = pd->proto; sk2->af = pd->naf; sk2->rdomain = wrdom; - PF_REF_INIT(sk2->refcnt); - sk2->removed = 0; } else - sk2 = sk1; + sk2 = pf_state_key_ref(sk1); if (pd->dir == PF_IN) { *skw = sk1; @@ -971,34 +994,47 @@ pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw, return (0); } +/* + * pf_state_insert() does the following: + * - links the pf_state up with pf_state_key(s). + * - inserts the pf_state_keys into pf_state_tree. + * - inserts the pf_state into the into pf_state_tree_id. + * - tells pfsync about the state. + * + * pf_state_insert() owns the references to the pf_state_key structs + * it is given. on failure to insert, these references are released. + * on success, the caller owns a pf_state reference that allows it + * to access the state keys. + */ + int -pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw, - struct pf_state_key **sks, struct pf_state *s) +pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skwp, + struct pf_state_key **sksp, struct pf_state *s) { + struct pf_state_key *skw = *skwp; + struct pf_state_key *sks = *sksp; + int same = (skw == sks); + PF_ASSERT_LOCKED(); s->kif = kif; PF_STATE_ENTER_WRITE(); - if (*skw == *sks) { - if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) { - PF_STATE_EXIT_WRITE(); - return (-1); - } - *skw = *sks = s->key[PF_SK_WIRE]; - s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; - } else { - if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) { - pool_put(&pf_state_key_pl, *sks); - PF_STATE_EXIT_WRITE(); - return (-1); - } - *skw = s->key[PF_SK_WIRE]; - if (pf_state_key_attach(*sks, s, PF_SK_STACK)) { - pf_state_key_detach(s, PF_SK_WIRE); - PF_STATE_EXIT_WRITE(); - return (-1); - } - *sks = s->key[PF_SK_STACK]; + + skw = pf_state_key_attach(skw, s, PF_SK_WIRE); + if (skw == NULL) { + pf_state_key_unref(sks); + PF_STATE_EXIT_WRITE(); + return (-1); + } + + if (same) { + /* pf_state_key_attach might have swapped skw */ + pf_state_key_unref(sks); + s->key[PF_SK_STACK] = sks = pf_state_key_ref(skw); + } else if (pf_state_key_attach(sks, s, PF_SK_STACK) == NULL) { + pf_state_key_detach(s, PF_SK_WIRE); + PF_STATE_EXIT_WRITE(); + return (-1); } if (s->id == 0 && s->creatorid == 0) { @@ -1024,6 +1060,10 @@ pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw, pfsync_insert_state(s); #endif /* NPFSYNC > 0 */ PF_STATE_EXIT_WRITE(); + + *skwp = skw; + *sksp = sks; + return (0); } @@ -1374,7 +1414,7 @@ pf_state_import(const struct pfsync_state *sp, int flags) if ((sks = pf_alloc_state_key(pool_flags)) == NULL) goto cleanup; } else - sks = skw; + sks = pf_state_key_ref(skw); /* allocate memory for scrub info */ if (pf_state_alloc_scrub_memory(&sp->src, &st->src) || @@ -1387,7 +1427,6 @@ pf_state_import(const struct pfsync_state *sp, int flags) skw->port[0] = sp->key[PF_SK_WIRE].port[0]; skw->port[1] = sp->key[PF_SK_WIRE].port[1]; skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain); - PF_REF_INIT(skw->refcnt); skw->proto = sp->proto; if (!(skw->af = sp->key[PF_SK_WIRE].af)) skw->af = sp->af; @@ -1397,7 +1436,6 @@ pf_state_import(const struct pfsync_state *sp, int flags) sks->port[0] = sp->key[PF_SK_STACK].port[0]; sks->port[1] = sp->key[PF_SK_STACK].port[1]; sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain); - PF_REF_INIT(sks->refcnt); if (!(sks->af = sp->key[PF_SK_STACK].af)) sks->af = sp->af; if (sks->af != skw->af) { @@ -1499,12 +1537,10 @@ pf_state_import(const struct pfsync_state *sp, int flags) return (0); cleanup: - if (skw == sks) - sks = NULL; if (skw != NULL) - pool_put(&pf_state_key_pl, skw); + pf_state_key_unref(skw); if (sks != NULL) - pool_put(&pf_state_key_pl, sks); + pf_state_key_unref(sks); cleanup_state: /* pf_state_insert frees the state keys */ if (st) { @@ -1587,10 +1623,9 @@ pf_state_expires(const struct pf_state *state, uint8_t stimeout) */ /* handle all PFTM_* > PFTM_MAX here */ - if (stimeout == PFTM_PURGE) + if (stimeout > PFTM_MAX) return (0); - KASSERT(stimeout != PFTM_UNLINKED); KASSERT(stimeout < PFTM_MAX); timeout = state->rule.ptr->timeout[stimeout]; @@ -4460,7 +4495,6 @@ pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a, } if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) { - pf_detach_state(s); *sks = *skw = NULL; REASON_SET(&reason, PFRES_STATEINS); goto csfailed; @@ -7993,8 +8027,9 @@ pf_state_unref(struct pf_state *s) #endif /* NPFSYNC */ KASSERT((TAILQ_NEXT(s, entry_list) == NULL) || (TAILQ_NEXT(s, entry_list) == _Q_INVALID)); - KASSERT((s->key[PF_SK_WIRE] == NULL) && - (s->key[PF_SK_STACK] == NULL)); + + pf_state_key_unref(s->key[PF_SK_WIRE]); + pf_state_key_unref(s->key[PF_SK_STACK]); pool_put(&pf_state_pl, s); } diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 1023966b087..d547bef23b2 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pfvar.h,v 1.521 2022/11/25 20:27:53 bluhm Exp $ */ +/* $OpenBSD: pfvar.h,v 1.522 2022/12/16 02:05:44 dlg Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -1731,7 +1731,6 @@ void pf_pkt_addr_changed(struct mbuf *); struct inpcb *pf_inp_lookup(struct mbuf *); void pf_inp_link(struct mbuf *, struct inpcb *); void pf_inp_unlink(struct inpcb *); -int pf_state_key_attach(struct pf_state_key *, struct pf_state *, int); int pf_translate(struct pf_pdesc *, struct pf_addr *, u_int16_t, struct pf_addr *, u_int16_t, u_int16_t, int); int pf_translate_af(struct pf_pdesc *); diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h index 28d98e4c2e1..81e26c96a38 100644 --- a/sys/net/pfvar_priv.h +++ b/sys/net/pfvar_priv.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pfvar_priv.h,v 1.23 2022/11/25 20:27:53 bluhm Exp $ */ +/* $OpenBSD: pfvar_priv.h,v 1.24 2022/12/16 02:05:45 dlg Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -42,7 +42,7 @@ /* * Protection/ownership of pf_state members: - * I immutable after creation + * I immutable after pf_state_insert() * M pf_state mtx * P PF_STATE_LOCK * S pfsync mutex @@ -69,7 +69,7 @@ struct pf_state { union pf_rule_ptr natrule; /* [I] */ struct pf_addr rt_addr; /* [I] */ struct pf_sn_head src_nodes; /* [I] */ - struct pf_state_key *key[2]; /* stack and wire */ + struct pf_state_key *key[2]; /* [I] stack and wire */ struct pfi_kif *kif; /* [I] */ struct mutex mtx; pf_refcnt_t refcnt;