From ad6271c5803ac5f60f3823ae073cfb046050012d Mon Sep 17 00:00:00 2001
From: dlg
Date: Thu, 22 Dec 2022 05:59:26 +0000
Subject: [PATCH] use stoeplitz to generate a hash/flowid for state keys.

the hash will be used to partition work in pf and pfsync in the
future, and right now it is used as the first comparison in the rb
tree state lookup.

using stoeplitz means that pf will hash traffic the same way that
hardware using a stoeplitz key will hash incoming traffic on rings.
stoeplitz is also used by the tcp stack to generate a flow id, which
is used to pick which transmit ring is used on nics with multiple
queues too. using the same algorithm throughout the stack encourages
affinity of packets to rings and softnet threads the whole way
through.

using the hash as the first comparison in the state rb tree comparison
should encourage faster traversal of the state tree by having all
the address/port bits summarised into the single hash value. however,
tests by hrvoje popovski don't show performance changing. on the
plus side, if this change is free from a performance point of view
then it makes the future steps more straightforward.

discussed at length at h2k22 tested by sashan@ and hrvoje popovski ok tb@ sashan@ claudio@ jmatthew@ --- sys/conf/files | 4 ++-- sys/net/pf.c | 56 +++++++++++++++++++++++++++++++++++++++++++- sys/net/pfvar.h | 3 ++- sys/net/pfvar_priv.h | 4 +++- 4 files changed, 62 insertions(+), 5 deletions(-) diff --git a/sys/conf/files b/sys/conf/files index 7ad6affb5c2..f4fabbed9bb 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,4 +1,4 @@ -# $OpenBSD: files,v 1.719 2022/11/06 15:36:13 patrick Exp $ +# $OpenBSD: files,v 1.720 2022/12/22 05:59:26 dlg Exp $ # $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $ # @(#)files.newconf 7.5 (Berkeley) 5/10/93 @@ -587,7 +587,7 @@ file miscfs/fuse/fuse_vfsops.c fuse file miscfs/fuse/fuse_vnops.c fuse file miscfs/fuse/fusebuf.c fuse -pseudo-device pf: ifnet +pseudo-device pf: ifnet, stoeplitz file net/pf.c pf needs-flag file net/pf_norm.c pf file net/pf_ruleset.c pf diff --git a/sys/net/pf.c b/sys/net/pf.c index 4978c10d2ff..975e7b8f71b 100644 --- a/sys/net/pf.c +++ b/sys/net/pf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.1160 2022/12/21 03:02:34 dlg Exp $ */ +/* $OpenBSD: pf.c,v 1.1161 2022/12/22 05:59:27 dlg Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -60,6 +60,7 @@ #include #include #include +#include #include #include @@ -238,6 +239,9 @@ int pf_addr_wrap_neq(struct pf_addr_wrap *, struct pf_addr_wrap *); int pf_compare_state_keys(struct pf_state_key *, struct pf_state_key *, struct pfi_kif *, u_int); +u_int16_t pf_pkt_hash(sa_family_t, uint8_t, + const struct pf_addr *, const struct pf_addr *, + uint16_t, uint16_t); int pf_find_state(struct pf_pdesc *, struct pf_state_key_cmp *, struct pf_state **); int pf_src_connlimit(struct pf_state **); @@ -690,6 +694,8 @@ pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b) { int diff; + if ((diff = a->hash - b->hash) != 0) + return (diff); if ((diff = a->proto - b->proto) != 0) return (diff); if ((diff = a->af - b->af) != 0) @@ -944,6 +950,7 @@ 
pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw, sk1->proto = pd->proto; sk1->af = pd->af; sk1->rdomain = pd->rdomain; + sk1->hash = pd->hash; if (rtableid >= 0) wrdom = rtable_l2(rtableid); @@ -975,6 +982,8 @@ pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw, sk2->proto = pd->proto; sk2->af = pd->naf; sk2->rdomain = wrdom; + sk2->hash = pf_pkt_hash(sk2->af, sk2->proto, + &sk2->addr[0], &sk2->addr[1], sk2->port[0], sk2->port[1]); } else sk2 = pf_state_key_ref(sk1); @@ -1435,6 +1444,9 @@ pf_state_import(const struct pfsync_state *sp, int flags) skw->proto = sp->proto; if (!(skw->af = sp->key[PF_SK_WIRE].af)) skw->af = sp->af; + skw->hash = pf_pkt_hash(skw->af, skw->proto, + &skw->addr[0], &skw->addr[1], skw->port[0], skw->port[1]); + if (sks != skw) { sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; @@ -1463,6 +1475,9 @@ pf_state_import(const struct pfsync_state *sp, int flags) goto cleanup; } + sks->hash = pf_pkt_hash(sks->af, sks->proto, + &sks->addr[0], &sks->addr[1], sks->port[0], sks->port[1]); + } else if ((sks->af != AF_INET) && (sks->af != AF_INET6)) { error = EINVAL; goto cleanup; @@ -5310,6 +5325,9 @@ pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key, pd->dst, pd->af, multi)) return (PF_DROP); + key->hash = pf_pkt_hash(pd->af, pd->proto, + pd->src, pd->dst, 0, 0); + action = pf_find_state(pd, key, state); if (action != PF_MATCH) return (action); @@ -5579,6 +5597,8 @@ pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **state, pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = th->th_sport; key.port[pd2.didx] = th->th_dport; + key.hash = pf_pkt_hash(pd2.af, pd2.proto, + pd2.src, pd2.dst, th->th_sport, th->th_dport); action = pf_find_state(&pd2, &key, state); if (action != PF_MATCH) @@ -5757,6 +5777,8 @@ pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **state, pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = 
uh->uh_sport; key.port[pd2.didx] = uh->uh_dport; + key.hash = pf_pkt_hash(pd2.af, pd2.proto, + pd2.src, pd2.dst, uh->uh_sport, uh->uh_dport); action = pf_find_state(&pd2, &key, state); if (action != PF_MATCH) @@ -6091,6 +6113,8 @@ pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **state, pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = 0; + key.hash = pf_pkt_hash(pd2.af, pd2.proto, + pd2.src, pd2.dst, 0, 0); action = pf_find_state(&pd2, &key, state); if (action != PF_MATCH) @@ -7019,6 +7043,32 @@ pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) } #endif /* INET6 */ +u_int16_t +pf_pkt_hash(sa_family_t af, uint8_t proto, + const struct pf_addr *src, const struct pf_addr *dst, + uint16_t sport, uint16_t dport) +{ + uint32_t hash; + + hash = src->addr32[0] ^ dst->addr32[0]; +#ifdef INET6 + if (af == AF_INET6) { + hash ^= src->addr32[1] ^ dst->addr32[1]; + hash ^= src->addr32[2] ^ dst->addr32[2]; + hash ^= src->addr32[3] ^ dst->addr32[3]; + } +#endif + + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + hash ^= sport ^ dport; + break; + } + + return stoeplitz_n32(hash); +} + int pf_setup_pdesc(struct pf_pdesc *pd, sa_family_t af, int dir, struct pfi_kif *kif, struct mbuf *m, u_short *reason) @@ -7206,6 +7256,9 @@ pf_setup_pdesc(struct pf_pdesc *pd, sa_family_t af, int dir, if (pd->dport) pd->odport = pd->ndport = *pd->dport; + pd->hash = pf_pkt_hash(pd->af, pd->proto, + pd->src, pd->dst, pd->osport, pd->odport); + return (PF_PASS); } @@ -7486,6 +7539,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) pf_addrcpy(&key.addr[pd.didx], pd.dst, key.af); key.port[pd.sidx] = pd.osport; key.port[pd.didx] = pd.odport; + key.hash = pd.hash; PF_STATE_ENTER_READ(); action = pf_find_state(&pd, &key, &s); diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 719ad0a8728..b169f0a0510 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -1,4 
+1,4 @@ -/* $OpenBSD: pfvar.h,v 1.524 2022/12/21 02:23:10 dlg Exp $ */ +/* $OpenBSD: pfvar.h,v 1.525 2022/12/22 05:59:27 dlg Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -704,6 +704,7 @@ struct pf_state_key_cmp { struct pf_addr addr[2]; u_int16_t port[2]; u_int16_t rdomain; + u_int16_t hash; sa_family_t af; u_int8_t proto; }; diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h index cd5b308e4dc..51cc0f66f25 100644 --- a/sys/net/pfvar_priv.h +++ b/sys/net/pfvar_priv.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pfvar_priv.h,v 1.26 2022/12/21 02:23:10 dlg Exp $ */ +/* $OpenBSD: pfvar_priv.h,v 1.27 2022/12/22 05:59:27 dlg Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -52,6 +52,7 @@ struct pf_state_key { struct pf_addr addr[2]; u_int16_t port[2]; u_int16_t rdomain; + u_int16_t hash; sa_family_t af; u_int8_t proto; @@ -254,6 +255,7 @@ struct pf_pdesc { u_int16_t *dport; u_int16_t osport; u_int16_t odport; + u_int16_t hash; u_int16_t nsport; /* src port after NAT */ u_int16_t ndport; /* dst port after NAT */ -- 2.20.1