From 8bde4b774c473788f83186e3c93d69e4b5ef7143 Mon Sep 17 00:00:00 2001 From: bluhm Date: Sat, 24 Jun 2023 20:54:46 +0000 Subject: [PATCH] Calculate inet PCB SIP hash without table mutex. Goal is to run UDP input in parallel. Btrace kstack analysis shows that SIP hash for PCB lookup is quite expensive. When running in parallel, there is also lock contention on the PCB table mutex. It results in better performance to calculate the hash value before taking the mutex. The hash secret has to be constant as hash calculation must not depend on values protected by the table mutex. Do not reseed anymore when hash table gets resized. Analysis also shows that asserting a rw_lock while holding a mutex is a bit expensive. Just remove the netlock assert. OK dlg@ mvs@ --- sys/netinet/in_pcb.c | 58 +++++++++++++++++++++++++----------------- sys/netinet/in_pcb.h | 7 +++-- sys/netinet6/in6_pcb.c | 31 ++++++++++++++-------- 3 files changed, 59 insertions(+), 37 deletions(-) diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index acc9a06a619..292a64364bc 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -1,4 +1,4 @@ -/* $OpenBSD: in_pcb.c,v 1.276 2022/10/03 16:43:52 bluhm Exp $ */ +/* $OpenBSD: in_pcb.c,v 1.277 2023/06/24 20:54:46 bluhm Exp $ */ /* $NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $ */ /* @@ -121,15 +121,15 @@ struct baddynamicports rootonlyports; struct pool inpcb_pool; void in_pcbhash_insert(struct inpcb *); -struct inpcb *in_pcbhash_lookup(struct inpcbtable *, u_int, +struct inpcb *in_pcbhash_lookup(struct inpcbtable *, uint64_t, u_int, const struct in_addr *, u_short, const struct in_addr *, u_short); int in_pcbresize(struct inpcbtable *, int); #define INPCBHASH_LOADFACTOR(_x) (((_x) * 3) / 4) -struct inpcbhead *in_pcbhash(struct inpcbtable *, u_int, +uint64_t in_pcbhash(struct inpcbtable *, u_int, const struct in_addr *, u_short, const struct in_addr *, u_short); -struct inpcbhead *in_pcblhash(struct inpcbtable *, u_int, u_short); +uint64_t in_pcblhash(struct inpcbtable *, u_int, u_short); /* * in_pcb is used for inet and inet6. in6_pcb only contains special @@ -142,7 +142,7 @@ in_init(void) IPL_SOFTNET, 0, "inpcb", NULL); } -struct inpcbhead * +uint64_t in_pcbhash(struct inpcbtable *table, u_int rdomain, const struct in_addr *faddr, u_short fport, const struct in_addr *laddr, u_short lport) @@ -156,11 +156,10 @@ in_pcbhash(struct inpcbtable *table, u_int rdomain, SipHash24_Update(&ctx, &fport, sizeof(fport)); SipHash24_Update(&ctx, laddr, sizeof(*laddr)); SipHash24_Update(&ctx, &lport, sizeof(lport)); - - return (&table->inpt_hashtbl[SipHash24_End(&ctx) & table->inpt_mask]); + return SipHash24_End(&ctx); } -struct inpcbhead * +uint64_t in_pcblhash(struct inpcbtable *table, u_int rdomain, u_short lport) { SIPHASH_CTX ctx; @@ -169,8 +168,7 @@ in_pcblhash(struct inpcbtable *table, u_int rdomain, u_short lport) SipHash24_Init(&ctx, &table->inpt_lkey); SipHash24_Update(&ctx, &nrdom, sizeof(nrdom)); SipHash24_Update(&ctx, &lport, sizeof(lport)); - - return (&table->inpt_lhashtbl[SipHash24_End(&ctx) & table->inpt_lmask]); + return SipHash24_End(&ctx); } void @@ -816,11 +814,14 @@ in_pcblookup_local(struct inpcbtable *table, void *laddrp, u_int lport_arg, struct in6_addr *laddr6 = (struct in6_addr *)laddrp; #endif struct inpcbhead *head; + uint64_t lhash; u_int rdomain; rdomain = rtable_l2(rtable); + lhash = in_pcblhash(table, rdomain, lport); + mtx_enter(&table->inpt_mtx); - head = in_pcblhash(table, rdomain, lport); + head = &table->inpt_lhashtbl[lhash & table->inpt_lmask]; LIST_FOREACH(inp, head, inp_lhash) { if (rtable_l2(inp->inp_rtableid) != rdomain) continue; @@ -1056,37 +1057,38 @@ in_pcbhash_insert(struct inpcb *inp) { struct inpcbtable *table = inp->inp_table; struct inpcbhead *head; + uint64_t hash, lhash; - NET_ASSERT_LOCKED(); MUTEX_ASSERT_LOCKED(&table->inpt_mtx); - head = in_pcblhash(table, inp->inp_rtableid, inp->inp_lport); + lhash = in_pcblhash(table, inp->inp_rtableid, inp->inp_lport); + head = &table->inpt_lhashtbl[lhash & table->inpt_lmask]; LIST_INSERT_HEAD(head, inp, inp_lhash); #ifdef INET6 if (inp->inp_flags & INP_IPV6) - head = in6_pcbhash(table, rtable_l2(inp->inp_rtableid), + hash = in6_pcbhash(table, rtable_l2(inp->inp_rtableid), &inp->inp_faddr6, inp->inp_fport, &inp->inp_laddr6, inp->inp_lport); else #endif /* INET6 */ - head = in_pcbhash(table, rtable_l2(inp->inp_rtableid), + hash = in_pcbhash(table, rtable_l2(inp->inp_rtableid), &inp->inp_faddr, inp->inp_fport, &inp->inp_laddr, inp->inp_lport); + head = &table->inpt_hashtbl[hash & table->inpt_mask]; LIST_INSERT_HEAD(head, inp, inp_hash); } struct inpcb * -in_pcbhash_lookup(struct inpcbtable *table, u_int rdomain, +in_pcbhash_lookup(struct inpcbtable *table, uint64_t hash, u_int rdomain, const struct in_addr *faddr, u_short fport, const struct in_addr *laddr, u_short lport) { struct inpcbhead *head; struct inpcb *inp; - NET_ASSERT_LOCKED(); MUTEX_ASSERT_LOCKED(&table->inpt_mtx); - head = in_pcbhash(table, rdomain, faddr, fport, laddr, lport); + head = &table->inpt_hashtbl[hash & table->inpt_mask]; LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 if (ISSET(inp->inp_flags, INP_IPV6)) @@ -1140,8 +1142,6 @@ in_pcbresize(struct inpcbtable *table, int hashsize) table->inpt_mask = nmask; table->inpt_lmask = nlmask; table->inpt_size = hashsize; - arc4random_buf(&table->inpt_key, sizeof(table->inpt_key)); - arc4random_buf(&table->inpt_lkey, sizeof(table->inpt_lkey)); TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { LIST_REMOVE(inp, inp_lhash); @@ -1172,13 +1172,18 @@ in_pcblookup(struct inpcbtable *table, struct in_addr faddr, u_int fport, struct in_addr laddr, u_int lport, u_int rtable) { struct inpcb *inp; + uint64_t hash; u_int rdomain; rdomain = rtable_l2(rtable); + hash = in_pcbhash(table, rdomain, &faddr, fport, &laddr, lport); + mtx_enter(&table->inpt_mtx); - inp = in_pcbhash_lookup(table, rdomain, &faddr, fport, &laddr, lport); + inp = in_pcbhash_lookup(table, hash, rdomain, + &faddr, fport, &laddr, lport); in_pcbref(inp); mtx_leave(&table->inpt_mtx); + #ifdef DIAGNOSTIC if (inp == NULL && in_pcbnotifymiss) { printf("%s: faddr=%08x fport=%d laddr=%08x lport=%d rdom=%u\n", @@ -1202,6 +1207,7 @@ in_pcblookup_listen(struct inpcbtable *table, struct in_addr laddr, { const struct in_addr *key1, *key2; struct inpcb *inp; + uint64_t hash; u_int16_t lport = lport_arg; u_int rdomain; @@ -1239,14 +1245,20 @@ in_pcblookup_listen(struct inpcbtable *table, struct in_addr laddr, #endif rdomain = rtable_l2(rtable); + hash = in_pcbhash(table, rdomain, &zeroin_addr, 0, key1, lport); + mtx_enter(&table->inpt_mtx); - inp = in_pcbhash_lookup(table, rdomain, &zeroin_addr, 0, key1, lport); + inp = in_pcbhash_lookup(table, hash, rdomain, + &zeroin_addr, 0, key1, lport); if (inp == NULL && key1->s_addr != key2->s_addr) { - inp = in_pcbhash_lookup(table, rdomain, + hash = in_pcbhash(table, rdomain, + &zeroin_addr, 0, key2, lport); + inp = in_pcbhash_lookup(table, hash, rdomain, &zeroin_addr, 0, key2, lport); } in_pcbref(inp); mtx_leave(&table->inpt_mtx); + #ifdef DIAGNOSTIC if (inp == NULL && in_pcbnotifymiss) { printf("%s: laddr=%08x lport=%d rdom=%u\n", diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index fd887789462..61c26da36e4 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: in_pcb.h,v 1.135 2022/10/03 16:43:52 bluhm Exp $ */ +/* $OpenBSD: in_pcb.h,v 1.136 2023/06/24 20:54:46 bluhm Exp $ */ /* $NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $ */ /* @@ -172,7 +172,7 @@ struct inpcbtable { TAILQ_HEAD(inpthead, inpcb) inpt_queue; /* [t] inet PCB queue */ struct inpcbhead *inpt_hashtbl; /* [t] local and foreign hash */ struct inpcbhead *inpt_lhashtbl; /* [t] local port hash */ - SIPHASH_KEY inpt_key, inpt_lkey; /* [t] secrets for hashes */ + SIPHASH_KEY inpt_key, inpt_lkey; /* [I] secrets for hashes */ u_long inpt_mask, inpt_lmask; /* [t] hash masks */ int inpt_count, inpt_size; /* [t] queue count, hash size */ }; @@ -294,8 +294,7 @@ struct inpcb * in_pcblookup_listen(struct inpcbtable *, struct in_addr, u_int, struct mbuf *, u_int); #ifdef INET6 -struct inpcbhead * - in6_pcbhash(struct inpcbtable *, u_int, const struct in6_addr *, +uint64_t in6_pcbhash(struct inpcbtable *, u_int, const struct in6_addr *, u_short, const struct in6_addr *, u_short); struct inpcb * in6_pcblookup(struct inpcbtable *, const struct in6_addr *, diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 1e17ada3257..5feb33c820c 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -1,4 +1,4 @@ -/* $OpenBSD: in6_pcb.c,v 1.123 2022/09/03 22:43:38 mvs Exp $ */ +/* $OpenBSD: in6_pcb.c,v 1.124 2023/06/24 20:54:46 bluhm Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -126,10 +126,10 @@ const struct in6_addr zeroin6_addr; -struct inpcb *in6_pcbhash_lookup(struct inpcbtable *, u_int, +struct inpcb *in6_pcbhash_lookup(struct inpcbtable *, uint64_t, u_int, const struct in6_addr *, u_short, const struct in6_addr *, u_short); -struct inpcbhead * +uint64_t in6_pcbhash(struct inpcbtable *table, u_int rdomain, const struct in6_addr *faddr, u_short fport, const struct in6_addr *laddr, u_short lport) @@ -143,8 +143,7 @@ in6_pcbhash(struct inpcbtable *table, u_int rdomain, SipHash24_Update(&ctx, &fport, sizeof(fport)); SipHash24_Update(&ctx, laddr, sizeof(*laddr)); SipHash24_Update(&ctx, &lport, sizeof(lport)); - - return (&table->inpt_hashtbl[SipHash24_End(&ctx) & table->inpt_mask]); + return SipHash24_End(&ctx); } int @@ -541,7 +540,7 @@ in6_pcbnotify(struct inpcbtable *table, struct sockaddr_in6 *dst, } struct inpcb * -in6_pcbhash_lookup(struct inpcbtable *table, u_int rdomain, +in6_pcbhash_lookup(struct inpcbtable *table, uint64_t hash, u_int rdomain, const struct in6_addr *faddr, u_short fport, const struct in6_addr *laddr, u_short lport) { @@ -551,7 +550,7 @@ in6_pcbhash_lookup(struct inpcbtable *table, u_int rdomain, NET_ASSERT_LOCKED(); MUTEX_ASSERT_LOCKED(&table->inpt_mtx); - head = in6_pcbhash(table, rdomain, faddr, fport, laddr, lport); + head = &table->inpt_hashtbl[hash & table->inpt_mask]; LIST_FOREACH(inp, head, inp_hash) { if (!ISSET(inp->inp_flags, INP_IPV6)) continue; @@ -581,13 +580,18 @@ in6_pcblookup(struct inpcbtable *table, const struct in6_addr *faddr, u_int fport, const struct in6_addr *laddr, u_int lport, u_int rtable) { struct inpcb *inp; + uint64_t hash; u_int rdomain; rdomain = rtable_l2(rtable); + hash = in6_pcbhash(table, rdomain, faddr, fport, laddr, lport); + mtx_enter(&table->inpt_mtx); - inp = in6_pcbhash_lookup(table, rdomain, faddr, fport, laddr, lport); + inp = in6_pcbhash_lookup(table, hash, rdomain, + faddr, fport, laddr, lport); in_pcbref(inp); mtx_leave(&table->inpt_mtx); + #ifdef DIAGNOSTIC if (inp == NULL && in_pcbnotifymiss) { printf("%s: faddr= fport=%d laddr= lport=%d rdom=%u\n", @@ -603,6 +607,7 @@ in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr, { const struct in6_addr *key1, *key2; struct inpcb *inp; + uint64_t hash; u_int rdomain; key1 = laddr; @@ -636,14 +641,20 @@ in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr, #endif rdomain = rtable_l2(rtable); + hash = in6_pcbhash(table, rdomain, &zeroin6_addr, 0, key1, lport); + mtx_enter(&table->inpt_mtx); - inp = in6_pcbhash_lookup(table, rdomain, &zeroin6_addr, 0, key1, lport); + inp = in6_pcbhash_lookup(table, hash, rdomain, + &zeroin6_addr, 0, key1, lport); if (inp == NULL && ! IN6_ARE_ADDR_EQUAL(key1, key2)) { - inp = in6_pcbhash_lookup(table, rdomain, + hash = in6_pcbhash(table, rdomain, + &zeroin6_addr, 0, key2, lport); + inp = in6_pcbhash_lookup(table, hash, rdomain, &zeroin6_addr, 0, key2, lport); } in_pcbref(inp); mtx_leave(&table->inpt_mtx); + #ifdef DIAGNOSTIC if (inp == NULL && in_pcbnotifymiss) { printf("%s: laddr= lport=%d rdom=%u\n", -- 2.20.1