From fc7515015f5f5f1b5deab757287233039b217e26 Mon Sep 17 00:00:00 2001
From: bluhm
Date: Mon, 12 Aug 2024 11:25:27 +0000
Subject: [PATCH] Run network protocol timer without kernel lock.

Mark slow and fast protocol timeouts as MP safe.  This means they run
on a separate thread without holding the kernel lock.  IGMP and MLD6
cannot run in parallel, they use exclusive net lock to protect
themselves.  As a performance optimization global variables are used
to skip igmp_fasttimo() and mld6_fasttimeo() if no multicast is
active.  These global variables use atomic operations and memory
barriers to work lockless.  IPv6 fragment timeout protects itself
with a mutex.  TCP timers also run without kernel lock now.  The
whole TCP stack holds exclusive net lock, so additional kernel lock
is useless.
OK mvs@
---
 sys/kern/uipc_domain.c |  8 ++++---
 sys/netinet/igmp.c     | 47 +++++++++++++++++++++++++++++-------------
 sys/netinet6/icmp6.c   |  3 +--
 sys/netinet6/mld6.c    | 45 +++++++++++++++++++++++++++++-----------
 4 files changed, 72 insertions(+), 31 deletions(-)

diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c
index da73a54b672..d96641af6be 100644
--- a/sys/kern/uipc_domain.c
+++ b/sys/kern/uipc_domain.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_domain.c,v 1.65 2024/01/11 14:15:11 bluhm Exp $ */
+/* $OpenBSD: uipc_domain.c,v 1.66 2024/08/12 11:25:27 bluhm Exp $ */
 /* $NetBSD: uipc_domain.c,v 1.14 1996/02/09 19:00:44 christos Exp $ */
 
 /*
@@ -90,8 +90,10 @@ domaininit(void)
 		max_linkhdr = 64;
 	max_hdr = max_linkhdr + max_protohdr;
 
-	timeout_set_proc(&pffast_timeout, pffasttimo, &pffast_timeout);
-	timeout_set_proc(&pfslow_timeout, pfslowtimo, &pfslow_timeout);
+	timeout_set_flags(&pffast_timeout, pffasttimo, &pffast_timeout,
+	    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
+	timeout_set_flags(&pfslow_timeout, pfslowtimo, &pfslow_timeout,
+	    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
 	timeout_add(&pffast_timeout, 1);
 	timeout_add(&pfslow_timeout, 1);
 }
diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c
index 00dba7d2088..600731ae36a 100644
--- a/sys/netinet/igmp.c
+++ b/sys/netinet/igmp.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: igmp.c,v 1.83 2023/09/16 09:33:27 mpi Exp $ */
+/* $OpenBSD: igmp.c,v 1.84 2024/08/12 11:25:27 bluhm Exp $ */
 /* $NetBSD: igmp.c,v 1.15 1996/02/13 23:41:25 christos Exp $ */
 
 /*
@@ -96,12 +96,12 @@
 
 #define IP_MULTICASTOPTS 0
 
-int igmp_timers_are_running;		/* [N] shortcut for fast timer */
+int igmp_timers_are_running;		/* [a] shortcut for fast timer */
 static LIST_HEAD(, router_info) rti_head;
 static struct mbuf *router_alert;
 struct cpumem *igmpcounters;
 
-void igmp_checktimer(struct ifnet *);
+int igmp_checktimer(struct ifnet *);
 void igmp_sendpkt(struct ifnet *, struct in_multi *, int, in_addr_t);
 int rti_fill(struct in_multi *);
 struct router_info * rti_find(struct ifnet *);
@@ -228,7 +228,7 @@ igmp_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto, int af)
 	struct in_multi *inm;
 	struct router_info *rti;
 	struct in_ifaddr *ia;
-	int timer;
+	int timer, running = 0;
 
 	igmplen = ntohs(ip->ip_len) - iphlen;
 
@@ -300,7 +300,7 @@ igmp_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto, int af)
 				inm->inm_state = IGMP_DELAYING_MEMBER;
 				inm->inm_timer = IGMP_RANDOM_DELAY(
 				    IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ);
-				igmp_timers_are_running = 1;
+				running = 1;
 			}
 		}
 	} else {
@@ -341,7 +341,7 @@ igmp_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto, int af)
 				    IGMP_DELAYING_MEMBER;
 				inm->inm_timer =
 				    IGMP_RANDOM_DELAY(timer);
-				igmp_timers_are_running = 1;
+				running = 1;
 				break;
 			case IGMP_SLEEPING_MEMBER:
 				inm->inm_state =
@@ -475,6 +475,11 @@ igmp_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto, int af)
 
 	}
 
+	if (running) {
+		membar_producer();
+		atomic_store_int(&igmp_timers_are_running, running);
+	}
+
 	/*
 	 * Pass all valid IGMP packets up to any process(es) listening
 	 * on a raw IGMP socket.
@@ -485,7 +490,7 @@ igmp_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto, int af)
 void
 igmp_joingroup(struct in_multi *inm, struct ifnet *ifp)
 {
-	int i;
+	int i, running = 0;
 
 	inm->inm_state = IGMP_IDLE_MEMBER;
 
@@ -496,9 +501,14 @@ igmp_joingroup(struct in_multi *inm, struct ifnet *ifp)
 		inm->inm_state = IGMP_DELAYING_MEMBER;
 		inm->inm_timer = IGMP_RANDOM_DELAY(
 		    IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ);
-		igmp_timers_are_running = 1;
+		running = 1;
 	} else
 		inm->inm_timer = 0;
+
+	if (running) {
+		membar_producer();
+		atomic_store_int(&igmp_timers_are_running, running);
+	}
 }
 
 void
@@ -525,6 +535,7 @@ void
 igmp_fasttimo(void)
 {
 	struct ifnet *ifp;
+	int running = 0;
 
 	/*
 	 * Quick check to see if any work needs to be done, in order
@@ -533,23 +544,29 @@ igmp_fasttimo(void)
 	 * lock intentionally.  In case it is not set due to MP races, we may
 	 * miss to check the timers.  Then run the loop at next fast timeout.
 	 */
-	if (!igmp_timers_are_running)
+	if (!atomic_load_int(&igmp_timers_are_running))
 		return;
+	membar_consumer();
 
 	NET_LOCK();
 
-	igmp_timers_are_running = 0;
-	TAILQ_FOREACH(ifp, &ifnetlist, if_list)
-		igmp_checktimer(ifp);
+	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
+		if (igmp_checktimer(ifp))
+			running = 1;
+	}
+
+	membar_producer();
+	atomic_store_int(&igmp_timers_are_running, running);
 
 	NET_UNLOCK();
 }
 
-void
+int
 igmp_checktimer(struct ifnet *ifp)
 {
 	struct in_multi *inm;
 	struct ifmaddr *ifma;
+	int running = 0;
 
 	NET_ASSERT_LOCKED();
 
@@ -570,9 +587,11 @@ igmp_checktimer(struct ifnet *ifp)
 				inm->inm_state = IGMP_IDLE_MEMBER;
 			}
 		} else {
-			igmp_timers_are_running = 1;
+			running = 1;
 		}
 	}
+
+	return (running);
 }
 
 void
diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c
index 04ad1b3e023..dbf8177a1a5 100644
--- a/sys/netinet6/icmp6.c
+++ b/sys/netinet6/icmp6.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: icmp6.c,v 1.254 2024/07/14 18:53:39 bluhm Exp $ */
+/* $OpenBSD: icmp6.c,v 1.255 2024/08/12 11:25:27 bluhm Exp $ */
 /* $KAME: icmp6.c,v 1.217 2001/06/20 15:03:29 jinmei Exp $ */
 
 /*
@@ -1198,7 +1198,6 @@ icmp6_reflect(struct mbuf **mp, size_t off, struct sockaddr *sa)
 void
 icmp6_fasttimo(void)
 {
-
 	mld6_fasttimeo();
 }
 
diff --git a/sys/netinet6/mld6.c b/sys/netinet6/mld6.c
index 0c5b49e022e..61ff4274c79 100644
--- a/sys/netinet6/mld6.c
+++ b/sys/netinet6/mld6.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: mld6.c,v 1.62 2024/02/13 12:22:09 bluhm Exp $ */
+/* $OpenBSD: mld6.c,v 1.63 2024/08/12 11:25:27 bluhm Exp $ */
 /* $KAME: mld6.c,v 1.26 2001/02/16 14:50:35 itojun Exp $ */
 
 /*
@@ -85,9 +85,9 @@
 #include
 
 static struct ip6_pktopts ip6_opts;
-int mld6_timers_are_running;		/* [N] shortcut for fast timer */
+int mld6_timers_are_running;		/* [a] shortcut for fast timer */
 
-void mld6_checktimer(struct ifnet *);
+int mld6_checktimer(struct ifnet *);
 static void mld6_sendpkt(struct in6_multi *, int, const struct in6_addr *);
 
 void
@@ -118,6 +118,7 @@ mld6_start_listening(struct in6_multi *in6m)
 {
 	/* XXX: These are necessary for KAME's link-local hack */
 	struct in6_addr all_nodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
+	int running = 0;
 
 	/*
 	 * RFC2710 page 10:
@@ -138,7 +139,12 @@ mld6_start_listening(struct in6_multi *in6m)
 		    MLD_RANDOM_DELAY(MLD_V1_MAX_RI * PR_FASTHZ);
 		in6m->in6m_state =
 		    MLD_IREPORTEDLAST;
-		mld6_timers_are_running = 1;
+		running = 1;
+	}
+
+	if (running) {
+		membar_producer();
+		atomic_store_int(&mld6_timers_are_running, running);
 	}
 }
 
@@ -169,6 +175,7 @@ mld6_input(struct mbuf *m, int off)
 	struct in6_multi *in6m;
 	struct ifmaddr *ifma;
 	int timer;		/* timer value in the MLD query header */
+	int running = 0;
 	/* XXX: These are necessary for KAME's link-local hack */
 	struct in6_addr all_nodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 
@@ -272,7 +279,7 @@ mld6_input(struct mbuf *m, int off)
 			    in6m->in6m_timer > timer) {
 				in6m->in6m_timer =
 				    MLD_RANDOM_DELAY(timer);
-				mld6_timers_are_running = 1;
+				running = 1;
 			}
 		}
 	}
@@ -323,8 +330,13 @@ mld6_input(struct mbuf *m, int off)
 #endif
 		break;
 	}
-	if_put(ifp);
 
+	if (running) {
+		membar_producer();
+		atomic_store_int(&mld6_timers_are_running, running);
+	}
+
+	if_put(ifp);
 	m_freem(m);
 }
 
@@ -332,6 +344,7 @@ void
 mld6_fasttimeo(void)
 {
 	struct ifnet *ifp;
+	int running = 0;
 
 	/*
 	 * Quick check to see if any work needs to be done, in order
@@ -340,23 +353,29 @@ mld6_fasttimeo(void)
 	 * lock intentionally.  In case it is not set due to MP races, we may
 	 * miss to check the timers.  Then run the loop at next fast timeout.
 	 */
-	if (!mld6_timers_are_running)
+	if (!atomic_load_int(&mld6_timers_are_running))
 		return;
+	membar_consumer();
 
 	NET_LOCK();
 
-	mld6_timers_are_running = 0;
-	TAILQ_FOREACH(ifp, &ifnetlist, if_list)
-		mld6_checktimer(ifp);
+	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
+		if (mld6_checktimer(ifp))
+			running = 1;
+	}
+
+	membar_producer();
+	atomic_store_int(&mld6_timers_are_running, running);
 
 	NET_UNLOCK();
 }
 
-void
+int
 mld6_checktimer(struct ifnet *ifp)
 {
 	struct in6_multi *in6m;
 	struct ifmaddr *ifma;
+	int running = 0;
 
 	NET_ASSERT_LOCKED();
 
@@ -370,9 +389,11 @@ mld6_checktimer(struct ifnet *ifp)
 			mld6_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
 			in6m->in6m_state = MLD_IREPORTEDLAST;
 		} else {
-			mld6_timers_are_running = 1;
+			running = 1;
 		}
 	}
+
+	return (running);
 }
 
 static void
-- 
2.20.1
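
Illustrative note, not part of the committed diff: the membar_producer()/membar_consumer() pairing above implements a lockless "is any timer armed?" shortcut. Producers publish their timer state before setting the flag, and the periodic fast timeout only takes the exclusive net lock when the flag is set; a racy missed update is harmless because the next tick re-checks. Below is a minimal userland sketch of that pattern under stated assumptions: C11 atomics stand in for atomic_load_int()/atomic_store_int(), memory_order_release/acquire are slightly stronger analogues of membar_producer()/membar_consumer(), and a pthread mutex stands in for the net lock. All names (timers_are_running, timer_ticks, arm_timer, fast_timeout) are hypothetical and exist only for this example.

/* lockless_fasttimo.c -- sketch of the flag-plus-barrier shortcut, not kernel code */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t net_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int timers_are_running;	/* [a] shortcut for the fast timer */
static int timer_ticks;			/* protected by net_lock */

/* Producer: arm a timer under the lock, then publish the shortcut flag. */
static void
arm_timer(int ticks)
{
	pthread_mutex_lock(&net_lock);
	timer_ticks = ticks;
	/* analogue of membar_producer() followed by atomic_store_int() */
	atomic_store_explicit(&timers_are_running, 1, memory_order_release);
	pthread_mutex_unlock(&net_lock);
}

/* Consumer: the periodic tick, kept cheap while no timer is armed. */
static void
fast_timeout(void)
{
	int running = 0;

	/*
	 * Analogue of atomic_load_int() plus membar_consumer(); reading
	 * the flag without the lock may miss a fresh update, which only
	 * delays the work until the next tick.
	 */
	if (!atomic_load_explicit(&timers_are_running, memory_order_acquire))
		return;

	pthread_mutex_lock(&net_lock);
	if (timer_ticks > 0 && --timer_ticks > 0)
		running = 1;
	/* republish whether any timer is still pending */
	atomic_store_explicit(&timers_are_running, running,
	    memory_order_release);
	pthread_mutex_unlock(&net_lock);
}

int
main(void)
{
	arm_timer(3);
	while (atomic_load_explicit(&timers_are_running, memory_order_acquire))
		fast_timeout();
	printf("fast timer idle again, ticks=%d\n", timer_ticks);
	return 0;
}

The flag is purely an optimization: correctness still comes from the exclusive lock that serializes the timer state; the atomics and barriers only make it safe to peek at the flag without taking that lock first.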