From: bluhm Date: Mon, 28 Aug 2023 14:50:01 +0000 (+0000) Subject: Introduce reference counting for TCP syn cache entries. X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=a1744ce2a1c5c8a9c633cb01945266a3b1c4df75;p=openbsd Introduce reference counting for TCP syn cache entries. The syn_cache_reaper() is a hack to serialize timeouts. Unfortunately it has a race and panics sometimes with pool_do_get: syncache free list modified. Add a reference counter for timeout and list of syn cache entries. Currently list refcout is not strictly necessary due to exclusive netlock, but will be needed when we continue unlocking. Checking timeout_initialized() is not MP friendly, better do proper initialization during object allocation. Refcount in btrace helps to find leaks. bug reported and fix tested by Peter J. Philipp OK claudio@ --- diff --git a/sys/dev/dt/dt_prov_static.c b/sys/dev/dt/dt_prov_static.c index 0d67832806c..d03badccbbe 100644 --- a/sys/dev/dt/dt_prov_static.c +++ b/sys/dev/dt/dt_prov_static.c @@ -1,4 +1,4 @@ -/* $OpenBSD: dt_prov_static.c,v 1.21 2023/08/14 08:33:24 mpi Exp $ */ +/* $OpenBSD: dt_prov_static.c,v 1.22 2023/08/28 14:50:01 bluhm Exp $ */ /* * Copyright (c) 2019 Martin Pieuchot @@ -100,6 +100,7 @@ DT_STATIC_PROBE3(refcnt, ifaddr, "void *", "int", "int"); DT_STATIC_PROBE3(refcnt, ifmaddr, "void *", "int", "int"); DT_STATIC_PROBE3(refcnt, inpcb, "void *", "int", "int"); DT_STATIC_PROBE3(refcnt, rtentry, "void *", "int", "int"); +DT_STATIC_PROBE3(refcnt, syncache, "void *", "int", "int"); DT_STATIC_PROBE3(refcnt, tdb, "void *", "int", "int"); /* @@ -152,6 +153,7 @@ struct dt_probe *const dtps_static[] = { &_DT_STATIC_P(refcnt, ifmaddr), &_DT_STATIC_P(refcnt, inpcb), &_DT_STATIC_P(refcnt, rtentry), + &_DT_STATIC_P(refcnt, syncache), &_DT_STATIC_P(refcnt, tdb), }; diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 6585978069b..06a3bf7a6cf 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_input.c,v 1.389 2023/07/06 09:15:23 bluhm Exp $ */ +/* $OpenBSD: tcp_input.c,v 1.390 2023/08/28 14:50:01 bluhm Exp $ */ /* $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $ */ /* @@ -192,7 +192,6 @@ void syn_cache_put(struct syn_cache *); void syn_cache_rm(struct syn_cache *); int syn_cache_respond(struct syn_cache *, struct mbuf *, uint64_t); void syn_cache_timer(void *); -void syn_cache_reaper(void *); void syn_cache_insert(struct syn_cache *, struct tcpcb *); void syn_cache_reset(struct sockaddr *, struct sockaddr *, struct tcphdr *, u_int); @@ -3091,6 +3090,7 @@ int tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE; int tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE; int tcp_syn_use_limit = 100000; +struct pool syn_cache_pool; struct syn_cache_set tcp_syn_cache[2]; int tcp_syn_cache_active; @@ -3138,25 +3138,27 @@ syn_cache_rm(struct syn_cache *sc) TAILQ_REMOVE(&sc->sc_buckethead->sch_bucket, sc, sc_bucketq); sc->sc_tp = NULL; LIST_REMOVE(sc, sc_tpq); + refcnt_rele(&sc->sc_refcnt); sc->sc_buckethead->sch_length--; - timeout_del(&sc->sc_timer); + if (timeout_del(&sc->sc_timer)) + refcnt_rele(&sc->sc_refcnt); sc->sc_set->scs_count--; } void syn_cache_put(struct syn_cache *sc) { + if (refcnt_rele(&sc->sc_refcnt) == 0) + return; + m_free(sc->sc_ipopts); if (sc->sc_route4.ro_rt != NULL) { rtfree(sc->sc_route4.ro_rt); sc->sc_route4.ro_rt = NULL; } - timeout_set(&sc->sc_timer, syn_cache_reaper, sc); - timeout_add(&sc->sc_timer, 0); + pool_put(&syn_cache_pool, sc); } -struct pool syn_cache_pool; - /* * We don't estimate RTT with SYNs, so each packet starts with the default * RTT and each timer step has a fixed timeout value. @@ -3166,9 +3168,8 @@ do { \ TCPT_RANGESET((sc)->sc_rxtcur, \ TCPTV_SRTTDFLT * tcp_backoff[(sc)->sc_rxtshift], TCPTV_MIN, \ TCPTV_REXMTMAX); \ - if (!timeout_initialized(&(sc)->sc_timer)) \ - timeout_set_proc(&(sc)->sc_timer, syn_cache_timer, (sc)); \ - timeout_add_msec(&(sc)->sc_timer, (sc)->sc_rxtcur); \ + if (timeout_add_msec(&(sc)->sc_timer, (sc)->sc_rxtcur)) \ + refcnt_take(&(sc)->sc_refcnt); \ } while (/*CONSTCOND*/0) void @@ -3306,6 +3307,7 @@ syn_cache_insert(struct syn_cache *sc, struct tcpcb *tp) SYN_CACHE_TIMER_ARM(sc); /* Link it from tcpcb entry */ + refcnt_take(&sc->sc_refcnt); LIST_INSERT_HEAD(&tp->t_sc, sc, sc_tpq); /* Put it into the bucket. */ @@ -3336,10 +3338,11 @@ syn_cache_timer(void *arg) { struct syn_cache *sc = arg; uint64_t now; + int lastref; NET_LOCK(); if (sc->sc_flags & SCF_DEAD) - goto out; + goto freeit; now = tcp_now(); @@ -3364,26 +3367,28 @@ syn_cache_timer(void *arg) sc->sc_rxtshift++; SYN_CACHE_TIMER_ARM(sc); - out: + /* + * Decrement reference of this timer. We know there is another timer + * as we just added it. So just deref, free is not necessary. + */ + lastref = refcnt_rele(&sc->sc_refcnt); + KASSERT(lastref == 0); + (void)lastref; NET_UNLOCK(); return; dropit: tcpstat_inc(tcps_sc_timed_out); syn_cache_rm(sc); + /* Decrement reference of the timer and free object after remove. */ + lastref = refcnt_rele(&sc->sc_refcnt); + KASSERT(lastref == 0); + (void)lastref; + freeit: syn_cache_put(sc); NET_UNLOCK(); } -void -syn_cache_reaper(void *arg) -{ - struct syn_cache *sc = arg; - - pool_put(&syn_cache_pool, (sc)); - return; -} - /* * Remove syn cache created by the specified tcb entry, * because this does not make sense to keep them @@ -3826,6 +3831,8 @@ syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, m_free(ipopts); return (-1); } + refcnt_init_trace(&sc->sc_refcnt, DT_REFCNT_IDX_SYNCACHE); + timeout_set_proc(&sc->sc_timer, syn_cache_timer, sc); /* * Fill in the cache, and put the necessary IP and TCP diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 284f2b6254e..be7e986f7b7 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_var.h,v 1.169 2023/07/06 09:15:24 bluhm Exp $ */ +/* $OpenBSD: tcp_var.h,v 1.170 2023/08/28 14:50:02 bluhm Exp $ */ /* $NetBSD: tcp_var.h,v 1.17 1996/02/13 23:44:24 christos Exp $ */ /* @@ -236,6 +236,7 @@ union syn_cache_sa { struct syn_cache { TAILQ_ENTRY(syn_cache) sc_bucketq; /* link on bucket list */ + struct refcnt sc_refcnt; /* ref count list and timer */ struct timeout sc_timer; /* rexmt timer */ union { /* cached route */ struct route route4; diff --git a/sys/sys/refcnt.h b/sys/sys/refcnt.h index c0f048214fc..38fb995e1af 100644 --- a/sys/sys/refcnt.h +++ b/sys/sys/refcnt.h @@ -1,4 +1,4 @@ -/* $OpenBSD: refcnt.h,v 1.11 2023/07/06 19:46:53 kn Exp $ */ +/* $OpenBSD: refcnt.h,v 1.12 2023/08/28 14:50:02 bluhm Exp $ */ /* * Copyright (c) 2015 David Gwynne @@ -49,7 +49,8 @@ unsigned int refcnt_read(struct refcnt *); #define DT_REFCNT_IDX_IFMADDR 3 #define DT_REFCNT_IDX_INPCB 4 #define DT_REFCNT_IDX_RTENTRY 5 -#define DT_REFCNT_IDX_TDB 6 +#define DT_REFCNT_IDX_SYNCACHE 6 +#define DT_REFCNT_IDX_TDB 7 #endif /* _KERNEL */