From 8c664ca5427760632512c03b468d61ac8f9d2f51 Mon Sep 17 00:00:00 2001 From: bluhm Date: Sat, 3 Sep 2022 19:22:19 +0000 Subject: [PATCH] Use a mutex to update tcp_maxidle, tcp_iss, and tcp_now. This removes pressure from the exclusive netlock in tcp_slowtimo(). Reading is done atomically. Ensure that the tcp_now value is read only once per function to provide consistent time. OK yasuoka@ --- sys/net/pf.c | 4 +- sys/netinet/tcp_input.c | 83 +++++++++++++++++++++------------------- sys/netinet/tcp_output.c | 17 ++++---- sys/netinet/tcp_subr.c | 34 +++++++++++----- sys/netinet/tcp_timer.c | 38 ++++++++++++------ sys/netinet/tcp_usrreq.c | 16 ++++---- sys/netinet/tcp_var.h | 9 +++-- 7 files changed, 119 insertions(+), 82 deletions(-) diff --git a/sys/net/pf.c b/sys/net/pf.c index fd36dbd4155..1b157b84ee9 100644 --- a/sys/net/pf.c +++ b/sys/net/pf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.1139 2022/09/03 14:57:54 yasuoka Exp $ */ +/* $OpenBSD: pf.c,v 1.1140 2022/09/03 19:22:19 bluhm Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -3580,7 +3580,7 @@ pf_tcp_iss(struct pf_pdesc *pd) } SHA512Final(digest.bytes, &ctx); pf_tcp_iss_off += 4096; - return (digest.words[0] + tcp_iss + pf_tcp_iss_off); + return (digest.words[0] + READ_ONCE(tcp_iss) + pf_tcp_iss_off); } void diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 59ef806c645..a37da1dfd99 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_input.c,v 1.379 2022/08/30 11:53:04 bluhm Exp $ */ +/* $OpenBSD: tcp_input.c,v 1.380 2022/09/03 19:22:19 bluhm Exp $ */ /* $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $ */ /* @@ -190,7 +190,7 @@ void tcp_newreno_partialack(struct tcpcb *, struct tcphdr *); void syn_cache_put(struct syn_cache *); void syn_cache_rm(struct syn_cache *); -int syn_cache_respond(struct syn_cache *, struct mbuf *); +int syn_cache_respond(struct syn_cache *, struct mbuf *, uint32_t); void syn_cache_timer(void *); void 
syn_cache_reaper(void *); void syn_cache_insert(struct syn_cache *, struct tcpcb *); @@ -198,10 +198,10 @@ void syn_cache_reset(struct sockaddr *, struct sockaddr *, struct tcphdr *, u_int); int syn_cache_add(struct sockaddr *, struct sockaddr *, struct tcphdr *, unsigned int, struct socket *, struct mbuf *, u_char *, int, - struct tcp_opt_info *, tcp_seq *); + struct tcp_opt_info *, tcp_seq *, uint32_t); struct socket *syn_cache_get(struct sockaddr *, struct sockaddr *, struct tcphdr *, unsigned int, unsigned int, struct socket *, - struct mbuf *); + struct mbuf *, uint32_t); struct syn_cache *syn_cache_lookup(struct sockaddr *, struct sockaddr *, struct syn_cache_head **, u_int); @@ -375,6 +375,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto, int af) short ostate; caddr_t saveti; tcp_seq iss, *reuse = NULL; + uint32_t now; u_long tiwin; struct tcp_opt_info opti; struct tcphdr *th; @@ -389,6 +390,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto, int af) opti.ts_present = 0; opti.maxseg = 0; + now = READ_ONCE(tcp_now); /* * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN @@ -698,7 +700,7 @@ findpcb: case TH_ACK: so = syn_cache_get(&src.sa, &dst.sa, - th, iphlen, tlen, so, m); + th, iphlen, tlen, so, m, now); if (so == NULL) { /* * We don't have a SYN for @@ -830,7 +832,8 @@ findpcb: */ if (so->so_qlen > so->so_qlimit || syn_cache_add(&src.sa, &dst.sa, th, iphlen, - so, m, optp, optlen, &opti, reuse) == -1) { + so, m, optp, optlen, &opti, reuse, now) + == -1) { tcpstat_inc(tcps_dropsyn); goto drop; } @@ -857,7 +860,7 @@ findpcb: * Segment received on connection. * Reset idle time and keep-alive timer. 
*/ - tp->t_rcvtime = tcp_now; + tp->t_rcvtime = now; if (TCPS_HAVEESTABLISHED(tp->t_state)) TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); @@ -873,7 +876,7 @@ findpcb: if (optp) #endif if (tcp_dooptions(tp, optp, optlen, th, m, iphlen, &opti, - m->m_pkthdr.ph_rtableid)) + m->m_pkthdr.ph_rtableid, now)) goto drop; if (opti.ts_present && opti.ts_ecr) { @@ -883,7 +886,7 @@ findpcb: opti.ts_ecr -= tp->ts_modulate; /* make sure ts_ecr is sensible */ - rtt_test = tcp_now - opti.ts_ecr; + rtt_test = now - opti.ts_ecr; if (rtt_test < 0 || rtt_test > TCP_RTT_MAX) opti.ts_ecr = 0; } @@ -926,7 +929,7 @@ findpcb: * Fix from Braden, see Stevens p. 870 */ if (opti.ts_present && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { - tp->ts_recent_age = tcp_now; + tp->ts_recent_age = now; tp->ts_recent = opti.ts_val; } @@ -940,15 +943,14 @@ findpcb: */ tcpstat_inc(tcps_predack); if (opti.ts_present && opti.ts_ecr) - tcp_xmit_timer(tp, tcp_now - opti.ts_ecr); + tcp_xmit_timer(tp, now - opti.ts_ecr); else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) - tcp_xmit_timer(tp, - tcp_now - tp->t_rtttime); + tcp_xmit_timer(tp, now - tp->t_rtttime); acked = th->th_ack - tp->snd_una; tcpstat_pkt(tcps_rcvackpack, tcps_rcvackbyte, acked); - tp->t_rcvacktime = tcp_now; + tp->t_rcvacktime = now; ND6_HINT(tp); sbdrop(so, &so->so_snd, acked); @@ -1189,7 +1191,7 @@ findpcb: * use its rtt as our initial srtt & rtt var. */ if (tp->t_rtttime) - tcp_xmit_timer(tp, tcp_now - tp->t_rtttime); + tcp_xmit_timer(tp, now - tp->t_rtttime); /* * Since new data was acked (the SYN), open the * congestion window by one MSS. We do this @@ -1270,7 +1272,7 @@ trimthenstep6: TSTMP_LT(opti.ts_val, tp->ts_recent)) { /* Check to see if ts_recent is over 24 days old. */ - if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) { + if ((int)(now - tp->ts_recent_age) > TCP_PAWS_IDLE) { /* * Invalidate ts_recent. 
If this segment updates * ts_recent, the age will be reset later and ts_recent @@ -1380,7 +1382,7 @@ trimthenstep6: */ if (opti.ts_present && TSTMP_GEQ(opti.ts_val, tp->ts_recent) && SEQ_LEQ(th->th_seq, tp->last_ack_sent)) { - tp->ts_recent_age = tcp_now; + tp->ts_recent_age = now; tp->ts_recent = opti.ts_val; } @@ -1683,7 +1685,7 @@ trimthenstep6: } acked = th->th_ack - tp->snd_una; tcpstat_pkt(tcps_rcvackpack, tcps_rcvackbyte, acked); - tp->t_rcvacktime = tcp_now; + tp->t_rcvacktime = now; /* * If we have a timestamp reply, update smoothed @@ -1695,9 +1697,9 @@ trimthenstep6: * Recompute the initial retransmit timer. */ if (opti.ts_present && opti.ts_ecr) - tcp_xmit_timer(tp, tcp_now - opti.ts_ecr); + tcp_xmit_timer(tp, now - opti.ts_ecr); else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) - tcp_xmit_timer(tp, tcp_now - tp->t_rtttime); + tcp_xmit_timer(tp, now - tp->t_rtttime); /* * If all outstanding data is acked, stop retransmit @@ -2092,12 +2094,12 @@ dropwithreset: goto drop; if (tiflags & TH_ACK) { tcp_respond(tp, mtod(m, caddr_t), th, (tcp_seq)0, th->th_ack, - TH_RST, m->m_pkthdr.ph_rtableid); + TH_RST, m->m_pkthdr.ph_rtableid, now); } else { if (tiflags & TH_SYN) tlen++; tcp_respond(tp, mtod(m, caddr_t), th, th->th_seq + tlen, - (tcp_seq)0, TH_RST|TH_ACK, m->m_pkthdr.ph_rtableid); + (tcp_seq)0, TH_RST|TH_ACK, m->m_pkthdr.ph_rtableid, now); } m_freem(m); in_pcbunref(inp); @@ -2118,7 +2120,7 @@ drop: int tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, struct tcphdr *th, struct mbuf *m, int iphlen, struct tcp_opt_info *oi, - u_int rtableid) + u_int rtableid, uint32_t now) { u_int16_t mss = 0; int opt, optlen; @@ -2187,7 +2189,7 @@ tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, struct tcphdr *th, */ tp->t_flags |= TF_RCVD_TSTMP; tp->ts_recent = oi->ts_val; - tp->ts_recent_age = tcp_now; + tp->ts_recent_age = now; break; case TCPOPT_SACK_PERMITTED: @@ -3169,8 +3171,6 @@ do { \ timeout_add(&(sc)->sc_timer, (sc)->sc_rxtcur * (hz / PR_SLOWHZ)); 
\ } while (/*CONSTCOND*/0) -#define SYN_CACHE_TIMESTAMP(sc) tcp_now + (sc)->sc_modulate - void syn_cache_init(void) { @@ -3335,11 +3335,14 @@ void syn_cache_timer(void *arg) { struct syn_cache *sc = arg; + uint32_t now; NET_LOCK(); if (sc->sc_flags & SCF_DEAD) goto out; + now = READ_ONCE(tcp_now); + if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) { /* Drop it -- too many retransmissions. */ goto dropit; @@ -3355,7 +3358,7 @@ syn_cache_timer(void *arg) goto dropit; tcpstat_inc(tcps_sc_retransmitted); - (void) syn_cache_respond(sc, NULL); + (void) syn_cache_respond(sc, NULL, now); /* Advance the timer back-off. */ sc->sc_rxtshift++; @@ -3466,7 +3469,7 @@ syn_cache_lookup(struct sockaddr *src, struct sockaddr *dst, */ struct socket * syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, - u_int hlen, u_int tlen, struct socket *so, struct mbuf *m) + u_int hlen, u_int tlen, struct socket *so, struct mbuf *m, uint32_t now) { struct syn_cache *sc; struct syn_cache_head *scp; @@ -3488,7 +3491,7 @@ syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, if ((th->th_ack != sc->sc_iss + 1) || SEQ_LEQ(th->th_seq, sc->sc_irs) || SEQ_GT(th->th_seq, sc->sc_irs + 1 + sc->sc_win)) { - (void) syn_cache_respond(sc, m); + (void) syn_cache_respond(sc, m, now); return ((struct socket *)(-1)); } @@ -3622,10 +3625,10 @@ syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, #endif tcp_rcvseqinit(tp); tp->t_state = TCPS_SYN_RECEIVED; - tp->t_rcvtime = tcp_now; - tp->t_sndtime = tcp_now; - tp->t_rcvacktime = tcp_now; - tp->t_sndacktime = tcp_now; + tp->t_rcvtime = now; + tp->t_sndtime = now; + tp->t_rcvacktime = now; + tp->t_sndacktime = now; TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); tcpstat_inc(tcps_accepts); @@ -3655,7 +3658,7 @@ syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, resetandabort: tcp_respond(NULL, mtod(m, caddr_t), th, (tcp_seq)0, th->th_ack, TH_RST, - 
m->m_pkthdr.ph_rtableid); + m->m_pkthdr.ph_rtableid, now); abort: m_freem(m); if (so != NULL) @@ -3741,7 +3744,7 @@ syn_cache_unreach(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, int syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, u_int iphlen, struct socket *so, struct mbuf *m, u_char *optp, int optlen, - struct tcp_opt_info *oi, tcp_seq *issp) + struct tcp_opt_info *oi, tcp_seq *issp, uint32_t now) { struct tcpcb tb, *tp; long win; @@ -3779,7 +3782,7 @@ syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, #endif tb.t_state = TCPS_LISTEN; if (tcp_dooptions(&tb, optp, optlen, th, m, iphlen, oi, - sotoinpcb(so)->inp_rtableid)) + sotoinpcb(so)->inp_rtableid, now)) return (-1); } @@ -3811,7 +3814,7 @@ syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, sc->sc_ipopts = ipopts; } sc->sc_timestamp = tb.ts_recent; - if (syn_cache_respond(sc, m) == 0) { + if (syn_cache_respond(sc, m, now) == 0) { tcpstat_inc(tcps_sndacks); tcpstat_inc(tcps_sndtotal); } @@ -3895,7 +3898,7 @@ syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, sc->sc_flags |= SCF_SIGNATURE; #endif sc->sc_tp = tp; - if (syn_cache_respond(sc, m) == 0) { + if (syn_cache_respond(sc, m, now) == 0) { syn_cache_insert(sc, tp); tcpstat_inc(tcps_sndacks); tcpstat_inc(tcps_sndtotal); @@ -3908,7 +3911,7 @@ syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, } int -syn_cache_respond(struct syn_cache *sc, struct mbuf *m) +syn_cache_respond(struct syn_cache *sc, struct mbuf *m, uint32_t now) { u_int8_t *optp; int optlen, error; @@ -4034,7 +4037,7 @@ syn_cache_respond(struct syn_cache *sc, struct mbuf *m) u_int32_t *lp = (u_int32_t *)(optp); /* Form timestamp option as shown in appendix A of RFC 1323. 
*/ *lp++ = htonl(TCPOPT_TSTAMP_HDR); - *lp++ = htonl(SYN_CACHE_TIMESTAMP(sc)); + *lp++ = htonl(now + sc->sc_modulate); *lp = htonl(sc->sc_timestamp); optp += TCPOLEN_TSTAMP_APPA; } diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index dacfd7cb84d..815fe43bad6 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_output.c,v 1.132 2022/08/11 09:13:21 claudio Exp $ */ +/* $OpenBSD: tcp_output.c,v 1.133 2022/09/03 19:22:19 bluhm Exp $ */ /* $NetBSD: tcp_output.c,v 1.16 1997/06/03 16:17:09 kml Exp $ */ /* @@ -203,6 +203,7 @@ tcp_output(struct tcpcb *tp) int idle, sendalot = 0; int i, sack_rxmit = 0; struct sackhole *p; + uint32_t now; #ifdef TCP_SIGNATURE unsigned int sigoff; #endif /* TCP_SIGNATURE */ @@ -221,6 +222,8 @@ tcp_output(struct tcpcb *tp) return (EINVAL); #endif /* defined(TCP_SIGNATURE) && defined(DIAGNOSTIC) */ + now = READ_ONCE(tcp_now); + /* * Determine length of data that should be transmitted, * and flags that will be used. @@ -228,7 +231,7 @@ tcp_output(struct tcpcb *tp) * to send, then transmit; otherwise, investigate further. */ idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una); - if (idle && (tcp_now - tp->t_rcvtime) >= tp->t_rxtcur) + if (idle && (now - tp->t_rcvtime) >= tp->t_rxtcur) /* * We have been idle for "a while" and no acks are * expected to clock out any data we send -- @@ -539,13 +542,13 @@ send: /* Form timestamp option as shown in appendix A of RFC 1323. */ *lp++ = htonl(TCPOPT_TSTAMP_HDR); - *lp++ = htonl(tcp_now + tp->ts_modulate); + *lp++ = htonl(now + tp->ts_modulate); *lp = htonl(tp->ts_recent); optlen += TCPOLEN_TSTAMP_APPA; /* Set receive buffer autosizing timestamp. 
*/ if (tp->rfbuf_ts == 0) - tp->rfbuf_ts = tcp_now; + tp->rfbuf_ts = now; } @@ -691,7 +694,7 @@ send: */ if (off + len == so->so_snd.sb_cc && !soissending(so)) flags |= TH_PUSH; - tp->t_sndtime = tcp_now; + tp->t_sndtime = now; } else { if (tp->t_flags & TF_ACKNOW) tcpstat_inc(tcps_sndacks); @@ -924,7 +927,7 @@ send: * not currently timing anything. */ if (tp->t_rtttime == 0) { - tp->t_rtttime = tcp_now; + tp->t_rtttime = now; tp->t_rtseq = startseq; tcpstat_inc(tcps_segstimed); } @@ -1123,7 +1126,7 @@ out: if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) tp->rcv_adv = tp->rcv_nxt + win; tp->last_ack_sent = tp->rcv_nxt; - tp->t_sndacktime = tcp_now; + tp->t_sndacktime = now; tp->t_flags &= ~TF_ACKNOW; TCP_TIMER_DISARM(tp, TCPT_DELACK); if (sendalot) diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 53be81074b0..e1da481895d 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_subr.c,v 1.186 2022/08/30 11:53:04 bluhm Exp $ */ +/* $OpenBSD: tcp_subr.c,v 1.187 2022/09/03 19:22:19 bluhm Exp $ */ /* $NetBSD: tcp_subr.c,v 1.22 1996/02/13 23:44:00 christos Exp $ */ /* @@ -71,6 +71,7 @@ #include #include #include +#include #include #include #include @@ -98,6 +99,14 @@ #include #include +/* + * Locks used to protect struct members in this file: + * I immutable after creation + * T tcp_timer_mtx global tcp timer data structures + */ + +struct mutex tcp_timer_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); + /* patchable/settable parameters for tcp */ int tcp_mssdflt = TCP_MSS; int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; @@ -111,8 +120,6 @@ int tcp_do_ecn = 0; /* RFC3168 ECN enabled/disabled? 
*/ #endif int tcp_do_rfc3390 = 2; /* Increase TCP's Initial Window to 10*mss */ -u_int32_t tcp_now = 1; - #ifndef TCB_INITIAL_HASH_SIZE #define TCB_INITIAL_HASH_SIZE 128 #endif @@ -126,9 +133,10 @@ struct pool sackhl_pool; struct cpumem *tcpcounters; /* tcp statistics */ -u_char tcp_secret[16]; -SHA2_CTX tcp_secret_ctx; -tcp_seq tcp_iss; +u_char tcp_secret[16]; /* [I] */ +SHA2_CTX tcp_secret_ctx; /* [I] */ +tcp_seq tcp_iss; /* [T] updated by timer and connection */ +uint32_t tcp_now; /* [T] incremented by slow timer */ /* * Tcp initialization @@ -137,6 +145,7 @@ void tcp_init(void) { tcp_iss = 1; /* wrong */ + tcp_now = 1; pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, IPL_SOFTNET, 0, "tcpcb", NULL); pool_init(&tcpqe_pool, sizeof(struct tcpqent), 0, IPL_SOFTNET, 0, @@ -281,7 +290,7 @@ tcp_template(struct tcpcb *tp) */ void tcp_respond(struct tcpcb *tp, caddr_t template, struct tcphdr *th0, - tcp_seq ack, tcp_seq seq, int flags, u_int rtableid) + tcp_seq ack, tcp_seq seq, int flags, u_int rtableid, uint32_t now) { int tlen; int win = 0; @@ -362,7 +371,7 @@ tcp_respond(struct tcpcb *tp, caddr_t template, struct tcphdr *th0, u_int32_t *lp = (u_int32_t *)(th + 1); /* Form timestamp option as shown in appendix A of RFC 1323. 
*/ *lp++ = htonl(TCPOPT_TSTAMP_HDR); - *lp++ = htonl(tcp_now + tp->ts_modulate); + *lp++ = htonl(now + tp->ts_modulate); *lp = htonl(tp->ts_recent); tlen += TCPOLEN_TSTAMP_APPA; th->th_off = (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_APPA) >> 2; @@ -913,6 +922,12 @@ tcp_set_iss_tsm(struct tcpcb *tp) uint32_t words[2]; } digest; u_int rdomain = rtable_l2(tp->t_inpcb->inp_rtableid); + tcp_seq iss; + + mtx_enter(&tcp_timer_mtx); + tcp_iss += TCP_ISS_CONN_INC; + iss = tcp_iss; + mtx_leave(&tcp_timer_mtx); ctx = tcp_secret_ctx; SHA512Update(&ctx, &rdomain, sizeof(rdomain)); @@ -930,8 +945,7 @@ tcp_set_iss_tsm(struct tcpcb *tp) sizeof(struct in_addr)); } SHA512Final(digest.bytes, &ctx); - tcp_iss += TCP_ISS_CONN_INC; - tp->iss = digest.words[0] + tcp_iss; + tp->iss = digest.words[0] + iss; tp->ts_modulate = digest.words[1]; } diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 08a91aa1b16..86f4ccd4cdd 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_timer.c,v 1.69 2022/01/02 22:36:04 jsg Exp $ */ +/* $OpenBSD: tcp_timer.c,v 1.70 2022/09/03 19:22:19 bluhm Exp $ */ /* $NetBSD: tcp_timer.c,v 1.14 1996/02/13 23:44:09 christos Exp $ */ /* @@ -55,11 +55,16 @@ #include #include +/* + * Locks used to protect struct members in this file: + * T tcp_timer_mtx global tcp timer data structures + */ + int tcp_always_keepalive; int tcp_keepidle; int tcp_keepintvl; int tcp_maxpersistidle; /* max idle time in persist */ -int tcp_maxidle; +int tcp_maxidle; /* [T] max idle time for keep alive */ /* * Time to delay the ACK. 
This is initialized in tcp_init(), unless @@ -144,13 +149,11 @@ tcp_timer_delack(void *arg) void tcp_slowtimo(void) { - NET_LOCK(); - + mtx_enter(&tcp_timer_mtx); tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl; tcp_iss += TCP_ISSINCR2/PR_SLOWHZ; /* increment iss */ tcp_now++; /* for timestamps */ - - NET_UNLOCK(); + mtx_leave(&tcp_timer_mtx); } /* @@ -392,6 +395,7 @@ tcp_timer_persist(void *arg) struct tcpcb *otp = NULL, *tp = arg; uint32_t rto; short ostate; + uint32_t now; NET_LOCK(); /* Ignore canceled timeouts or timeouts that have been rescheduled. */ @@ -418,9 +422,10 @@ tcp_timer_persist(void *arg) rto = TCP_REXMTVAL(tp); if (rto < tp->t_rttmin) rto = tp->t_rttmin; + now = READ_ONCE(tcp_now); if (tp->t_rxtshift == TCP_MAXRXTSHIFT && - ((tcp_now - tp->t_rcvtime) >= tcp_maxpersistidle || - (tcp_now - tp->t_rcvtime) >= rto * tcp_totbackoff)) { + ((now - tp->t_rcvtime) >= tcp_maxpersistidle || + (now - tp->t_rcvtime) >= rto * tcp_totbackoff)) { tcpstat_inc(tcps_persistdrop); tp = tcp_drop(tp, ETIMEDOUT); goto out; @@ -458,8 +463,13 @@ tcp_timer_keep(void *arg) if ((tcp_always_keepalive || tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) && tp->t_state <= TCPS_CLOSING) { - if ((tcp_maxidle > 0) && - ((tcp_now - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle)) + int maxidle; + uint32_t now; + + maxidle = READ_ONCE(tcp_maxidle); + now = READ_ONCE(tcp_now); + if ((maxidle > 0) && + ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle)) goto dropit; /* * Send a packet designed to force a response @@ -475,7 +485,7 @@ tcp_timer_keep(void *arg) */ tcpstat_inc(tcps_keepprobe); tcp_respond(tp, mtod(tp->t_template, caddr_t), - NULL, tp->rcv_nxt, tp->snd_una - 1, 0, 0); + NULL, tp->rcv_nxt, tp->snd_una - 1, 0, 0, now); TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl); } else TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); @@ -496,6 +506,8 @@ tcp_timer_2msl(void *arg) { struct tcpcb *otp = NULL, *tp = arg; short ostate; + int maxidle; + uint32_t now; NET_LOCK(); /* Ignore canceled timeouts 
or timeouts that have been rescheduled. */ @@ -510,8 +522,10 @@ tcp_timer_2msl(void *arg) } tcp_timer_freesack(tp); + maxidle = READ_ONCE(tcp_maxidle); + now = READ_ONCE(tcp_now); if (tp->t_state != TCPS_TIME_WAIT && - ((tcp_maxidle == 0) || ((tcp_now - tp->t_rcvtime) <= tcp_maxidle))) + ((maxidle == 0) || ((now - tp->t_rcvtime) <= maxidle))) TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl); else tp = tcp_close(tp); diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 7f64090f83d..5630404e576 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_usrreq.c,v 1.205 2022/09/03 18:48:50 mvs Exp $ */ +/* $OpenBSD: tcp_usrreq.c,v 1.206 2022/09/03 19:22:19 bluhm Exp $ */ /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ /* @@ -271,6 +271,7 @@ tcp_fill_info(struct tcpcb *tp, struct socket *so, struct mbuf *m) struct proc *p = curproc; struct tcp_info *ti; u_int t = 1000000 / PR_SLOWHZ; + uint32_t now; if (sizeof(*ti) > MLEN) { MCLGETL(m, M_WAITOK, sizeof(*ti)); @@ -280,6 +281,7 @@ tcp_fill_info(struct tcpcb *tp, struct socket *so, struct mbuf *m) ti = mtod(m, struct tcp_info *); m->m_len = sizeof(*ti); memset(ti, 0, sizeof(*ti)); + now = READ_ONCE(tcp_now); ti->tcpi_state = tp->t_state; if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) @@ -300,10 +302,10 @@ tcp_fill_info(struct tcpcb *tp, struct socket *so, struct mbuf *m) ti->tcpi_snd_mss = tp->t_maxseg; ti->tcpi_rcv_mss = tp->t_peermss; - ti->tcpi_last_data_sent = (tcp_now - tp->t_sndtime) * t; - ti->tcpi_last_ack_sent = (tcp_now - tp->t_sndacktime) * t; - ti->tcpi_last_data_recv = (tcp_now - tp->t_rcvtime) * t; - ti->tcpi_last_ack_recv = (tcp_now - tp->t_rcvacktime) * t; + ti->tcpi_last_data_sent = (now - tp->t_sndtime) * t; + ti->tcpi_last_ack_sent = (now - tp->t_sndacktime) * t; + ti->tcpi_last_data_recv = (now - tp->t_rcvtime) * t; + ti->tcpi_last_ack_recv = (now - tp->t_rcvacktime) * t; ti->tcpi_rtt = 
((uint64_t)tp->t_srtt * t) >> (TCP_RTT_SHIFT + TCP_RTT_BASE_SHIFT); @@ -341,9 +343,9 @@ tcp_fill_info(struct tcpcb *tp, struct socket *so, struct mbuf *m) ti->tcpi_snd_max = tp->snd_max - tp->iss; ti->tcpi_ts_recent = tp->ts_recent; /* XXX value from the wire */ - ti->tcpi_ts_recent_age = (tcp_now - tp->ts_recent_age) * t; + ti->tcpi_ts_recent_age = (now - tp->ts_recent_age) * t; ti->tcpi_rfbuf_cnt = tp->rfbuf_cnt; - ti->tcpi_rfbuf_ts = (tcp_now - tp->rfbuf_ts) * t; + ti->tcpi_rfbuf_ts = (now - tp->rfbuf_ts) * t; ti->tcpi_so_rcv_sb_cc = so->so_rcv.sb_cc; ti->tcpi_so_rcv_sb_hiwat = so->so_rcv.sb_hiwat; diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index f72e4d37b6e..d7d643b9aa9 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_var.h,v 1.155 2022/09/03 18:48:50 mvs Exp $ */ +/* $OpenBSD: tcp_var.h,v 1.156 2022/09/03 19:22:19 bluhm Exp $ */ /* $NetBSD: tcp_var.h,v 1.17 1996/02/13 23:44:24 christos Exp $ */ /* @@ -638,6 +638,7 @@ tcpstat_pkt(enum tcpstat_counters pcounter, enum tcpstat_counters bcounter, counters_pkt(tcpcounters, pcounter, bcounter, v); } +extern struct mutex tcp_timer_mtx; extern const struct pr_usrreqs tcp_usrreqs; #ifdef INET6 @@ -646,7 +647,7 @@ extern const struct pr_usrreqs tcp6_usrreqs; extern struct pool tcpcb_pool; extern struct inpcbtable tcbtable; /* head of queue of active tcpcb's */ -extern u_int32_t tcp_now; /* for RFC 1323 timestamps */ +extern uint32_t tcp_now; /* for RFC 1323 timestamps */ extern int tcp_do_rfc1323; /* enabled/disabled? 
*/ extern int tcptv_keep_init; /* time to keep alive the initial SYN packet */ extern int tcp_mssdflt; /* default maximum segment size */ @@ -682,7 +683,7 @@ struct tcpcb * struct tcpcb * tcp_drop(struct tcpcb *, int); int tcp_dooptions(struct tcpcb *, u_char *, int, struct tcphdr *, - struct mbuf *, int, struct tcp_opt_info *, u_int); + struct mbuf *, int, struct tcp_opt_info *, u_int, uint32_t); void tcp_init(void); int tcp_input(struct mbuf **, int *, int, int); int tcp_mss(struct tcpcb *, int); @@ -702,7 +703,7 @@ void tcp_pulloutofband(struct socket *, u_int, struct mbuf *, int); int tcp_reass(struct tcpcb *, struct tcphdr *, struct mbuf *, int *); void tcp_rscale(struct tcpcb *, u_long); void tcp_respond(struct tcpcb *, caddr_t, struct tcphdr *, tcp_seq, - tcp_seq, int, u_int); + tcp_seq, int, u_int, uint32_t); void tcp_setpersist(struct tcpcb *); void tcp_update_sndspace(struct tcpcb *); void tcp_update_rcvspace(struct tcpcb *); -- 2.20.1