From: yasuoka Date: Mon, 7 Nov 2022 11:22:55 +0000 (+0000) Subject: Modify TCP receive buffer size auto scaling to use the smoothed RTT X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=00007ca37eafa7b89f0bc525e5d9422d2458477f;p=openbsd Modify TCP receive buffer size auto scaling to use the smoothed RTT (SRTT) instead of the timestamp option. The timestamp option is disabled on some OSs (e.g. Windows) or dropped by some firewalls/routers; in such cases the window size had been fixed at 16KB, which limits throughput to a very low rate on high latency networks. Also change "tcp_now" from a 2HZ tick counter to binuptime in milliseconds to calculate the SRTT better. tested by krw matthieu jmatthew dlg djm stu stsp ok claudio --- diff --git a/lib/libc/sys/sysctl.2 b/lib/libc/sys/sysctl.2 index 59f709ff5e9..83a51e2368e 100644 --- a/lib/libc/sys/sysctl.2 +++ b/lib/libc/sys/sysctl.2 @@ -1,4 +1,4 @@ -.\" $OpenBSD: sysctl.2,v 1.49 2022/08/16 13:29:52 visa Exp $ +.\" $OpenBSD: sysctl.2,v 1.50 2022/11/07 11:22:55 yasuoka Exp $ .\" .\" Copyright (c) 1993 .\" The Regents of the University of California. All rights reserved. @@ -27,7 +27,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd $Mdocdate: August 16 2022 $ +.Dd $Mdocdate: November 7 2022 $ .Dt SYSCTL 2 .Os .Sh NAME @@ -1329,7 +1329,6 @@ The currently defined protocols and names are: .It tcp Ta rootonly Ta array Ta yes .It tcp Ta rstppslimit Ta integer Ta yes .It tcp Ta sack Ta integer Ta yes -.It tcp Ta slowhz Ta integer Ta no .It tcp Ta stats Ta structure Ta no .It tcp Ta synbucketlimit Ta integer Ta yes .It tcp Ta syncachelimit Ta integer Ta yes @@ -1685,14 +1684,13 @@ both set to \-1. .It Li tcp.keepidle Pq Va net.inet.tcp.keepidle If the socket option .Dv SO_KEEPALIVE -has been set on a socket, then this value specifies how much time a -connection needs to be idle before keepalives are sent. -See also tcp.slowhz. 
+has been set on a socket, then this value specifies how much time in seconds +a connection needs to be idle before keepalives are sent. .It Li tcp.keepinittime Pq Va net.inet.tcp.keepinittime -Time to keep alive the initial SYN packet of a TCP handshake. +Time in seconds to keep alive the initial SYN packet of a TCP handshake. .It Li tcp.keepintvl Pq Va net.inet.tcp.keepintvl -Time after a keepalive probe is sent until, in the absence of any response, -another probe is sent. +Time in seconds after a keepalive probe is sent until, in the absence of any +response, another probe is sent. See also tcp.slowhz. .It Li tcp.always_keepalive Pq Va net.inet.tcp.always_keepalive Act as if the option @@ -1729,11 +1727,6 @@ and will not go out from the node. A negative value disables rate limitation. .It Li tcp.sack Pq Va net.inet.tcp.sack Returns 1 if RFC 2018 Selective Acknowledgements are enabled. -.It Li tcp.slowhz Pq Va net.inet.tcp.slowhz -The units for tcp.keepidle and tcp.keepintvl; those variables are in ticks -of a clock that ticks tcp.slowhz times per second. -(That is, their values must be divided by the tcp.slowhz value to get times -in seconds.) .It Li tcp.stats Pq Va net.inet.tcp.stats Returns the TCP statistics in a struct tcpstat. 
.It Li tcp.synbucketlimit Pq Va net.inet.tcp.synbucketlimit diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 3ec2bfc7060..9941613d3b3 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_input.c,v 1.381 2022/10/03 16:43:52 bluhm Exp $ */ +/* $OpenBSD: tcp_input.c,v 1.382 2022/11/07 11:22:55 yasuoka Exp $ */ /* $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $ */ /* @@ -127,7 +127,7 @@ int tcp_ackdrop_ppslim = 100; /* 100pps */ int tcp_ackdrop_ppslim_count = 0; struct timeval tcp_ackdrop_ppslim_last; -#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) +#define TCP_PAWS_IDLE TCP_TIME(24 * 24 * 60 * 60) /* for modulo comparisons of timestamps */ #define TSTMP_LT(a,b) ((int)((a)-(b)) < 0) @@ -181,7 +181,7 @@ do { \ (ifp && (ifp->if_flags & IFF_LOOPBACK))) \ tp->t_flags |= TF_ACKNOW; \ else \ - TCP_TIMER_ARM_MSEC(tp, TCPT_DELACK, tcp_delack_msecs); \ + TCP_TIMER_ARM(tp, TCPT_DELACK, tcp_delack_msecs); \ if_put(ifp); \ } while (0) @@ -390,7 +390,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto, int af) opti.ts_present = 0; opti.maxseg = 0; - now = READ_ONCE(tcp_now); + now = tcp_now(); /* * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN @@ -862,7 +862,7 @@ findpcb: */ tp->t_rcvtime = now; if (TCPS_HAVEESTABLISHED(tp->t_state)) - TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); + TCP_TIMER_ARM(tp, TCPT_KEEP, TCP_TIME(tcp_keepidle)); if (tp->sack_enable) tcp_del_sackholes(tp, th); /* Delete stale SACK holes */ @@ -1039,16 +1039,15 @@ findpcb: if (so->so_state & SS_CANTRCVMORE) m_freem(m); else { - if (opti.ts_present && opti.ts_ecr) { - if (tp->rfbuf_ts < opti.ts_ecr && - opti.ts_ecr - tp->rfbuf_ts < hz) { - tcp_update_rcvspace(tp); - /* Start over with next RTT. 
*/ - tp->rfbuf_cnt = 0; - tp->rfbuf_ts = 0; - } else - tp->rfbuf_cnt += tlen; - } + if (tp->t_srtt != 0 && tp->rfbuf_ts != 0 && + now - tp->rfbuf_ts > (tp->t_srtt >> + (TCP_RTT_SHIFT + TCP_RTT_BASE_SHIFT))) { + tcp_update_rcvspace(tp); + /* Start over with next RTT. */ + tp->rfbuf_cnt = 0; + tp->rfbuf_ts = 0; + } else + tp->rfbuf_cnt += tlen; m_adj(m, iphlen + off); sbappendstream(so, &so->so_rcv, m); } @@ -1081,10 +1080,6 @@ findpcb: tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); } - /* Reset receive buffer auto scaling when not in bulk receive mode. */ - tp->rfbuf_cnt = 0; - tp->rfbuf_ts = 0; - switch (tp->t_state) { /* @@ -1177,7 +1172,7 @@ findpcb: soisconnected(so); tp->t_flags &= ~TF_BLOCKOUTPUT; tp->t_state = TCPS_ESTABLISHED; - TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); + TCP_TIMER_ARM(tp, TCPT_KEEP, TCP_TIME(tcp_keepidle)); /* Do window scaling on this connection? */ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { @@ -1463,7 +1458,7 @@ trimthenstep6: soisconnected(so); tp->t_flags &= ~TF_BLOCKOUTPUT; tp->t_state = TCPS_ESTABLISHED; - TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); + TCP_TIMER_ARM(tp, TCPT_KEEP, TCP_TIME(tcp_keepidle)); /* Do window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == (TF_RCVD_SCALE|TF_REQ_SCALE)) { @@ -1798,7 +1793,8 @@ trimthenstep6: tp->t_flags |= TF_BLOCKOUTPUT; soisdisconnected(so); tp->t_flags &= ~TF_BLOCKOUTPUT; - TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); + TCP_TIMER_ARM(tp, TCPT_2MSL, + TCP_TIME(tcp_maxidle)); } tp->t_state = TCPS_FIN_WAIT_2; } @@ -1814,7 +1810,8 @@ trimthenstep6: if (ourfinisacked) { tp->t_state = TCPS_TIME_WAIT; tcp_canceltimers(tp); - TCP_TIMER_ARM(tp, TCPT_2MSL, 2 * TCPTV_MSL); + TCP_TIMER_ARM(tp, TCPT_2MSL, + TCP_TIME(2 * TCPTV_MSL)); tp->t_flags |= TF_BLOCKOUTPUT; soisdisconnected(so); tp->t_flags &= ~TF_BLOCKOUTPUT; @@ -1840,7 +1837,7 @@ trimthenstep6: * it and restart the finack timer. 
*/ case TCPS_TIME_WAIT: - TCP_TIMER_ARM(tp, TCPT_2MSL, 2 * TCPTV_MSL); + TCP_TIMER_ARM(tp, TCPT_2MSL, TCP_TIME(2 * TCPTV_MSL)); goto dropafterack; } } @@ -2016,7 +2013,7 @@ dodata: /* XXX */ case TCPS_FIN_WAIT_2: tp->t_state = TCPS_TIME_WAIT; tcp_canceltimers(tp); - TCP_TIMER_ARM(tp, TCPT_2MSL, 2 * TCPTV_MSL); + TCP_TIMER_ARM(tp, TCPT_2MSL, TCP_TIME(2 * TCPTV_MSL)); tp->t_flags |= TF_BLOCKOUTPUT; soisdisconnected(so); tp->t_flags &= ~TF_BLOCKOUTPUT; @@ -2026,7 +2023,7 @@ dodata: /* XXX */ * In TIME_WAIT state restart the 2 MSL time_wait timer. */ case TCPS_TIME_WAIT: - TCP_TIMER_ARM(tp, TCPT_2MSL, 2 * TCPTV_MSL); + TCP_TIMER_ARM(tp, TCPT_2MSL, TCP_TIME(2 * TCPTV_MSL)); break; } } @@ -2750,7 +2747,8 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt) * statistical, we have to test that we don't drop below * the minimum feasible timer (which is 2 ticks). */ - rttmin = min(max(rtt + 2, tp->t_rttmin), TCPTV_REXMTMAX); + rttmin = min(max(tp->t_rttmin, rtt + 2 * (TCP_TIME(1) / hz)), + TCPTV_REXMTMAX); TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), rttmin, TCPTV_REXMTMAX); /* @@ -3168,7 +3166,7 @@ do { \ TCPTV_REXMTMAX); \ if (!timeout_initialized(&(sc)->sc_timer)) \ timeout_set_proc(&(sc)->sc_timer, syn_cache_timer, (sc)); \ - timeout_add(&(sc)->sc_timer, (sc)->sc_rxtcur * (hz / PR_SLOWHZ)); \ + timeout_add_msec(&(sc)->sc_timer, (sc)->sc_rxtcur); \ } while (/*CONSTCOND*/0) void @@ -3341,7 +3339,7 @@ syn_cache_timer(void *arg) if (sc->sc_flags & SCF_DEAD) goto out; - now = READ_ONCE(tcp_now); + now = tcp_now(); if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) { /* Drop it -- too many retransmissions. 
*/ @@ -3629,7 +3627,7 @@ syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, tp->t_sndtime = now; tp->t_rcvacktime = now; tp->t_sndacktime = now; - TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); + TCP_TIMER_ARM(tp, TCPT_KEEP, TCP_TIME(tcptv_keep_init)); tcpstat_inc(tcps_accepts); tcp_mss(tp, sc->sc_peermaxseg); /* sets t_maxseg */ diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 815fe43bad6..1a9cc438a1f 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_output.c,v 1.133 2022/09/03 19:22:19 bluhm Exp $ */ +/* $OpenBSD: tcp_output.c,v 1.134 2022/11/07 11:22:55 yasuoka Exp $ */ /* $NetBSD: tcp_output.c,v 1.16 1997/06/03 16:17:09 kml Exp $ */ /* @@ -222,7 +222,7 @@ tcp_output(struct tcpcb *tp) return (EINVAL); #endif /* defined(TCP_SIGNATURE) && defined(DIAGNOSTIC) */ - now = READ_ONCE(tcp_now); + now = tcp_now(); /* * Determine length of data that should be transmitted, @@ -545,11 +545,11 @@ send: *lp++ = htonl(now + tp->ts_modulate); *lp = htonl(tp->ts_recent); optlen += TCPOLEN_TSTAMP_APPA; - - /* Set receive buffer autosizing timestamp. */ - if (tp->rfbuf_ts == 0) - tp->rfbuf_ts = now; - + } + /* Set receive buffer autosizing timestamp. */ + if (tp->rfbuf_ts == 0) { + tp->rfbuf_ts = now; + tp->rfbuf_cnt = 0; } #ifdef TCP_SIGNATURE @@ -1105,7 +1105,7 @@ out: /* Restart the delayed ACK timer, if necessary. 
*/ if (TCP_TIMER_ISARMED(tp, TCPT_DELACK)) - TCP_TIMER_ARM_MSEC(tp, TCPT_DELACK, tcp_delack_msecs); + TCP_TIMER_ARM(tp, TCPT_DELACK, tcp_delack_msecs); return (error); } @@ -1138,7 +1138,7 @@ void tcp_setpersist(struct tcpcb *tp) { int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> (1 + TCP_RTT_BASE_SHIFT); - int nticks; + int msec; if (TCP_TIMER_ISARMED(tp, TCPT_REXMT)) panic("tcp_output REXMT"); @@ -1147,9 +1147,9 @@ tcp_setpersist(struct tcpcb *tp) */ if (t < tp->t_rttmin) t = tp->t_rttmin; - TCPT_RANGESET(nticks, t * tcp_backoff[tp->t_rxtshift], + TCPT_RANGESET(msec, t * tcp_backoff[tp->t_rxtshift], TCPTV_PERSMIN, TCPTV_PERSMAX); - TCP_TIMER_ARM(tp, TCPT_PERSIST, nticks); + TCP_TIMER_ARM(tp, TCPT_PERSIST, msec); if (tp->t_rxtshift < TCP_MAXRXTSHIFT) tp->t_rxtshift++; } diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 3a0d0cd7ab3..b08f55f00e2 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_subr.c,v 1.189 2022/10/03 16:43:52 bluhm Exp $ */ +/* $OpenBSD: tcp_subr.c,v 1.190 2022/11/07 11:22:55 yasuoka Exp $ */ /* $NetBSD: tcp_subr.c,v 1.22 1996/02/13 23:44:00 christos Exp $ */ /* @@ -109,7 +109,7 @@ struct mutex tcp_timer_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); /* patchable/settable parameters for tcp */ int tcp_mssdflt = TCP_MSS; -int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; +int tcp_rttdflt = TCPTV_SRTTDFLT; /* values controllable via sysctl */ int tcp_do_rfc1323 = 1; @@ -136,7 +136,6 @@ struct cpumem *tcpcounters; /* tcp statistics */ u_char tcp_secret[16]; /* [I] */ SHA2_CTX tcp_secret_ctx; /* [I] */ tcp_seq tcp_iss; /* [T] updated by timer and connection */ -uint32_t tcp_now; /* [T] incremented by slow timer */ /* * Tcp initialization @@ -145,7 +144,6 @@ void tcp_init(void) { tcp_iss = 1; /* wrong */ - tcp_now = 1; pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, IPL_SOFTNET, 0, "tcpcb", NULL); pool_init(&tcpqe_pool, sizeof(struct tcpqent), 0, IPL_SOFTNET, 0, @@ -445,7 +443,7 @@ tcp_newtcpcb(struct 
inpcb *inp, int wait) * reasonable initial retransmit time. */ tp->t_srtt = TCPTV_SRTTBASE; - tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << + tp->t_rttvar = tcp_rttdflt << (TCP_RTTVAR_SHIFT + TCP_RTT_BASE_SHIFT - 1); tp->t_rttmin = TCPTV_MIN; TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 86f4ccd4cdd..73f553963bd 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_timer.c,v 1.70 2022/09/03 19:22:19 bluhm Exp $ */ +/* $OpenBSD: tcp_timer.c,v 1.71 2022/11/07 11:22:55 yasuoka Exp $ */ /* $NetBSD: tcp_timer.c,v 1.14 1996/02/13 23:44:09 christos Exp $ */ /* @@ -152,7 +152,6 @@ tcp_slowtimo(void) mtx_enter(&tcp_timer_mtx); tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl; tcp_iss += TCP_ISSINCR2/PR_SLOWHZ; /* increment iss */ - tcp_now++; /* for timestamps */ mtx_leave(&tcp_timer_mtx); } @@ -422,9 +421,9 @@ tcp_timer_persist(void *arg) rto = TCP_REXMTVAL(tp); if (rto < tp->t_rttmin) rto = tp->t_rttmin; - now = READ_ONCE(tcp_now); + now = tcp_now(); if (tp->t_rxtshift == TCP_MAXRXTSHIFT && - ((now - tp->t_rcvtime) >= tcp_maxpersistidle || + ((now - tp->t_rcvtime) >= TCP_TIME(tcp_maxpersistidle) || (now - tp->t_rcvtime) >= rto * tcp_totbackoff)) { tcpstat_inc(tcps_persistdrop); tp = tcp_drop(tp, ETIMEDOUT); @@ -467,9 +466,9 @@ tcp_timer_keep(void *arg) uint32_t now; maxidle = READ_ONCE(tcp_maxidle); - now = READ_ONCE(tcp_now); - if ((maxidle > 0) && - ((now - tp->t_rcvtime) >= tcp_keepidle + maxidle)) + now = tcp_now(); + if ((maxidle > 0) && ((now - tp->t_rcvtime) >= + TCP_TIME(tcp_keepidle + maxidle))) goto dropit; /* * Send a packet designed to force a response @@ -486,9 +485,9 @@ tcp_timer_keep(void *arg) tcpstat_inc(tcps_keepprobe); tcp_respond(tp, mtod(tp->t_template, caddr_t), NULL, tp->rcv_nxt, tp->snd_una - 1, 0, 0, now); - TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl); + TCP_TIMER_ARM(tp, TCPT_KEEP, TCP_TIME(tcp_keepintvl)); } else - TCP_TIMER_ARM(tp, 
TCPT_KEEP, tcp_keepidle); + TCP_TIMER_ARM(tp, TCPT_KEEP, TCP_TIME(tcp_keepidle)); if (otp) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_KEEP, 0); out: @@ -523,10 +522,10 @@ tcp_timer_2msl(void *arg) tcp_timer_freesack(tp); maxidle = READ_ONCE(tcp_maxidle); - now = READ_ONCE(tcp_now); + now = tcp_now(); if (tp->t_state != TCPS_TIME_WAIT && - ((maxidle == 0) || ((now - tp->t_rcvtime) <= maxidle))) - TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl); + ((maxidle == 0) || ((now - tp->t_rcvtime) <= TCP_TIME(maxidle)))) + TCP_TIMER_ARM(tp, TCPT_2MSL, TCP_TIME(tcp_keepintvl)); else tp = tcp_close(tp); if (otp) diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h index befffd109ee..c0f417f55dc 100644 --- a/sys/netinet/tcp_timer.h +++ b/sys/netinet/tcp_timer.h @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_timer.h,v 1.18 2018/05/08 15:10:33 bluhm Exp $ */ +/* $OpenBSD: tcp_timer.h,v 1.19 2022/11/07 11:22:55 yasuoka Exp $ */ /* $NetBSD: tcp_timer.h,v 1.6 1995/03/26 20:32:37 jtc Exp $ */ /* @@ -86,27 +86,27 @@ /* * Time constants. */ -#define TCPTV_MSL ( 30*PR_SLOWHZ) /* max seg lifetime (hah!) */ -#define TCPTV_SRTTBASE 0 /* base roundtrip time; - if 0, no idea yet */ -#define TCPTV_SRTTDFLT ( 3*PR_SLOWHZ) /* assumed RTT if no info */ +#define TCPTV_MSL 30 /* max seg lifetime (hah!) 
*/ +#define TCPTV_SRTTBASE 0 /* base roundtrip time; + if 0, no idea yet */ +#define TCPTV_SRTTDFLT TCP_TIME_MSEC(1500) /* assumed RTT if no info */ -#define TCPTV_PERSMIN ( 5*PR_SLOWHZ) /* retransmit persistence */ -#define TCPTV_PERSMAX ( 60*PR_SLOWHZ) /* maximum persist interval */ +#define TCPTV_PERSMIN 5 /* retransmit persistence */ +#define TCPTV_PERSMAX 60 /* maximum persist interval */ -#define TCPTV_KEEP_INIT ( 75*PR_SLOWHZ) /* initial connect keep alive */ -#define TCPTV_KEEP_IDLE (120*60*PR_SLOWHZ) /* dflt time before probing */ -#define TCPTV_KEEPINTVL ( 75*PR_SLOWHZ) /* default probe interval */ -#define TCPTV_KEEPCNT 8 /* max probes before drop */ +#define TCPTV_KEEP_INIT 75 /* initial connect keep alive */ +#define TCPTV_KEEP_IDLE 120*60 /* dflt time before probing */ +#define TCPTV_KEEPINTVL 75 /* default probe interval */ +#define TCPTV_KEEPCNT 8 /* max probes before drop */ -#define TCPTV_MIN ( 1*PR_SLOWHZ) /* minimum allowable value */ -#define TCPTV_REXMTMAX ( 64*PR_SLOWHZ) /* max allowable REXMT value */ +#define TCPTV_MIN TCP_TIME(1) /* minimum allowable value */ +#define TCPTV_REXMTMAX TCP_TIME(64) /* max allowable REXMT value */ -#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ +#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ -#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ +#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ -#define TCP_DELACK_MSECS 200 /* time to delay ACK */ +#define TCP_DELACK_MSECS 200 /* time to delay ACK */ #ifdef TCPTIMERS const char *tcptimers[TCPT_NTIMERS] = @@ -119,16 +119,10 @@ const char *tcptimers[TCPT_NTIMERS] = #define TCP_TIMER_INIT(tp, timer) \ timeout_set_proc(&(tp)->t_timer[(timer)], tcp_timer_funcs[(timer)], tp) -#define TCP_TIMER_ARM(tp, timer, nticks) \ +#define TCP_TIMER_ARM(tp, timer, msecs) \ do { \ SET((tp)->t_flags, TF_TIMER << (timer)); \ - timeout_add_msec(&(tp)->t_timer[(timer)], (nticks) * 500); \ -} while (0) - -#define TCP_TIMER_ARM_MSEC(tp, timer, msecs) \ -do { \ 
- SET((tp)->t_flags, TF_TIMER << (timer)); \ - timeout_add_msec(&(tp)->t_timer[(timer)], (msecs)); \ + timeout_add_msec(&(tp)->t_timer[(timer)], (msecs)); \ } while (0) #define TCP_TIMER_DISARM(tp, timer) \ diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 30c562dd7de..34fe08cabeb 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_usrreq.c,v 1.210 2022/10/17 14:49:02 mvs Exp $ */ +/* $OpenBSD: tcp_usrreq.c,v 1.211 2022/11/07 11:22:55 yasuoka Exp $ */ /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ /* @@ -153,9 +153,7 @@ const struct pr_usrreqs tcp6_usrreqs = { }; #endif -static int pr_slowhz = PR_SLOWHZ; const struct sysctl_bounded_args tcpctl_vars[] = { - { TCPCTL_SLOWHZ, &pr_slowhz, SYSCTL_INT_READONLY }, { TCPCTL_RFC1323, &tcp_do_rfc1323, 0, 1 }, { TCPCTL_KEEPINITTIME, &tcptv_keep_init, 1, 3 * TCPTV_KEEP_INIT }, { TCPCTL_KEEPIDLE, &tcp_keepidle, 1, 5 * TCPTV_KEEP_IDLE }, @@ -214,7 +212,7 @@ tcp_fill_info(struct tcpcb *tp, struct socket *so, struct mbuf *m) { struct proc *p = curproc; struct tcp_info *ti; - u_int t = 1000000 / PR_SLOWHZ; + u_int t = 1000; /* msec => usec */ uint32_t now; if (sizeof(*ti) > MLEN) { @@ -225,7 +223,7 @@ tcp_fill_info(struct tcpcb *tp, struct socket *so, struct mbuf *m) ti = mtod(m, struct tcp_info *); m->m_len = sizeof(*ti); memset(ti, 0, sizeof(*ti)); - now = READ_ONCE(tcp_now); + now = tcp_now(); ti->tcpi_state = tp->t_state; if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) @@ -674,7 +672,7 @@ tcp_connect(struct socket *so, struct mbuf *nam) soisconnecting(so); tcpstat_inc(tcps_connattempt); tp->t_state = TCPS_SYN_SENT; - TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); + TCP_TIMER_ARM(tp, TCPT_KEEP, TCP_TIME(tcptv_keep_init)); tcp_set_iss_tsm(tp); tcp_sendseqinit(tp); tp->snd_last = tp->snd_una; @@ -1112,7 +1110,7 @@ tcp_usrclosed(struct tcpcb *tp) * not left in FIN_WAIT_2 forever. 
*/ if (tp->t_state == TCPS_FIN_WAIT_2) - TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); + TCP_TIMER_ARM(tp, TCPT_2MSL, TCP_TIME(tcp_maxidle)); } return (tp); } diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index d9630b0c58c..eb5023052f9 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_var.h,v 1.160 2022/10/17 14:49:02 mvs Exp $ */ +/* $OpenBSD: tcp_var.h,v 1.161 2022/11/07 11:22:55 yasuoka Exp $ */ /* $NetBSD: tcp_var.h,v 1.17 1996/02/13 23:44:24 christos Exp $ */ /* @@ -72,7 +72,7 @@ struct tcpcb { struct timeout t_timer[TCPT_NTIMERS]; /* tcp timers */ short t_state; /* state of this connection */ short t_rxtshift; /* log(2) of rexmt exp. backoff */ - short t_rxtcur; /* current retransmit value */ + int t_rxtcur; /* current retransmit value */ short t_dupacks; /* consecutive dup acks recd */ u_short t_maxseg; /* maximum segment size */ char t_force; /* 1 if forcing out a byte */ @@ -166,9 +166,9 @@ struct tcpcb { uint32_t t_sndacktime; /* time last ack sent */ uint32_t t_rtttime; /* time we started measuring rtt */ tcp_seq t_rtseq; /* sequence number being timed */ - short t_srtt; /* smoothed round-trip time */ - short t_rttvar; /* variance in round-trip time */ - u_short t_rttmin; /* minimum rtt allowed */ + int t_srtt; /* smoothed round-trip time */ + int t_rttvar; /* variance in round-trip time */ + u_int t_rttmin; /* minimum rtt allowed */ u_long max_sndwnd; /* largest window peer has offered */ /* out-of-band data */ @@ -310,7 +310,7 @@ struct syn_cache_set { #define TCP_RTT_SHIFT 3 /* shift for srtt; 5 bits frac. */ #define TCP_RTTVAR_SHIFT 2 /* shift for rttvar; 4 bits */ #define TCP_RTT_BASE_SHIFT 2 /* remaining 2 bit shift */ -#define TCP_RTT_MAX (1<<9) /* maximum rtt */ +#define TCP_RTT_MAX (1<<18) /* maximum rtt */ /* * The initial retransmission should happen at rtt + 4 * rttvar. 
@@ -481,7 +481,7 @@ struct tcpstat { { "keepinittime", CTLTYPE_INT }, \ { "keepidle", CTLTYPE_INT }, \ { "keepintvl", CTLTYPE_INT }, \ - { "slowhz", CTLTYPE_INT }, \ + { NULL, 0 }, \ { "baddynamic", CTLTYPE_STRUCT }, \ { NULL, 0 }, \ { NULL, 0 }, \ @@ -638,6 +638,15 @@ tcpstat_pkt(enum tcpstat_counters pcounter, enum tcpstat_counters bcounter, counters_pkt(tcpcounters, pcounter, bcounter, v); } +static inline uint32_t +tcp_now(void) +{ + return (getnsecuptime() / 1000000); +} + +#define TCP_TIME_MSEC(_ms) (_ms) /* tcp_now() is in milliseconds */ +#define TCP_TIME(_sec) ((_sec) * 1000) + extern struct mutex tcp_timer_mtx; extern const struct pr_usrreqs tcp_usrreqs; @@ -647,7 +656,6 @@ extern const struct pr_usrreqs tcp6_usrreqs; extern struct pool tcpcb_pool; extern struct inpcbtable tcbtable; /* head of queue of active tcpcb's */ -extern uint32_t tcp_now; /* for RFC 1323 timestamps */ extern int tcp_do_rfc1323; /* enabled/disabled? */ extern int tcptv_keep_init; /* time to keep alive the initial SYN packet */ extern int tcp_mssdflt; /* default maximum segment size */