From ced6d44d3b6b8e0b34987d39fdb7319e7ac81cb1 Mon Sep 17 00:00:00 2001 From: claudio Date: Thu, 11 Aug 2022 09:13:21 +0000 Subject: [PATCH] Add TCP_INFO support to getsockopt for tcp sessions. TCP_INFO provides a lot of information about the TCP session of this socket. Many processes like to peek at the RTT of a connection, but this also provides a lot more specialized info for use by e.g. tcpbench(1). While the basic minimal info is always available, the more specific data is only populated for privileged processes. This is done to avoid handing data back to userland that could be used to attack a session. TCP_INFO is available to pledge "inet" since pledged processes like chrome tend to use TCP_INFO when available. OK bluhm@ --- sys/kern/kern_pledge.c | 7 +-- sys/netinet/tcp.h | 94 +++++++++++++++++++++++++++++++- sys/netinet/tcp_input.c | 8 ++- sys/netinet/tcp_output.c | 7 ++- sys/netinet/tcp_usrreq.c | 114 +++++++++++++++++++++++++++++++++++++-- sys/netinet/tcp_var.h | 12 ++++- 6 files changed, 230 insertions(+), 12 deletions(-) diff --git a/sys/kern/kern_pledge.c b/sys/kern/kern_pledge.c index f2378fc07d4..57ebe45aa18 100644 --- a/sys/kern/kern_pledge.c +++ b/sys/kern/kern_pledge.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_pledge.c,v 1.292 2022/08/08 01:53:01 deraadt Exp $ */ +/* $OpenBSD: kern_pledge.c,v 1.293 2022/08/11 09:13:21 claudio Exp $ */ /* * Copyright (c) 2015 Nicholas Marriott @@ -1370,7 +1370,7 @@ pledge_sockopt(struct proc *p, int set, int level, int optname) switch (optname) { case SO_RCVBUF: case SO_ERROR: - return 0; + return (0); } break; } @@ -1392,7 +1392,7 @@ pledge_sockopt(struct proc *p, int set, int level, int optname) case SOL_SOCKET: switch (optname) { case SO_TIMESTAMP: - return 0; + return (0); } break; } @@ -1430,6 +1430,7 @@ pledge_sockopt(struct proc *p, int set, int level, int optname) case TCP_SACK_ENABLE: case TCP_MAXSEG: case TCP_NOPUSH: + case TCP_INFO: return (0); } break; diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h index 
085624ed5f7..b9cf23d4c53 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: tcp.h,v 1.22 2021/02/08 19:37:15 jan Exp $ */
+/* $OpenBSD: tcp.h,v 1.23 2022/08/11 09:13:21 claudio Exp $ */
 /* $NetBSD: tcp.h,v 1.8 1995/04/17 05:32:58 cgd Exp $ */
 
 /*
@@ -126,6 +126,98 @@ struct tcphdr {
 #define	TCP_MAXSEG		0x02	/* set maximum segment size */
 #define	TCP_MD5SIG		0x04	/* enable TCP MD5 signature option */
 #define	TCP_SACK_ENABLE		0x08	/* enable SACKs (if disabled by def.) */
+#define	TCP_INFO		0x09	/* retrieve tcp_info structure */
 #define	TCP_NOPUSH		0x10	/* don't push last block of write */
 
+#define	TCPI_OPT_TIMESTAMPS	0x01	/* timestamps requested and received */
+#define	TCPI_OPT_SACK		0x02	/* TF_SACK_PERMIT is set */
+#define	TCPI_OPT_WSCALE		0x04	/* window scaling requested and received */
+#define	TCPI_OPT_ECN		0x08	/* TF_ECN_PERMIT is set (TCP_ECN kernels) */
+#define	TCPI_OPT_TOE		0x10	/* never set by tcp_fill_info() */
+
+/*
+ * The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits
+ * the caller to query certain information about the state of a TCP
+ * connection. The leading members overlap with the Linux implementation,
+ * a lot of OpenBSD specific extra information follows. Members with a
+ * leading "__" are not filled in by tcp_fill_info() and read back as 0.
+ */
+struct tcp_info {
+	uint8_t		tcpi_state;		/* TCP FSM state. */
+	uint8_t		__tcpi_ca_state;
+	uint8_t		__tcpi_retransmits;
+	uint8_t		__tcpi_probes;
+	uint8_t		__tcpi_backoff;
+	uint8_t		tcpi_options;		/* TCPI_OPT_* flags for conn. */
+	uint8_t		tcpi_snd_wscale;	/* RFC1323 send shift value. */
+	uint8_t		tcpi_rcv_wscale;	/* RFC1323 recv shift value. */
+
+	uint32_t	tcpi_rto;		/* Retransmission timeout (usec). */
+	uint32_t	__tcpi_ato;
+	uint32_t	tcpi_snd_mss;		/* Max segment size for send. */
+	uint32_t	tcpi_rcv_mss;		/* Max segment size for recv. */
+
+	uint32_t	__tcpi_unacked;
+	uint32_t	__tcpi_sacked;
+	uint32_t	__tcpi_lost;
+	uint32_t	__tcpi_retrans;
+	uint32_t	__tcpi_fackets;
+
+	/* Times; measurements in usecs. */
+	uint32_t	tcpi_last_data_sent;	/* since last sent data. */
+	uint32_t	tcpi_last_ack_sent;	/* since last sent ack. */
+	uint32_t	tcpi_last_data_recv;	/* since last recv data. */
+	uint32_t	tcpi_last_ack_recv;	/* since last recv ack. */
+
+	/* Metrics; variable units. */
+	uint32_t	__tcpi_pmtu;
+	uint32_t	__tcpi_rcv_ssthresh;
+	uint32_t	tcpi_rtt;		/* Smoothed RTT in usecs. */
+	uint32_t	tcpi_rttvar;		/* RTT variance in usecs. */
+	uint32_t	tcpi_snd_ssthresh;	/* Slow start threshold. */
+	uint32_t	tcpi_snd_cwnd;		/* Send congestion window. */
+	uint32_t	__tcpi_advmss;
+	uint32_t	__tcpi_reordering;
+
+	uint32_t	__tcpi_rcv_rtt;
+	uint32_t	tcpi_rcv_space;		/* Advertised recv window. */
+
+	/*
+	 * Members below this point are only set if process is privileged,
+	 * otherwise values will be 0.
+	 */
+
+	/* FreeBSD/NetBSD extensions to tcp_info. */
+	uint32_t	tcpi_snd_wnd;		/* Advertised send window. */
+	uint32_t	tcpi_snd_nxt;		/* Next egress seqno, offset from iss */
+	uint32_t	tcpi_rcv_nxt;		/* Next ingress seqno, offset from irs */
+	uint32_t	tcpi_toe_tid;		/* HWTID for TOE endpoints (not filled in) */
+	uint32_t	tcpi_snd_rexmitpack;	/* Retransmitted packets */
+	uint32_t	tcpi_rcv_ooopack;	/* Out-of-order packets */
+	uint32_t	tcpi_snd_zerowin;	/* Zero-sized windows sent */
+
+	/* OpenBSD extensions; mostly straight copies of tcpcb/sockbuf members */
+	uint32_t	tcpi_rttmin;		/* t_rttmin in usecs */
+	uint32_t	tcpi_max_sndwnd;	/* tcpcb max_sndwnd */
+	uint32_t	tcpi_rcv_adv;		/* rcv_adv, offset from irs */
+	uint32_t	tcpi_rcv_up;		/* rcv_up, offset from irs */
+	uint32_t	tcpi_snd_una;		/* snd_una, offset from iss */
+	uint32_t	tcpi_snd_up;		/* snd_up, offset from iss */
+	uint32_t	tcpi_snd_wl1;		/* snd_wl1, as relative seqno */
+	uint32_t	tcpi_snd_wl2;		/* snd_wl2, offset from iss */
+	uint32_t	tcpi_snd_max;		/* snd_max, offset from iss */
+	uint32_t	tcpi_ts_recent;		/* ts_recent, value from the wire */
+	uint32_t	tcpi_ts_recent_age;	/* usecs since ts_recent_age */
+	uint32_t	tcpi_rfbuf_cnt;		/* tcpcb rfbuf_cnt */
+	uint32_t	tcpi_rfbuf_ts;		/* usecs since rfbuf_ts */
+	uint32_t	tcpi_so_rcv_sb_cc;	/* so_rcv.sb_cc */
+	uint32_t	tcpi_so_rcv_sb_hiwat;	/* so_rcv.sb_hiwat */
+	uint32_t	tcpi_so_rcv_sb_lowat;	/* so_rcv.sb_lowat */
+	uint32_t	tcpi_so_rcv_sb_wat;	/* so_rcv.sb_wat */
+	uint32_t	tcpi_so_snd_sb_cc;	/* so_snd.sb_cc */
+	uint32_t	tcpi_so_snd_sb_hiwat;	/* so_snd.sb_hiwat */
+	uint32_t	tcpi_so_snd_sb_lowat;	/* so_snd.sb_lowat */
+	uint32_t	tcpi_so_snd_sb_wat;	/* so_snd.sb_wat */
+};
+
 #endif /* _NETINET_TCP_H_ */
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index b5c9be1f990..7df100facfc 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: tcp_input.c,v 1.376 2022/08/08 12:06:30 bluhm Exp $ */
+/* $OpenBSD: tcp_input.c,v 1.377
2022/08/11 09:13:21 claudio Exp $ */ /* $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $ */ /* @@ -275,6 +275,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, struct mbuf *m, int *tlen) } } tcpstat_pkt(tcps_rcvoopack, tcps_rcvoobyte, *tlen); + tp->t_rcvoopack++; /* * While we overlap succeeding segments trim them or, @@ -947,6 +948,7 @@ findpcb: acked = th->th_ack - tp->snd_una; tcpstat_pkt(tcps_rcvackpack, tcps_rcvackbyte, acked); + tp->t_rcvacktime = tcp_now; ND6_HINT(tp); sbdrop(so, &so->so_snd, acked); @@ -1681,6 +1683,7 @@ trimthenstep6: } acked = th->th_ack - tp->snd_una; tcpstat_pkt(tcps_rcvackpack, tcps_rcvackbyte, acked); + tp->t_rcvacktime = tcp_now; /* * If we have a timestamp reply, update smoothed @@ -3620,6 +3623,9 @@ syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, tcp_rcvseqinit(tp); tp->t_state = TCPS_SYN_RECEIVED; tp->t_rcvtime = tcp_now; + tp->t_sndtime = tcp_now; + tp->t_rcvacktime = tcp_now; + tp->t_sndacktime = tcp_now; TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); tcpstat_inc(tcps_accepts); diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index faaec77ce2f..dacfd7cb84d 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_output.c,v 1.131 2021/11/25 13:46:02 bluhm Exp $ */ +/* $OpenBSD: tcp_output.c,v 1.132 2022/08/11 09:13:21 claudio Exp $ */ /* $NetBSD: tcp_output.c,v 1.16 1997/06/03 16:17:09 kml Exp $ */ /* @@ -636,6 +636,7 @@ send: else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { tcpstat_pkt(tcps_sndrexmitpack, tcps_sndrexmitbyte, len); + tp->t_sndrexmitpack++; } else { tcpstat_pkt(tcps_sndpack, tcps_sndbyte, len); } @@ -690,6 +691,7 @@ send: */ if (off + len == so->so_snd.sb_cc && !soissending(so)) flags |= TH_PUSH; + tp->t_sndtime = tcp_now; } else { if (tp->t_flags & TF_ACKNOW) tcpstat_inc(tcps_sndacks); @@ -821,6 +823,8 @@ send: if (flags & TH_RST) win = 0; th->th_win = htons((u_int16_t) (win>>tp->rcv_scale)); + if (th->th_win == 
0) + tp->t_sndzerowin++; if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { u_int32_t urp = tp->snd_up - tp->snd_nxt; if (urp > IP_MAXPACKET) @@ -1119,6 +1123,7 @@ out: if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) tp->rcv_adv = tp->rcv_nxt + win; tp->last_ack_sent = tp->rcv_nxt; + tp->t_sndacktime = tcp_now; tp->t_flags &= ~TF_ACKNOW; TCP_TIMER_DISARM(tp, TCPT_DELACK); if (sendalot) diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 6185187ca0b..0f588bb083d 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_usrreq.c,v 1.184 2022/08/08 12:06:30 bluhm Exp $ */ +/* $OpenBSD: tcp_usrreq.c,v 1.185 2022/08/11 09:13:21 claudio Exp $ */ /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ /* @@ -78,7 +78,9 @@ #include #include #include +#include #include +#include #include #include @@ -132,7 +134,8 @@ const struct sysctl_bounded_args tcpctl_vars[] = { struct inpcbtable tcbtable; -int tcp_ident(void *, size_t *, void *, size_t, int); +int tcp_fill_info(struct tcpcb *, struct socket *, struct mbuf *); +int tcp_ident(void *, size_t *, void *, size_t, int); /* * Process a TCP user request for TCP tb. If this is a send request @@ -425,6 +428,103 @@ tcp_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, return (error); } +/* + * Export internal TCP state information via a struct tcp_info without + * leaking any sensitive information. Sequence numbers are reported + * relative to the initial sequence number. 
+ */
+int
+tcp_fill_info(struct tcpcb *tp, struct socket *so, struct mbuf *m)
+{
+	struct proc *p = curproc;
+	struct tcp_info *ti;
+	u_int t = 1000000 / PR_SLOWHZ;	/* usecs per 1/PR_SLOWHZ sec tick */
+
+	if (sizeof(*ti) > MLEN) {	/* reply does not fit into m as is */
+		MCLGETL(m, M_WAITOK, sizeof(*ti));
+		if (!ISSET(m->m_flags, M_EXT))
+			return ENOMEM;
+	}
+	ti = mtod(m, struct tcp_info *);
+	m->m_len = sizeof(*ti);
+	memset(ti, 0, sizeof(*ti));	/* members not set below stay 0 */
+
+	ti->tcpi_state = tp->t_state;
+	if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
+		ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
+	if (tp->t_flags & TF_SACK_PERMIT)
+		ti->tcpi_options |= TCPI_OPT_SACK;
+	if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
+		ti->tcpi_options |= TCPI_OPT_WSCALE;
+		ti->tcpi_snd_wscale = tp->snd_scale;
+		ti->tcpi_rcv_wscale = tp->rcv_scale;
+	}
+#ifdef TCP_ECN
+	if (tp->t_flags & TF_ECN_PERMIT)
+		ti->tcpi_options |= TCPI_OPT_ECN;
+#endif
+
+	ti->tcpi_rto = tp->t_rxtcur * t;
+	ti->tcpi_snd_mss = tp->t_maxseg;
+	ti->tcpi_rcv_mss = tp->t_peermss;
+
+	ti->tcpi_last_data_sent = (tcp_now - tp->t_sndtime) * t;
+	ti->tcpi_last_ack_sent = (tcp_now - tp->t_sndacktime) * t;
+	ti->tcpi_last_data_recv = (tcp_now - tp->t_rcvtime) * t;
+	ti->tcpi_last_ack_recv = (tcp_now - tp->t_rcvacktime) * t;
+
+	ti->tcpi_rtt = ((uint64_t)tp->t_srtt * t) >>
+	    (TCP_RTT_SHIFT + TCP_RTT_BASE_SHIFT);
+	ti->tcpi_rttvar = ((uint64_t)tp->t_rttvar * t) >>
+	    (TCP_RTTVAR_SHIFT + TCP_RTT_BASE_SHIFT);
+	ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
+	ti->tcpi_snd_cwnd = tp->snd_cwnd;
+
+	ti->tcpi_rcv_space = tp->rcv_wnd;
+
+	/*
+	 * Unprivileged processes stop here; the members below stay 0.
+	 */
+	if (suser(p) != 0)
+		return 0;
+
+	/* FreeBSD/NetBSD extension fields for tcp_info. */
+	ti->tcpi_snd_wnd = tp->snd_wnd;
+	ti->tcpi_snd_nxt = tp->snd_nxt - tp->iss;
+	ti->tcpi_rcv_nxt = tp->rcv_nxt - tp->irs;
+	/* tcpi_toe_tid is not filled in and stays 0 */
+	ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
+	ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
+	ti->tcpi_snd_zerowin = tp->t_sndzerowin;
+
+	/* OpenBSD extensions; snd_wl1 holds a peer seqno, hence irs */
+	ti->tcpi_rttmin = tp->t_rttmin * t;
+	ti->tcpi_max_sndwnd = tp->max_sndwnd;
+	ti->tcpi_rcv_adv = tp->rcv_adv - tp->irs;
+	ti->tcpi_rcv_up = tp->rcv_up - tp->irs;
+	ti->tcpi_snd_una = tp->snd_una - tp->iss;
+	ti->tcpi_snd_up = tp->snd_up - tp->iss;
+	ti->tcpi_snd_wl1 = tp->snd_wl1 - tp->irs;
+	ti->tcpi_snd_wl2 = tp->snd_wl2 - tp->iss;
+	ti->tcpi_snd_max = tp->snd_max - tp->iss;
+
+	ti->tcpi_ts_recent = tp->ts_recent; /* XXX value from the wire */
+	ti->tcpi_ts_recent_age = (tcp_now - tp->ts_recent_age) * t;
+	ti->tcpi_rfbuf_cnt = tp->rfbuf_cnt;
+	ti->tcpi_rfbuf_ts = (tcp_now - tp->rfbuf_ts) * t;
+
+	ti->tcpi_so_rcv_sb_cc = so->so_rcv.sb_cc;
+	ti->tcpi_so_rcv_sb_hiwat = so->so_rcv.sb_hiwat;
+	ti->tcpi_so_rcv_sb_lowat = so->so_rcv.sb_lowat;
+	ti->tcpi_so_rcv_sb_wat = so->so_rcv.sb_wat;
+	ti->tcpi_so_snd_sb_cc = so->so_snd.sb_cc;
+	ti->tcpi_so_snd_sb_hiwat = so->so_snd.sb_hiwat;
+	ti->tcpi_so_snd_sb_lowat = so->so_snd.sb_lowat;
+	ti->tcpi_so_snd_sb_wat = so->so_snd.sb_wat;
+
+	return 0;
+}
+
 int
 tcp_ctloutput(int op, struct socket *so, int level, int optname,
     struct mbuf *m)
@@ -541,23 +641,29 @@ tcp_ctloutput(int op, struct socket *so, int level, int optname,
 		break;
 
 	case PRCO_GETOPT:
-		m->m_len = sizeof(int);
-
 		switch (optname) {
 		case TCP_NODELAY:
+			m->m_len = sizeof(int);
 			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
 			break;
 		case TCP_NOPUSH:
+			m->m_len = sizeof(int);
 			*mtod(m, int *) = tp->t_flags & TF_NOPUSH;
 			break;
 		case TCP_MAXSEG:
+			m->m_len = sizeof(int);
 			*mtod(m, int *) = tp->t_maxseg;
 			break;
 		case TCP_SACK_ENABLE:
+			m->m_len = sizeof(int);
 			*mtod(m, int *) = tp->sack_enable;
 			break;
+		case TCP_INFO:
+			error = tcp_fill_info(tp, so, m);
+			break;
 #ifdef
TCP_SIGNATURE case TCP_MD5SIG: + m->m_len = sizeof(int); *mtod(m, int *) = tp->t_flags & TF_SIGNATURE; break; #endif diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 7d8f615d4d2..29dcff81e0e 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_var.h,v 1.139 2022/02/25 23:51:03 guenther Exp $ */ +/* $OpenBSD: tcp_var.h,v 1.140 2022/08/11 09:13:21 claudio Exp $ */ /* $NetBSD: tcp_var.h,v 1.17 1996/02/13 23:44:24 christos Exp $ */ /* @@ -161,6 +161,9 @@ struct tcpcb { * "Variance" is actually smoothed difference. */ uint32_t t_rcvtime; /* time last segment received */ + uint32_t t_rcvacktime; /* time last ack received */ + uint32_t t_sndtime; /* time last segment sent */ + uint32_t t_sndacktime; /* time last ack sent */ uint32_t t_rtttime; /* time we started measuring rtt */ tcp_seq t_rtseq; /* sequence number being timed */ short t_srtt; /* smoothed round-trip time */ @@ -182,7 +185,7 @@ struct tcpcb { u_char requested_s_scale; u_int32_t ts_recent; /* timestamp echo data */ u_int32_t ts_modulate; /* modulation on timestamp */ - u_int32_t ts_recent_age; /* when last updated */ + u_int32_t ts_recent_age; /* when last updated */ tcp_seq last_ack_sent; /* pointer for syn cache entries*/ @@ -197,6 +200,11 @@ struct tcpcb { u_short t_pmtud_ip_hl; /* IP header length from ICMP payload */ int pf; + +/* maintain a few stats per connection: */ + u_int t_rcvoopack; /* out-of-order packets received */ + u_int t_sndrexmitpack; /* retransmit packets sent */ + u_int t_sndzerowin; /* zero-window updates sent */ }; #define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb) -- 2.20.1