TCP_INFO provides a lot of information about the TCP session of this socket.
Many processes like to peek at the RTT of a connection, but this also provides
a lot more specialized info for use by e.g. tcpbench(1).
While the basic minimal info is available all the time, the more specific
data is only populated for privileged processes. This is done to avoid sharing
data back to userland that may allow an attack on a session.
TCP_INFO is available to pledge "inet" since pledged processes like chrome
tend to use TCP_INFO when available.
OK bluhm@
-/* $OpenBSD: kern_pledge.c,v 1.292 2022/08/08 01:53:01 deraadt Exp $ */
+/* $OpenBSD: kern_pledge.c,v 1.293 2022/08/11 09:13:21 claudio Exp $ */
/*
* Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
switch (optname) {
case SO_RCVBUF:
case SO_ERROR:
- return 0;
+ return (0);
}
break;
}
case SOL_SOCKET:
switch (optname) {
case SO_TIMESTAMP:
- return 0;
+ return (0);
}
break;
}
case TCP_SACK_ENABLE:
case TCP_MAXSEG:
case TCP_NOPUSH:
+ case TCP_INFO:
return (0);
}
break;
-/* $OpenBSD: tcp.h,v 1.22 2021/02/08 19:37:15 jan Exp $ */
+/* $OpenBSD: tcp.h,v 1.23 2022/08/11 09:13:21 claudio Exp $ */
/* $NetBSD: tcp.h,v 1.8 1995/04/17 05:32:58 cgd Exp $ */
/*
#define TCP_MAXSEG 0x02 /* set maximum segment size */
#define TCP_MD5SIG 0x04 /* enable TCP MD5 signature option */
#define TCP_SACK_ENABLE 0x08 /* enable SACKs (if disabled by def.) */
+#define TCP_INFO 0x09 /* retrieve tcp_info structure */
#define TCP_NOPUSH 0x10 /* don't push last block of write */
+#define TCPI_OPT_TIMESTAMPS 0x01 /* tcpi_options: timestamps requested and received */
+#define TCPI_OPT_SACK 0x02 /* tcpi_options: TF_SACK_PERMIT is set */
+#define TCPI_OPT_WSCALE 0x04 /* tcpi_options: window scaling requested and received */
+#define TCPI_OPT_ECN 0x08 /* tcpi_options: TF_ECN_PERMIT is set (TCP_ECN kernels only) */
+#define TCPI_OPT_TOE 0x10 /* tcpi_options: never set by tcp_fill_info() */
+
+/*
+ * The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits
+ * the caller to query certain information about the state of a TCP
+ * connection. Provide an overlapping set of fields with the Linux
+ * implementation, but at the same time add a lot of OpenBSD specific
+ * extra information.
+ *
+ * Members whose names start with __tcpi_ are not filled in by
+ * tcp_fill_info(); the structure is zeroed before it is populated, so
+ * they read as 0. The same holds for tcpi_toe_tid.
+ */
+struct tcp_info {
+ uint8_t tcpi_state; /* TCP FSM state. */
+ uint8_t __tcpi_ca_state;
+ uint8_t __tcpi_retransmits;
+ uint8_t __tcpi_probes;
+ uint8_t __tcpi_backoff;
+ uint8_t tcpi_options; /* Options enabled on conn. */
+ uint8_t tcpi_snd_wscale; /* RFC1323 send shift value. */
+ uint8_t tcpi_rcv_wscale; /* RFC1323 recv shift value. */
+
+ uint32_t tcpi_rto; /* Retransmission timeout (usec). */
+ uint32_t __tcpi_ato;
+ uint32_t tcpi_snd_mss; /* Max segment size for send. */
+ uint32_t tcpi_rcv_mss; /* Max segment size for recv. */
+
+ uint32_t __tcpi_unacked;
+ uint32_t __tcpi_sacked;
+ uint32_t __tcpi_lost;
+ uint32_t __tcpi_retrans;
+ uint32_t __tcpi_fackets;
+
+ /* Times; measurements in usecs. */
+ uint32_t tcpi_last_data_sent; /* since last sent data. */
+ uint32_t tcpi_last_ack_sent; /* since last sent ack. */
+ uint32_t tcpi_last_data_recv; /* since last recv data. */
+ uint32_t tcpi_last_ack_recv; /* since last recv ack. */
+
+ /* Metrics; variable units. */
+ uint32_t __tcpi_pmtu;
+ uint32_t __tcpi_rcv_ssthresh;
+ uint32_t tcpi_rtt; /* Smoothed RTT in usecs. */
+ uint32_t tcpi_rttvar; /* RTT variance in usecs. */
+ uint32_t tcpi_snd_ssthresh; /* Slow start threshold. */
+ uint32_t tcpi_snd_cwnd; /* Send congestion window. */
+ uint32_t __tcpi_advmss;
+ uint32_t __tcpi_reordering;
+
+ uint32_t __tcpi_rcv_rtt;
+ uint32_t tcpi_rcv_space; /* Advertised recv window. */
+
+ /*
+ * Members below this point are only set if process is privileged,
+ * otherwise values will be 0.
+ */
+
+ /* FreeBSD/NetBSD extensions to tcp_info. */
+ uint32_t tcpi_snd_wnd; /* Advertised send window. */
+ uint32_t tcpi_snd_nxt; /* Next egress seqno (relative to iss) */
+ uint32_t tcpi_rcv_nxt; /* Next ingress seqno (relative to irs) */
+ uint32_t tcpi_toe_tid; /* HWTID for TOE endpoints */
+ uint32_t tcpi_snd_rexmitpack; /* Retransmitted packets */
+ uint32_t tcpi_rcv_ooopack; /* Out-of-order packets */
+ uint32_t tcpi_snd_zerowin; /* Zero-sized windows sent */
+
+ /* OpenBSD extensions */
+ uint32_t tcpi_rttmin; /* t_rttmin, scaled like tcpi_rto */
+ uint32_t tcpi_max_sndwnd; /* max_sndwnd */
+ uint32_t tcpi_rcv_adv; /* rcv_adv, relative to irs */
+ uint32_t tcpi_rcv_up; /* rcv_up, relative to irs */
+ uint32_t tcpi_snd_una; /* snd_una, relative to iss */
+ uint32_t tcpi_snd_up; /* snd_up, relative to iss */
+ uint32_t tcpi_snd_wl1; /* snd_wl1, relative to iss */
+ uint32_t tcpi_snd_wl2; /* snd_wl2, relative to iss */
+ uint32_t tcpi_snd_max; /* snd_max, relative to iss */
+ uint32_t tcpi_ts_recent; /* ts_recent, value from the wire */
+ uint32_t tcpi_ts_recent_age; /* time since ts_recent_age, scaled like tcpi_rto */
+ uint32_t tcpi_rfbuf_cnt; /* rfbuf_cnt */
+ uint32_t tcpi_rfbuf_ts; /* time since rfbuf_ts, scaled like tcpi_rto */
+ uint32_t tcpi_so_rcv_sb_cc; /* so_rcv.sb_cc */
+ uint32_t tcpi_so_rcv_sb_hiwat; /* so_rcv.sb_hiwat */
+ uint32_t tcpi_so_rcv_sb_lowat; /* so_rcv.sb_lowat */
+ uint32_t tcpi_so_rcv_sb_wat; /* so_rcv.sb_wat */
+ uint32_t tcpi_so_snd_sb_cc; /* so_snd.sb_cc */
+ uint32_t tcpi_so_snd_sb_hiwat; /* so_snd.sb_hiwat */
+ uint32_t tcpi_so_snd_sb_lowat; /* so_snd.sb_lowat */
+ uint32_t tcpi_so_snd_sb_wat; /* so_snd.sb_wat */
+};
+
#endif /* _NETINET_TCP_H_ */
-/* $OpenBSD: tcp_input.c,v 1.376 2022/08/08 12:06:30 bluhm Exp $ */
+/* $OpenBSD: tcp_input.c,v 1.377 2022/08/11 09:13:21 claudio Exp $ */
/* $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $ */
/*
}
}
tcpstat_pkt(tcps_rcvoopack, tcps_rcvoobyte, *tlen);
+ tp->t_rcvoopack++;
/*
* While we overlap succeeding segments trim them or,
acked = th->th_ack - tp->snd_una;
tcpstat_pkt(tcps_rcvackpack, tcps_rcvackbyte,
acked);
+ tp->t_rcvacktime = tcp_now;
ND6_HINT(tp);
sbdrop(so, &so->so_snd, acked);
}
acked = th->th_ack - tp->snd_una;
tcpstat_pkt(tcps_rcvackpack, tcps_rcvackbyte, acked);
+ tp->t_rcvacktime = tcp_now;
/*
* If we have a timestamp reply, update smoothed
tcp_rcvseqinit(tp);
tp->t_state = TCPS_SYN_RECEIVED;
tp->t_rcvtime = tcp_now;
+ tp->t_sndtime = tcp_now;
+ tp->t_rcvacktime = tcp_now;
+ tp->t_sndacktime = tcp_now;
TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
tcpstat_inc(tcps_accepts);
-/* $OpenBSD: tcp_output.c,v 1.131 2021/11/25 13:46:02 bluhm Exp $ */
+/* $OpenBSD: tcp_output.c,v 1.132 2022/08/11 09:13:21 claudio Exp $ */
/* $NetBSD: tcp_output.c,v 1.16 1997/06/03 16:17:09 kml Exp $ */
/*
else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
tcpstat_pkt(tcps_sndrexmitpack, tcps_sndrexmitbyte,
len);
+ tp->t_sndrexmitpack++;
} else {
tcpstat_pkt(tcps_sndpack, tcps_sndbyte, len);
}
*/
if (off + len == so->so_snd.sb_cc && !soissending(so))
flags |= TH_PUSH;
+ tp->t_sndtime = tcp_now;
} else {
if (tp->t_flags & TF_ACKNOW)
tcpstat_inc(tcps_sndacks);
if (flags & TH_RST)
win = 0;
th->th_win = htons((u_int16_t) (win>>tp->rcv_scale));
+ if (th->th_win == 0)
+ tp->t_sndzerowin++;
if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
u_int32_t urp = tp->snd_up - tp->snd_nxt;
if (urp > IP_MAXPACKET)
if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
tp->rcv_adv = tp->rcv_nxt + win;
tp->last_ack_sent = tp->rcv_nxt;
+ tp->t_sndacktime = tcp_now;
tp->t_flags &= ~TF_ACKNOW;
TCP_TIMER_DISARM(tp, TCPT_DELACK);
if (sendalot)
-/* $OpenBSD: tcp_usrreq.c,v 1.184 2022/08/08 12:06:30 bluhm Exp $ */
+/* $OpenBSD: tcp_usrreq.c,v 1.185 2022/08/11 09:13:21 claudio Exp $ */
/* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */
/*
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/kernel.h>
+#include <sys/pledge.h>
#include <sys/pool.h>
+#include <sys/proc.h>
#include <net/if.h>
#include <net/if_var.h>
struct inpcbtable tcbtable;
-int tcp_ident(void *, size_t *, void *, size_t, int);
+int tcp_fill_info(struct tcpcb *, struct socket *, struct mbuf *);
+int tcp_ident(void *, size_t *, void *, size_t, int);
/*
* Process a TCP user request for TCP tb. If this is a send request
return (error);
}
+/*
+ * Export internal TCP state information via a struct tcp_info without
+ * leaking any sensitive information. Sequence numbers are reported
+ * relative to the initial sequence number.
+ *
+ * The structure is written into the data area of mbuf m and m_len is
+ * set to sizeof(struct tcp_info); if the structure is larger than MLEN,
+ * MCLGETL() is used first to obtain more room. The whole structure is
+ * zeroed, then the basic fields are filled in. The remaining fields are
+ * only filled in when suser() returns 0 for the current process, so
+ * other callers see them as 0.
+ *
+ * Returns 0 on success, or ENOMEM if M_EXT is not set on m after
+ * MCLGETL().
+ */
+int
+tcp_fill_info(struct tcpcb *tp, struct socket *so, struct mbuf *m)
+{
+ struct proc *p = curproc;
+ struct tcp_info *ti;
+ u_int t = 1000000 / PR_SLOWHZ; /* multiplier applied to timer values reported in usecs */
+
+ if (sizeof(*ti) > MLEN) { /* structure does not fit into MLEN bytes */
+ MCLGETL(m, M_WAITOK, sizeof(*ti));
+ if (!ISSET(m->m_flags, M_EXT))
+ return ENOMEM;
+ }
+ ti = mtod(m, struct tcp_info *);
+ m->m_len = sizeof(*ti);
+ memset(ti, 0, sizeof(*ti)); /* fields not assigned below stay 0 */
+
+ ti->tcpi_state = tp->t_state;
+ if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
+ ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
+ if (tp->t_flags & TF_SACK_PERMIT)
+ ti->tcpi_options |= TCPI_OPT_SACK;
+ if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
+ ti->tcpi_options |= TCPI_OPT_WSCALE;
+ ti->tcpi_snd_wscale = tp->snd_scale; /* scale values only reported with WSCALE */
+ ti->tcpi_rcv_wscale = tp->rcv_scale;
+ }
+#ifdef TCP_ECN
+ if (tp->t_flags & TF_ECN_PERMIT)
+ ti->tcpi_options |= TCPI_OPT_ECN;
+#endif
+
+ ti->tcpi_rto = tp->t_rxtcur * t;
+ ti->tcpi_snd_mss = tp->t_maxseg;
+ ti->tcpi_rcv_mss = tp->t_peermss;
+
+ ti->tcpi_last_data_sent = (tcp_now - tp->t_sndtime) * t; /* elapsed since the recorded time */
+ ti->tcpi_last_ack_sent = (tcp_now - tp->t_sndacktime) * t;
+ ti->tcpi_last_data_recv = (tcp_now - tp->t_rcvtime) * t;
+ ti->tcpi_last_ack_recv = (tcp_now - tp->t_rcvacktime) * t;
+
+ ti->tcpi_rtt = ((uint64_t)tp->t_srtt * t) >>
+ (TCP_RTT_SHIFT + TCP_RTT_BASE_SHIFT); /* multiply in 64 bits, then shift */
+ ti->tcpi_rttvar = ((uint64_t)tp->t_rttvar * t) >>
+ (TCP_RTTVAR_SHIFT + TCP_RTT_BASE_SHIFT);
+ ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
+ ti->tcpi_snd_cwnd = tp->snd_cwnd;
+
+ ti->tcpi_rcv_space = tp->rcv_wnd;
+
+ /*
+ * Provide only minimal information for unprivileged processes.
+ */
+ if (suser(p) != 0)
+ return 0;
+
+ /* FreeBSD-specific extension fields for tcp_info. */
+ ti->tcpi_snd_wnd = tp->snd_wnd;
+ ti->tcpi_snd_nxt = tp->snd_nxt - tp->iss; /* send side: relative to iss */
+ ti->tcpi_rcv_nxt = tp->rcv_nxt - tp->irs; /* receive side: relative to irs */
+ /* missing tcpi_toe_tid */
+ ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
+ ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
+ ti->tcpi_snd_zerowin = tp->t_sndzerowin;
+
+ /* OpenBSD extensions */
+ ti->tcpi_rttmin = tp->t_rttmin * t;
+ ti->tcpi_max_sndwnd = tp->max_sndwnd;
+ ti->tcpi_rcv_adv = tp->rcv_adv - tp->irs;
+ ti->tcpi_rcv_up = tp->rcv_up - tp->irs;
+ ti->tcpi_snd_una = tp->snd_una - tp->iss;
+ ti->tcpi_snd_up = tp->snd_up - tp->iss;
+ ti->tcpi_snd_wl1 = tp->snd_wl1 - tp->iss;
+ ti->tcpi_snd_wl2 = tp->snd_wl2 - tp->iss;
+ ti->tcpi_snd_max = tp->snd_max - tp->iss;
+
+ ti->tcpi_ts_recent = tp->ts_recent; /* XXX value from the wire */
+ ti->tcpi_ts_recent_age = (tcp_now - tp->ts_recent_age) * t;
+ ti->tcpi_rfbuf_cnt = tp->rfbuf_cnt;
+ ti->tcpi_rfbuf_ts = (tcp_now - tp->rfbuf_ts) * t;
+
+ ti->tcpi_so_rcv_sb_cc = so->so_rcv.sb_cc; /* socket buffer values copied as-is */
+ ti->tcpi_so_rcv_sb_hiwat = so->so_rcv.sb_hiwat;
+ ti->tcpi_so_rcv_sb_lowat = so->so_rcv.sb_lowat;
+ ti->tcpi_so_rcv_sb_wat = so->so_rcv.sb_wat;
+ ti->tcpi_so_snd_sb_cc = so->so_snd.sb_cc;
+ ti->tcpi_so_snd_sb_hiwat = so->so_snd.sb_hiwat;
+ ti->tcpi_so_snd_sb_lowat = so->so_snd.sb_lowat;
+ ti->tcpi_so_snd_sb_wat = so->so_snd.sb_wat;
+
+ return 0;
+}
+
int
tcp_ctloutput(int op, struct socket *so, int level, int optname,
struct mbuf *m)
break;
case PRCO_GETOPT:
- m->m_len = sizeof(int);
-
switch (optname) {
case TCP_NODELAY:
+ m->m_len = sizeof(int);
*mtod(m, int *) = tp->t_flags & TF_NODELAY;
break;
case TCP_NOPUSH:
+ m->m_len = sizeof(int);
*mtod(m, int *) = tp->t_flags & TF_NOPUSH;
break;
case TCP_MAXSEG:
+ m->m_len = sizeof(int);
*mtod(m, int *) = tp->t_maxseg;
break;
case TCP_SACK_ENABLE:
+ m->m_len = sizeof(int);
*mtod(m, int *) = tp->sack_enable;
break;
+ case TCP_INFO:
+ error = tcp_fill_info(tp, so, m);
+ break;
#ifdef TCP_SIGNATURE
case TCP_MD5SIG:
+ m->m_len = sizeof(int);
*mtod(m, int *) = tp->t_flags & TF_SIGNATURE;
break;
#endif
-/* $OpenBSD: tcp_var.h,v 1.139 2022/02/25 23:51:03 guenther Exp $ */
+/* $OpenBSD: tcp_var.h,v 1.140 2022/08/11 09:13:21 claudio Exp $ */
/* $NetBSD: tcp_var.h,v 1.17 1996/02/13 23:44:24 christos Exp $ */
/*
* "Variance" is actually smoothed difference.
*/
uint32_t t_rcvtime; /* time last segment received */
+ uint32_t t_rcvacktime; /* time last ack received */
+ uint32_t t_sndtime; /* time last segment sent */
+ uint32_t t_sndacktime; /* time last ack sent */
uint32_t t_rtttime; /* time we started measuring rtt */
tcp_seq t_rtseq; /* sequence number being timed */
short t_srtt; /* smoothed round-trip time */
u_char requested_s_scale;
u_int32_t ts_recent; /* timestamp echo data */
u_int32_t ts_modulate; /* modulation on timestamp */
- u_int32_t ts_recent_age; /* when last updated */
+ u_int32_t ts_recent_age; /* when last updated */
tcp_seq last_ack_sent;
/* pointer for syn cache entries*/
u_short t_pmtud_ip_hl; /* IP header length from ICMP payload */
int pf;
+
+/* maintain a few stats per connection: */
+ u_int t_rcvoopack; /* out-of-order packets received */
+ u_int t_sndrexmitpack; /* retransmit packets sent */
+ u_int t_sndzerowin; /* zero-window updates sent */
};
#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)