From 3d0cee07960a73413d633711e9bdf9be05d3ac98 Mon Sep 17 00:00:00 2001 From: neirac Date: Mon, 20 Nov 2023 18:32:22 +0000 Subject: [PATCH] 14744 Add TCP_INFO socket option --- usr/src/uts/common/brand/lx/sys/lx_socket.h | 102 ++++++++++++++++++ .../uts/common/brand/lx/syscall/lx_socket.c | 54 +++++++++- 2 files changed, 155 insertions(+), 1 deletion(-) diff --git a/usr/src/uts/common/brand/lx/sys/lx_socket.h b/usr/src/uts/common/brand/lx/sys/lx_socket.h index 99489e4d13..a4f3e46db1 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_socket.h +++ b/usr/src/uts/common/brand/lx/sys/lx_socket.h @@ -23,6 +23,7 @@ * Use is subject to license terms. * Copyright 2016 Joyent, Inc. * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2023 Carlos Neira */ #ifndef _SYS_LX_SOCKET_H @@ -394,6 +395,27 @@ extern "C" { #define LX_MSG_FASTOPEN 0x20000000 #define LX_MSG_CMSG_CLOEXEC 0x40000000 +/* + * Linux TCP states + */ + +#define LX_TCP_ESTABLISHED 1 +#define LX_TCP_SYN_SENT 2 +#define LX_TCP_SYN_RECV 3 +#define LX_TCP_FIN_WAIT1 4 +#define LX_TCP_FIN_WAIT2 5 +#define LX_TCP_TIME_WAIT 6 +#define LX_TCP_CLOSE 7 +#define LX_TCP_CLOSE_WAIT 8 +#define LX_TCP_LAST_ACK 9 +#define LX_TCP_LISTEN 10 +#define LX_TCP_CLOSING 11 +#define LX_TCP_NEW_SYN_RECV 12 +/* Highest and lowest illumos TCP state values (see STOL_TCPSTATE) */ +#define LX_OS_MAX_TCP_STATE 6 +#define LX_OS_MIN_TCP_STATE (-6) + + typedef struct lx_msghdr { void *msg_name; /* optional address */ socklen_t msg_namelen; /* size of address */ @@ -437,6 +459,86 @@ typedef struct lx_sockaddr_in6 { /* one 32-bit field shorter than illumos */ } lx_sockaddr_in6_t; +typedef struct lx_tcp_info { + /* Current state in TCP state machine */ + uint8_t tcpi_state; + /* Congestion avoidance state */ + uint8_t tcpi_ca_state; + /* Number of unrecovered RTO timeouts */ + uint8_t tcpi_retransmits; + /* Unanswered 0 window probes */ + uint8_t tcpi_probes; + /* Current exponential backoff for RTO */ + uint8_t tcpi_backoff; + /* Enabled TCP options */ + uint8_t 
tcpi_options; +#define LX_TCPI_OPT_TIMESTAMPS 0x01 /* Negotiated TCP Timestamps */ +#define LX_TCPI_OPT_SACK 0x02 /* Negotiated SACK */ +#define LX_TCPI_OPT_WSCALE 0x04 /* Negotiated Window Scaling */ +#define LX_TCPI_OPT_ECN 0x08 /* Negotiated ECN */ +#define LX_TCPI_OPT_ECN_SEEN 0x10 /* Received at least 1 packet w/ ECT */ +#define LX_TCPI_OPT_SYN_DATA 0x20 /* Sent or received SYN-ACK for SYN */ + + uint8_t + tcpi_snd_wscale : 4, /* Send window scale shift */ + tcpi_rcv_wscale : 4; /* Receive window scale shift */ + + /* Retransmission timeout (usecs) */ + uint32_t tcpi_rto; + /* Predicted soft clock tick for delivering delayed ACK */ + uint32_t tcpi_ato; + /* Maximum Segment Size, sent (RFC 4898 tcpEStatsStackMSSSent) */ + uint32_t tcpi_snd_mss; + /* Maximum Segment Size, received (RFC 4898 tcpEStatsStackMSSRcvd) */ + uint32_t tcpi_rcv_mss; + + /* Sent but unacknowledged bytes */ + uint32_t tcpi_unacked; + /* # of SACKed packets; without SACK, # of recvd dup packets */ + uint32_t tcpi_sacked; + /* Estimated # of packets lost */ + uint32_t tcpi_lost; + /* Total # of rexmitted segments */ + uint32_t tcpi_retrans; + /* # of packets to highest SACKed sequence (deprecated on Linux) */ + uint32_t tcpi_fackets; + + /* Time (msecs) since last sent data */ + uint32_t tcpi_last_data_sent; + /* Time (msecs) since last sent ACK (not filled in on Linux) */ + uint32_t tcpi_last_ack_sent; + /* Time (msecs) since last recv data */ + uint32_t tcpi_last_data_recv; + /* Time (msecs) since last recv ACK */ + uint32_t tcpi_last_ack_recv; + + /* Last PMTU seen by socket */ + uint32_t tcpi_pmtu; + /* Slow start threshold (recv) */ + uint32_t tcpi_rcv_ssthresh; + /* Smoothed RTT (usecs) */ + uint32_t tcpi_rtt; + /* RTT variance (usecs) */ + uint32_t tcpi_rttvar; + /* Slow start threshold (send) */ + uint32_t tcpi_snd_ssthresh; + /* Send congestion window */ + uint32_t tcpi_snd_cwnd; + /* Advertised MSS */ + uint32_t tcpi_advmss; + /* Packet reordering metric */ + uint32_t tcpi_reordering; + + /* Receiver-side RTT estimate (usecs) 
*/ + uint32_t tcpi_rcv_rtt; + /* Advertised recv window */ + uint32_t tcpi_rcv_space; + + /* Total # of rexmitted segments for connection */ + uint32_t tcpi_total_retrans; + +} lx_tcp_info_t; + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/brand/lx/syscall/lx_socket.c b/usr/src/uts/common/brand/lx/syscall/lx_socket.c index 727fc60bf2..2a19bb6011 100644 --- a/usr/src/uts/common/brand/lx/syscall/lx_socket.c +++ b/usr/src/uts/common/brand/lx/syscall/lx_socket.c @@ -24,6 +24,7 @@ * Use is subject to license terms. * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. * Copyright 2022 Joyent, Inc. + * Copyright 2023 Carlos Neira */ #include @@ -62,6 +63,7 @@ #include #include #include +#include /* From uts/common/fs/sockfs/socksyscalls.c */ extern int listen(int, int, int); @@ -240,7 +242,25 @@ static const int stol_socktype[SOCK_SEQPACKET + 1] = { #define STOL_SOCKTYPE(t) \ ((t) <= SOCK_SEQPACKET ? stol_socktype[(t)] : SOCK_INVAL) +/* + * illumos TCP states run from -6 (closed) to 6 (time-wait); the states below + * zero are those that precede an established connection. Linux numbers its + * states from 1 to 12. stol_tcp_state[] is indexed by the illumos state for + * 0..6 and by (-state + 6) for -1..-6. illumos states that have no Linux + * counterpart (bound, idle), like out-of-range values, are reported as + * LX_TCP_CLOSE. + */ +static const int stol_tcp_state[LX_TCP_NEW_SYN_RECV + 1] = { + LX_TCP_ESTABLISHED, LX_TCP_CLOSE_WAIT, LX_TCP_FIN_WAIT1, + LX_TCP_CLOSING, LX_TCP_LAST_ACK, LX_TCP_FIN_WAIT2, LX_TCP_TIME_WAIT, + LX_TCP_SYN_RECV, LX_TCP_SYN_SENT, LX_TCP_LISTEN, LX_TCP_CLOSE, + LX_TCP_CLOSE, LX_TCP_CLOSE +}; +#define STOL_TCPSTATE(t) \ + (((t) <= LX_OS_MAX_TCP_STATE && (t) >= LX_OS_MIN_TCP_STATE) ? \ + ((t) < 0 ? 
stol_tcp_state[((t) * -1) + 6] : \ + stol_tcp_state[(t)]) : LX_TCP_CLOSE) /* * This string is used to prefix all abstract namespace Unix sockets, ie all * abstract namespace sockets are converted to regular sockets in the /tmp @@ -2825,7 +2845,7 @@ static const lx_sockopt_map_t ltos_tcp_sockopts[LX_TCP_NOTSENT_LOWAT + 1] = { { TCP_LINGER2, sizeof (int) }, /* TCP_LINGER2 */ { OPTNOTSUP, 0 }, /* TCP_DEFER_ACCEPT - in code */ { OPTNOTSUP, 0 }, /* TCP_WINDOW_CLAMP - in code */ - { OPTNOTSUP, 0 }, /* TCP_INFO */ + { OPTNOTSUP, 0 }, /* TCP_INFO - in code */ { TCP_QUICKACK, sizeof (int) }, /* TCP_QUICKACK */ { TCP_CONGESTION, CC_ALGO_NAME_MAX }, /* TCP_CONGESTION */ { OPTNOTSUP, 0 }, /* TCP_MD5SIG */ @@ -3951,6 +3971,38 @@ lx_getsockopt_tcp(sonode_t *so, int optname, void *optval, socklen_t *optlen) } *optlen = sizeof (int); goto out; + + case LX_TCP_INFO: + /* + * Only fields applications are known to expect are set. + * NOTE(review): confirm units/sources against lx_tcp_info_t. + */ + if (*optlen < sizeof (lx_tcp_info_t)) { + error = EINVAL; + } else { + lx_tcp_info_t *ti = (lx_tcp_info_t *)optval; + conn_t *con = (struct conn_s *)so->so_proto_handle; + tcp_t *tp = con->conn_tcp; + + *optlen = sizeof (lx_tcp_info_t); + bzero(optval, *optlen); + ti->tcpi_state = STOL_TCPSTATE(tp->tcp_state); + ti->tcpi_rto = tp->tcp_rto_initial; + ti->tcpi_last_data_recv = tp->tcp_ts_recent; + ti->tcpi_rtt = NSEC2USEC(tp->tcp_rtt_sa); + ti->tcpi_rttvar = NSEC2USEC(tp->tcp_rtt_sd); + ti->tcpi_snd_ssthresh = tp->tcp_swnd; + ti->tcpi_snd_cwnd = tp->tcp_cwnd; + ti->tcpi_snd_mss = tp->tcp_mss; + ti->tcpi_unacked = tp->tcp_suna; + ti->tcpi_sacked = tp->tcp_rack_cnt; + ti->tcpi_pmtu = tp->tcp_initial_pmtu; + /* NOTE(review): also feeds tcpi_unacked; confirm */ + ti->tcpi_total_retrans = tp->tcp_suna; + ti->tcpi_rcv_space = tp->tcp_rcv_ws; + } + goto out; + default: break; } -- 2.34.1