40#include "opt_kern_tls.h"
41#include "opt_tcpdebug.h"
46#include <sys/callout.h>
47#include <sys/eventhandler.h>
51#include <sys/kernel.h>
60#include <sys/sysctl.h>
62#include <sys/malloc.h>
63#include <sys/refcount.h>
66#include <sys/domain.h>
71#include <sys/socket.h>
72#include <sys/socketvar.h>
73#include <sys/protosw.h>
74#include <sys/random.h>
79#include <net/route/nhop.h>
81#include <net/if_var.h>
96#include <netinet6/in6_fib.h>
97#include <netinet6/in6_pcb.h>
98#include <netinet6/ip6_var.h>
99#include <netinet6/scope6_var.h>
100#include <netinet6/nd6.h>
116#include <netinet6/tcp6_var.h>
127#include <netinet6/ip6protosw.h>
135#include <netipsec/ipsec_support.h>
137#include <machine/in_cksum.h>
138#include <crypto/siphash/siphash.h>
140#include <security/mac/mac_framework.h>
147#ifdef NETFLIX_EXP_DETECTION
150 CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
151 "Sack Attack detection thresholds");
152int32_t tcp_force_detection = 0;
153SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, force_detection,
155 &tcp_force_detection, 0,
156 "Do we force detection even if the INP has it off?");
157int32_t tcp_sack_to_ack_thresh = 700;
158SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sack_to_ack_thresh,
160 &tcp_sack_to_ack_thresh, 700,
161 "Percentage of sacks to acks we must see above (10.1 percent is 101)?");
162int32_t tcp_sack_to_move_thresh = 600;
163SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, move_thresh,
165 &tcp_sack_to_move_thresh, 600,
166 "Percentage of sack moves we must see above (10.1 percent is 101)");
167int32_t tcp_restoral_thresh = 650;
168SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, restore_thresh,
170 &tcp_restoral_thresh, 550,
171 "Percentage of sack to ack percentage we must see below to restore(10.1 percent is 101)");
172int32_t tcp_sad_decay_val = 800;
173SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, decay_per,
175 &tcp_sad_decay_val, 800,
176 "The decay percentage (10.1 percent equals 101 )");
177int32_t tcp_map_minimum = 500;
178SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, nummaps,
180 &tcp_map_minimum, 500,
181 "Number of Map enteries before we start detection");
182int32_t tcp_attack_on_turns_on_logging = 0;
183SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, attacks_logged,
185 &tcp_attack_on_turns_on_logging, 0,
186 "When we have a positive hit on attack, do we turn on logging?");
187int32_t tcp_sad_pacing_interval = 2000;
188SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_pacing_int,
190 &tcp_sad_pacing_interval, 2000,
191 "What is the minimum pacing interval for a classified attacker?");
193int32_t tcp_sad_low_pps = 100;
194SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_low_pps,
196 &tcp_sad_low_pps, 100,
197 "What is the input pps that below which we do not decay?");
203 "If the tcp_stack does ack-war prevention how many milliseconds are in its time window?");
208 "If the tcp_stack does ack-war prevention how many acks can be sent in its time window?");
218 error = sysctl_handle_int(oidp, &
new, 0, req);
219 if (error == 0 && req->newptr) {
220 if (
new < TCP_MINMSS)
229 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
231 "Default TCP Maximum Segment Size");
235sysctl_net_inet_tcp_mss_v6_check(SYSCTL_HANDLER_ARGS)
239 new = V_tcp_v6mssdflt;
240 error = sysctl_handle_int(oidp, &
new, 0, req);
241 if (error == 0 && req->newptr) {
242 if (
new < TCP_MINMSS)
245 V_tcp_v6mssdflt =
new;
251 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
252 &VNET_NAME(tcp_v6mssdflt), 0, &sysctl_net_inet_tcp_mss_v6_check,
"I",
253 "Default TCP Maximum Segment Size for IPv6");
/*
 * Integer sysctl "minmss" under the _net_inet_tcp node.  It is per-VNET
 * (CTLFLAG_VNET) and read-write (CTLFLAG_RW), and is backed directly by the
 * tcp_minmss variable: this is a plain SYSCTL_INT, so no custom handler
 * validates the value that is written.
 */
265SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_VNET | CTLFLAG_RW,
266 &VNET_NAME(tcp_minmss), 0,
267 "Minimum TCP Maximum Segment Size");
271 &VNET_NAME(tcp_do_rfc1323), 0,
272 "Enable rfc1323 (high performance TCP) extensions");
/*
 * Integer sysctl "tolerate_missing_ts" under the _net_inet_tcp node.
 * Per-VNET, read-write, backed by tcp_tolerate_missing_ts.
 * NOTE(review): the code that consults this knob is not in this file's
 * visible portion; presumably non-zero means "tolerate" -- confirm.
 */
282SYSCTL_INT(_net_inet_tcp, OID_AUTO, tolerate_missing_ts, CTLFLAG_VNET | CTLFLAG_RW,
283 &VNET_NAME(tcp_tolerate_missing_ts), 0,
284 "Tolerate missing TCP timestamps");
/*
 * Integer sysctl "ts_offset_per_conn" under the _net_inet_tcp node.
 * Per-VNET, read-write, backed by tcp_ts_offset_per_conn.  Per the
 * description string it selects per-connection rather than per-host-pair
 * initialisation of TCP timestamps.
 */
287SYSCTL_INT(_net_inet_tcp, OID_AUTO, ts_offset_per_conn, CTLFLAG_VNET | CTLFLAG_RW,
288 &VNET_NAME(tcp_ts_offset_per_conn), 0,
289 "Initialize TCP timestamps per connection instead of per host pair");
298 "If the TCP stack does pacing, is there a limit (-1 = no, 0 = no pacing N = number of connections)");
305 &
tcp_log_debug, 0,
"Log errors caused by incoming TCP segments");
308SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
313 "Enable tcp_drain routine for extra help when low on mbufs");
/*
 * Read-only (CTLFLAG_RD), per-VNET unsigned sysctl "pcbcount" under the
 * _net_inet_tcp node.  It exports the ipi_count field of tcbinfo as-is.
 */
315SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_VNET | CTLFLAG_RD,
316 &VNET_NAME(tcbinfo.ipi_count), 0,
"Number of active PCBs");
/* Accessor for the current VNET's instance of icmp_may_rst. */
319#define V_icmp_may_rst VNET(icmp_may_rst)
/*
 * Integer sysctl "icmp_may_rst" under the _net_inet_tcp node.  Per-VNET,
 * read-write, backed by icmp_may_rst.  Per the description string it
 * controls whether certain ICMP unreachable messages may abort connections
 * that are in SYN_SENT.
 */
320SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_VNET | CTLFLAG_RW,
321 &VNET_NAME(icmp_may_rst), 0,
322 "Certain ICMP unreachable messages may abort connections in SYN_SENT");
/* Accessor for the current VNET's instance of tcp_isn_reseed_interval. */
325#define V_tcp_isn_reseed_interval VNET(tcp_isn_reseed_interval)
/*
 * Integer sysctl "isn_reseed_interval" under the _net_inet_tcp node.
 * Per-VNET, read-write, backed by tcp_isn_reseed_interval.  The description
 * string gives the unit as seconds.
 * NOTE(review): how a value of 0 is treated is not visible here -- confirm
 * against the ISN generation code.
 */
326SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_VNET | CTLFLAG_RW,
327 &VNET_NAME(tcp_isn_reseed_interval), 0,
328 "Seconds between reseeding of ISN secret");
331SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN,
335#define V_sack_hole_zone VNET(sack_hole_zone)
344 error = sysctl_handle_int(oidp, &
new, 0, req);
345 if (error == 0 && req->newptr) {
355 CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
356 &VNET_NAME(tcp_map_entries_limit), 0,
358 "Total sendmap entries limit");
/*
 * Unsigned sysctl "split_limit" under the _net_inet_tcp node.  Per-VNET,
 * read-write, backed directly by tcp_map_split_limit (plain SYSCTL_UINT, so
 * no custom handler validates the written value).
 */
361SYSCTL_UINT(_net_inet_tcp, OID_AUTO, split_limit, CTLFLAG_VNET | CTLFLAG_RW,
362 &VNET_NAME(tcp_map_split_limit), 0,
363 "Total sendmap split entries limit");
369#define TS_OFFSET_SECRET_LENGTH SIPHASH_KEY_LENGTH
371#define V_ts_offset_secret VNET(ts_offset_secret)
380 void *ip4hdr,
const void *ip6hdr);
442 if (f->
tf_fb == blk) {
490 if ((f->
tf_fb == blk) &&
528 (
"%s: called by the built-in default stack", __func__));
545 if (tfb == tp->
t_fb) {
577 panic(
"Can't refer to tcp_def_funcblk");
582 panic(
"Default stack rejects a new session?");
589 panic(
"Default stack initialization failed");
595 const struct sockaddr *sa,
void *ctx)
607 if ((m->m_flags & M_PKTHDR) == 0) {
612 thlen =
sizeof(
struct tcphdr);
613 if (m->m_len < off +
sizeof(
struct udphdr) + thlen &&
614 (m = m_pullup(m, off +
sizeof(
struct udphdr) + thlen)) == NULL) {
618 iph = mtod(m,
struct ip *);
619 uh = (
struct udphdr *)((caddr_t)iph + off);
620 th = (
struct tcphdr *)(uh + 1);
621 thlen = th->th_off << 2;
622 if (m->m_len < off +
sizeof(
struct udphdr) + thlen) {
623 m = m_pullup(m, off +
sizeof(
struct udphdr) + thlen);
628 iph = mtod(m,
struct ip *);
629 uh = (
struct udphdr *)((caddr_t)iph + off);
630 th = (
struct tcphdr *)(uh + 1);
633 m->m_pkthdr.tcp_tun_port = port = uh->
uh_sport;
634 bcopy(th, uh, m->m_len - off);
635 m->m_len -=
sizeof(
struct udphdr);
636 m->m_pkthdr.len -=
sizeof(
struct udphdr);
653 ip6 = mtod(m,
struct ip6_hdr *);
654 ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) -
sizeof(
struct udphdr));
674 memset(&fs, 0,
sizeof(fs));
687 if (error != 0 || req->newptr == NULL)
704 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
706 "Set/get the default TCP functions");
711 int error, cnt, linesz;
725 buffer = malloc(bufsz, M_TEMP, M_WAITOK);
730 linesz = snprintf(cp, bufsz,
"\n%-32s%c %-32s %s\n",
"Stack",
'D',
731 "Alias",
"PCB count");
739 linesz = snprintf(cp, bufsz,
"%-32s%c %-32s %u\n",
744 if (linesz >= bufsz) {
754 error = sysctl_handle_string(oidp, buffer, outsz + 1, req);
755 free(buffer, M_TEMP);
760 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
762 "list available TCP Function sets");
/*
 * Per-VNET pointer to the IPv4 UDP tunnelling socket, NULL until one exists.
 * Elsewhere in this file it is filled in by socreate(PF_INET, ...), bound
 * with sobind(), and released with soclose() followed by a reset to NULL.
 */
767VNET_DEFINE(
struct socket *, udp4_tun_socket) = NULL;
/* Accessor for the current VNET's instance of udp4_tun_socket. */
768#define V_udp4_tun_socket VNET(udp4_tun_socket)
/*
 * Per-VNET pointer to the IPv6 UDP tunnelling socket, NULL until one exists.
 * Elsewhere in this file it is filled in by socreate(PF_INET6, ...), bound
 * with sobind(), and released with soclose() followed by a reset to NULL.
 */
771VNET_DEFINE(
struct socket *, udp6_tun_socket) = NULL;
/* Accessor for the current VNET's instance of udp6_tun_socket. */
772#define V_udp6_tun_socket VNET(udp6_tun_socket)
783 if (V_udp4_tun_socket != NULL) {
784 soclose(V_udp4_tun_socket);
785 V_udp4_tun_socket = NULL;
789 if (V_udp6_tun_socket != NULL) {
790 soclose(V_udp6_tun_socket);
791 V_udp6_tun_socket = NULL;
805 struct sockaddr_in6 sin6;
812 if (ntohs(port) == 0) {
817 if (V_udp4_tun_socket != NULL) {
823 if (V_udp6_tun_socket != NULL) {
829 if ((ret = socreate(PF_INET, &V_udp4_tun_socket,
831 curthread->td_ucred, curthread))) {
848 if ((ret = sobind(V_udp4_tun_socket,
849 (
struct sockaddr *)&sin, curthread))) {
855 if ((ret = socreate(PF_INET6, &V_udp6_tun_socket,
857 curthread->td_ucred, curthread))) {
864 tcp6_ctlinput_viaudp,
870 memset(&sin6, 0,
sizeof(
struct sockaddr_in6));
871 sin6.sin6_len =
sizeof(
struct sockaddr_in6);
872 sin6.sin6_family = AF_INET6;
873 sin6.sin6_port = htons(port);
874 if ((ret = sobind(V_udp6_tun_socket,
875 (
struct sockaddr *)&sin6, curthread))) {
891 error = sysctl_handle_int(oidp, &
new, 0, req);
893 (req->newptr != NULL)) {
911 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
912 &VNET_NAME(tcp_udp_tunneling_port),
914 "Tunneling port for tcp over udp");
924 error = sysctl_handle_int(oidp, &
new, 0, req);
925 if (error == 0 && req->newptr) {
936 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
937 &VNET_NAME(tcp_udp_tunneling_overhead),
939 "MSS reduction when using tcp over udp");
954 if (req->newptr != NULL)
961 if (req->oldptr != NULL) {
962 error = sysctl_wire_old_buffer(req, 0);
976 if (req->oldptr == NULL) {
985 if (req->oldptr != NULL) {
986 bzero(&tfi,
sizeof(tfi));
993 error = SYSCTL_OUT(req, &tfi,
sizeof(tfi));
1002 (
"%s: cnt (%d) != tcp_fb_cnt (%d)", __func__, cnt,
tcp_fb_cnt));
1007 if (req->oldptr == NULL)
1008 error = SYSCTL_OUT(req, NULL,
1015 CTLTYPE_OPAQUE | CTLFLAG_SKIP | CTLFLAG_RD | CTLFLAG_MPSAFE,
1017 "List TCP function block name-to-ID mappings");
1048 (
"%s: connection %p in unexpected state %d", __func__, tp,
1073 (int32_t)sbavail(&so->so_snd))
1119#define TCBHASHSIZE 0
1138#define V_tcpcb_zone VNET(tcpcb_zone)
/*
 * Thin wrappers around the isn_mtx mutex: ISN_LOCK_INIT() initialises it as
 * a default (MTX_DEF) mutex named "isn_mtx"; ISN_LOCK()/ISN_UNLOCK() acquire
 * and release it.
 * NOTE(review): the data this mutex protects is not visible in this part of
 * the file -- presumably the ISN generator state; confirm.
 */
1145#define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
1146#define ISN_LOCK() mtx_lock(&isn_mtx)
1147#define ISN_UNLOCK() mtx_unlock(&isn_mtx)
1164 hashsize = 1 << fls(size);
1166 if (hashsize < size) {
1167 hashsize = 1 << (fls(size) - 1);
1190 const char *names[],
int *num_names)
1196 KASSERT(names != NULL && *num_names > 0,
1197 (
"%s: Called with 0-length name list", __func__));
1198 KASSERT(names != NULL, (
"%s: Called with NULL name list", __func__));
1200 (
"%s: called too early", __func__));
1237 for (i = 0; i < *num_names; i++) {
1238 n = malloc(
sizeof(
struct tcp_function), M_TCPFUNCTIONS, wait);
1251 free(n, M_TCPFUNCTIONS);
1271 if (!strncmp(n->
tf_name, names[i],
1276 free(n, M_TCPFUNCTIONS);
1295 const char *name_list[1];
1300 name_list[0] = name;
1364 VNET_ITERATOR_DECL(vnet_iter);
1369 VNET_FOREACH(vnet_iter) {
1370 CURVNET_SET(vnet_iter);
1371 while ((inp =
inp_next(&inpi)) != NULL) {
1375 if (tp == NULL || tp->
t_fb != blk)
1381 VNET_LIST_RUNLOCK();
1400 free(f, M_TCPFUNCTIONS);
1413 printf(
"%s: WARNING: unable to register helper hook\n", __func__);
1416 printf(
"%s: WARNING: unable to register helper hook\n", __func__);
1420 printf(
"%s: WARNING: unable to initialise TCP stats\n",
1430 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
1432 uma_zone_set_warning(
V_tcpcb_zone,
"kern.ipc.maxsockets limit reached");
1438 TUNABLE_INT_FETCH(
"net.inet.tcp.sack.enable", &
V_tcp_do_sack);
1440 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
1445 VNET_PCPUSTAT_ALLOC(
tcpstat, M_WAITOK);
1455 const char *tcbhash_tuneable;
1492 tcp6_usrreqs.pru_soreceive = soreceive_stream;
1497#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
1499#define TCP_MINPROTOHDR (sizeof(struct tcpiphdr))
1505#undef TCP_MINPROTOHDR
1508 EVENTHANDLER_REGISTER(shutdown_pre_sync,
tcp_fini, NULL,
1509 SHUTDOWN_PRI_DEFAULT);
1525 tcbhash_tuneable =
"net.inet.tcp.tcbhashsize";
1526 TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
1527 if (hashsize == 0) {
1542 printf(
"%s: %s auto tuned to %d\n", __func__,
1543 tcbhash_tuneable, hashsize);
1553 if (!powerof2(hashsize)) {
1554 int oldhashsize = hashsize;
1560 printf(
"%s: WARNING: TCB hash size not a power of 2, "
1561 "clipped from %d to %d.\n", __func__, oldhashsize,
1570tcp_destroy(
void *unused __unused)
1588 pause(
"tcpdes", hz / 10);
1610 printf(
"%s: WARNING: unable to deregister helper hook "
1611 "type=%d, id=%d: error %d returned\n", __func__,
1616 printf(
"%s: WARNING: unable to deregister helper hook "
1617 "type=%d, id=%d: error %d returned\n", __func__,
/*
 * Register tcp_destroy() as a VNET uninit handler for "tcp", at subsystem
 * SI_SUB_PROTO_DOMAIN with order SI_ORDER_FOURTH.  NULL is passed as the
 * argument, which tcp_destroy() declares as unused.
 */
1622VNET_SYSUNINIT(tcp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, tcp_destroy, NULL);
1639 struct tcphdr *th = (
struct tcphdr *)tcp_ptr;
1647 ip6 = (
struct ip6_hdr *)ip_ptr;
1648 ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
1650 ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
1656 ip6->ip6_plen = htons(
sizeof(
struct tcphdr));
1657 ip6->
ip6_src = inp->in6p_laddr;
1658 ip6->
ip6_dst = inp->in6p_faddr;
1661#if defined(INET6) && defined(INET)
1668 ip = (
struct ip *)ip_ptr;
1681 ip->ip_src = inp->inp_laddr;
1685 th->th_sport = inp->inp_lport;
1686 th->th_dport = inp->inp_fport;
1706 t = malloc(
sizeof(*t), M_TEMP, M_NOWAIT);
1729 tcp_seq ack, tcp_seq seq,
int flags)
1735 struct udphdr *uh = NULL;
1743 int optlen, tlen, win, ulen;
1751 KASSERT(tp != NULL || m != NULL, (
"tcp_respond: tp and m both NULL"));
1762 KASSERT(inp != NULL, (
"tcp control block w/o inpcb"));
1769 if (isipv6 && ip6 && (ip6->ip6_nxt ==
IPPROTO_UDP))
1770 port = m->m_pkthdr.tcp_tun_port;
1774 port = m->m_pkthdr.tcp_tun_port;
1783 if (!(flags & TH_RST)) {
1792 m = m_gethdr(M_NOWAIT, MT_DATA);
1795 m->m_data += max_linkhdr;
1798 bcopy((caddr_t)ip6, mtod(m, caddr_t),
1800 ip6 = mtod(m,
struct ip6_hdr *);
1801 nth = (
struct tcphdr *)(ip6 + 1);
1804 uh = (
struct udphdr *)nth;
1807 nth = (
struct tcphdr *)(uh + 1);
1812 bcopy((caddr_t)
ip, mtod(m, caddr_t),
sizeof(
struct ip));
1813 ip = mtod(m,
struct ip *);
1814 nth = (
struct tcphdr *)(
ip + 1);
1817 uh = (
struct udphdr *)nth;
1820 nth = (
struct tcphdr *)(uh + 1);
1823 bcopy((caddr_t)th, (caddr_t)nth,
sizeof(
struct tcphdr));
1825 }
else if ((!M_WRITABLE(m)) || (port != 0)) {
1829 n = m_gethdr(M_NOWAIT, MT_DATA);
1835 if (!m_dup_pkthdr(n, m, M_NOWAIT)) {
1841 n->m_data += max_linkhdr;
1843#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
1846 bcopy((caddr_t)ip6, mtod(n, caddr_t),
1848 ip6 = mtod(n,
struct ip6_hdr *);
1849 xchg(ip6->ip6_dst, ip6->ip6_src,
struct in6_addr);
1850 nth = (
struct tcphdr *)(ip6 + 1);
1853 uh = (
struct udphdr *)nth;
1856 nth = (
struct tcphdr *)(uh + 1);
1861 bcopy((caddr_t)
ip, mtod(n, caddr_t),
sizeof(
struct ip));
1862 ip = mtod(n,
struct ip *);
1864 nth = (
struct tcphdr *)(
ip + 1);
1867 uh = (
struct udphdr *)nth;
1870 nth = (
struct tcphdr *)(uh + 1);
1873 bcopy((caddr_t)th, (caddr_t)nth,
sizeof(
struct tcphdr));
1885 m->m_data = (caddr_t)ipgen;
1890 nth = (
struct tcphdr *)(ip6 + 1);
1895 nth = (
struct tcphdr *)(
ip + 1);
1903 nth->th_sport = th->th_sport;
1904 nth->th_dport = th->th_dport;
1912 tlen =
sizeof (
struct ip6_hdr) + sizeof (struct tcphdr);
1914#if defined(INET) && defined(INET6)
1921 tlen +=
sizeof (
struct udphdr);
1924 KASSERT(M_TRAILINGSPACE(m) >= tlen,
1925 (
"Not enough trailing space for message (m=%p, need=%d, have=%ld)",
1926 m, tlen, (
long)M_TRAILINGSPACE(m)));
1932 if (M_TRAILINGSPACE(m) < TCP_MAXOLEN) {
1933 m->m_next = m_get(M_NOWAIT, MT_DATA);
1935 optp = mtod(m->m_next, u_char *);
1940 optp = (u_char *) (nth + 1);
1951#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
1960 optm->m_len += optlen;
1966 ulen = tlen -
sizeof(
struct ip6_hdr);
1975 ip6->ip6_plen = htons(tlen -
sizeof(*ip6));
1978#if defined(INET) && defined(INET6)
1984 ulen = tlen -
sizeof(
struct ip);
1998 m->m_pkthdr.len = tlen;
1999 m->m_pkthdr.rcvif = NULL;
2007 mac_inpcb_create_mbuf(inp, m);
2013 mac_netinet_tcp_reply(m);
2016 nth->th_seq = htonl(seq);
2017 nth->th_ack = htonl(ack);
2018 nth->th_off = (
sizeof (
struct tcphdr) + optlen) >> 2;
2021 nth->th_win = htons((u_short) (win >> tp->
rcv_scale));
2023 nth->th_win = htons((u_short)win);
2026#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
2028 if (!TCPMD5_ENABLED() ||
2039 m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
2040 m->m_pkthdr.csum_data = offsetof(
struct udphdr, uh_sum);
2044 m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
2045 m->m_pkthdr.csum_data = offsetof(
struct tcphdr, th_sum);
2046 nth->th_sum = in6_cksum_pseudo(ip6,
2049 ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->
t_inpcb :
2053#if defined(INET6) && defined(INET)
2061 m->m_pkthdr.csum_flags = CSUM_UDP;
2062 m->m_pkthdr.csum_data = offsetof(
struct udphdr, uh_sum);
2065 m->m_pkthdr.csum_flags = CSUM_TCP;
2066 m->m_pkthdr.csum_data = offsetof(
struct tcphdr, th_sum);
2068 htons((u_short)(tlen -
sizeof(
struct ip) +
ip->
ip_p)));
2073 if (tp == NULL || (inp->
inp_socket->so_options & SO_DEBUG))
2078 TCP_PROBE5(accept__refused, NULL, NULL, m, tp, nth);
2092 ERRNO_UNK, 0, &log,
false, NULL, NULL, 0, &tv);
2114 KASSERT((thflags & (TH_ACK|TH_SYN)) == TH_SYN &&
2117 (
"%s: Logging of TCP segment with flags 0x%b and "
2118 "UDP encapsulation port %u skipped in state %s",
2119 __func__, thflags, PRINT_TH_FLAGS,
2120 ntohs(port), tcpstates[tp->
t_state]));
2127 output_ret = ip6_output(m, NULL, NULL, 0, NULL, NULL, inp);
2130#if defined(INET) && defined(INET6)
2136 output_ret =
ip_output(m, NULL, NULL, 0, NULL, inp);
2183 if (
CC_ALGO(tp)->cb_init != NULL)
2184 if (
CC_ALGO(tp)->cb_init(tp->
ccv, NULL) > 0) {
2196 if (khelp_init_osd(HELPER_CLASS_TCP, tp->
osd)) {
2207 tp->
t_vnet = inp->inp_vnet;
2213 isipv6 ? V_tcp_v6mssdflt :
2228 TAILQ_INIT(&tp->snd_holes);
2239 tp->
snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
2269 if (V_tcp_perconn_stats_enable == 1)
2270 tp->
t_stats = stats_blob_alloc(V_tcp_perconn_stats_dflt_tpl, 0);
2293 (void)tcp_output_nodrop(tp);
2299 so->so_error = errno;
2354 if (
CC_ALGO(tp)->cb_destroy != NULL)
2361 khelp_destroy_osd(tp->
osd);
2364 stats_blob_destroy(tp->
t_stats);
2370 bool released __diagused;
2373 KASSERT(!released, (
"%s: inp %p should not have been released "
2374 "here", __func__, inp));
2418 bzero(&metrics,
sizeof(metrics));
2429 if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) {
2439 (isipv6 ?
sizeof (
struct ip6_hdr) +
2440 sizeof (struct tcphdr) :
2442 sizeof (struct tcpiphdr)
2449 metrics.rmx_ssthresh = ssthresh;
2451 metrics.rmx_rtt = tp->t_srtt;
2452 metrics.rmx_rttvar = tp->t_rttvar;
2453 metrics.rmx_cwnd = tp->snd_cwnd;
2454 metrics.rmx_sendpipe = 0;
2455 metrics.rmx_recvpipe = 0;
2460 refcount_release(&tp->t_fb->tfb_refcnt);
2461 uma_zfree(V_tcpcb_zone, tp);
2463 return (in_pcbrele_wlocked(inp));
2498 KASSERT(inp->
inp_socket != NULL, (
"tcp_close: inp_socket NULL"));
2500 soisdisconnected(so);
2502 KASSERT(so->so_state & SS_PROTOREF,
2503 (
"tcp_close: !SS_PROTOREF"));
2507 so->so_state &= ~SS_PROTOREF;
2517 VNET_ITERATOR_DECL(vnet_iter);
2522 VNET_LIST_RLOCK_NOSLEEP();
2523 VNET_FOREACH(vnet_iter) {
2524 CURVNET_SET(vnet_iter);
2538 while ((inpb =
inp_next(&inpi)) != NULL) {
2558 VNET_LIST_RUNLOCK_NOSLEEP();
2569static struct inpcb *
2581 KASSERT(tp != NULL, (
"tcp_notify: tp == NULL"));
2591 (error == EHOSTUNREACH || error == ENETUNREACH ||
2592 error == EHOSTDOWN)) {
2595 inp->
inp_route.ro_nh = (
struct nhop_object *)NULL;
2610 wakeup( &so->so_timeo);
2625 if (req->newptr != NULL)
2628 if (req->oldptr == NULL) {
2633 n += imax(n / 8, 10);
2634 req->oldidx = 2 * (
sizeof xig) + n *
sizeof(
struct xtcpcb);
2638 if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
2641 bzero(&xig,
sizeof(xig));
2642 xig.xig_len =
sizeof xig;
2646 xig.xig_sogen = so_gencnt;
2647 error = SYSCTL_OUT(req, &xig,
sizeof xig);
2655 while ((inp =
inp_next(&inpi)) != NULL) {
2666 crerr = cr_cansee(req->td->td_ucred,
2676 error = SYSCTL_OUT(req, &xt,
sizeof xt);
2695 xig.xig_sogen = so_gencnt;
2698 error = SYSCTL_OUT(req, &xig,
sizeof xig);
2705 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
2707 "List of active TCP connections");
2711tcp_getcred(SYSCTL_HANDLER_ARGS)
2715 struct epoch_tracker et;
2719 error = priv_check(req->td, PRIV_NETINET_GETCRED);
2722 error = SYSCTL_IN(req, addrs,
sizeof(addrs));
2725 NET_EPOCH_ENTER(et);
2740 error = SYSCTL_OUT(req, &xuc,
sizeof(
struct xucred));
2745 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_NEEDGIANT,
2746 0, 0, tcp_getcred,
"S,xucred",
2747 "Get the xucred of a TCP connection");
2752tcp6_getcred(SYSCTL_HANDLER_ARGS)
2754 struct epoch_tracker et;
2756 struct sockaddr_in6 addrs[2];
2763 error = priv_check(req->td, PRIV_NETINET_GETCRED);
2766 error = SYSCTL_IN(req, addrs,
sizeof(addrs));
2769 if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 ||
2770 (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) {
2773 if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) {
2775 if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr))
2782 NET_EPOCH_ENTER(et);
2786 *(
struct in_addr *)&addrs[1].sin6_addr.s6_addr[12],
2788 *(
struct in_addr *)&addrs[0].sin6_addr.s6_addr[12],
2793 &addrs[1].sin6_addr, addrs[1].sin6_port,
2794 &addrs[0].sin6_addr, addrs[0].sin6_port,
2808 error = SYSCTL_OUT(req, &xuc,
sizeof(
struct xucred));
2813 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_NEEDGIANT,
2814 0, 0, tcp6_getcred,
"S,xucred",
2815 "Get the xucred of a TCP6 connection");
2821tcp_next_pmtu(
const struct icmp *icp,
const struct ip *
ip)
2823 int mtu = ntohs(icp->icmp_nextmtu);
2835tcp_ctlinput_with_port(
int cmd,
struct sockaddr *sa,
void *vip,
uint16_t port)
2837 struct ip *
ip = vip;
2845 tcp_seq icmp_tcp_seq;
2849 if (sa->sa_family != AF_INET || faddr.s_addr ==
INADDR_ANY)
2852 if (cmd == PRC_MSGSIZE)
2855 cmd == PRC_UNREACH_PORT || cmd == PRC_UNREACH_PROTOCOL ||
2856 cmd == PRC_TIMXCEED_INTRANS) &&
ip)
2864 else if (cmd == PRC_HOSTDEAD)
2866 else if ((
unsigned)cmd >= PRC_NCMDS ||
inetctlerrmap[cmd] == 0)
2875 th = (
struct tcphdr *)((caddr_t)
ip + (
ip->
ip_hl << 2));
2878 if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
2880 inp = (*notify)(inp, EHOSTDOWN);
2883 icmp_tcp_seq = th->th_seq;
2895 mtu = tcp_next_pmtu(icp,
ip);
2900 if (tp->
t_port != port) {
2905 if (cmd == PRC_MSGSIZE) {
2910 mtu = tcp_next_pmtu(icp,
ip);
2916 if (mtu < tp->t_maxseg +
2918 bzero(&inc,
sizeof(inc));
2919 inc.inc_faddr = faddr;
2926 inp = (*notify)(inp,
2931 bzero(&inc,
sizeof(inc));
2932 inc.inc_fport = th->th_dport;
2933 inc.inc_lport = th->th_sport;
2934 inc.inc_faddr = faddr;
2935 inc.inc_laddr =
ip->ip_src;
2946 tcp_ctlinput_with_port(cmd, sa, vip, htons(0));
2953 struct ip *outer_ip, *inner_ip;
2956 struct tcphdr *th, ttemp;
2960 inner_ip = (
struct ip *)vip;
2961 icmp = (
struct icmp *)((caddr_t)inner_ip -
2962 (
sizeof(
struct icmp) - sizeof(struct
ip)));
2963 outer_ip = (
struct ip *)((caddr_t)
icmp -
sizeof(
struct ip));
2964 i_hlen = inner_ip->
ip_hl << 2;
2965 o_len = ntohs(outer_ip->
ip_len);
2967 (
sizeof(
struct ip) + 8 + i_hlen +
sizeof(
struct udphdr) + offsetof(
struct tcphdr, th_ack))) {
2972 udp = (
struct udphdr *)(((caddr_t)inner_ip) + i_hlen);
2977 th = (
struct tcphdr *)(udp + 1);
2978 memcpy(&ttemp, th,
sizeof(
struct tcphdr));
2979 memcpy(udp, &ttemp,
sizeof(
struct tcphdr));
2981 o_len -=
sizeof(
struct udphdr);
2982 outer_ip->
ip_len = htons(o_len);
2984 tcp_ctlinput_with_port(cmd, sa, vip, port);
2990tcp6_next_pmtu(
const struct icmp6_hdr *icmp6)
2992 int mtu = ntohl(icmp6->icmp6_mtu);
3004tcp6_ctlinput_with_port(
int cmd,
struct sockaddr *sa,
void *d,
uint16_t port)
3006 struct in6_addr *dst;
3013 struct ip6ctlparam *ip6cp = NULL;
3014 const struct sockaddr_in6 *sa6_src = NULL;
3020 tcp_seq icmp_tcp_seq;
3024 if (sa->sa_family != AF_INET6 ||
3025 sa->sa_len !=
sizeof(
struct sockaddr_in6))
3030 ip6cp = (
struct ip6ctlparam *)d;
3031 icmp6 = ip6cp->ip6c_icmp6;
3033 ip6 = ip6cp->ip6c_ip6;
3034 off = ip6cp->ip6c_off;
3035 sa6_src = ip6cp->ip6c_src;
3036 dst = ip6cp->ip6c_finaldst;
3045 if (cmd == PRC_MSGSIZE)
3048 cmd == PRC_UNREACH_PORT || cmd == PRC_UNREACH_PROTOCOL ||
3049 cmd == PRC_TIMXCEED_INTRANS) && ip6 != NULL)
3057 else if (cmd == PRC_HOSTDEAD)
3059 else if ((
unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0)
3064 (
const struct sockaddr *)sa6_src,
3065 0, cmd, NULL, notify);
3072 (int32_t) (off +
sizeof(
struct tcp_ports)))) {
3075 bzero(&t_ports,
sizeof(
struct tcp_ports));
3076 m_copydata(m, off,
sizeof(
struct tcp_ports), (caddr_t)&t_ports);
3079 if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
3081 inp = (*notify)(inp, EHOSTDOWN);
3084 off +=
sizeof(
struct tcp_ports);
3085 if (m->m_pkthdr.len < (int32_t) (off +
sizeof(tcp_seq))) {
3088 m_copydata(m, off,
sizeof(tcp_seq), (caddr_t)&icmp_tcp_seq);
3097 mtu = tcp6_next_pmtu(icmp6);
3102 if (tp->
t_port != port) {
3107 if (cmd == PRC_MSGSIZE) {
3114 mtu = tcp6_next_pmtu(icmp6);
3116 bzero(&inc,
sizeof(inc));
3117 inc.inc_fibnum = M_GETFIB(m);
3119 inc.inc6_faddr = *dst;
3120 if (in6_setscope(&inc.inc6_faddr,
3121 m->m_pkthdr.rcvif, NULL))
3127 if (mtu < tp->t_maxseg +
3128 sizeof (
struct tcphdr) +
3135 inp = (*notify)(inp,
3136 inet6ctlerrmap[cmd]);
3140 bzero(&inc,
sizeof(inc));
3141 inc.inc_fibnum = M_GETFIB(m);
3143 inc.inc_fport = t_ports.th_dport;
3144 inc.inc_lport = t_ports.th_sport;
3145 inc.inc6_faddr = *dst;
3146 inc.inc6_laddr = ip6->
ip6_src;
3155tcp6_ctlinput(
int cmd,
struct sockaddr *sa,
void *d)
3157 tcp6_ctlinput_with_port(cmd, sa, d, htons(0));
3161tcp6_ctlinput_viaudp(
int cmd,
struct sockaddr *sa,
void *d,
void *unused)
3163 struct ip6ctlparam *ip6cp;
3168 ip6cp = (
struct ip6ctlparam *)d;
3169 m = m_pulldown(ip6cp->ip6c_m, ip6cp->ip6c_off,
sizeof(
struct udphdr), NULL);
3173 udp = mtod(m,
struct udphdr *);
3178 m_adj(m,
sizeof(
struct udphdr));
3179 if ((m->m_flags & M_PKTHDR) == 0) {
3180 ip6cp->ip6c_m->m_pkthdr.len -=
sizeof(
struct udphdr);
3183 tcp6_ctlinput_with_port(cmd, sa, d, port);
3194 KASSERT(len >= SIPHASH_KEY_LENGTH,
3195 (
"%s: keylen %u too short ", __func__, len));
3196 SipHash24_Init(&ctx);
3197 SipHash_SetKey(&ctx, (
uint8_t *)key);
3198 SipHash_Update(&ctx, &inc->inc_fport,
sizeof(
uint16_t));
3199 SipHash_Update(&ctx, &inc->inc_lport,
sizeof(
uint16_t));
3203 SipHash_Update(&ctx, &inc->inc_faddr,
sizeof(
struct in_addr));
3204 SipHash_Update(&ctx, &inc->inc_laddr,
sizeof(
struct in_addr));
3209 SipHash_Update(&ctx, &inc->inc6_faddr,
sizeof(
struct in6_addr));
3210 SipHash_Update(&ctx, &inc->inc6_laddr,
sizeof(
struct in6_addr));
3214 SipHash_Final((
uint8_t *)hash, &ctx);
3216 return (hash[0] ^ hash[1]);
3225 memcpy(&inc_store, inc,
sizeof(
struct in_conninfo));
3226 inc_store.inc_lport = 0;
3227 inc_store.inc_fport = 0;
3228 local_inc = &inc_store;
/*
 * Tuning constants for initial sequence number (ISN) generation.
 * NOTE(review): the code that uses them is not visible in this part of the
 * file, so the descriptions below are inferred from the names -- confirm.
 *
 * ISN_BYTES_PER_SECOND  1048576 (2^20); presumably the rate at which the
 *                       ISN offset advances over time.
 * ISN_STATIC_INCREMENT  4096; presumably a fixed per-ISN step.
 * ISN_RANDOM_INCREMENT  4096 - 1; presumably a mask/bound for a random step.
 * ISN_SECRET_LENGTH     same as SIPHASH_KEY_LENGTH, i.e. the secret is sized
 *                       to serve as a SipHash key.
 */
3280#define ISN_BYTES_PER_SECOND 1048576
3281#define ISN_STATIC_INCREMENT 4096
3282#define ISN_RANDOM_INCREMENT (4096 - 1)
3283#define ISN_SECRET_LENGTH SIPHASH_KEY_LENGTH
/*
 * Accessors for the current VNET's instances of the ISN state variables
 * (isn_secret, isn_last, isn_last_reseed, isn_offset, isn_offset_old).
 */
3291#define V_isn_secret VNET(isn_secret)
3292#define V_isn_last VNET(isn_last)
3293#define V_isn_last_reseed VNET(isn_last_reseed)
3294#define V_isn_offset VNET(isn_offset)
3295#define V_isn_offset_old VNET(isn_offset_old)
3301 u_int32_t projected_offset;
3367static struct inpcb *
3374static struct inpcb *
3386 KASSERT(tp != NULL, (
"tcp_mtudisc: tp == NULL"));
3391 SOCKBUF_LOCK(&so->so_snd);
3393 if (so->so_snd.sb_hiwat < tp->
t_maxseg)
3394 tp->
t_maxseg = so->so_snd.sb_hiwat;
3395 SOCKBUF_UNLOCK(&so->so_snd);
3413 if (tcp_output(tp) < 0)
3429 struct nhop_object *nh;
3433 KASSERT(inc != NULL, (
"tcp_maxmtu with NULL in_conninfo pointer"));
3441 maxmtu = nh->nh_mtu;
3445 if (ifp->if_capenable & IFCAP_TSO4 &&
3446 ifp->if_hwassist & CSUM_TSO) {
3447 cap->
ifcap |= CSUM_TSO;
3448 cap->
tsomax = ifp->if_hw_tsomax;
3462 struct nhop_object *nh;
3463 struct in6_addr dst6;
3468 KASSERT(inc != NULL, (
"tcp_maxmtu6 with NULL in_conninfo pointer"));
3473 if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) {
3474 in6_splitscope(&inc->inc6_faddr, &dst6, &scopeid);
3475 nh = fib6_lookup(inc->
inc_fibnum, &dst6, scopeid, NHR_NONE, 0);
3480 maxmtu = nh->nh_mtu;
3484 if (ifp->if_capenable & IFCAP_TSO6 &&
3485 ifp->if_hwassist & CSUM_TSO) {
3486 cap->
ifcap |= CSUM_TSO;
3487 cap->
tsomax = ifp->if_hw_tsomax;
3523 struct ip6_pktopts *opt;
3526 if (opt != NULL && opt->ip6po_minmtu == IP6PO_MINMTU_ALL &&
3554 optlen = TCPOLEN_TSTAMP_APPA;
3557#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
3559 optlen += PADTCPOLEN(TCPOLEN_SIGNATURE);
3562 optlen += TCPOLEN_SACKHDR;
3564 optlen = PADTCPOLEN(optlen);
3568 optlen = TCPOLEN_TSTAMP_APPA;
3570 optlen = PADTCPOLEN(TCPOLEN_MAXSEG);
3572 optlen += PADTCPOLEN(TCPOLEN_WINDOW);
3573#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
3575 optlen += PADTCPOLEN(TCPOLEN_SIGNATURE);
3578 optlen += PADTCPOLEN(TCPOLEN_SACK_PERMITTED);
3581 optlen = min(optlen, TCP_MAXOLEN);
/*
 * Round len up to a multiple of 4: take the number of whole 4-byte words,
 * add one more if there is a remainder, and convert back to bytes.
 * The argument is evaluated twice, so it must not have side effects; the
 * uses visible in this file pass only TCPOLEN_* constants.
 */
3602#define PAD(len) ((((len) / 4) + !!((len) % 4)) * 4)
3605 optlen = TCPOLEN_TSTAMP_APPA;
3608#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
3610 optlen +=
PAD(TCPOLEN_SIGNATURE);
3614 optlen = TCPOLEN_TSTAMP_APPA;
3616 optlen =
PAD(TCPOLEN_MAXSEG);
3618 optlen +=
PAD(TCPOLEN_WINDOW);
3619#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
3621 optlen +=
PAD(TCPOLEN_SIGNATURE);
3624 optlen +=
PAD(TCPOLEN_SACK_PERMITTED);
3627 optlen = min(optlen, TCP_MAXOLEN);
3637 struct sockaddr_storage addrs[2];
3642 struct epoch_tracker et;
3644 struct sockaddr_in6 *fin6, *lin6;
3655 if (req->oldptr != NULL || req->oldlen != 0)
3657 if (req->newptr == NULL)
3659 if (req->newlen <
sizeof(addrs))
3661 error = SYSCTL_IN(req, &addrs,
sizeof(addrs));
3665 switch (addrs[0].ss_family) {
3668 fin6 = (
struct sockaddr_in6 *)&addrs[0];
3669 lin6 = (
struct sockaddr_in6 *)&addrs[1];
3670 if (fin6->sin6_len !=
sizeof(
struct sockaddr_in6) ||
3671 lin6->sin6_len !=
sizeof(
struct sockaddr_in6))
3673 if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) {
3674 if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr))
3676 in6_sin6_2_sin_in_sock((
struct sockaddr *)&addrs[0]);
3677 in6_sin6_2_sin_in_sock((
struct sockaddr *)&addrs[1]);
3682 error = sa6_embedscope(fin6, V_ip6_use_defzone);
3685 error = sa6_embedscope(lin6, V_ip6_use_defzone);
3702 NET_EPOCH_ENTER(et);
3703 switch (addrs[0].ss_family) {
3706 inp = in6_pcblookup(&
V_tcbinfo, &fin6->sin6_addr,
3707 fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port,
3746 CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP |
3748 "Drop TCP connection");
3758 CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP |
3760 "Set socket option for TCP endpoint");
3764sysctl_switch_tls(SYSCTL_HANDLER_ARGS)
3767 struct sockaddr_storage addrs[2];
3770 struct epoch_tracker et;
3772 struct sockaddr_in6 *fin6, *lin6;
3783 if (req->oldptr != NULL || req->oldlen != 0)
3785 if (req->newptr == NULL)
3787 if (req->newlen <
sizeof(addrs))
3789 error = SYSCTL_IN(req, &addrs,
sizeof(addrs));
3793 switch (addrs[0].ss_family) {
3796 fin6 = (
struct sockaddr_in6 *)&addrs[0];
3797 lin6 = (
struct sockaddr_in6 *)&addrs[1];
3798 if (fin6->sin6_len !=
sizeof(
struct sockaddr_in6) ||
3799 lin6->sin6_len !=
sizeof(
struct sockaddr_in6))
3801 if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) {
3802 if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr))
3804 in6_sin6_2_sin_in_sock((
struct sockaddr *)&addrs[0]);
3805 in6_sin6_2_sin_in_sock((
struct sockaddr *)&addrs[1]);
3810 error = sa6_embedscope(fin6, V_ip6_use_defzone);
3813 error = sa6_embedscope(lin6, V_ip6_use_defzone);
3830 NET_EPOCH_ENTER(et);
3831 switch (addrs[0].ss_family) {
3834 inp = in6_pcblookup(&
V_tcbinfo, &fin6->sin6_addr,
3835 fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port,
3857 error = ktls_set_tx_mode(so,
/*
 * Write-only (CTLFLAG_WR), per-VNET sysctl "switch_to_sw_tls" under the
 * _net_inet_tcp node, hidden from listings (CTLFLAG_SKIP) and handled by
 * sysctl_switch_tls().  The handler rejects requests that supply an old
 * (read) buffer and expects the new value to hold two sockaddr_storage
 * entries: the foreign address at index 0 and the local address at index 1.
 * arg2 is 0 for this entry.
 * NOTE(review): arg2 presumably selects the mode handed to
 * ktls_set_tx_mode(); that call's arguments are not fully visible -- confirm.
 */
3867SYSCTL_PROC(_net_inet_tcp, OID_AUTO, switch_to_sw_tls,
3868 CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP |
3869 CTLFLAG_NEEDGIANT, NULL, 0, sysctl_switch_tls,
"",
3870 "Switch TCP connection to SW TLS");
/*
 * Companion "switch_to_ifnet_tls" entry: same flags, same handler and same
 * input format as switch_to_sw_tls above, but with arg2 set to 1.
 */
3871SYSCTL_PROC(_net_inet_tcp, OID_AUTO, switch_to_ifnet_tls,
3872 CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP |
3873 CTLFLAG_NEEDGIANT, NULL, 1, sysctl_switch_tls,
"",
3874 "Switch TCP connection to ifnet TLS");
3922 ip6 = (
const struct ip6_hdr *)ip6hdr;
3924 ip = (
struct ip *)ip4hdr;
3930 size =
sizeof(
"TCP: []:12345 to []:12345 tcpflags 0x2<>") +
3931 sizeof(PRINT_TH_FLAGS) + 1 +
3933 2 * INET6_ADDRSTRLEN;
3935 2 * INET_ADDRSTRLEN;
3938 s = malloc(size, M_TCPLOG, M_ZERO|M_NOWAIT);
3942 strcat(s,
"TCP: [");
3948 sprintf(sp,
"]:%i to [", ntohs(inc->inc_fport));
3952 sprintf(sp,
"]:%i", ntohs(inc->inc_lport));
3955 ip6_sprintf(sp, &inc->inc6_faddr);
3957 sprintf(sp,
"]:%i to [", ntohs(inc->inc_fport));
3959 ip6_sprintf(sp, &inc->inc6_laddr);
3961 sprintf(sp,
"]:%i", ntohs(inc->inc_lport));
3962 }
else if (ip6 && th) {
3963 ip6_sprintf(sp, &ip6->
ip6_src);
3965 sprintf(sp,
"]:%i to [", ntohs(th->th_sport));
3967 ip6_sprintf(sp, &ip6->
ip6_dst);
3969 sprintf(sp,
"]:%i", ntohs(th->th_dport));
3972 }
else if (
ip && th) {
3975 sprintf(sp,
"]:%i to [", ntohs(th->th_sport));
3979 sprintf(sp,
"]:%i", ntohs(th->th_dport));
3987 sprintf(sp,
" tcpflags 0x%b",
tcp_get_flags(th), PRINT_TH_FLAGS);
3988 if (*(s + size - 1) !=
'\0')
3989 panic(
"%s: string too long", __func__);
4001#if defined(KDTRACE_HOOKS)
4008 TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, pstate);
4026 bzero(xt,
sizeof(*xt));
4029 xt->xt_encaps_port = tw->
t_port;
4048 now = getsbinuptime();
4049#define COPYTIMER(ttt) do { \
4050 if (callout_active(&tp->t_timers->ttt)) \
4051 xt->ttt = (tp->t_timers->ttt.c_time - now) / \
4062 xt->t_rcvtime = 1000 * (ticks - tp->
t_rcvtime) / hz;
4064 xt->xt_encaps_port = tp->
t_port;
4067 bcopy(
CC_ALGO(tp)->name, xt->xt_cc,
4074 xt->xt_len =
sizeof(
struct xtcpcb);
4091 if (status > (
sizeof(
uint32_t) * 8)) {
4095 bit = 1U << (status - 1);
4132 KASSERT(ret != 0, (
"tcp_paced_connection_exits -1 would cause wrap?"));
4135 printf(
"Warning all pacing is now disabled, count decrements invalidly!\n");
4138 printf(
"Warning pacing count is invalid, invalid decrement\n");
static SYSCTL_NODE(_net_inet_accf, OID_AUTO, http, CTLFLAG_RW|CTLFLAG_MPSAFE, 0, "HTTP accept filter")
void cc_attach(struct tcpcb *tp, struct cc_algo *algo)
void cc_detach(struct tcpcb *tp)
#define CC_DEFAULT_ALGO()
#define ICMP6STAT_INC(name)
char * inet_ntoa_r(struct in_addr ina, char *buf)
u_short in_pseudo(u_int32_t a, u_int32_t b, u_int32_t c)
struct nhop_object * fib4_lookup(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, uint32_t flags, uint32_t flowid)
#define TCP_PROBE5(probe, arg0, arg1, arg2, arg3, arg4)
#define TCP_PROBE6(probe, arg0, arg1, arg2, arg3, arg4, arg5)
#define TCP_PROBE3(probe, arg0, arg1, arg2)
struct inpcb * inp_next(struct inpcb_iterator *ii)
void in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
void in_pcbtoxinpcb(const struct inpcb *inp, struct xinpcb *xi)
void in_pcbdrop(struct inpcb *inp)
void in_pcbref(struct inpcb *inp)
bool in_pcbrele_wlocked(struct inpcb *inp)
void in_pcbinfo_init(struct inpcbinfo *pcbinfo, struct inpcbstorage *pcbstor, u_int hash_nelements, u_int porthash_nelements)
int sysctl_setsockopt(SYSCTL_HANDLER_ARGS, struct inpcbinfo *pcbinfo, int(*ctloutput_set)(struct inpcb *, struct sockopt *))
#define INP_LOCK_ASSERT(inp)
struct inpcb * in_pcblookup(struct inpcbinfo *, struct in_addr, u_int, struct in_addr, u_int, int, struct ifnet *)
#define INP_ALL_ITERATOR(_ipi, _lock)
#define INP_WLOCK_ASSERT(inp)
#define INP_INFO_WUNLOCK(ipi)
#define INP_INFO_WLOCK(ipi)
void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr, int, struct inpcb *(*)(struct inpcb *, int))
int cr_canseeinpcb(struct ucred *cred, struct inpcb *inp)
#define IPV6_FLOWINFO_MASK
#define IPV6_VERSION_MASK
VNET_SYSUNINIT(divert, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, div_destroy, NULL)
int ip_next_mtu(int, int)
int ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct ip_moptions *imo, struct inpcb *inp)
union cc_var::ccv_container ccvc
struct socket * inp_socket
struct ip6_pktopts * in6p_outputopts
struct in_conninfo inp_inc
struct in_addr ip_src ip_dst
int(* tfb_tcp_output)(struct tcpcb *)
char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX]
int(* tfb_tcp_ctloutput)(struct inpcb *inp, struct sockopt *sopt)
void(* tfb_tcp_timer_stop)(struct tcpcb *, uint32_t)
int(* tfb_tcp_timer_active)(struct tcpcb *, uint32_t)
volatile uint32_t tfb_refcnt
void(* tfb_tcp_fb_fini)(struct tcpcb *, int)
int(* tfb_tcp_handoff_ok)(struct tcpcb *)
void(* tfb_tcp_mtu_chg)(struct tcpcb *)
int(* tfb_tcp_fb_init)(struct tcpcb *)
void(* tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t)
int(* tfb_tcp_timer_stop_all)(struct tcpcb *)
void(* tfb_tcp_timer_activate)(struct tcpcb *, uint32_t, u_int)
char tfi_name[TCP_FUNCTION_NAME_LEN_MAX]
char tfi_alias[TCP_FUNCTION_NAME_LEN_MAX]
char function_set_name[TCP_FUNCTION_NAME_LEN_MAX]
char tf_name[TCP_FUNCTION_NAME_LEN_MAX]
struct tcp_function_block * tf_fb
struct callout tt_persist
struct tsegqe_head t_segq
unsigned int * t_tfo_pending
uint8_t t_end_info_bytes[TCP_END_BYTE_INFO]
struct tcp_timer * t_timers
uint32_t t_end_info_status
struct statsblob * t_stats
uint32_t t_dsack_tlp_bytes
struct tcp_function_block * t_fb
#define TCP_TLS_MODE_IFNET
#define TCP_FUNCTION_NAME_LEN_MAX
void tcp_trace(short act, short ostate, struct tcpcb *tp, void *ipgen, struct tcphdr *th, int req)
void tcp_fastopen_init(void)
void tcp_fastopen_destroy(void)
void tcp_fastopen_disable_path(struct tcpcb *tp)
void tcp_fastopen_decrement_counter(unsigned int *counter)
#define TCPS_HAVERCVDSYN(s)
#define TCPS_SYN_RECEIVED
#define TCPS_HAVEESTABLISHED(s)
void tcp_hc_updatemtu(struct in_conninfo *inc, uint32_t mtu)
void tcp_hc_update(struct in_conninfo *inc, struct hc_metrics_lite *hcml)
void tcp_hpts_remove(struct inpcb *inp)
static __inline uint32_t tcp_get_usecs(struct timeval *tv)
void tcp_log_drain(struct tcpcb *tp)
void tcp_log_tcpcbinit(struct tcpcb *tp)
struct tcp_log_buffer * tcp_log_event_(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len, union tcp_log_stackspecific *stackinfo, int th_hostorder, const char *output_caller, const char *func, int line, const struct timeval *itv)
void tcp_log_tcpcbfini(struct tcpcb *tp)
size_t tcp_log_get_id(struct tcpcb *tp, char *buf)
counter_u64_t tcp_inp_lro_compressed
counter_u64_t tcp_inp_lro_locks_taken
counter_u64_t tcp_uncomp_total
counter_u64_t tcp_inp_lro_wokeup_queue
counter_u64_t tcp_bad_csums
counter_u64_t tcp_would_have_but
counter_u64_t tcp_comp_total
counter_u64_t tcp_extra_mbuf
counter_u64_t tcp_inp_lro_direct_queue
void tcp_offload_listen_stop(struct tcpcb *tp)
void tcp_offload_pmtu_update(struct tcpcb *tp, tcp_seq seq, int mtu)
void tcp_offload_detach(struct tcpcb *tp)
int tcp_addoptions(struct tcpopt *to, u_char *optp)
void tcp_setpersist(struct tcpcb *tp)
int tcp_default_output(struct tcpcb *tp)
int tcp_pcap_aggressive_free
void tcp_pcap_tcpcb_init(struct tcpcb *tp)
void tcp_pcap_drain(struct mbufq *queue)
void tcp_reass_global_init(void)
void tcp_reass_flush(struct tcpcb *tp)
void tcp_clean_sackreport(struct tcpcb *tp)
void tcp_free_sackholes(struct tcpcb *tp)
static __inline uint32_t tcp_ts_getticks(void)
#define V_tcp_isn_reseed_interval
#define TS_OFFSET_SECRET_LENGTH
int register_tcp_functions_as_name(struct tcp_function_block *blk, const char *name, int wait)
char * tcp_log_addrs(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr)
static int sysctl_drop(SYSCTL_HANDLER_ARGS)
SYSCTL_UINT(_net_inet_tcp, OID_AUTO, ack_war_timewindow, CTLFLAG_RW, &tcp_ack_war_time_window, 1000, "If the tcp_stack does ack-war prevention how many milliseconds are in its time window?")
struct tcp_function_block * find_and_ref_tcp_functions(struct tcp_function_set *fs)
void tcp_decrement_paced_conn(void)
struct inpcb * tcp_drop_syn_sent(struct inpcb *inp, int errno)
int register_tcp_functions(struct tcp_function_block *blk, int wait)
int tcp_can_enable_pacing(void)
static void tcp_init(void *arg __unused)
struct tcpcb * tcp_drop(struct tcpcb *tp, int errno)
SYSCTL_PROC(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_VNET|CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_NEEDGIANT, &VNET_NAME(tcp_mssdflt), 0, &sysctl_net_inet_tcp_mss_check, "I", "Default TCP Maximum Segment Size")
tcp_seq tcp_new_isn(struct in_conninfo *inc)
static uint8_t tcp_pacing_warning
INPCBSTORAGE_DEFINE(tcpcbstor, "tcpinp", "tcp_inpcb", "tcp", "tcphash")
static uint32_t shadow_num_connections
static int tcp_pcblist(SYSCTL_HANDLER_ARGS)
static void tcp_vnet_init(void *arg __unused)
int deregister_tcp_functions(struct tcp_function_block *blk, bool quiesce, bool force)
int register_tcp_functions_as_names(struct tcp_function_block *blk, int wait, const char *names[], int *num_names)
static volatile int next_tcp_stack_id
static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr)
static void tcp_default_fb_fini(struct tcpcb *tp, int tcb_is_purged)
VNET_DEFINE_STATIC(int, icmp_may_rst)
MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers")
static void tcp_over_udp_stop(void)
char * tcp_log_vain(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr)
VNET_SYSINIT(tcp_vnet_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, tcp_vnet_init, NULL)
void tcp_discardcb(struct tcpcb *tp)
static int tcp_pacing_limit
#define ISN_STATIC_INCREMENT
#define ISN_BYTES_PER_SECOND
VNET_DEFINE(int, tcp_mssdflt)
#define ISN_SECRET_LENGTH
u_int tcp_maxseg(const struct tcpcb *tp)
void tcp_switch_back_to_default(struct tcpcb *tp)
static uint32_t tcp_keyed_hash(struct in_conninfo *inc, u_char *key, u_int len)
struct tcpcb * tcp_newtcpcb(struct inpcb *inp)
static struct inpcb * tcp_mtudisc_notify(struct inpcb *, int)
static int sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
static int sysctl_net_inet_list_func_info(SYSCTL_HANDLER_ARGS)
static int sysctl_net_inet_tcp_udp_tunneling_overhead_check(SYSCTL_HANDLER_ARGS)
static int sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS)
void tcpip_fillheaders(struct inpcb *inp, uint16_t port, void *ip_ptr, void *tcp_ptr)
void tcp_state_change(struct tcpcb *tp, int newstate)
struct tcpcb * tcp_close(struct tcpcb *tp)
void tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, tcp_seq ack, tcp_seq seq, int flags)
static int tcp_over_udp_start(void)
static int tcp_sysctl_setsockopt(SYSCTL_HANDLER_ARGS)
static int tcp_soreceive_stream
static struct mtx isn_mtx
u_int tcp_fixed_maxseg(const struct tcpcb *tp)
static int tcp_default_handoff_ok(struct tcpcb *tp)
static int sysctl_net_inet_tcp_map_limit_check(SYSCTL_HANDLER_ARGS)
static struct tcp_function_block tcp_def_funcblk
struct rwlock tcp_function_lock
static struct tcp_function_block * find_and_ref_tcp_default_fb(void)
void tcp_log_end_status(struct tcpcb *tp, uint8_t status)
uint32_t tcp_ack_war_time_window
static int tcp_tcbhashsize
static void tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp, const struct sockaddr *sa, void *ctx)
SYSINIT(tcp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, tcp_init, NULL)
bool tcp_freecb(struct tcpcb *tp)
static struct tcp_function_block * tcp_func_set_ptr
SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_VNET|CTLFLAG_RW, &VNET_NAME(tcp_minmss), 0, "Minimum TCP Maximum Segment Size")
#define V_isn_last_reseed
static volatile uint32_t number_of_tcp_connections_pacing
struct tcptemp * tcpip_maketemplate(struct inpcb *inp)
void tcp_record_dsack(struct tcpcb *tp, tcp_seq start, tcp_seq end, int tlp)
static int sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS)
static struct tcp_function_block * find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s)
static int sysctl_net_inet_tcp_udp_tunneling_port_check(SYSCTL_HANDLER_ARGS)
struct tcp_funchead t_functions
static struct inpcb * tcp_notify(struct inpcb *, int)
#define ISN_RANDOM_INCREMENT
static int tcp_default_fb_init(struct tcpcb *tp)
#define V_ts_offset_secret
int find_tcp_function_alias(struct tcp_function_block *blk, struct tcp_function_set *fs)
static struct tcp_function_block * find_tcp_functions_locked(struct tcp_function_set *fs)
static int maketcp_hashsize(int size)
void tcp_inptoxtp(const struct inpcb *inp, struct xtcpcb *xt)
uint32_t tcp_new_ts_offset(struct in_conninfo *inc)
struct tcp_function_block * find_and_ref_tcp_fb(struct tcp_function_block *blk)
static struct inpcb * tcp_mtudisc(struct inpcb *, int)
int syncache_pcblist(struct sysctl_req *req)
void syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq, uint16_t port)
void tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
int tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
void tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
#define TCPTV_FINWAIT2_TIMEOUT
void tcp_twclose(struct tcptw *tw, int reuse)
int tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
int tcp_default_ctloutput(struct inpcb *inp, struct sockopt *sopt)
uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *)
static uint16_t tcp_get_flags(const struct tcphdr *th)
#define EXIT_FASTRECOVERY(t_flags)
#define IN_FASTRECOVERY(t_flags)
static void tcp_set_flags(struct tcphdr *th, uint16_t flags)
#define TCPSTATES_DEC(state)
#define V_tcp_udp_tunneling_port
#define V_tcp_udp_tunneling_overhead
#define TCP_TUNNELING_PORT_MAX
#define TCPSTAT_ADD(name, val)
#define TCP_FUNC_BEING_REMOVED
void tcp6_use_min_mtu(struct tcpcb *)
#define V_tcp_map_entries_limit
#define TCP_EI_STATUS_MAX_VALUE
#define TCP_TUNNELING_OVERHEAD_MIN
#define TCPSTATES_INC(state)
#define TCP_MIN_MAP_ENTRIES_LIMIT
#define V_tcp_ts_offset_per_conn
void tcp_ctlinput_viaudp(int, struct sockaddr *, void *, void *)
#define V_tcp_log_in_vain
#define TCP_TUNNELING_OVERHEAD_DEFAULT
uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *)
#define TCP_TUNNELING_OVERHEAD_MAX
void tcp_ctlinput(int, struct sockaddr *, void *)
#define TCP_END_BYTE_INFO
struct pr_usrreqs tcp_usrreqs
#define TCPSTAT_INC(name)
#define IS_FASTOPEN(t_flags)
#define HHOOK_TCP_EST_OUT
#define TCPCTL_DO_RFC1323
#define V_path_mtu_discovery
#define TCP_TUNNELING_PORT_DEFAULT
#define TCP_EI_EMPTY_SLOT
#define TCP_TUNNELING_PORT_MIN
int udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, udp_tun_icmp_t i, void *ctx)