33#include "opt_tcpdebug.h"
34#include "opt_ratelimit.h"
35#include "opt_kern_tls.h"
38#include <sys/module.h>
39#include <sys/kernel.h>
44#include <sys/malloc.h>
49#include <sys/socket.h>
50#include <sys/socketvar.h>
51#include <sys/sysctl.h>
60#include <sys/refcount.h>
62#include <sys/tim_filter.h>
64#include <sys/kthread.h>
65#include <sys/kern_prefetch.h>
66#include <sys/protosw.h>
69#include <machine/cpu.h>
74#include <net/route/nhop.h>
87#include <netinet6/in6_pcb.h>
88#include <netinet6/ip6_var.h>
105#ifdef NETFLIX_SHARED_CWND
106#include <netinet/tcp_shared_cwnd.h>
115#include <netinet6/tcp6_var.h>
119#include <netipsec/ipsec_support.h>
121#if defined(IPSEC) || defined(IPSEC_SUPPORT)
122#include <netipsec/ipsec.h>
123#include <netipsec/ipsec6.h>
128#include <machine/in_cksum.h>
131#include <security/mac/mac_framework.h>
/*
 * Convert a tick count to sbintime_t by scaling with the kernel's
 * tick_sbt (sbintime units per clock tick).  The cast bounds the
 * multiply to sbintime_t width before scaling.
 */
141#define TICKS2SBT(__t) (tick_sbt * ((sbintime_t)(__t)))
/*
 * VNET-virtualized accessors for the NewReno beta parameters, so each
 * network stack instance (vnet) gets its own copy of the multiplicative
 * decrease factors used when overriding NewReno behavior.
 */
146#define V_newreno_beta VNET(newreno_beta)
147#define V_newreno_beta_ecn VNET(newreno_beta_ecn)
/*
 * When non-zero, enable TCP accounting for all RACK sessions.  Exposed
 * (under TCP_ACCOUNTING builds) as the "tcp_acct" sysctl in the misc
 * tree; defaults to off.
 */
226static int32_t rack_tcp_accounting = 0;
/*
 * counter(9) statistic: number of times the socket buffer went up and
 * down on a sendmap entry (exported as the "map_adjust_req" read-only
 * sysctl; allocated with counter_u64_alloc(M_WAITOK) at init and freed
 * on module teardown).
 */
341counter_u64_t rack_adjust_map_bw;
/*
 * Retransmit timeout value: srtt + 4 * rttvar (the classic RTO formula,
 * with the variance term scaled by a left shift of 2), floored at
 * rack_rto_min.  Evaluates 'tp' more than once -- do not pass an
 * expression with side effects.
 */
391#define RACK_REXMTVAL(tp) max(rack_rto_min, ((tp)->t_srtt + ((tp)->t_rttvar << 2)))
393#define RACK_TCPT_RANGESET(tv, value, tvmin, tvmax, slop) do { \
394 (tv) = (value) + slop; \
395 if ((u_long)(tv) < (u_long)(tvmin)) \
397 if ((u_long)(tv) > (u_long)(tvmax)) \
406 struct socket *so,
struct tcpcb *tp,
struct tcpopt *to,
407 uint32_t tiwin, int32_t tlen, int32_t * ofia, int32_t thflags, int32_t * ret_val);
410 struct socket *so,
struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen,
411 uint32_t tiwin, int32_t thflags, int32_t nxt_pkt);
427static int32_t
rack_ctor(
void *mem, int32_t size,
void *arg, int32_t how);
432 struct socket *so,
struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen,
434static void rack_dtor(
void *mem, int32_t size,
void *arg);
444 uint64_t bw_est, uint64_t bw, uint64_t len_time,
int method,
int line,
455 tcp_seq th_ack,
int line,
uint8_t quality);
463 struct tcphdr *th,
int entered_rec,
int dup_ack_struck);
499static int32_t
tcp_addrack(module_t mod, int32_t type,
void *data);
502 struct socket *so,
struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen,
503 int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt,
uint8_t iptos);
506 struct socket *so,
struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen,
507 int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt,
uint8_t iptos);
510 struct socket *so,
struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen,
511 int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt,
uint8_t iptos);
514 struct socket *so,
struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen,
518 struct socket *so,
struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen,
519 int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt,
uint8_t iptos);
522 struct socket *so,
struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen,
523 int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt,
uint8_t iptos);
526 struct socket *so,
struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen,
527 int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt,
uint8_t iptos);
530 struct socket *so,
struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen,
531 int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt,
uint8_t iptos);
534 struct socket *so,
struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen,
535 int32_t tlen,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt,
uint8_t iptos);
565 printf(
"No cc algorithm?\n");
574 if (
CC_ALGO(tp)->ctl_output == NULL) {
589 sopt.sopt_dir = SOPT_SET;
592 error =
CC_ALGO(tp)->ctl_output(tp->
ccv, &sopt, &opt);
603 error =
CC_ALGO(tp)->ctl_output(tp->
ccv, &sopt, &opt);
633 0, &log,
false, NULL, NULL, 0, &tv);
665 memcpy(&old, ptr,
sizeof(
struct newreno));
691 0, &log,
false, NULL, NULL, 0, &tv);
695#ifdef NETFLIX_PEAKRATE
697rack_update_peakrate_thr(
struct tcpcb *tp)
714 if (error || req->newptr == NULL)
717 error = SYSCTL_IN(req, &stat,
sizeof(
uint32_t));
722 printf(
"Clearing RACK counters\n");
737 counter_u64_zero(rack_adjust_map_bw);
778 struct sysctl_oid *rack_counters;
779 struct sysctl_oid *rack_attack;
780 struct sysctl_oid *rack_pacing;
781 struct sysctl_oid *rack_timely;
782 struct sysctl_oid *rack_timers;
783 struct sysctl_oid *rack_tlp;
784 struct sysctl_oid *rack_misc;
785 struct sysctl_oid *rack_features;
786 struct sysctl_oid *rack_measure;
787 struct sysctl_oid *rack_probertt;
788 struct sysctl_oid *rack_hw_pacing;
794 CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
795 "Rack Sack Attack Counters and Controls");
800 CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
804 OID_AUTO,
"rate_sample_method", CTLFLAG_RW,
806 "What method should we use for rate sampling 0=high, 1=low ");
812 CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
813 "ProbeRTT related Controls");
815 SYSCTL_CHILDREN(rack_probertt),
816 OID_AUTO,
"exit_per_hpb", CTLFLAG_RW,
818 "What percentage above goodput do we clamp CA/SS to at exit on high-BDP path 110%");
820 SYSCTL_CHILDREN(rack_probertt),
821 OID_AUTO,
"exit_per_nonhpb", CTLFLAG_RW,
823 "What percentage above goodput do we clamp CA/SS to at exit on a non high-BDP path 100%");
825 SYSCTL_CHILDREN(rack_probertt),
826 OID_AUTO,
"gp_per_mul", CTLFLAG_RW,
828 "What percentage of goodput do we pace at in probertt");
830 SYSCTL_CHILDREN(rack_probertt),
831 OID_AUTO,
"gp_per_reduce", CTLFLAG_RW,
833 "What percentage of goodput do we reduce every gp_srtt");
835 SYSCTL_CHILDREN(rack_probertt),
836 OID_AUTO,
"gp_per_low", CTLFLAG_RW,
838 "What percentage of goodput do we allow the multiplier to fall to");
840 SYSCTL_CHILDREN(rack_probertt),
841 OID_AUTO,
"time_between", CTLFLAG_RW,
843 "How many useconds between the lowest rtt falling must past before we enter probertt");
845 SYSCTL_CHILDREN(rack_probertt),
846 OID_AUTO,
"safety", CTLFLAG_RW,
848 "If not zero, provides a maximum usecond that you can stay in probertt (2sec = 2000000)");
850 SYSCTL_CHILDREN(rack_probertt),
851 OID_AUTO,
"sets_cwnd", CTLFLAG_RW,
853 "Do we set the cwnd too (if always_lower is on)");
855 SYSCTL_CHILDREN(rack_probertt),
856 OID_AUTO,
"maxdrainsrtts", CTLFLAG_RW,
858 "Maximum number of gp_srtt's to hold in drain waiting for flight to reach goal");
860 SYSCTL_CHILDREN(rack_probertt),
861 OID_AUTO,
"mustdrainsrtts", CTLFLAG_RW,
863 "We must drain this many gp_srtt's waiting for flight to reach goal");
865 SYSCTL_CHILDREN(rack_probertt),
866 OID_AUTO,
"goal_use_min_entry", CTLFLAG_RW,
868 "Should we use the min-rtt to calculate the goal rtt (else gp_srtt) at entry");
870 SYSCTL_CHILDREN(rack_probertt),
871 OID_AUTO,
"goal_use_min_exit", CTLFLAG_RW,
873 "How to set cwnd at exit, 0 - dynamic, 1 - use min-rtt, 2 - use curgprtt, 3 - entry gp-rtt");
875 SYSCTL_CHILDREN(rack_probertt),
876 OID_AUTO,
"length_div", CTLFLAG_RW,
878 "How many recent goodput srtt periods plus hold tim does probertt last (bottom of fraction)");
880 SYSCTL_CHILDREN(rack_probertt),
881 OID_AUTO,
"length_mul", CTLFLAG_RW,
883 "How many recent goodput srtt periods plus hold tim does probertt last (top of fraction)");
885 SYSCTL_CHILDREN(rack_probertt),
886 OID_AUTO,
"holdtim_at_target", CTLFLAG_RW,
888 "What is the minimum time we hold probertt at target");
890 SYSCTL_CHILDREN(rack_probertt),
891 OID_AUTO,
"filter_life", CTLFLAG_RW,
893 "What is the time for the filters life in useconds");
895 SYSCTL_CHILDREN(rack_probertt),
896 OID_AUTO,
"lower_within", CTLFLAG_RW,
898 "If the rtt goes lower within this percentage of the time, go into probe-rtt");
900 SYSCTL_CHILDREN(rack_probertt),
901 OID_AUTO,
"must_move", CTLFLAG_RW,
903 "How much is the minimum movement in rtt to count as a drop for probertt purposes");
905 SYSCTL_CHILDREN(rack_probertt),
906 OID_AUTO,
"clear_is_cnts", CTLFLAG_RW,
908 "Do we clear I/S counts on exiting probe-rtt");
910 SYSCTL_CHILDREN(rack_probertt),
911 OID_AUTO,
"hbp_extra_drain", CTLFLAG_RW,
913 "How many extra drain gpsrtt's do we get in highly buffered paths");
915 SYSCTL_CHILDREN(rack_probertt),
916 OID_AUTO,
"hbp_threshold", CTLFLAG_RW,
918 "We are highly buffered if min_rtt_seen / max_rtt_seen > this-threshold");
924 CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
925 "Pacing related Controls");
927 SYSCTL_CHILDREN(rack_pacing),
928 OID_AUTO,
"max_pace_over", CTLFLAG_RW,
930 "What is the maximum allowable percentage that we can pace above (so 30 = 130% of our goal)");
932 SYSCTL_CHILDREN(rack_pacing),
933 OID_AUTO,
"pace_to_one", CTLFLAG_RW,
935 "Do we allow low b/w pacing of 1MSS instead of two");
937 SYSCTL_CHILDREN(rack_pacing),
938 OID_AUTO,
"limit_wsrtt", CTLFLAG_RW,
940 "Do we limit pacing time based on srtt");
942 SYSCTL_CHILDREN(rack_pacing),
943 OID_AUTO,
"init_win", CTLFLAG_RW,
945 "Do we have a rack initial window 0 = system default");
947 SYSCTL_CHILDREN(rack_pacing),
948 OID_AUTO,
"gp_per_ss", CTLFLAG_RW,
950 "If non zero, what percentage of goodput to pace at in slow start");
952 SYSCTL_CHILDREN(rack_pacing),
953 OID_AUTO,
"gp_per_ca", CTLFLAG_RW,
955 "If non zero, what percentage of goodput to pace at in congestion avoidance");
957 SYSCTL_CHILDREN(rack_pacing),
958 OID_AUTO,
"gp_per_rec", CTLFLAG_RW,
960 "If non zero, what percentage of goodput to pace at in recovery");
962 SYSCTL_CHILDREN(rack_pacing),
963 OID_AUTO,
"pace_max_seg", CTLFLAG_RW,
965 "What size is the max for TSO segments in pacing and burst mitigation");
967 SYSCTL_CHILDREN(rack_pacing),
968 OID_AUTO,
"burst_reduces", CTLFLAG_RW,
970 "When doing only burst mitigation what is the reduce divisor");
973 OID_AUTO,
"use_pacing", CTLFLAG_RW,
975 "If set we use pacing, if clear we use only the original burst mitigation");
977 SYSCTL_CHILDREN(rack_pacing),
978 OID_AUTO,
"rate_cap", CTLFLAG_RW,
980 "If set we apply this value to the absolute rate cap used by pacing");
983 OID_AUTO,
"req_measure_cnt", CTLFLAG_RW,
985 "If doing dynamic pacing, how many measurements must be in before we start pacing?");
991 CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
992 "Pacing related Controls");
994 SYSCTL_CHILDREN(rack_hw_pacing),
995 OID_AUTO,
"rwnd_factor", CTLFLAG_RW,
997 "How many times does snd_wnd need to be bigger than pace_max_seg so we will hold off and get more acks?");
999 SYSCTL_CHILDREN(rack_hw_pacing),
1000 OID_AUTO,
"pace_enobuf_mult", CTLFLAG_RW,
1002 "By how many time_betweens should we boost the pacing time if we see a ENOBUFS?");
1004 SYSCTL_CHILDREN(rack_hw_pacing),
1005 OID_AUTO,
"pace_enobuf_max", CTLFLAG_RW,
1007 "What is the max boost the pacing time if we see a ENOBUFS?");
1009 SYSCTL_CHILDREN(rack_hw_pacing),
1010 OID_AUTO,
"pace_enobuf_min", CTLFLAG_RW,
1012 "What is the min boost the pacing time if we see a ENOBUFS?");
1014 SYSCTL_CHILDREN(rack_hw_pacing),
1015 OID_AUTO,
"enable", CTLFLAG_RW,
1017 "Should RACK attempt to use hw pacing?");
1019 SYSCTL_CHILDREN(rack_hw_pacing),
1020 OID_AUTO,
"rate_cap", CTLFLAG_RW,
1022 "Does the highest hardware pacing rate cap the rate we will send at??");
1024 SYSCTL_CHILDREN(rack_hw_pacing),
1025 OID_AUTO,
"rate_min", CTLFLAG_RW,
1027 "Do we need a minimum estimate of this many bytes per second in order to engage hw pacing?");
1029 SYSCTL_CHILDREN(rack_hw_pacing),
1030 OID_AUTO,
"rate_to_low", CTLFLAG_RW,
1032 "If we fall below this rate, dis-engage hw pacing?");
1034 SYSCTL_CHILDREN(rack_hw_pacing),
1035 OID_AUTO,
"up_only", CTLFLAG_RW,
1037 "Do we allow hw pacing to lower the rate selected?");
1039 SYSCTL_CHILDREN(rack_hw_pacing),
1040 OID_AUTO,
"extra_mss_precise", CTLFLAG_RW,
1042 "If the rates between software and hardware match precisely how many extra time_betweens do we get?");
1047 CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
1048 "Rack Timely RTT Controls");
1051 SYSCTL_CHILDREN(rack_timely),
1052 OID_AUTO,
"upper", CTLFLAG_RW,
1054 "Rack timely upper range for equal b/w (in percentage)");
1056 SYSCTL_CHILDREN(rack_timely),
1057 OID_AUTO,
"lower", CTLFLAG_RW,
1059 "Rack timely lower range for equal b/w (in percentage)");
1061 SYSCTL_CHILDREN(rack_timely),
1062 OID_AUTO,
"rtt_max_mul", CTLFLAG_RW,
1064 "Rack timely multipler of lowest rtt for rtt_max");
1066 SYSCTL_CHILDREN(rack_timely),
1067 OID_AUTO,
"rtt_min_div", CTLFLAG_RW,
1069 "Rack timely divisor used for rtt + (rtt * mul/divisor) for check for lower rtt");
1071 SYSCTL_CHILDREN(rack_timely),
1072 OID_AUTO,
"rtt_min_mul", CTLFLAG_RW,
1074 "Rack timely multiplier used for rtt + (rtt * mul/divisor) for check for lower rtt");
1076 SYSCTL_CHILDREN(rack_timely),
1077 OID_AUTO,
"decrease", CTLFLAG_RW,
1079 "Rack timely decrease percentage of our GP multiplication factor");
1081 SYSCTL_CHILDREN(rack_timely),
1082 OID_AUTO,
"increase", CTLFLAG_RW,
1084 "Rack timely increase perentage of our GP multiplication factor");
1086 SYSCTL_CHILDREN(rack_timely),
1087 OID_AUTO,
"lowerbound", CTLFLAG_RW,
1089 "Rack timely lowest percentage we allow GP multiplier to fall to");
1091 SYSCTL_CHILDREN(rack_timely),
1092 OID_AUTO,
"upperboundss", CTLFLAG_RW,
1094 "Rack timely higest percentage we allow GP multiplier in SS to raise to (0 is no upperbound)");
1096 SYSCTL_CHILDREN(rack_timely),
1097 OID_AUTO,
"upperboundca", CTLFLAG_RW,
1099 "Rack timely higest percentage we allow GP multiplier to CA raise to (0 is no upperbound)");
1101 SYSCTL_CHILDREN(rack_timely),
1102 OID_AUTO,
"dynamicgp", CTLFLAG_RW,
1104 "Rack timely do we enable dynmaic timely goodput by default");
1106 SYSCTL_CHILDREN(rack_timely),
1107 OID_AUTO,
"no_rec_red", CTLFLAG_RW,
1109 "Rack timely do we prohibit the recovery multiplier from being lowered");
1111 SYSCTL_CHILDREN(rack_timely),
1112 OID_AUTO,
"red_clear_cnt", CTLFLAG_RW,
1114 "Rack timely what threshold do we count to before another boost during b/w decent");
1116 SYSCTL_CHILDREN(rack_timely),
1117 OID_AUTO,
"max_push_rise", CTLFLAG_RW,
1119 "Rack timely how many times do we push up with b/w increase");
1121 SYSCTL_CHILDREN(rack_timely),
1122 OID_AUTO,
"max_push_drop", CTLFLAG_RW,
1124 "Rack timely how many times do we push back on b/w decent");
1126 SYSCTL_CHILDREN(rack_timely),
1127 OID_AUTO,
"min_segs", CTLFLAG_RW,
1129 "Rack timely when setting the cwnd what is the min num segments");
1131 SYSCTL_CHILDREN(rack_timely),
1132 OID_AUTO,
"noback_max", CTLFLAG_RW,
1134 "Rack timely when deciding if to backoff on a loss, do we use under max rtt else min");
1136 SYSCTL_CHILDREN(rack_timely),
1137 OID_AUTO,
"interim_timely_only", CTLFLAG_RW,
1139 "Rack timely when doing interim timely's do we only do timely (no b/w consideration)");
1141 SYSCTL_CHILDREN(rack_timely),
1142 OID_AUTO,
"nonstop", CTLFLAG_RW,
1144 "Rack timely don't stop increase");
1146 SYSCTL_CHILDREN(rack_timely),
1147 OID_AUTO,
"dec_raise_thresh", CTLFLAG_RW,
1149 "If the CA or SS is below this threshold raise on the first 3 b/w lowers (0=always)");
1151 SYSCTL_CHILDREN(rack_timely),
1152 OID_AUTO,
"bottom_drag_segs", CTLFLAG_RW,
1154 "Bottom dragging if not these many segments outstanding and room");
1161 CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
1162 "TLP and Rack related Controls");
1164 SYSCTL_CHILDREN(rack_tlp),
1165 OID_AUTO,
"use_rrr", CTLFLAG_RW,
1167 "Do we use Rack Rapid Recovery");
1169 SYSCTL_CHILDREN(rack_tlp),
1170 OID_AUTO,
"post_rec_labc", CTLFLAG_RW,
1172 "Since we do early recovery, do we override the l_abc to a value, if so what?");
1174 SYSCTL_CHILDREN(rack_tlp),
1175 OID_AUTO,
"nonrxt_use_cr", CTLFLAG_RW,
1177 "Do we use ss/ca rate if in recovery we are transmitting a new data chunk");
1179 SYSCTL_CHILDREN(rack_tlp),
1180 OID_AUTO,
"tlpmethod", CTLFLAG_RW,
1182 "What method do we do for TLP time calc 0=no-de-ack-comp, 1=ID, 2=2.1, 3=2.2");
1184 SYSCTL_CHILDREN(rack_tlp),
1185 OID_AUTO,
"limit", CTLFLAG_RW,
1187 "How many TLP's can be sent without sending new data");
1189 SYSCTL_CHILDREN(rack_tlp),
1190 OID_AUTO,
"use_greater", CTLFLAG_RW,
1192 "Should we use the rack_rtt time if its greater than srtt");
1194 SYSCTL_CHILDREN(rack_tlp),
1195 OID_AUTO,
"tlpminto", CTLFLAG_RW,
1197 "TLP minimum timeout per the specification (in microseconds)");
1199 SYSCTL_CHILDREN(rack_tlp),
1200 OID_AUTO,
"send_oldest", CTLFLAG_RW,
1202 "Should we always send the oldest TLP and RACK-TLP");
1204 SYSCTL_CHILDREN(rack_tlp),
1205 OID_AUTO,
"rack_tlimit", CTLFLAG_RW,
1207 "How many times can a rack timeout drive out sends");
1209 SYSCTL_CHILDREN(rack_tlp),
1210 OID_AUTO,
"tlp_cwnd_flag", CTLFLAG_RW,
1212 "When a TLP completes a retran should we enter recovery");
1214 SYSCTL_CHILDREN(rack_tlp),
1215 OID_AUTO,
"reorder_thresh", CTLFLAG_RW,
1217 "What factor for rack will be added when seeing reordering (shift right)");
1219 SYSCTL_CHILDREN(rack_tlp),
1220 OID_AUTO,
"rtt_tlp_thresh", CTLFLAG_RW,
1222 "What divisor for TLP rtt/retran will be added (1=rtt, 2=1/2 rtt etc)");
1224 SYSCTL_CHILDREN(rack_tlp),
1225 OID_AUTO,
"reorder_fade", CTLFLAG_RW,
1227 "Does reorder detection fade, if so how many microseconds (0 means never)");
1229 SYSCTL_CHILDREN(rack_tlp),
1230 OID_AUTO,
"pktdelay", CTLFLAG_RW,
1232 "Extra RACK time (in microseconds) besides reordering thresh");
1239 CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
1240 "Timer related controls");
1242 SYSCTL_CHILDREN(rack_timers),
1243 OID_AUTO,
"persmin", CTLFLAG_RW,
1245 "What is the minimum time in microseconds between persists");
1247 SYSCTL_CHILDREN(rack_timers),
1248 OID_AUTO,
"persmax", CTLFLAG_RW,
1250 "What is the largest delay in microseconds between persists");
1252 SYSCTL_CHILDREN(rack_timers),
1253 OID_AUTO,
"delayed_ack", CTLFLAG_RW,
1255 "Delayed ack time (40ms in microseconds)");
1257 SYSCTL_CHILDREN(rack_timers),
1258 OID_AUTO,
"minrto", CTLFLAG_RW,
1260 "Minimum RTO in microseconds -- set with caution below 1000 due to TLP");
1262 SYSCTL_CHILDREN(rack_timers),
1263 OID_AUTO,
"maxrto", CTLFLAG_RW,
1265 "Maximum RTO in microseconds -- should be at least as large as min_rto");
1267 SYSCTL_CHILDREN(rack_timers),
1268 OID_AUTO,
"minto", CTLFLAG_RW,
1270 "Minimum rack timeout in microseconds");
1276 CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
1277 "Measure related controls");
1279 SYSCTL_CHILDREN(rack_measure),
1280 OID_AUTO,
"wma_divisor", CTLFLAG_RW,
1282 "When doing b/w calculation what is the divisor for the WMA");
1284 SYSCTL_CHILDREN(rack_measure),
1285 OID_AUTO,
"end_cwnd", CTLFLAG_RW,
1287 "Does a cwnd just-return end the measurement window (app limited)");
1289 SYSCTL_CHILDREN(rack_measure),
1290 OID_AUTO,
"end_rwnd", CTLFLAG_RW,
1292 "Does an rwnd just-return end the measurement window (app limited -- not persists)");
1294 SYSCTL_CHILDREN(rack_measure),
1295 OID_AUTO,
"min_target", CTLFLAG_RW,
1297 "What is the minimum target window (in mss) for a GP measurements");
1299 SYSCTL_CHILDREN(rack_measure),
1300 OID_AUTO,
"goal_bdp", CTLFLAG_RW,
1302 "What is the goal BDP to measure");
1304 SYSCTL_CHILDREN(rack_measure),
1305 OID_AUTO,
"min_srtts", CTLFLAG_RW,
1307 "What is the goal BDP to measure");
1309 SYSCTL_CHILDREN(rack_measure),
1310 OID_AUTO,
"min_measure_tim", CTLFLAG_RW,
1312 "What is the Minimum time time for a measurement if 0, this is off");
1318 CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
1319 "Feature controls");
1321 SYSCTL_CHILDREN(rack_features),
1322 OID_AUTO,
"cmpack", CTLFLAG_RW,
1324 "Should RACK have LRO send compressed acks");
1326 SYSCTL_CHILDREN(rack_features),
1327 OID_AUTO,
"fsb", CTLFLAG_RW,
1329 "Should RACK use the fast send block?");
1331 SYSCTL_CHILDREN(rack_features),
1332 OID_AUTO,
"rfo", CTLFLAG_RW,
1334 "Should RACK use rack_fast_output()?");
1336 SYSCTL_CHILDREN(rack_features),
1337 OID_AUTO,
"rsmrfo", CTLFLAG_RW,
1339 "Should RACK use rack_fast_rsm_output()?");
1341 SYSCTL_CHILDREN(rack_features),
1342 OID_AUTO,
"non_paced_lro_queue", CTLFLAG_RW,
1344 "Should RACK use mbuf queuing for non-paced connections");
1346 SYSCTL_CHILDREN(rack_features),
1347 OID_AUTO,
"hystartplusplus", CTLFLAG_RW,
1349 "Should RACK enable HyStart++ on connections?");
1355 CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
1356 "Misc related controls");
1357#ifdef TCP_ACCOUNTING
1359 SYSCTL_CHILDREN(rack_misc),
1360 OID_AUTO,
"tcp_acct", CTLFLAG_RW,
1361 &rack_tcp_accounting, 0,
1362 "Should we turn on TCP accounting for all rack sessions?");
1365 SYSCTL_CHILDREN(rack_misc),
1366 OID_AUTO,
"apply_rtt_with_low_conf", CTLFLAG_RW,
1368 "When a persist or keep-alive probe is not answered do we calculate rtt on subsequent answers?");
1370 SYSCTL_CHILDREN(rack_misc),
1371 OID_AUTO,
"rack_dsack_ctl", CTLFLAG_RW,
1373 "How do we process dsack with respect to rack timers, bit field, 3 is standards based?");
1375 SYSCTL_CHILDREN(rack_misc),
1376 OID_AUTO,
"prr_addback_max", CTLFLAG_RW,
1378 "What is the maximum number of MSS we allow to be added back if prr can't send all its data?");
1380 SYSCTL_CHILDREN(rack_misc),
1381 OID_AUTO,
"stats_gets_ms", CTLFLAG_RW,
1383 "What do we feed the stats framework (1 = ms_rtt, 0 = us_rtt, 2 = ms_rtt from hdwr, > 2 usec rtt from hdwr)?");
1385 SYSCTL_CHILDREN(rack_misc),
1386 OID_AUTO,
"clientlowbuf", CTLFLAG_RW,
1388 "Client low buffer level (below this we are more aggressive in DGP exiting recovery (0 = off)?");
1390 SYSCTL_CHILDREN(rack_misc),
1391 OID_AUTO,
"defprofile", CTLFLAG_RW,
1393 "Should RACK use a default profile (0=no, num == profile num)?");
1395 SYSCTL_CHILDREN(rack_misc),
1396 OID_AUTO,
"shared_cwnd", CTLFLAG_RW,
1398 "Should RACK try to use the shared cwnd on connections where allowed");
1400 SYSCTL_CHILDREN(rack_misc),
1401 OID_AUTO,
"limits_on_scwnd", CTLFLAG_RW,
1403 "Should RACK place low end time limits on the shared cwnd feature");
1405 SYSCTL_CHILDREN(rack_misc),
1406 OID_AUTO,
"iMac_dack", CTLFLAG_RW,
1408 "Should RACK try to emulate iMac delayed ack");
1410 SYSCTL_CHILDREN(rack_misc),
1411 OID_AUTO,
"no_prr", CTLFLAG_RW,
1413 "Should RACK not use prr and only pace (must have pacing on)");
1415 SYSCTL_CHILDREN(rack_misc),
1416 OID_AUTO,
"bb_verbose", CTLFLAG_RW,
1418 "Should RACK black box logging be verbose");
1420 SYSCTL_CHILDREN(rack_misc),
1421 OID_AUTO,
"data_after_close", CTLFLAG_RW,
1423 "Do we hold off sending a RST until all pending data is ack'd");
1425 SYSCTL_CHILDREN(rack_misc),
1426 OID_AUTO,
"no_sack_needed", CTLFLAG_RW,
1428 "Do we allow rack to run on connections not supporting SACK");
1430 SYSCTL_CHILDREN(rack_misc),
1431 OID_AUTO,
"prr_sendalot", CTLFLAG_RW,
1433 "Send a lot in prr");
1435 SYSCTL_CHILDREN(rack_misc),
1436 OID_AUTO,
"autoscale", CTLFLAG_RW,
1438 "What percentage should rack scale up its snd buffer by?");
1441 SYSCTL_CHILDREN(rack_attack),
1442 OID_AUTO,
"detect_highsackratio", CTLFLAG_RW,
1444 "Highest sack to ack ratio seen");
1446 SYSCTL_CHILDREN(rack_attack),
1447 OID_AUTO,
"detect_highmoveratio", CTLFLAG_RW,
1449 "Highest move to non-move ratio seen");
1452 SYSCTL_CHILDREN(rack_attack),
1453 OID_AUTO,
"acktotal", CTLFLAG_RD,
1455 "Total number of Ack's");
1458 SYSCTL_CHILDREN(rack_attack),
1459 OID_AUTO,
"exp_sacktotal", CTLFLAG_RD,
1461 "Total expresss number of Sack's");
1464 SYSCTL_CHILDREN(rack_attack),
1465 OID_AUTO,
"sacktotal", CTLFLAG_RD,
1467 "Total number of SACKs");
1470 SYSCTL_CHILDREN(rack_attack),
1471 OID_AUTO,
"move_none", CTLFLAG_RD,
1473 "Total number of SACK index reuse of postions under threshold");
1476 SYSCTL_CHILDREN(rack_attack),
1477 OID_AUTO,
"move_some", CTLFLAG_RD,
1479 "Total number of SACK index reuse of postions over threshold");
1482 SYSCTL_CHILDREN(rack_attack),
1483 OID_AUTO,
"attacks", CTLFLAG_RD,
1485 "Total number of SACK attackers that had sack disabled");
1488 SYSCTL_CHILDREN(rack_attack),
1489 OID_AUTO,
"reversed", CTLFLAG_RD,
1491 "Total number of SACK attackers that were later determined false positive");
1494 SYSCTL_CHILDREN(rack_attack),
1495 OID_AUTO,
"nextmerge", CTLFLAG_RD,
1497 "Total number of times we used the next merge");
1500 SYSCTL_CHILDREN(rack_attack),
1501 OID_AUTO,
"prevmerge", CTLFLAG_RD,
1503 "Total number of times we used the prev merge");
1507 SYSCTL_CHILDREN(rack_counters),
1508 OID_AUTO,
"fto_send", CTLFLAG_RD,
1512 SYSCTL_CHILDREN(rack_counters),
1513 OID_AUTO,
"fto_rsm_send", CTLFLAG_RD,
1517 SYSCTL_CHILDREN(rack_counters),
1518 OID_AUTO,
"nfto_resend", CTLFLAG_RD,
1522 SYSCTL_CHILDREN(rack_counters),
1523 OID_AUTO,
"nfto_send", CTLFLAG_RD,
1527 SYSCTL_CHILDREN(rack_counters),
1528 OID_AUTO,
"rfo_extended", CTLFLAG_RD,
1533 SYSCTL_CHILDREN(rack_counters),
1534 OID_AUTO,
"hwpace_init_fail", CTLFLAG_RD,
1539 SYSCTL_CHILDREN(rack_counters),
1540 OID_AUTO,
"hwpace_lost", CTLFLAG_RD,
1544 SYSCTL_CHILDREN(rack_counters),
1545 OID_AUTO,
"tlp_to_total", CTLFLAG_RD,
1547 "Total number of tail loss probe expirations");
1550 SYSCTL_CHILDREN(rack_counters),
1551 OID_AUTO,
"tlp_new", CTLFLAG_RD,
1553 "Total number of tail loss probe sending new data");
1556 SYSCTL_CHILDREN(rack_counters),
1557 OID_AUTO,
"tlp_retran", CTLFLAG_RD,
1559 "Total number of tail loss probe sending retransmitted data");
1562 SYSCTL_CHILDREN(rack_counters),
1563 OID_AUTO,
"tlp_retran_bytes", CTLFLAG_RD,
1565 "Total bytes of tail loss probe sending retransmitted data");
1568 SYSCTL_CHILDREN(rack_counters),
1569 OID_AUTO,
"rack_to_tot", CTLFLAG_RD,
1571 "Total number of times the rack to expired");
1574 SYSCTL_CHILDREN(rack_counters),
1575 OID_AUTO,
"saw_enobufs", CTLFLAG_RD,
1577 "Total number of times a sends returned enobuf for non-hdwr paced connections");
1580 SYSCTL_CHILDREN(rack_counters),
1581 OID_AUTO,
"saw_enobufs_hw", CTLFLAG_RD,
1583 "Total number of times a send returned enobuf for hdwr paced connections");
1586 SYSCTL_CHILDREN(rack_counters),
1587 OID_AUTO,
"saw_enetunreach", CTLFLAG_RD,
1589 "Total number of times a send received a enetunreachable");
1592 SYSCTL_CHILDREN(rack_counters),
1593 OID_AUTO,
"alloc_hot", CTLFLAG_RD,
1595 "Total allocations from the top of our list");
1598 SYSCTL_CHILDREN(rack_counters),
1599 OID_AUTO,
"allocs", CTLFLAG_RD,
1601 "Total allocations of tracking structures");
1604 SYSCTL_CHILDREN(rack_counters),
1605 OID_AUTO,
"allochard", CTLFLAG_RD,
1607 "Total allocations done with sleeping the hard way");
1610 SYSCTL_CHILDREN(rack_counters),
1611 OID_AUTO,
"allocemerg", CTLFLAG_RD,
1613 "Total allocations done from emergency cache");
1616 SYSCTL_CHILDREN(rack_counters),
1617 OID_AUTO,
"alloc_limited", CTLFLAG_RD,
1619 "Total allocations dropped due to limit");
1622 SYSCTL_CHILDREN(rack_counters),
1623 OID_AUTO,
"alloc_limited_conns", CTLFLAG_RD,
1625 "Connections with allocations dropped due to limit");
1628 SYSCTL_CHILDREN(rack_counters),
1629 OID_AUTO,
"split_limited", CTLFLAG_RD,
1631 "Split allocations dropped due to limit");
1634 SYSCTL_CHILDREN(rack_counters),
1635 OID_AUTO,
"persist_sends", CTLFLAG_RD,
1637 "Number of times we sent a persist probe");
1640 SYSCTL_CHILDREN(rack_counters),
1641 OID_AUTO,
"persist_acks", CTLFLAG_RD,
1643 "Number of times a persist probe was acked");
1646 SYSCTL_CHILDREN(rack_counters),
1647 OID_AUTO,
"persist_loss", CTLFLAG_RD,
1649 "Number of times we detected a lost persist probe (no ack)");
1652 SYSCTL_CHILDREN(rack_counters),
1653 OID_AUTO,
"persist_loss_ends", CTLFLAG_RD,
1655 "Number of lost persist probe (no ack) that the run ended with a PERSIST abort");
1657 rack_adjust_map_bw = counter_u64_alloc(M_WAITOK);
1659 SYSCTL_CHILDREN(rack_counters),
1660 OID_AUTO,
"map_adjust_req", CTLFLAG_RD,
1661 &rack_adjust_map_bw,
1662 "Number of times we hit the case where the sb went up and down on a sendmap entry");
1666 SYSCTL_CHILDREN(rack_counters),
1667 OID_AUTO,
"cmp_ack_equiv", CTLFLAG_RD,
1669 "Number of compressed acks total represented");
1672 SYSCTL_CHILDREN(rack_counters),
1673 OID_AUTO,
"cmp_ack_not", CTLFLAG_RD,
1675 "Number of non compresseds acks that we processed");
1680 SYSCTL_CHILDREN(rack_counters),
1681 OID_AUTO,
"sack_long", CTLFLAG_RD,
1683 "Total times we had to walk whole list for sack processing");
1686 SYSCTL_CHILDREN(rack_counters),
1687 OID_AUTO,
"sack_restart", CTLFLAG_RD,
1689 "Total times we had to walk whole list due to a restart");
1692 SYSCTL_CHILDREN(rack_counters),
1693 OID_AUTO,
"sack_short", CTLFLAG_RD,
1695 "Total times we took shortcut for sack processing");
1698 SYSCTL_CHILDREN(rack_attack),
1699 OID_AUTO,
"skipacked", CTLFLAG_RD,
1701 "Total number of times we skipped previously sacked");
1704 SYSCTL_CHILDREN(rack_attack),
1705 OID_AUTO,
"ofsplit", CTLFLAG_RD,
1707 "Total number of times we did the old fashion tree split");
1710 SYSCTL_CHILDREN(rack_counters),
1711 OID_AUTO,
"idle_reduce_oninput", CTLFLAG_RD,
1713 "Total number of idle reductions on input");
1716 SYSCTL_CHILDREN(rack_counters),
1717 OID_AUTO,
"collapsed_win", CTLFLAG_RD,
1719 "Total number of collapsed windows");
1722 SYSCTL_CHILDREN(rack_counters),
1723 OID_AUTO,
"tried_scwnd", CTLFLAG_RD,
1725 "Total number of scwnd attempts");
1728 OID_AUTO,
"outsize", CTLFLAG_RD,
1732 OID_AUTO,
"opts", CTLFLAG_RD,
1736 OID_AUTO,
"clear", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
1834#ifdef NETFLIX_PEAKRATE
1864#ifdef NETFLIX_PEAKRATE
1893 goto configured_rate;
1931 memset(&log, 0,
sizeof(log));
1949 0, &log,
false, &tv);
1955 uint64_t rate, uint64_t hw_rate,
int line,
1961 const struct ifnet *ifp;
1963 memset(&log, 0,
sizeof(log));
1964 log.
u_bbr.
flex1 = ((hw_rate >> 32) & 0x00000000ffffffff);
1965 log.
u_bbr.
flex2 = (hw_rate & 0x00000000ffffffff);
1974 log.
u_bbr.
flex3 = (((uint64_t)ifp >> 32) & 0x00000000ffffffff);
1975 log.
u_bbr.
flex4 = ((uint64_t)ifp & 0x00000000ffffffff);
1997 0, &log,
false, &tv);
2007 uint64_t bw_est, high_rate;
2012 bw_est /= (uint64_t)100;
2021 if (bw_est >= high_rate) {
2024 bw_est, high_rate, __LINE__,
2045 if (bw_est > high_rate) {
2093 0, &log,
false, &tv);
2129 0, &log,
false, &tv);
2162 0, &log,
false, &tv);
2171 int flag,
uint32_t th_ack,
int line)
2211 0, &log,
false, &tv);
2279 0, &log,
false, &tv);
2298 memset(&log, 0,
sizeof(log));
2335 0, &log,
false, &tv);
2347 memset(&log, 0,
sizeof(log));
2358 0, &log,
false, &tv);
2387 0, &log,
false, &tv);
2415 0, &log,
false, tv);
2426 memset(&log, 0,
sizeof(log));
2456 0, &log,
false, &tv);
2467 memset(&log, 0,
sizeof(log));
2484 0, &log,
false, &tv);
2517 tlen, &log,
false, &tv);
2523 struct timeval *tv,
uint32_t flags_on_entry)
2551 0, &log,
false, tv);
2584 0, &log,
false, &tv);
2614 0, &log,
false, &tv);
2646 0, &log,
false, &tv);
2650#ifdef NETFLIX_EXP_DETECTION
2652rack_log_sad(
struct tcp_rack *rack,
int event)
2658 memset(&log.u_bbr, 0,
sizeof(log.u_bbr));
2664 log.u_bbr.flex6 = tcp_sack_to_ack_thresh;
2665 log.u_bbr.pkts_out = tcp_sack_to_move_thresh;
2666 log.u_bbr.lt_epoch = (tcp_force_detection << 8);
2668 log.u_bbr.applimited = tcp_map_minimum;
2670 log.u_bbr.flex8 = event;
2673 log.u_bbr.delivered = tcp_sad_decay_val;
2678 0, &log,
false, &tv);
2732 counter_u64_free(rack_adjust_map_bw);
2818 if (rsm != NULL && limit_type) {
2882 uint64_t srtt, bw, len, tim;
2924 srtt = (uint64_t)tp->
t_srtt;
2929 len = roundup(len, segsiz);
2936 minl = roundup(minl, segsiz);
2954 return (max((
uint32_t)len, def_len));
3004 uint32_t logged, uint64_t cur_bw, uint64_t low_bnd,
3005 uint64_t up_bnd,
int line,
uint8_t method)
3011 memset(&log, 0,
sizeof(log));
3048 0, &log,
false, &tv);
3074 uint64_t act_rate, max_allow_rate;
3079 if ((cur_bw == 0) || (last_bw_est == 0)) {
3096 act_rate = cur_bw * (uint64_t)mult;
3099 max_allow_rate /= 100;
3100 if (act_rate < max_allow_rate) {
3159 int32_t calc, logged, plus;
3262 perf = (((uint64_t)curper * ((uint64_t)1000000 -
3264 (((uint64_t)rtt_diff * (uint64_t)1000000)/
3266 (uint64_t)1000000)) /
3268 if (perf > curper) {
3291 perf = (((uint64_t)curper * ((uint64_t)1000000 -
3293 ((uint64_t)highrttthresh * (uint64_t)1000000) /
3294 (uint64_t)rtt)) / 100)) /(uint64_t)1000000);
3301 uint64_t logvar, logvar2, logvar3;
3302 uint32_t logged, new_per, ss_red, ca_red, rec_red, alt, val;
3309 ss_red = ca_red = rec_red = 0;
3318 if (timely_says == 2) {
3340 if (timely_says == 2) {
3366 logvar, __LINE__, 10);
3373 if (timely_says == 2) {
3399 logvar, __LINE__, 10);
3475 bwdp *= (uint64_t)rtt;
3665 }
else if (sum >= 15) {
3720 uint64_t no_overflow;
3768 if ((
TSTMP_LT(us_cts, must_stay) &&
3810 uint64_t cur_bw, up_bnd, low_bnd, subfr;
3846 }
else if ((last_bw_est < low_bnd) && !losses) {
3869 (timely_says == 0)) {
3877 (timely_says == 0) ||
3902 }
else if ((timely_says != 2) &&
3904 (last_bw_est > up_bnd)) {
3956 (last_bw_est < low_bnd)) {
3976 int32_t timely_says;
3977 uint64_t log_mult, log_rtt_a_diff;
3979 log_rtt_a_diff = rtt;
3980 log_rtt_a_diff <<= 32;
3981 log_rtt_a_diff |= (
uint32_t)rtt_diff;
3988 log_mult |= prev_rtt;
3991 log_rtt_a_diff, __LINE__, 4);
4000 log_mult |= prev_rtt;
4004 log_rtt_a_diff, __LINE__, 5);
4018 log_mult = prev_rtt;
4019 if (rtt_diff <= 0) {
4034 return (timely_says);
4039 tcp_seq th_ack,
int line,
uint8_t quality)
4041 uint64_t tim, bytes_ps, ltim, stim, utim;
4042 uint32_t segsiz, bytes, reqbytes, us_cts;
4043 int32_t gput, new_rtt_diff, timely_says;
4044 uint64_t resid_bw, subpart = 0, addpart = 0, srtt;
4067 utim = max(stim, 1);
4070 gput = (((uint64_t) (th_ack - tp->
gput_seq)) << 3) / ltim;
4072 if ((tim == 0) && (stim == 0)) {
4080 0, 0, 0, 10, __LINE__, NULL, quality);
4081 goto skip_measurement;
4088 0, 0, 0, 10, __LINE__, NULL, quality);
4089 goto skip_measurement;
4125 0, 0, 0, 10, __LINE__, NULL, quality);
4126 goto skip_measurement;
4129 bytes_ps = (uint64_t)bytes;
4150 reqbytes -= (2 * segsiz);
4157 0, 0, 10, __LINE__, NULL, quality);
4158 goto skip_measurement;
4199 11, __LINE__, NULL, quality);
4213 0, 0, 10, __LINE__, NULL, quality);
4256 srtt = (uint64_t)tp->
t_srtt;
4284 subpart /= (srtt * 8);
4291 addpart = bytes_ps * utim;
4292 addpart /= (srtt * 8);
4304 addpart = bytes_ps / 2;
4310 if ((utim / srtt) <= 1) {
4321 addpart = bytes_ps * utim;
4386 stats_voi_update_abs_s32(tp->
t_stats,
4391 tp->
t_flags &= ~TF_GPUTINPROG;
4431 tp->
t_flags &= ~TF_GPUTINPROG;
4433 0, 0, 0, 6, __LINE__, NULL, quality);
4454 rsm = RB_FIND(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, &fe);
4489 __LINE__, NULL, quality);
4524#ifdef NETFLIX_PEAKRATE
4533 rack_update_peakrate_thr(tp);
4541 tp->
ccv->
flags &= ~CCF_CWND_LIMITED;
4551 tp->
ccv->
flags &= ~CCF_ABC_SENTAWND;
4575 0, &log,
false, NULL, NULL, 0, &tv);
4577 if (
CC_ALGO(tp)->ack_received != NULL) {
4602 if (acked <= rack->
r_ctl.rc_out_at_rto){
4615#ifdef NETFLIX_PEAKRATE
4654 if (
CC_ALGO(tp)->post_recovery != NULL) {
4682 0, &log,
false, NULL, NULL, 0, &tv);
4723 uint32_t ssthresh_enter, cwnd_enter, in_rec_at_entry, orig_cwnd;
4730 in_rec_at_entry = 0;
4734 in_rec_at_entry = 1;
4738 tp->
t_flags &= ~TF_WASFRECOVERY;
4739 tp->
t_flags &= ~TF_WASCRECOVERY;
4787 tp->
t_flags &= ~TF_WASFRECOVERY;
4791 tp->
t_flags &= ~TF_WASCRECOVERY;
4797 if ((
CC_ALGO(tp)->cong_signal != NULL) &&
4824 if (
CC_ALGO(tp)->after_idle != NULL)
4853#define DELAY_ACK(tp, tlen) \
4854 (((tp->t_flags & TF_RXWIN0SENT) == 0) && \
4855 ((tp->t_flags & TF_DELACK) == 0) && \
4856 (tlen <= tp->t_maxseg) && \
4857 (tp->t_delayed_ack || (tp->t_flags & TF_NEEDSYN)))
4890 RB_FOREACH_REVERSE_FROM(prsm, rack_rb_tree_head, rsm) {
4954 thresh = srtt + (srtt >> 2);
4961 thresh += (srtt >> 2);
4974 if (thresh > (srtt * 2)) {
4998 thresh = (srtt * 2);
5011 if (alt_thresh > thresh)
5012 thresh = alt_thresh;
5016 prsm = TAILQ_PREV(rsm, rack_head, r_tnext);
5017 if (prsm && (len <= segsiz)) {
5031 thresh += inter_gap;
5032 }
else if (len <= segsiz) {
5039 if (alt_thresh > thresh)
5040 thresh = alt_thresh;
5044 if (len <= segsiz) {
5050 if (alt_thresh > thresh)
5051 thresh = alt_thresh;
5082 else if (tp->
t_srtt == 0)
5151 uint32_t thresh, exp, to, srtt, time_since_sent, tstmp_touse;
5154 int32_t is_tlp_timer = 0;
5171 if ((rsm == NULL) || sup_rack) {
5174 time_since_sent = 0;
5193 time_since_sent = cts - tstmp_touse;
5198 if (to > time_since_sent)
5199 to -= time_since_sent;
5310 time_since_sent = 0;
5316 time_since_sent = cts - tstmp_touse;
5343 if (thresh > time_since_sent) {
5344 to = thresh - time_since_sent;
5367 if (is_tlp_timer == 0) {
5389#ifdef NETFLIX_SHARED_CWND
5390 if (rack->
r_ctl.rc_scw) {
5416#ifdef NETFLIX_SHARED_CWND
5417 if (rack->
r_ctl.rc_scw) {
5440 if (time_idle >= idle_min) {
5468 struct hpts_diag *diag,
struct timeval *tv)
5500 0, &log,
false, tv);
5522 len, &log,
false, &tv);
5528 int32_t slot,
uint32_t tot_len_this_send,
int sup_rack)
5612#ifdef NETFLIX_EXP_DETECTION
5614 (slot < tcp_sad_pacing_interval)) {
5625 slot = tcp_sad_pacing_interval;
5641 if ((hpts_timeout == 0) &&
5686 if (left < hpts_timeout)
5687 hpts_timeout = left;
5695 if (hpts_timeout > 0x7ffffffe)
5696 hpts_timeout = 0x7ffffffe;
5702 (hpts_timeout < slot) &&
5712 slot = hpts_timeout;
5774 ((hpts_timeout) && (hpts_timeout < slot))) {
5789 }
else if (hpts_timeout) {
5806 panic(
"tp:%p rack:%p tlts:%d cts:%u slot:%u pto:%u -- no timer started?",
5807 tp, rack, tot_len_this_send, cts, slot, hpts_timeout);
5894 if (src_rsm->
m && (src_rsm->
orig_m_len != src_rsm->
m->m_len)) {
5900 while (soff >= m->m_len) {
5904 KASSERT((m != NULL),
5905 (
"rsm:%p nrsm:%p hit at soff:%u null m",
5906 src_rsm, rsm, soff));
5929 for (idx = 0; idx < nrsm->
r_rtr_cnt; idx++) {
5934 nrsm->
r_flags &= ~RACK_HAS_SYN;
5937 rsm->
r_flags &= ~RACK_HAS_FIN;
5940 rsm->
r_flags &= ~RACK_HAD_PUSH;
5950 KASSERT(((rsm->
m != NULL) ||
5952 (
"rsm:%p nrsm:%p rack:%p -- rsm->m is NULL?", rsm, nrsm, rack));
6003 r_rsm->
r_flags &= ~RACK_APP_LIMITED;
6008 (void)RB_REMOVE(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, r_rsm);
6010 rm = RB_REMOVE(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, r_rsm);
6012 panic(
"removing head in rack:%p rsm:%p rm:%p",
6046 int collapsed_win = 0;
6057 return (-ETIMEDOUT);
6070 avail = sbavail(&so->so_snd);
6094 tp->
t_flags &= ~TF_GPUTINPROG;
6098 0, 0, 18, __LINE__, NULL, 0);
6112 if ((amm + out) > tp->
snd_wnd) {
6123 if (out + amm <= tp->snd_wnd) {
6132 if (out + amm <= tp->snd_wnd)
6146 if (collapsed_win == 0) {
6166 RB_FOREACH_REVERSE(rsm, rack_rb_tree_head, &rack->
r_ctl.
rc_mtree) {
6201 (void)RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, nrsm);
6203 insret = RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, nrsm);
6204 if (insret != NULL) {
6205 panic(
"Insert in rb tree of %p fails ret:%p rack:%p rsm:%p",
6206 nrsm, insret, rack, rsm);
6210 TAILQ_INSERT_AFTER(&rack->
r_ctl.
rc_tmap, rsm, nrsm, r_tnext);
6275 return (-ETIMEDOUT);
6277 KASSERT(inp != NULL, (
"%s: tp %p tp->t_inpcb == NULL", __func__, tp));
6295 retval = -ETIMEDOUT;
6311 retval = -ETIMEDOUT;
6327 &t_template->
tt_t, (
struct mbuf *)NULL,
6332 free(t_template, M_TEMP);
6392 &t_template->
tt_t, (
struct mbuf *)NULL,
6394 free(t_template, M_TEMP);
6402 return (-ETIMEDOUT);
6448 TAILQ_INSERT_AFTER(&rack->
r_ctl.
rc_tmap, trsm, rsm, r_tnext);
6473#ifdef NETFLIX_EXP_DETECTION
6496 frac = tp->
t_srtt & 0x1f;
6497 tp->
t_srtt = TICKS_2_USEC(val);
6554 if ((srtt == 0) && (tp->
t_srtt != 0))
6595 tp->
t_flags &= ~TF_GPUTINPROG;
6599 0, 0, 18, __LINE__, NULL, 0);
6604 return (-ETIMEDOUT);
6612 tp->
t_flags &= ~TF_WASFRECOVERY;
6616 tp->
t_flags &= ~TF_WASCRECOVERY;
6760 }
else if (isipv6) {
6771#if defined(INET6) && defined(INET)
6802 tp->
t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
6871 0, 0, 18, __LINE__, NULL, 0);
6872 tp->
t_flags &= ~TF_GPUTINPROG;
6896 goto skip_time_check;
6906 if (hpts_calling == 0) {
6985 if (hpts_removed == 0)
7073 rsm->
r_flags &= ~RACK_SACK_PASSED;
7101 if (c_end == rsm->
r_end) {
7109 *lenp = (len - act_len);
7110 return (rsm->
r_end);
7137 (void)RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, nrsm);
7139 insret = RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, nrsm);
7140 if (insret != NULL) {
7141 panic(
"Insert in rb tree of %p fails ret:%p rack:%p rsm:%p",
7142 nrsm, insret, rack, rsm);
7146 TAILQ_INSERT_AFTER(&rack->
r_ctl.
rc_tmap, rsm, nrsm, r_tnext);
7149 rsm->
r_flags &= (~RACK_HAS_FIN);
7167 register uint32_t snd_max, snd_una;
7198 if (th_flags & TH_RST) {
7208 if (th_flags & (TH_SYN | TH_FIN)) {
7215 if ((th_flags & TH_SYN) && (seq_out == tp->
iss))
7217 if (th_flags & TH_FIN)
7227 if (
SEQ_LEQ((seq_out + len), snd_una)) {
7231 if (
SEQ_LT(seq_out, snd_una)) {
7235 end = seq_out + len;
7238 len = end - seq_out;
7250 if (seq_out == snd_max) {
7261 if (th_flags & TH_FIN) {
7271 if (th_flags & TH_SYN) {
7294 if (rsm->
m->m_len <= rsm->
soff) {
7307 while (lm->m_len <= rsm->
soff) {
7308 rsm->
soff -= lm->m_len;
7310 KASSERT(lm != NULL, (
"%s rack:%p lm goes null orig_off:%u origmb:%p rsm->soff:%u",
7311 __func__, rack, s_moff, s_mb, rsm->
soff));
7322 (void)RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
7324 insret = RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
7325 if (insret != NULL) {
7326 panic(
"Insert in rb tree of %p fails ret:%p rack:%p rsm:%p",
7327 nrsm, insret, rack, rsm);
7342 prsm = RB_PREV(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
7351 memset(&fe, 0,
sizeof(fe));
7353 if (hintrsm && (hintrsm->
r_start == seq_out)) {
7360 if ((rsm) && (rsm->
r_start == seq_out)) {
7371 rsm = RB_FIND(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, &fe);
7373 if (rsm->
r_start == seq_out) {
7399 (void)RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, nrsm);
7401 insret = RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, nrsm);
7402 if (insret != NULL) {
7403 panic(
"Insert in rb tree of %p fails ret:%p rack:%p rsm:%p",
7404 nrsm, insret, rack, rsm);
7408 TAILQ_INSERT_AFTER(&rack->
r_ctl.
rc_tmap, rsm, nrsm, r_tnext);
7411 rsm->
r_flags &= (~RACK_HAS_FIN);
7427 printf(
"seq_out:%u len:%d snd_una:%u snd_max:%u -- but rsm not found?\n",
7429 printf(
"Starting Dump of all rack entries\n");
7431 printf(
"rsm:%p start:%u end:%u\n",
7434 printf(
"Dump complete\n");
7435 panic(
"seq_out not found rack:%p tp:%p",
7444 panic(
"seq_out:%u(%d) is beyond snd_max:%u tp:%p",
7445 seq_out, len, tp->
snd_max, tp);
7468 if (us_rtt < rack->
r_ctl.rc_gp_lowrtt)
7473 if ((confidence == 1) &&
7610 delta = tp->
t_srtt - rtt;
7614 tp->
t_srtt += (rtt >> 3);
7694 if (old_rtt > us_rtt) {
7748 len_acked = th_ack - rsm->
r_start;
7775 if (
CC_ALGO(tp)->rttsample != NULL) {
7780 if (ack_type ==
SACKED) {
7862 if (
CC_ALGO(tp)->rttsample != NULL) {
7966 rack_head, r_tnext) {
7988 nrsm->
r_flags &= ~RACK_WAS_SACKPASS;
8091 seq, tp->
gput_seq, 0, 5, line, NULL, 0);
8106 tp->
t_flags &= ~TF_GPUTINPROG;
8108 0, 0, 0, 6, __LINE__, NULL, 0);
8145 int32_t used_ref = 1;
8148 start = sack->
start;
8151 memset(&fe, 0,
sizeof(fe));
8153 if ((rsm == NULL) ||
8163 rsm = RB_FIND(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, &fe);
8253 next->r_start = start;
8284 nrsm = TAILQ_NEXT(rsm, r_tnext);
8294 (end ==
next->r_end)) {
8301 start =
next->r_end;
8335 (void)RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, nrsm);
8337 insret = RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, nrsm);
8338 if (insret != NULL) {
8339 panic(
"Insert in rb tree of %p fails ret:%p rack:%p rsm:%p",
8340 nrsm, insret, rack, rsm);
8344 TAILQ_INSERT_AFTER(&rack->
r_ctl.
rc_tmap, rsm, nrsm, r_tnext);
8348 rsm->
r_flags &= (~RACK_HAS_FIN);
8356 if (end == rsm->
r_end) {
8358 rsm = RB_NEXT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
8363 rsm = RB_NEXT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
8372 rsm = RB_NEXT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
8433 rsm->
r_flags &= ~RACK_SACK_PASSED;
8449 if (end == rsm->
r_end) {
8457 nrsm = RB_NEXT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
8509 prev = RB_PREV(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
8628 rsm->
r_flags &= (~RACK_HAS_FIN);
8631 (void)RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, nrsm);
8633 insret = RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, nrsm);
8634 if (insret != NULL) {
8635 panic(
"Insert in rb tree of %p fails ret:%p rack:%p rsm:%p",
8636 nrsm, insret, rack, rsm);
8640 TAILQ_INSERT_AFTER(&rack->
r_ctl.
rc_tmap, rsm, nrsm, r_tnext);
8652 rsm->
r_flags &= ~RACK_SACK_PASSED;
8665 }
else if (start != end){
8693 prev = RB_PREV(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
8700 prev = RB_PREV(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
8705 if (used_ref == 0) {
8712 nrsm = RB_NEXT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
8732 panic(
"rack:%p rsm:%p flags:0x%x in tmap?",
8742 TAILQ_INSERT_AFTER(&rack->
r_ctl.
rc_tmap, tmap, rsm, r_tnext);
8746 rsm = RB_NEXT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
8761#define timersub(tvp, uvp, vvp) \
8763 (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \
8764 (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \
8765 if ((vvp)->tv_usec < 0) { \
8767 (vvp)->tv_usec += 1000000; \
8776 (res.tv_sec >= 1) ||
8783#ifdef NETFLIX_EXP_DETECTION
8792#ifdef NETFLIX_EXP_DETECTION
8795 (pkt_delta < tcp_sad_low_pps)){
8881 if ((th_ack - 1) == tp->
iss) {
8895 panic(
"No rack map tp:%p for state:%d ack:%u rack:%p snd_una:%u snd_max:%u snd_nxt:%u\n",
8905 printf(
"Rack map starts at r_start:%u for th_ack:%u huh? ts:%d rs:%d\n",
8966 (void)RB_REMOVE(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
8968 rm = RB_REMOVE(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
8970 panic(
"removing head in rack:%p rsm:%p rm:%p",
8992 rsm->
r_flags &= ~RACK_SACK_PASSED;
9020 left = th_ack - rsm->
r_end;
9069 ack_am = (th_ack - rsm->
r_start);
9090 while (rsm->
soff >= rsm->
m->m_len) {
9091 rsm->
soff -= rsm->
m->m_len;
9092 rsm->
m = rsm->
m->m_next;
9093 KASSERT((rsm->
m != NULL),
9094 (
" nrsm:%p hit at soff:%u null m",
9107 int sack_pass_fnd = 0;
9126 if (sack_pass_fnd == 0) {
9144#ifdef NETFLIX_EXP_DETECTION
9149 tcp_sack_to_ack_thresh &&
9150 tcp_sack_to_move_thresh &&
9157 uint64_t ackratio, moveratio, movetotal;
9160 rack_log_sad(rack, 1);
9162 ackratio *= (uint64_t)(1000);
9175 moveratio *= (uint64_t)1000;
9177 moveratio /= movetotal;
9186 if ((ackratio > tcp_sack_to_ack_thresh) &&
9187 (moveratio > tcp_sack_to_move_thresh)) {
9194 if (tcp_attack_on_turns_on_logging) {
9204 rack_log_sad(rack, 2);
9208 if ((ackratio <= tcp_restoral_thresh) ||
9211 rack_log_sad(rack, 3);
9217 (bytes_this_ack / segsiz));
9254 goto skip_dsack_round;
9266 goto skip_dsack_round;
9348 if (changed > limit)
9367 struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1];
9369 int32_t i, j, k, num_sack_blks = 0;
9371 int loop_start = 0, moved_two = 0;
9384 th_ack = th->th_ack;
9412 changed = th_ack - rsm->
r_start;
9424 if (dup_ack_struck && (changed == 0)) {
9435 bcopy((to->
to_sacks + i * TCPOLEN_SACK),
9436 &sack,
sizeof(sack));
9438 sack.
end = ntohl(sack.
end);
9444 sack_blocks[num_sack_blks] = sack;
9470 num_sack_blks, th->th_ack);
9472 if (num_sack_blks == 0) {
9474 goto out_with_totals;
9476 if (num_sack_blks < 2) {
9481 for (i = 0; i < num_sack_blks; i++) {
9482 for (j = i + 1; j < num_sack_blks; j++) {
9483 if (
SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) {
9484 sack = sack_blocks[i];
9485 sack_blocks[i] = sack_blocks[j];
9486 sack_blocks[j] = sack;
9495 if (num_sack_blks == 0)
9496 goto out_with_totals;
9497 if (num_sack_blks > 1) {
9498 for (i = 0; i < num_sack_blks; i++) {
9499 for (j = i + 1; j < num_sack_blks; j++) {
9500 if (sack_blocks[i].end == sack_blocks[j].end) {
9507 if (
SEQ_LT(sack_blocks[j].start, sack_blocks[i].start)) {
9518 for (k = (j + 1); k < num_sack_blks; k++) {
9520 sack_blocks[j].
end = sack_blocks[k].
end;
9548 if (num_sack_blks == 1) {
9570 goto out_with_totals;
9593 if (num_sack_blks > 1) {
9605 for (i = loop_start; i < num_sack_blks; i++) {
9656 if (num_sack_blks > 1) {
9669#ifdef NETFLIX_EXP_DETECTION
9681 entered_recovery = 1;
9707 (entered_recovery == 0)) {
9730 rsm = TAILQ_NEXT(rsm, r_tnext);
9732 if (rsm && (rsm->
r_dupack < 0xff)) {
9764 struct socket *so, int32_t acked)
9793 uint64_t bw, calc_bw, rtt;
9801 goto no_measurement;
9805 calc_bw = bw * 1000000;
9911 gating_val = min((sbavail(&so->so_snd) - (tp->
snd_max - tp->
snd_una)),
9913 if (new_total <= gating_val) {
9918 (
"rack:%p left_to_send:%u sbavail:%u out:%u",
9952 SOCKBUF_LOCK_ASSERT(sb);
9955 if ((rsm == NULL) || (
m == NULL)) {
9959 while (rsm->
m && (rsm->
m ==
m)) {
9965 tm = sbsndmbuf(sb, (rsm->
r_start - snd_una), &soff);
9969 if (rsm->
soff != soff) {
9976 counter_u64_add(rack_adjust_map_bw, 1);
9985 rsm->
m = sbsndmbuf(sb, (rsm->
r_start - snd_una), &rsm->
soff);
10008 int32_t * ofia, int32_t thflags, int32_t *ret_val)
10010 int32_t ourfinisacked = 0;
10011 int32_t nsegs, acked_amount;
10013 struct mbuf *mfree;
10015 int32_t under_pacing = 0;
10016 int32_t recovery = 0;
10031 int in_rec, dup_ack_struck = 0;
10039 if ((th->th_ack == tp->
snd_una) &&
10043 dup_ack_struck = 1;
10078 nsegs = max(1, m->m_pkthdr.lro_nsegs);
10100 tp->
t_flags &= ~TF_PREVVALID;
10118#ifdef NETFLIX_HTTP_LOGGING
10119 tcp_http_check_for_comp(rack->
rc_tp, th->th_ack);
10142 *ofia = ourfinisacked;
10160 SOCKBUF_LOCK(&so->so_snd);
10161 acked_amount = min(acked, (
int)sbavail(&so->so_snd));
10163 mfree = sbcut_locked(&so->so_snd, acked_amount);
10164 if ((sbused(&so->so_snd) == 0) &&
10165 (acked > acked_amount) &&
10177 if (acked_amount && sbavail(&so->so_snd))
10181 sowwakeup_locked(so);
10189 if (under_pacing &&
10199 tp->
t_flags &= ~TF_PREVVALID;
10213 (sbavail(&so->so_snd) == 0) &&
10229 *ofia = ourfinisacked;
10259 memset(&fe, 0,
sizeof(fe));
10262 rsm = RB_FIND(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, &fe);
10274 if (nrsm == NULL) {
10282 (void)RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, nrsm);
10284 insret = RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, nrsm);
10285 if (insret != NULL) {
10286 panic(
"Insert in rb tree of %p fails ret:%p rack:%p rsm:%p",
10287 nrsm, insret, rack, rsm);
10292 TAILQ_INSERT_AFTER(&rack->
r_ctl.
rc_tmap, rsm, nrsm, r_tnext);
10303 RB_FOREACH_FROM(nrsm, rack_rb_tree_head, rsm) {
10314 RB_FOREACH_REVERSE(rsm, rack_rb_tree_head, &rack->
r_ctl.
rc_mtree) {
10316 rsm->
r_flags &= ~RACK_RWND_COLLAPSED;
10325 int32_t tlen, int32_t tfo_syn)
10331 goto no_delayed_ack;
10390 struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen,
10391 uint32_t tiwin, int32_t thflags, int32_t nxt_pkt)
10403 nsegs = max(1, m->m_pkthdr.lro_nsegs);
10404 if ((thflags & TH_ACK) &&
10419 }
else if (thflags & TH_ACK) {
10420 if ((tp->
snd_wl2 == th->th_ack) && (tiwin < tp->snd_wnd)) {
10477 if ((tlen || (thflags & TH_FIN) || (tfo_syn && tlen > 0)) &&
10479 tcp_seq save_start = th->th_seq;
10480 tcp_seq save_rnxt = tp->
rcv_nxt;
10481 int save_tlen = tlen;
10483 m_adj(m, drop_hdrlen);
10495 if (th->th_seq == tp->
rcv_nxt &&
10499#ifdef NETFLIX_SB_LIMITS
10500 u_int mcnt, appended;
10502 if (so->so_rcv.sb_shlim) {
10503 mcnt = m_memcnt(m);
10505 if (counter_fo_get(so->so_rcv.sb_shlim, mcnt,
10506 CFO_NOSLEEP, NULL) ==
false) {
10507 counter_u64_add(tcp_sb_shlim_fails, 1);
10527 SOCKBUF_LOCK(&so->so_rcv);
10528 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
10531#ifdef NETFLIX_SB_LIMITS
10534 sbappendstream_locked(&so->so_rcv, m, 0);
10538 sorwakeup_locked(so);
10539#ifdef NETFLIX_SB_LIMITS
10540 if (so->so_rcv.sb_shlim && appended != mcnt)
10541 counter_fo_release(so->so_rcv.sb_shlim,
10551 tcp_seq temp = save_start;
10553 thflags =
tcp_reass(tp, th, &temp, &tlen, m);
10558 sorwakeup_locked(so);
10564 if ((tlen == 0) && (
SEQ_LT(save_start, save_rnxt))) {
10571 save_start + save_tlen);
10586 save_start + save_tlen);
10588 }
else if (tlen >= save_tlen) {
10592 save_start + save_tlen);
10593 }
else if (tlen > 0) {
10596 save_start + tlen);
10601 thflags &= ~TH_FIN;
10608 if (thflags & TH_FIN) {
10682 struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
10686 int32_t newsize = 0;
10688#ifdef NETFLIX_SB_LIMITS
10689 u_int mcnt, appended;
10697 struct tcphdr tcp_savetcp;
10706 if (__predict_false(th->th_seq != tp->
rcv_nxt)) {
10712 if (tiwin && tiwin != tp->
snd_wnd) {
10722 if (__predict_false((th->th_ack != tp->
snd_una))) {
10725 if (__predict_false(tlen > sbspace(&so->so_rcv))) {
10738 nsegs = max(1, m->m_pkthdr.lro_nsegs);
10740#ifdef NETFLIX_SB_LIMITS
10741 if (so->so_rcv.sb_shlim) {
10742 mcnt = m_memcnt(m);
10744 if (counter_fo_get(so->so_rcv.sb_shlim, mcnt,
10745 CFO_NOSLEEP, NULL) ==
false) {
10746 counter_u64_add(tcp_sb_shlim_fails, 1);
10777 if (so->so_options & SO_DEBUG)
10779 (
void *)tcp_saveipgen, &tcp_savetcp, 0);
10784 SOCKBUF_LOCK(&so->so_rcv);
10785 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
10793 if (!sbreserve_locked(&so->so_rcv,
10794 newsize, so, NULL))
10795 so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
10796 m_adj(m, drop_hdrlen);
10797#ifdef NETFLIX_SB_LIMITS
10800 sbappendstream_locked(&so->so_rcv, m, 0);
10805 sorwakeup_locked(so);
10806#ifdef NETFLIX_SB_LIMITS
10807 if (so->so_rcv.sb_shlim && mcnt != appended)
10808 counter_fo_release(so->so_rcv.sb_shlim, mcnt - appended);
10827 struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
10838 struct tcphdr tcp_savetcp;
10841 int32_t under_pacing = 0;
10856 if (__predict_false(tiwin == 0)) {
10882 nsegs = max(1, m->m_pkthdr.lro_nsegs);
10933 tp->
t_flags &= ~TF_PREVVALID;
10949 hhook_run_tcp_est_in(tp, th, to);
10954 struct mbuf *mfree;
10957 SOCKBUF_LOCK(&so->so_snd);
10958 mfree = sbcut_locked(&so->so_snd, acked);
10964 sowwakeup_locked(so);
10977#ifdef NETFLIX_HTTP_LOGGING
10978 tcp_http_check_for_comp(rack->
rc_tp, th->th_ack);
11008 if (so->so_options & SO_DEBUG)
11010 (
void *)tcp_saveipgen,
11013 if (under_pacing &&
11022 tp->
t_flags &= ~TF_PREVVALID;
11035 if (sbavail(&so->so_snd)) {
11048 struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
11051 int32_t ret_val = 0;
11053 int32_t ourfinisacked = 0;
11068 if ((thflags & TH_ACK) &&
11075 if ((thflags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) {
11077 mtod(m,
const char *), tp, th);
11082 if (thflags & TH_RST) {
11086 if (!(thflags & TH_SYN)) {
11090 tp->
irs = th->th_seq;
11093 if (thflags & TH_ACK) {
11094 int tfo_partial = 0;
11099 mac_socketpeer_set_from_mbuf(m, so);
11121 if (
DELAY_ACK(tp, tlen) && tlen != 0 && !tfo_partial) {
11156 thflags &= ~TH_SYN;
11160 mtod(m,
const char *), tp, th);
11185 thflags &= ~TH_FIN;
11189 tp->
snd_wl1 = th->th_seq - 1;
11190 tp->
rcv_up = th->th_seq;
11197 if (thflags & TH_ACK) {
11210 if (
rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val))
11219 if (ourfinisacked) {
11230 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
11231 soisdisconnected(so);
11242 tiwin, thflags, nxt_pkt));
11252 struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
11256 int32_t ret_val = 0;
11257 int32_t ourfinisacked = 0;
11260 if ((thflags & TH_ACK) &&
11276 if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) {
11280 }
else if (thflags & TH_SYN) {
11288 }
else if (!(thflags & (TH_ACK | TH_FIN | TH_RST))) {
11294 if ((thflags & TH_RST) ||
11320 if (
_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val,
11342 ((thflags & (TH_SYN | TH_FIN)) != 0))) {
11353 if ((thflags & TH_ACK) == 0) {
11358 tiwin, thflags, nxt_pkt));
11382 mtod(m,
const char *), tp, th);
11403 if (tlen == 0 && (thflags & TH_FIN) == 0) {
11404 (void)
tcp_reass(tp, (
struct tcphdr *)0, NULL, 0,
11409 sorwakeup_locked(so);
11412 tp->
snd_wl1 = th->th_seq - 1;
11425 if (
rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) {
11435 if (ourfinisacked) {
11445 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
11446 soisdisconnected(so);
11456 tiwin, thflags, nxt_pkt));
11466 struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
11469 int32_t ret_val = 0;
11488 __predict_true((thflags & (TH_SYN | TH_FIN | TH_RST | TH_ACK)) == TH_ACK) &&
11490 __predict_true(th->th_seq == tp->
rcv_nxt)) {
11492 if (
rack_fastack(m, th, so, tp, to, drop_hdrlen, tlen,
11498 tiwin, nxt_pkt, iptos)) {
11505 if ((thflags & TH_RST) ||
11515 if (thflags & TH_SYN) {
11528 if (
_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val,
11550 ((thflags & (TH_SYN | TH_FIN)) != 0))) {
11559 if ((thflags & TH_ACK) == 0) {
11562 tiwin, thflags, nxt_pkt));
11576 if (
rack_process_ack(m, th, so, tp, to, tiwin, tlen, NULL, thflags, &ret_val)) {
11579 if (sbavail(&so->so_snd)) {
11588 tiwin, thflags, nxt_pkt));
11598 struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
11601 int32_t ret_val = 0;
11606 if ((thflags & TH_RST) ||
11615 if (thflags & TH_SYN) {
11628 if (
_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val,
11650 ((thflags & (TH_SYN | TH_FIN)) != 0))) {
11659 if ((thflags & TH_ACK) == 0) {
11662 tiwin, thflags, nxt_pkt));
11676 if (
rack_process_ack(m, th, so, tp, to, tiwin, tlen, NULL, thflags, &ret_val)) {
11679 if (sbavail(&so->so_snd)) {
11688 tiwin, thflags, nxt_pkt));
11693 struct tcpcb *tp, int32_t *tlen,
struct tcphdr *th,
struct socket *so)
11708 if (sbavail(&so->so_snd) == 0)
11712 tp->
rcv_nxt = th->th_seq + *tlen;
11726 struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
11729 int32_t ret_val = 0;
11730 int32_t ourfinisacked = 0;
11736 if ((thflags & TH_RST) ||
11745 if (thflags & TH_SYN) {
11758 if (
_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val,
11767 if ((so->so_state & SS_NOFDREF) && tlen) {
11788 ((thflags & (TH_SYN | TH_FIN)) != 0))) {
11797 if ((thflags & TH_ACK) == 0) {
11800 tiwin, thflags, nxt_pkt));
11813 if (
rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) {
11816 if (ourfinisacked) {
11826 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
11827 soisdisconnected(so);
11835 if (sbavail(&so->so_snd)) {
11844 tiwin, thflags, nxt_pkt));
11854 struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
11857 int32_t ret_val = 0;
11858 int32_t ourfinisacked = 0;
11864 if ((thflags & TH_RST) ||
11873 if (thflags & TH_SYN) {
11886 if (
_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val,
11895 if ((so->so_state & SS_NOFDREF) && tlen) {
11916 ((thflags & (TH_SYN | TH_FIN)) != 0))) {
11925 if ((thflags & TH_ACK) == 0) {
11928 tiwin, thflags, nxt_pkt));
11941 if (
rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) {
11944 if (ourfinisacked) {
11949 if (sbavail(&so->so_snd)) {
11958 tiwin, thflags, nxt_pkt));
11968 struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
11971 int32_t ret_val = 0;
11972 int32_t ourfinisacked = 0;
11978 if ((thflags & TH_RST) ||
11987 if (thflags & TH_SYN) {
12000 if (
_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val,
12009 if ((so->so_state & SS_NOFDREF) && tlen) {
12030 ((thflags & (TH_SYN | TH_FIN)) != 0))) {
12039 if ((thflags & TH_ACK) == 0) {
12042 tiwin, thflags, nxt_pkt));
12055 if (
rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) {
12058 if (ourfinisacked) {
12063 if (sbavail(&so->so_snd)) {
12072 tiwin, thflags, nxt_pkt));
12082 struct tcpcb *tp,
struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
12085 int32_t ret_val = 0;
12086 int32_t ourfinisacked = 0;
12093 if ((thflags & TH_RST) ||
12102 if (thflags & TH_SYN) {
12115 if (
_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val,
12124 if ((so->so_state & SS_NOFDREF) &&
12146 ((thflags & (TH_SYN | TH_FIN)) != 0))) {
12155 if ((thflags & TH_ACK) == 0) {
12158 tiwin, thflags, nxt_pkt));
12171 if (
rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val)) {
12174 if (sbavail(&so->so_snd)) {
12183 tiwin, thflags, nxt_pkt));
12197 uint64_t bw_est, rate_wanted;
12199 uint32_t user_max, orig_min, orig_max;
12225 tp, bw_est, segsiz, 0,
12230#ifdef NETFLIX_PEAKRATE
12240 rate_wanted = *fill_override;
12278 struct ip *
ip = NULL;
12280 struct udphdr *udp = NULL;
12289 udp = (
struct udphdr *)((caddr_t)ip6 +
sizeof(
struct ip6_hdr));
12293 rack->
r_ctl.
fsb.
th = (
struct tcphdr *)(udp + 1);
12296 rack->
r_ctl.
fsb.
th = (
struct tcphdr *)(ip6 + 1);
12309 udp = (
struct udphdr *)((caddr_t)
ip +
sizeof(
struct ip));
12313 rack->
r_ctl.
fsb.
th = (
struct tcphdr *)(udp + 1);
12339 M_TCPFSB, M_NOWAIT|M_ZERO);
12405#ifdef TCP_ACCOUNTING
12406 if (rack_tcp_accounting) {
12407 tp->
t_flags2 |= TF2_TCP_ACCOUNTING;
12497#ifdef NETFLIX_EXP_DETECTION
12498 if (tcp_force_detection)
12555 (void)RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
12557 insret = RB_INSERT(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
12558 if (insret != NULL) {
12559 panic(
"Insert in rb tree fails ret:%p rack:%p rsm:%p",
12560 insret, rack, rsm);
12563 TAILQ_INSERT_TAIL(&rack->
r_ctl.
rc_tmap, rsm, r_tnext);
12587 tp->
t_flags &= ~TF_GPUTINPROG;
12690 struct mbuf *save, *m;
12696 save = m->m_nextpkt;
12697 m->m_nextpkt = NULL;
12702 tp->
t_flags &= ~TF_FORCEDATA;
12703#ifdef NETFLIX_SHARED_CWND
12704 if (rack->
r_ctl.rc_scw) {
12711 tcp_shared_cwnd_free_full(tp, rack->
r_ctl.rc_scw,
12714 rack->
r_ctl.rc_scw = NULL;
12726 val = USEC_2_TICKS(tp->
t_srtt);
12778 free(dol, M_TCPDO);
12789 RB_FOREACH_SAFE(rsm, rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, nrsm) {
12791 (void)RB_REMOVE(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
12793 rm = RB_REMOVE(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, rsm);
12795 panic(
"At fini, rack:%p rsm:%p rm:%p",
12817 tp->
t_flags &= ~TF_GPUTINPROG;
12980 }
else if ((tp->
snd_wl2 == ack) && (tiwin < tp->snd_wnd)) {
13025 struct timeval ltv;
13026 char tcp_hdr_buf[60];
13028 struct timespec ts;
13032#ifdef NETFLIX_HTTP_LOGGING
13033 struct http_sendfile_track *http_req;
13036 http_req = tcp_http_find_req_for_seq(tp, (ae->
ack-1));
13038 http_req = tcp_http_find_req_for_seq(tp, ae->
ack);
13061 ts.tv_sec = ae->
timestamp / 1000000000;
13062 ts.tv_nsec = ae->
timestamp % 1000000000;
13063 ltv.tv_sec = ts.tv_sec;
13064 ltv.tv_usec = ts.tv_nsec / 1000;
13069 ts.tv_sec = ae->
timestamp / 1000000000;
13070 ts.tv_nsec = ae->
timestamp % 1000000000;
13071 ltv.tv_sec = ts.tv_sec;
13072 ltv.tv_usec = ts.tv_nsec / 1000;
13078#ifdef NETFLIX_HTTP_LOGGING
13092 if (http_req->flags & TCP_HTTP_TRACK_FLG_OPEN) {
13099 if (http_req->flags & TCP_HTTP_TRACK_FLG_COMP) {
13105 memset(tcp_hdr_buf, 0,
sizeof(tcp_hdr_buf));
13106 th = (
struct tcphdr *)tcp_hdr_buf;
13107 th->th_seq = ae->
seq;
13108 th->th_ack = ae->
ack;
13109 th->th_win = ae->
win;
13111 th->th_sport = tp->
t_inpcb->inp_fport;
13112 th->th_dport = tp->
t_inpcb->inp_lport;
13119 th->th_off = ((
sizeof(
struct tcphdr) + TCPOLEN_TSTAMP_APPA) >> 2);
13120 cp = (u_char *)(th + 1);
13125 *cp = TCPOPT_TIMESTAMP;
13127 *cp = TCPOLEN_TIMESTAMP;
13130 bcopy((
char *)&val,
13133 bcopy((
char *)&val,
13134 (
char *)(cp + 4),
sizeof(
uint32_t));
13136 th->th_off = (
sizeof(
struct tcphdr) >> 2);
13148 if (tp->
snd_una != high_seq) {
13157 0, &log,
true, <v);
13236#ifdef TCP_ACCOUNTING
13241 struct timespec ts;
13244 uint32_t tiwin, ms_cts, cts, acked, acked_amount, high_seq, win_seq, the_win, win_upd_ack;
13245 int cnt, i, did_out, ourfinisacked = 0;
13246 struct tcpopt to_holder, *to = NULL;
13247#ifdef TCP_ACCOUNTING
13248 int win_up_req = 0;
13251 int under_pacing = 1;
13253#ifdef TCP_ACCOUNTING
13287 0, 0, 18, __LINE__, NULL, 0);
13288 tp->
t_flags &= ~TF_GPUTINPROG;
13293 KASSERT((m->m_len >=
sizeof(
struct tcp_ackent)),
13294 (
"tp:%p m_cmpack:%p with invalid len:%u", tp, m, m->m_len));
13311 for (i = 0; i < cnt; i++) {
13312#ifdef TCP_ACCOUNTING
13313 ts_val = get_cyclecount();
13325 }
else if (
SEQ_GT(ae->
ack, high_seq)) {
13328 }
else if ((tiwin == the_win) && (rack->
rc_in_persist == 0)){
13353#ifdef TCP_ACCOUNTING
13354 rdstc = get_cyclecount();
13355 if (rdstc > ts_val) {
13358 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
13359 tp->tcp_proc_time[ae->
ack_val_set] += (rdstc - ts_val);
13386#ifdef TCP_ACCOUNTING
13388 counter_u64_add(tcp_cnt_counters[ae->
ack_val_set], 1);
13389 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
13418 ts.tv_sec = ae->
timestamp / 1000000000;
13419 ts.tv_nsec = ae->
timestamp % 1000000000;
13429#ifdef TCP_ACCOUNTING
13432 win_upd_ack = ae->
ack;
13452 win_upd_ack = ae->
ack;
13457#ifdef TCP_ACCOUNTING
13459 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
13460 tp->tcp_cnt_counters[
CNT_OF_ACKS_IN] += (((ae->
ack - high_seq) + segsiz - 1) / segsiz);
13463 (((ae->
ack - high_seq) + segsiz - 1) / segsiz));
13465 high_seq = ae->
ack;
13478 0, &log,
false, NULL, NULL, 0, &tv);
13491 if (
CC_ALGO(tp)->newround != NULL) {
13497 ts.tv_sec = ae->
timestamp / 1000000000;
13498 ts.tv_nsec = ae->
timestamp % 1000000000;
13517#ifdef TCP_ACCOUNTING
13518 rdstc = get_cyclecount();
13519 if (rdstc > ts_val) {
13520 counter_u64_add(tcp_proc_time[ae->
ack_val_set] , (rdstc - ts_val));
13521 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
13522 tp->tcp_proc_time[ae->
ack_val_set] += (rdstc - ts_val);
13529#ifdef TCP_ACCOUNTING
13530 ts_val = get_cyclecount();
13532 acked_amount = acked = (high_seq - tp->
snd_una);
13541 if (acked >= segsiz) {
13549 ac = acked / segsiz;
13570 acked_amount = acked = (high_seq - tp->
snd_una);
13572 if (acked > sbavail(&so->so_snd))
13573 acked_amount = sbavail(&so->so_snd);
13574#ifdef NETFLIX_EXP_DETECTION
13582 rack_do_detection(tp, rack, acked_amount, segsiz);
13597 if ((sbused(&so->so_snd) == 0) &&
13598 (acked > acked_amount) &&
13618 tp->
t_flags &= ~TF_PREVVALID;
13626 if (acked_amount > 0) {
13627 struct mbuf *mfree;
13630 SOCKBUF_LOCK(&so->so_snd);
13631 mfree = sbcut_locked(&so->so_snd, acked_amount);
13637 sowwakeup_locked(so);
13661#ifdef NETFLIX_HTTP_LOGGING
13662 tcp_http_check_for_comp(rack->
rc_tp, high_seq);
13666 if (under_pacing &&
13675 tp->
t_flags &= ~TF_PREVVALID;
13689 (sbavail(&so->so_snd) == 0) &&
13699#ifdef TCP_ACCOUNTING
13700 rdstc = get_cyclecount();
13701 if (rdstc > ts_val) {
13702 counter_u64_add(tcp_proc_time[
ACK_CUMACK] , (rdstc - ts_val));
13703 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
13704 tp->tcp_proc_time[
ACK_CUMACK] += (rdstc - ts_val);
13712#ifdef TCP_ACCOUNTING
13727 goto send_out_a_rst;
13729 if ((sbused(&so->so_snd) == 0) &&
13739 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
13740 soisdisconnected(so);
13746 if (ourfinisacked == 0) {
13756 if (sbavail(&so->so_snd)) {
13765#ifdef TCP_ACCOUNTING
13766 rdstc = get_cyclecount();
13767 if (rdstc > ts_val) {
13768 counter_u64_add(tcp_proc_time[
ACK_CUMACK] , (rdstc - ts_val));
13769 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
13770 tp->tcp_proc_time[
ACK_CUMACK] += (rdstc - ts_val);
13781 if (ourfinisacked) {
13784#ifdef TCP_ACCOUNTING
13785 rdstc = get_cyclecount();
13786 if (rdstc > ts_val) {
13789 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
13790 tp->tcp_proc_time[
ACK_CUMACK] += (rdstc - ts_val);
13801#ifdef TCP_ACCOUNTING
13802 rdstc = get_cyclecount();
13803 if (rdstc > ts_val) {
13806 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
13807 tp->tcp_proc_time[
ACK_CUMACK] += (rdstc - ts_val);
13818#ifdef TCP_ACCOUNTING
13819 rdstc = get_cyclecount();
13820 if (rdstc > ts_val) {
13823 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
13824 tp->tcp_proc_time[
ACK_CUMACK] += (rdstc - ts_val);
13829 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
13830 soisdisconnected(so);
13848#ifdef TCP_ACCOUNTING
13849 rdstc = get_cyclecount();
13850 if (rdstc > ts_val) {
13851 counter_u64_add(tcp_proc_time[
ACK_CUMACK] , (rdstc - ts_val));
13852 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
13853 tp->tcp_proc_time[
ACK_CUMACK] += (rdstc - ts_val);
13858 }
else if (win_up_req) {
13859 rdstc = get_cyclecount();
13860 if (rdstc > ts_val) {
13861 counter_u64_add(tcp_proc_time[
ACK_RWND] , (rdstc - ts_val));
13862 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
13863 tp->tcp_proc_time[
ACK_RWND] += (rdstc - ts_val);
13872#ifdef TCP_ACCOUNTING
13882 if (tcp_output(tp) < 0) {
13883#ifdef TCP_ACCOUNTING
13891#ifdef TCP_ACCOUNTING
13902 struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen,
uint8_t iptos,
13903 int32_t nxt_pkt,
struct timeval *tv)
13905#ifdef TCP_ACCOUNTING
13908 int32_t thflags, retval, did_out = 0;
13909 int32_t way_out = 0;
13917 struct timespec ts;
13921 int32_t prev_state = 0;
13922#ifdef TCP_ACCOUNTING
13923 int ack_val_set = 0xf;
13935 panic(
"Impossible reach m has ackcmp? m:%p tp:%p", m, tp);
13939 nsegs = m->m_pkthdr.lro_nsegs;
13942#ifdef TCP_ACCOUNTING
13944 if (thflags & TH_ACK)
13945 ts_val = get_cyclecount();
13947 if ((m->m_flags & M_TSTMP) ||
13948 (m->m_flags & M_TSTMP_LRO)) {
13949 mbuf_tstmp2timespec(m, &ts);
13954 kern_prefetch(rack, &prev_state);
13961#ifdef TCP_ACCOUNTING
13962 if (thflags & TH_ACK) {
13980 ack_val_set = tcp_do_ack_accounting(tp, th, &to, tiwin,
13987 memset(&to, 0,
sizeof(to));
13989 (th->th_off << 2) -
sizeof(
struct tcphdr),
13990 (thflags & TH_SYN) ?
TO_SYN : 0);
13991 NET_EPOCH_ASSERT();
14019 0, 0, 18, __LINE__, NULL, 0);
14020 tp->
t_flags &= ~TF_GPUTINPROG;
14023 high_seq = th->th_ack;
14026 struct timeval ltv;
14027#ifdef NETFLIX_HTTP_LOGGING
14028 struct http_sendfile_track *http_req;
14031 http_req = tcp_http_find_req_for_seq(tp, (th->th_ack-1));
14033 http_req = tcp_http_find_req_for_seq(tp, th->th_ack);
14052#ifdef TCP_ACCOUNTING
14056 if (m->m_flags & M_TSTMP) {
14058 mbuf_tstmp2timespec(m, &ts);
14059 ltv.tv_sec = ts.tv_sec;
14060 ltv.tv_usec = ts.tv_nsec / 1000;
14062 }
else if (m->m_flags & M_TSTMP_LRO) {
14064 mbuf_tstmp2timespec(m, &ts);
14065 ltv.tv_sec = ts.tv_sec;
14066 ltv.tv_usec = ts.tv_nsec / 1000;
14072#ifdef NETFLIX_HTTP_LOGGING
14086 if (http_req->flags & TCP_HTTP_TRACK_FLG_OPEN) {
14093 if (http_req->flags & TCP_HTTP_TRACK_FLG_COMP) {
14100 tlen, &log,
true, <v);
14102 if ((thflags & TH_SYN) && (thflags & TH_FIN) &&
V_drop_synfin) {
14106 goto done_with_input;
14116#ifdef TCP_ACCOUNTING
14133 goto done_with_input;
14177 KASSERT(rack->
rc_inp != NULL,
14178 (
"%s: rack->rc_inp unexpectedly NULL", __func__));
14179 if (rack->
rc_inp == NULL) {
14198 tp->
t_flags &= ~TF_REQ_SCALE;
14211 tp->
t_flags &= ~TF_REQ_TSTMP;
14217 tp->
t_flags &= ~TF_SACK_PERMIT;
14249#ifdef TCP_ACCOUNTING
14257 if (thflags & TH_FIN)
14280 (rsm = RB_MIN(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree)) != NULL)
14281 kern_prefetch(rsm, &prev_state);
14284 tp, &to, drop_hdrlen,
14285 tlen, tiwin, thflags, nxt_pkt, iptos);
14287 if ((retval == 0) &&
14289 panic(
"retval:%d tp:%p t_inpcb:NULL state:%d",
14290 retval, tp, prev_state);
14319#ifdef TCP_ACCOUNTING
14329 if (ack_val_set != 0xf) {
14332 crtsc = get_cyclecount();
14333 counter_u64_add(tcp_proc_time[ack_val_set] , (crtsc - ts_val));
14334 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
14335 tp->tcp_proc_time[ack_val_set] += (crtsc - ts_val);
14339 if (nxt_pkt == 0) {
14342 if (tcp_output(tp) < 0)
14362 0, &log,
false, NULL, NULL, 0, &tv);
14374 if (
CC_ALGO(tp)->newround != NULL) {
14378 if ((nxt_pkt == 0) &&
14405 if (late && (did_out == 0)) {
14411 goto do_output_now;
14416 }
else if (nxt_pkt == 0) {
14427 panic(
"OP:%d retval:%d tp:%p t_inpcb:NULL state:%d",
14429 retval, tp, prev_state);
14432#ifdef TCP_ACCOUNTING
14437 if (ack_val_set != 0xf) {
14440 crtsc = get_cyclecount();
14441 counter_u64_add(tcp_proc_time[ack_val_set] , (crtsc - ts_val));
14449#ifdef TCP_ACCOUNTING
14457 struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen,
uint8_t iptos)
14468 if (m->m_flags & M_TSTMP_LRO) {
14469 tv.tv_sec = m->m_pkthdr.rcv_tstmp /1000000000;
14470 tv.tv_usec = (m->m_pkthdr.rcv_tstmp % 1000000000)/1000;
14476 drop_hdrlen, tlen, iptos, 0, &tv) == 0) {
14486 uint32_t srtt = 0, thresh = 0, ts_low = 0;
14527 if ((tsused == ts_low) ||
14532 if ((tsused - ts_low) < thresh) {
14556 uint64_t bw_est, uint64_t bw, uint64_t len_time,
int method,
14563 memset(&log, 0,
sizeof(log));
14619 0, &log,
false, &tv);
14639 if (new_tso > user_max)
14640 new_tso = user_max;
14647 uint64_t lentim, fill_bw;
14684 *rate_wanted = fill_bw;
14685 if ((fill_bw <
RACK_MIN_BW) || (fill_bw < *rate_wanted))
14692 uint64_t high_rate;
14695 if (fill_bw > high_rate) {
14697 if (*rate_wanted > high_rate) {
14702 fill_bw, high_rate, __LINE__,
14704 fill_bw = high_rate;
14708 }
else if ((rack->
r_ctl.
crte == NULL) &&
14719 uint64_t high_rate;
14723 if (fill_bw > high_rate) {
14724 fill_bw = high_rate;
14736 *rate_wanted = fill_bw;
14737 if (non_paced || (lentim < slot)) {
14739 0, lentim, 12, __LINE__, NULL, 0);
14740 return ((int32_t)lentim);
14750 int can_start_hw_pacing = 1;
14761 uint64_t cwnd, tr_perms = 0;
14762 int32_t reduce = 0;
14772 srtt = max(tp->
t_srtt, 1);
14778 tr_perms = (cwnd * 1000) / srtt;
14779 if (tr_perms == 0) {
14791 slot = len / tr_perms;
14795 if (reduce < slot) {
14802 uint64_t rate_wanted = 0;
14810 uint64_t bw_est, res, lentim, rate_wanted;
14821#ifdef NETFLIX_PEAKRATE
14826 bw_est = rate_wanted = 0;
14831 if ((bw_est == 0) || (rate_wanted == 0) ||
14840 segs = (len + segsiz - 1) / segsiz;
14849 can_start_hw_pacing = 0;
14856 res = lentim / rate_wanted;
14878 rate_wanted, bw_est, __LINE__,
14894 slot =
pace_to_fill_cwnd(rack, slot, (len+segs), segsiz, &capped, &rate_wanted, 0);
14899 (can_start_hw_pacing > 0) &&
14950 (rate_wanted <= rack->
r_ctl.crte_prev_rate)) {
14973 if (nrte == NULL) {
14978 rate_wanted, 0, __LINE__,
15018#ifdef NETFLIX_PEAKRATE
15036 if (srtt < (uint64_t)slot) {
15051 int hw_boost_delay;
15058 slot += hw_boost_delay;
15065 tcp_seq startseq,
uint32_t sb_offset)
15129 __LINE__, NULL, 0);
15145 if ((my_rsm == NULL) ||
15185 if ((my_rsm == NULL) ||
15212 __LINE__, NULL, 0);
15228 my_rsm = RB_FIND(rack_rb_tree_head, &rack->
r_ctl.
rc_mtree, &fe);
15260 9, __LINE__, NULL, 0);
15265 uint32_t avail, int32_t sb_offset)
15270 if (tp->
snd_wnd > cwnd_to_use)
15271 sendwin = cwnd_to_use;
15281 if (flight >= sendwin) {
15289 len = sendwin - flight;
15294 if ((len + sb_offset) > avail) {
15299 len = avail - sb_offset;
15307 unsigned ipoptlen, int32_t orig_len, int32_t len,
int error,
15308 int rsm_is_null,
int optlen,
int line,
uint16_t mode)
15334 len, &log,
false, NULL, NULL, 0, &tv);
15339static struct mbuf *
15342 int32_t seglimit, int32_t segsize,
int hw_tls)
15345 struct ktls_session *tls, *ntls;
15347 struct mbuf *start;
15350 struct mbuf *m, *n, **np, *smb;
15353 int32_t len = *plen;
15355 int32_t len_cp = 0;
15358 soff = off = the_off;
15363 if (hw_tls && (m->m_flags & M_EXTPG))
15364 tls = m->m_epg_tls;
15378 if (m->m_flags & M_EXTPG)
15379 ntls = m->m_epg_tls;
15395 mlen = min(len, m->m_len - off);
15405 if (m->m_flags & M_EXTPG) {
15406 fragsize = min(segsize, PAGE_SIZE);
15409 fragsize = segsize;
15414 if ((frags + 1) >= seglimit) {
15425 if ((frags + howmany(mlen, fragsize)) >= seglimit) {
15426 mlen = (seglimit - frags - 1) * fragsize;
15428 *plen = len_cp + len;
15430 frags += howmany(mlen, fragsize);
15434 KASSERT(seglimit > 0,
15435 (
"%s: seglimit went too low", __func__));
15437 n = m_get(M_NOWAIT, m->m_type);
15443 len_cp += n->m_len;
15444 if (m->m_flags & (M_EXT|M_EXTPG)) {
15445 n->m_data = m->m_data + off;
15448 bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
15455 if (len || (soff == smb->m_len)) {
15501static struct mbuf *
15503 int32_t seglimit, int32_t segsize,
struct mbuf **s_mb,
int *s_soff)
15505 struct mbuf *m, *n;
15526 KASSERT(soff >= 0, (
"%s, negative off %d", __FUNCTION__, soff));
15527 KASSERT(*plen >= 0, (
"%s, negative len %d", __FUNCTION__, *plen));
15528 KASSERT(soff < m->m_len, (
"%s rack:%p len:%u m:%p m->m_len:%u < off?",
15530 rack, *plen, m, m->m_len));
15552 struct ip *
ip = NULL;
15553 struct udphdr *udp = NULL;
15554 struct tcphdr *th = NULL;
15555 struct mbuf *m = NULL;
15559#ifdef TCP_ACCOUNTING
15564 u_char opt[TCP_MAXOLEN];
15566 int32_t slot, segsiz, max_val, tso = 0, error, ulen = 0;
15568 uint32_t if_hw_tsomaxsegcount = 0, startseq;
15576 hdrlen =
sizeof(
struct ip6_hdr) +
sizeof(
struct tcphdr);
15597 flags = tcp_outflags[tp->
t_state];
15598 if (flags & (TH_SYN|TH_RST)) {
15605 if (flags & TH_FIN) {
15618 hdrlen +=
sizeof(
struct udphdr);
15631 if (MHLEN < hdrlen + max_linkhdr)
15632 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
15635 m = m_gethdr(M_NOWAIT, MT_DATA);
15638 m->m_data += max_linkhdr;
15644 if ((tso) && (len + optlen > tp->
t_maxseg)) {
15656 if (if_hw_tsomax != 0) {
15658 max_len = (if_hw_tsomax - hdrlen -
15660 if (max_len <= 0) {
15662 }
else if (len > max_len) {
15666 if (len <= segsiz) {
15676 if ((tso == 0) && (len > segsiz))
15679 (len <= MHLEN - hdrlen - max_linkhdr)) {
15682 th->th_seq = htonl(rsm->
r_start);
15683 th->th_ack = htonl(tp->
rcv_nxt);
15695 if (th->th_win == 0) {
15699 tp->
t_flags &= ~TF_RXWIN0SENT;
15716 if (rsm->
m == NULL)
15723 if (len <= segsiz) {
15732 if ((m->m_next == NULL) || (len <= 0)){
15737 ulen = hdrlen + len -
sizeof(
struct ip6_hdr);
15739 ulen = hdrlen + len -
sizeof(
struct ip);
15742 m->m_pkthdr.rcvif = (
struct ifnet *)0;
15752 ip6->ip6_flow |= htonl(ect << 20);
15762 m->m_pkthdr.len = hdrlen + len;
15766 m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
15767 m->m_pkthdr.csum_data = offsetof(
struct udphdr, uh_sum);
15769 th->th_sum = htons(0);
15772 m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
15773 m->m_pkthdr.csum_data = offsetof(
struct tcphdr, th_sum);
15774 th->th_sum = in6_cksum_pseudo(ip6,
15775 sizeof(
struct tcphdr) + optlen + len,
IPPROTO_TCP,
15780#if defined(INET6) && defined(INET)
15786 m->m_pkthdr.csum_flags = CSUM_UDP;
15787 m->m_pkthdr.csum_data = offsetof(
struct udphdr, uh_sum);
15790 th->th_sum = htons(0);
15793 m->m_pkthdr.csum_flags = CSUM_TCP;
15794 m->m_pkthdr.csum_data = offsetof(
struct tcphdr, th_sum);
15801 (
"%s: IP version incorrect: %d", __func__,
ip->
ip_v));
15805 KASSERT(len > tp->
t_maxseg - optlen,
15806 (
"%s: len <= tso_segsz tp:%p", __func__, tp));
15807 m->m_pkthdr.csum_flags |= CSUM_TSO;
15808 m->m_pkthdr.tso_segsz = tp->
t_maxseg - optlen;
15813 ip6->ip6_plen = htons(m->m_pkthdr.len -
sizeof(*ip6));
15817 tp->
t_flags2 &= ~TF2_PLPMTU_PMTUD;
15820#if defined(INET) && defined(INET6)
15825 ip->
ip_len = htons(m->m_pkthdr.len);
15833 tp->
t_flags2 &= ~TF2_PLPMTU_PMTUD;
15842 bcopy(opt, th + 1, optlen);
15843 th->th_off = (
sizeof(
struct tcphdr) + optlen) >> 2;
15845 th->th_off =
sizeof(
struct tcphdr) >> 2;
15864 if (doing_tlp == 0)
15876 len, &log,
false, NULL, NULL, 0, tv);
15881 error = ip6_output(m, NULL,
15883 0, NULL, NULL, inp);
15886#if defined(INET) && defined(INET6)
15926 idx = (len / segsiz) + 3;
15938 if (error && (error == ENOBUFS)) {
15973 rsm->r_flags &= ~RACK_MUST_RXT;
15977 rsm->r_flags &= ~RACK_MUST_RXT;
15980#ifdef TCP_ACCOUNTING
15981 crtsc = get_cyclecount();
15982 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
15985 counter_u64_add(tcp_cnt_counters[
SND_OUT_DATA], cnt_thru);
15986 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
15989 counter_u64_add(tcp_proc_time[
SND_OUT_DATA], (crtsc - ts_val));
15990 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
15991 tp->tcp_cnt_counters[
CNT_OF_MSS_OUT] += ((len + segsiz - 1) / segsiz);
15993 counter_u64_add(tcp_cnt_counters[
CNT_OF_MSS_OUT], ((len + segsiz - 1) / segsiz));
16037 if ((tp->
snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat &&
16038 sbused(&so->so_snd) >=
16039 (so->so_snd.sb_hiwat / 8 * 7) &&
16041 sendwin >= (sbused(&so->so_snd) -
16049 scaleup += so->so_snd.sb_hiwat;
16052 if (!sbreserve_locked(&so->so_snd, scaleup, so, curthread))
16053 so->so_snd.sb_flags &= ~SB_AUTOSIZE;
16060 uint32_t cts,
uint32_t ms_cts,
struct timeval *tv,
long tot_len,
int *send_err)
16071 struct ip *
ip = NULL;
16072 struct udphdr *udp = NULL;
16073 struct tcphdr *th = NULL;
16074 struct mbuf *m, *s_mb;
16078#ifdef TCP_ACCOUNTING
16082 u_char opt[TCP_MAXOLEN];
16085 int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0;
16088 uint32_t if_hw_tsomaxsegcount = 0, startseq;
16096 hdrlen =
sizeof(
struct ip6_hdr) +
sizeof(
struct tcphdr);
16122 hdrlen +=
sizeof(
struct udphdr);
16136 if (MHLEN < hdrlen + max_linkhdr)
16137 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
16140 m = m_gethdr(M_NOWAIT, MT_DATA);
16143 m->m_data += max_linkhdr;
16149 if ((tso) && (len + optlen > tp->
t_maxseg)) {
16161 if (if_hw_tsomax != 0) {
16163 max_len = (if_hw_tsomax - hdrlen -
16165 if (max_len <= 0) {
16167 }
else if (len > max_len) {
16171 if (len <= segsiz) {
16181 if ((tso == 0) && (len > segsiz))
16184 (len <= MHLEN - hdrlen - max_linkhdr)) {
16188 th->th_seq = htonl(tp->
snd_max);
16189 th->th_ack = htonl(tp->
rcv_nxt);
16191 if (th->th_win == 0) {
16195 tp->
t_flags &= ~TF_RXWIN0SENT;
16207 m->m_next =
rack_fo_m_copym(rack, &len, if_hw_tsomaxsegcount, if_hw_tsomaxsegsize,
16209 if (len <= segsiz) {
16223 if ((m->m_next == NULL) || (len <= 0)){
16228 ulen = hdrlen + len -
sizeof(
struct ip6_hdr);
16230 ulen = hdrlen + len -
sizeof(
struct ip);
16233 m->m_pkthdr.rcvif = (
struct ifnet *)0;
16243 ip6->ip6_flow |= htonl(ect << 20);
16253 m->m_pkthdr.len = hdrlen + len;
16257 m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
16258 m->m_pkthdr.csum_data = offsetof(
struct udphdr, uh_sum);
16260 th->th_sum = htons(0);
16263 m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
16264 m->m_pkthdr.csum_data = offsetof(
struct tcphdr, th_sum);
16265 th->th_sum = in6_cksum_pseudo(ip6,
16266 sizeof(
struct tcphdr) + optlen + len,
IPPROTO_TCP,
16271#if defined(INET6) && defined(INET)
16277 m->m_pkthdr.csum_flags = CSUM_UDP;
16278 m->m_pkthdr.csum_data = offsetof(
struct udphdr, uh_sum);
16281 th->th_sum = htons(0);
16284 m->m_pkthdr.csum_flags = CSUM_TCP;
16285 m->m_pkthdr.csum_data = offsetof(
struct tcphdr, th_sum);
16292 (
"%s: IP version incorrect: %d", __func__,
ip->
ip_v));
16296 KASSERT(len > tp->
t_maxseg - optlen,
16297 (
"%s: len <= tso_segsz tp:%p", __func__, tp));
16298 m->m_pkthdr.csum_flags |= CSUM_TSO;
16299 m->m_pkthdr.tso_segsz = tp->
t_maxseg - optlen;
16304 ip6->ip6_plen = htons(m->m_pkthdr.len -
sizeof(*ip6));
16308 tp->
t_flags2 &= ~TF2_PLPMTU_PMTUD;
16311#if defined(INET) && defined(INET6)
16316 ip->
ip_len = htons(m->m_pkthdr.len);
16324 tp->
t_flags2 &= ~TF2_PLPMTU_PMTUD;
16333 bcopy(opt, th + 1, optlen);
16334 th->th_off = (
sizeof(
struct tcphdr) + optlen) >> 2;
16336 th->th_off =
sizeof(
struct tcphdr) >> 2;
16364 len, &log,
false, NULL, NULL, 0, tv);
16369 error = ip6_output(m, NULL,
16371 0, NULL, NULL, inp);
16374#if defined(INET) && defined(INET6)
16413 idx = (len / segsiz) + 3;
16419 if (len <= rack->
r_ctl.fsb.left_to_send)
16449#ifdef TCP_ACCOUNTING
16450 crtsc = get_cyclecount();
16451 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
16454 counter_u64_add(tcp_cnt_counters[
SND_OUT_DATA], cnt_thru);
16455 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
16458 counter_u64_add(tcp_proc_time[
SND_OUT_DATA], (crtsc - ts_val));
16459 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
16460 tp->tcp_cnt_counters[
CNT_OF_MSS_OUT] += ((tot_len + segsiz - 1) / segsiz);
16462 counter_u64_add(tcp_cnt_counters[
CNT_OF_MSS_OUT], ((tot_len + segsiz - 1) / segsiz));
16479 int32_t len, error = 0;
16481 struct mbuf *m, *s_mb = NULL;
16483 uint32_t if_hw_tsomaxsegcount = 0;
16485 int32_t segsiz, minseg;
16486 long tot_len_this_send = 0;
16488 struct ip *
ip = NULL;
16491 struct ipovly *ipov = NULL;
16493 struct udphdr *udp = NULL;
16499 u_char opt[TCP_MAXOLEN];
16500 unsigned ipoptlen, optlen, hdrlen, ulen=0;
16503#if defined(IPSEC) || defined(IPSEC_SUPPORT)
16504 unsigned ipsec_optlen = 0;
16507 int32_t idle, sendalot;
16508 int32_t sub_from_prr = 0;
16509 volatile int32_t sack_rxmit;
16514 int32_t sup_rack = 0;
16515 uint32_t cts, ms_cts, delayed, early;
16518 uint8_t hpts_calling, doing_tlp = 0;
16519 uint32_t cwnd_to_use, pace_max_seg;
16520 int32_t do_a_prefetch = 0;
16521 int32_t prefetch_rsm = 0;
16522 int32_t orig_len = 0;
16524 int32_t prefetch_so_done = 0;
16527 struct sockbuf *sb;
16528 uint64_t ts_val = 0;
16529#ifdef TCP_ACCOUNTING
16537 bool hw_tls =
false;
16541#ifdef TCP_ACCOUNTING
16543 ts_val = get_cyclecount();
16546 NET_EPOCH_ASSERT();
16550#ifdef TCP_ACCOUNTING
16564#ifdef TCP_ACCOUNTING
16604#ifdef TCP_ACCOUNTING
16611 return (retval < 0 ? retval : 0);
16619#ifdef TCP_ACCOUNTING
16636#ifdef TCP_ACCOUNTING
16637 crtsc = get_cyclecount();
16638 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
16639 tp->tcp_proc_time[
SND_BLOCKED] += (crtsc - ts_val);
16641 counter_u64_add(tcp_proc_time[
SND_BLOCKED], (crtsc - ts_val));
16642 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
16645 counter_u64_add(tcp_cnt_counters[
SND_BLOCKED], 1);
16661 }
else if (early) {
16674 (doing_tlp == 0) &&
16679 ret =
rack_fast_output(tp, rack, ts_val, cts, ms_cts, &tv, tot_len_this_send, &error);
16703 goto just_return_nolock;
16761 flags = tcp_outflags[tp->
t_state];
16770 goto just_return_nolock;
16772 TAILQ_INSERT_TAIL(&rack->
r_ctl.
rc_free, rsm, r_tnext);
16781 if (flags & TH_RST) {
16795 (
"%s:%d: r.start:%u < SND.UNA:%u; tp:%p, rack:%p, rsm:%p",
16796 __func__, __LINE__,
16810 panic(
"Huh, tp:%p rack:%p rsm:%p start:%u < snd_una:%u\n",
16816 (
"%s:%d: r.start:%u < SND.UNA:%u; tp:%p, rack:%p, rsm:%p",
16817 __func__, __LINE__,
16849 (
"%s:%d: r.start:%u < SND.UNA:%u; tp:%p, rack:%p, rsm:%p",
16850 __func__, __LINE__,
16853 cwin = min(tp->
snd_wnd, tlen);
16857 (doing_tlp == 0) &&
16865 int sendwin, flight;
16869 if (flight >= sendwin) {
16872 goto just_return_nolock;
16882 goto just_return_nolock;
16912 if ((rsm == NULL) &&
16923 goto just_return_nolock;
16951 rsm->
r_flags &= ~RACK_MUST_RXT;
16955 if (do_a_prefetch == 0) {
16956 kern_prefetch(sb, &do_a_prefetch);
16959#ifdef NETFLIX_SHARED_CWND
16960 if ((tp->
t_flags2 & TF2_TCP_SCWND_ALLOWED) &&
16965 (rack->
r_ctl.rc_scw == NULL) &&
16970 rack->
r_ctl.rc_scw = tcp_shared_cwnd_alloc(tp,
16974 if (rack->
r_ctl.rc_scw &&
16976 sbavail(&so->so_snd)) {
16981 if (rack->
r_ctl.rc_scw) {
16997 if ((sack_rxmit == 0) && (prefetch_rsm == 0)) {
17001 kern_prefetch(end_rsm, &prefetch_rsm);
17020 if ((sack_rxmit == 0) &&
17024 avail = sbavail(sb);
17037 len = tp->
snd_wnd - sb_offset;
17069 if (tp->
snd_wnd > outstanding) {
17070 len = tp->
snd_wnd - outstanding;
17072 if ((sb_offset + len) > avail) {
17074 if (avail > sb_offset)
17075 len = avail - sb_offset;
17082 }
else if (avail > sb_offset) {
17083 len = avail - sb_offset;
17095 if (len > segsiz) {
17106 }
else if (len < segsiz) {
17115 leftinsb = sbavail(sb) - sb_offset;
17116 if (leftinsb > len) {
17128 if ((sack_rxmit == 0) &&
17134 if (prefetch_so_done == 0) {
17135 kern_prefetch(so, &prefetch_so_done);
17136 prefetch_so_done = 1;
17144 ((sack_rxmit == 0) && (tp->
t_rxtshift == 0))) {
17175 (((flags & TH_SYN) && (tp->
t_rxtshift > 0)) ||
17178 (flags & TH_RST))) {
17213 (sb_offset < (
int)sbavail(sb))) {
17216 }
else if ((rsm == NULL) &&
17217 (doing_tlp == 0) &&
17218 (len < pace_max_seg)) {
17227 (len < (
int)(sbavail(sb) - sb_offset))) {
17244 }
else if ((cwnd_to_use >= max(minseg, (segsiz * 4))) &&
17246 (len < (
int)(sbavail(sb) - sb_offset)) &&
17262 (len < (
int)(sbavail(sb) - sb_offset)) &&
17273 }
else if ((rack->
r_ctl.
crte != NULL) &&
17275 (cwnd_to_use >= (pace_max_seg + (4 * segsiz))) &&
17277 (len < (
int)(sbavail(sb) - sb_offset))) {
17306 KASSERT(len >= 0, (
"[%s:%d]: len < 0", __func__, __LINE__));
17325#if defined(IPSEC) || defined(IPSEC_SUPPORT)
17331 if (isipv6 && IPSEC_ENABLED(ipv6))
17332 ipsec_optlen = IPSEC_HDRSIZE(ipv6, tp->
t_inpcb);
17338 if (IPSEC_ENABLED(ipv4))
17339 ipsec_optlen = IPSEC_HDRSIZE(ipv4, tp->
t_inpcb);
17343#if defined(IPSEC) || defined(IPSEC_SUPPORT)
17344 ipoptlen += ipsec_optlen;
17372 recwin = lmin(lmax(sbspace(&so->so_rcv), 0),
17386 if (len >= segsiz) {
17490 if (adv >= (int32_t)(2 * segsiz) &&
17491 (adv >= (int32_t)(so->so_rcv.sb_hiwat / 4) ||
17492 recwin <= (int32_t)(so->so_rcv.sb_hiwat / 8) ||
17493 so->so_rcv.sb_hiwat <= 8 * segsiz)) {
17497 if (2 * adv >= (int32_t) so->so_rcv.sb_hiwat) {
17520 if ((flags & TH_FIN) &&
17529 SOCKBUF_UNLOCK(sb);
17534 if (tot_len_this_send > 0) {
17538 if ((error == 0) &&
17540 ((flags & (TH_SYN|TH_FIN)) == 0) &&
17548 (len > 0) && (orig_len > 0) &&
17549 (orig_len > len) &&
17550 ((orig_len - len) >= segsiz) &&
17565 (
"rack:%p left_to_send:%u sbavail:%u out:%u",
17581 ipoptlen, orig_len, len, 0,
17582 1, optlen, __LINE__, 1);
17586 int end_window = 0;
17614 }
else if ((idle == 0) &&
17644 panic(
"rack:%p hit JR_ASSESSING case cwnd_to_use:%u?", rack, cwnd_to_use);
17700 tp->
t_flags &= ~TF_GPUTINPROG;
17704 0, 0, 18, __LINE__, NULL, 0);
17729 tp->
gput_ack, 0, 0, 4, __LINE__, NULL, 0);
17736 (sbavail(sb) > tp->
snd_wnd) &&
17744#ifdef NETFLIX_SHARED_CWND
17745 if ((sbavail(sb) == 0) &&
17746 rack->
r_ctl.rc_scw) {
17751#ifdef TCP_ACCOUNTING
17752 if (tot_len_this_send > 0) {
17753 crtsc = get_cyclecount();
17754 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
17758 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
17761 counter_u64_add(tcp_proc_time[
SND_OUT_DATA], (crtsc - ts_val));
17762 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
17763 tp->tcp_cnt_counters[
CNT_OF_MSS_OUT] += ((tot_len_this_send + segsiz - 1) / segsiz);
17765 counter_u64_add(tcp_cnt_counters[
CNT_OF_MSS_OUT], ((tot_len_this_send + segsiz - 1) / segsiz));
17767 crtsc = get_cyclecount();
17768 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
17771 counter_u64_add(tcp_cnt_counters[
SND_LIMITED], 1);
17772 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
17773 tp->tcp_proc_time[
SND_LIMITED] += (crtsc - ts_val);
17775 counter_u64_add(tcp_proc_time[
SND_LIMITED], (crtsc - ts_val));
17782 if (rsm || sack_rxmit)
17786 if ((flags & TH_FIN) &&
17802 SOCKBUF_LOCK_ASSERT(sb);
17807 tp->
t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT;
17819 hdrlen =
sizeof(
struct ip6_hdr) + sizeof(struct tcphdr);
17832 if (flags & TH_SYN) {
17886 (so->so_rcv.sb_flags & SB_AUTOSIZE))
17890 if (flags & TH_SYN)
17899#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
17918 SOCKBUF_UNLOCK(&so->so_snd);
17919#ifdef TCP_ACCOUNTING
17920 crtsc = get_cyclecount();
17921 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
17925 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
17928 counter_u64_add(tcp_proc_time[
SND_OUT_FAIL], (crtsc - ts_val));
17931 return (EHOSTUNREACH);
17933 hdrlen +=
sizeof(
struct udphdr);
17937 ipoptlen = ip6_optlen(tp->
t_inpcb);
17942 offsetof(
struct ipoption, ipopt_list);
17945#if defined(IPSEC) || defined(IPSEC_SUPPORT)
17946 ipoptlen += ipsec_optlen;
17954 if (len + optlen + ipoptlen > tp->
t_maxseg) {
17964 KASSERT(ipoptlen == 0,
17965 (
"%s: TSO can't do IP options", __func__));
17971 if (if_hw_tsomax != 0) {
17973 max_len = (if_hw_tsomax - hdrlen -
17975 if (max_len <= 0) {
17977 }
else if (len > max_len) {
17987 max_len = (tp->
t_maxseg - optlen);
17988 if ((sb_offset + len) < sbavail(sb)) {
17989 moff = len % (u_int)max_len;
17999 if (len <= segsiz) {
18014 if (optlen + ipoptlen >= tp->
t_maxseg) {
18023 SOCKBUF_UNLOCK(&so->so_snd);
18028 len = tp->
t_maxseg - optlen - ipoptlen;
18036 (
"%s: len > IP_MAXPACKET", __func__));
18039 if (max_linkhdr + hdrlen > MCLBYTES)
18041 if (max_linkhdr + hdrlen > MHLEN)
18043 panic(
"tcphdr too big");
18051 KASSERT(len >= 0, (
"[%s:%d]: len < 0", __func__, __LINE__));
18053 (flags & TH_FIN) &&
18065 hw_tls = (sb->sb_flags & SB_TLS_IFNET) != 0;
18079 if (len > max_val) {
18084 if (MHLEN < hdrlen + max_linkhdr)
18085 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
18088 m = m_gethdr(M_NOWAIT, MT_DATA);
18091 SOCKBUF_UNLOCK(sb);
18096 m->m_data += max_linkhdr;
18103 mb = sbsndptr_noadv(sb, sb_offset, &moff);
18106 if (len <= MHLEN - hdrlen - max_linkhdr && !hw_tls) {
18107 m_copydata(mb, moff, (
int)len,
18108 mtod(m, caddr_t)+hdrlen);
18110 sbsndptr_adv(sb, mb, len);
18113 struct sockbuf *msb;
18121 if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb,
18122 ((rsm == NULL) ? hw_tls : 0)
18123#ifdef NETFLIX_COPY_ARGS
18127 if (len <= (tp->
t_maxseg - optlen)) {
18136 if (m->m_next == NULL) {
18137 SOCKBUF_UNLOCK(sb);
18174 if (sb_offset + len == sbused(sb) &&
18176 !(flags & TH_SYN)) {
18181 SOCKBUF_UNLOCK(sb);
18183 SOCKBUF_UNLOCK(sb);
18186 else if (flags & (TH_SYN | TH_FIN | TH_RST))
18191 m = m_gethdr(M_NOWAIT, MT_DATA);
18198 if (isipv6 && (MHLEN < hdrlen + max_linkhdr) &&
18200 M_ALIGN(m, hdrlen);
18203 m->m_data += max_linkhdr;
18206 SOCKBUF_UNLOCK_ASSERT(sb);
18207 m->m_pkthdr.rcvif = (
struct ifnet *)0;
18209 mac_inpcb_create_mbuf(inp, m);
18223 ulen = hdrlen + len -
sizeof(
struct ip6_hdr);
18226 ulen = hdrlen + len -
sizeof(
struct ip);
18232 ip6 = mtod(m,
struct ip6_hdr *);
18234 udp = (
struct udphdr *)((caddr_t)ip6 +
sizeof(
struct ip6_hdr));
18237 ulen = hdrlen + len -
sizeof(
struct ip6_hdr);
18239 th = (
struct tcphdr *)(udp + 1);
18241 th = (
struct tcphdr *)(ip6 + 1);
18246 ip = mtod(m,
struct ip *);
18251 udp = (
struct udphdr *)((caddr_t)
ip +
sizeof(
struct ip));
18254 ulen = hdrlen + len -
sizeof(
struct ip);
18256 th = (
struct tcphdr *)(udp + 1);
18258 th = (
struct tcphdr *)(
ip + 1);
18288 ip6->ip6_flow |= htonl(ect << 20);
18308 if (sack_rxmit == 0) {
18309 if (len || (flags & (TH_SYN | TH_FIN))) {
18310 th->th_seq = htonl(tp->
snd_nxt);
18313 th->th_seq = htonl(tp->
snd_max);
18317 th->th_seq = htonl(rsm->
r_start);
18320 th->th_ack = htonl(tp->
rcv_nxt);
18327 if (flags & TH_RST) {
18330 if (recwin < (
long)(so->so_rcv.sb_hiwat / 4) &&
18331 recwin < (
long)segsiz) {
18344 if (flags & TH_SYN)
18345 th->th_win = htons((u_short)
18346 (min(sbspace(&so->so_rcv), TCP_MAXWIN)));
18349 recwin = roundup2(recwin, 1 << tp->
rcv_scale);
18350 th->th_win = htons((u_short)(recwin >> tp->
rcv_scale));
18360 if (th->th_win == 0) {
18364 tp->
t_flags &= ~TF_RXWIN0SENT;
18385 ip6 = mtod(m,
struct ip6_hdr *);
18388 ip = mtod(m,
struct ip *);
18394#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
18402 if (!TCPMD5_ENABLED() || TCPMD5_OUTPUT(m, th,
18413 bcopy(opt, th + 1, optlen);
18414 th->th_off = (
sizeof(
struct tcphdr) + optlen) >> 2;
18420 m->m_pkthdr.len = hdrlen + len;
18428 m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
18429 m->m_pkthdr.csum_data = offsetof(
struct udphdr, uh_sum);
18431 th->th_sum = htons(0);
18434 m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
18435 m->m_pkthdr.csum_data = offsetof(
struct tcphdr, th_sum);
18436 th->th_sum = in6_cksum_pseudo(ip6,
18437 sizeof(
struct tcphdr) + optlen + len,
IPPROTO_TCP,
18442#if defined(INET6) && defined(INET)
18448 m->m_pkthdr.csum_flags = CSUM_UDP;
18449 m->m_pkthdr.csum_data = offsetof(
struct udphdr, uh_sum);
18452 th->th_sum = htons(0);
18455 m->m_pkthdr.csum_flags = CSUM_TCP;
18456 m->m_pkthdr.csum_data = offsetof(
struct tcphdr, th_sum);
18463 (
"%s: IP version incorrect: %d", __func__,
ip->
ip_v));
18472 KASSERT(len > tp->
t_maxseg - optlen,
18473 (
"%s: len <= tso_segsz", __func__));
18474 m->m_pkthdr.csum_flags |= CSUM_TSO;
18475 m->m_pkthdr.tso_segsz = tp->
t_maxseg - optlen;
18477 KASSERT(len + hdrlen == m_length(m, NULL),
18478 (
"%s: mbuf chain different than expected: %d + %u != %u",
18479 __func__, len, hdrlen, m_length(m, NULL)));
18483 hhook_run_tcp_est_out(tp, th, &to, len, tso);
18506 if (rsm || sack_rxmit) {
18527 len, &log,
false, NULL, NULL, 0, &tv);
18555 ip6->ip6_plen = htons(m->m_pkthdr.len -
sizeof(*ip6));
18560 tp->
t_flags2 &= ~TF2_PLPMTU_PMTUD;
18563 TCP_PROBE5(connect__request, NULL, tp, ip6, tp, th);
18567 error = ip6_output(m,
18568#
if defined(IPSEC) || defined(IPSEC_SUPPORT)
18577 if (error == EMSGSIZE && inp->
inp_route6.ro_nh != NULL)
18581#if defined(INET) && defined(INET6)
18586 ip->
ip_len = htons(m->m_pkthdr.len);
18589 ip->
ip_ttl = in6_selecthlim(inp, NULL);
18608 tp->
t_flags2 &= ~TF2_PLPMTU_PMTUD;
18617#
if defined(IPSEC) || defined(IPSEC_SUPPORT)
18625 if (error == EMSGSIZE && inp->
inp_route.ro_nh != NULL)
18641 if (rsm && doing_tlp) {
18648 if (rsm && (doing_tlp == 0)) {
18664 tot_len_this_send += len;
18667 else if (len == 1) {
18669 }
else if (len > 1) {
18672 idx = (len / segsiz) + 3;
18697 rsm, add_flag, s_mb, s_moff, hw_tls);
18700 if ((error == 0) &&
18705 tcp_seq startseq = tp->
snd_nxt;
18708 if (rsm && (doing_tlp == 0))
18716 if (doing_tlp == 0) {
18735 if (flags & (TH_SYN | TH_FIN)) {
18736 if (flags & TH_SYN)
18738 if (flags & TH_FIN) {
18801 SOCKBUF_UNLOCK_ASSERT(sb);
18815#ifdef TCP_ACCOUNTING
18816 crtsc = get_cyclecount();
18817 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
18821 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
18824 counter_u64_add(tcp_proc_time[
SND_OUT_FAIL], (crtsc - ts_val));
18861#ifdef TCP_ACCOUNTING
18862 crtsc = get_cyclecount();
18863 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
18867 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
18870 counter_u64_add(tcp_proc_time[
SND_OUT_FAIL], (crtsc - ts_val));
18886#ifdef TCP_ACCOUNTING
18887 crtsc = get_cyclecount();
18888 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
18892 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
18895 counter_u64_add(tcp_proc_time[
SND_OUT_FAIL], (crtsc - ts_val));
18930 if ((error == 0) && (flags & TH_FIN))
18932 if (flags & TH_RST) {
18940 }
else if ((slot == 0) && (sendalot == 0) && tot_len_this_send) {
18996 if ((error == 0) &&
18998 ((flags & (TH_SYN|TH_FIN)) == 0) &&
19007 (len > 0) && (orig_len > 0) &&
19008 (orig_len > len) &&
19009 ((orig_len - len) >= segsiz) &&
19024 (
"rack:%p left_to_send:%u sbavail:%u out:%u",
19038 ipoptlen, orig_len, len, error,
19039 (rsm == NULL), optlen, __LINE__, 2);
19040 }
else if (sendalot) {
19044 if ((error == 0) &&
19046 ((flags & (TH_SYN|TH_FIN)) == 0) &&
19055 (len > 0) && (orig_len > 0) &&
19056 (orig_len > len) &&
19057 ((orig_len - len) >= segsiz) &&
19072 (
"rack:%p left_to_send:%u sbavail:%u out:%u",
19084 ipoptlen, orig_len, len, error,
19085 (rsm == NULL), optlen, __LINE__, 3);
19087 ret =
rack_fast_output(tp, rack, ts_val, cts, ms_cts, &tv, tot_len_this_send, &error);
19101#ifdef TCP_ACCOUNTING
19102 crtsc = get_cyclecount() - ts_val;
19103 if (tot_len_this_send) {
19104 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
19108 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
19112 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
19113 tp->tcp_cnt_counters[
CNT_OF_MSS_OUT] += ((tot_len_this_send + segsiz - 1) /segsiz);
19115 counter_u64_add(tcp_cnt_counters[
CNT_OF_MSS_OUT], ((tot_len_this_send + segsiz - 1) /segsiz));
19117 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
19120 counter_u64_add(tcp_cnt_counters[
SND_OUT_ACK], 1);
19121 if (tp->
t_flags2 & TF2_TCP_ACCOUNTING) {
19124 counter_u64_add(tcp_proc_time[
SND_OUT_ACK], crtsc);
19128 if (error == ENOBUFS)
19221 }
else if (prof == 3) {
19263 }
else if (prof == 2) {
19295 }
else if (prof == 0) {
19380 M_TCPFSB, M_NOWAIT|M_ZERO);
19397 struct epoch_tracker et;
19398 struct sockopt sopt;
19404 switch (sopt_name) {
19406 case TCP_RACK_DSACK_OPT:
19408 if (optval & 0x1) {
19413 if (optval & 0x2) {
19420 case TCP_RACK_PACING_BETA:
19433 sopt.sopt_dir = SOPT_SET;
19436 if (
CC_ALGO(tp)->ctl_output != NULL)
19437 error =
CC_ALGO(tp)->ctl_output(tp->
ccv, &sopt, &opt);
19450 case TCP_RACK_TIMER_SLOP:
19463 case TCP_RACK_PACING_BETA_ECN:
19476 sopt.sopt_dir = SOPT_SET;
19479 if (
CC_ALGO(tp)->ctl_output != NULL)
19480 error =
CC_ALGO(tp)->ctl_output(tp->
ccv, &sopt, &opt);
19492 case TCP_DEFER_OPTIONS:
19504 case TCP_RACK_MEASURE_CNT:
19506 if (optval && (optval <= 0xff)) {
19511 case TCP_REC_ABC_VAL:
19518 case TCP_RACK_ABC_VAL:
19520 if ((optval > 0) && (optval < 255))
19525 case TCP_HDWR_UP_ONLY:
19532 case TCP_PACING_RATE_CAP:
19536 case TCP_RACK_PROFILE:
19540 case TCP_USE_CMP_ACKS:
19553 case TCP_SHARED_CWND_TIME_LIMIT:
19560 case TCP_RACK_PACE_TO_FILL:
19579 case TCP_RACK_NO_PUSH_AT_MAX:
19583 else if (optval < 0xff)
19588 case TCP_SHARED_CWND_ENABLE:
19595 case TCP_RACK_MBUF_QUEUE:
19607 case TCP_RACK_NONRXT_CFG_RATE:
19618 else if (optval == 1)
19620 else if (optval == 2)
19625 case TCP_TIMELY_DYN_ADJ:
19631 if (optval >= 100) {
19640 case TCP_RACK_DO_DETECTION:
19647 case TCP_RACK_TLP_USE:
19655 case TCP_RACK_TLP_REDUCE:
19661 case TCP_RACK_PACE_ALWAYS:
19694 case TCP_BBR_RACK_INIT_RATE:
19723 case TCP_BBR_IWINTSO:
19725 if (optval && (optval <= 0xff)) {
19736#ifdef NETFLIX_PEAKRATE
19757 case TCP_RACK_FORCE_MSEG:
19764 case TCP_RACK_PACE_MAX_SEG:
19770 case TCP_RACK_PACE_RATE_REC:
19788 case TCP_RACK_PACE_RATE_SS:
19803 __LINE__, NULL, 0);
19806 case TCP_RACK_PACE_RATE_CA:
19821 __LINE__, NULL, 0);
19823 case TCP_RACK_GP_INCREASE_REC:
19830 __LINE__, NULL, 0);
19832 case TCP_RACK_GP_INCREASE_CA:
19848 __LINE__, NULL, 0);
19850 case TCP_RACK_GP_INCREASE_SS:
19866 __LINE__, NULL, 0);
19868 case TCP_RACK_RR_CONF:
19870 if (optval && optval <= 3)
19875 case TCP_HDWR_RATE_CAP:
19886 case TCP_BBR_HDWR_PACE:
19907 case TCP_RACK_PRR_SENDALOT:
19912 case TCP_RACK_MIN_TO:
19917 case TCP_RACK_EARLY_SEG:
19922 case TCP_RACK_ENABLE_HYSTART:
19935 case TCP_RACK_REORD_THRESH:
19938 if ((optval > 0) && (optval < 31))
19943 case TCP_RACK_REORD_FADE:
19948 case TCP_RACK_TLP_THRESH:
19956 case TCP_BBR_USE_RACK_RR:
19963 case TCP_FAST_RSM_HACK:
19970 case TCP_RACK_PKT_DELAY:
19984 NET_EPOCH_ENTER(et);
19986 NET_EPOCH_EXIT(et);
19990 case TCP_BBR_RACK_RTT_USE:
19999 case TCP_DATA_AFTER_CLOSE:
20009#ifdef NETFLIX_STATS
20010 tcp_log_socket_option(tp, sopt_name, optval, error);
20027 free(dol, M_TCPDO);
20042 RB_FOREACH(rsm, rack_rb_tree_head, &rack->
r_ctl.
rc_mtree) {
20057 if (flags & PRUS_OOB)
20058 return (EOPNOTSUPP);
20101 int32_t error = 0, optval;
20105 if (rack == NULL) {
20116 switch (sopt->sopt_level) {
20120 switch (sopt->sopt_name) {
20121 case IPV6_USE_MIN_MTU:
20128 ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
20137 switch (sopt->sopt_name) {
20156 switch (sopt->sopt_name) {
20157 case TCP_RACK_TLP_REDUCE:
20159 case TCP_RACK_PACE_ALWAYS:
20160 case TCP_BBR_RACK_INIT_RATE:
20161 case TCP_BBR_IWINTSO:
20162 case TCP_RACK_PACE_MAX_SEG:
20163 case TCP_RACK_FORCE_MSEG:
20164 case TCP_RACK_PACE_RATE_CA:
20165 case TCP_RACK_PACE_RATE_SS:
20166 case TCP_RACK_PACE_RATE_REC:
20167 case TCP_RACK_GP_INCREASE_CA:
20168 case TCP_RACK_GP_INCREASE_SS:
20169 case TCP_RACK_GP_INCREASE_REC:
20170 case TCP_RACK_RR_CONF:
20171 case TCP_BBR_HDWR_PACE:
20172 case TCP_HDWR_RATE_CAP:
20173 case TCP_PACING_RATE_CAP:
20174 case TCP_HDWR_UP_ONLY:
20176 case TCP_FAST_RSM_HACK:
20178 case TCP_RACK_PRR_SENDALOT:
20179 case TCP_RACK_MIN_TO:
20180 case TCP_RACK_EARLY_SEG:
20181 case TCP_RACK_REORD_THRESH:
20182 case TCP_RACK_REORD_FADE:
20183 case TCP_RACK_TLP_THRESH:
20184 case TCP_RACK_PKT_DELAY:
20185 case TCP_RACK_TLP_USE:
20186 case TCP_BBR_RACK_RTT_USE:
20187 case TCP_BBR_USE_RACK_RR:
20188 case TCP_RACK_DO_DETECTION:
20190 case TCP_TIMELY_DYN_ADJ:
20191 case TCP_DATA_AFTER_CLOSE:
20192 case TCP_RACK_NONRXT_CFG_RATE:
20193 case TCP_SHARED_CWND_ENABLE:
20194 case TCP_RACK_MBUF_QUEUE:
20195 case TCP_RACK_NO_PUSH_AT_MAX:
20196 case TCP_RACK_PACE_TO_FILL:
20197 case TCP_SHARED_CWND_TIME_LIMIT:
20198 case TCP_RACK_PROFILE:
20199 case TCP_USE_CMP_ACKS:
20200 case TCP_RACK_ABC_VAL:
20201 case TCP_REC_ABC_VAL:
20202 case TCP_RACK_MEASURE_CNT:
20203 case TCP_DEFER_OPTIONS:
20204 case TCP_RACK_DSACK_OPT:
20205 case TCP_RACK_PACING_BETA:
20206 case TCP_RACK_PACING_BETA_ECN:
20207 case TCP_RACK_TIMER_SLOP:
20208 case TCP_RACK_ENABLE_HYSTART:
20216 if (sopt->sopt_name == TCP_PACING_RATE_CAP) {
20217 error = sooptcopyin(sopt, &loptval,
sizeof(loptval),
sizeof(loptval));
20224 error = sooptcopyin(sopt, &optval,
sizeof(optval),
sizeof(optval));
20233 return (ECONNRESET);
20237 return (ENOPROTOOPT);
20240 (sopt->sopt_name != TCP_DEFER_OPTIONS) &&
20241 (sopt->sopt_name != TCP_RACK_PACING_BETA) &&
20242 (sopt->sopt_name != TCP_RACK_PACING_BETA_ECN) &&
20243 (sopt->sopt_name != TCP_RACK_MEASURE_CNT)) {
20264 bzero(ti,
sizeof(*ti));
20266 ti->tcpi_state = tp->
t_state;
20268 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
20270 ti->tcpi_options |= TCPI_OPT_SACK;
20272 ti->tcpi_options |= TCPI_OPT_WSCALE;
20277 ti->tcpi_options |= TCPI_OPT_ECN;
20279 ti->tcpi_options |= TCPI_OPT_TFO;
20283 ti->tcpi_rtt = tp->
t_srtt;
20291 ti->tcpi_rcv_space = tp->
rcv_wnd;
20292 ti->tcpi_rcv_nxt = tp->
rcv_nxt;
20293 ti->tcpi_snd_wnd = tp->
snd_wnd;
20294 ti->tcpi_snd_bwnd = 0;
20295 ti->tcpi_snd_nxt = tp->
snd_nxt;
20301#ifdef NETFLIX_STATS
20304 memcpy(&ti->tcpi_rxsyninfo, &tp->t_rxsyninfo,
sizeof(
struct tcpsyninfo));
20308 ti->tcpi_options |= TCPI_OPT_TOE;
20319 int32_t error, optval;
20320 uint64_t val, loptval;
20321 struct tcp_info ti;
20331 if (rack == NULL) {
20335 switch (sopt->sopt_name) {
20341 error = sooptcopyout(sopt, &ti,
sizeof ti);
20349 case TCP_RACK_PACING_BETA:
20374 case TCP_RACK_PACING_BETA_ECN:
20391 case TCP_RACK_DSACK_OPT:
20400 case TCP_RACK_ENABLE_HYSTART:
20413 case TCP_FAST_RSM_HACK:
20416 case TCP_DEFER_OPTIONS:
20419 case TCP_RACK_MEASURE_CNT:
20422 case TCP_REC_ABC_VAL:
20425 case TCP_RACK_ABC_VAL:
20428 case TCP_HDWR_UP_ONLY:
20431 case TCP_PACING_RATE_CAP:
20434 case TCP_RACK_PROFILE:
20438 case TCP_USE_CMP_ACKS:
20441 case TCP_RACK_PACE_TO_FILL:
20446 case TCP_RACK_NO_PUSH_AT_MAX:
20449 case TCP_SHARED_CWND_ENABLE:
20452 case TCP_RACK_NONRXT_CFG_RATE:
20463 case TCP_RACK_DO_DETECTION:
20466 case TCP_RACK_MBUF_QUEUE:
20470 case TCP_TIMELY_DYN_ADJ:
20473 case TCP_BBR_IWINTSO:
20476 case TCP_RACK_TLP_REDUCE:
20480 case TCP_BBR_RACK_INIT_RATE:
20487 case TCP_RACK_FORCE_MSEG:
20490 case TCP_RACK_PACE_MAX_SEG:
20494 case TCP_RACK_PACE_ALWAYS:
20498 case TCP_RACK_PRR_SENDALOT:
20502 case TCP_RACK_MIN_TO:
20506 case TCP_RACK_EARLY_SEG:
20510 case TCP_RACK_REORD_THRESH:
20514 case TCP_RACK_REORD_FADE:
20518 case TCP_BBR_USE_RACK_RR:
20522 case TCP_RACK_RR_CONF:
20525 case TCP_HDWR_RATE_CAP:
20528 case TCP_BBR_HDWR_PACE:
20531 case TCP_RACK_TLP_THRESH:
20535 case TCP_RACK_PKT_DELAY:
20539 case TCP_RACK_TLP_USE:
20542 case TCP_RACK_PACE_RATE_CA:
20545 case TCP_RACK_PACE_RATE_SS:
20548 case TCP_RACK_PACE_RATE_REC:
20551 case TCP_RACK_GP_INCREASE_SS:
20554 case TCP_RACK_GP_INCREASE_CA:
20557 case TCP_BBR_RACK_RTT_USE:
20563 case TCP_DATA_AFTER_CLOSE:
20566 case TCP_SHARED_CWND_TIME_LIMIT:
20569 case TCP_RACK_TIMER_SLOP:
20578 if (TCP_PACING_RATE_CAP)
20579 error = sooptcopyout(sopt, &loptval,
sizeof loptval);
20581 error = sooptcopyout(sopt, &optval,
sizeof optval);
20589 if (sopt->sopt_dir == SOPT_SET) {
20591 }
else if (sopt->sopt_dir == SOPT_GET) {
20594 panic(
"%s: sopt_dir $%d", __func__, sopt->sopt_dir);
20599 __XSTRING(STACKNAME),
20601 __XSTRING(STACKALIAS),
20608 memset(mem, 0, size);
20628 rack_zone = uma_zcreate(__XSTRING(MODNAME)
"_map",
20634 rack_ctor, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
20638 SYSCTL_STATIC_CHILDREN(_net_inet_tcp),
20641 __XSTRING(STACKALIAS),
20643 __XSTRING(STACKNAME),
20645 CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
20648 printf(
"Failed to add sysctl node\n");
20657 printf(
"Failed to register %s stack name for "
20659 __XSTRING(MODNAME));
20665 printf(
"Failed to register rack module -- err:%d\n", err);
20689 return (EOPNOTSUPP);
20695 .name = __XSTRING(MODNAME),
#define CCF_USE_LOCAL_ABC
#define CCF_HYSTART_CAN_SH_CWND
#define CCF_HYSTART_ALLOWED
#define CCF_HYSTART_CONS_SSTH
#define CCALGONAME_NEWRENO
#define CC_NEWRENO_BETA_ECN
#define CC_NEWRENO_BETA_ECN_ENABLED
#define BANDLIM_UNLIMITED
#define BANDLIM_RST_OPENPORT
u_short in_pseudo(u_int32_t a, u_int32_t b, u_int32_t c)
#define TCP_PROBE5(probe, arg0, arg1, arg2, arg3, arg4)
void in_losing(struct inpcb *inp)
#define INP_DONT_SACK_QUEUE
#define INP_MBUF_QUEUE_READY
#define INP_WLOCK_ASSERT(inp)
#define INP_SUPPORTS_MBUFQ
#define IPV6_FLOWINFO_MASK
int ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct ip_moptions *imo, struct inpcb *inp)
static int32_t rack_gp_rtt_mindiv
static void rack_log_rtt_shrinks(struct tcp_rack *rack, uint32_t us_cts, uint32_t rtt, uint32_t line, uint8_t reas)
counter_u64_t rack_to_alloc_hard
static int32_t rack_hptsi_segments
static void rack_exit_probertt(struct tcp_rack *rack, uint32_t us_cts)
static int rack_fast_output(struct tcpcb *tp, struct tcp_rack *rack, uint64_t ts_val, uint32_t cts, uint32_t ms_cts, struct timeval *tv, long tot_len, int *send_err)
counter_u64_t rack_sack_proc_all
static void rack_setup_offset_for_rsm(struct rack_sendmap *src_rsm, struct rack_sendmap *rsm)
static void rack_fini(struct tcpcb *tp, int32_t tcb_is_purged)
static int32_t rack_wma_divisor
counter_u64_t rack_tlp_retran_bytes
static void rack_adjust_orig_mlen(struct rack_sendmap *rsm)
static int32_t rack_use_rsm_rfo
static int32_t rack_hw_pace_extra_slots
static struct rack_sendmap * rack_find_lowest_rsm(struct tcp_rack *rack)
static int rack_enough_for_measurement(struct tcpcb *tp, struct tcp_rack *rack, tcp_seq th_ack, uint8_t *quality)
static int32_t rack_limits_scwnd
counter_u64_t rack_persists_loss
static uint32_t rack_probertt_use_min_rtt_entry
static int32_t rack_gp_rtt_maxmul
static void rack_do_decay(struct tcp_rack *rack)
static uint16_t rack_per_of_gp_lowthresh
static int32_t rack_send_a_lot_in_prr
static void rack_update_prr(struct tcpcb *tp, struct tcp_rack *rack, uint32_t changed, tcp_seq th_ack)
#define RACK_REXMTVAL(tp)
static void rack_log_map_chg(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *prev, struct rack_sendmap *rsm, struct rack_sendmap *next, int flag, uint32_t th_ack, int line)
static uint32_t rack_min_rtt_movement
static void rack_log_fsb(struct tcp_rack *rack, struct tcpcb *tp, struct socket *so, uint32_t flags, unsigned ipoptlen, int32_t orig_len, int32_t len, int error, int rsm_is_null, int optlen, int line, uint16_t mode)
static void rack_free(struct tcp_rack *rack, struct rack_sendmap *rsm)
counter_u64_t rack_move_none
static int32_t rack_max_per_above
static void rack_log_to_event(struct tcp_rack *rack, int32_t to_num, struct rack_sendmap *rsm)
static const int32_t rack_free_cache
static int32_t rack_per_upper_bound_ca
static uint16_t rack_per_of_gp_ss
counter_u64_t rack_to_alloc_emerg
static int rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
static void rack_adjust_sendmap(struct tcp_rack *rack, struct sockbuf *sb, tcp_seq snd_una)
static uint32_t rack_min_srtts
static uint32_t rack_probertt_lower_within
static int rack_add_deferred_option(struct tcp_rack *rack, int sopt_name, uint64_t loptval)
static void rack_remxt_tmr(struct tcpcb *tp)
static void rack_collapsed_window(struct tcp_rack *rack)
static int rack_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
static int rack_timeout_tlp(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, uint8_t *doing_tlp)
static uint32_t rack_probertt_gpsrtt_cnt_mul
counter_u64_t rack_persists_lost_ends
counter_u64_t rack_fto_send
counter_u64_t rack_proc_non_comp_ack
static void rack_set_pace_segments(struct tcpcb *tp, struct tcp_rack *rack, uint32_t line, uint64_t *fill_override)
static struct mbuf * rack_fo_base_copym(struct mbuf *the_m, uint32_t the_off, int32_t *plen, struct rack_fast_send_blk *fsb, int32_t seglimit, int32_t segsize, int hw_tls)
static void rack_need_set_test(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, tcp_seq th_ack, int line, int use_which)
static int32_t rack_enable_hw_pacing
static void rack_validate_multipliers_at_or_above100(struct tcp_rack *rack)
static uint16_t rack_get_output_gain(struct tcp_rack *rack, struct rack_sendmap *rsm)
static int rack_process_option(struct tcpcb *tp, struct tcp_rack *rack, int sopt_name, uint32_t optval, uint64_t loptval)
static void tcp_rack_partialack(struct tcpcb *tp)
static int32_t rack_handoff_ok(struct tcpcb *tp)
static __inline int rb_map_cmp(struct rack_sendmap *b, struct rack_sendmap *a)
static int32_t rack_do_dyn_mul
static uint32_t rack_proc_sack_blk(struct tcpcb *tp, struct tcp_rack *rack, struct sackblk *sack, struct tcpopt *to, struct rack_sendmap **prsm, uint32_t cts, int *moved_two)
static struct rack_sendmap * rack_alloc(struct tcp_rack *rack)
static void rack_update_seg(struct tcp_rack *rack)
static int rack_timeout_rxt(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
static uint32_t rack_grab_rtt(struct tcpcb *tp, struct tcp_rack *rack)
static void rack_init_sysctls(void)
static int32_t rack_persist_max
static int32_t rack_gp_decrease_per
counter_u64_t rack_input_idle_reduces
static int32_t rack_req_segs
static int rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt)
static int32_t rack_client_low_buf
static int rack_timeout_rack(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
static struct rack_sendmap * rack_check_recovery_mode(struct tcpcb *tp, uint32_t tsused)
static int32_t rack_tlp_threshold_use
static int32_t pace_to_fill_cwnd(struct tcp_rack *rack, int32_t slot, uint32_t len, uint32_t segsiz, int *capped, uint64_t *rate_wanted, uint8_t non_paced)
static int rack_init_fsb(struct tcpcb *tp, struct tcp_rack *rack)
static void rack_enter_probertt(struct tcp_rack *rack, uint32_t us_cts)
static bool rack_mod_inited
#define RACK_TCPT_RANGESET(tv, value, tvmin, tvmax, slop)
static void rack_undo_cc_pacing(struct tcp_rack *rack)
static int rack_ctloutput(struct inpcb *inp, struct sockopt *sopt)
static int32_t rack_rwnd_block_ends_measure
counter_u64_t rack_try_scwnd
static int32_t rack_enable_shared_cwnd
static void rack_handle_delayed_ack(struct tcpcb *tp, struct tcp_rack *rack, int32_t tlen, int32_t tfo_syn)
static int32_t rack_tlp_use_greater
struct sysctl_ctx_list rack_sysctl_ctx
RB_GENERATE(rack_rb_tree_head, rack_sendmap, r_next, rb_map_cmp)
static int32_t rack_verbose_logging
static int rack_note_dsack(struct tcp_rack *rack, tcp_seq start, tcp_seq end)
counter_u64_t rack_saw_enobuf_hw
static uint32_t rack_get_measure_window(struct tcpcb *tp, struct tcp_rack *rack)
static void rack_log_pacing_delay_calc(struct tcp_rack *rack, uint32_t len, uint32_t slot, uint64_t bw_est, uint64_t bw, uint64_t len_time, int method, int line, struct rack_sendmap *rsm, uint8_t quality)
static void rack_apply_updated_usrtt(struct tcp_rack *rack, uint32_t us_rtt, uint32_t us_cts)
static void rack_stop_all_timers(struct tcpcb *tp)
static void tcp_rack_xmit_timer(struct tcp_rack *rack, int32_t rtt, uint32_t len, uint32_t us_tim, int confidence, struct rack_sendmap *rsm, uint16_t rtrcnt)
static int rack_set_profile(struct tcp_rack *rack, int prof)
static int32_t rack_gp_per_bw_mul_down
static uint32_t rack_min_probertt_hold
static int32_t rack_max_abc_post_recovery
DECLARE_MODULE(MODNAME, tcp_rack, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY)
static uint32_t rack_probertt_gpsrtt_cnt_div
static void rack_gain_for_fastoutput(struct tcp_rack *rack, struct tcpcb *tp, struct socket *so, uint32_t acked_amount)
static int32_t rack_reorder_fade
counter_u64_t rack_sack_used_next_merge
static int32_t rack_persist_min
static void rack_log_type_pacing_sizes(struct tcpcb *tp, struct tcp_rack *rack, uint32_t arg1, uint32_t arg2, uint32_t arg3, uint8_t frm)
counter_u64_t rack_to_tot
static void rack_set_state(struct tcpcb *tp, struct tcp_rack *rack)
counter_u64_t rack_sack_used_prev_merge
static int32_t rack_use_max_for_nobackoff
static int rack_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
static void rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, uint32_t th_ack, uint16_t nsegs, uint16_t type, int32_t recovery)
counter_u64_t rack_opts_arry[RACK_OPTS_SIZE]
static int32_t rack_probertt_clear_is
static void rack_mtu_change(struct tcpcb *tp)
static int32_t rack_limit_time_with_srtt
counter_u64_t rack_collapsed_win
static int rack_bw_can_be_raised(struct tcp_rack *rack, uint64_t cur_bw, uint64_t last_bw_est, uint16_t mult)
static void rack_dtor(void *mem, int32_t size, void *arg)
static uint32_t rack_get_persists_timer_val(struct tcpcb *tp, struct tcp_rack *rack)
static uint32_t rack_probe_rtt_sets_cwnd
static void rack_log_sack_passed(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm)
static void rack_hw_tls_change(struct tcpcb *tp, int chg)
static uint32_t rack_highest_sack_thresh_seen
static int32_t rack_gp_increase_per
static void rack_timer_audit(struct tcpcb *tp, struct tcp_rack *rack, struct sockbuf *sb)
static int32_t rack_do_hystart
VNET_DECLARE(uint32_t, newreno_beta)
static int32_t rack_pkt_delay
static int32_t use_rack_rr
static void rack_log_retran_reason(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t tsused, uint32_t thresh, int mod)
static int rack_process_timers(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, uint8_t hpts_calling, uint8_t *doing_tlp)
MODULE_DEPEND(MODNAME, tcphpts, 1, 1, 1)
static void rack_timer_stop(struct tcpcb *tp, uint32_t timer_type)
static void rack_log_rtt_sample(struct tcp_rack *rack, uint32_t rtt)
static struct tcp_function_block __tcp_rack
static void rack_init_fsb_block(struct tcpcb *tp, struct tcp_rack *rack)
static uint32_t rack_timer_start(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, int sup_rack)
static __inline void rack_clone_rsm(struct tcp_rack *rack, struct rack_sendmap *nrsm, struct rack_sendmap *rsm, uint32_t start)
counter_u64_t rack_saw_enetunreach
static int rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
static void rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered_rec, int dup_ack_struck)
static int32_t rack_ignore_data_after_close
static int rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t nxt_pkt, uint8_t iptos)
static uint32_t rack_time_between_probertt
static int32_t rack_use_fsb
static uint32_t rc_init_window(struct tcp_rack *rack)
static void rack_log_hpts_diag(struct tcp_rack *rack, uint32_t cts, struct hpts_diag *diag, struct timeval *tv)
static int rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, uint64_t ts_val, uint32_t cts, uint32_t ms_cts, struct timeval *tv, int len, uint8_t doing_tlp)
counter_u64_t rack_to_alloc
static void rack_exit_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
static int32_t rack_disable_prr
static int32_t tcp_addrack(module_t mod, int32_t type, void *data)
static int rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, uint32_t tiwin, int32_t tlen, int32_t *ofia, int32_t thflags, int32_t *ret_val)
static int32_t rack_hbp_thresh
static int32_t rack_use_rfo
static void rack_timer_activate(struct tcpcb *tp, uint32_t timer_type, uint32_t delta)
static uint64_t rack_get_output_bw(struct tcp_rack *rack, uint64_t bw, struct rack_sendmap *rsm, int *capped)
static int32_t rack_rto_min
static struct rack_sendmap * rack_alloc_full_limit(struct tcp_rack *rack)
static int32_t rack_apply_rtt_with_reduced_conf
static void rack_set_cc_pacing(struct tcp_rack *rack)
static void rack_do_win_updates(struct tcpcb *tp, struct tcp_rack *rack, uint32_t tiwin, uint32_t seq, uint32_t ack, uint32_t cts, uint32_t high_seq)
counter_u64_t rack_sack_proc_restart
static int32_t rack_gp_no_rec_chg
static int32_t rack_tlp_thresh
counter_u64_t rack_tlp_tot
static void rack_start_hpts_timer(struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts, int32_t slot, uint32_t tot_len_this_send, int sup_rack)
static int32_t rack_timely_max_push_rise
static void rack_increase_bw_mul(struct tcp_rack *rack, int timely_says, uint64_t cur_bw, uint64_t last_bw_est, int override)
static int32_t rack_hw_rate_min
static int32_t rack_enobuf_hw_boost_mult
counter_u64_t rack_to_alloc_limited
MODULE_VERSION(MODNAME, 1)
static int32_t rack_use_cmp_acks
static uint16_t rack_per_of_gp_probertt
static void rack_fill_info(struct tcpcb *tp, struct tcp_info *ti)
static int32_t rack_enobuf_hw_min
static void rack_counter_destroy(void)
static uint32_t rack_calc_thresh_tlp(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t srtt)
counter_u64_t rack_tlp_retran
static uint8_t rack_req_measurements
static void rack_handle_might_revert(struct tcpcb *tp, struct tcp_rack *rack)
static int32_t rack_timely_int_timely_only
static void rack_validate_fo_sendwin_up(struct tcpcb *tp, struct tcp_rack *rack)
counter_u64_t rack_hot_alloc
counter_u64_t rack_hw_pace_init_fail
static int32_t rack_prr_addbackmax
static void rack_log_timely(struct tcp_rack *rack, uint32_t logged, uint64_t cur_bw, uint64_t low_bnd, uint64_t up_bnd, int line, uint8_t method)
static int32_t rack_lower_cwnd_at_tlp
static int rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
static void rack_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint8_t iptos)
counter_u64_t rack_saw_enobuf
static int rack_timeout_keepalive(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
static void rack_process_to_cumack(struct tcpcb *tp, struct tcp_rack *rack, register uint32_t th_ack, uint32_t cts, struct tcpopt *to)
static int32_t rack_timely_max_push_drop
static void rack_handle_probe_response(struct tcp_rack *rack, uint32_t tiwin, uint32_t us_cts)
static void rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, uint64_t ts, uint16_t add_flag)
static uint32_t rack_probertt_use_min_rtt_exit
static void rack_log_hdwr_pacing(struct tcp_rack *rack, uint64_t rate, uint64_t hw_rate, int line, int error, uint16_t mod)
static void rack_log_to_cancel(struct tcp_rack *rack, int32_t hpts_removed, int line, uint32_t us_cts, struct timeval *tv, uint32_t flags_on_entry)
counter_u64_t rack_fto_rsm_send
static void rack_log_rtt_sample_calc(struct tcp_rack *rack, uint32_t rtt, uint32_t send_time, uint32_t ack_time, int where)
static struct mbuf * rack_fo_m_copym(struct tcp_rack *rack, int32_t *plen, int32_t seglimit, int32_t segsize, struct mbuf **s_mb, int *s_soff)
static int32_t rack_always_send_oldest
static int32_t rack_slot_reduction
RB_PROTOTYPE(rack_rb_tree_head, rack_sendmap, r_next, rb_map_cmp)
static int32_t rack_output(struct tcpcb *tp)
static int32_t rack_enobuf_hw_max
static int32_t rack_per_upper_bound_ss
static int32_t rack_rate_sample_method
static void rack_log_type_bbrsnd(struct tcp_rack *rack, uint32_t len, uint32_t slot, uint32_t cts, struct timeval *tv)
static void rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, uint32_t slot, uint8_t hpts_calling, int reason, uint32_t cwnd_to_use)
static void rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick, int event, int line)
static int32_t rack_ctor(void *mem, int32_t size, void *arg, int32_t how)
counter_u64_t rack_split_limited
static void tcp_rack_xmit_timer_commit(struct tcp_rack *rack, struct tcpcb *tp)
static struct rack_sendmap * rack_alloc_limit(struct tcp_rack *rack, uint8_t limit_type)
static int32_t rack_def_profile
static void rack_validate_multipliers_at_or_below_100(struct tcp_rack *rack)
static uint32_t rack_def_data_window
static uint32_t rack_decrease_highrtt(struct tcp_rack *rack, uint32_t curper, uint32_t rtt)
counter_u64_t rack_sack_splits
static uint64_t rack_bw_rate_cap
static void rack_enter_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
struct rack_sendmap * tcp_rack_output(struct tcpcb *tp, struct tcp_rack *rack, uint32_t tsused)
static int sysctl_rack_clear(SYSCTL_HANDLER_ARGS)
counter_u64_t rack_tlp_newdata
static int rack_update_rtt(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, struct tcpopt *to, uint32_t cts, int32_t ack_type, tcp_seq th_ack)
static int32_t rack_hw_rwnd_factor
counter_u64_t rack_out_size[TCP_MSS_ACCT_SIZE]
static uint16_t rack_atexit_prtt
static void rack_strike_dupack(struct tcp_rack *rack)
static void rack_log_to_prr(struct tcp_rack *rack, int frm, int orig_cwnd)
static int32_t rack_timely_dec_clear
counter_u64_t rack_extended_rfo
static int32_t rack_non_rxt_use_cr
static void rack_log_input_packet(struct tcpcb *tp, struct tcp_rack *rack, struct tcp_ackent *ae, int ackval, uint32_t high_seq)
static int32_t rack_hw_rate_to_low
static void rack_set_prtt_target(struct tcp_rack *rack, uint32_t segsiz, uint32_t rtt)
static void rack_post_recovery(struct tcpcb *tp, uint32_t th_seq)
static int rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen, uint8_t iptos, int32_t nxt_pkt, struct timeval *tv)
static void rack_update_multiplier(struct tcp_rack *rack, int32_t timely_says, uint64_t last_bw_est, uint32_t rtt, int32_t rtt_diff)
counter_u64_t rack_alloc_limited_conns
static int rack_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
static void rack_log_wakeup(struct tcpcb *tp, struct tcp_rack *rack, struct sockbuf *sb, uint32_t len, int type)
static uint16_t rack_per_of_gp_rec
#define DELAY_ACK(tp, tlen)
static void rack_log_to_processing(struct tcp_rack *rack, uint32_t cts, int32_t ret, int32_t timers)
counter_u64_t rack_sack_proc_short
static void rack_decrease_bw_mul(struct tcp_rack *rack, int timely_says, uint32_t rtt, int32_t rtt_diff)
static int rack_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
static int rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos)
static int32_t rack_pace_every_seg
static uint32_t rack_get_pacing_len(struct tcp_rack *rack, uint64_t bw, uint32_t mss)
static int rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mbuf *m, int nxt_pkt, struct timeval *tv)
static int32_t rack_dsack_std_based
static const char * rack_stack_names[]
struct sysctl_oid * rack_sysctl_root
MALLOC_DEFINE(M_TCPFSB, "tcp_fsb", "TCP fast send block")
static uint64_t rack_get_fixed_pacing_bw(struct tcp_rack *rack)
counter_u64_t rack_persists_acks
static struct rack_sendmap * rack_merge_rsm(struct tcp_rack *rack, struct rack_sendmap *l_rsm, struct rack_sendmap *r_rsm)
static int32_t rack_tlp_limit
static uint16_t rack_per_of_gp_ca
static void rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len, uint32_t seq_out, uint16_t th_flags, int32_t err, uint64_t ts, struct rack_sendmap *hintrsm, uint16_t add_flags, struct mbuf *s_mb, uint32_t s_moff, int hw_tls)
static void rack_log_doseg_done(struct tcp_rack *rack, uint32_t cts, int32_t nxt_pkt, int32_t did_out, int way_out, int nsegs)
static void rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t slot, uint8_t which)
counter_u64_t rack_hw_pace_lost
static int32_t rack_delayed_ack_time
static uint32_t rack_must_drain
static int32_t rack_gp_rtt_minmul
static int is_rsm_inside_declared_tlp_block(struct tcp_rack *rack, struct rack_sendmap *rsm)
static void rack_clear_rate_sample(struct tcp_rack *rack)
static uint64_t rack_get_bw(struct tcp_rack *rack)
static void rack_sndbuf_autoscale(struct tcp_rack *rack)
static void rack_timer_cancel(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, int line)
static int32_t rack_timer_active(struct tcpcb *tp, uint32_t timer_type)
static int32_t rack_stopall(struct tcpcb *tp)
static int rack_get_sockopt(struct inpcb *inp, struct sockopt *sopt)
static int32_t rack_hw_up_only
static int32_t rack_enable_mqueue_for_nonpaced
static void rack_cc_after_idle(struct tcp_rack *rack, struct tcpcb *tp)
static void rack_cong_signal(struct tcpcb *tp, uint32_t type, uint32_t ack)
static int32_t rack_gp_per_bw_mul_up
static void rack_convert_rtts(struct tcpcb *tp)
static void rack_check_bottom_drag(struct tcpcb *tp, struct tcp_rack *rack, struct socket *so, int32_t acked)
static int rack_pru_options(struct tcpcb *tp, int flags)
#define timersub(tvp, uvp, vvp)
static int rack_set_sockopt(struct inpcb *inp, struct sockopt *sopt)
static int32_t rack_down_raise_thresh
static int32_t rack_cwnd_block_ends_measure
static void rack_cc_conn_init(struct tcpcb *tp)
static void rack_start_gp_measurement(struct tcpcb *tp, struct tcp_rack *rack, tcp_seq startseq, uint32_t sb_offset)
static int32_t rack_timely_no_stopping
static void rack_peer_reneges(struct tcp_rack *rack, struct rack_sendmap *rsm, tcp_seq th_ack)
counter_u64_t rack_sack_skipped_acked
static uint32_t rack_max_drain_wait
static void rack_apply_deferred_options(struct tcp_rack *rack)
static uint32_t rack_probertt_filter_life
static int32_t rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, struct rack_sendmap *rsm, uint32_t segsiz)
static void rack_un_collapse_window(struct tcp_rack *rack)
static uint32_t rack_min_measure_usec
static struct rack_sendmap * rack_find_high_nonack(struct tcp_rack *rack, struct rack_sendmap *rsm)
counter_u64_t rack_move_some
static uint32_t rack_probe_rtt_safety_val
static int32_t rack_limited_retran
#define V_newreno_beta_ecn
static uint16_t rack_per_of_gp_probertt_reduce
static int32_t rack_hw_rate_caps
static void rack_log_rtt_upd(struct tcpcb *tp, struct tcp_rack *rack, uint32_t t, uint32_t len, struct rack_sendmap *rsm, int conf)
int32_t rack_clear_counter
static void rack_free_trim(struct tcp_rack *rack)
static int32_t rack_stats_gets_ms_rtt
static int32_t rack_max_drain_hbp
counter_u64_t rack_ack_total
counter_u64_t rack_sack_total
static int32_t rack_tlp_min
static void rack_check_probe_rtt(struct tcp_rack *rack, uint32_t us_cts)
static int32_t rack_per_lower_bound
static int32_t rack_pace_one_seg
counter_u64_t rack_non_fto_send
static uint16_t rack_atexit_prtt_hbp
static int32_t rack_reorder_thresh
static void rack_do_goodput_measurement(struct tcpcb *tp, struct tcp_rack *rack, tcp_seq th_ack, int line, uint8_t quality)
static int32_t rack_make_timely_judgement(struct tcp_rack *rack, uint32_t rtt, int32_t rtt_diff, uint32_t prev_rtt)
static void rack_log_dsack_event(struct tcp_rack *rack, uint8_t mod, uint32_t flex4, uint32_t flex5, uint32_t flex6)
static int32_t rack_init(struct tcpcb *tp)
static uint32_t rack_highest_move_thresh_seen
counter_u64_t rack_sack_attacks_detected
counter_u64_t rack_sack_attacks_reversed
static int rack_check_data_after_close(struct mbuf *m, struct tcpcb *tp, int32_t *tlen, struct tcphdr *th, struct socket *so)
static int rack_timeout_delack(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
static int32_t rack_use_imac_dack
counter_u64_t rack_multi_single_eq
static int rack_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, uint32_t tiwin, int32_t nxt_pkt, uint32_t cts)
static uint32_t rack_update_entry(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, uint64_t ts, int32_t *lenp, uint16_t add_flag)
static int32_t rack_rto_max
counter_u64_t rack_nfto_resend
static uint32_t rack_calc_thresh_rack(struct tcp_rack *rack, uint32_t srtt, uint32_t cts)
static void rack_log_alt_to_to_cancel(struct tcp_rack *rack, uint32_t flex1, uint32_t flex2, uint32_t flex3, uint32_t flex4, uint32_t flex5, uint32_t flex6, uint16_t flex7, uint8_t mod)
counter_u64_t rack_express_sack
static int32_t rack_sack_not_required
static int32_t rack_min_to
static int32_t rack_autosndbuf_inc
static int32_t rack_timely_min_segs
static uint32_t rack_goal_bdp
counter_u64_t rack_persists_sends
static int rack_timeout_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
static int32_t rack_default_init_window
static uint32_t rack_get_decrease(struct tcp_rack *rack, uint32_t curper, int32_t rtt_diff)
static uint32_t rack_what_can_we_send(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cwnd_to_use, uint32_t avail, int32_t sb_offset)
int __ctf_process_rst(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, uint32_t *ts, uint32_t *cnt)
uint32_t ctf_decay_count(uint32_t count, uint32_t decay)
int32_t ctf_progress_timeout_check(struct tcpcb *tp, bool log)
int ctf_do_queued_segments(struct socket *so, struct tcpcb *tp, int have_pkt)
void ctf_log_sack_filter(struct tcpcb *tp, int num_sack_blks, struct sackblk *sack_blocks)
void ctf_ack_war_checks(struct tcpcb *tp, uint32_t *ts, uint32_t *cnt)
int ctf_ts_check_ac(struct tcpcb *tp, int32_t thflags)
void ctf_do_dropwithreset(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t rstreason, int32_t tlen)
void ctf_do_drop(struct mbuf *m, struct tcpcb *tp)
void ctf_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t *ret_val)
int _ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t *tlenp, int32_t *thf, int32_t *drop_hdrlen, int32_t *ret_val, uint32_t *ts, uint32_t *cnt)
uint32_t ctf_outstanding(struct tcpcb *tp)
uint32_t ctf_fixed_maxseg(struct tcpcb *tp)
uint32_t ctf_flight_size(struct tcpcb *tp, uint32_t rc_sacked)
void __ctf_do_dropafterack(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t thflags, int32_t tlen, int32_t *ret_val, uint32_t *ts, uint32_t *cnt)
int ctf_ts_check(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t tlen, int32_t thflags, int32_t *ret_val)
void ctf_calc_rwin(struct socket *so, struct tcpcb *tp)
void ctf_do_dropwithreset_conn(struct mbuf *m, struct tcpcb *tp, struct tcphdr *th, int32_t rstreason, int32_t tlen)
#define ONE_POINT_TWO_MEG
#define TCP_MSS_ACCT_SIZE
#define CTF_JR_APP_LIMITED
#define TCP_MSS_ACCT_INPACE
#define TCP_MSS_ACCT_JUSTRET
#define ctf_do_dropafterack(a, b, c, d, e, f)
#define TCP_MSS_ACCT_SNDACK
#define TCP_MSS_ACCT_PERSIST
#define CTF_JR_CWND_LIMITED
#define TCP_MSS_ACCT_ATIMER
#define PACE_MAX_IP_BYTES
#define DUP_ACK_THRESHOLD
#define CTF_JR_RWND_LIMITED
int sack_filter_blks(struct sack_filter *sf, struct sackblk *in, int numblks, tcp_seq th_ack)
void sack_filter_clear(struct sack_filter *sf, tcp_seq seq)
const struct tcp_hwrate_limit_table * crte
char name[TCP_CA_NAME_MAX]
struct socket * inp_socket
struct ip6_pktopts * in6p_outputopts
struct route_in6 inp_route6
struct mbuf * inp_options
struct m_snd_tag * inp_snd_tag
struct in_conninfo inp_inc
struct in_addr ip_src ip_dst
uint32_t rc_ssthresh_at_erec
uint32_t retran_during_recovery
uint32_t rc_fixed_pacing_rate_rec
struct time_filter_small rc_gp_min_rtt
uint32_t rc_went_idle_time
struct rack_sendmap * rc_resend
uint32_t rc_prr_delivered
uint32_t rc_highest_us_rtt
struct rack_sendmap * rc_end_appl
struct rack_sendmap * rc_first_appl
uint32_t rc_probertt_sndmax_atexit
uint32_t rc_pace_max_segs
struct rack_sendmap * rc_sacklast
uint16_t rack_per_of_gp_ss
uint16_t rc_early_recovery_segs
uint8_t rc_rate_sample_method
uint32_t rc_num_split_allocs
uint32_t challenge_ack_ts
uint32_t rc_lower_rtt_us_cts
struct rack_sendmap * rc_tlpsend
uint16_t rack_per_of_gp_ca
uint16_t rc_reorder_shift
uint32_t rc_last_timeout_snduna
uint16_t rack_per_of_gp_probertt
uint16_t rack_per_of_gp_rec
uint32_t rc_num_maps_alloced
uint32_t rc_rack_tmit_time
struct rack_rb_tree_head rc_mtree
uint32_t rc_lowest_us_rtt
uint32_t rc_last_output_to
uint32_t rc_snd_max_at_rto
uint32_t rc_target_probertt_flight
uint32_t last_sent_tlp_seq
struct timeval rc_last_time_decay
uint32_t rc_fixed_pacing_rate_ss
uint32_t last_tlp_acked_start
struct newreno rc_saved_beta
uint32_t rc_tlp_threshold
uint32_t sack_moved_extra
uint32_t rc_loss_at_start
uint32_t last_tlp_acked_end
struct timeval act_rcv_time
uint8_t rc_tlp_cwnd_reduce
uint8_t rc_no_push_at_mrtt
uint32_t rc_pace_min_segs
uint32_t sack_noextra_move
uint16_t last_sent_tlp_len
uint32_t rc_fixed_pacing_rate_ca
uint32_t rc_prr_recovery_fs
uint32_t challenge_ack_cnt
struct rack_fast_send_blk fsb
uint32_t rc_app_limited_cnt
uint32_t rc_time_probertt_entered
const struct tcp_hwrate_limit_table * crte
uint32_t rc_rack_largest_cwnd
struct def_opt_head opt_list
uint32_t rc_tlp_rxt_last_time
uint32_t rc_time_probertt_starts
uint32_t rc_time_of_last_probertt
uint32_t persist_lost_ends
struct sack_filter rack_sf
struct rack_rtt_sample rack_rs
uint64_t r_tim_lastsent[RACK_NUM_OF_RETRANS]
char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX]
void(* tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t)
const struct tcp_rate_set * ptbl
union tcp_log_stackspecific tlb_stackinfo
uint8_t rc_tlp_in_progress
int32_t(* r_substate)(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, struct tcpopt *, int32_t, int32_t, uint32_t, int, int, uint8_t)
uint8_t rc_pace_fill_if_rttin_range
uint8_t rack_hdw_pace_ena
uint8_t rc_last_tlp_acked_set
struct rack_control r_ctl
uint8_t rack_attempted_scwnd
uint8_t rack_scwnd_is_idle
uint8_t rc_last_sent_tlp_seq_valid
uint8_t rc_rack_tmr_std_based
uint8_t rc_allow_data_af_clo
uint16_t rc_highly_buffered
uint16_t r_use_labc_for_rec
uint8_t rack_enable_scwnd
uint8_t rc_srtt_measure_made
uint16_t rc_gp_no_rec_chg
uint8_t set_pacing_done_a_iw
uint8_t probe_not_answered
uint8_t app_limited_needs_set
uint16_t r_rack_hw_rate_caps
uint8_t rc_ack_can_sendout_data
uint8_t rack_rec_nonrxt_use_cr
uint8_t sack_attack_disable
uint8_t rc_rack_use_dsack
uint16_t measure_saw_probe_rtt
uint16_t rc_gp_timely_dec_cnt
uint8_t rc_last_tlp_past_cumack
uint8_t rc_last_sent_tlp_past_cumack
uint8_t rc_user_set_max_segs
uint16_t rc_gp_timely_inc_cnt
uint8_t rack_attempt_hdwr_pace
uint16_t rc_dragged_bottom
uint8_t rc_dsack_round_seen
uint8_t rack_tlp_threshold_use
uint8_t alloc_limit_reported
unsigned int * t_tfo_pending
struct sackblk sackblks[MAX_SACK_BLKS]
struct tcp_timer * t_timers
union tcpcb::@55 t_tfo_cookie
int32_t t_stats_gput_prev
struct tcp_log_id_bucket * t_lib
u_int t_pmtud_saved_maxseg
struct statsblob * t_stats
uint8_t t_tfo_client_cookie_len
uint32_t snd_ssthresh_prev
uint8_t client[TCP_FASTOPEN_MAX_COOKIE_LEN]
struct tcp_function_block * t_fb
#define VOI_TCP_CALCFRWINDIFF
void tcp_trace(short act, short ostate, struct tcpcb *tp, void *ipgen, struct tcphdr *th, int req)
int tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos)
int tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len, bool rxmit)
void tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
uint16_t tcp_ecn_output_syn_sent(struct tcpcb *tp)
void tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
void tcp_fastopen_disable_path(struct tcpcb *tp)
void tcp_fastopen_decrement_counter(unsigned int *counter)
void tcp_fastopen_update_cache(struct tcpcb *tp, uint16_t mss, uint8_t cookie_len, uint8_t *cookie)
#define TCP_FASTOPEN_COOKIE_LEN
#define TCPS_HAVERCVDSYN(s)
#define TCPS_HAVERCVDFIN(s)
#define TCPS_SYN_RECEIVED
#define TCPS_HAVEESTABLISHED(s)
void tcp_hpts_remove(struct inpcb *inp)
bool tcp_in_hpts(struct inpcb *inp)
uint32_t tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line, struct hpts_diag *diag)
#define HPTS_USEC_TO_SLOTS(x)
static __inline uint32_t tcp_tv_to_mssectick(const struct timeval *sv)
#define HPTS_TICKS_PER_SLOT
static __inline uint32_t tcp_tv_to_usectick(const struct timeval *sv)
#define HPTS_MS_TO_SLOTS(x)
static __inline uint32_t tcp_get_usecs(struct timeval *tv)
#define HPTS_USEC_IN_MSEC
#define tcp_hpts_insert(inp, slot)
void tcp_log_flowend(struct tcpcb *tp)
struct tcp_log_buffer * tcp_log_event_(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len, union tcp_log_stackspecific *stackinfo, int th_hostorder, const char *output_caller, const char *func, int line, const struct timeval *itv)
int tcp_log_dump_tp_logbuf(struct tcpcb *tp, char *reason, int how, bool force)
#define TCP_LOG_EVENTP(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv)
void tcp_lro_reg_mbufq(void)
void tcp_lro_dereg_mbufq(void)
int tcp_offload_output(struct tcpcb *tp)
void tcp_offload_tcp_info(struct tcpcb *tp, struct tcp_info *ti)
int tcp_addoptions(struct tcpopt *to, u_char *optp)
struct mbuf * tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen, int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls)
#define RACK_USE_END_OR_THACK
#define RACK_RWND_COLLAPSED
#define RACK_QUALITY_HIGH
#define RACK_TO_FRM_DELACK
#define RACK_OPTS_INC(name)
#define RACK_QUALITY_PERSIST
#define RACK_QUALITY_PROBERTT
uint64_t rack_to_usec_ts(struct timeval *tv)
#define RACK_QUALITY_APPLIMITED
#define RACK_SENT_W_DSACK
#define RACK_RTTS_ENTERPROBE
uint32_t rack_ts_to_msec(uint64_t ts)
#define RACK_TIMELY_CNT_BOOST
#define RACK_RTTS_EXITPROBE
#define RACK_LIMIT_TYPE_SPLIT
#define RACK_WAS_SACKPASS
#define RACK_HYSTART_ON_W_SC_C
#define RACK_HYSTART_ON_W_SC
#define RACK_QUALITY_NONE
#define RACK_RTTS_REACHTARGET
#define RACK_QUALITY_ALLACKED
#define RACK_NUM_OF_RETRANS
#define RACK_TO_FRM_PERSIST
void tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte)
static uint64_t tcp_hw_highest_rate(const struct tcp_hwrate_limit_table *rle)
uint32_t tcp_get_pacing_burst_size(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss, const struct tcp_hwrate_limit_table *te, int *err)
static const struct tcp_hwrate_limit_table * tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte, struct tcpcb *tp, struct ifnet *ifp, uint64_t bytes_per_sec, int flags, int *error, uint64_t *lower_rate)
static const struct tcp_hwrate_limit_table * tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp, uint64_t bytes_per_sec, int flags, int *error, uint64_t *lower_rate)
static uint64_t tcp_hw_highest_rate_ifp(struct ifnet *ifp, struct inpcb *inp)
static void tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte, struct tcpcb *tp)
#define ETHERNET_SEGMENT_SIZE
int tcp_reass(struct tcpcb *tp, struct tcphdr *th, tcp_seq *seq_start, int *tlenp, struct mbuf *m)
void tcp_update_dsack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
void tcp_clean_sackreport(struct tcpcb *tp)
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
int tcp_dsack_block_exists(struct tcpcb *tp)
void tcp_clean_dsack_blocks(struct tcpcb *tp)
static __inline uint32_t tcp_ts_getticks(void)
#define tcp_rcvseqinit(tp)
void tcp_decrement_paced_conn(void)
int tcp_can_enable_pacing(void)
struct tcpcb * tcp_drop(struct tcpcb *tp, int errno)
int deregister_tcp_functions(struct tcp_function_block *blk, bool quiesce, bool force)
int register_tcp_functions_as_names(struct tcp_function_block *blk, int wait, const char *names[], int *num_names)
u_int tcp_maxseg(const struct tcpcb *tp)
void tcp_switch_back_to_default(struct tcpcb *tp)
void tcpip_fillheaders(struct inpcb *inp, uint16_t port, void *ip_ptr, void *tcp_ptr)
void tcp_state_change(struct tcpcb *tp, int newstate)
struct tcpcb * tcp_close(struct tcpcb *tp)
void tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, tcp_seq ack, tcp_seq seq, int flags)
void tcp_log_end_status(struct tcpcb *tp, uint8_t status)
struct tcptemp * tcpip_maketemplate(struct inpcb *inp)
void tcp_record_dsack(struct tcpcb *tp, tcp_seq start, tcp_seq end, int tlp)
int tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
int tcp_backoff[TCP_MAXRXTSHIFT+1]
int tcp_timer_suspend(struct tcpcb *tp, uint32_t timer_type)
int tcp_rexmit_drop_options
void tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
int tcp_fast_finwait2_recycle
#define V_tcp_always_keepalive
#define V_tcp_pmtud_blackhole_detect
#define V_tcp_pmtud_blackhole_mss
#define V_tcp_v6pmtud_blackhole_mss
void tcp_twstart(struct tcpcb *tp)
int tcp_default_ctloutput(struct inpcb *inp, struct sockopt *sopt)
#define TCP_EI_STATUS_SERVER_FIN
#define TCP_EI_STATUS_SERVER_RST
#define V_tcp_do_autosndbuf
#define TCP_EI_STATUS_RST_IN_FRONT
static uint16_t tcp_get_flags(const struct tcphdr *th)
#define TF2_PLPMTU_MAXSEGSNT
#define TCP_EI_STATUS_KEEP_MAX
#define IN_CONGRECOVERY(t_flags)
#define IN_FASTRECOVERY(t_flags)
static void tcp_set_flags(struct tcphdr *th, uint16_t flags)
#define TCP_FUNC_OUTPUT_CANDROP
#define V_tcp_udp_tunneling_port
#define V_tcp_udp_tunneling_overhead
#define V_tcp_autosndbuf_max
#define V_tcp_autosndbuf_inc
#define TF2_FBYTES_COMPLETE
void tcp6_use_min_mtu(struct tcpcb *)
#define V_tcp_map_entries_limit
#define V_tcp_delack_enabled
#define TF2_PLPMTU_BLACKHOLE
#define TCP_EI_STATUS_DATA_A_CLOSE
#define EXIT_CONGRECOVERY(t_flags)
#define IN_RECOVERY(t_flags)
#define TCP_EI_STATUS_CLIENT_FIN
#define V_tcp_tolerate_missing_ts
#define BYTES_THIS_ACK(tp, th)
#define V_tcp_map_split_limit
#define TCP_EI_STATUS_RETRAN
#define KMOD_TCPSTAT_INC(name)
#define IS_FASTOPEN(t_flags)
#define ENTER_FASTRECOVERY(t_flags)
#define V_path_mtu_discovery
#define KMOD_TCPSTAT_ADD(name, val)
#define EXIT_RECOVERY(t_flags)
#define TCP_EI_STATUS_PERSIST_MAX
#define ENTER_CONGRECOVERY(t_flags)
static void tcp_account_for_send(struct tcpcb *tp, uint32_t len, uint8_t is_rxt, uint8_t is_tlp, int hw_tls)
#define UDPSTAT_INC(name)