FreeBSD kernel IPv4 code
tcp_timewait.c
Go to the documentation of this file.
1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD$");
36
37#include "opt_inet.h"
38#include "opt_inet6.h"
39#include "opt_ipsec.h"
40#include "opt_tcpdebug.h"
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/callout.h>
45#include <sys/kernel.h>
46#include <sys/sysctl.h>
47#include <sys/malloc.h>
48#include <sys/mbuf.h>
49#include <sys/priv.h>
50#include <sys/proc.h>
51#include <sys/socket.h>
52#include <sys/socketvar.h>
53#ifndef INVARIANTS
54#include <sys/syslog.h>
55#endif
56#include <sys/protosw.h>
57#include <sys/random.h>
58
59#include <vm/uma.h>
60
61#include <net/route.h>
62#include <net/if.h>
63#include <net/if_var.h>
64#include <net/vnet.h>
65
66#include <netinet/in.h>
67#include <netinet/in_kdtrace.h>
68#include <netinet/in_pcb.h>
69#include <netinet/in_systm.h>
70#include <netinet/in_var.h>
71#include <netinet/ip.h>
72#include <netinet/ip_icmp.h>
73#include <netinet/ip_var.h>
74#ifdef INET6
75#include <netinet/ip6.h>
76#include <netinet6/in6_pcb.h>
77#include <netinet6/ip6_var.h>
78#include <netinet6/scope6_var.h>
79#include <netinet6/nd6.h>
80#endif
81#include <netinet/tcp.h>
82#include <netinet/tcp_fsm.h>
83#include <netinet/tcp_seq.h>
84#include <netinet/tcp_timer.h>
85#include <netinet/tcp_var.h>
86#include <netinet/tcp_hpts.h>
87#ifdef INET6
88#include <netinet6/tcp6_var.h>
89#endif
90#include <netinet/tcpip.h>
91#ifdef TCPDEBUG
92#include <netinet/tcp_debug.h>
93#endif
94#ifdef INET6
95#include <netinet6/ip6protosw.h>
96#endif
97
98#include <netinet/udp.h>
99#include <netinet/udp_var.h>
100
101#include <netipsec/ipsec_support.h>
102
103#include <machine/in_cksum.h>
104
105#include <security/mac/mac_framework.h>
106
/*
 * UMA zone that struct tcptw entries are allocated from (see the
 * uma_zcreate("tcptw", sizeof(struct tcptw), ...) call in this file);
 * V_tcptw_zone is the accessor used by the rest of the code.
 */
107VNET_DEFINE_STATIC(uma_zone_t, tcptw_zone);
108#define V_tcptw_zone VNET(tcptw_zone)
/*
 * Configured limit on the zone.  0 means "not configured": the code in
 * this file then uses tcptw_auto_size() instead.
 */
109static int maxtcptw;
110
111/*
112 * The timed wait queue contains references to each of the TCP sessions
113 * currently in the TIME_WAIT state. The queue pointers, including the
114 * queue pointers in each tcptw structure, are protected using the global
115 * timewait lock, which must be held over queue iteration and modification.
116 *
117 * Rules on tcptw usage:
118 * - an inpcb is always freed _after_ its tcptw
119 * - a tcptw relies on its inpcb reference counting for memory stability
120 * - a tcptw is dereferenceable only while its inpcb is locked
121 */
123#define V_twq_2msl VNET(twq_2msl)
124
125/* Global timewait lock */
/*
 * Reader/writer lock declared for the TIME_WAIT queue.  The TW_* macros
 * below are thin wrappers over the rw_*() calls; each takes the lock
 * object itself (not a pointer) and applies '&' internally.
 * TW_LOCK_INIT passes 'd' as the lock description and 0 for the flags.
 */
126VNET_DEFINE_STATIC(struct rwlock, tw_lock);
127#define V_tw_lock VNET(tw_lock)
128
129#define TW_LOCK_INIT(tw, d) rw_init_flags(&(tw), (d), 0)
130#define TW_LOCK_DESTROY(tw) rw_destroy(&(tw))
131#define TW_RLOCK(tw) rw_rlock(&(tw))
132#define TW_WLOCK(tw) rw_wlock(&(tw))
133#define TW_RUNLOCK(tw) rw_runlock(&(tw))
134#define TW_WUNLOCK(tw) rw_wunlock(&(tw))
135#define TW_LOCK_ASSERT(tw) rw_assert(&(tw), RA_LOCKED)
136#define TW_RLOCK_ASSERT(tw) rw_assert(&(tw), RA_RLOCKED)
137#define TW_WLOCK_ASSERT(tw) rw_assert(&(tw), RA_WLOCKED)
138#define TW_UNLOCK_ASSERT(tw) rw_assert(&(tw), RA_UNLOCKED)
139
/* Forward declarations for file-local helpers defined further down. */
140static void tcp_tw_2msl_reset(struct tcptw *, int);
141static void tcp_tw_2msl_stop(struct tcptw *, int);
142static int tcp_twrespond(struct tcptw *, int);
143
/*
 * Compute the default ceiling on the number of TIME_WAIT entries.
 *
 * The result is half the distance between V_ipport_firstauto and
 * V_ipport_lastauto, raised to at least 32 and then capped at
 * maxsockets / 5.
 *
 * NOTE(review): the embedded listing numbers skip 145 and 153 in this
 * span, so the line naming the function and the 'if' that pairs with the
 * 'else' below are not visible here.  The cross-reference index at the
 * end of the listing gives the signature as
 * "static int tcptw_auto_size(void)".
 */
144static int
146{
147 int halfrange;
148
149 /*
150 * Max out at half the ephemeral port range so that TIME_WAIT
151 * sockets don't tie up too many ephemeral ports.
152 */
154 halfrange = (V_ipport_lastauto - V_ipport_firstauto) / 2;
155 else
156 halfrange = (V_ipport_firstauto - V_ipport_lastauto) / 2;
157 /* Protect against goofy port ranges smaller than 32. */
158 return (imin(imax(halfrange, 32), maxsockets / 5));
159}
160
161static int
162sysctl_maxtcptw(SYSCTL_HANDLER_ARGS)
163{
164 int error, new;
165
166 if (maxtcptw == 0)
167 new = tcptw_auto_size();
168 else
169 new = maxtcptw;
170 error = sysctl_handle_int(oidp, &new, 0, req);
171 if (error == 0 && req->newptr)
172 if (new >= 32) {
173 maxtcptw = new;
174 uma_zone_set_max(V_tcptw_zone, maxtcptw);
175 }
176 return (error);
177}
178
/*
 * net.inet.tcp.maxtcptw: read/write integer whose reads and writes are
 * routed through sysctl_maxtcptw().
 */
179SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxtcptw,
180 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
181 &maxtcptw, 0, sysctl_maxtcptw, "IU",
182 "Maximum number of compressed TCP TIME_WAIT entries");
183
/*
 * net.inet.tcp.nolocaltimewait, default true.  When set, the TIME_WAIT
 * entry code in this file checks whether the foreign address is local
 * and, if so, uses an on-stack tcptw and drops the inpcb instead of
 * queueing a TIME_WAIT entry.
 */
184VNET_DEFINE_STATIC(bool, nolocaltimewait) = true;
185#define V_nolocaltimewait VNET(nolocaltimewait)
186SYSCTL_BOOL(_net_inet_tcp, OID_AUTO, nolocaltimewait, CTLFLAG_VNET | CTLFLAG_RW,
187 &VNET_NAME(nolocaltimewait), true,
188 "Do not create compressed TCP TIME_WAIT entries for local connections");
189
/*
 * Re-apply the automatically computed zone limit.  Does nothing when an
 * explicit maxtcptw has been configured.
 *
 * NOTE(review): embedded listing number 191 is skipped, so the line with
 * the function name is not visible; the index at the end of the listing
 * gives it as "void tcp_tw_zone_change(void)".
 */
190void
192{
193
194 if (maxtcptw == 0)
195 uma_zone_set_max(V_tcptw_zone, tcptw_auto_size());
196}
197
/*
 * Set up TIME_WAIT bookkeeping: create the "tcptw" UMA zone, fetch the
 * net.inet.tcp.maxtcptw tunable and apply it as the zone limit (or the
 * automatic size when it is 0), then initialise the 2MSL queue head and
 * the timewait lock.
 *
 * NOTE(review): embedded listing number 199 is skipped, so the line with
 * the function name is not visible; the index at the end of the listing
 * gives it as "void tcp_tw_init(void)".
 */
198void
200{
201
202 V_tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw),
203 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
204 TUNABLE_INT_FETCH("net.inet.tcp.maxtcptw", &maxtcptw);
205 if (maxtcptw == 0)
206 uma_zone_set_max(V_tcptw_zone, tcptw_auto_size());
207 else
208 uma_zone_set_max(V_tcptw_zone, maxtcptw);
209 TAILQ_INIT(&V_twq_2msl);
210 TW_LOCK_INIT(V_tw_lock, "tcptw");
211}
212
213#ifdef VIMAGE
/*
 * VIMAGE-only teardown: inside a net epoch section, close every entry
 * still on the 2MSL queue, then destroy the tcptw zone.
 *
 * NOTE(review): embedded listing number 225 is skipped between
 * NET_EPOCH_EXIT() and uma_zdestroy(), so one statement is not visible.
 * NOTE(review): tcp_twclose() asserts that the inpcb is write-locked, but
 * no INP_WLOCK() appears in the visible lines of this loop -- confirm.
 */
214void
215tcp_tw_destroy(void)
216{
217 struct tcptw *tw;
218 struct epoch_tracker et;
219
220 NET_EPOCH_ENTER(et);
	/* tcp_twclose() unlinks tw from the queue, so the head advances. */
221 while ((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL)
222 tcp_twclose(tw, 0);
223 NET_EPOCH_EXIT(et);
224
226 uma_zdestroy(V_tcptw_zone);
227}
228#endif
229
230/*
231 * Move a TCP connection into TIME_WAIT state.
232 * tcbinfo is locked.
233 * inp is locked, and is unlocked before returning.
234 */
/*
 * Replace a connection's tcpcb with a compact tcptw record.
 *
 * Visible steps, in order:
 *  - decide whether the peer is local (only when V_nolocaltimewait is set);
 *  - obtain a tcptw: the on-stack twlocal for local peers, otherwise a
 *    zone allocation, falling back to recycling the head of the 2MSL
 *    queue via tcp_tw_2msl_scan(1) and finally to twlocal;
 *  - copy the last advertised window, timestamp state, sequence numbers,
 *    tunnelling port and flags out of the tcpcb;
 *  - discard the tcpcb, mark the socket disconnected, set INP_TIMEWAIT and
 *    send an ACK if TF_ACKNOW was set;
 *  - for the local case drop the inpcb, otherwise take an inpcb reference
 *    and a credential hold for the tcptw and queue it on the 2MSL list;
 *  - release the socket if the inpcb held the INP_SOCKREF reference.
 *
 * The inpcb write lock held on entry is released on every path.
 *
 * NOTE(review): the embedded listing numbers skip 236, 272, 319-320 and
 * 364 inside this function, so the name/parameter line, the statement the
 * DTrace comment refers to, the condition opening the timestamp block and
 * one statement ahead of tcp_tw_2msl_reset() are not visible.  The index
 * at the end of the listing gives the signature as
 * "void tcp_twstart(struct tcpcb *tp)".
 */
235void
237{
238 struct tcptw twlocal, *tw;
239 struct inpcb *inp = tp->t_inpcb;
240 struct socket *so;
241 uint32_t recwin;
242 bool acknow, local;
243#ifdef INET6
244 bool isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
245#endif
246
247 NET_EPOCH_ASSERT();
248 INP_WLOCK_ASSERT(inp);
249
250 /* A dropped inp should never transition to TIME_WAIT state. */
251 KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("tcp_twstart: "
252 "(inp->inp_flags & INP_DROPPED) != 0"));
253
	/* 'local' is only ever true when V_nolocaltimewait is enabled. */
254 if (V_nolocaltimewait) {
255#ifdef INET6
256 if (isipv6)
257 local = in6_localaddr(&inp->in6p_faddr);
258 else
259#endif
260#ifdef INET
261 local = in_localip(inp->inp_faddr);
262#else
263 local = false;
264#endif
265 } else
266 local = false;
267
268 /*
269 * For use only by DTrace. We do not reference the state
270 * after this point so modifying it in place is not a problem.
271 */
273
274 if (local)
275 tw = &twlocal;
276 else
277 tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);
278 if (tw == NULL) {
279 /*
280 * Reached limit on total number of TIMEWAIT connections
281 * allowed. Remove a connection from TIMEWAIT queue in LRU
282 * fashion to make room for this connection.
283 * If that fails, use on stack tw at least to be able to
284 * run through tcp_twrespond() and standard tcpcb discard
285 * routine.
286 *
287 * XXX: Check if it possible to always have enough room
288 * in advance based on guarantees provided by uma_zalloc().
289 */
290 tw = tcp_tw_2msl_scan(1);
291 if (tw == NULL) {
292 tw = &twlocal;
293 local = true;
294 }
295 }
296 /*
297 * For !local case the tcptw will hold a reference on its inpcb
298 * until tcp_twclose is called.
299 */
300 tw->tw_inpcb = inp;
301
302 /*
303 * Recover last window size sent.
304 */
305 so = inp->inp_socket;
306 recwin = lmin(lmax(sbspace(&so->so_rcv), 0),
307 (long)TCP_MAXWIN << tp->rcv_scale);
308 if (recwin < (so->so_rcv.sb_hiwat / 4) &&
309 recwin < tp->t_maxseg)
310 recwin = 0;
	/* Never report less than the window already advertised (rcv_adv). */
311 if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) &&
312 recwin < (tp->rcv_adv - tp->rcv_nxt))
313 recwin = (tp->rcv_adv - tp->rcv_nxt);
	/* Stored already scaled, as it will appear in th_win. */
314 tw->last_win = (u_short)(recwin >> tp->rcv_scale);
315
316 /*
317 * Set t_recent if timestamps are used on the connection.
318 */
321 tw->t_recent = tp->ts_recent;
322 tw->ts_offset = tp->ts_offset;
323 } else {
324 tw->t_recent = 0;
325 tw->ts_offset = 0;
326 }
327
328 tw->snd_nxt = tp->snd_nxt;
329 tw->t_port = tp->t_port;
330 tw->rcv_nxt = tp->rcv_nxt;
331 tw->tw_time = 0;
332 tw->tw_flags = tp->t_flags;
333
334/* XXX
335 * If this code will
336 * be used for fin-wait-2 state also, then we may need
337 * a ts_recent from the last segment.
338 */
	/* Sampled now because tp is discarded below. */
339 acknow = tp->t_flags & TF_ACKNOW;
340
341 /*
342 * First, discard tcpcb state, which includes stopping its timers and
343 * freeing it. tcp_discardcb() used to also release the inpcb, but
344 * that work is now done in the caller.
345 *
346 * Note: soisdisconnected() call used to be made in tcp_discardcb(),
347 * and might not be needed here any longer.
348 */
349#ifdef TCPHPTS
350 tcp_hpts_remove(inp);
351#endif
352 tcp_discardcb(tp);
353 soisdisconnected(so);
354 tw->tw_so_options = so->so_options;
355 inp->inp_flags |= INP_TIMEWAIT;
356 if (acknow)
357 tcp_twrespond(tw, TH_ACK);
	/* The on-stack tcptw is never attached to the inpcb or queued. */
358 if (local)
359 in_pcbdrop(inp);
360 else {
361 in_pcbref(inp); /* Reference from tw */
362 tw->tw_cred = crhold(so->so_cred);
363 inp->inp_ppcb = tw;
365 tcp_tw_2msl_reset(tw, 0);
366 }
367
368 /*
369 * If the inpcb owns the sole reference to the socket, then we can
370 * detach and free the socket as it is not needed in time wait.
371 */
372 if (inp->inp_flags & INP_SOCKREF) {
373 KASSERT(so->so_state & SS_PROTOREF,
374 ("tcp_twstart: !SS_PROTOREF"));
375 inp->inp_flags &= ~INP_SOCKREF;
376 INP_WUNLOCK(inp);
377 SOCK_LOCK(so);
378 so->so_state &= ~SS_PROTOREF;
379 sofree(so);
380 } else
381 INP_WUNLOCK(inp);
382}
383
384/*
385 * Returns 1 if the TIME_WAIT state was killed and we should start over,
386 * looking for a pcb in the listen state. Returns 0 otherwise.
387 *
388 * For pure SYN-segments the PCB shall be read-locked and the tcpopt pointer
389 * may be NULL. For the rest write-lock and valid tcpopt.
390 */
/*
 * Process a segment that arrived for an inpcb in TIME_WAIT.
 *
 * inp  - locked inpcb (see INP_LOCK_ASSERT and the INVARIANTS checks).
 * to   - parsed TCP options; dereferenced only after the ACK check.
 * th   - TCP header of the segment.
 * m    - the segment's mbuf.
 * tlen - segment length as supplied by the caller; incremented locally
 *        for SYN/FIN when building a RST|ACK.
 *
 * Outcomes visible in the code:
 *  - returns 1 after tcp_twclose() for a SYN whose sequence number is
 *    above tw->rcv_nxt; m is not freed on this path;
 *  - returns 0 after sending a RST through tcp_respond() (which is handed
 *    m) when the UDP tunnelling port differs, with inp unlocked;
 *  - otherwise returns 0 with inp unlocked and m freed, possibly after
 *    re-arming the 2MSL timer and/or sending an ACK.
 *
 * NOTE(review): embedded listing number 509 is skipped, so the last
 * operand of the "missing timestamp" condition is not visible.
 */
391int
392tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th,
393 struct mbuf *m, int tlen)
394{
395 struct tcptw *tw;
396 int thflags;
397 tcp_seq seq;
398
399 NET_EPOCH_ASSERT();
400 INP_LOCK_ASSERT(inp);
401
402 /*
403 * XXXRW: Time wait state for inpcb has been recycled, but inpcb is
404 * still present. This is undesirable, but temporarily necessary
405 * until we work out how to handle inpcbs whose timewait state has
406 * been removed.
407 */
408 tw = intotw(inp);
409 if (tw == NULL)
410 goto drop;
411
412 thflags = tcp_get_flags(th);
413#ifdef INVARIANTS
414 if ((thflags & (TH_SYN | TH_ACK)) == TH_SYN)
415 INP_RLOCK_ASSERT(inp);
416 else {
417 INP_WLOCK_ASSERT(inp);
418 KASSERT(to != NULL,
419 ("%s: called without options on a non-SYN segment",
420 __func__));
421 }
422#endif
423
424 /*
425 * NOTE: for FIN_WAIT_2 (to be added later),
426 * must validate sequence number before accepting RST
427 */
428
429 /*
430 * If the segment contains RST:
431 * Drop the segment - see Stevens, vol. 2, p. 964 and
432 * RFC 1337.
433 */
434 if (thflags & TH_RST)
435 goto drop;
436
/*
 * NOTE(review): the disabled block below uses "to." although 'to' is a
 * pointer in this function, and refers to 'tp' and an 'ack' label that
 * are not declared here; it would need rework before being enabled.
 */
437#if 0
438/* PAWS not needed at the moment */
439 /*
440 * RFC 1323 PAWS: If we have a timestamp reply on this segment
441 * and it's less than ts_recent, drop it.
442 */
443 if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
444 TSTMP_LT(to.to_tsval, tp->ts_recent)) {
445 if ((thflags & TH_ACK) == 0)
446 goto drop;
447 goto ack;
448 }
449 /*
450 * ts_recent is never updated because we never accept new segments.
451 */
452#endif
453
454 /*
455 * If a new connection request is received
456 * while in TIME_WAIT, drop the old connection
457 * and start over if the sequence numbers
458 * are above the previous ones.
459 * Allow UDP port number changes in this case.
460 */
461 if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) {
462 /*
463 * In case we can't upgrade our lock just pretend we have
464 * lost this packet.
465 */
466 if (((thflags & (TH_SYN | TH_ACK)) == TH_SYN) &&
467 INP_TRY_UPGRADE(inp) == 0)
468 goto drop;
469 tcp_twclose(tw, 0);
470 TCPSTAT_INC(tcps_tw_recycles);
471 return (1);
472 }
473
474 /*
475 * Send RST if UDP port numbers don't match
476 */
477 if (tw->t_port != m->m_pkthdr.tcp_tun_port) {
478 if (tcp_get_flags(th) & TH_ACK) {
479 tcp_respond(NULL, mtod(m, void *), th, m,
480 (tcp_seq)0, th->th_ack, TH_RST);
481 } else {
	/* SYN and FIN each occupy one sequence number. */
482 if (tcp_get_flags(th) & TH_SYN)
483 tlen++;
484 if (tcp_get_flags(th) & TH_FIN)
485 tlen++;
486 tcp_respond(NULL, mtod(m, void *), th, m,
487 th->th_seq+tlen, (tcp_seq)0, TH_RST|TH_ACK);
488 }
489 INP_UNLOCK(inp);
490 TCPSTAT_INC(tcps_tw_resets);
491 return (0);
492 }
493
494 /*
495 * Drop the segment if it does not contain an ACK.
496 */
497 if ((thflags & TH_ACK) == 0)
498 goto drop;
499
500 INP_WLOCK_ASSERT(inp);
501
502 /*
503 * If timestamps were negotiated during SYN/ACK and a
504 * segment without a timestamp is received, silently drop
505 * the segment, unless the missing timestamps are tolerated.
506 * See section 3.2 of RFC 7323.
507 */
508 if (((to->to_flags & TOF_TS) == 0) && (tw->t_recent != 0) &&
510 goto drop;
511 }
512
513 /*
514 * Reset the 2MSL timer if this is a duplicate FIN.
515 */
516 if (thflags & TH_FIN) {
517 seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
518 if (seq + 1 == tw->rcv_nxt)
519 tcp_tw_2msl_reset(tw, 1);
520 }
521
522 /*
523 * Acknowledge the segment if it has data or is not a duplicate ACK.
524 */
525 if (thflags != TH_ACK || tlen != 0 ||
526 th->th_seq != tw->rcv_nxt || th->th_ack != tw->snd_nxt) {
527 TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
528 tcp_twrespond(tw, TH_ACK);
529 TCPSTAT_INC(tcps_tw_responds);
530 goto dropnoprobe;
531 }
532drop:
533 TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
534dropnoprobe:
535 INP_UNLOCK(inp);
536 m_freem(m);
537 return (0);
538}
539
540void
541tcp_twclose(struct tcptw *tw, int reuse)
542{
543 struct socket *so;
544 struct inpcb *inp;
545
546 /*
547 * At this point, we are in one of two situations:
548 *
549 * (1) We have no socket, just an inpcb<->twtcp pair. We can free
550 * all state.
551 *
552 * (2) We have a socket -- if we own a reference, release it and
553 * notify the socket layer.
554 */
555 inp = tw->tw_inpcb;
556 KASSERT((inp->inp_flags & INP_TIMEWAIT), ("tcp_twclose: !timewait"));
557 KASSERT(intotw(inp) == tw, ("tcp_twclose: inp_ppcb != tw"));
558 NET_EPOCH_ASSERT();
559 INP_WLOCK_ASSERT(inp);
560
561 tcp_tw_2msl_stop(tw, reuse);
562 inp->inp_ppcb = NULL;
563 in_pcbdrop(inp);
564
565 so = inp->inp_socket;
566 if (so != NULL) {
567 /*
568 * If there's a socket, handle two cases: first, we own a
569 * strong reference, which we will now release, or we don't
570 * in which case another reference exists (XXXRW: think
571 * about this more), and we don't need to take action.
572 */
573 if (inp->inp_flags & INP_SOCKREF) {
574 inp->inp_flags &= ~INP_SOCKREF;
575 INP_WUNLOCK(inp);
576 SOCK_LOCK(so);
577 KASSERT(so->so_state & SS_PROTOREF,
578 ("tcp_twclose: INP_SOCKREF && !SS_PROTOREF"));
579 so->so_state &= ~SS_PROTOREF;
580 sofree(so);
581 } else {
582 /*
583 * If we don't own the only reference, the socket and
584 * inpcb need to be left around to be handled by
585 * tcp_usr_detach() later.
586 */
587 INP_WUNLOCK(inp);
588 }
589 } else {
590 /*
591 * The socket has been already cleaned-up for us, only free the
592 * inpcb.
593 */
594 in_pcbfree(inp);
595 }
596 TCPSTAT_INC(tcps_closed);
597}
598
/*
 * Build and transmit a bare TCP segment on behalf of a TIME_WAIT entry.
 *
 * tw    - entry supplying sequence numbers, window, timestamp echo, UDP
 *         tunnelling port and the saved socket options.
 * flags - TCP flags for the segment; a timestamp option is added only
 *         when flags is exactly TH_ACK and tw->t_recent is non-zero.
 *
 * Returns ENOBUFS if no mbuf could be allocated, -1 if MD5 signing is
 * required but unavailable or fails, otherwise the value returned by
 * ip6_output()/ip_output().
 *
 * The inpcb must be write-locked.
 *
 * NOTE(review): embedded listing numbers 638, 657, 674, 679 and 742 are
 * skipped inside this function.  Judging by their position they carry the
 * UDP source-port assignments, the timestamp value, the statement guarded
 * by the TF_SIGNATURE test, and something ahead of the IP_DF assignment
 * -- confirm against the original file.
 */
599static int
600tcp_twrespond(struct tcptw *tw, int flags)
601{
602 struct inpcb *inp = tw->tw_inpcb;
603#if defined(INET6) || defined(INET)
604 struct tcphdr *th = NULL;
605#endif
606 struct mbuf *m;
607#ifdef INET
608 struct ip *ip = NULL;
609#endif
610 u_int hdrlen, optlen, ulen;
611 int error = 0; /* Keep compiler happy */
612 struct tcpopt to;
613#ifdef INET6
614 struct ip6_hdr *ip6 = NULL;
615 int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
616#endif
617 struct udphdr *udp = NULL;
618 hdrlen = 0; /* Keep compiler happy */
619
620 INP_WLOCK_ASSERT(inp);
621
622 m = m_gethdr(M_NOWAIT, MT_DATA);
623 if (m == NULL)
624 return (ENOBUFS);
	/* Leave max_linkhdr bytes of room ahead of the IP header. */
625 m->m_data += max_linkhdr;
626
627#ifdef MAC
628 mac_inpcb_create_mbuf(inp, m);
629#endif
630
	/*
	 * Lay out IP[v6] header, optional UDP header (when tw->t_port is
	 * set) and TCP header back to back in the mbuf.
	 */
631#ifdef INET6
632 if (isipv6) {
633 hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
634 ip6 = mtod(m, struct ip6_hdr *);
635 if (tw->t_port) {
636 udp = (struct udphdr *)(ip6 + 1);
637 hdrlen += sizeof(struct udphdr);
639 udp->uh_dport = tw->t_port;
640 ulen = (hdrlen - sizeof(struct ip6_hdr));
641 th = (struct tcphdr *)(udp + 1);
642 } else
643 th = (struct tcphdr *)(ip6 + 1);
644 tcpip_fillheaders(inp, tw->t_port, ip6, th);
645 }
646#endif
647#if defined(INET6) && defined(INET)
648 else
649#endif
650#ifdef INET
651 {
652 hdrlen = sizeof(struct tcpiphdr);
653 ip = mtod(m, struct ip *);
654 if (tw->t_port) {
655 udp = (struct udphdr *)(ip + 1);
656 hdrlen += sizeof(struct udphdr);
658 udp->uh_dport = tw->t_port;
659 ulen = (hdrlen - sizeof(struct ip));
660 th = (struct tcphdr *)(udp + 1);
661 } else
662 th = (struct tcphdr *)(ip + 1);
663 tcpip_fillheaders(inp, tw->t_port, ip, th);
664 }
665#endif
666 to.to_flags = 0;
667
668 /*
669 * Send a timestamp and echo-reply if both our side and our peer
670 * have sent timestamps in our SYN's and this is not a RST.
671 */
672 if (tw->t_recent && flags == TH_ACK) {
673 to.to_flags |= TOF_TS;
675 to.to_tsecr = tw->t_recent;
676 }
677#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
678 if (tw->tw_flags & TF_SIGNATURE)
680#endif
	/* Options are written directly after the fixed TCP header. */
681 optlen = tcp_addoptions(&to, (u_char *)(th + 1));
682
683 if (udp) {
684 ulen += optlen;
685 udp->uh_ulen = htons(ulen);
686 }
687 m->m_len = hdrlen + optlen;
688 m->m_pkthdr.len = m->m_len;
689
690 KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small"));
691
692 th->th_seq = htonl(tw->snd_nxt);
693 th->th_ack = htonl(tw->rcv_nxt);
694 th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
695 tcp_set_flags(th, flags);
696 th->th_win = htons(tw->last_win);
697
698#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
	/*
	 * NOTE(review): the early return below leaves 'm' (allocated by
	 * m_gethdr() above) without an m_freem(); this looks like an mbuf
	 * leak on the signature-failure path -- confirm.
	 */
699 if (tw->tw_flags & TF_SIGNATURE) {
700 if (!TCPMD5_ENABLED() ||
701 TCPMD5_OUTPUT(m, th, to.to_signature) != 0)
702 return (-1);
703 }
704#endif
	/*
	 * Only the pseudo-header sum is stored here; csum_flags/csum_data
	 * describe the checksum that remains to be completed.  When
	 * tunnelling over UDP the TCP checksum field is left at zero.
	 */
705#ifdef INET6
706 if (isipv6) {
707 if (tw->t_port) {
708 m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
709 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
710 udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
711 th->th_sum = htons(0);
712 } else {
713 m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
714 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
715 th->th_sum = in6_cksum_pseudo(ip6,
716 sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0);
717 }
718 ip6->ip6_hlim = in6_selecthlim(inp, NULL);
719 TCP_PROBE5(send, NULL, NULL, ip6, NULL, th);
720 error = ip6_output(m, inp->in6p_outputopts, NULL,
721 (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
722 }
723#endif
724#if defined(INET6) && defined(INET)
725 else
726#endif
727#ifdef INET
728 {
729 if (tw->t_port) {
730 m->m_pkthdr.csum_flags = CSUM_UDP;
731 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
732 udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
733 ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
734 th->th_sum = htons(0);
735 } else {
736 m->m_pkthdr.csum_flags = CSUM_TCP;
737 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
738 th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
739 htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
740 }
741 ip->ip_len = htons(m->m_pkthdr.len);
743 ip->ip_off |= htons(IP_DF);
744 TCP_PROBE5(send, NULL, NULL, ip, NULL, th);
745 error = ip_output(m, inp->inp_options, NULL,
746 ((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
747 NULL, inp);
748 }
749#endif
	/* Statistics are bumped regardless of the output routine's result. */
750 if (flags & TH_ACK)
751 TCPSTAT_INC(tcps_sndacks);
752 else
753 TCPSTAT_INC(tcps_sndctrl);
754 TCPSTAT_INC(tcps_sndtotal);
755 return (error);
756}
757
/*
 * (Re)arm the 2MSL timeout of a tcptw.
 *
 * tw    - entry to place at the tail of V_twq_2msl.
 * rearm - non-zero if tw is already on the queue and must be unlinked
 *         first; zero for a first insertion.
 *
 * The expiry is set to ticks + 2 * V_tcp_msl and the entry is always
 * appended at the tail.
 *
 * NOTE(review): embedded listing numbers 763, 765 and 770 are skipped;
 * the last two bracket the queue manipulation and are presumably the
 * timewait lock acquire/release -- confirm against the original file.
 */
758static void
759tcp_tw_2msl_reset(struct tcptw *tw, int rearm)
760{
761
762 NET_EPOCH_ASSERT();
764
766 if (rearm)
767 TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
768 tw->tw_time = ticks + 2 * V_tcp_msl;
769 TAILQ_INSERT_TAIL(&V_twq_2msl, tw, tw_2msl);
771}
772
/*
 * Unlink a tcptw from the 2MSL queue and release what it holds.
 *
 * Clears tw->tw_inpcb, removes tw from V_twq_2msl, drops the credential
 * reference if one is set, and releases the tcptw's reference on the
 * inpcb (asserted not to be the last one).  Unless 'reuse' is non-zero
 * the tcptw itself is returned to the zone.
 *
 * NOTE(review): embedded listing numbers 782, 789 and 799 are skipped;
 * the first two bracket the queue manipulation and are presumably the
 * timewait lock acquire/release -- confirm against the original file.
 */
773static void
774tcp_tw_2msl_stop(struct tcptw *tw, int reuse)
775{
776 struct ucred *cred;
777 struct inpcb *inp;
	/* Only consumed by KASSERT, hence __unused. */
778 int released __unused;
779
780 NET_EPOCH_ASSERT();
781
783 inp = tw->tw_inpcb;
784 tw->tw_inpcb = NULL;
785
786 TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
787 cred = tw->tw_cred;
788 tw->tw_cred = NULL;
790
791 if (cred != NULL)
792 crfree(cred);
793
794 released = in_pcbrele_wlocked(inp);
795 KASSERT(!released, ("%s: inp should not be released here", __func__));
796
797 if (!reuse)
798 uma_zfree(V_tcptw_zone, tw);
800}
801
/*
 * Walk the 2MSL queue from its head and close entries.
 *
 * reuse == 0: keep closing head entries until the queue is empty or the
 *             head's tw_time is still ahead of ticks; returns NULL.
 * reuse != 0: close the first usable head entry regardless of its expiry
 *             and return its tcptw (kept allocated) for the caller to
 *             reuse; returns NULL if the queue is empty.
 *
 * For each candidate the inpcb is referenced, write-locked, and the tcptw
 * is re-read through intotw() so that an entry closed in the meantime is
 * detected (tw == NULL) and skipped.
 *
 * NOTE(review): embedded listing numbers 803, 811, 814 and 822 are
 * skipped, so the name/parameter line and three statements inside the
 * loop are not visible.  The index at the end of the listing gives the
 * signature as "struct tcptw * tcp_tw_2msl_scan(int reuse)".
 */
802struct tcptw *
804{
805 struct tcptw *tw;
806 struct inpcb *inp;
807
808 NET_EPOCH_ASSERT();
809
810 for (;;) {
812 tw = TAILQ_FIRST(&V_twq_2msl);
	/* Stop on an empty queue, or on an unexpired head unless reusing. */
813 if (tw == NULL || (!reuse && (tw->tw_time - ticks) > 0)) {
815 break;
816 }
817 KASSERT(tw->tw_inpcb != NULL, ("%s: tw->tw_inpcb == NULL",
818 __func__));
819
820 inp = tw->tw_inpcb;
821 in_pcbref(inp);
823
824 INP_WLOCK(inp);
825 tw = intotw(inp);
	/* A true return here means the temporary reference was the last. */
826 if (in_pcbrele_wlocked(inp)) {
827 if (__predict_true(tw == NULL)) {
828 continue;
829 } else {
830 /* This should not happen as in TIMEWAIT
831 * state the inp should not be destroyed
832 * before its tcptw. If INVARIANTS is
833 * defined panic.
834 */
835#ifdef INVARIANTS
836 panic("%s: Panic before an infinite "
837 "loop: INP_TIMEWAIT && (INP_FREED "
838 "|| inp last reference) && tw != "
839 "NULL", __func__);
840#else
841 log(LOG_ERR, "%s: Avoid an infinite "
842 "loop: INP_TIMEWAIT && (INP_FREED "
843 "|| inp last reference) && tw != "
844 "NULL", __func__);
845#endif
846 break;
847 }
848 }
849
850 if (tw == NULL) {
851 /* tcp_twclose() has already been called */
852 INP_WUNLOCK(inp);
853 continue;
854 }
855
856 tcp_twclose(tw, reuse);
857 if (reuse)
858 return tw;
859 }
860
861 return NULL;
862}
static TAILQ_HEAD(handler_chain, proto_handler)
Definition: alias_mod.c:57
bool in_localip(struct in_addr in)
Definition: in.c:131
__uint32_t uint32_t
Definition: in.h:62
#define IPPROTO_TCP
Definition: in.h:45
#define IPPROTO_UDP
Definition: in.h:46
u_short in_pseudo(u_int32_t a, u_int32_t b, u_int32_t c)
Definition: in_cksum.c:197
#define TCP_PROBE5(probe, arg0, arg1, arg2, arg3, arg4)
Definition: in_kdtrace.h:47
void in_pcbdrop(struct inpcb *inp)
Definition: in_pcb.c:1928
void in_pcbref(struct inpcb *inp)
Definition: in_pcb.c:1762
bool in_pcbrele_wlocked(struct inpcb *inp)
Definition: in_pcb.c:1792
void in_pcbfree(struct inpcb *inp)
Definition: in_pcb.c:1818
#define INP_TRY_UPGRADE(inp)
Definition: in_pcb.h:524
#define INP_LOCK_ASSERT(inp)
Definition: in_pcb.h:527
#define INC_ISIPV6
Definition: in_pcb.h:124
#define INP_WLOCK(inp)
Definition: in_pcb.h:518
#define V_ipport_lastauto
Definition: in_pcb.h:727
#define INP_WLOCK_ASSERT(inp)
Definition: in_pcb.h:529
#define INP_TIMEWAIT
Definition: in_pcb.h:644
#define V_ipport_firstauto
Definition: in_pcb.h:726
#define INP_DROPPED
Definition: in_pcb.h:646
#define INP_WUNLOCK(inp)
Definition: in_pcb.h:522
#define INP_UNLOCK(inp)
Definition: in_pcb.h:523
#define INP_RLOCK_ASSERT(inp)
Definition: in_pcb.h:528
#define INP_SOCKREF
Definition: in_pcb.h:647
#define IP_DF
Definition: ip.h:13
int ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct ip_moptions *imo, struct inpcb *inp)
Definition: ip_output.c:320
#define IP_ROUTETOIF
Definition: ip_var.h:169
in_addr_t s_addr
Definition: in.h:84
u_int8_t inc_flags
Definition: in_pcb.h:114
Definition: in_pcb.h:217
struct socket * inp_socket
Definition: in_pcb.h:254
struct ip6_pktopts * in6p_outputopts
Definition: in_pcb.h:286
int inp_flags
Definition: in_pcb.h:246
struct mbuf * inp_options
Definition: in_pcb.h:279
void * inp_ppcb
Definition: in_pcb.h:253
struct in_conninfo inp_inc
Definition: in_pcb.h:270
Definition: ip6.h:74
Definition: ip.h:51
struct in_addr ip_src ip_dst
Definition: ip.h:71
u_short ip_len
Definition: ip.h:61
u_short ip_off
Definition: ip.h:63
Definition: tcp_var.h:132
tcp_seq snd_nxt
Definition: tcp_var.h:151
u_char rcv_scale
Definition: tcp_var.h:171
u_int32_t ts_recent
Definition: tcp_var.h:169
u_int32_t ts_offset
Definition: tcp_var.h:157
tcp_seq rcv_nxt
Definition: tcp_var.h:163
u_int t_flags
Definition: tcp_var.h:146
uint32_t t_port
Definition: tcp_var.h:139
struct inpcb * t_inpcb
Definition: tcp_var.h:134
tcp_seq rcv_adv
Definition: tcp_var.h:164
Definition: tcpip.h:41
u_char * to_signature
Definition: tcp_var.h:590
u_int32_t to_tsval
Definition: tcp_var.h:587
u_int32_t to_flags
Definition: tcp_var.h:578
u_int32_t to_tsecr
Definition: tcp_var.h:588
Definition: tcp_var.h:629
u_int tw_flags
Definition: tcp_var.h:642
int tw_time
Definition: tcp_var.h:640
tcp_seq snd_nxt
Definition: tcp_var.h:633
u_int32_t ts_offset
Definition: tcp_var.h:639
u_int32_t t_recent
Definition: tcp_var.h:638
u_short last_win
Definition: tcp_var.h:635
struct ucred * tw_cred
Definition: tcp_var.h:637
uint32_t t_port
Definition: tcp_var.h:631
struct inpcb * tw_inpcb
Definition: tcp_var.h:630
tcp_seq rcv_nxt
Definition: tcp_var.h:634
short tw_so_options
Definition: tcp_var.h:636
Definition: udp.h:45
u_short uh_ulen
Definition: udp.h:48
u_short uh_sport
Definition: udp.h:46
u_short uh_sum
Definition: udp.h:49
u_short uh_dport
Definition: udp.h:47
#define TCPS_TIME_WAIT
Definition: tcp_fsm.h:60
void tcp_hpts_remove(struct inpcb *inp)
Definition: tcp_hpts.c:563
int tcp_addoptions(struct tcpopt *to, u_char *optp)
Definition: tcp_output.c:1790
#define SEQ_GT(a, b)
Definition: tcp_seq.h:44
static __inline uint32_t tcp_ts_getticks(void)
Definition: tcp_seq.h:89
#define TSTMP_LT(a, b)
Definition: tcp_seq.h:59
void tcp_discardcb(struct tcpcb *tp)
Definition: tcp_subr.c:2304
void tcpip_fillheaders(struct inpcb *inp, uint16_t port, void *ip_ptr, void *tcp_ptr)
Definition: tcp_subr.c:1637
void tcp_state_change(struct tcpcb *tp, int newstate)
Definition: tcp_subr.c:3999
void tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, tcp_seq ack, tcp_seq seq, int flags)
Definition: tcp_subr.c:1728
#define V_tcp_msl
Definition: tcp_timer.h:214
void tcp_twclose(struct tcptw *tw, int reuse)
Definition: tcp_timewait.c:541
static int sysctl_maxtcptw(SYSCTL_HANDLER_ARGS)
Definition: tcp_timewait.c:162
SYSCTL_BOOL(_net_inet_tcp, OID_AUTO, nolocaltimewait, CTLFLAG_VNET|CTLFLAG_RW, &VNET_NAME(nolocaltimewait), true, "Do not create compressed TCP TIME_WAIT entries for local connections")
static int tcp_twrespond(struct tcptw *, int)
Definition: tcp_timewait.c:600
VNET_DEFINE_STATIC(uma_zone_t, tcptw_zone)
#define TW_WUNLOCK(tw)
Definition: tcp_timewait.c:134
#define TW_RUNLOCK(tw)
Definition: tcp_timewait.c:133
void tcp_twstart(struct tcpcb *tp)
Definition: tcp_timewait.c:236
#define V_nolocaltimewait
Definition: tcp_timewait.c:185
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxtcptw, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_NEEDGIANT, &maxtcptw, 0, sysctl_maxtcptw, "IU", "Maximum number of compressed TCP TIME_WAIT entries")
void tcp_tw_init(void)
Definition: tcp_timewait.c:199
#define TW_RLOCK(tw)
Definition: tcp_timewait.c:131
#define V_tcptw_zone
Definition: tcp_timewait.c:108
#define TW_LOCK_INIT(tw, d)
Definition: tcp_timewait.c:129
#define TW_WLOCK(tw)
Definition: tcp_timewait.c:132
static int maxtcptw
Definition: tcp_timewait.c:109
void tcp_tw_zone_change(void)
Definition: tcp_timewait.c:191
#define V_twq_2msl
Definition: tcp_timewait.c:123
__FBSDID("$FreeBSD$")
#define V_tw_lock
Definition: tcp_timewait.c:127
struct tcptw * tcp_tw_2msl_scan(int reuse)
Definition: tcp_timewait.c:803
static void tcp_tw_2msl_stop(struct tcptw *, int)
Definition: tcp_timewait.c:774
#define TW_LOCK_DESTROY(tw)
Definition: tcp_timewait.c:130
static int tcptw_auto_size(void)
Definition: tcp_timewait.c:145
static void tcp_tw_2msl_reset(struct tcptw *, int)
Definition: tcp_timewait.c:759
int tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th, struct mbuf *m, int tlen)
Definition: tcp_timewait.c:392
#define TF_SIGNATURE
Definition: tcp_var.h:519
#define TF_ACKNOW
Definition: tcp_var.h:497
#define TOF_SIGNATURE
Definition: tcp_var.h:583
static uint16_t tcp_get_flags(const struct tcphdr *th)
Definition: tcp_var.h:1265
static void tcp_set_flags(struct tcphdr *th, uint16_t flags)
Definition: tcp_var.h:1271
#define TCPSTATES_DEC(state)
Definition: tcp_var.h:858
#define V_tcp_udp_tunneling_port
Definition: tcp_var.h:1067
#define TF_NOOPT
Definition: tcp_var.h:500
#define intotw(ip)
Definition: tcp_var.h:646
#define TCPSTATES_INC(state)
Definition: tcp_var.h:857
#define TF_RCVD_TSTMP
Definition: tcp_var.h:505
#define V_tcp_tolerate_missing_ts
Definition: tcp_var.h:1040
#define TCPSTAT_INC(name)
Definition: tcp_var.h:842
#define V_path_mtu_discovery
Definition: tcp_var.h:1029
#define TF_REQ_TSTMP
Definition: tcp_var.h:504
#define TOF_TS
Definition: tcp_var.h:582