FreeBSD kernel IPv4 code
tcp_pcap.c
/*-
 * Copyright (c) 2015
 *    Jonathan Looney. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/queue.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/eventhandler.h>
#include <machine/atomic.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_pcap.h>

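/*
 * Leading space in an mbuf, measured from the start of its data area.
 * This mirrors M_LEADINGSPACE(), minus the writability check.
 */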
#define M_LEADINGSPACE_NOWRITE(m) \
    ((m)->m_data - M_START(m))

int tcp_pcap_aggressive_free = 1;
static int tcp_pcap_clusters_referenced_cur = 0;
static int tcp_pcap_clusters_referenced_max = 0;

SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_aggressive_free,
    CTLFLAG_RW, &tcp_pcap_aggressive_free, 0,
    "Free saved packets when the memory system comes under pressure");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_cur,
    CTLFLAG_RD, &tcp_pcap_clusters_referenced_cur, 0,
    "Number of clusters currently referenced on TCP PCAP queues");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_max,
    CTLFLAG_RW, &tcp_pcap_clusters_referenced_max, 0,
    "Maximum number of clusters allowed to be referenced on TCP PCAP "
    "queues");

static int tcp_pcap_alloc_reuse_ext = 0;
static int tcp_pcap_alloc_reuse_mbuf = 0;
static int tcp_pcap_alloc_new_mbuf = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_ext,
    CTLFLAG_RD, &tcp_pcap_alloc_reuse_ext, 0,
    "Number of mbufs with external storage reused for the TCP PCAP "
    "functionality");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_mbuf,
    CTLFLAG_RD, &tcp_pcap_alloc_reuse_mbuf, 0,
    "Number of mbufs with internal storage reused for the TCP PCAP "
    "functionality");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_new_mbuf,
    CTLFLAG_RD, &tcp_pcap_alloc_new_mbuf, 0,
    "Number of new mbufs allocated for the TCP PCAP functionality");

VNET_DEFINE(int, tcp_pcap_packets) = 0;
#define V_tcp_pcap_packets VNET(tcp_pcap_packets)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_packets,
    CTLFLAG_RW, &VNET_NAME(tcp_pcap_packets), 0,
    "Default number of packets saved per direction per TCPCB");

/* Initialize the values. */
static void
tcp_pcap_max_set(void)
{

    tcp_pcap_clusters_referenced_max = nmbclusters / 4;
}

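/*
 * Set the initial cluster-reference limit and re-derive it whenever the
 * nmbclusters pool size changes.
 */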
void
tcp_pcap_init(void)
{

    tcp_pcap_max_set();
    EVENTHANDLER_REGISTER(nmbclusters_change, tcp_pcap_max_set,
        NULL, EVENTHANDLER_PRI_ANY);
}

/*
 * If we are below the maximum allowed cluster references,
 * increment the reference count and return TRUE. Otherwise,
 * leave the reference count alone and return FALSE.
 */
static __inline bool
tcp_pcap_take_cluster_reference(void)
{
    if (atomic_fetchadd_int(&tcp_pcap_clusters_referenced_cur, 1) >=
        tcp_pcap_clusters_referenced_max) {
        atomic_add_int(&tcp_pcap_clusters_referenced_cur, -1);
        return FALSE;
    }
    return TRUE;
}

/*
 * For all the external entries in m, apply the given adjustment.
 * This can be used to adjust the counter when an mbuf chain is
 * copied or freed.
 */
static __inline void
tcp_pcap_adj_cluster_reference(struct mbuf *m, int adj)
{
    while (m) {
        if (m->m_flags & M_EXT)
            atomic_add_int(&tcp_pcap_clusters_referenced_cur, adj);

        m = m->m_next;
    }
}

/*
 * Free all mbufs in a chain, decrementing the reference count as
 * necessary.
 *
 * Functions in this file should use this instead of m_freem() when
 * they are freeing mbuf chains that may contain clusters that were
 * already included in tcp_pcap_clusters_referenced_cur.
 */
static void
tcp_pcap_m_freem(struct mbuf *mb)
{
    while (mb != NULL) {
        if (mb->m_flags & M_EXT)
            atomic_subtract_int(&tcp_pcap_clusters_referenced_cur,
                1);
        mb = m_free(mb);
    }
}

/*
 * Copy data from m to n, where n cannot fit all the data we might
 * want from m.
 *
 * Prioritize data like this:
 * 1. TCP header
 * 2. IP header
 * 3. Data
 */
static void
tcp_pcap_copy_bestfit(struct tcphdr *th, struct mbuf *m, struct mbuf *n)
{
    struct mbuf *m_cur = m;
    int bytes_to_copy=0, trailing_data, skip=0, tcp_off;

    /* Below, we assume these will be non-NULL. */
    KASSERT(th, ("%s: called with th == NULL", __func__));
    KASSERT(m, ("%s: called with m == NULL", __func__));
    KASSERT(n, ("%s: called with n == NULL", __func__));

    /* We assume this initialization occurred elsewhere. */
    KASSERT(n->m_len == 0, ("%s: called with n->m_len=%d (expected 0)",
        __func__, n->m_len));
    KASSERT(n->m_data == M_START(n),
        ("%s: called with n->m_data != M_START(n)", __func__));

    /*
     * Calculate the size of the TCP header. We use this often
     * enough that it is worth just calculating at the start.
     */
    tcp_off = th->th_off << 2;

    /* Trim off leading empty mbufs. */
    while (m && m->m_len == 0)
        m = m->m_next;

    if (m) {
        m_cur = m;
    }
    else {
        /*
         * No data? Highly unusual. We would expect to at
         * least see a TCP header in the mbuf.
         * As we have a pointer to the TCP header, I guess
         * we should just copy that. (???)
         */
fallback:
        bytes_to_copy = tcp_off;
        if (bytes_to_copy > M_SIZE(n))
            bytes_to_copy = M_SIZE(n);
        bcopy(th, n->m_data, bytes_to_copy);
        n->m_len = bytes_to_copy;
        return;
    }

    /*
     * Find TCP header. Record the total number of bytes up to,
     * and including, the TCP header.
     */
    while (m_cur) {
        if ((caddr_t) th >= (caddr_t) m_cur->m_data &&
            (caddr_t) th < (caddr_t) (m_cur->m_data + m_cur->m_len))
            break;
        bytes_to_copy += m_cur->m_len;
        m_cur = m_cur->m_next;
    }
    if (m_cur)
        bytes_to_copy += (caddr_t) th - (caddr_t) m_cur->m_data;
    else
        goto fallback;
    bytes_to_copy += tcp_off;

    /*
     * If we already want to copy more bytes than we can hold
     * in the destination mbuf, skip leading bytes and copy
     * what we can.
     *
     * Otherwise, consider trailing data.
     */
    if (bytes_to_copy > M_SIZE(n)) {
        skip = bytes_to_copy - M_SIZE(n);
        bytes_to_copy = M_SIZE(n);
    }
    else {
        /*
         * Determine how much trailing data is in the chain.
         * We start with the length of this mbuf (the one
         * containing th) and subtract the size of the TCP
         * header (tcp_off) and the size of the data prior
         * to th (th - m_cur->m_data).
         *
         * This *should not* be negative, as the TCP code
         * should put the whole TCP header in a single
         * mbuf. But, it isn't a problem if it is. We will
         * simply work off our negative balance as we look
         * at subsequent mbufs.
         */
        trailing_data = m_cur->m_len - tcp_off;
        trailing_data -= (caddr_t) th - (caddr_t) m_cur->m_data;
        m_cur = m_cur->m_next;
        while (m_cur) {
            trailing_data += m_cur->m_len;
            m_cur = m_cur->m_next;
        }
        if ((bytes_to_copy + trailing_data) > M_SIZE(n))
            bytes_to_copy = M_SIZE(n);
        else
            bytes_to_copy += trailing_data;
    }

    m_copydata(m, skip, bytes_to_copy, n->m_data);
    n->m_len = bytes_to_copy;
}

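/*
 * Save a copy of the packet described by th and m on the given queue,
 * recycling an mbuf already on the queue when it is full and taking
 * cluster references only while below the configured limit.
 */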
void
tcp_pcap_add(struct tcphdr *th, struct mbuf *m, struct mbufq *queue)
{
    struct mbuf *n = NULL, *mhead;

    KASSERT(th, ("%s: called with th == NULL", __func__));
    KASSERT(m, ("%s: called with m == NULL", __func__));
    KASSERT(queue, ("%s: called with queue == NULL", __func__));

    /* We only care about data packets. */
    while (m && m->m_type != MT_DATA)
        m = m->m_next;

    /* We only need to do something if we still have an mbuf. */
    if (!m)
        return;

    /* If we are not saving mbufs, return now. */
    if (queue->mq_maxlen == 0)
        return;

    /*
     * Check to see if we will need to recycle mbufs.
     *
     * If we need to get rid of mbufs to stay below
     * our packet count, try to reuse the mbuf. Once
     * we already have a new mbuf (n), then we can
     * simply free subsequent mbufs.
     *
     * Note that most of the logic in here is to deal
     * with the reuse. If we are fine with constant
     * mbuf allocs/deallocs, we could ditch this logic.
     * But, it only seems to make sense to reuse
     * mbufs we already have.
     */
    while (mbufq_full(queue)) {
        mhead = mbufq_dequeue(queue);

        if (n) {
            tcp_pcap_m_freem(mhead);
        }
        else {
            /*
             * If this held an external cluster, try to
             * detach the cluster. But, if we held the
             * last reference, go through the normal
             * free-ing process.
             */
            if (mhead->m_flags & M_EXTPG) {
                /* Don't mess around with these. */
                tcp_pcap_m_freem(mhead);
                continue;
            } else if (mhead->m_flags & M_EXT) {
                switch (mhead->m_ext.ext_type) {
                case EXT_SFBUF:
                    /* Don't mess around with these. */
                    tcp_pcap_m_freem(mhead);
                    continue;
                default:
                    if (atomic_fetchadd_int(
                        mhead->m_ext.ext_cnt, -1) == 1)
                    {
                        /*
                         * We held the last reference
                         * on this cluster. Restore
                         * the reference count and put
                         * it back in the pool.
                         */
                        *(mhead->m_ext.ext_cnt) = 1;
                        tcp_pcap_m_freem(mhead);
                        continue;
                    }
                    /*
                     * We were able to cleanly free the
                     * reference.
                     */
                    atomic_subtract_int(
                        &tcp_pcap_clusters_referenced_cur,
                        1);
                    tcp_pcap_alloc_reuse_ext++;
                    break;
                }
            } else {
                tcp_pcap_alloc_reuse_mbuf++;
            }

            n = mhead;
            tcp_pcap_m_freem(n->m_next);
            m_init(n, M_NOWAIT, MT_DATA, 0);
        }
    }

    /* Check to see if we need to get a new mbuf. */
    if (!n) {
        if (!(n = m_get(M_NOWAIT, MT_DATA)))
            return;
        tcp_pcap_alloc_new_mbuf++;
    }

    /*
     * What are we dealing with? If a cluster, attach it. Otherwise,
     * try to copy the data from the beginning of the mbuf to the
     * end of data. (There may be data between the start of the data
     * area and the current data pointer. We want to get this, because
     * it may contain header information that is useful.)
     * In cases where that isn't possible, settle for what we can
     * get.
     */
    if ((m->m_flags & (M_EXT|M_EXTPG)) &&
        tcp_pcap_take_cluster_reference()) {
        n->m_data = m->m_data;
        n->m_len = m->m_len;
        mb_dupcl(n, m);
    }
    else if (((m->m_data + m->m_len) - M_START(m)) <= M_SIZE(n)) {
        /*
         * At this point, n is guaranteed to be a normal mbuf
         * with no cluster and no packet header. Because the
         * logic in this code block requires this, the assert
         * is here to catch any instances where someone
         * changes the logic to invalidate that assumption.
         */
        KASSERT((n->m_flags & (M_EXT | M_PKTHDR)) == 0,
            ("%s: Unexpected flags (%#x) for mbuf",
            __func__, n->m_flags));
        n->m_data = n->m_dat + M_LEADINGSPACE_NOWRITE(m);
        n->m_len = m->m_len;
        if (m->m_flags & M_EXTPG)
            m_copydata(m, 0, m->m_len, n->m_data);
        else
            bcopy(M_START(m), n->m_dat,
                m->m_len + M_LEADINGSPACE_NOWRITE(m));
    }
    else {
        /*
         * This is the case where we need to "settle for what
         * we can get". The most probable way to this code
         * path is that we've already taken references to the
         * maximum number of mbuf clusters we can, and the data
         * is too long to fit in an mbuf's internal storage.
         * Try for a "best fit".
         */
        tcp_pcap_copy_bestfit(th, m, n);

        /* Don't try to get additional data. */
        goto add_to_queue;
    }

    if (m->m_next) {
        n->m_next = m_copym(m->m_next, 0, M_COPYALL, M_NOWAIT);
        tcp_pcap_adj_cluster_reference(n->m_next, 1);
    }

add_to_queue:
    /* Add the new mbuf to the list. */
    if (mbufq_enqueue(queue, n)) {
        /* This shouldn't happen. If INVARIANTS is defined, panic. */
        KASSERT(0, ("%s: mbufq was unexpectedly full!", __func__));
        tcp_pcap_m_freem(n);
    }
}

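/* Free every packet currently saved on the given queue. */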
void
tcp_pcap_drain(struct mbufq *queue)
{
    struct mbuf *m;
    while ((m = mbufq_dequeue(queue)))
        tcp_pcap_m_freem(m);
}

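/*
 * Set up the per-connection capture queues, using the sysctl-controlled
 * default for the number of packets to save in each direction.
 */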
void
tcp_pcap_tcpcb_init(struct tcpcb *tp)
{
    mbufq_init(&(tp->t_inpkts), V_tcp_pcap_packets);
    mbufq_init(&(tp->t_outpkts), V_tcp_pcap_packets);
}

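/*
 * Change the maximum number of packets saved on a queue, discarding any
 * packets already queued beyond the new limit.
 */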
void
tcp_pcap_set_sock_max(struct mbufq *queue, int newval)
{
    queue->mq_maxlen = newval;
    while (queue->mq_len > queue->mq_maxlen)
        tcp_pcap_m_freem(mbufq_dequeue(queue));
}

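/* Report the current limit on the number of saved packets for this queue. */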
int
tcp_pcap_get_sock_max(struct mbufq *queue)
{
    return queue->mq_maxlen;
}
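
Nothing in this file enables capture by itself: the per-connection queues stay empty until either the net.inet.tcp.tcp_pcap_packets default or a per-socket limit is raised. As a rough, illustrative sketch only, assuming a kernel built with "options TCPPCAP" and the TCP_PCAP_OUT/TCP_PCAP_IN socket options from <netinet/tcp.h> (which are presumably serviced by tcp_pcap_set_sock_max() above; neither is shown in this file), a userland program might request per-connection capture like this:

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

/*
 * Ask the stack to retain up to 32 packets in each direction for this
 * connection. The saved mbufs are only reachable from inside the kernel,
 * for example from a debugger inspecting the connection's tcpcb.
 */
static int
enable_tcp_pcap(int sock)
{
    int count = 32;

    if (setsockopt(sock, IPPROTO_TCP, TCP_PCAP_OUT, &count,
        sizeof(count)) == -1)
        return (-1);
    return (setsockopt(sock, IPPROTO_TCP, TCP_PCAP_IN, &count,
        sizeof(count)));
}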