41#include <sys/eventhandler.h>
42#include <sys/kernel.h>
45#include <sys/malloc.h>
46#include <sys/limits.h>
49#include <sys/sysctl.h>
50#include <sys/socket.h>
53#include <net/if_var.h>
54#include <net/rss_config.h>
55#include <net/netisr.h>
63#include <security/mac/mac_framework.h>
/*
 * Sizing of the reassembly hash table.
 *
 * IPREASS_NHASH_LOG2 is the base-2 logarithm of the table size,
 * IPREASS_NHASH is the resulting number of entries (a power of two), and
 * IPREASS_HMASK is that count minus one, i.e. a bit mask with the low
 * IPREASS_NHASH_LOG2 bits set.
 * NOTE(review): the mask is presumably applied to a hash value to pick a
 * bucket index -- confirm against the hashing code.
 */
71#define IPREASS_NHASH_LOG2 10
72#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
73#define IPREASS_HMASK (IPREASS_NHASH - 1)
82#define V_ipq VNET(ipq)
84#define V_ipq_hashseed VNET(ipq_hashseed)
/*
 * Helpers for the mutex embedded in bucket i of V_ipq.
 *
 * IPQ_LOCK        - acquire V_ipq[i].lock via mtx_lock().
 * IPQ_TRYLOCK     - attempt the acquisition via mtx_trylock(); the result of
 *                   mtx_trylock() is the value of the expression.
 * IPQ_UNLOCK      - release V_ipq[i].lock via mtx_unlock().
 * IPQ_LOCK_ASSERT - mtx_assert() that V_ipq[i].lock is held (MA_OWNED).
 *
 * The argument is used only as an array subscript, so it needs no extra
 * parentheses inside the expansion.
 */
86#define IPQ_LOCK(i) mtx_lock(&V_ipq[i].lock)
87#define IPQ_TRYLOCK(i) mtx_trylock(&V_ipq[i].lock)
88#define IPQ_UNLOCK(i) mtx_unlock(&V_ipq[i].lock)
89#define IPQ_LOCK_ASSERT(i) mtx_assert(&V_ipq[i].lock, MA_OWNED)
92#define V_ipreass_maxbucketsize VNET(ipreass_maxbucketsize)
98void ipreass_destroy(
void);
/*
 * Fragment-related limits derived from system sizing.
 *
 * IP_MAXFRAGS is one thirty-second of nmbclusters.
 * IP_MAXFRAGPACKETS is the smaller of IP_MAXFRAGS and 50 times the number
 * of hash table entries (IPREASS_NHASH).
 * NOTE(review): these are presumably the initial values for the fragment
 * and reassembly-queue limits -- confirm where they are assigned.
 */
136#define IP_MAXFRAGS (nmbclusters / 32)
137#define IP_MAXFRAGPACKETS (imin(IP_MAXFRAGS, IPREASS_NHASH * 50))
/*
 * File-scope counter of fragments currently held for reassembly.
 * Code in this file reads it with atomic_load_int() and adjusts it with
 * atomic_add_int()/atomic_subtract_int().  It carries the
 * __exclusive_cache_line attribute.
 */
140static u_int __exclusive_cache_line
nfrags;
143 "Maximum number of IPv4 fragments allowed across all reassembly queues");
146 "Current number of IPv4 fragments across all reassembly queues");
149#define V_ipq_zone VNET(ipq_zone)
151 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
153 "Maximum number of IPv4 fragment reassembly queue entries");
155 &VNET_NAME(ipq_zone),
156 "Current number of IPv4 fragment reassembly queue entries");
159#define V_noreass VNET(noreass)
162#define V_maxfragsperpacket VNET(maxfragsperpacket)
163SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_VNET | CTLFLAG_RW,
164 &VNET_NAME(maxfragsperpacket), 0,
165 "Maximum number of IPv4 fragments allowed per packet");
167 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
169 "Maximum number of IPv4 fragment reassembly queue entries per bucket");
/*
 * Local name for the M_PROTO9 mbuf flag.  This file clears it from
 * m->m_flags and later tests it before incrementing nfrags.
 * NOTE(review): the precise meaning of the flag (which mbufs carry it and
 * who sets it) is not visible here -- confirm before relying on it.
 */
181#define M_IP_FRAG M_PROTO9
186 struct mbuf *p, *q, *nq, *t;
188 struct ifnet *srcifp;
189 struct ipqhead *head;
190 int i, hlen,
next, tmpmax;
205 (tmpmax >= 0 && atomic_load_int(&
nfrags) >= (u_int)tmpmax)) {
212 ip = mtod(m,
struct ip *);
235 m->m_flags &= ~M_IP_FRAG;
251 srcifp = m->m_pkthdr.rcvif;
258 m->m_pkthdr.PH_loc.ptr =
ip;
267 hashkey[0] =
ip->ip_src.s_addr;
273 head = &
V_ipq[hash].head;
280 TAILQ_FOREACH(fp, head, ipq_list)
282 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
285 mac_ipq_match(m, fp) &&
300 if (mac_ipq_init(fp, M_NOWAIT) != 0) {
305 mac_ipq_create(m, fp);
307 TAILQ_INSERT_HEAD(head, fp, ipq_list);
310 atomic_add_int(&
nfrags, 1);
314 fp->ipq_src =
ip->ip_src;
337 }
else if ((m->m_flags &
M_IP_FRAG) == 0)
340 atomic_add_int(&
nfrags, 1);
342 mac_ipq_update(m, fp);
/*
 * Return the pointer held in the mbuf packet header's PH_loc.ptr field,
 * cast to struct ip *.  This file assigns the IP header pointer to that
 * field before the macro is used.
 */
346#define GETIP(m) ((struct ip*)((m)->m_pkthdr.PH_loc.ptr))
367 for (p = NULL, q = fp->
ipq_frags; q; p = q, q = q->m_nextpkt)
387 m->m_pkthdr.csum_flags = 0;
391 m->m_nextpkt = p->m_nextpkt;
405 ntohs(
GETIP(q)->ip_off);
410 q->m_pkthdr.csum_flags = 0;
417 atomic_subtract_int(&
nfrags, 1);
432 for (p = NULL, q = fp->
ipq_frags; q; p = q, q = q->m_nextpkt) {
467 for (q = nq; q != NULL; q = nq) {
470 m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
471 m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
480 while (m->m_pkthdr.csum_data & 0xffff0000)
481 m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
482 (m->m_pkthdr.csum_data >> 16);
485 mac_ipq_reassemble(fp, m);
495 ip->ip_src = fp->ipq_src;
497 TAILQ_REMOVE(head, fp, ipq_list);
503 if (m->m_flags & M_PKTHDR) {
506 m->m_pkthdr.rcvif = srcifp;
527 m->m_pkthdr.flowid = rss_hash;
528 M_HASHTYPE_SET(m, rss_type);
538 netisr_dispatch(NETISR_IP_DIRECT, m);
549 atomic_subtract_int(&
nfrags, 1);
568 TAILQ_INIT(&
V_ipq[i].head);
569 mtx_init(&
V_ipq[i].lock,
"IP reassembly", NULL,
570 MTX_DEF | MTX_DUPOK);
575 V_ipq_zone = uma_zcreate(
"ipq",
sizeof(
struct ipq), NULL, NULL, NULL,
576 NULL, UMA_ALIGN_PTR, 0);
581 if (IS_DEFAULT_VNET(curvnet)) {
584 NULL, EVENTHANDLER_PRI_ANY);
594 struct ipq *fp, *tmp;
596 if (atomic_load_int(&
nfrags) == 0)
600 if (TAILQ_EMPTY(&
V_ipq[i].head))
603 TAILQ_FOREACH_SAFE(fp, &
V_ipq[i].head, ipq_list, tmp)
619 while(!TAILQ_EMPTY(&
V_ipq[i].head))
622 (
"%s: V_ipq[%d] count %d (V_ipq=%p)", __func__, i,
635 struct ipq *fp, *temp;
639 KASSERT(ifp != NULL, (
"%s: ifp is NULL", __func__));
641 CURVNET_SET_QUIET(ifp->if_vnet);
655 TAILQ_FOREACH_SAFE(fp, &
V_ipq[i].head, ipq_list, temp) {
656 for (m = fp->
ipq_frags; m != NULL; m = m->m_nextpkt) {
658 if (m->m_pkthdr.rcvif == ifp)
659 m->m_pkthdr.rcvif = NULL;
680 mtx_destroy(&
V_ipq[i].lock);
703 (fp = TAILQ_LAST(&
V_ipq[i].head, ipqhead)) != NULL)
715 while (uma_zone_get_cur(
V_ipq_zone) > target) {
718 fp = TAILQ_LAST(&
V_ipq[i].head, ipqhead);
729 VNET_ITERATOR_DECL(vnet_iter);
734 VNET_LIST_RLOCK_NOSLEEP();
735 VNET_FOREACH(vnet_iter) {
736 CURVNET_SET(vnet_iter);
742 VNET_LIST_RUNLOCK_NOSLEEP();
761 error = sysctl_handle_int(oidp, &max, 0, req);
762 if (error || !req->newptr)
773 }
else if (max == 0) {
776 }
else if (max == -1) {
839 TAILQ_REMOVE(&
bucket->head, fp, ipq_list);
853 error = sysctl_handle_int(oidp, &max, 0, req);
854 if (error || !req->newptr)
static TAILQ_HEAD(handler_chain, proto_handler)
EVENTHANDLER_DEFINE(ifnet_departure_event, ipreass_cleanup, NULL, 0)
static void ipreass_drain_tomax(void)
struct mbuf * ip_reass(struct mbuf *m)
#define IP_MAXFRAGPACKETS
SYSCTL_UINT(_net_inet_ip, OID_AUTO, curfrags, CTLFLAG_RD, &nfrags, 0, "Current number of IPv4 fragments across all reassembly queues")
static void ipq_free(struct ipqbucket *, struct ipq *)
static u_int __exclusive_cache_line nfrags
VNET_DEFINE_STATIC(struct ipqbucket, ipq[IPREASS_NHASH])
SYSCTL_DECL(_net_inet_ip)
static void ipreass_zone_change(void *)
#define IPQ_LOCK_ASSERT(i)
static int sysctl_maxfragpackets(SYSCTL_HANDLER_ARGS)
#define V_ipreass_maxbucketsize
SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_VNET|CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_NEEDGIANT, NULL, 0, sysctl_maxfragpackets, "I", "Maximum number of IPv4 fragment reassembly queue entries")
static void ipq_timeout(struct ipqbucket *bucket, struct ipq *fp)
static void ipreass_cleanup(void *arg __unused, struct ifnet *ifp)
static struct ipq * ipq_reuse(int)
static void ipq_drop(struct ipqbucket *bucket, struct ipq *fp)
static int sysctl_maxfragbucketsize(SYSCTL_HANDLER_ARGS)
void ipreass_slowtimo(void)
#define V_maxfragsperpacket
SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfrags, CTLFLAG_RW, &maxfrags, 0, "Maximum number of IPv4 fragments allowed across all reassembly queues")
SYSCTL_UMA_CUR(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_VNET, &VNET_NAME(ipq_zone), "Current number of IPv4 fragment reassembly queue entries")
#define IPSTAT_ADD(name, val)
struct in_addr ip_src ip_dst
struct in_addr ipq_src ipq_dst