38#include "opt_ipstealth.h"
47#include <sys/malloc.h>
48#include <sys/domain.h>
49#include <sys/protosw.h>
50#include <sys/socket.h>
52#include <sys/kernel.h>
54#include <sys/rmlock.h>
55#include <sys/rwlock.h>
57#include <sys/syslog.h>
58#include <sys/sysctl.h>
61#include <net/if_types.h>
62#include <net/if_var.h>
66#include <net/route/nhop.h>
67#include <net/netisr.h>
68#include <net/rss_config.h>
82#include <machine/in_cksum.h>
86#include <netipsec/ipsec_support.h>
88#include <sys/socketvar.h>
90#include <security/mac/mac_framework.h>
101extern void ipreass_destroy(
void);
107SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
108 &VNET_NAME(ipforwarding), 0,
109 "Enable IP forwarding between interfaces");
116SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
117 &VNET_NAME(ipsendredirects), 0,
118 "Enable sending IP redirects");
121#define V_ip_strong_es VNET(ip_strong_es)
123 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_strong_es),
false,
124 "Packet's IP destination address must match address on arrival interface");
127#define V_ip_sav VNET(ip_sav)
129 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_sav),
true,
130 "Drop incoming packets with source address that is a local address");
134static struct netisr_handler
ip_nh = {
137 .nh_proto = NETISR_IP,
140 .nh_policy = NETISR_POLICY_CPU,
141 .nh_dispatch = NETISR_DISPATCH_HYBRID,
143 .nh_policy = NETISR_POLICY_FLOW,
155static struct netisr_handler ip_direct_nh = {
156 .nh_name =
"ip_direct",
158 .nh_proto = NETISR_IP_DIRECT,
160 .nh_policy = NETISR_POLICY_CPU,
161 .nh_dispatch = NETISR_DISPATCH_HYBRID,
166extern struct protosw
inetsw[];
176SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
177 &ip_mtu, 0,
"Default MTU");
182SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
183 &VNET_NAME(ipstealth), 0,
184 "IP stealth mode, no TTL decrementation on forwarding");
193 "IP statistics (struct ipstat, netinet/ip_var.h)");
207 counter_u64_add(VNET(
ipstat)[statnum], 1);
214 counter_u64_add(VNET(
ipstat)[statnum], -1);
222 netisr_getqlimit(&
ip_nh, &qlimit);
223 error = sysctl_handle_int(oidp, &qlimit, 0, req);
224 if (error || !req->newptr)
228 return (netisr_setqlimit(&
ip_nh, qlimit));
231 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
233 "Maximum size of the IP input queue");
238 u_int64_t qdrops_long;
241 netisr_getqdrops(&
ip_nh, &qdrops_long);
242 qdrops = qdrops_long;
243 error = sysctl_handle_int(oidp, &qdrops, 0, req);
244 if (error || !req->newptr)
248 netisr_clearqdrops(&
ip_nh);
253 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
255 "Number of packets dropped from the IP input queue");
259sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
263 netisr_getqlimit(&ip_direct_nh, &qlimit);
264 error = sysctl_handle_int(oidp, &qlimit, 0, req);
265 if (error || !req->newptr)
269 return (netisr_setqlimit(&ip_direct_nh, qlimit));
271SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen,
272 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
273 0, 0, sysctl_netinet_intr_direct_queue_maxlen,
274 "I",
"Maximum size of the IP direct input queue");
277sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
279 u_int64_t qdrops_long;
282 netisr_getqdrops(&ip_direct_nh, &qdrops_long);
283 qdrops = qdrops_long;
284 error = sysctl_handle_int(oidp, &qdrops, 0, req);
285 if (error || !req->newptr)
289 netisr_clearqdrops(&ip_direct_nh);
293SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops,
294 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
295 sysctl_netinet_intr_direct_queue_drops,
"I",
296 "Number of packets dropped from the IP direct input queue");
306 struct pfil_head_args args;
315 args.pa_version = PFIL_VERSION;
316 args.pa_flags = PFIL_IN | PFIL_OUT;
317 args.pa_type = PFIL_TYPE_IP4;
321 if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET,
322 &V_ipsec_hhh_in[HHOOK_IPSEC_INET],
323 HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
324 printf(
"%s: WARNING: unable to register input helper hook\n",
326 if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET,
327 &V_ipsec_hhh_out[HHOOK_IPSEC_INET],
328 HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
329 printf(
"%s: WARNING: unable to register output helper hook\n",
333 netisr_register_vnet(&
ip_nh);
335 netisr_register_vnet(&ip_direct_nh);
348 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
349 KASSERT(pr, (
"%s: PF_INET not found", __func__));
352 for (
int i = 0; i < IPPROTO_MAX; i++)
360 if (pr->pr_domain->dom_family == PF_INET &&
361 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
363 if (pr->pr_protocol < IPPROTO_MAX)
367 netisr_register(&
ip_nh);
369 netisr_register(&ip_direct_nh);
376ip_destroy(
void *unused __unused)
381 netisr_unregister_vnet(&ip_direct_nh);
383 netisr_unregister_vnet(&
ip_nh);
386 error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]);
388 printf(
"%s: WARNING: unable to deregister input helper hook "
389 "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: "
390 "error %d returned\n", __func__, error);
392 error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]);
394 printf(
"%s: WARNING: unable to deregister output helper hook "
395 "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: "
396 "error %d returned\n", __func__, error);
403 rib_flush_routes_family(AF_INET);
428 ip = mtod(m,
struct ip *);
431#if defined(IPSEC) || defined(IPSEC_SUPPORT)
432 if (IPSEC_ENABLED(ipv4)) {
433 if (IPSEC_INPUT(ipv4, m, hlen,
ip->
ip_p) != 0)
450 struct ip *
ip = NULL;
463 if (m->m_flags & M_FASTFWD_OURS) {
464 m->m_flags &= ~M_FASTFWD_OURS;
466 ip = mtod(m,
struct ip *);
474 if (__predict_false(m->m_pkthdr.len <
sizeof(
struct ip)))
477 if (m->m_len <
sizeof(
struct ip)) {
478 m = m_pullup(m,
sizeof(
struct ip));
479 if (__predict_false(m == NULL)) {
484 ip = mtod(m,
struct ip *);
492 if (__predict_false(hlen <
sizeof(
struct ip))) {
496 if (hlen > m->m_len) {
497 m = m_pullup(m, hlen);
498 if (__predict_false(m == NULL)) {
502 ip = mtod(m,
struct ip *);
505 IP_PROBE(receive, NULL, NULL,
ip, m->m_pkthdr.rcvif,
ip, NULL);
508 ifp = m->m_pkthdr.rcvif;
510 IN_LOOPBACK(ntohl(
ip->ip_src.s_addr))) {
511 if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
517 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
518 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
520 if (hlen ==
sizeof(
struct ip)) {
523 sum = in_cksum(m, hlen);
526 if (__predict_false(sum)) {
532 if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
538 if (__predict_false(
ip_len < hlen)) {
549 if (__predict_false(m->m_pkthdr.len <
ip_len)) {
554 if (m->m_pkthdr.len >
ip_len) {
555 if (m->m_len == m->m_pkthdr.len) {
559 m_adj(m,
ip_len - m->m_pkthdr.len);
574#
if defined(IPSEC) || defined(IPSEC_SUPPORT)
575 && (!IPSEC_ENABLED(ipv4) ||
576 IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0)
585 if (m->m_flags & M_FASTFWD_OURS) {
586 m->m_flags &= ~M_FASTFWD_OURS;
587 ip = mtod(m,
struct ip *);
592#if defined(IPSEC) || defined(IPSEC_SUPPORT)
596 if (IPSEC_ENABLED(ipv4) &&
597 IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0)
620 ip = mtod(m,
struct ip *);
623 if (m->m_flags & M_FASTFWD_OURS) {
624 m->m_flags &= ~M_FASTFWD_OURS;
627 if (m->m_flags & M_IP_NEXTHOP) {
628 if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
665 (m->m_flags & (M_MCAST|M_BCAST)) == 0)
679 ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
680 ifp->if_carp == NULL && (dchg == 0);
693 if (__predict_false(strong_es && ia->ia_ifp != ifp)) {
702 if (
V_ip_sav && !(ifp->if_flags & IFF_LOOPBACK) &&
708 counter_u64_add(ia->
ia_ifa.ifa_ipackets, 1);
709 counter_u64_add(ia->
ia_ifa.ifa_ibytes, m->m_pkthdr.len);
721 if (ifp->if_flags & IFF_BROADCAST) {
722 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
723 if (ifa->ifa_addr->sa_family != AF_INET)
726 if (
satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
728 counter_u64_add(ia->
ia_ifa.ifa_ipackets, 1);
729 counter_u64_add(ia->
ia_ifa.ifa_ibytes,
735 counter_u64_add(ia->
ia_ifa.ifa_ipackets, 1);
736 counter_u64_add(ia->
ia_ifa.ifa_ibytes,
769 if (
ip->
ip_p == IPPROTO_IGMP) {
787 IN_LINKLOCAL(ntohl(
ip->ip_src.s_addr))) {
810 if (V_ipstealth && hlen >
sizeof (
struct ip) &&
ip_dooptions(m, 1))
823 ip = mtod(m,
struct ip *);
828#if defined(IPSEC) || defined(IPSEC_SUPPORT)
829 if (IPSEC_ENABLED(ipv4)) {
830 if (IPSEC_INPUT(ipv4, m, hlen,
ip->
ip_p) != 0)
854 VNET_ITERATOR_DECL(vnet_iter);
856 VNET_LIST_RLOCK_NOSLEEP();
857 VNET_FOREACH(vnet_iter) {
858 CURVNET_SET(vnet_iter);
862 VNET_LIST_RUNLOCK_NOSLEEP();
868 VNET_ITERATOR_DECL(vnet_iter);
870 VNET_LIST_RLOCK_NOSLEEP();
871 VNET_FOREACH(vnet_iter) {
872 CURVNET_SET(vnet_iter);
876 VNET_LIST_RUNLOCK_NOSLEEP();
889 if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
890 return (EPROTONOSUPPORT);
896 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
898 return (EPFNOSUPPORT);
905 if (pr->pr_domain->dom_family == PF_INET &&
906 pr->pr_protocol && pr->pr_protocol == ipproto) {
911 return (EPROTONOSUPPORT);
920 if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
921 return (EPROTONOSUPPORT);
924 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
926 return (EPFNOSUPPORT);
937 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
938 EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
939 EMSGSIZE, EHOSTUNREACH, 0, 0,
940 0, 0, EHOSTUNREACH, 0,
941 ENOPROTOOPT, ECONNREFUSED
961 struct ip *
ip = mtod(m,
struct ip *);
968 int error, type = 0, code = 0, mtu = 0;
986 bzero(&ro,
sizeof(ro));
991 flowid = m->m_pkthdr.flowid;
993 if (ro.ro_nh != NULL) {
994 ia =
ifatoia(ro.ro_nh->nh_ifa);
1013 mcopy = m_gethdr(M_NOWAIT, m->m_type);
1014 if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
1024 if (mcopy != NULL) {
1025 mcopy->m_len = min(ntohs(
ip->
ip_len), M_TRAILINGSPACE(mcopy));
1026 mcopy->m_pkthdr.len = mcopy->m_len;
1027 m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1030 if (V_ipstealth == 0)
1033#if defined(IPSEC) || defined(IPSEC_SUPPORT)
1034 if (IPSEC_ENABLED(ipv4)) {
1035 if ((error = IPSEC_FORWARD(ipv4, m)) != 0) {
1039 if (error != EINPROGRESS)
1056 ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
1057 struct nhop_object *nh;
1061 if (nh != NULL && ((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) {
1063 u_long src = ntohl(
ip->ip_src.s_addr);
1065 if (nh_ia != NULL &&
1070 if (nh->nh_flags & NHF_GATEWAY) {
1071 if (nh->gw_sa.sa_family == AF_INET)
1072 dest.
s_addr = nh->gw4_sa.sin_addr.s_addr;
1083 if (error == EMSGSIZE && ro.ro_nh)
1084 mtu = ro.ro_nh->nh_mtu;
1128 mtu = min(mtu, ia->ia_ifp->if_mtu);
1131 mtu = ia->ia_ifp->if_mtu;
1146#define CHECK_SO_CT(sp, ct) \
1147 (((sp->so_options & SO_TIMESTAMP) && (sp->so_ts_clock == ct)) ? 1 : 0)
1156 if ((inp->
inp_socket->so_options & SO_BINTIME) ||
1158 struct bintime boottimebin, bt;
1159 struct timespec ts1;
1161 if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
1163 mbuf_tstmp2timespec(m, &ts1);
1164 timespec2bintime(&ts1, &bt);
1165 getboottimebin(&boottimebin);
1166 bintime_add(&bt, &boottimebin);
1170 *mp = sbcreatecontrol((caddr_t)&bt,
sizeof(bt),
1171 SCM_BINTIME, SOL_SOCKET);
1173 mp = &(*mp)->m_next;
1178 struct bintime boottimebin, bt1;
1179 struct timespec ts1;
1182 if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
1184 mbuf_tstmp2timespec(m, &ts1);
1185 timespec2bintime(&ts1, &bt1);
1186 getboottimebin(&boottimebin);
1187 bintime_add(&bt1, &boottimebin);
1188 bintime2timeval(&bt1, &tv);
1192 *mp = sbcreatecontrol((caddr_t)&tv,
sizeof(tv),
1193 SCM_TIMESTAMP, SOL_SOCKET);
1195 mp = &(*mp)->m_next;
1199 struct bintime boottimebin;
1200 struct timespec ts, ts1;
1202 if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
1204 mbuf_tstmp2timespec(m, &ts);
1205 getboottimebin(&boottimebin);
1206 bintime2timespec(&boottimebin, &ts1);
1207 timespecadd(&ts, &ts1, &ts);
1211 *mp = sbcreatecontrol((caddr_t)&ts,
sizeof(ts),
1212 SCM_REALTIME, SOL_SOCKET);
1214 mp = &(*mp)->m_next;
1220 if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
1222 mbuf_tstmp2timespec(m, &ts);
1225 *mp = sbcreatecontrol((caddr_t)&ts,
sizeof(ts),
1226 SCM_MONOTONIC, SOL_SOCKET);
1228 mp = &(*mp)->m_next;
1232 if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
1234 struct sock_timestamp_info sti;
1236 bzero(&sti,
sizeof(sti));
1237 sti.st_info_flags = ST_INFO_HW;
1238 if ((m->m_flags & M_TSTMP_HPREC) != 0)
1239 sti.st_info_flags |= ST_INFO_HW_HPREC;
1240 *mp = sbcreatecontrol((caddr_t)&sti,
sizeof(sti), SCM_TIME_INFO,
1243 mp = &(*mp)->m_next;
1246 *mp = sbcreatecontrol((caddr_t)&
ip->
ip_dst,
1249 mp = &(*mp)->m_next;
1252 *mp = sbcreatecontrol((caddr_t)&
ip->
ip_ttl,
1255 mp = &(*mp)->m_next;
1264 *mp = sbcreatecontrol((caddr_t)opts_deleted_above,
1267 mp = &(*mp)->m_next;
1274 mp = &(*mp)->m_next;
1280 struct sockaddr_dl sdl;
1283 struct sockaddr_dl *sdp;
1284 struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
1286 if ((ifp = m->m_pkthdr.rcvif)) {
1287 sdp = (
struct sockaddr_dl *)ifp->if_addr->ifa_addr;
1291 if (sdp->sdl_family != AF_LINK ||
1292 sdp->sdl_len >
sizeof(sdlbuf)) {
1295 bcopy(sdp, sdl2, sdp->sdl_len);
1299 offsetof(
struct sockaddr_dl, sdl_data[0]);
1300 sdl2->sdl_family = AF_LINK;
1301 sdl2->sdl_index = 0;
1302 sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1304 *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
1307 mp = &(*mp)->m_next;
1310 *mp = sbcreatecontrol((caddr_t)&
ip->
ip_tos,
1313 mp = &(*mp)->m_next;
1319 flowid = m->m_pkthdr.flowid;
1320 flow_type = M_HASHTYPE_GET(m);
1326 *mp = sbcreatecontrol((caddr_t) &flowid,
1329 mp = &(*mp)->m_next;
1330 *mp = sbcreatecontrol((caddr_t) &flow_type,
1333 mp = &(*mp)->m_next;
1341 flowid = m->m_pkthdr.flowid;
1342 flow_type = M_HASHTYPE_GET(m);
1344 if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
1345 *mp = sbcreatecontrol((caddr_t) &rss_bucketid,
1348 mp = &(*mp)->m_next;
1363#define V_ip_rsvp_on VNET(ip_rsvp_on)
1369 if (so->so_type != SOCK_RAW ||
1370 so->so_proto->pr_protocol != IPPROTO_RSVP)
1416 return (IPPROTO_DONE);
1426 return (IPPROTO_DONE);
1432 return (IPPROTO_DONE);
1436 return (IPPROTO_DONE);
VNET_PCPUSTAT_SYSUNINIT(igmpstat)
void in_ifscrub_all(void)
int in_canforward(struct in_addr in)
bool in_localip_fib(struct in_addr in, uint16_t fib)
u_int in_cksum_hdr(const struct ip *ip)
struct nhop_object * fib4_lookup(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, uint32_t flags, uint32_t flowid)
#define IP_PROBE(probe, arg0, arg1, arg2, arg3, arg4, arg5)
#define INP_RECVRSSBUCKETID
void ip_direct_input(struct mbuf *)
#define V_in_ifaddrhashtbl
struct mbuf * ip_tryforward(struct mbuf *)
static LIST_HEAD(carp_softc)
VNET_SYSUNINIT(divert, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, div_destroy, NULL)
#define V_ipsendredirects
#define ICMP_UNREACH_HOST
#define ICMP_UNREACH_NEEDFRAG
int ip_next_mtu(int, int)
#define ICMP_REDIRECT_HOST
#define ICMP_TIMXCEED_INTRANS
void icmp_error(struct mbuf *, int, int, uint32_t, int)
struct mbuf * ip_srcroute(struct mbuf *m0)
int ip_dooptions(struct mbuf *m, int pass)
int ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct ip_moptions *imo, struct inpcb *inp)
struct mbuf * ip_reass(struct mbuf *m)
int(* rsvp_input_p)(struct mbuf **, int *, int)
int(* ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *)
int rip_input(struct mbuf **, int *, int)
struct socket * inp_socket
struct in_addr ip_src ip_dst