39#include "opt_kern_tls.h"
40#include "opt_mbuf_stress_test.h"
41#include "opt_ratelimit.h"
48#include <sys/kernel.h>
51#include <sys/malloc.h>
55#include <sys/protosw.h>
57#include <sys/socket.h>
58#include <sys/socketvar.h>
59#include <sys/sysctl.h>
63#include <net/if_var.h>
64#include <net/if_vlan_var.h>
65#include <net/if_llatbl.h>
66#include <net/ethernet.h>
67#include <net/netisr.h>
70#include <net/route/nhop.h>
71#include <net/rss_config.h>
89#if defined(SCTP) || defined(SCTP_SUPPORT)
94#include <netipsec/ipsec_support.h>
96#include <machine/in_cksum.h>
98#include <security/mac/mac_framework.h>
100#ifdef MBUF_STRESS_TEST
101static int mbuf_frag_size = 0;
102SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
103 &mbuf_frag_size, 0,
"Fragment outgoing mbufs to this size");
106static void ip_mloopback(
struct ifnet *,
const struct mbuf *,
int);
109extern struct protosw
inetsw[];
115 struct m_tag *fwd_tag = NULL;
119 int pflags = PFIL_OUT;
125 ip = mtod(m,
struct ip *);
139 ip = mtod(m,
struct ip *);
143 m->m_flags |= M_SKIP_FIREWALL;
146 m->m_flags |= M_FASTFWD_OURS;
147 if (m->m_pkthdr.rcvif == NULL)
148 m->m_pkthdr.rcvif = V_loif;
149 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
150 m->m_pkthdr.csum_flags |=
151 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
152 m->m_pkthdr.csum_data = 0xffff;
154 m->m_pkthdr.csum_flags |=
155 CSUM_IP_CHECKED | CSUM_IP_VALID;
156#if defined(SCTP) || defined(SCTP_SUPPORT)
157 if (m->m_pkthdr.csum_flags & CSUM_SCTP)
158 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
160 *error = netisr_queue(NETISR_IP, m);
164 bzero(dst,
sizeof(*dst));
172 if ((*fibnum) != M_GETFIB(m)) {
173 m->m_flags |= M_SKIP_FIREWALL;
174 *fibnum = M_GETFIB(m);
179 if (m->m_flags & M_FASTFWD_OURS) {
180 if (m->m_pkthdr.rcvif == NULL)
181 m->m_pkthdr.rcvif = V_loif;
182 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
183 m->m_pkthdr.csum_flags |=
184 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
185 m->m_pkthdr.csum_data = 0xffff;
187#if defined(SCTP) || defined(SCTP_SUPPORT)
188 if (m->m_pkthdr.csum_flags & CSUM_SCTP)
189 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
191 m->m_pkthdr.csum_flags |=
192 CSUM_IP_CHECKED | CSUM_IP_VALID;
194 *error = netisr_queue(NETISR_IP, m);
198 if ((m->m_flags & M_IP_NEXTHOP) &&
199 ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) {
200 bcopy((fwd_tag+1), dst,
sizeof(
struct sockaddr_in));
201 m->m_flags |= M_SKIP_FIREWALL;
202 m->m_flags &= ~M_IP_NEXTHOP;
203 m_tag_delete(m, fwd_tag);
213 const struct sockaddr *gw,
struct route *ro,
bool stamp_tag)
216 struct ktls_session *tls = NULL;
218 struct m_snd_tag *mst;
221 MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
231 if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) {
232 tls = ktls_hold(m->m_next->m_epg_tls);
252 if (inp != NULL && mst == NULL) {
256 in_pcboutput_txrtlmt(inp, ifp, m);
262 if (stamp_tag && mst != NULL) {
263 KASSERT(m->m_pkthdr.rcvif == NULL,
264 (
"trying to add a send tag to a forwarded packet"));
265 if (mst->ifp != ifp) {
272 m->m_pkthdr.snd_tag = m_snd_tag_ref(mst);
273 m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
276 error = (*ifp->if_output)(ifp, m, gw, ro);
283 error = ktls_output_eagain(inp, tls);
289 in_pcboutput_eagain(inp);
298 int nh_flags = nh->nh_flags;
300 ro->ro_flags &= ~ (RT_REJECT|RT_BLACKHOLE|RT_HAS_GW);
302 ro->ro_flags |= (nh_flags & NHF_REJECT) ? RT_REJECT : 0;
303 ro->ro_flags |= (nh_flags & NHF_BLACKHOLE) ? RT_BLACKHOLE : 0;
304 ro->ro_flags |= (nh_flags & NHF_GATEWAY) ? RT_HAS_GW : 0;
320ip_output(
struct mbuf *m,
struct mbuf *opt,
struct route *ro,
int flags,
321 struct ip_moptions *imo,
struct inpcb *inp)
324 struct ifnet *ifp = NULL;
326 int hlen =
sizeof (
struct ip);
331 const struct sockaddr *gw;
336 struct route iproute;
338#if defined(IPSEC) || defined(IPSEC_SUPPORT)
339 int no_route_but_check_spd = 0;
366 ip = mtod(m,
struct ip *);
390 bzero(ro,
sizeof (*ro));
393 if (ro->ro_nh == NULL) {
398 gw = (
const struct sockaddr *)dst;
404 if (inp != NULL && ro->ro_nh != NULL)
414 if (ro->ro_nh != NULL &&
415 ((!NH_IS_VALID(ro->ro_nh)) || dst->
sin_family != AF_INET ||
417 RO_INVALIDATE_CACHE(ro);
427 M_GETFIB(m)))) == NULL &&
429 M_GETFIB(m)))) == NULL) {
440 src =
IA_SIN(ia)->sin_addr;
443 M_GETFIB(m)))) == NULL &&
445 M_GETFIB(m)))) == NULL) {
453 isbroadcast = ifp->if_flags & IFF_BROADCAST ?
455 src =
IA_SIN(ia)->sin_addr;
457 imo != NULL && imo->imo_multicast_ifp != NULL) {
462 ifp = imo->imo_multicast_ifp;
468 src =
IA_SIN(ia)->sin_addr;
471 }
else if (ro != &iproute) {
472 if (ro->ro_nh == NULL) {
479 flowid = m->m_pkthdr.flowid;
483 if (ro->ro_nh == NULL || (!NH_IS_VALID(ro->ro_nh))) {
484#if defined(IPSEC) || defined(IPSEC_SUPPORT)
489 no_route_but_check_spd = 1;
493 error = EHOSTUNREACH;
497 struct nhop_object *nh = ro->ro_nh;
501 counter_u64_add(nh->nh_pksent, 1);
503 if (nh->nh_flags & NHF_GATEWAY)
505 if (nh->nh_flags & NHF_HOST)
506 isbroadcast = (nh->nh_flags & NHF_BROADCAST);
507 else if ((ifp->if_flags & IFF_BROADCAST) && (gw->sa_family == AF_INET))
512 src =
IA_SIN(ia)->sin_addr;
514 struct nhop_object *nh;
519#if defined(IPSEC) || defined(IPSEC_SUPPORT)
524 no_route_but_check_spd = 1;
528 error = EHOSTUNREACH;
534 if (nh->nh_flags & NHF_GATEWAY)
537 src =
IA_SIN(ia)->sin_addr;
538 isbroadcast = (((nh->nh_flags & (NHF_HOST | NHF_BROADCAST)) ==
539 (NHF_HOST | NHF_BROADCAST)) ||
540 ((ifp->if_flags & IFF_BROADCAST) &&
541 (gw->sa_family == AF_INET) &&
546 KASSERT(mtu > 0, (
"%s: mtu %d <= 0, ro=%p (nh_flags=0x%08x) ifp=%p",
548 (ro != NULL && ro->ro_nh != NULL) ? ro->ro_nh->nh_flags : 0, ifp));
551 m->m_flags |= M_MCAST;
557 gw = (
const struct sockaddr *)dst;
562 ip->
ip_ttl = imo->imo_multicast_ttl;
563 if (imo->imo_multicast_vif != -1)
569 ip->
ip_ttl = IP_DEFAULT_MULTICAST_TTL;
573 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
574 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
588 (imo && imo->imo_multicast_loop)) {
636 if (
ip->
ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
657 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
658 error = EADDRNOTAVAIL;
670 m->m_flags |= M_BCAST;
672 m->m_flags &= ~M_BCAST;
676#if defined(IPSEC) || defined(IPSEC_SUPPORT)
677 if (IPSEC_ENABLED(ipv4)) {
678 if ((error = IPSEC_OUTPUT(ipv4, m, inp)) != 0) {
679 if (error == EINPROGRESS)
687 if (no_route_but_check_spd) {
689 error = EHOSTUNREACH;
693 ip = mtod(m,
struct ip *);
705 ip = mtod(m,
struct ip *);
712 ro->ro_prepend = NULL;
714 gw = (
const struct sockaddr *)dst;
715 ip = mtod(m,
struct ip *);
721 EVL_APPLY_PRI(m, vlan_pcp);
725 IN_LOOPBACK(ntohl(
ip->ip_src.s_addr))) {
726 if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
728 error = EADDRNOTAVAIL;
734 if ((ifp->if_capenable & IFCAP_MEXTPG) == 0) {
735 m = mb_unmapped_to_ext(m);
743 m->m_pkthdr.csum_flags |= CSUM_IP;
744 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
746 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
748#if defined(SCTP) || defined(SCTP_SUPPORT)
749 if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
751 m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
763 (m->m_pkthdr.csum_flags & ifp->if_hwassist &
764 (CSUM_TSO | CSUM_INNER_TSO)) != 0) {
766 if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
768 m->m_pkthdr.csum_flags &= ~CSUM_IP;
778 if (m->m_pkthdr.csum_flags &
779 (CSUM_TSO | CSUM_INNER_TSO))
780 counter_u64_add(ia->
ia_ifa.ifa_opackets,
781 m->m_pkthdr.len / m->m_pkthdr.tso_segsz);
783 counter_u64_add(ia->
ia_ifa.ifa_opackets, 1);
785 counter_u64_add(ia->
ia_ifa.ifa_obytes, m->m_pkthdr.len);
787#ifdef MBUF_STRESS_TEST
788 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
789 m = m_fragment(m, M_NOWAIT, mbuf_frag_size);
804 (m->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_INNER_TSO))) {
823 counter_u64_add(ia->
ia_ifa.ifa_opackets, 1);
824 counter_u64_add(ia->
ia_ifa.ifa_obytes,
833 IP_PROBE(send, NULL, NULL, mtod(m,
struct ip *), ifp,
834 mtod(m,
struct ip *), NULL);
860 u_long if_hwassist_flags)
864 int len = (mtu - hlen) & ~7;
866 struct mbuf *m0 = *m_frag;
890 if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
892 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
894#if defined(SCTP) || defined(SCTP_SUPPORT)
895 if (m0->m_pkthdr.csum_flags & CSUM_SCTP) {
896 sctp_delayed_cksum(m0, hlen);
897 m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
900 if (len > PAGE_SIZE) {
913 off = MIN(mtu, m0->m_pkthdr.len);
920 goto smart_frag_failure;
921 off = ((off - hlen) & ~7) + hlen;
922 newlen = (~PAGE_MASK) & mtu;
923 if ((newlen +
sizeof (
struct ip)) > mtu) {
935 firstlen = off - hlen;
936 mnext = &m0->m_nextpkt;
948 int mhlen =
sizeof (
struct ip);
950 m = m_gethdr(M_NOWAIT, MT_DATA);
963 if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) {
974 m->m_data += max_linkhdr;
975 mhip = mtod(m,
struct ip *);
977 if (hlen >
sizeof (
struct ip)) {
980 mhip->
ip_hl = mhlen >> 2;
989 mhip->
ip_len = htons((u_short)(len + mhlen));
990 m->m_next = m_copym(m0, off, len, M_NOWAIT);
991 if (m->m_next == NULL) {
997 m->m_pkthdr.len = mhlen + len;
999 mac_netinet_fragment(m0, m);
1003 if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
1004 mhip->
ip_sum = in_cksum(m, mhlen);
1005 m->m_pkthdr.csum_flags &= ~CSUM_IP;
1008 mnext = &m->m_nextpkt;
1016 m_adj(m0, hlen + firstlen -
ip_len);
1017 m0->m_pkthdr.len = hlen + firstlen;
1018 ip->
ip_len = htons((u_short)m0->m_pkthdr.len);
1021 if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
1023 m0->m_pkthdr.csum_flags &= ~CSUM_IP;
1038 ip = mtod(m,
struct ip *);
1041 if (m->m_pkthdr.csum_flags & CSUM_UDP) {
1043 if (offset +
sizeof(
struct udphdr) > m->m_len) {
1044 m_copydata(m, offset + offsetof(
struct udphdr,
1045 uh_ulen),
sizeof(cklen), (caddr_t)&cklen);
1046 cklen = ntohs(cklen);
1048 uh = (
struct udphdr *)mtodo(m, offset);
1058 offset += m->m_pkthdr.csum_data;
1060 if (offset +
sizeof(csum) > m->m_len)
1061 m_copyback(m, offset,
sizeof(csum), (caddr_t)&csum);
1063 *(u_short *)mtodo(m, offset) = csum;
1083 if (sopt->sopt_level == SOL_SOCKET &&
1084 sopt->sopt_dir == SOPT_SET) {
1085 switch (sopt->sopt_name) {
1088 if ((so->so_options & SO_REUSEADDR) != 0)
1097 if ((so->so_options & SO_REUSEPORT) != 0)
1104 case SO_REUSEPORT_LB:
1106 if ((so->so_options & SO_REUSEPORT_LB) != 0)
1119 case SO_MAX_PACING_RATE:
1136 switch (sopt->sopt_dir) {
1138 switch (sopt->sopt_name) {
1145 if (sopt->sopt_valsize > MLEN) {
1149 m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
1154 m->m_len = sopt->sopt_valsize;
1155 error = sooptcopyin(sopt, mtod(m,
char *), m->m_len,
1168 if (sopt->sopt_td != NULL) {
1169 error = priv_check(sopt->sopt_td,
1170 PRIV_NETINET_BINDANY);
1177 case IP_RSS_LISTEN_BUCKET:
1183 case IP_RECVRETOPTS:
1184 case IP_ORIGDSTADDR:
1185 case IP_RECVDSTADDR:
1193 case IP_RECVRSSBUCKETID:
1196 error = sooptcopyin(sopt, &optval,
sizeof optval,
1201 switch (sopt->sopt_name) {
1211 if (optval >= 0 && optval <=
MAXTTL)
1217#define OPTSET(bit) do { \
1220 inp->inp_flags |= bit; \
1222 inp->inp_flags &= ~bit; \
1226#define OPTSET2(bit, val) do { \
1229 inp->inp_flags2 |= bit; \
1231 inp->inp_flags2 &= ~bit; \
1239 case IP_RECVRETOPTS:
1243 case IP_RECVDSTADDR:
1247 case IP_ORIGDSTADDR:
1278 case IP_RSS_LISTEN_BUCKET:
1279 if ((optval >= 0) &&
1280 (optval < rss_getnumbuckets())) {
1287 case IP_RECVRSSBUCKETID:
1292 if ((optval >= -1) && (optval <=
1322 case IP_MULTICAST_IF:
1323 case IP_MULTICAST_VIF:
1324 case IP_MULTICAST_TTL:
1325 case IP_MULTICAST_LOOP:
1326 case IP_ADD_MEMBERSHIP:
1327 case IP_DROP_MEMBERSHIP:
1328 case IP_ADD_SOURCE_MEMBERSHIP:
1329 case IP_DROP_SOURCE_MEMBERSHIP:
1330 case IP_BLOCK_SOURCE:
1331 case IP_UNBLOCK_SOURCE:
1333 case MCAST_JOIN_GROUP:
1334 case MCAST_LEAVE_GROUP:
1335 case MCAST_JOIN_SOURCE_GROUP:
1336 case MCAST_LEAVE_SOURCE_GROUP:
1337 case MCAST_BLOCK_SOURCE:
1338 case MCAST_UNBLOCK_SOURCE:
1343 error = sooptcopyin(sopt, &optval,
sizeof optval,
1350 case IP_PORTRANGE_DEFAULT:
1355 case IP_PORTRANGE_HIGH:
1360 case IP_PORTRANGE_LOW:
1372#if defined(IPSEC) || defined(IPSEC_SUPPORT)
1373 case IP_IPSEC_POLICY:
1374 if (IPSEC_ENABLED(ipv4)) {
1375 error = IPSEC_PCBCTL(ipv4, inp, sopt);
1382 error = ENOPROTOOPT;
1388 switch (sopt->sopt_name) {
1393 struct mbuf *options;
1396 M_COPYALL, M_NOWAIT);
1398 if (options != NULL) {
1399 error = sooptcopyout(sopt,
1400 mtod(options,
char *),
1407 sopt->sopt_valsize = 0;
1415 case IP_RECVRETOPTS:
1416 case IP_ORIGDSTADDR:
1417 case IP_RECVDSTADDR:
1430 case IP_RSSBUCKETID:
1431 case IP_RECVRSSBUCKETID:
1434 switch (sopt->sopt_name) {
1447#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1448#define OPTBIT2(bit) (inp->inp_flags2 & bit ? 1 : 0)
1454 case IP_RECVRETOPTS:
1458 case IP_RECVDSTADDR:
1462 case IP_ORIGDSTADDR:
1476 optval = IP_PORTRANGE_HIGH;
1478 optval = IP_PORTRANGE_LOW;
1505 case IP_RSSBUCKETID:
1510 optval = rss_bucket;
1514 case IP_RECVRSSBUCKETID:
1530 error = sooptcopyout(sopt, &optval,
sizeof optval);
1537 case IP_MULTICAST_IF:
1538 case IP_MULTICAST_VIF:
1539 case IP_MULTICAST_TTL:
1540 case IP_MULTICAST_LOOP:
1545#if defined(IPSEC) || defined(IPSEC_SUPPORT)
1546 case IP_IPSEC_POLICY:
1547 if (IPSEC_ENABLED(ipv4)) {
1548 error = IPSEC_PCBCTL(ipv4, inp, sopt);
1555 error = ENOPROTOOPT;
1580 copym = m_dup(m, M_NOWAIT);
1581 if (copym != NULL && (!M_WRITABLE(copym) || copym->m_len < hlen))
1582 copym = m_pullup(copym, hlen);
1583 if (copym != NULL) {
1585 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1587 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1588 copym->m_pkthdr.csum_flags |=
1589 CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1590 copym->m_pkthdr.csum_data = 0xffff;
1596 ip = mtod(copym,
struct ip *);
1598 ip->
ip_sum = in_cksum(copym, hlen);
1599 if_simloop(ifp, copym, AF_INET, 0);
SYSCTL_INT(_net_inet_accf_http, OID_AUTO, parsehttpversion, CTLFLAG_RW, &parse_http_version, 1, "Parse http version so that non 1.x requests work")
int in_ifaddr_broadcast(struct in_addr in, struct in_ifaddr *ia)
bool in_localip(struct in_addr in)
u_short in_cksum_skip(struct mbuf *m, int len, int skip)
struct nhop_object * fib4_lookup(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, uint32_t flags, uint32_t flowid)
#define IP_PROBE(probe, arg0, arg1, arg2, arg3, arg4, arg5)
int inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
int inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
#define INP_LOCK_ASSERT(inp)
#define INP_RSS_BUCKET_SET
#define INP_RATE_LIMIT_CHANGED
#define INP_RECVRSSBUCKETID
#define IFP_TO_IA(ifp, ia)
void ip_fillid(struct ip *ip)
struct mbuf * ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
int ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
int ip_optcopy(struct ip *ip, struct ip *jp)
int ip_ctloutput(struct socket *so, struct sockopt *sopt)
static int ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m, const struct sockaddr *gw, struct route *ro, bool stamp_tag)
int ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct ip_moptions *imo, struct inpcb *inp)
int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, u_long if_hwassist_flags)
static void ip_mloopback(struct ifnet *, const struct mbuf *, int)
#define OPTSET2(bit, val)
static int ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, int flags, struct inpcb *inp, struct sockaddr_in *dst, int *fibnum, int *error)
void in_delayed_cksum(struct mbuf *m)
static void rt_update_ro_flags(struct route *ro, const struct nhop_object *nh)
static u_int __exclusive_cache_line nfrags
#define IP_NODEFAULTFLOWID
#define IP_ALLOWBROADCAST
#define IPSTAT_ADD(name, val)
int(* ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *)
u_long(* ip_mcast_src)(int)
uint32_t inp_rss_listen_bucket
struct mbuf * inp_options
struct m_snd_tag * inp_snd_tag
struct in_conninfo inp_inc
struct in_addr ip_src ip_dst