35#include "opt_kern_tls.h"
36#include "opt_ratelimit.h"
42#include <sys/kernel.h>
44#include <sys/module.h>
46#include <sys/protosw.h>
47#include <sys/domain.h>
48#include <sys/socket.h>
49#include <sys/socketvar.h>
50#include <sys/sglist.h>
51#include <sys/taskqueue.h>
52#include <netinet/in.h>
53#include <netinet/in_pcb.h>
54#include <netinet/ip.h>
55#include <netinet/ip6.h>
57#include <netinet/tcp_fsm.h>
58#include <netinet/tcp_seq.h>
59#include <netinet/tcp_var.h>
60#include <netinet/toecore.h>
62#include <security/mac/mac_framework.h>
65#include <vm/vm_extern.h>
68#include <vm/vm_page.h>
70#include <dev/iscsi/iscsi_proto.h>
79static void t4_aiotx_cancel(
struct kaiocb *job);
80static void t4_aiotx_queue_toep(
struct socket *so,
struct toepcb *toep);
87 unsigned int nparams, flowclen, paramidx;
95 (
"%s: flowc for tid %u sent already", __func__, toep->
tid));
116 panic(
"%s: allocation failure.", __func__);
119 memset(flowc, 0, wr->wr_len);
126#define FLOWC_PARAM(__m, __v) \
128 flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \
129 flowc->mnemval[paramidx].val = htobe32(__v); \
135 FLOWC_PARAM(PFNVFN, pfvf);
137 FLOWC_PARAM(PORT, pi->
tx_chan);
142 FLOWC_PARAM(SNDNXT, tp->snd_nxt);
143 FLOWC_PARAM(RCVNXT, tp->rcv_nxt);
145 FLOWC_PARAM(MSS, 512);
147 "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x",
149 tp ? tp->snd_nxt : 0, tp ? tp->rcv_nxt : 0);
152 FLOWC_PARAM(ULP_MODE,
ulp_mode(toep));
159 KASSERT(paramidx == nparams, (
"nparams mismatch"));
164 (
"%s: not enough credits (%d)", __func__, toep->
tx_credits));
179update_tx_rate_limit(
struct adapter *sc,
struct toepcb *toep, u_int Bps)
182 const u_int kbps = (u_int) (uint64_t)Bps * 8ULL / 1000;
185 CTR3(
KTR_CXGBE,
"%s: tid %u, rate %uKbps", __func__, toep->
tid, kbps);
194 MPASS(
tc_idx >= 0 && tc_idx < sc->params.nsched_cls);
200 int nparams = 1, flowclen, flowclen16;
203 flowclen =
sizeof(*flowc) + nparams *
sizeof(
struct
205 flowclen16 = howmany(flowclen, 16);
215 memset(flowc, 0, wr->
wr_len);
251 struct inpcb *inp = toep->
inp;
252 struct tcpcb *tp = intotcpcb(inp);
254 INP_WLOCK_ASSERT(inp);
256 CTR6(
KTR_CXGBE,
"%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s",
258 inp->inp_flags & INP_DROPPED ?
"inp dropped" :
259 tcpstates[tp->t_state],
260 toep->
flags, inp->inp_flags,
262 " (abort already in progress)" :
"");
270 (
"%s: flowc_wr not sent for tid %d.", __func__, tid));
275 panic(
"%s: allocation failure.", __func__);
280 if (inp->inp_flags & INP_DROPPED)
281 req->
rsvd0 = htobe32(snd_nxt);
283 req->
rsvd0 = htobe32(tp->snd_nxt);
291 if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
292 struct socket *so = inp->inp_socket;
295 sbflush(&so->so_snd);
306assign_rxopt(
struct tcpcb *tp, uint16_t opt)
308 struct toepcb *toep = tp->t_toe;
309 struct inpcb *inp = tp->t_inpcb;
312 INP_LOCK_ASSERT(inp);
316 if (inp->inp_inc.inc_flags & INC_ISIPV6)
317 tp->t_maxseg -=
sizeof(
struct ip6_hdr) + sizeof(struct tcphdr);
319 tp->t_maxseg -=
sizeof(
struct ip) + sizeof(struct tcphdr);
325 tp->t_flags |= TF_RCVD_TSTMP;
327 tp->ts_recent_age = tcp_ts_getticks();
333 tp->t_flags |= TF_SACK_PERMIT;
336 tp->t_flags &= ~TF_SACK_PERMIT;
340 tp->t_flags |= TF_RCVD_SCALE;
343 if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
344 (TF_RCVD_SCALE | TF_REQ_SCALE)) {
345 tp->rcv_scale = tp->request_r_scale;
351 "assign_rxopt: tid %d, mtu_idx %u, emss %u, ts %u, sack %u, wscale %u",
365 struct inpcb *inp = toep->
inp;
366 struct socket *so = inp->inp_socket;
367 struct tcpcb *tp = intotcpcb(inp);
368 uint16_t tcpopt = be16toh(opt);
370 INP_WLOCK_ASSERT(inp);
371 KASSERT(tp->t_state == TCPS_SYN_SENT ||
372 tp->t_state == TCPS_SYN_RECEIVED,
373 (
"%s: TCP state %s", __func__, tcpstates[tp->t_state]));
375 CTR6(
KTR_CXGBE,
"%s: tid %d, so %p, inp %p, tp %p, toep %p",
376 __func__, toep->
tid, so, inp, tp, toep);
378 tcp_state_change(tp, TCPS_ESTABLISHED);
379 tp->t_starttime = ticks;
380 TCPSTAT_INC(tcps_connects);
385 tp->rcv_adv += tp->rcv_wnd;
386 tp->last_ack_sent = tp->rcv_nxt;
390 tp->snd_una = iss + 1;
391 tp->snd_nxt = iss + 1;
392 tp->snd_max = iss + 1;
394 assign_rxopt(tp, tcpopt);
410 KASSERT(credits >= 0, (
"%s: %d credits", __func__, credits));
444 struct adapter *sc = tod->tod_softc;
445 struct inpcb *inp = tp->t_inpcb;
446 struct socket *so = inp->inp_socket;
447 struct sockbuf *sb = &so->so_rcv;
448 struct toepcb *toep = tp->t_toe;
451 INP_WLOCK_ASSERT(
inp);
452 SOCKBUF_LOCK_ASSERT(sb);
454 rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0;
455 if (rx_credits > 0 &&
456 (tp->rcv_wnd <= 32 * 1024 || rx_credits >= 64 * 1024 ||
457 (rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) ||
458 sbused(sb) + tp->rcv_wnd < sb->sb_lowat)) {
460 tp->rcv_wnd += rx_credits;
461 tp->rcv_adv += rx_credits;
467t4_rcvd(
struct toedev *tod,
struct tcpcb *tp)
469 struct inpcb *inp = tp->t_inpcb;
470 struct socket *so = inp->inp_socket;
471 struct sockbuf *sb = &so->so_rcv;
486 unsigned int tid = toep->
tid;
495 (
"%s: flowc_wr not sent for tid %u.", __func__, tid));
500 panic(
"%s: allocation failure.", __func__);
506 req->wr.wr_mid = htonl(
V_FW_WR_LEN16(howmany(
sizeof(*req), 16)) |
513 toep->
flags &= ~TPF_SEND_FIN;
/*
 * Tx credit accounting: one credit represents 16 bytes of work-request
 * space (all sizes below are converted with howmany(x, 16)).
 */
/* Most credits any single offload WR may consume (max WR length / 16). */
519#define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16)
/* Credits for the smallest useful tx-data WR: the header plus 1 payload byte. */
520#define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16))
/* Additional credits consumed by the CPL carried inside an iSCSI ISO WR. */
521#define MIN_ISO_TX_CREDITS (howmany(sizeof(struct cpl_tx_data_iso), 16))
/*
 * Minimum credits needed before any payload can be sent; ISO WRs need the
 * extra room for the embedded cpl_tx_data_iso.
 */
522#define MIN_TX_CREDITS(iso) \
523 (MIN_OFLD_TX_CREDITS + ((iso) ? MIN_ISO_TX_CREDITS : 0))
527max_imm_payload(
int tx_credits,
int iso)
532 KASSERT(tx_credits >= 0 &&
533 tx_credits <= MAX_OFLD_TX_CREDITS,
534 ("%s: %d credits", __func__, tx_credits));
536 if (tx_credits < MIN_TX_CREDITS(iso))
539 if (tx_credits >= (n * EQ_ESIZE) / 16)
540 return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr) -
543 return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr) -
549max_dsgl_nsegs(int tx_credits, int iso)
552 int sge_pair_credits = tx_credits - MIN_TX_CREDITS(iso);
554 KASSERT(tx_credits >= 0 &&
555 tx_credits <= MAX_OFLD_TX_CREDITS,
556 (
"%s: %d credits", __func__, tx_credits));
558 if (tx_credits < MIN_TX_CREDITS(iso))
561 nseg += 2 * (sge_pair_credits * 16 / 24);
562 if ((sge_pair_credits * 16) % 24 == 16)
569write_tx_wr(
void *dst,
struct toepcb *toep,
int fw_wr_opcode,
570 unsigned int immdlen,
unsigned int plen, uint8_t credits,
int shove,
584 if (plen < 2 * toep->params.emss)
600write_tx_sgl(
void *dst,
struct mbuf *start,
struct mbuf *stop,
int nsegs,
int n)
606 struct sglist_seg segs[n];
608 KASSERT(nsegs > 0, (
"%s: nsegs 0", __func__));
610 sglist_init(&sg, n, segs);
615 for (m = start; m != stop; m = m->m_next) {
616 if (m->m_flags & M_EXTPG)
617 rc = sglist_append_mbuf_epg(&sg, m,
618 mtod(m, vm_offset_t), m->m_len);
620 rc = sglist_append(&sg, mtod(m,
void *), m->m_len);
621 if (__predict_false(rc != 0))
622 panic(
"%s: sglist_append %d", __func__, rc);
624 for (j = 0; j < sg.sg_nseg; i++, j++) {
627 usgl->
addr0 = htobe64(segs[j].ss_paddr);
629 usgl->
sge[i / 2].
len[i & 1] =
632 htobe64(segs[j].ss_paddr);
641 usgl->
sge[i / 2].
len[1] = htobe32(0);
642 KASSERT(nsegs == 0, (
"%s: nsegs %d, start %p, stop %p",
643 __func__, nsegs, start, stop));
651#define OFLD_SGL_LEN (41)
670 struct mbuf *sndptr, *m, *sb_sndptr;
673 u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
674 struct inpcb *inp = toep->
inp;
675 struct tcpcb *tp = intotcpcb(inp);
676 struct socket *so = inp->inp_socket;
677 struct sockbuf *sb = &so->so_snd;
678 int tx_credits, shove, compl, sowwakeup;
680 bool nomap_mbuf_seen;
682 INP_WLOCK_ASSERT(inp);
684 (
"%s: flowc_wr not sent for tid %u.", __func__, toep->
tid));
690 (
"%s: ulp_mode %u for toep %p", __func__,
ulp_mode(toep), toep));
693 CTR5(
KTR_CXGBE,
"%s: tid %d toep flags %#x tp flags %#x drop %d",
694 __func__, toep->
tid, toep->
flags, tp->t_flags, drop);
700 if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) &&
701 (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) {
702 inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
712 (
"%s: drop (%d) != 0 but tx is suspended", __func__, drop));
725 sbdrop_locked(sb, drop);
728 sb_sndptr = sb->sb_sndptr;
729 sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb;
733 nomap_mbuf_seen =
false;
734 for (m = sndptr; m != NULL; m = m->m_next) {
737 if ((m->m_flags & M_NOTAVAIL) != 0)
739 if (m->m_flags & M_EXTPG) {
741 if (m->m_epg_tls != NULL) {
751 n = sglist_count_mbuf_epg(m,
752 mtod(m, vm_offset_t), m->m_len);
754 n = sglist_count(mtod(m,
void *), m->m_len);
760 if (
plen > max_imm && nsegs > max_nsegs) {
769 t4_aiotx_queue_toep(so,
771 sowwakeup_locked(so);
774 SOCKBUF_UNLOCK_ASSERT(sb);
780 if (m->m_flags & M_EXTPG)
781 nomap_mbuf_seen =
true;
782 if (max_nsegs_1mbuf < n)
787 if (
plen > max_imm && nsegs == max_nsegs) {
793 if (sbused(sb) > sb->sb_hiwat * 5 / 8 &&
799 if (sb->sb_flags & SB_AUTOSIZE &&
800 V_tcp_do_autosndbuf &&
801 sb->sb_hiwat < V_tcp_autosndbuf_max &&
802 sbused(sb) >= sb->sb_hiwat * 7 / 8) {
803 int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
804 V_tcp_autosndbuf_max);
806 if (!sbreserve_locked(sb, newsize, so, NULL))
807 sb->sb_flags &= ~SB_AUTOSIZE;
812 if (!TAILQ_EMPTY(&toep->aiotx_jobq))
813 t4_aiotx_queue_toep(so, toep);
814 sowwakeup_locked(so);
817 SOCKBUF_UNLOCK_ASSERT(sb);
821 KASSERT(m == NULL || (m->m_flags & M_NOTAVAIL) != 0,
822 (
"%s: nothing to send, but m != NULL is ready",
828 panic(
"%s: excess tx.", __func__);
830 shove = m == NULL && !(tp->t_flags & TF_MORETOCOME);
831 if (
plen <= max_imm && !nomap_mbuf_seen) {
843 credits = howmany(wr->
wr_len, 16);
846 m_copydata(sndptr, 0,
plen, (
void *)(txwr + 1));
853 wr_len =
sizeof(*txwr) +
sizeof(
struct ulptx_sgl) +
854 ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
863 credits = howmany(wr_len, 16);
866 write_tx_sgl(txwr + 1, sndptr, m, nsegs,
869 uint64_t *pad = (uint64_t *)
870 ((uintptr_t)txwr + wr_len);
876 (
"%s: not enough credits", __func__));
895 KASSERT(sb_sndptr, (
"%s: sb_sndptr is NULL", __func__));
896 sb->sb_sndptr = sb_sndptr;
903 KASSERT(toep->
txsd_avail > 0, (
"%s: no txsd", __func__));
909 txsd = &toep->
txsd[0];
914 }
while (m != NULL && (m->m_flags & M_NOTAVAIL) == 0);
922rqdrop_locked(
struct mbufq *q,
int plen)
927 m = mbufq_dequeue(q);
934 MPASS(plen >= m->m_pkthdr.len);
936 plen -= m->m_pkthdr.len;
945#define ULP_ISO G_TX_ULP_SUBMODE(F_FW_ISCSI_TX_DATA_WR_ULPSUBMODE_ISO)
948write_tx_data_iso(
void *dst, u_int ulp_submode, uint8_t flags, uint16_t mss,
991write_iscsi_mbuf_wr(
struct toepcb *toep,
struct mbuf *sndptr)
998 u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
999 u_int adjusted_plen, imm_data, ulp_submode;
1000 struct inpcb *inp = toep->
inp;
1001 struct tcpcb *tp = intotcpcb(inp);
1002 int tx_credits, shove, npdu, wr_len;
1004 static const u_int ulp_extra_len[] = {0, 4, 4, 8};
1005 bool iso, nomap_mbuf_seen;
1007 M_ASSERTPKTHDR(sndptr);
1009 tx_credits = min(toep->
tx_credits, MAX_OFLD_TX_CREDITS);
1011 plen = sndptr->m_pkthdr.len;
1013 (
"raw WR len %u is greater than max WR len", plen));
1014 if (plen > tx_credits * 16)
1018 if (__predict_false(wr == NULL))
1021 m_copydata(sndptr, 0, plen,
wrtod(wr));
1026 max_imm = max_imm_payload(tx_credits, iso);
1027 max_nsegs = max_dsgl_nsegs(tx_credits, iso);
1032 max_nsegs_1mbuf = 0;
1033 nomap_mbuf_seen =
false;
1034 for (m = sndptr; m != NULL; m = m->m_next) {
1037 if (m->m_flags & M_EXTPG)
1038 n = sglist_count_mbuf_epg(m, mtod(m, vm_offset_t),
1041 n = sglist_count(mtod(m,
void *), m->m_len);
1050 if ((nomap_mbuf_seen || plen > max_imm) && nsegs > max_nsegs)
1053 if (m->m_flags & M_EXTPG)
1054 nomap_mbuf_seen =
true;
1055 if (max_nsegs_1mbuf < n)
1056 max_nsegs_1mbuf = n;
1060 panic(
"%s: excess tx.", __func__);
1067 MPASS((plen & 3) == 0);
1068 MPASS(sndptr->m_pkthdr.len == plen);
1070 shove = !(tp->t_flags & TF_MORETOCOME);
1079 MPASS(ulp_submode < nitems(ulp_extra_len));
1080 npdu = iso ? howmany(plen - ISCSI_BHS_SIZE, iso_mss) : 1;
1081 adjusted_plen = plen + ulp_extra_len[ulp_submode] * npdu;
1083 adjusted_plen += ISCSI_BHS_SIZE * (npdu - 1);
1084 wr_len =
sizeof(*txwr);
1087 if (plen <= max_imm && !nomap_mbuf_seen) {
1096 ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
1105 credits = howmany(wr->
wr_len, 16);
1110 adjusted_plen, credits, shove, ulp_submode | ULP_ISO);
1112 MPASS(plen == sndptr->m_pkthdr.len);
1113 write_tx_data_iso(cpl_iso, ulp_submode,
1118 adjusted_plen, credits, shove, ulp_submode);
1122 if (imm_data != 0) {
1123 m_copydata(sndptr, 0, plen, p);
1125 write_tx_sgl(p, sndptr, m, nsegs, max_nsegs_1mbuf);
1127 uint64_t *pad = (uint64_t *)((uintptr_t)txwr + wr_len);
1133 (
"%s: not enough credits: credits %u "
1134 "toep->tx_credits %u tx_credits %u nsegs %u "
1135 "max_nsegs %u iso %d", __func__, credits,
1136 toep->
tx_credits, tx_credits, nsegs, max_nsegs, iso));
1138 tp->snd_nxt += adjusted_plen;
1139 tp->snd_max += adjusted_plen;
1152 struct mbuf *sndptr, *m;
1155 u_int plen, credits;
1156 struct inpcb *inp = toep->
inp;
1158 struct mbufq *pduq = &toep->
ulp_pduq;
1160 INP_WLOCK_ASSERT(inp);
1162 (
"%s: flowc_wr not sent for tid %u.", __func__, toep->
tid));
1164 (
"%s: ulp_mode %u for toep %p", __func__,
ulp_mode(toep), toep));
1175 (
"%s: drop (%d) != 0 but tx is suspended", __func__, drop));
1180 struct socket *so = inp->inp_socket;
1181 struct sockbuf *sb = &so->so_snd;
1190 if (__predict_false(sbused(sb)) > 0) {
1200 sbdrop_locked(sb, min(sbu, drop));
1201 drop -= min(sbu, drop);
1203 sowwakeup_locked(so);
1208 while ((sndptr = mbufq_first(pduq)) != NULL) {
1209 wr = write_iscsi_mbuf_wr(toep, sndptr);
1215 plen = sndptr->m_pkthdr.len;
1216 credits = howmany(wr->
wr_len, 16);
1218 (
"%s: not enough credits", __func__));
1220 m = mbufq_dequeue(pduq);
1244 KASSERT(toep->
txsd_avail > 0, (
"%s: no txsd", __func__));
1250 txsd = &toep->
txsd[0];
1263t4_push_data(
struct adapter *sc,
struct toepcb *toep,
int drop)
1277 struct adapter *sc = tod->tod_softc;
1279 struct inpcb *inp = tp->t_inpcb;
1281 struct toepcb *toep = tp->t_toe;
1283 INP_WLOCK_ASSERT(
inp);
1284 KASSERT((
inp->inp_flags & INP_DROPPED) == 0,
1285 (
"%s: inp %p dropped.", __func__,
inp));
1286 KASSERT(toep != NULL, (
"%s: toep is NULL", __func__));
1288 t4_push_data(sc, toep, 0);
1296 struct adapter *sc = tod->tod_softc;
1298 struct inpcb *inp = tp->t_inpcb;
1300 struct toepcb *toep = tp->t_toe;
1302 INP_WLOCK_ASSERT(
inp);
1303 KASSERT((
inp->inp_flags & INP_DROPPED) == 0,
1304 (
"%s: inp %p dropped.", __func__,
inp));
1305 KASSERT(toep != NULL, (
"%s: toep is NULL", __func__));
1308 if (tp->t_state >= TCPS_ESTABLISHED)
1309 t4_push_data(sc, toep, 0);
1317 struct adapter *sc = tod->tod_softc;
1318#if defined(INVARIANTS)
1319 struct inpcb *inp = tp->t_inpcb;
1321 struct toepcb *toep = tp->t_toe;
1323 INP_WLOCK_ASSERT(
inp);
1324 KASSERT((
inp->inp_flags & INP_DROPPED) == 0,
1325 (
"%s: inp %p dropped.", __func__,
inp));
1326 KASSERT(toep != NULL, (
"%s: toep is NULL", __func__));
1330 (
"%s: flowc for tid %u [%s] not sent already",
1331 __func__, toep->
tid, tcpstates[tp->t_state]));
1345 unsigned int tid =
GET_TID(cpl);
1347 struct inpcb *inp = toep->
inp;
1348 struct tcpcb *tp = NULL;
1350 struct epoch_tracker et;
1356 (
"%s: unexpected opcode 0x%x", __func__, opcode));
1357 KASSERT(m == NULL, (
"%s: wasn't expecting payload", __func__));
1366 CTR4(
KTR_CXGBE,
"%s: tid %u, synqe %p (0x%x)", __func__, tid,
1371 KASSERT(toep->
tid == tid, (
"%s: toep tid mismatch", __func__));
1373 CURVNET_SET(toep->
vnet);
1374 NET_EPOCH_ENTER(et);
1376 tp = intotcpcb(inp);
1379 "%s: tid %u (%s), toep_flags 0x%x, ddp_flags 0x%x, inp %p",
1380 __func__, tid, tp ? tcpstates[tp->t_state] :
"no tp", toep->
flags,
1386 so = inp->inp_socket;
1390 if (__predict_false(toep->
ddp.
flags &
1405 KASSERT(tp->rcv_nxt + 1 == be32toh(cpl->
rcv_nxt),
1406 (
"%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt,
1410 tp->rcv_nxt = be32toh(cpl->
rcv_nxt);
1412 switch (tp->t_state) {
1413 case TCPS_SYN_RECEIVED:
1414 tp->t_starttime = ticks;
1417 case TCPS_ESTABLISHED:
1418 tcp_state_change(tp, TCPS_CLOSE_WAIT);
1421 case TCPS_FIN_WAIT_1:
1422 tcp_state_change(tp, TCPS_CLOSING);
1425 case TCPS_FIN_WAIT_2:
1428 INP_UNLOCK_ASSERT(inp);
1437 log(LOG_ERR,
"%s: TID %u received CPL_PEER_CLOSE in state %d\n",
1438 __func__, tid, tp->t_state);
1456 unsigned int tid =
GET_TID(cpl);
1458 struct inpcb *inp = toep->
inp;
1459 struct tcpcb *tp = NULL;
1460 struct socket *so = NULL;
1461 struct epoch_tracker et;
1467 (
"%s: unexpected opcode 0x%x", __func__, opcode));
1468 KASSERT(m == NULL, (
"%s: wasn't expecting payload", __func__));
1469 KASSERT(toep->
tid == tid, (
"%s: toep tid mismatch", __func__));
1471 CURVNET_SET(toep->
vnet);
1472 NET_EPOCH_ENTER(et);
1474 tp = intotcpcb(inp);
1476 CTR4(
KTR_CXGBE,
"%s: tid %u (%s), toep_flags 0x%x",
1477 __func__, tid, tp ? tcpstates[tp->t_state] :
"no tp", toep->
flags);
1482 so = inp->inp_socket;
1483 tp->snd_una = be32toh(cpl->
snd_nxt) - 1;
1485 switch (tp->t_state) {
1490 INP_UNLOCK_ASSERT(inp);
1503 case TCPS_FIN_WAIT_1:
1504 if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
1505 soisdisconnected(so);
1506 tcp_state_change(tp, TCPS_FIN_WAIT_2);
1511 "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n",
1512 __func__, tid, tcpstates[tp->t_state]);
1531 panic(
"%s: allocation failure.", __func__);
1536 cpl->
cmd = rst_status;
1542abort_status_to_errno(
struct tcpcb *tp,
unsigned int abort_reason)
1544 switch (abort_reason) {
1547 return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
1566 unsigned int tid =
GET_TID(cpl);
1571 struct epoch_tracker et;
1577 (
"%s: unexpected opcode 0x%x", __func__, opcode));
1578 KASSERT(m == NULL, (
"%s: wasn't expecting payload", __func__));
1583 KASSERT(toep->
tid == tid, (
"%s: toep tid mismatch", __func__));
1586 CTR4(
KTR_CXGBE,
"%s: negative advice %d for tid %d (0x%x)",
1592 CURVNET_SET(toep->
vnet);
1593 NET_EPOCH_ENTER(et);
1596 tp = intotcpcb(inp);
1599 "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d",
1600 __func__, tid, tp ? tcpstates[tp->t_state] :
"no tp", toep->
flags,
1601 inp->inp_flags, cpl->
status);
1614 if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
1615 struct socket *so = inp->inp_socket;
1618 so_error_set(so, abort_status_to_errno(tp,
1641 unsigned int tid =
GET_TID(cpl);
1643 struct inpcb *inp = toep->
inp;
1649 (
"%s: unexpected opcode 0x%x", __func__, opcode));
1650 KASSERT(m == NULL, (
"%s: wasn't expecting payload", __func__));
1655 KASSERT(toep->
tid == tid, (
"%s: toep tid mismatch", __func__));
1657 CTR5(
KTR_CXGBE,
"%s: tid %u, toep %p, inp %p, status %d",
1658 __func__, tid, toep, inp, cpl->
status);
1661 (
"%s: wasn't expecting abort reply", __func__));
1673 const struct cpl_rx_data *cpl = mtod(m,
const void *);
1674 unsigned int tid =
GET_TID(cpl);
1676 struct inpcb *inp = toep->
inp;
1680 struct epoch_tracker et;
1681 int len, rx_credits;
1682 uint32_t ddp_placed = 0;
1691 CTR4(
KTR_CXGBE,
"%s: tid %u, synqe %p (0x%x)", __func__, tid,
1697 KASSERT(toep->
tid == tid, (
"%s: toep tid mismatch", __func__));
1700 m_adj(m,
sizeof(*cpl));
1701 len = m->m_pkthdr.len;
1704 if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
1705 CTR4(
KTR_CXGBE,
"%s: tid %u, rx (%d bytes), inp_flags 0x%x",
1706 __func__, tid, len, inp->inp_flags);
1712 tp = intotcpcb(inp);
1717 CTR3(
KTR_CXGBE,
"%s: tid %u, raw TLS data (%d bytes)",
1718 __func__, tid, len);
1723 if (__predict_false(tp->rcv_nxt != be32toh(cpl->
seq)))
1724 ddp_placed = be32toh(cpl->
seq) - tp->rcv_nxt;
1727 if (tp->rcv_wnd < len) {
1729 (
"%s: negative window size", __func__));
1733 tp->t_rcvtime = ticks;
1737 so = inp_inpcbtosocket(inp);
1741 if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
1742 CTR3(
KTR_CXGBE,
"%s: tid %u, excess rx (%d bytes)",
1743 __func__, tid, len);
1750 CURVNET_SET(toep->
vnet);
1751 NET_EPOCH_ENTER(et);
1753 tp = tcp_drop(tp, ECONNRESET);
1763 MPASS(toep->
vnet == so->so_vnet);
1764 CURVNET_SET(toep->
vnet);
1765 if (sb->sb_flags & SB_AUTOSIZE &&
1766 V_tcp_do_autorcvbuf &&
1767 sb->sb_hiwat < V_tcp_autorcvbuf_max &&
1768 len > (sbspace(sb) / 8 * 7)) {
1769 unsigned int hiwat = sb->sb_hiwat;
1771 V_tcp_autorcvbuf_max);
1773 if (!sbreserve_locked(sb, newsize, so, NULL))
1774 sb->sb_flags &= ~SB_AUTOSIZE;
1781 CTR3(
KTR_CXGBE,
"%s: tid %u, non-ddp rx (%d bytes)",
1782 __func__, tid, len);
1789 (
"%s: DDP switched on by itself.",
1794 CTR1(
KTR_CXGBE,
"%s: fell out of DDP mode",
1812 sbappendstream_locked(sb, m, 0);
1813 rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0;
1814 if (rx_credits > 0 && sbused(sb) + tp->rcv_wnd < sb->sb_lowat) {
1816 tp->rcv_wnd += rx_credits;
1817 tp->rcv_adv += rx_credits;
1822 CTR2(
KTR_CXGBE,
"%s: tid %u queueing AIO task", __func__,
1826 sorwakeup_locked(so);
1827 SOCKBUF_UNLOCK_ASSERT(sb);
1840 const struct cpl_fw4_ack *cpl = (
const void *)(rss + 1);
1846 uint8_t credits = cpl->
credits;
1859 (
"%s: credits for a synq entry %p", __func__, toep));
1866 (
"%s: unexpected opcode 0x%x", __func__, opcode));
1867 KASSERT(m == NULL, (
"%s: wasn't expecting payload", __func__));
1868 KASSERT(toep->
tid == tid, (
"%s: toep tid mismatch", __func__));
1877 KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0,
1878 (
"%s: inp_flags 0x%x", __func__, inp->inp_flags));
1880 tp = intotcpcb(inp);
1883 tcp_seq snd_una = be32toh(cpl->
snd_una);
1886 if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
1888 "%s: unexpected seq# %x for TID %u, snd_una %x\n",
1889 __func__, snd_una, toep->
tid, tp->snd_una);
1893 if (tp->snd_una != snd_una) {
1894 tp->snd_una = snd_una;
1895 tp->ts_recent_age = tcp_ts_getticks();
1899#ifdef VERBOSE_TRACES
1900 CTR3(
KTR_CXGBE,
"%s: tid %d credits %u", __func__, tid, credits);
1902 so = inp->inp_socket;
1907 (
"%s: too many (or partial) credits", __func__));
1914 (
"%s: txsd avail > total", __func__));
1916 txsd = &toep->
txsd[0];
1928#ifdef VERBOSE_TRACES
1929 CTR2(
KTR_CXGBE,
"%s: tid %d calling t4_push_frames", __func__,
1932 toep->
flags &= ~TPF_TX_SUSPENDED;
1933 CURVNET_SET(toep->
vnet);
1934 t4_push_data(sc, toep,
plen);
1936 }
else if (
plen > 0) {
1937 struct sockbuf *sb = &so->so_snd;
1943 if (__predict_false(sbu > 0)) {
1950 sbdrop_locked(sb, min(sbu, plen));
1951 plen -= min(sbu, plen);
1953 sowwakeup_locked(so);
1956#ifdef VERBOSE_TRACES
1957 CTR3(
KTR_CXGBE,
"%s: tid %d dropped %d bytes", __func__,
1960 sbdrop_locked(sb, plen);
1961 if (!TAILQ_EMPTY(&toep->aiotx_jobq))
1962 t4_aiotx_queue_toep(so, toep);
1963 sowwakeup_locked(so);
1965 SOCKBUF_UNLOCK_ASSERT(sb);
1975 uint16_t word, uint64_t mask, uint64_t val,
int reply,
int cookie)
1989 panic(
"%s: allocation failure.", __func__);
1998 req->
mask = htobe64(mask);
1999 req->
val = htobe64(val);
2002 txsd->
tx_credits = howmany(
sizeof(*req), 16);
2006 (
"%s: not enough credits (%d)", __func__,
/*
 * Per-job AIO tx state is stashed in the kaiocb's spare backend fields:
 * aio_error holds the pending errno (stored as a pointer-sized integer),
 * aio_sent the number of bytes already queued for transmit, and aio_refs
 * the job's reference count (managed with refcount_init/acquire/release).
 */
2051#define aio_error backend1
2052#define aio_sent backend3
2053#define aio_refs backend4
/* Map an AIO job to the TOE tid of the socket it was submitted against. */
2055#define jobtotid(job) \
2056 (((struct toepcb *)(so_sototcpcb((job)->fd_file->f_data)->t_toe))->tid)
2059aiotx_free_job(
struct kaiocb *job)
2064 if (refcount_release(&job->aio_refs) == 0)
2067 error = (intptr_t)job->aio_error;
2068 status = job->aio_sent;
2069#ifdef VERBOSE_TRACES
2070 CTR5(
KTR_CXGBE,
"%s: tid %d completed %p len %ld, error %d", __func__,
2071 jobtotid(job), job, status, error);
2073 if (error != 0 && status != 0)
2075 if (error == ECANCELED)
2078 aio_complete(job, -1, error);
2081 aio_complete(job, status, 0);
2086aiotx_free_pgs(
struct mbuf *m)
2092 job = m->m_ext.ext_arg1;
2093#ifdef VERBOSE_TRACES
2094 CTR3(
KTR_CXGBE,
"%s: completed %d bytes for tid %d", __func__,
2095 m->m_len, jobtotid(job));
2098 for (
int i = 0; i < m->m_epg_npgs; i++) {
2099 pg = PHYS_TO_VM_PAGE(m->m_epg_pa[i]);
2100 vm_page_unwire(pg, PQ_ACTIVE);
2103 aiotx_free_job(job);
2111alloc_aiotx_mbuf(
struct kaiocb *job,
int len)
2114 vm_page_t pgs[MBUF_PEXT_MAX_PGS];
2115 struct mbuf *m, *top, *last;
2118 int i, mlen, npages, pgoff;
2120 KASSERT(job->aio_sent + len <= job->uaiocb.aio_nbytes,
2121 (
"%s(%p, %d): request to send beyond end of buffer", __func__,
2129 vm = job->userproc->p_vmspace;
2131 start = (uintptr_t)job->uaiocb.aio_buf + job->aio_sent;
2132 pgoff = start & PAGE_MASK;
2137 mlen = imin(len, MBUF_PEXT_MAX_PGS * PAGE_SIZE - pgoff);
2138 KASSERT(mlen == len || ((start + mlen) & PAGE_MASK) == 0,
2139 (
"%s: next start (%#jx + %#x) is not page aligned",
2140 __func__, (uintmax_t)start, mlen));
2142 npages = vm_fault_quick_hold_pages(map, start, mlen,
2143 VM_PROT_WRITE, pgs, nitems(pgs));
2147 m = mb_alloc_ext_pgs(M_WAITOK, aiotx_free_pgs);
2149 vm_page_unhold_pages(pgs, npages);
2153 m->m_epg_1st_off = pgoff;
2154 m->m_epg_npgs = npages;
2156 KASSERT(mlen + pgoff <= PAGE_SIZE,
2157 (
"%s: single page is too large (off %d len %d)",
2158 __func__, pgoff, mlen));
2159 m->m_epg_last_len = mlen;
2161 m->m_epg_last_len = mlen - (PAGE_SIZE - pgoff) -
2162 (npages - 2) * PAGE_SIZE;
2164 for (i = 0; i < npages; i++)
2165 m->m_epg_pa[i] = VM_PAGE_TO_PHYS(pgs[i]);
2168 m->m_ext.ext_size = npages * PAGE_SIZE;
2169 m->m_ext.ext_arg1 = job;
2170 refcount_acquire(&job->aio_refs);
2172#ifdef VERBOSE_TRACES
2173 CTR5(
KTR_CXGBE,
"%s: tid %d, new mbuf %p for job %p, npages %d",
2174 __func__, jobtotid(job), m, job, npages);
2192t4_aiotx_process_job(
struct toepcb *toep,
struct socket *so,
struct kaiocb *job)
2200 bool moretocome, sendmore;
2208 error = mac_socket_check_send(fp->f_cred, so);
2215 error = SOCK_IO_SEND_LOCK(so, SBL_WAIT);
2220 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2222 SOCK_IO_SEND_UNLOCK(so);
2223 if ((so->so_options & SO_NOSIGPIPE) == 0) {
2224 PROC_LOCK(job->userproc);
2225 kern_psignal(job->userproc, SIGPIPE);
2226 PROC_UNLOCK(job->userproc);
2232 error = so->so_error;
2235 SOCK_IO_SEND_UNLOCK(so);
2238 if ((so->so_state & SS_ISCONNECTED) == 0) {
2240 SOCK_IO_SEND_UNLOCK(so);
2244 if (sbspace(sb) < sb->sb_lowat) {
2245 MPASS(job->aio_sent == 0 || !(so->so_state & SS_NBIO));
2251 if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
2253 SOCK_IO_SEND_UNLOCK(so);
2257 TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
2259 SOCK_IO_SEND_UNLOCK(so);
2268 if (len > job->uaiocb.aio_nbytes - job->aio_sent) {
2269 len = job->uaiocb.aio_nbytes - job->aio_sent;
2279 if (!TAILQ_EMPTY(&toep->aiotx_jobq))
2284 m = alloc_aiotx_mbuf(job, len);
2286 SOCK_IO_SEND_UNLOCK(so);
2295 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
2297 SOCK_IO_SEND_UNLOCK(so);
2302 job->aio_sent += m_length(m, NULL);
2304 sbappendstream(sb, m, 0);
2307 if (!(inp->inp_flags & INP_DROPPED)) {
2308 tp = intotcpcb(inp);
2310 tp->t_flags |= TF_MORETOCOME;
2311 error = tcp_output(tp);
2313 INP_UNLOCK_ASSERT(inp);
2314 SOCK_IO_SEND_UNLOCK(so);
2319 tp->t_flags &= ~TF_MORETOCOME;
2325 SOCK_IO_SEND_UNLOCK(so);
2335 if (job->aio_sent < job->uaiocb.aio_nbytes &&
2336 !(so->so_state & SS_NBIO)) {
2338 if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
2343 TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
2354 aiotx_free_job(job);
2358 job->aio_error = (
void *)(intptr_t)error;
2359 aiotx_free_job(job);
2366t4_aiotx_task(
void *context,
int pending)
2368 struct toepcb *toep = context;
2371 struct epoch_tracker et;
2374 CURVNET_SET(toep->
vnet);
2375 NET_EPOCH_ENTER(et);
2376 SOCKBUF_LOCK(&so->so_snd);
2377 while (!TAILQ_EMPTY(&toep->aiotx_jobq) && sowriteable(so)) {
2378 job = TAILQ_FIRST(&toep->aiotx_jobq);
2379 TAILQ_REMOVE(&toep->aiotx_jobq, job, list);
2380 if (!aio_clear_cancel_function(job))
2383 t4_aiotx_process_job(toep, so, job);
2386 SOCKBUF_UNLOCK(&so->so_snd);
2395t4_aiotx_queue_toep(
struct socket *so,
struct toepcb *toep)
2398 SOCKBUF_LOCK_ASSERT(&toep->
inp->inp_socket->so_snd);
2399#ifdef VERBOSE_TRACES
2400 CTR3(
KTR_CXGBE,
"%s: queueing aiotx task for tid %d, active = %s",
2401 __func__, toep->
tid, toep->
aiotx_so != NULL ?
"true" :
"false");
2412t4_aiotx_cancel(
struct kaiocb *job)
2419 so = job->fd_file->f_data;
2420 tp = so_sototcpcb(so);
2422 MPASS(job->uaiocb.aio_lio_opcode == LIO_WRITE);
2426 if (!aio_cancel_cleared(job))
2427 TAILQ_REMOVE(&toep->aiotx_jobq, job, list);
2430 job->aio_error = (
void *)(intptr_t)ECANCELED;
2431 aiotx_free_job(job);
2437 struct tcpcb *tp = so_sototcpcb(so);
2438 struct toepcb *toep = tp->t_toe;
2442 if (job->uaiocb.aio_lio_opcode != LIO_WRITE)
2443 return (EOPNOTSUPP);
2446 return (EOPNOTSUPP);
2449 return (EOPNOTSUPP);
2451 SOCKBUF_LOCK(&so->so_snd);
2452#ifdef VERBOSE_TRACES
2453 CTR3(
KTR_CXGBE,
"%s: queueing %p for tid %u", __func__, job, toep->
tid);
2455 if (!aio_set_cancel_function(job, t4_aiotx_cancel))
2456 panic(
"new job was cancelled");
2457 refcount_init(&job->aio_refs, 1);
2458 TAILQ_INSERT_TAIL(&toep->aiotx_jobq, job, list);
2459 if (sowriteable(so))
2460 t4_aiotx_queue_toep(so, toep);
2461 SOCKBUF_UNLOCK(&so->so_snd);
2469 TAILQ_INIT(&toep->aiotx_jobq);
2470 TASK_INIT(&toep->
aiotx_task, 0, t4_aiotx_task, toep);
static struct wrqe * alloc_wrqe(int wr_len, struct sge_wrq *wrq)
int t4_reserve_cl_rl_kbps(struct adapter *, int, u_int, int *)
void t4_release_cl_rl(struct adapter *, int, int)
void t4_register_cpl_handler(int, cpl_handler_t)
static void * wrtod(struct wrqe *wr)
void t4_register_shared_cpl_handler(int, cpl_handler_t, int)
static void t4_wrq_tx(struct adapter *sc, struct wrqe *wr)
static int chip_id(struct adapter *adap)
#define INIT_TP_WR_MIT_CPL(w, cpl, tid)
#define DIV_ROUND_UP(x, y)
unsigned short mtus[NMTUS]
struct adapter_params params
__be32 reserved2_seglen_offset
struct fw_flowc_mnemval mnemval[0]
__be32 lsodisable_to_flags
counter_u64_t tx_iscsi_octets
counter_u64_t tx_iscsi_iso_wrs
counter_u64_t tx_iscsi_pdus
struct conn_params params
struct sge_ofld_rxq * ofld_rxq
struct mbufq ulp_pdu_reclaimq
struct sge_ofld_txq * ofld_txq
struct ofld_tx_sdesc txsd[]
struct ulptx_sge_pair sge[]
#define V_CPL_TX_DATA_ISO_CPLHDRLEN(x)
@ CPL_FW4_ACK_FLAGS_SEQVAL
@ CPL_ERR_FINWAIT2_TIMEDOUT
@ CPL_ERR_KEEPALIVE_TIMEDOUT
@ CPL_ERR_PERSIST_TIMEDOUT
#define V_RX_DACK_MODE(x)
#define V_CPL_TX_DATA_ISO_HDRCRC(x)
#define G_TCPOPT_SND_WSCALE(x)
#define V_CPL_TX_DATA_ISO_FIRST(x)
#define G_TCPOPT_WSCALE_OK(x)
#define V_CPL_TX_DATA_ISO_SCSI(x)
#define V_CPL_TX_DATA_ISO_PLDCRC(x)
#define G_CPL_FW4_ACK_FLOWID(x)
#define V_CPL_TX_DATA_ISO_LAST(x)
#define V_CPL_TX_DATA_ISO_IMMEDIATE(x)
#define V_CPL_TX_DATA_ISO_OP(x)
#define MK_OPCODE_TID(opcode, tid)
#define G_CPL_FW4_ACK_OPCODE(x)
#define V_TX_ULP_SUBMODE(x)
#define G_TCPOPT_TSTAMP(x)
int t4_send_fin(struct toedev *, struct tcpcb *)
void insert_ddp_data(struct toepcb *, uint32_t)
void * lookup_tid(struct adapter *, int)
static int ulp_mode(struct toepcb *toep)
void t4_init_cpl_io_handlers(void)
static bool mbuf_iscsi_iso(struct mbuf *m)
int t4_tod_output(struct toedev *, struct tcpcb *)
int do_abort_rpl_synqe(struct sge_iq *, const struct rss_header *, struct mbuf *)
void handle_ddp_indicate(struct toepcb *)
void t4_push_frames(struct adapter *, struct toepcb *, int)
void t4_rcvd_locked(struct toedev *, struct tcpcb *)
struct toepcb * hold_toepcb(struct toepcb *)
void t4_set_tcb_field(struct adapter *, struct sge_wrq *, struct toepcb *, uint16_t, uint64_t, uint64_t, int, int)
int tls_tx_key(struct toepcb *)
#define CXGBE_ISO_TYPE(flags)
void make_established(struct toepcb *, uint32_t, uint32_t, uint16_t)
void free_toepcb(struct toepcb *)
void ddp_queue_toep(struct toepcb *)
void t4_push_ktls(struct adapter *, struct toepcb *, int)
int send_rx_credits(struct adapter *, struct toepcb *, int)
void send_rx_modulate(struct adapter *, struct toepcb *)
void do_rx_data_tls(const struct cpl_rx_data *, struct toepcb *, struct mbuf *)
int t4_send_rst(struct toedev *, struct tcpcb *)
static uint8_t mbuf_ulp_submode(struct mbuf *m)
static bool mbuf_raw_wr(struct mbuf *m)
void tls_establish(struct toepcb *)
void send_flowc_wr(struct toepcb *, struct tcpcb *)
static uint16_t mbuf_iscsi_iso_mss(struct mbuf *m)
void t4_uninit_cpl_io_handlers(void)
void final_cpl_received(struct toepcb *)
int t4_close_conn(struct adapter *, struct toepcb *)
int do_abort_req_synqe(struct sge_iq *, const struct rss_header *, struct mbuf *)
void aiotx_init_toep(struct toepcb *)
void handle_ddp_close(struct toepcb *, struct tcpcb *, uint32_t)
static uint8_t mbuf_iscsi_iso_flags(struct mbuf *m)
static struct adapter * td_adapter(struct tom_data *td)
int t4_aio_queue_aiotx(struct socket *, struct kaiocb *)
void t4_push_pdus(struct adapter *, struct toepcb *, int)
void send_reset(struct adapter *, struct toepcb *, uint32_t)
void restore_so_proto(struct socket *, bool)
void send_abort_rpl(struct adapter *, struct sge_ofld_txq *, int, int)
void t4_rcvd(struct toedev *, struct tcpcb *)
static int t4_l2t_send(struct adapter *sc, struct wrqe *wr, struct l2t_entry *e)
#define F_FW_OFLD_TX_DATA_WR_LSODISABLE
#define V_FW_FLOWC_WR_NPARAMS(x)
#define F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE
@ FW_FLOWC_MNEM_SCHEDCLASS
#define F_FW_OFLD_TX_DATA_WR_ALIGNPLD
#define V_FW_WR_FLOWID(x)
#define V_FW_WR_IMMDLEN(x)