FreeBSD kernel CXGBE device code
t4_cpl_io.c
1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2012, 2015 Chelsio Communications, Inc.
5 * All rights reserved.
6 * Written by: Navdeep Parhar <np@FreeBSD.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32
33#include "opt_inet.h"
34#include "opt_inet6.h"
35#include "opt_kern_tls.h"
36#include "opt_ratelimit.h"
37
38#ifdef TCP_OFFLOAD
39#include <sys/param.h>
40#include <sys/aio.h>
41#include <sys/file.h>
42#include <sys/kernel.h>
43#include <sys/ktr.h>
44#include <sys/module.h>
45#include <sys/proc.h>
46#include <sys/protosw.h>
47#include <sys/domain.h>
48#include <sys/socket.h>
49#include <sys/socketvar.h>
50#include <sys/sglist.h>
51#include <sys/taskqueue.h>
52#include <netinet/in.h>
53#include <netinet/in_pcb.h>
54#include <netinet/ip.h>
55#include <netinet/ip6.h>
56#define TCPSTATES
57#include <netinet/tcp_fsm.h>
58#include <netinet/tcp_seq.h>
59#include <netinet/tcp_var.h>
60#include <netinet/toecore.h>
61
62#include <security/mac/mac_framework.h>
63
64#include <vm/vm.h>
65#include <vm/vm_extern.h>
66#include <vm/pmap.h>
67#include <vm/vm_map.h>
68#include <vm/vm_page.h>
69
70#include <dev/iscsi/iscsi_proto.h>
71
72#include "common/common.h"
73#include "common/t4_msg.h"
74#include "common/t4_regs.h"
75#include "common/t4_tcb.h"
76#include "tom/t4_tom_l2t.h"
77#include "tom/t4_tom.h"
78
79static void t4_aiotx_cancel(struct kaiocb *job);
80static void t4_aiotx_queue_toep(struct socket *so, struct toepcb *toep);
81
82void
83send_flowc_wr(struct toepcb *toep, struct tcpcb *tp)
84{
85 struct wrqe *wr;
86 struct fw_flowc_wr *flowc;
87 unsigned int nparams, flowclen, paramidx;
88 struct vi_info *vi = toep->vi;
89 struct port_info *pi = vi->pi;
90 struct adapter *sc = pi->adapter;
91 unsigned int pfvf = sc->pf << S_FW_VIID_PFN;
92 struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
93
94 KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT),
95 ("%s: flowc for tid %u sent already", __func__, toep->tid));
96
97 if (tp != NULL)
98 nparams = 8;
99 else
100 nparams = 6;
101 if (ulp_mode(toep) == ULP_MODE_TLS)
102 nparams++;
103 if (toep->tls.fcplenmax != 0)
104 nparams++;
105 if (toep->params.tc_idx != -1) {
106 MPASS(toep->params.tc_idx >= 0 &&
107 toep->params.tc_idx < sc->params.nsched_cls);
108 nparams++;
109 }
110
111 flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
112
113 wr = alloc_wrqe(roundup2(flowclen, 16), &toep->ofld_txq->wrq);
114 if (wr == NULL) {
115 /* XXX */
116 panic("%s: allocation failure.", __func__);
117 }
118 flowc = wrtod(wr);
119 memset(flowc, 0, wr->wr_len);
120
121 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
122 V_FW_FLOWC_WR_NPARAMS(nparams));
123 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
124 V_FW_WR_FLOWID(toep->tid));
125
126#define FLOWC_PARAM(__m, __v) \
127 do { \
128 flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \
129 flowc->mnemval[paramidx].val = htobe32(__v); \
130 paramidx++; \
131 } while (0)
132
133 paramidx = 0;
134
135 FLOWC_PARAM(PFNVFN, pfvf);
136 FLOWC_PARAM(CH, pi->tx_chan);
137 FLOWC_PARAM(PORT, pi->tx_chan);
138 FLOWC_PARAM(IQID, toep->ofld_rxq->iq.abs_id);
139 FLOWC_PARAM(SNDBUF, toep->params.sndbuf);
140 if (tp) {
141 FLOWC_PARAM(MSS, toep->params.emss);
142 FLOWC_PARAM(SNDNXT, tp->snd_nxt);
143 FLOWC_PARAM(RCVNXT, tp->rcv_nxt);
144 } else
145 FLOWC_PARAM(MSS, 512);
146 CTR6(KTR_CXGBE,
147 "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x",
148 __func__, toep->tid, toep->params.emss, toep->params.sndbuf,
149 tp ? tp->snd_nxt : 0, tp ? tp->rcv_nxt : 0);
150
151 if (ulp_mode(toep) == ULP_MODE_TLS)
152 FLOWC_PARAM(ULP_MODE, ulp_mode(toep));
153 if (toep->tls.fcplenmax != 0)
154 FLOWC_PARAM(TXDATAPLEN_MAX, toep->tls.fcplenmax);
155 if (toep->params.tc_idx != -1)
156 FLOWC_PARAM(SCHEDCLASS, toep->params.tc_idx);
157#undef FLOWC_PARAM
158
159 KASSERT(paramidx == nparams, ("nparams mismatch"));
160
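/*
 * Offload tx credits are in 16-byte units, so the flowc consumes
 * howmany(flowclen, 16) credits and one txsd slot; both are returned
 * when the firmware acknowledges the work request (see do_fw4_ack).
 */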
161 txsd->tx_credits = howmany(flowclen, 16);
162 txsd->plen = 0;
163 KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
164 ("%s: not enough credits (%d)", __func__, toep->tx_credits));
165 toep->tx_credits -= txsd->tx_credits;
166 if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
167 toep->txsd_pidx = 0;
168 toep->txsd_avail--;
169
170 toep->flags |= TPF_FLOWC_WR_SENT;
171 t4_wrq_tx(sc, wr);
172}
173
174#ifdef RATELIMIT
175/*
176 * Input is Bytes/second (so_max_pacing_rate), chip counts in Kilobits/second.
177 */
178static int
179update_tx_rate_limit(struct adapter *sc, struct toepcb *toep, u_int Bps)
180{
181 int tc_idx, rc;
182 const u_int kbps = (u_int) (uint64_t)Bps * 8ULL / 1000;
183 const int port_id = toep->vi->pi->port_id;
184
185 CTR3(KTR_CXGBE, "%s: tid %u, rate %uKbps", __func__, toep->tid, kbps);
186
187 if (kbps == 0) {
188 /* unbind */
189 tc_idx = -1;
190 } else {
191 rc = t4_reserve_cl_rl_kbps(sc, port_id, kbps, &tc_idx);
192 if (rc != 0)
193 return (rc);
194 MPASS(tc_idx >= 0 && tc_idx < sc->params.nsched_cls);
195 }
196
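/*
 * (Re)bind the tid to the new traffic class with a one-parameter flowc.
 * A SCHEDCLASS value of 0xff (tc_idx == -1) asks the firmware to detach
 * the flow from any scheduling class.
 */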
197 if (toep->params.tc_idx != tc_idx) {
198 struct wrqe *wr;
199 struct fw_flowc_wr *flowc;
200 int nparams = 1, flowclen, flowclen16;
201 struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
202
203 flowclen = sizeof(*flowc) + nparams * sizeof(struct
204 fw_flowc_mnemval);
205 flowclen16 = howmany(flowclen, 16);
206 if (toep->tx_credits < flowclen16 || toep->txsd_avail == 0 ||
207 (wr = alloc_wrqe(roundup2(flowclen, 16),
208 &toep->ofld_txq->wrq)) == NULL) {
209 if (tc_idx >= 0)
210 t4_release_cl_rl(sc, port_id, tc_idx);
211 return (ENOMEM);
212 }
213
214 flowc = wrtod(wr);
215 memset(flowc, 0, wr->wr_len);
216
217 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
218 V_FW_FLOWC_WR_NPARAMS(nparams));
219 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(flowclen16) |
220 V_FW_WR_FLOWID(toep->tid));
221
222 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
223 if (tc_idx == -1)
224 flowc->mnemval[0].val = htobe32(0xff);
225 else
226 flowc->mnemval[0].val = htobe32(tc_idx);
227
228 txsd->tx_credits = flowclen16;
229 txsd->plen = 0;
230 toep->tx_credits -= txsd->tx_credits;
231 if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
232 toep->txsd_pidx = 0;
233 toep->txsd_avail--;
234 t4_wrq_tx(sc, wr);
235 }
236
237 if (toep->params.tc_idx >= 0)
238 t4_release_cl_rl(sc, port_id, toep->params.tc_idx);
239 toep->params.tc_idx = tc_idx;
240
241 return (0);
242}
243#endif
244
245void
246send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt)
247{
248 struct wrqe *wr;
249 struct cpl_abort_req *req;
250 int tid = toep->tid;
251 struct inpcb *inp = toep->inp;
252 struct tcpcb *tp = intotcpcb(inp); /* don't use if INP_DROPPED */
253
254 INP_WLOCK_ASSERT(inp);
255
256 CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s",
257 __func__, toep->tid,
258 inp->inp_flags & INP_DROPPED ? "inp dropped" :
259 tcpstates[tp->t_state],
260 toep->flags, inp->inp_flags,
261 toep->flags & TPF_ABORT_SHUTDOWN ?
262 " (abort already in progress)" : "");
263
264 if (toep->flags & TPF_ABORT_SHUTDOWN)
265 return; /* abort already in progress */
266
267 toep->flags |= TPF_ABORT_SHUTDOWN;
268
269 KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
270 ("%s: flowc_wr not sent for tid %d.", __func__, tid));
271
272 wr = alloc_wrqe(sizeof(*req), &toep->ofld_txq->wrq);
273 if (wr == NULL) {
274 /* XXX */
275 panic("%s: allocation failure.", __func__);
276 }
277 req = wrtod(wr);
278
279 INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid);
280 if (inp->inp_flags & INP_DROPPED)
281 req->rsvd0 = htobe32(snd_nxt);
282 else
283 req->rsvd0 = htobe32(tp->snd_nxt);
284 req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT);
285 req->cmd = CPL_ABORT_SEND_RST;
286
287 /*
288 * XXX: What's the correct way to tell that the inp hasn't been detached
289 * from its socket? Should I even be flushing the snd buffer here?
290 */
291 if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
292 struct socket *so = inp->inp_socket;
293
294 if (so != NULL) /* because I'm not sure. See comment above */
295 sbflush(&so->so_snd);
296 }
297
298 t4_l2t_send(sc, wr, toep->l2te);
299}
300
301/*
302 * Called when a connection is established to translate the TCP options
303 * reported by HW to FreeBSD's native format.
304 */
305static void
306assign_rxopt(struct tcpcb *tp, uint16_t opt)
307{
308 struct toepcb *toep = tp->t_toe;
309 struct inpcb *inp = tp->t_inpcb;
310 struct adapter *sc = td_adapter(toep->td);
311
312 INP_LOCK_ASSERT(inp);
313
314 toep->params.mtu_idx = G_TCPOPT_MSS(opt);
315 tp->t_maxseg = sc->params.mtus[toep->params.mtu_idx];
316 if (inp->inp_inc.inc_flags & INC_ISIPV6)
317 tp->t_maxseg -= sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
318 else
319 tp->t_maxseg -= sizeof(struct ip) + sizeof(struct tcphdr);
320
321 toep->params.emss = tp->t_maxseg;
322 if (G_TCPOPT_TSTAMP(opt)) {
323 toep->params.tstamp = 1;
324 toep->params.emss -= TCPOLEN_TSTAMP_APPA;
325 tp->t_flags |= TF_RCVD_TSTMP; /* timestamps ok */
326 tp->ts_recent = 0; /* hmmm */
327 tp->ts_recent_age = tcp_ts_getticks();
328 } else
329 toep->params.tstamp = 0;
330
331 if (G_TCPOPT_SACK(opt)) {
332 toep->params.sack = 1;
333 tp->t_flags |= TF_SACK_PERMIT; /* should already be set */
334 } else {
335 toep->params.sack = 0;
336 tp->t_flags &= ~TF_SACK_PERMIT; /* sack disallowed by peer */
337 }
338
339 if (G_TCPOPT_WSCALE_OK(opt))
340 tp->t_flags |= TF_RCVD_SCALE;
341
342 /* Doing window scaling? */
343 if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
344 (TF_RCVD_SCALE | TF_REQ_SCALE)) {
345 tp->rcv_scale = tp->request_r_scale;
346 tp->snd_scale = G_TCPOPT_SND_WSCALE(opt);
347 } else
348 toep->params.wscale = 0;
349
350 CTR6(KTR_CXGBE,
351 "assign_rxopt: tid %d, mtu_idx %u, emss %u, ts %u, sack %u, wscale %u",
352 toep->tid, toep->params.mtu_idx, toep->params.emss,
353 toep->params.tstamp, toep->params.sack, toep->params.wscale);
354}
355
356/*
357 * Completes some final bits of initialization for just established connections
358 * and changes their state to TCPS_ESTABLISHED.
359 *
360 * The ISNs are from the exchange of SYNs.
361 */
362void
363make_established(struct toepcb *toep, uint32_t iss, uint32_t irs, uint16_t opt)
364{
365 struct inpcb *inp = toep->inp;
366 struct socket *so = inp->inp_socket;
367 struct tcpcb *tp = intotcpcb(inp);
368 uint16_t tcpopt = be16toh(opt);
369
370 INP_WLOCK_ASSERT(inp);
371 KASSERT(tp->t_state == TCPS_SYN_SENT ||
372 tp->t_state == TCPS_SYN_RECEIVED,
373 ("%s: TCP state %s", __func__, tcpstates[tp->t_state]));
374
375 CTR6(KTR_CXGBE, "%s: tid %d, so %p, inp %p, tp %p, toep %p",
376 __func__, toep->tid, so, inp, tp, toep);
377
378 tcp_state_change(tp, TCPS_ESTABLISHED);
379 tp->t_starttime = ticks;
380 TCPSTAT_INC(tcps_connects);
381
382 tp->irs = irs;
383 tcp_rcvseqinit(tp);
384 tp->rcv_wnd = (u_int)toep->params.opt0_bufsize << 10;
385 tp->rcv_adv += tp->rcv_wnd;
386 tp->last_ack_sent = tp->rcv_nxt;
387
388 tp->iss = iss;
389 tcp_sendseqinit(tp);
390 tp->snd_una = iss + 1;
391 tp->snd_nxt = iss + 1;
392 tp->snd_max = iss + 1;
393
394 assign_rxopt(tp, tcpopt);
395 send_flowc_wr(toep, tp);
396
397 soisconnected(so);
398
399 if (ulp_mode(toep) == ULP_MODE_TLS)
400 tls_establish(toep);
401}
402
403int
404send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
405{
406 struct wrqe *wr;
407 struct cpl_rx_data_ack *req;
408 uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
409
410 KASSERT(credits >= 0, ("%s: %d credits", __func__, credits));
411
412 wr = alloc_wrqe(sizeof(*req), toep->ctrlq);
413 if (wr == NULL)
414 return (0);
415 req = wrtod(wr);
416
417 INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid);
418 req->credit_dack = htobe32(dack | V_RX_CREDITS(credits));
419
420 t4_wrq_tx(sc, wr);
421 return (credits);
422}
423
424void
425send_rx_modulate(struct adapter *sc, struct toepcb *toep)
426{
427 struct wrqe *wr;
428 struct cpl_rx_data_ack *req;
429
430 wr = alloc_wrqe(sizeof(*req), toep->ctrlq);
431 if (wr == NULL)
432 return;
433 req = wrtod(wr);
434
435 INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid);
436 req->credit_dack = htobe32(F_RX_MODULATE_RX);
437
438 t4_wrq_tx(sc, wr);
439}
440
441void
442t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp)
443{
444 struct adapter *sc = tod->tod_softc;
445 struct inpcb *inp = tp->t_inpcb;
446 struct socket *so = inp->inp_socket;
447 struct sockbuf *sb = &so->so_rcv;
448 struct toepcb *toep = tp->t_toe;
449 int rx_credits;
450
451 INP_WLOCK_ASSERT(inp);
452 SOCKBUF_LOCK_ASSERT(sb);
453
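/*
 * Only return rx credits (i.e., open the advertised window) when the
 * window is small or a sizable chunk of buffer space has been freed;
 * this batches RX_DATA_ACKs instead of sending one per read.
 */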
454 rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0;
455 if (rx_credits > 0 &&
456 (tp->rcv_wnd <= 32 * 1024 || rx_credits >= 64 * 1024 ||
457 (rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) ||
458 sbused(sb) + tp->rcv_wnd < sb->sb_lowat)) {
459 rx_credits = send_rx_credits(sc, toep, rx_credits);
460 tp->rcv_wnd += rx_credits;
461 tp->rcv_adv += rx_credits;
462 } else if (toep->flags & TPF_FORCE_CREDITS)
463 send_rx_modulate(sc, toep);
464}
465
466void
467t4_rcvd(struct toedev *tod, struct tcpcb *tp)
468{
469 struct inpcb *inp = tp->t_inpcb;
470 struct socket *so = inp->inp_socket;
471 struct sockbuf *sb = &so->so_rcv;
472
473 SOCKBUF_LOCK(sb);
474 t4_rcvd_locked(tod, tp);
475 SOCKBUF_UNLOCK(sb);
476}
477
478/*
479 * Close a connection by sending a CPL_CLOSE_CON_REQ message.
480 */
481int
482t4_close_conn(struct adapter *sc, struct toepcb *toep)
483{
484 struct wrqe *wr;
485 struct cpl_close_con_req *req;
486 unsigned int tid = toep->tid;
487
488 CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid,
489 toep->flags & TPF_FIN_SENT ? ", IGNORED" : "");
490
491 if (toep->flags & TPF_FIN_SENT)
492 return (0);
493
494 KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
495 ("%s: flowc_wr not sent for tid %u.", __func__, tid));
496
497 wr = alloc_wrqe(sizeof(*req), &toep->ofld_txq->wrq);
498 if (wr == NULL) {
499 /* XXX */
500 panic("%s: allocation failure.", __func__);
501 }
502 req = wrtod(wr);
503
504 req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) |
505 V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr)));
506 req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) |
507 V_FW_WR_FLOWID(tid));
508 req->wr.wr_lo = cpu_to_be64(0);
509 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
510 req->rsvd = 0;
511
512 toep->flags |= TPF_FIN_SENT;
513 toep->flags &= ~TPF_SEND_FIN;
514 t4_l2t_send(sc, wr, toep->l2te);
515
516 return (0);
517}
518
519#define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16)
520#define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16))
521#define MIN_ISO_TX_CREDITS (howmany(sizeof(struct cpl_tx_data_iso), 16))
522#define MIN_TX_CREDITS(iso) \
523 (MIN_OFLD_TX_CREDITS + ((iso) ? MIN_ISO_TX_CREDITS : 0))
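/* MIN_OFLD_TX_CREDITS covers the WR header plus at least 1 byte of payload. */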
524
525/* Maximum amount of immediate data we could stuff in a WR */
526static inline int
527max_imm_payload(int tx_credits, int iso)
528{
529 const int iso_cpl_size = iso ? sizeof(struct cpl_tx_data_iso) : 0;
530 const int n = 1; /* Use no more than one desc for imm. data WR */
531
532 KASSERT(tx_credits >= 0 &&
533 tx_credits <= MAX_OFLD_TX_CREDITS,
534 ("%s: %d credits", __func__, tx_credits));
535
536 if (tx_credits < MIN_TX_CREDITS(iso))
537 return (0);
538
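/*
 * EQ_ESIZE is the size of one hardware egress queue descriptor (64B);
 * immediate data is limited to a single descriptor minus the WR header
 * (and the ISO CPL, if present).
 */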
539 if (tx_credits >= (n * EQ_ESIZE) / 16)
540 return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr) -
541 iso_cpl_size);
542 else
543 return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr) -
544 iso_cpl_size);
545}
546
547/* Maximum number of SGL entries we could stuff in a WR */
548static inline int
549max_dsgl_nsegs(int tx_credits, int iso)
550{
551 int nseg = 1; /* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */
552 int sge_pair_credits = tx_credits - MIN_TX_CREDITS(iso);
553
554 KASSERT(tx_credits >= 0 &&
555 tx_credits <= MAX_OFLD_TX_CREDITS,
556 ("%s: %d credits", __func__, tx_credits));
557
558 if (tx_credits < MIN_TX_CREDITS(iso))
559 return (0);
560
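/*
 * Beyond the first SGE (held in the ulptx_sgl header), segments are
 * packed two per 24-byte ulptx_sge_pair; a 16-byte remainder of credit
 * space still fits one more segment.
 */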
561 nseg += 2 * (sge_pair_credits * 16 / 24);
562 if ((sge_pair_credits * 16) % 24 == 16)
563 nseg++;
564
565 return (nseg);
566}
567
568static inline void
569write_tx_wr(void *dst, struct toepcb *toep, int fw_wr_opcode,
570 unsigned int immdlen, unsigned int plen, uint8_t credits, int shove,
571 int ulp_submode)
572{
573 struct fw_ofld_tx_data_wr *txwr = dst;
574
575 txwr->op_to_immdlen = htobe32(V_WR_OP(fw_wr_opcode) |
576 V_FW_WR_IMMDLEN(immdlen));
577 txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) |
578 V_FW_WR_LEN16(credits));
579 txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(ulp_mode(toep)) |
580 V_TX_ULP_SUBMODE(ulp_submode) | V_TX_URG(0) | V_TX_SHOVE(shove));
581 txwr->plen = htobe32(plen);
582
583 if (toep->params.tx_align > 0) {
584 if (plen < 2 * toep->params.emss)
585 txwr->lsodisable_to_flags |=
586 htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE);
587 else
588 txwr->lsodisable_to_flags |=
589 htobe32(F_FW_OFLD_TX_DATA_WR_ALIGNPLD |
590 (toep->params.nagle == 0 ? 0 :
591 F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE));
592 }
593}
594
595/*
596 * Generate a DSGL from a starting mbuf. The total number of segments and the
597 * maximum segments in any one mbuf are provided.
598 */
599static void
600write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n)
601{
602 struct mbuf *m;
603 struct ulptx_sgl *usgl = dst;
604 int i, j, rc;
605 struct sglist sg;
606 struct sglist_seg segs[n];
607
608 KASSERT(nsegs > 0, ("%s: nsegs 0", __func__));
609
610 sglist_init(&sg, n, segs);
611 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
612 V_ULPTX_NSGE(nsegs));
613
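/*
 * i == -1 selects the len0/addr0 slot embedded in the ulptx_sgl header;
 * subsequent segments fill the sge[] pairs two at a time.
 */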
614 i = -1;
615 for (m = start; m != stop; m = m->m_next) {
616 if (m->m_flags & M_EXTPG)
617 rc = sglist_append_mbuf_epg(&sg, m,
618 mtod(m, vm_offset_t), m->m_len);
619 else
620 rc = sglist_append(&sg, mtod(m, void *), m->m_len);
621 if (__predict_false(rc != 0))
622 panic("%s: sglist_append %d", __func__, rc);
623
624 for (j = 0; j < sg.sg_nseg; i++, j++) {
625 if (i < 0) {
626 usgl->len0 = htobe32(segs[j].ss_len);
627 usgl->addr0 = htobe64(segs[j].ss_paddr);
628 } else {
629 usgl->sge[i / 2].len[i & 1] =
630 htobe32(segs[j].ss_len);
631 usgl->sge[i / 2].addr[i & 1] =
632 htobe64(segs[j].ss_paddr);
633 }
634#ifdef INVARIANTS
635 nsegs--;
636#endif
637 }
638 sglist_reset(&sg);
639 }
640 if (i & 1)
641 usgl->sge[i / 2].len[1] = htobe32(0);
642 KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p",
643 __func__, nsegs, start, stop));
644}
645
646/*
647 * Max number of SGL entries an offload tx work request can have. This is 41
648 * (1 + 40) for a full 512B work request.
649 * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40)
650 */
651#define OFLD_SGL_LEN (41)
652
653/*
654 * Send data and/or a FIN to the peer.
655 *
656 * The socket's so_snd buffer consists of a stream of data starting with sb_mb
657 * and linked together with m_next. sb_sndptr, if set, is the last mbuf that
658 * was transmitted.
659 *
660 * drop indicates the number of bytes that should be dropped from the head of
661 * the send buffer. It is an optimization that lets do_fw4_ack avoid creating
662 * contention on the send buffer lock (before this change it used to do
663 * sowwakeup and then t4_push_frames right after that when recovering from tx
664 * stalls). When drop is set this function MUST drop the bytes and wake up any
665 * writers.
666 */
667void
668t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
669{
670 struct mbuf *sndptr, *m, *sb_sndptr;
671 struct fw_ofld_tx_data_wr *txwr;
672 struct wrqe *wr;
673 u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
674 struct inpcb *inp = toep->inp;
675 struct tcpcb *tp = intotcpcb(inp);
676 struct socket *so = inp->inp_socket;
677 struct sockbuf *sb = &so->so_snd;
678 int tx_credits, shove, compl, sowwakeup;
679 struct ofld_tx_sdesc *txsd;
680 bool nomap_mbuf_seen;
681
682 INP_WLOCK_ASSERT(inp);
683 KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
684 ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
685
686 KASSERT(ulp_mode(toep) == ULP_MODE_NONE ||
687 ulp_mode(toep) == ULP_MODE_TCPDDP ||
688 ulp_mode(toep) == ULP_MODE_TLS ||
689 ulp_mode(toep) == ULP_MODE_RDMA,
690 ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep));
691
692#ifdef VERBOSE_TRACES
693 CTR5(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d",
694 __func__, toep->tid, toep->flags, tp->t_flags, drop);
695#endif
696 if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
697 return;
698
699#ifdef RATELIMIT
700 if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) &&
701 (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) {
702 inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
703 }
704#endif
705
706 /*
707 * This function doesn't resume by itself. Someone else must clear the
708 * flag and call this function.
709 */
710 if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
711 KASSERT(drop == 0,
712 ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
713 return;
714 }
715
716 txsd = &toep->txsd[toep->txsd_pidx];
717 do {
718 tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
719 max_imm = max_imm_payload(tx_credits, 0);
720 max_nsegs = max_dsgl_nsegs(tx_credits, 0);
721
722 SOCKBUF_LOCK(sb);
723 sowwakeup = drop;
724 if (drop) {
725 sbdrop_locked(sb, drop);
726 drop = 0;
727 }
728 sb_sndptr = sb->sb_sndptr;
729 sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb;
730 plen = 0;
731 nsegs = 0;
732 max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
733 nomap_mbuf_seen = false;
734 for (m = sndptr; m != NULL; m = m->m_next) {
735 int n;
736
737 if ((m->m_flags & M_NOTAVAIL) != 0)
738 break;
739 if (m->m_flags & M_EXTPG) {
740#ifdef KERN_TLS
741 if (m->m_epg_tls != NULL) {
742 toep->flags |= TPF_KTLS;
743 if (plen == 0) {
744 SOCKBUF_UNLOCK(sb);
745 t4_push_ktls(sc, toep, 0);
746 return;
747 }
748 break;
749 }
750#endif
751 n = sglist_count_mbuf_epg(m,
752 mtod(m, vm_offset_t), m->m_len);
753 } else
754 n = sglist_count(mtod(m, void *), m->m_len);
755
756 nsegs += n;
757 plen += m->m_len;
758
759 /* This mbuf sent us _over_ the nsegs limit, back out */
760 if (plen > max_imm && nsegs > max_nsegs) {
761 nsegs -= n;
762 plen -= m->m_len;
763 if (plen == 0) {
764 /* Too few credits */
765 toep->flags |= TPF_TX_SUSPENDED;
766 if (sowwakeup) {
767 if (!TAILQ_EMPTY(
768 &toep->aiotx_jobq))
769 t4_aiotx_queue_toep(so,
770 toep);
771 sowwakeup_locked(so);
772 } else
773 SOCKBUF_UNLOCK(sb);
774 SOCKBUF_UNLOCK_ASSERT(sb);
775 return;
776 }
777 break;
778 }
779
780 if (m->m_flags & M_EXTPG)
781 nomap_mbuf_seen = true;
782 if (max_nsegs_1mbuf < n)
783 max_nsegs_1mbuf = n;
784 sb_sndptr = m; /* new sb->sb_sndptr if all goes well */
785
786 /* This mbuf put us right at the max_nsegs limit */
787 if (plen > max_imm && nsegs == max_nsegs) {
788 m = m->m_next;
789 break;
790 }
791 }
792
793 if (sbused(sb) > sb->sb_hiwat * 5 / 8 &&
794 toep->plen_nocompl + plen >= sb->sb_hiwat / 4)
795 compl = 1;
796 else
797 compl = 0;
798
799 if (sb->sb_flags & SB_AUTOSIZE &&
800 V_tcp_do_autosndbuf &&
801 sb->sb_hiwat < V_tcp_autosndbuf_max &&
802 sbused(sb) >= sb->sb_hiwat * 7 / 8) {
803 int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
804 V_tcp_autosndbuf_max);
805
806 if (!sbreserve_locked(sb, newsize, so, NULL))
807 sb->sb_flags &= ~SB_AUTOSIZE;
808 else
809 sowwakeup = 1; /* room available */
810 }
811 if (sowwakeup) {
812 if (!TAILQ_EMPTY(&toep->aiotx_jobq))
813 t4_aiotx_queue_toep(so, toep);
814 sowwakeup_locked(so);
815 } else
816 SOCKBUF_UNLOCK(sb);
817 SOCKBUF_UNLOCK_ASSERT(sb);
818
819 /* nothing to send */
820 if (plen == 0) {
821 KASSERT(m == NULL || (m->m_flags & M_NOTAVAIL) != 0,
822 ("%s: nothing to send, but m != NULL is ready",
823 __func__));
824 break;
825 }
826
827 if (__predict_false(toep->flags & TPF_FIN_SENT))
828 panic("%s: excess tx.", __func__);
829
830 shove = m == NULL && !(tp->t_flags & TF_MORETOCOME);
831 if (plen <= max_imm && !nomap_mbuf_seen) {
832
833 /* Immediate data tx */
834
835 wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
836 &toep->ofld_txq->wrq);
837 if (wr == NULL) {
838 /* XXX: how will we recover from this? */
839 toep->flags |= TPF_TX_SUSPENDED;
840 return;
841 }
842 txwr = wrtod(wr);
843 credits = howmany(wr->wr_len, 16);
844 write_tx_wr(txwr, toep, FW_OFLD_TX_DATA_WR, plen, plen,
845 credits, shove, 0);
846 m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
847 nsegs = 0;
848 } else {
849 int wr_len;
850
851 /* DSGL tx */
852
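/*
 * SGL sizing: the first segment lives in the ulptx_sgl itself; the
 * remaining nsegs - 1 are packed in 24-byte ulptx_sge_pair structs,
 * an odd leftover segment taking another 16 bytes.
 */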
853 wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
854 ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
855 wr = alloc_wrqe(roundup2(wr_len, 16),
856 &toep->ofld_txq->wrq);
857 if (wr == NULL) {
858 /* XXX: how will we recover from this? */
859 toep->flags |= TPF_TX_SUSPENDED;
860 return;
861 }
862 txwr = wrtod(wr);
863 credits = howmany(wr_len, 16);
864 write_tx_wr(txwr, toep, FW_OFLD_TX_DATA_WR, 0, plen,
865 credits, shove, 0);
866 write_tx_sgl(txwr + 1, sndptr, m, nsegs,
867 max_nsegs_1mbuf);
868 if (wr_len & 0xf) {
869 uint64_t *pad = (uint64_t *)
870 ((uintptr_t)txwr + wr_len);
871 *pad = 0;
872 }
873 }
874
875 KASSERT(toep->tx_credits >= credits,
876 ("%s: not enough credits", __func__));
877
878 toep->tx_credits -= credits;
879 toep->tx_nocompl += credits;
880 toep->plen_nocompl += plen;
881 if (toep->tx_credits <= toep->tx_total * 3 / 8 &&
882 toep->tx_nocompl >= toep->tx_total / 4)
883 compl = 1;
884
885 if (compl || ulp_mode(toep) == ULP_MODE_RDMA) {
886 txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL);
887 toep->tx_nocompl = 0;
888 toep->plen_nocompl = 0;
889 }
890
891 tp->snd_nxt += plen;
892 tp->snd_max += plen;
893
894 SOCKBUF_LOCK(sb);
895 KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__));
896 sb->sb_sndptr = sb_sndptr;
897 SOCKBUF_UNLOCK(sb);
898
899 toep->flags |= TPF_TX_DATA_SENT;
900 if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
901 toep->flags |= TPF_TX_SUSPENDED;
902
903 KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
904 txsd->plen = plen;
905 txsd->tx_credits = credits;
906 txsd++;
907 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
908 toep->txsd_pidx = 0;
909 txsd = &toep->txsd[0];
910 }
911 toep->txsd_avail--;
912
913 t4_l2t_send(sc, wr, toep->l2te);
914 } while (m != NULL && (m->m_flags & M_NOTAVAIL) == 0);
915
916 /* Send a FIN if requested, but only if there's no more data to send */
917 if (m == NULL && toep->flags & TPF_SEND_FIN)
918 t4_close_conn(sc, toep);
919}
920
921static inline void
922rqdrop_locked(struct mbufq *q, int plen)
923{
924 struct mbuf *m;
925
926 while (plen > 0) {
927 m = mbufq_dequeue(q);
928
929 /* Too many credits. */
930 MPASS(m != NULL);
931 M_ASSERTPKTHDR(m);
932
933 /* Partial credits. */
934 MPASS(plen >= m->m_pkthdr.len);
935
936 plen -= m->m_pkthdr.len;
937 m_freem(m);
938 }
939}
940
941/*
942 * Not a bit in the TCB, but is a bit in the ulp_submode field of the
943 * CPL_TX_DATA flags field in FW_ISCSI_TX_DATA_WR.
944 */
945#define ULP_ISO G_TX_ULP_SUBMODE(F_FW_ISCSI_TX_DATA_WR_ULPSUBMODE_ISO)
946
947static void
948write_tx_data_iso(void *dst, u_int ulp_submode, uint8_t flags, uint16_t mss,
949 int len, int npdu)
950{
951 struct cpl_tx_data_iso *cpl;
952 unsigned int burst_size;
953 unsigned int last;
954
955 /*
956 * The firmware will set the 'F' bit on the last PDU when
957 * either condition is true:
958 *
959 * - this large PDU is marked as the "last" slice
960 *
961 * - the amount of data payload bytes equals the burst_size
962 *
963 * The strategy used here is to always set the burst_size
964 * artificially high (len includes the size of the template
965 * BHS) and only set the "last" flag if the original PDU had
966 * 'F' set.
967 */
968 burst_size = len;
969 last = !!(flags & CXGBE_ISO_F);
970
971 cpl = (struct cpl_tx_data_iso *)dst;
972 cpl->op_to_scsi = htonl(V_CPL_TX_DATA_ISO_OP(CPL_TX_DATA_ISO) |
973 V_CPL_TX_DATA_ISO_FIRST(1) | V_CPL_TX_DATA_ISO_LAST(last) |
974 V_CPL_TX_DATA_ISO_CPLHDRLEN(0) |
975 V_CPL_TX_DATA_ISO_HDRCRC(!!(ulp_submode & ULP_CRC_HEADER)) |
976 V_CPL_TX_DATA_ISO_PLDCRC(!!(ulp_submode & ULP_CRC_DATA)) |
977 V_CPL_TX_DATA_ISO_IMMEDIATE(0) |
978 V_CPL_TX_DATA_ISO_SCSI(CXGBE_ISO_TYPE(flags)));
979
980 cpl->ahs_len = 0;
981 cpl->mpdu = htons(DIV_ROUND_UP(mss, 4));
982 cpl->burst_size = htonl(DIV_ROUND_UP(burst_size, 4));
983 cpl->len = htonl(len);
984 cpl->reserved2_seglen_offset = htonl(0);
985 cpl->datasn_offset = htonl(0);
986 cpl->buffer_offset = htonl(0);
987 cpl->reserved3 = 0;
988}
989
990static struct wrqe *
991write_iscsi_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr)
992{
993 struct mbuf *m;
994 struct fw_ofld_tx_data_wr *txwr;
995 struct cpl_tx_data_iso *cpl_iso;
996 void *p;
997 struct wrqe *wr;
998 u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
999 u_int adjusted_plen, imm_data, ulp_submode;
1000 struct inpcb *inp = toep->inp;
1001 struct tcpcb *tp = intotcpcb(inp);
1002 int tx_credits, shove, npdu, wr_len;
1003 uint16_t iso_mss;
1004 static const u_int ulp_extra_len[] = {0, 4, 4, 8};
1005 bool iso, nomap_mbuf_seen;
1006
1007 M_ASSERTPKTHDR(sndptr);
1008
1009 tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
1010 if (mbuf_raw_wr(sndptr)) {
1011 plen = sndptr->m_pkthdr.len;
1012 KASSERT(plen <= SGE_MAX_WR_LEN,
1013 ("raw WR len %u is greater than max WR len", plen));
1014 if (plen > tx_credits * 16)
1015 return (NULL);
1016
1017 wr = alloc_wrqe(roundup2(plen, 16), &toep->ofld_txq->wrq);
1018 if (__predict_false(wr == NULL))
1019 return (NULL);
1020
1021 m_copydata(sndptr, 0, plen, wrtod(wr));
1022 return (wr);
1023 }
1024
1025 iso = mbuf_iscsi_iso(sndptr);
1026 max_imm = max_imm_payload(tx_credits, iso);
1027 max_nsegs = max_dsgl_nsegs(tx_credits, iso);
1028 iso_mss = mbuf_iscsi_iso_mss(sndptr);
1029
1030 plen = 0;
1031 nsegs = 0;
1032 max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
1033 nomap_mbuf_seen = false;
1034 for (m = sndptr; m != NULL; m = m->m_next) {
1035 int n;
1036
1037 if (m->m_flags & M_EXTPG)
1038 n = sglist_count_mbuf_epg(m, mtod(m, vm_offset_t),
1039 m->m_len);
1040 else
1041 n = sglist_count(mtod(m, void *), m->m_len);
1042
1043 nsegs += n;
1044 plen += m->m_len;
1045
1046 /*
1047 * This mbuf would send us _over_ the nsegs limit.
1048 * Suspend tx because the PDU can't be sent out.
1049 */
1050 if ((nomap_mbuf_seen || plen > max_imm) && nsegs > max_nsegs)
1051 return (NULL);
1052
1053 if (m->m_flags & M_EXTPG)
1054 nomap_mbuf_seen = true;
1055 if (max_nsegs_1mbuf < n)
1056 max_nsegs_1mbuf = n;
1057 }
1058
1059 if (__predict_false(toep->flags & TPF_FIN_SENT))
1060 panic("%s: excess tx.", __func__);
1061
1062 /*
1063 * We have a PDU to send. All of it goes out in one WR so 'm'
1064 * is NULL. A PDU's length is always a multiple of 4.
1065 */
1066 MPASS(m == NULL);
1067 MPASS((plen & 3) == 0);
1068 MPASS(sndptr->m_pkthdr.len == plen);
1069
1070 shove = !(tp->t_flags & TF_MORETOCOME);
1071
1072 /*
1073 * plen doesn't include header and data digests, which are
1074 * generated and inserted in the right places by the TOE, but
1075 * they do occupy TCP sequence space and need to be accounted
1076 * for.
1077 */
1078 ulp_submode = mbuf_ulp_submode(sndptr);
1079 MPASS(ulp_submode < nitems(ulp_extra_len));
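/*
 * ulp_extra_len[] is the digest overhead (4 bytes per enabled CRC) the
 * TOE inserts per PDU.  For ISO the large PDU is sliced into npdu PDUs
 * of up to iso_mss payload bytes, each with its own BHS and digests.
 */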
1080 npdu = iso ? howmany(plen - ISCSI_BHS_SIZE, iso_mss) : 1;
1081 adjusted_plen = plen + ulp_extra_len[ulp_submode] * npdu;
1082 if (iso)
1083 adjusted_plen += ISCSI_BHS_SIZE * (npdu - 1);
1084 wr_len = sizeof(*txwr);
1085 if (iso)
1086 wr_len += sizeof(struct cpl_tx_data_iso);
1087 if (plen <= max_imm && !nomap_mbuf_seen) {
1088 /* Immediate data tx */
1089 imm_data = plen;
1090 wr_len += plen;
1091 nsegs = 0;
1092 } else {
1093 /* DSGL tx */
1094 imm_data = 0;
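/* Same DSGL sizing as t4_push_frames: 24 bytes per pair of additional segments. */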
1095 wr_len += sizeof(struct ulptx_sgl) +
1096 ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
1097 }
1098
1099 wr = alloc_wrqe(roundup2(wr_len, 16), &toep->ofld_txq->wrq);
1100 if (wr == NULL) {
1101 /* XXX: how will we recover from this? */
1102 return (NULL);
1103 }
1104 txwr = wrtod(wr);
1105 credits = howmany(wr->wr_len, 16);
1106
1107 if (iso) {
1108 write_tx_wr(txwr, toep, FW_ISCSI_TX_DATA_WR,
1109 imm_data + sizeof(struct cpl_tx_data_iso),
1110 adjusted_plen, credits, shove, ulp_submode | ULP_ISO);
1111 cpl_iso = (struct cpl_tx_data_iso *)(txwr + 1);
1112 MPASS(plen == sndptr->m_pkthdr.len);
1113 write_tx_data_iso(cpl_iso, ulp_submode,
1114 mbuf_iscsi_iso_flags(sndptr), iso_mss, plen, npdu);
1115 p = cpl_iso + 1;
1116 } else {
1117 write_tx_wr(txwr, toep, FW_OFLD_TX_DATA_WR, imm_data,
1118 adjusted_plen, credits, shove, ulp_submode);
1119 p = txwr + 1;
1120 }
1121
1122 if (imm_data != 0) {
1123 m_copydata(sndptr, 0, plen, p);
1124 } else {
1125 write_tx_sgl(p, sndptr, m, nsegs, max_nsegs_1mbuf);
1126 if (wr_len & 0xf) {
1127 uint64_t *pad = (uint64_t *)((uintptr_t)txwr + wr_len);
1128 *pad = 0;
1129 }
1130 }
1131
1132 KASSERT(toep->tx_credits >= credits,
1133 ("%s: not enough credits: credits %u "
1134 "toep->tx_credits %u tx_credits %u nsegs %u "
1135 "max_nsegs %u iso %d", __func__, credits,
1136 toep->tx_credits, tx_credits, nsegs, max_nsegs, iso));
1137
1138 tp->snd_nxt += adjusted_plen;
1139 tp->snd_max += adjusted_plen;
1140
1141 counter_u64_add(toep->ofld_txq->tx_iscsi_pdus, npdu);
1142 counter_u64_add(toep->ofld_txq->tx_iscsi_octets, plen);
1143 if (iso)
1144 counter_u64_add(toep->ofld_txq->tx_iscsi_iso_wrs, 1);
1145
1146 return (wr);
1147}
1148
1149void
1150t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop)
1151{
1152 struct mbuf *sndptr, *m;
1153 struct fw_wr_hdr *wrhdr;
1154 struct wrqe *wr;
1155 u_int plen, credits;
1156 struct inpcb *inp = toep->inp;
1157 struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
1158 struct mbufq *pduq = &toep->ulp_pduq;
1159
1160 INP_WLOCK_ASSERT(inp);
1161 KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
1162 ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
1163 KASSERT(ulp_mode(toep) == ULP_MODE_ISCSI,
1164 ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep));
1165
1166 if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
1167 return;
1168
1169 /*
1170 * This function doesn't resume by itself. Someone else must clear the
1171 * flag and call this function.
1172 */
1173 if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
1174 KASSERT(drop == 0,
1175 ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
1176 return;
1177 }
1178
1179 if (drop) {
1180 struct socket *so = inp->inp_socket;
1181 struct sockbuf *sb = &so->so_snd;
1182 int sbu;
1183
1184 /*
1185 * An unlocked read is ok here as the data should only
1186 * transition from a non-zero value to either another
1187 * non-zero value or zero. Once it is zero it should
1188 * stay zero.
1189 */
1190 if (__predict_false(sbused(sb)) > 0) {
1191 SOCKBUF_LOCK(sb);
1192 sbu = sbused(sb);
1193 if (sbu > 0) {
1194 /*
1195 * The data transmitted before the
1196 * tid's ULP mode changed to ISCSI is
1197 * still in so_snd. Incoming credits
1198 * should account for so_snd first.
1199 */
1200 sbdrop_locked(sb, min(sbu, drop));
1201 drop -= min(sbu, drop);
1202 }
1203 sowwakeup_locked(so); /* unlocks so_snd */
1204 }
1205 rqdrop_locked(&toep->ulp_pdu_reclaimq, drop);
1206 }
1207
1208 while ((sndptr = mbufq_first(pduq)) != NULL) {
1209 wr = write_iscsi_mbuf_wr(toep, sndptr);
1210 if (wr == NULL) {
1211 toep->flags |= TPF_TX_SUSPENDED;
1212 return;
1213 }
1214
1215 plen = sndptr->m_pkthdr.len;
1216 credits = howmany(wr->wr_len, 16);
1217 KASSERT(toep->tx_credits >= credits,
1218 ("%s: not enough credits", __func__));
1219
1220 m = mbufq_dequeue(pduq);
1221 MPASS(m == sndptr);
1222 mbufq_enqueue(&toep->ulp_pdu_reclaimq, m);
1223
1224 toep->tx_credits -= credits;
1225 toep->tx_nocompl += credits;
1226 toep->plen_nocompl += plen;
1227
1228 /*
1229 * Ensure there are enough credits for a full-sized WR
1230 * as page pod WRs can be full-sized.
1231 */
1232 if (toep->tx_credits <= SGE_MAX_WR_LEN * 5 / 4 &&
1233 toep->tx_nocompl >= toep->tx_total / 4) {
1234 wrhdr = wrtod(wr);
1235 wrhdr->hi |= htobe32(F_FW_WR_COMPL);
1236 toep->tx_nocompl = 0;
1237 toep->plen_nocompl = 0;
1238 }
1239
1240 toep->flags |= TPF_TX_DATA_SENT;
1241 if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
1242 toep->flags |= TPF_TX_SUSPENDED;
1243
1244 KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
1245 txsd->plen = plen;
1246 txsd->tx_credits = credits;
1247 txsd++;
1248 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
1249 toep->txsd_pidx = 0;
1250 txsd = &toep->txsd[0];
1251 }
1252 toep->txsd_avail--;
1253
1254 t4_l2t_send(sc, wr, toep->l2te);
1255 }
1256
1257 /* Send a FIN if requested, but only if there are no more PDUs to send */
1258 if (mbufq_first(pduq) == NULL && toep->flags & TPF_SEND_FIN)
1259 t4_close_conn(sc, toep);
1260}
1261
1262static inline void
1263t4_push_data(struct adapter *sc, struct toepcb *toep, int drop)
1264{
1265
1266 if (ulp_mode(toep) == ULP_MODE_ISCSI)
1267 t4_push_pdus(sc, toep, drop);
1268 else if (toep->flags & TPF_KTLS)
1269 t4_push_ktls(sc, toep, drop);
1270 else
1271 t4_push_frames(sc, toep, drop);
1272}
1273
1274int
1275t4_tod_output(struct toedev *tod, struct tcpcb *tp)
1276{
1277 struct adapter *sc = tod->tod_softc;
1278#ifdef INVARIANTS
1279 struct inpcb *inp = tp->t_inpcb;
1280#endif
1281 struct toepcb *toep = tp->t_toe;
1282
1283 INP_WLOCK_ASSERT(inp);
1284 KASSERT((inp->inp_flags & INP_DROPPED) == 0,
1285 ("%s: inp %p dropped.", __func__, inp));
1286 KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
1287
1288 t4_push_data(sc, toep, 0);
1289
1290 return (0);
1291}
1292
1293int
1294t4_send_fin(struct toedev *tod, struct tcpcb *tp)
1295{
1296 struct adapter *sc = tod->tod_softc;
1297#ifdef INVARIANTS
1298 struct inpcb *inp = tp->t_inpcb;
1299#endif
1300 struct toepcb *toep = tp->t_toe;
1301
1302 INP_WLOCK_ASSERT(inp);
1303 KASSERT((inp->inp_flags & INP_DROPPED) == 0,
1304 ("%s: inp %p dropped.", __func__, inp));
1305 KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
1306
1307 toep->flags |= TPF_SEND_FIN;
1308 if (tp->t_state >= TCPS_ESTABLISHED)
1309 t4_push_data(sc, toep, 0);
1310
1311 return (0);
1312}
1313
1314int
1315t4_send_rst(struct toedev *tod, struct tcpcb *tp)
1316{
1317 struct adapter *sc = tod->tod_softc;
1318#if defined(INVARIANTS)
1319 struct inpcb *inp = tp->t_inpcb;
1320#endif
1321 struct toepcb *toep = tp->t_toe;
1322
1323 INP_WLOCK_ASSERT(inp);
1324 KASSERT((inp->inp_flags & INP_DROPPED) == 0,
1325 ("%s: inp %p dropped.", __func__, inp));
1326 KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
1327
1328 /* hmmmm */
1329 KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
1330 ("%s: flowc for tid %u [%s] not sent already",
1331 __func__, toep->tid, tcpstates[tp->t_state]));
1332
1333 send_reset(sc, toep, 0);
1334 return (0);
1335}
1336
1337/*
1338 * Peer has sent us a FIN.
1339 */
1340static int
1341do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
1342{
1343 struct adapter *sc = iq->adapter;
1344 const struct cpl_peer_close *cpl = (const void *)(rss + 1);
1345 unsigned int tid = GET_TID(cpl);
1346 struct toepcb *toep = lookup_tid(sc, tid);
1347 struct inpcb *inp = toep->inp;
1348 struct tcpcb *tp = NULL;
1349 struct socket *so;
1350 struct epoch_tracker et;
1351#ifdef INVARIANTS
1352 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1353#endif
1354
1355 KASSERT(opcode == CPL_PEER_CLOSE,
1356 ("%s: unexpected opcode 0x%x", __func__, opcode));
1357 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1358
1359 if (__predict_false(toep->flags & TPF_SYNQE)) {
1360 /*
1361 * do_pass_establish must have run before do_peer_close and if
1362 * this is still a synqe instead of a toepcb then the connection
1363 * must be getting aborted.
1364 */
1365 MPASS(toep->flags & TPF_ABORT_SHUTDOWN);
1366 CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
1367 toep, toep->flags);
1368 return (0);
1369 }
1370
1371 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
1372
1373 CURVNET_SET(toep->vnet);
1374 NET_EPOCH_ENTER(et);
1375 INP_WLOCK(inp);
1376 tp = intotcpcb(inp);
1377
1378 CTR6(KTR_CXGBE,
1379 "%s: tid %u (%s), toep_flags 0x%x, ddp_flags 0x%x, inp %p",
1380 __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags,
1381 toep->ddp.flags, inp);
1382
1383 if (toep->flags & TPF_ABORT_SHUTDOWN)
1384 goto done;
1385
1386 so = inp->inp_socket;
1387 socantrcvmore(so);
1388 if (ulp_mode(toep) == ULP_MODE_TCPDDP) {
1389 DDP_LOCK(toep);
1390 if (__predict_false(toep->ddp.flags &
1391 (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE)))
1392 handle_ddp_close(toep, tp, cpl->rcv_nxt);
1393 DDP_UNLOCK(toep);
1394 }
1395
1396 if (ulp_mode(toep) == ULP_MODE_RDMA ||
1397 (ulp_mode(toep) == ULP_MODE_ISCSI && chip_id(sc) >= CHELSIO_T6)) {
1398 /*
1399 * There might be data received via DDP before the FIN
1400 * not reported to the driver. Just assume the
1401 * sequence number in the CPL is correct as the
1402 * sequence number of the FIN.
1403 */
1404 } else {
1405 KASSERT(tp->rcv_nxt + 1 == be32toh(cpl->rcv_nxt),
1406 ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt,
1407 be32toh(cpl->rcv_nxt)));
1408 }
1409
1410 tp->rcv_nxt = be32toh(cpl->rcv_nxt);
1411
1412 switch (tp->t_state) {
1413 case TCPS_SYN_RECEIVED:
1414 tp->t_starttime = ticks;
1415 /* FALLTHROUGH */
1416
1417 case TCPS_ESTABLISHED:
1418 tcp_state_change(tp, TCPS_CLOSE_WAIT);
1419 break;
1420
1421 case TCPS_FIN_WAIT_1:
1422 tcp_state_change(tp, TCPS_CLOSING);
1423 break;
1424
1425 case TCPS_FIN_WAIT_2:
1426 restore_so_proto(so, inp->inp_vflag & INP_IPV6);
1427 tcp_twstart(tp);
1428 INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
1429 NET_EPOCH_EXIT(et);
1430 CURVNET_RESTORE();
1431
1432 INP_WLOCK(inp);
1433 final_cpl_received(toep);
1434 return (0);
1435
1436 default:
1437 log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n",
1438 __func__, tid, tp->t_state);
1439 }
1440done:
1441 INP_WUNLOCK(inp);
1442 NET_EPOCH_EXIT(et);
1443 CURVNET_RESTORE();
1444 return (0);
1445}
1446
1447/*
1448 * Peer has ACK'd our FIN.
1449 */
1450static int
1451do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
1452 struct mbuf *m)
1453{
1454 struct adapter *sc = iq->adapter;
1455 const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1);
1456 unsigned int tid = GET_TID(cpl);
1457 struct toepcb *toep = lookup_tid(sc, tid);
1458 struct inpcb *inp = toep->inp;
1459 struct tcpcb *tp = NULL;
1460 struct socket *so = NULL;
1461 struct epoch_tracker et;
1462#ifdef INVARIANTS
1463 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1464#endif
1465
1466 KASSERT(opcode == CPL_CLOSE_CON_RPL,
1467 ("%s: unexpected opcode 0x%x", __func__, opcode));
1468 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1469 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
1470
1471 CURVNET_SET(toep->vnet);
1472 NET_EPOCH_ENTER(et);
1473 INP_WLOCK(inp);
1474 tp = intotcpcb(inp);
1475
1476 CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x",
1477 __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags);
1478
1479 if (toep->flags & TPF_ABORT_SHUTDOWN)
1480 goto done;
1481
1482 so = inp->inp_socket;
1483 tp->snd_una = be32toh(cpl->snd_nxt) - 1; /* exclude FIN */
1484
1485 switch (tp->t_state) {
1486 case TCPS_CLOSING: /* see TCPS_FIN_WAIT_2 in do_peer_close too */
1487 restore_so_proto(so, inp->inp_vflag & INP_IPV6);
1488 tcp_twstart(tp);
1489release:
1490 INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
1491 NET_EPOCH_EXIT(et);
1492 CURVNET_RESTORE();
1493
1494 INP_WLOCK(inp);
1495 final_cpl_received(toep); /* no more CPLs expected */
1496
1497 return (0);
1498 case TCPS_LAST_ACK:
1499 if (tcp_close(tp))
1500 INP_WUNLOCK(inp);
1501 goto release;
1502
1503 case TCPS_FIN_WAIT_1:
1504 if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
1505 soisdisconnected(so);
1506 tcp_state_change(tp, TCPS_FIN_WAIT_2);
1507 break;
1508
1509 default:
1510 log(LOG_ERR,
1511 "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n",
1512 __func__, tid, tcpstates[tp->t_state]);
1513 }
1514done:
1515 INP_WUNLOCK(inp);
1516 NET_EPOCH_EXIT(et);
1517 CURVNET_RESTORE();
1518 return (0);
1519}
1520
1521void
1522send_abort_rpl(struct adapter *sc, struct sge_ofld_txq *ofld_txq, int tid,
1523 int rst_status)
1524{
1525 struct wrqe *wr;
1526 struct cpl_abort_rpl *cpl;
1527
1528 wr = alloc_wrqe(sizeof(*cpl), &ofld_txq->wrq);
1529 if (wr == NULL) {
1530 /* XXX */
1531 panic("%s: allocation failure.", __func__);
1532 }
1533 cpl = wrtod(wr);
1534
1535 INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid);
1536 cpl->cmd = rst_status;
1537
1538 t4_wrq_tx(sc, wr);
1539}
1540
1541static int
1542abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason)
1543{
1544 switch (abort_reason) {
1545 case CPL_ERR_BAD_SYN:
1546 case CPL_ERR_CONN_RESET:
1547 return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
1548 case CPL_ERR_XMIT_TIMEDOUT:
1549 case CPL_ERR_PERSIST_TIMEDOUT:
1550 case CPL_ERR_KEEPALIVE_TIMEDOUT:
1551 case CPL_ERR_FINWAIT2_TIMEDOUT:
1552 return (ETIMEDOUT);
1553 default:
1554 return (EIO);
1555 }
1556}
1557
1558/*
1559 * TCP RST from the peer, timeout, or some other such critical error.
1560 */
1561static int
1562do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
1563{
1564 struct adapter *sc = iq->adapter;
1565 const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
1566 unsigned int tid = GET_TID(cpl);
1567 struct toepcb *toep = lookup_tid(sc, tid);
1568 struct sge_ofld_txq *ofld_txq = toep->ofld_txq;
1569 struct inpcb *inp;
1570 struct tcpcb *tp;
1571 struct epoch_tracker et;
1572#ifdef INVARIANTS
1573 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1574#endif
1575
1576 KASSERT(opcode == CPL_ABORT_REQ_RSS,
1577 ("%s: unexpected opcode 0x%x", __func__, opcode));
1578 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1579
1580 if (toep->flags & TPF_SYNQE)
1581 return (do_abort_req_synqe(iq, rss, m));
1582
1583 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
1584
1585 if (negative_advice(cpl->status)) {
1586 CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)",
1587 __func__, cpl->status, tid, toep->flags);
1588 return (0); /* Ignore negative advice */
1589 }
1590
1591 inp = toep->inp;
1592 CURVNET_SET(toep->vnet);
1593 NET_EPOCH_ENTER(et); /* for tcp_close */
1594 INP_WLOCK(inp);
1595
1596 tp = intotcpcb(inp);
1597
1598 CTR6(KTR_CXGBE,
1599 "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d",
1600 __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags,
1601 inp->inp_flags, cpl->status);
1602
1603 /*
1604 * If we'd initiated an abort earlier the reply to it is responsible for
1605 * cleaning up resources. Otherwise we tear everything down right here
1606 * right now. We owe the T4 a CPL_ABORT_RPL no matter what.
1607 */
1608 if (toep->flags & TPF_ABORT_SHUTDOWN) {
1609 INP_WUNLOCK(inp);
1610 goto done;
1611 }
1612 toep->flags |= TPF_ABORT_SHUTDOWN;
1613
1614 if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
1615 struct socket *so = inp->inp_socket;
1616
1617 if (so != NULL)
1618 so_error_set(so, abort_status_to_errno(tp,
1619 cpl->status));
1620 tp = tcp_close(tp);
1621 if (tp == NULL)
1622 INP_WLOCK(inp); /* re-acquire */
1623 }
1624
1625 final_cpl_received(toep);
1626done:
1627 NET_EPOCH_EXIT(et);
1628 CURVNET_RESTORE();
1629 send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
1630 return (0);
1631}
1632
1633/*
1634 * Reply to the CPL_ABORT_REQ (send_reset)
1635 */
1636static int
1637do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
1638{
1639 struct adapter *sc = iq->adapter;
1640 const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
1641 unsigned int tid = GET_TID(cpl);
1642 struct toepcb *toep = lookup_tid(sc, tid);
1643 struct inpcb *inp = toep->inp;
1644#ifdef INVARIANTS
1645 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1646#endif
1647
1648 KASSERT(opcode == CPL_ABORT_RPL_RSS,
1649 ("%s: unexpected opcode 0x%x", __func__, opcode));
1650 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1651
1652 if (toep->flags & TPF_SYNQE)
1653 return (do_abort_rpl_synqe(iq, rss, m));
1654
1655 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
1656
1657 CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d",
1658 __func__, tid, toep, inp, cpl->status);
1659
1660 KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
1661 ("%s: wasn't expecting abort reply", __func__));
1662
1663 INP_WLOCK(inp);
1664 final_cpl_received(toep);
1665
1666 return (0);
1667}
1668
1669static int
1670do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
1671{
1672 struct adapter *sc = iq->adapter;
1673 const struct cpl_rx_data *cpl = mtod(m, const void *);
1674 unsigned int tid = GET_TID(cpl);
1675 struct toepcb *toep = lookup_tid(sc, tid);
1676 struct inpcb *inp = toep->inp;
1677 struct tcpcb *tp;
1678 struct socket *so;
1679 struct sockbuf *sb;
1680 struct epoch_tracker et;
1681 int len, rx_credits;
1682 uint32_t ddp_placed = 0;
1683
1684 if (__predict_false(toep->flags & TPF_SYNQE)) {
1685 /*
1686 * do_pass_establish must have run before do_rx_data and if this
1687 * is still a synqe instead of a toepcb then the connection must
1688 * be getting aborted.
1689 */
1690 MPASS(toep->flags & TPF_ABORT_SHUTDOWN);
1691 CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
1692 toep, toep->flags);
1693 m_freem(m);
1694 return (0);
1695 }
1696
1697 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
1698
1699 /* strip off CPL header */
1700 m_adj(m, sizeof(*cpl));
1701 len = m->m_pkthdr.len;
1702
1703 INP_WLOCK(inp);
1704 if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
1705 CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
1706 __func__, tid, len, inp->inp_flags);
1707 INP_WUNLOCK(inp);
1708 m_freem(m);
1709 return (0);
1710 }
1711
1712 tp = intotcpcb(inp);
1713
1714 if (__predict_false(ulp_mode(toep) == ULP_MODE_TLS &&
1715 toep->flags & TPF_TLS_RECEIVE)) {
1716 /* Received "raw" data on a TLS socket. */
1717 CTR3(KTR_CXGBE, "%s: tid %u, raw TLS data (%d bytes)",
1718 __func__, tid, len);
1719 do_rx_data_tls(cpl, toep, m);
1720 return (0);
1721 }
1722
1723 if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
1724 ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;
1725
1726 tp->rcv_nxt += len;
1727 if (tp->rcv_wnd < len) {
1728 KASSERT(ulp_mode(toep) == ULP_MODE_RDMA,
1729 ("%s: negative window size", __func__));
1730 }
1731
1732 tp->rcv_wnd -= len;
1733 tp->t_rcvtime = ticks;
1734
1735 if (ulp_mode(toep) == ULP_MODE_TCPDDP)
1736 DDP_LOCK(toep);
1737 so = inp_inpcbtosocket(inp);
1738 sb = &so->so_rcv;
1739 SOCKBUF_LOCK(sb);
1740
1741 if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
1742 CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
1743 __func__, tid, len);
1744 m_freem(m);
1745 SOCKBUF_UNLOCK(sb);
1746 if (ulp_mode(toep) == ULP_MODE_TCPDDP)
1747 DDP_UNLOCK(toep);
1748 INP_WUNLOCK(inp);
1749
1750 CURVNET_SET(toep->vnet);
1751 NET_EPOCH_ENTER(et);
1752 INP_WLOCK(inp);
1753 tp = tcp_drop(tp, ECONNRESET);
1754 if (tp)
1755 INP_WUNLOCK(inp);
1756 NET_EPOCH_EXIT(et);
1757 CURVNET_RESTORE();
1758
1759 return (0);
1760 }
1761
1762 /* receive buffer autosize */
1763 MPASS(toep->vnet == so->so_vnet);
1764 CURVNET_SET(toep->vnet);
1765 if (sb->sb_flags & SB_AUTOSIZE &&
1766 V_tcp_do_autorcvbuf &&
1767 sb->sb_hiwat < V_tcp_autorcvbuf_max &&
1768 len > (sbspace(sb) / 8 * 7)) {
1769 unsigned int hiwat = sb->sb_hiwat;
1770 unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc,
1771 V_tcp_autorcvbuf_max);
1772
1773 if (!sbreserve_locked(sb, newsize, so, NULL))
1774 sb->sb_flags &= ~SB_AUTOSIZE;
1775 }
1776
1777 if (ulp_mode(toep) == ULP_MODE_TCPDDP) {
1778 int changed = !(toep->ddp.flags & DDP_ON) ^ cpl->ddp_off;
1779
1780 if (toep->ddp.waiting_count != 0 || toep->ddp.active_count != 0)
1781 CTR3(KTR_CXGBE, "%s: tid %u, non-ddp rx (%d bytes)",
1782 __func__, tid, len);
1783
1784 if (changed) {
1785 if (toep->ddp.flags & DDP_SC_REQ)
1786 toep->ddp.flags ^= DDP_ON | DDP_SC_REQ;
1787 else {
1788 KASSERT(cpl->ddp_off == 1,
1789 ("%s: DDP switched on by itself.",
1790 __func__));
1791
1792 /* Fell out of DDP mode */
1793 toep->ddp.flags &= ~DDP_ON;
1794 CTR1(KTR_CXGBE, "%s: fell out of DDP mode",
1795 __func__);
1796
1797 insert_ddp_data(toep, ddp_placed);
1798 }
1799 }
1800
1801 if (toep->ddp.flags & DDP_ON) {
1802 /*
1803 * CPL_RX_DATA with DDP on can only be an indicate.
1804 * Start posting queued AIO requests via DDP. The
1805 * payload that arrived in this indicate is appended
1806 * to the socket buffer as usual.
1807 */
1808 handle_ddp_indicate(toep);
1809 }
1810 }
1811
1812 sbappendstream_locked(sb, m, 0);
1813 rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0;
1814 if (rx_credits > 0 && sbused(sb) + tp->rcv_wnd < sb->sb_lowat) {
1815 rx_credits = send_rx_credits(sc, toep, rx_credits);
1816 tp->rcv_wnd += rx_credits;
1817 tp->rcv_adv += rx_credits;
1818 }
1819
1820 if (ulp_mode(toep) == ULP_MODE_TCPDDP && toep->ddp.waiting_count > 0 &&
1821 sbavail(sb) != 0) {
1822 CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__,
1823 tid);
1824 ddp_queue_toep(toep);
1825 }
1826 sorwakeup_locked(so);
1827 SOCKBUF_UNLOCK_ASSERT(sb);
1828 if (ulp_mode(toep) == ULP_MODE_TCPDDP)
1829 DDP_UNLOCK(toep);
1830
1831 INP_WUNLOCK(inp);
1832 CURVNET_RESTORE();
1833 return (0);
1834}
1835
1836static int
1837do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
1838{
1839 struct adapter *sc = iq->adapter;
1840 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
1841 unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
1842 struct toepcb *toep = lookup_tid(sc, tid);
1843 struct inpcb *inp;
1844 struct tcpcb *tp;
1845 struct socket *so;
1846 uint8_t credits = cpl->credits;
1847 struct ofld_tx_sdesc *txsd;
1848 int plen;
1849#ifdef INVARIANTS
1850 unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl)));
1851#endif
1852
1853 /*
1854 * Very unusual case: we'd sent a flowc + abort_req for a synq entry and
1855 * now this comes back carrying the credits for the flowc.
1856 */
1857 if (__predict_false(toep->flags & TPF_SYNQE)) {
1858 KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
1859 ("%s: credits for a synq entry %p", __func__, toep));
1860 return (0);
1861 }
1862
1863 inp = toep->inp;
1864
1865 KASSERT(opcode == CPL_FW4_ACK,
1866 ("%s: unexpected opcode 0x%x", __func__, opcode));
1867 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1868 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
1869
1870 INP_WLOCK(inp);
1871
1872 if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) {
1873 INP_WUNLOCK(inp);
1874 return (0);
1875 }
1876
1877 KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0,
1878 ("%s: inp_flags 0x%x", __func__, inp->inp_flags));
1879
1880 tp = intotcpcb(inp);
1881
1882 if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) {
1883 tcp_seq snd_una = be32toh(cpl->snd_una);
1884
1885#ifdef INVARIANTS
1886 if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
1887 log(LOG_ERR,
1888 "%s: unexpected seq# %x for TID %u, snd_una %x\n",
1889 __func__, snd_una, toep->tid, tp->snd_una);
1890 }
1891#endif
1892
1893 if (tp->snd_una != snd_una) {
1894 tp->snd_una = snd_una;
1895 tp->ts_recent_age = tcp_ts_getticks();
1896 }
1897 }
1898
1899#ifdef VERBOSE_TRACES
1900 CTR3(KTR_CXGBE, "%s: tid %d credits %u", __func__, tid, credits);
1901#endif
1902 so = inp->inp_socket;
1903 txsd = &toep->txsd[toep->txsd_cidx];
1904 plen = 0;
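/*
 * Retire completed work requests in order from the txsd ring: each one
 * returns its tx credits and adds its payload length to plen, which is
 * then dropped from the send buffer (or the iSCSI reclaim queue).
 */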
1905 while (credits) {
1906 KASSERT(credits >= txsd->tx_credits,
1907 ("%s: too many (or partial) credits", __func__));
1908 credits -= txsd->tx_credits;
1909 toep->tx_credits += txsd->tx_credits;
1910 plen += txsd->plen;
1911 txsd++;
1912 toep->txsd_avail++;
1913 KASSERT(toep->txsd_avail <= toep->txsd_total,
1914 ("%s: txsd avail > total", __func__));
1915 if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) {
1916 txsd = &toep->txsd[0];
1917 toep->txsd_cidx = 0;
1918 }
1919 }
1920
1921 if (toep->tx_credits == toep->tx_total) {
1922 toep->tx_nocompl = 0;
1923 toep->plen_nocompl = 0;
1924 }
1925
1926 if (toep->flags & TPF_TX_SUSPENDED &&
1927 toep->tx_credits >= toep->tx_total / 4) {
1928#ifdef VERBOSE_TRACES
1929 CTR2(KTR_CXGBE, "%s: tid %d calling t4_push_frames", __func__,
1930 tid);
1931#endif
1932 toep->flags &= ~TPF_TX_SUSPENDED;
1933 CURVNET_SET(toep->vnet);
1934 t4_push_data(sc, toep, plen);
1935 CURVNET_RESTORE();
1936 } else if (plen > 0) {
1937 struct sockbuf *sb = &so->so_snd;
1938 int sbu;
1939
1940 SOCKBUF_LOCK(sb);
1941 sbu = sbused(sb);
1942 if (ulp_mode(toep) == ULP_MODE_ISCSI) {
1943 if (__predict_false(sbu > 0)) {
1944 /*
1945 * The data transmitted before the
1946 * tid's ULP mode changed to ISCSI is
1947 * still in so_snd. Incoming credits
1948 * should account for so_snd first.
1949 */
1950 sbdrop_locked(sb, min(sbu, plen));
1951 plen -= min(sbu, plen);
1952 }
1953 sowwakeup_locked(so); /* unlocks so_snd */
1954 rqdrop_locked(&toep->ulp_pdu_reclaimq, plen);
1955 } else {
1956#ifdef VERBOSE_TRACES
1957 CTR3(KTR_CXGBE, "%s: tid %d dropped %d bytes", __func__,
1958 tid, plen);
1959#endif
1960 sbdrop_locked(sb, plen);
1961 if (!TAILQ_EMPTY(&toep->aiotx_jobq))
1962 t4_aiotx_queue_toep(so, toep);
1963 sowwakeup_locked(so); /* unlocks so_snd */
1964 }
1965 SOCKBUF_UNLOCK_ASSERT(sb);
1966 }
1967
1968 INP_WUNLOCK(inp);
1969
1970 return (0);
1971}
1972
1973void
1974t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, struct toepcb *toep,
1975 uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie)
1976{
1977 struct wrqe *wr;
1978 struct cpl_set_tcb_field *req;
1979 struct ofld_tx_sdesc *txsd;
1980
1981 MPASS((cookie & ~M_COOKIE) == 0);
1982 if (reply) {
1983 MPASS(cookie != CPL_COOKIE_RESERVED);
1984 }
1985
1986 wr = alloc_wrqe(sizeof(*req), wrq);
1987 if (wr == NULL) {
1988 /* XXX */
1989 panic("%s: allocation failure.", __func__);
1990 }
1991 req = wrtod(wr);
1992
1994 req->reply_ctrl = htobe16(V_QUEUENO(toep->ofld_rxq->iq.abs_id));
1995 if (reply == 0)
1996 req->reply_ctrl |= htobe16(F_NO_REPLY);
1997 req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie));
1998 req->mask = htobe64(mask);
1999 req->val = htobe64(val);
2000 if (wrq->eq.type == EQ_OFLD) {
2001 txsd = &toep->txsd[toep->txsd_pidx];
2002 txsd->tx_credits = howmany(sizeof(*req), 16);
2003 txsd->plen = 0;
2004 KASSERT(toep->tx_credits >= txsd->tx_credits &&
2005 toep->txsd_avail > 0,
2006 ("%s: not enough credits (%d)", __func__,
2007 toep->tx_credits));
2008 toep->tx_credits -= txsd->tx_credits;
2009 if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
2010 toep->txsd_pidx = 0;
2011 toep->txsd_avail--;
2012 }
2013
2014 t4_wrq_tx(sc, wr);
2015}
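/*
 * Illustrative sketch, not part of this file: a typical caller updates one
 * TCB word without asking for a reply (word/mask/val below are placeholders,
 * not real TCB definitions):
 *
 *	t4_set_tcb_field(sc, toep->ctrlq, toep, word, mask, val, 0,
 *	    CPL_COOKIE_RESERVED);
 *
 * A caller that wants the resulting CPL_SET_TCB_RPL routed back to TOM would
 * instead pass reply = 1 and a real cookie such as CPL_COOKIE_TOM.
 */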
2016
2017void
2018t4_init_cpl_io_handlers(void)
2019{
2020
2021 t4_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
2022 t4_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
2023 t4_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
2024 t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl,
2025 CPL_COOKIE_TOM);
2026 t4_register_cpl_handler(CPL_RX_DATA, do_rx_data);
2027 t4_register_shared_cpl_handler(CPL_FW4_ACK, do_fw4_ack, CPL_COOKIE_TOM);
2028}
2029
2030void
2031t4_uninit_cpl_io_handlers(void)
2032{
2033
2034 t4_register_cpl_handler(CPL_PEER_CLOSE, NULL);
2035 t4_register_cpl_handler(CPL_CLOSE_CON_RPL, NULL);
2036 t4_register_cpl_handler(CPL_ABORT_REQ_RSS, NULL);
2037 t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS, NULL, CPL_COOKIE_TOM);
2038 t4_register_cpl_handler(CPL_RX_DATA, NULL);
2039 t4_register_shared_cpl_handler(CPL_FW4_ACK, NULL, CPL_COOKIE_TOM);
2040}
2041
2042/*
2043 * Use the 'backend1' field in AIO jobs to hold an error that should
2044 * be reported when the job is completed, the 'backend3' field to
2045 * store the amount of data sent by the AIO job so far, and the
2046 * 'backend4' field to hold a reference count on the job.
2047 *
2048 * Each unmapped mbuf holds a reference on the job as does the queue
2049 * so long as the job is queued.
2050 */
2051#define aio_error backend1
2052#define aio_sent backend3
2053#define aio_refs backend4
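/*
 * aio_refs is initialized to 1 when the job is queued (t4_aio_queue_aiotx),
 * alloc_aiotx_mbuf() takes an additional reference for every unmapped mbuf
 * it builds, and aiotx_free_pgs() drops that reference when the mbuf is
 * freed.  The job is completed (or cancelled) only when the last reference
 * is released in aiotx_free_job().
 */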
2054
2055#define jobtotid(job) \
2056 (((struct toepcb *)(so_sototcpcb((job)->fd_file->f_data)->t_toe))->tid)
2057
2058static void
2059aiotx_free_job(struct kaiocb *job)
2060{
2061 long status;
2062 int error;
2063
2064 if (refcount_release(&job->aio_refs) == 0)
2065 return;
2066
2067 error = (intptr_t)job->aio_error;
2068 status = job->aio_sent;
2069#ifdef VERBOSE_TRACES
2070 CTR5(KTR_CXGBE, "%s: tid %d completed %p len %ld, error %d", __func__,
2071 jobtotid(job), job, status, error);
2072#endif
2073 if (error != 0 && status != 0)
2074 error = 0;
2075 if (error == ECANCELED)
2076 aio_cancel(job);
2077 else if (error)
2078 aio_complete(job, -1, error);
2079 else {
2080 job->msgsnd = 1;
2081 aio_complete(job, status, 0);
2082 }
2083}
2084
2085static void
2086aiotx_free_pgs(struct mbuf *m)
2087{
2088 struct kaiocb *job;
2089 vm_page_t pg;
2090
2091 M_ASSERTEXTPG(m);
2092 job = m->m_ext.ext_arg1;
2093#ifdef VERBOSE_TRACES
2094 CTR3(KTR_CXGBE, "%s: completed %d bytes for tid %d", __func__,
2095 m->m_len, jobtotid(job));
2096#endif
2097
2098 for (int i = 0; i < m->m_epg_npgs; i++) {
2099 pg = PHYS_TO_VM_PAGE(m->m_epg_pa[i]);
2100 vm_page_unwire(pg, PQ_ACTIVE);
2101 }
2102
2103 aiotx_free_job(job);
2104}
2105
2106/*
2107 * Allocate a chain of unmapped mbufs describing the next 'len' bytes
2108 * of an AIO job.
2109 */
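/*
 * Worked example of the sizing done below, assuming 4 KB pages (an
 * assumption for illustration only): a chunk starting at user offset 0x1400
 * (pgoff = 0x400) with mlen = 9216 spans npages = 3.  The first page holds
 * PAGE_SIZE - pgoff = 3072 bytes and the middle page 4096, so
 * m_epg_last_len = 9216 - 3072 - (3 - 2) * 4096 = 2048.
 */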
2110static struct mbuf *
2111alloc_aiotx_mbuf(struct kaiocb *job, int len)
2112{
2113 struct vmspace *vm;
2114 vm_page_t pgs[MBUF_PEXT_MAX_PGS];
2115 struct mbuf *m, *top, *last;
2116 vm_map_t map;
2117 vm_offset_t start;
2118 int i, mlen, npages, pgoff;
2119
2120 KASSERT(job->aio_sent + len <= job->uaiocb.aio_nbytes,
2121 ("%s(%p, %d): request to send beyond end of buffer", __func__,
2122 job, len));
2123
2124 /*
2125 * The AIO subsystem will cancel and drain all requests before
2126 * permitting a process to exit or exec, so p_vmspace should
2127 * be stable here.
2128 */
2129 vm = job->userproc->p_vmspace;
2130 map = &vm->vm_map;
2131 start = (uintptr_t)job->uaiocb.aio_buf + job->aio_sent;
2132 pgoff = start & PAGE_MASK;
2133
2134 top = NULL;
2135 last = NULL;
2136 while (len > 0) {
2137 mlen = imin(len, MBUF_PEXT_MAX_PGS * PAGE_SIZE - pgoff);
2138 KASSERT(mlen == len || ((start + mlen) & PAGE_MASK) == 0,
2139 ("%s: next start (%#jx + %#x) is not page aligned",
2140 __func__, (uintmax_t)start, mlen));
2141
2142 npages = vm_fault_quick_hold_pages(map, start, mlen,
2143 VM_PROT_WRITE, pgs, nitems(pgs));
2144 if (npages < 0)
2145 break;
2146
2147 m = mb_alloc_ext_pgs(M_WAITOK, aiotx_free_pgs);
2148 if (m == NULL) {
2149 vm_page_unhold_pages(pgs, npages);
2150 break;
2151 }
2152
2153 m->m_epg_1st_off = pgoff;
2154 m->m_epg_npgs = npages;
2155 if (npages == 1) {
2156 KASSERT(mlen + pgoff <= PAGE_SIZE,
2157 ("%s: single page is too large (off %d len %d)",
2158 __func__, pgoff, mlen));
2159 m->m_epg_last_len = mlen;
2160 } else {
2161 m->m_epg_last_len = mlen - (PAGE_SIZE - pgoff) -
2162 (npages - 2) * PAGE_SIZE;
2163 }
2164 for (i = 0; i < npages; i++)
2165 m->m_epg_pa[i] = VM_PAGE_TO_PHYS(pgs[i]);
2166
2167 m->m_len = mlen;
2168 m->m_ext.ext_size = npages * PAGE_SIZE;
2169 m->m_ext.ext_arg1 = job;
2170 refcount_acquire(&job->aio_refs);
2171
2172#ifdef VERBOSE_TRACES
2173 CTR5(KTR_CXGBE, "%s: tid %d, new mbuf %p for job %p, npages %d",
2174 __func__, jobtotid(job), m, job, npages);
2175#endif
2176
2177 if (top == NULL)
2178 top = m;
2179 else
2180 last->m_next = m;
2181 last = m;
2182
2183 len -= mlen;
2184 start += mlen;
2185 pgoff = 0;
2186 }
2187
2188 return (top);
2189}
2190
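/*
 * Attempt to transmit (part of) one AIO write.  Called from the aiotx task
 * with so_snd locked; the lock is dropped immediately and re-acquired before
 * returning.  The checks normally performed by sosend_generic() and
 * tcp_usr_send() are open-coded here: EPIPE (and SIGPIPE) if the socket can
 * no longer send, any pending so_error, ENOTCONN if the socket is not
 * connected, and a requeue of the job when so_snd has less than sb_lowat of
 * free space.  Otherwise the next chunk of user memory is wired into an
 * unmapped mbuf chain, appended to so_snd, and pushed with tcp_output(),
 * looping when the chunk was clamped to a single sndbuf.
 */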
2191static void
2192t4_aiotx_process_job(struct toepcb *toep, struct socket *so, struct kaiocb *job)
2193{
2194 struct sockbuf *sb;
2195 struct file *fp;
2196 struct inpcb *inp;
2197 struct tcpcb *tp;
2198 struct mbuf *m;
2199 int error, len;
2200 bool moretocome, sendmore;
2201
2202 sb = &so->so_snd;
2203 SOCKBUF_UNLOCK(sb);
2204 fp = job->fd_file;
2205 m = NULL;
2206
2207#ifdef MAC
2208 error = mac_socket_check_send(fp->f_cred, so);
2209 if (error != 0)
2210 goto out;
2211#endif
2212
2213 /* Inline sosend_generic(). */
2214
2215 error = SOCK_IO_SEND_LOCK(so, SBL_WAIT);
2216 MPASS(error == 0);
2217
2218sendanother:
2219 SOCKBUF_LOCK(sb);
2220 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2221 SOCKBUF_UNLOCK(sb);
2222 SOCK_IO_SEND_UNLOCK(so);
2223 if ((so->so_options & SO_NOSIGPIPE) == 0) {
2224 PROC_LOCK(job->userproc);
2225 kern_psignal(job->userproc, SIGPIPE);
2226 PROC_UNLOCK(job->userproc);
2227 }
2228 error = EPIPE;
2229 goto out;
2230 }
2231 if (so->so_error) {
2232 error = so->so_error;
2233 so->so_error = 0;
2234 SOCKBUF_UNLOCK(sb);
2235 SOCK_IO_SEND_UNLOCK(so);
2236 goto out;
2237 }
2238 if ((so->so_state & SS_ISCONNECTED) == 0) {
2239 SOCKBUF_UNLOCK(sb);
2240 SOCK_IO_SEND_UNLOCK(so);
2241 error = ENOTCONN;
2242 goto out;
2243 }
2244 if (sbspace(sb) < sb->sb_lowat) {
2245 MPASS(job->aio_sent == 0 || !(so->so_state & SS_NBIO));
2246
2247 /*
2248 * Don't block if there is too little room in the socket
2249 * buffer. Instead, requeue the request.
2250 */
2251 if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
2252 SOCKBUF_UNLOCK(sb);
2253 SOCK_IO_SEND_UNLOCK(so);
2254 error = ECANCELED;
2255 goto out;
2256 }
2257 TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
2258 SOCKBUF_UNLOCK(sb);
2259 SOCK_IO_SEND_UNLOCK(so);
2260 goto out;
2261 }
2262
2263 /*
2264 * Write as much data as the socket permits, but no more than
2265 * a single sndbuf at a time.
2266 */
2267 len = sbspace(sb);
2268 if (len > job->uaiocb.aio_nbytes - job->aio_sent) {
2269 len = job->uaiocb.aio_nbytes - job->aio_sent;
2270 moretocome = false;
2271 } else
2272 moretocome = true;
2273 if (len > toep->params.sndbuf) {
2274 len = toep->params.sndbuf;
2275 sendmore = true;
2276 } else
2277 sendmore = false;
2278
2279 if (!TAILQ_EMPTY(&toep->aiotx_jobq))
2280 moretocome = true;
2281 SOCKBUF_UNLOCK(sb);
2282 MPASS(len != 0);
2283
2284 m = alloc_aiotx_mbuf(job, len);
2285 if (m == NULL) {
2286 SOCK_IO_SEND_UNLOCK(so);
2287 error = EFAULT;
2288 goto out;
2289 }
2290
2291 /* Inlined tcp_usr_send(). */
2292
2293 inp = toep->inp;
2294 INP_WLOCK(inp);
2295 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
2296 INP_WUNLOCK(inp);
2297 SOCK_IO_SEND_UNLOCK(so);
2298 error = ECONNRESET;
2299 goto out;
2300 }
2301
2302 job->aio_sent += m_length(m, NULL);
2303
2304 sbappendstream(sb, m, 0);
2305 m = NULL;
2306
2307 if (!(inp->inp_flags & INP_DROPPED)) {
2308 tp = intotcpcb(inp);
2309 if (moretocome)
2310 tp->t_flags |= TF_MORETOCOME;
2311 error = tcp_output(tp);
2312 if (error < 0) {
2313 INP_UNLOCK_ASSERT(inp);
2314 SOCK_IO_SEND_UNLOCK(so);
2315 error = -error;
2316 goto out;
2317 }
2318 if (moretocome)
2319 tp->t_flags &= ~TF_MORETOCOME;
2320 }
2321
2322 INP_WUNLOCK(inp);
2323 if (sendmore)
2324 goto sendanother;
2325 SOCK_IO_SEND_UNLOCK(so);
2326
2327 if (error)
2328 goto out;
2329
2330 /*
2331 * If this is a blocking socket and the request has not been
2332 * fully completed, requeue it until the socket is ready
2333 * again.
2334 */
2335 if (job->aio_sent < job->uaiocb.aio_nbytes &&
2336 !(so->so_state & SS_NBIO)) {
2337 SOCKBUF_LOCK(sb);
2338 if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
2339 SOCKBUF_UNLOCK(sb);
2340 error = ECANCELED;
2341 goto out;
2342 }
2343 TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
2344 return;
2345 }
2346
2347 /*
2348 * If the request will not be requeued, drop the queue's
2349 * reference to the job. Any mbufs in flight should still
2350 * hold a reference, but this drops the reference that the
2351 * queue owns while it is waiting to queue mbufs to the
2352 * socket.
2353 */
2354 aiotx_free_job(job);
2355
2356out:
2357 if (error) {
2358 job->aio_error = (void *)(intptr_t)error;
2359 aiotx_free_job(job);
2360 }
2361 m_freem(m);
2362 SOCKBUF_LOCK(sb);
2363}
2364
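/*
 * Task handler scheduled by t4_aiotx_queue_toep().  It drains the aiotx job
 * queue for as long as the socket remains writable, then releases the socket
 * and toepcb references that were taken when the task was enqueued.
 */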
2365static void
2366t4_aiotx_task(void *context, int pending)
2367{
2368 struct toepcb *toep = context;
2369 struct socket *so;
2370 struct kaiocb *job;
2371 struct epoch_tracker et;
2372
2373 so = toep->aiotx_so;
2374 CURVNET_SET(toep->vnet);
2375 NET_EPOCH_ENTER(et);
2376 SOCKBUF_LOCK(&so->so_snd);
2377 while (!TAILQ_EMPTY(&toep->aiotx_jobq) && sowriteable(so)) {
2378 job = TAILQ_FIRST(&toep->aiotx_jobq);
2379 TAILQ_REMOVE(&toep->aiotx_jobq, job, list);
2380 if (!aio_clear_cancel_function(job))
2381 continue;
2382
2383 t4_aiotx_process_job(toep, so, job);
2384 }
2385 toep->aiotx_so = NULL;
2386 SOCKBUF_UNLOCK(&so->so_snd);
2387 NET_EPOCH_EXIT(et);
2388
2389 free_toepcb(toep);
2390 sorele(so);
2391 CURVNET_RESTORE();
2392}
2393
2394static void
2395t4_aiotx_queue_toep(struct socket *so, struct toepcb *toep)
2396{
2397
2398 SOCKBUF_LOCK_ASSERT(&toep->inp->inp_socket->so_snd);
2399#ifdef VERBOSE_TRACES
2400 CTR3(KTR_CXGBE, "%s: queueing aiotx task for tid %d, active = %s",
2401 __func__, toep->tid, toep->aiotx_so != NULL ? "true" : "false");
2402#endif
2403 if (toep->aiotx_so != NULL)
2404 return;
2405 soref(so);
2406 toep->aiotx_so = so;
2407 hold_toepcb(toep);
2408 soaio_enqueue(&toep->aiotx_task);
2409}
2410
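/*
 * AIO cancellation callback for queued aiotx jobs.  If the job is still on
 * the aiotx queue it is removed; it is then completed with ECANCELED once
 * its last reference is released.
 */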
2411static void
2412t4_aiotx_cancel(struct kaiocb *job)
2413{
2414 struct socket *so;
2415 struct sockbuf *sb;
2416 struct tcpcb *tp;
2417 struct toepcb *toep;
2418
2419 so = job->fd_file->f_data;
2420 tp = so_sototcpcb(so);
2421 toep = tp->t_toe;
2422 MPASS(job->uaiocb.aio_lio_opcode == LIO_WRITE);
2423 sb = &so->so_snd;
2424
2425 SOCKBUF_LOCK(sb);
2426 if (!aio_cancel_cleared(job))
2427 TAILQ_REMOVE(&toep->aiotx_jobq, job, list);
2428 SOCKBUF_UNLOCK(sb);
2429
2430 job->aio_error = (void *)(intptr_t)ECANCELED;
2431 aiotx_free_job(job);
2432}
2433
2434int
2435t4_aio_queue_aiotx(struct socket *so, struct kaiocb *job)
2436{
2437 struct tcpcb *tp = so_sototcpcb(so);
2438 struct toepcb *toep = tp->t_toe;
2439 struct adapter *sc = td_adapter(toep->td);
2440
2441 /* This only handles writes. */
2442 if (job->uaiocb.aio_lio_opcode != LIO_WRITE)
2443 return (EOPNOTSUPP);
2444
2445 if (!sc->tt.tx_zcopy)
2446 return (EOPNOTSUPP);
2447
2448 if (tls_tx_key(toep))
2449 return (EOPNOTSUPP);
2450
2451 SOCKBUF_LOCK(&so->so_snd);
2452#ifdef VERBOSE_TRACES
2453 CTR3(KTR_CXGBE, "%s: queueing %p for tid %u", __func__, job, toep->tid);
2454#endif
2455 if (!aio_set_cancel_function(job, t4_aiotx_cancel))
2456 panic("new job was cancelled");
2457 refcount_init(&job->aio_refs, 1);
2458 TAILQ_INSERT_TAIL(&toep->aiotx_jobq, job, list);
2459 if (sowriteable(so))
2460 t4_aiotx_queue_toep(so, toep);
2461 SOCKBUF_UNLOCK(&so->so_snd);
2462 return (0);
2463}
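/*
 * Note: zero-copy AIO transmit is used only for plain LIO_WRITE requests on
 * connections without a TLS transmit key, and only when the tx_zcopy TOM
 * tunable (sc->tt.tx_zcopy) is enabled; the tunable is typically exposed
 * through a dev.<nexus>.<unit>.toe.tx_zcopy sysctl, though the exact path is
 * an assumption here.
 */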
2464
2465void
2466aiotx_init_toep(struct toepcb *toep)
2467{
2468
2469 TAILQ_INIT(&toep->aiotx_jobq);
2470 TASK_INIT(&toep->aiotx_task, 0, t4_aiotx_task, toep);
2471}
2472#endif