FreeBSD kernel CXGBE device code — cxgbei.c: Chelsio T4/T5/T6 iSCSI offload ULP driver.
1/*-
2 * Copyright (c) 2012 Chelsio Communications, Inc.
3 * All rights reserved.
4 *
5 * Chelsio T5xx iSCSI driver
6 *
7 * Written by: Sreenivasa Honnur <shonnur@chelsio.com>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD$");
33
34#include "opt_inet.h"
35#include "opt_inet6.h"
36
37#include <sys/types.h>
38#include <sys/param.h>
39#include <sys/kernel.h>
40#include <sys/ktr.h>
41#include <sys/module.h>
42#include <sys/systm.h>
43
44#ifdef TCP_OFFLOAD
45#include <sys/errno.h>
46#include <sys/gsb_crc32.h>
47#include <sys/kthread.h>
48#include <sys/smp.h>
49#include <sys/socket.h>
50#include <sys/socketvar.h>
51#include <sys/mbuf.h>
52#include <sys/lock.h>
53#include <sys/mutex.h>
54#include <sys/condvar.h>
55#include <sys/uio.h>
56
57#include <netinet/in.h>
58#include <netinet/in_pcb.h>
59#include <netinet/toecore.h>
60#include <netinet/tcp_var.h>
61#include <netinet/tcp_fsm.h>
62
63#include <cam/scsi/scsi_all.h>
64#include <cam/scsi/scsi_da.h>
65#include <cam/ctl/ctl_io.h>
66#include <cam/ctl/ctl.h>
67#include <cam/ctl/ctl_backend.h>
68#include <cam/ctl/ctl_error.h>
69#include <cam/ctl/ctl_frontend.h>
70#include <cam/ctl/ctl_debug.h>
71#include <cam/ctl/ctl_ha.h>
72#include <cam/ctl/ctl_ioctl.h>
73
74#include <dev/iscsi/icl.h>
75#include <dev/iscsi/iscsi_proto.h>
76#include <dev/iscsi/iscsi_ioctl.h>
77#include <dev/iscsi/iscsi.h>
78#include <cam/ctl/ctl_frontend_iscsi.h>
79
80#include <cam/cam.h>
81#include <cam/cam_ccb.h>
82#include <cam/cam_xpt.h>
83#include <cam/cam_debug.h>
84#include <cam/cam_sim.h>
85#include <cam/cam_xpt_sim.h>
86#include <cam/cam_xpt_periph.h>
87#include <cam/cam_periph.h>
88#include <cam/cam_compat.h>
89#include <cam/scsi/scsi_message.h>
90
91#include "common/common.h"
92#include "common/t4_msg.h"
93#include "common/t4_regs.h" /* for PCIE_MEM_ACCESS */
94#include "tom/t4_tom.h"
95#include "cxgbei.h"
96
97static void
98read_pdu_limits(struct adapter *sc, uint32_t *max_tx_data_len,
99 uint32_t *max_rx_data_len, struct ppod_region *pr)
100{
101 uint32_t tx_len, rx_len, r, v;
102
105
107 rx_len = min(rx_len, G_MAXRXDATA(r));
108 tx_len = min(tx_len, G_MAXRXDATA(r));
109
112 rx_len = min(rx_len, v);
113 tx_len = min(tx_len, v);
114
115 /*
116 * AHS is not supported by the kernel so we'll not account for
117 * it either in our PDU len -> data segment len conversions.
118 */
119 rx_len -= ISCSI_BHS_SIZE + ISCSI_HEADER_DIGEST_SIZE +
120 ISCSI_DATA_DIGEST_SIZE;
121 tx_len -= ISCSI_BHS_SIZE + ISCSI_HEADER_DIGEST_SIZE +
122 ISCSI_DATA_DIGEST_SIZE;
123
124 /*
125 * DDP can place only 4 pages for a single PDU. A single
126 * request might use larger pages than the smallest page size,
127 * but that cannot be guaranteed. Assume the smallest DDP
128 * page size for this limit.
129 */
130 rx_len = min(rx_len, 4 * (1U << pr->pr_page_shift[0]));
131
132 if (chip_id(sc) == CHELSIO_T5) {
133 tx_len = min(tx_len, 15360);
134
135 rx_len = rounddown2(rx_len, 512);
136 tx_len = rounddown2(tx_len, 512);
137 }
138
139 *max_tx_data_len = tx_len;
140 *max_rx_data_len = rx_len;
141}
142
143/*
144 * Initialize the software state of the iSCSI ULP driver.
145 *
146 * ENXIO means firmware didn't set up something that it was supposed to.
147 */
148static int
149cxgbei_init(struct adapter *sc, struct cxgbei_data *ci)
150{
151 struct sysctl_oid *oid;
152 struct sysctl_oid_list *children;
153 struct ppod_region *pr;
154 uint32_t r;
155 int rc;
156
157 MPASS(sc->vres.iscsi.size > 0);
158 MPASS(ci != NULL);
159
160 pr = &ci->pr;
162 rc = t4_init_ppod_region(pr, &sc->vres.iscsi, r, "iSCSI page pods");
163 if (rc != 0) {
164 device_printf(sc->dev,
165 "%s: failed to initialize the iSCSI page pod region: %u.\n",
166 __func__, rc);
167 return (rc);
168 }
169
172 if (r != pr->pr_tag_mask) {
173 /*
174 * Recent firmwares are supposed to set up the iSCSI tagmask
175 * but we'll do it ourselves it the computed value doesn't match
176 * what's in the register.
177 */
178 device_printf(sc->dev,
179 "tagmask 0x%08x does not match computed mask 0x%08x.\n", r,
180 pr->pr_tag_mask);
183 }
184
185 read_pdu_limits(sc, &ci->max_tx_data_len, &ci->max_rx_data_len, pr);
186
187 sysctl_ctx_init(&ci->ctx);
188 oid = device_get_sysctl_tree(sc->dev); /* dev.t5nex.X */
189 children = SYSCTL_CHILDREN(oid);
190
191 oid = SYSCTL_ADD_NODE(&ci->ctx, children, OID_AUTO, "iscsi",
192 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "iSCSI ULP settings");
193 children = SYSCTL_CHILDREN(oid);
194
195 ci->ddp_threshold = 2048;
196 SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "ddp_threshold",
197 CTLFLAG_RW, &ci->ddp_threshold, 0, "Rx zero copy threshold");
198
199 SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "max_rx_data_len",
200 CTLFLAG_RD, &ci->max_rx_data_len, 0,
201 "Maximum receive data segment length");
202 SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "max_tx_data_len",
203 CTLFLAG_RD, &ci->max_tx_data_len, 0,
204 "Maximum transmit data segment length");
205
206 return (0);
207}
208
209static int
210do_rx_iscsi_hdr(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
211{
212 struct adapter *sc = iq->adapter;
213 struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *);
214 u_int tid = GET_TID(cpl);
215 struct toepcb *toep = lookup_tid(sc, tid);
216 struct icl_pdu *ip;
217 struct icl_cxgbei_pdu *icp;
218 uint16_t len_ddp = be16toh(cpl->pdu_len_ddp);
219 uint16_t len = be16toh(cpl->len);
220
221 M_ASSERTPKTHDR(m);
222 MPASS(m->m_pkthdr.len == len + sizeof(*cpl));
223
224 ip = icl_cxgbei_new_pdu(M_NOWAIT);
225 if (ip == NULL)
226 CXGBE_UNIMPLEMENTED("PDU allocation failure");
227 m_copydata(m, sizeof(*cpl), ISCSI_BHS_SIZE, (caddr_t)ip->ip_bhs);
228 ip->ip_data_len = G_ISCSI_PDU_LEN(len_ddp) - len;
229 icp = ip_to_icp(ip);
230 icp->icp_seq = ntohl(cpl->seq);
231 icp->icp_flags = ICPF_RX_HDR;
232
233 /* This is the start of a new PDU. There should be no old state. */
234 MPASS(toep->ulpcb2 == NULL);
235 toep->ulpcb2 = icp;
236
237#if 0
238 CTR5(KTR_CXGBE, "%s: tid %u, cpl->len %u, pdu_len_ddp 0x%04x, icp %p",
239 __func__, tid, len, len_ddp, icp);
240#endif
241
242 m_freem(m);
243 return (0);
244}
245
246static int
247do_rx_iscsi_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
248{
249 struct adapter *sc = iq->adapter;
250 struct cpl_iscsi_data *cpl = mtod(m, struct cpl_iscsi_data *);
251 u_int tid = GET_TID(cpl);
252 struct toepcb *toep = lookup_tid(sc, tid);
253 struct icl_cxgbei_pdu *icp = toep->ulpcb2;
254 struct icl_pdu *ip;
255
256 M_ASSERTPKTHDR(m);
257 MPASS(m->m_pkthdr.len == be16toh(cpl->len) + sizeof(*cpl));
258
259 if (icp == NULL) {
260 /*
261 * T6 completion enabled, start of a new pdu. Header
262 * will come in completion CPL.
263 */
264 ip = icl_cxgbei_new_pdu(M_NOWAIT);
265 if (ip == NULL)
266 CXGBE_UNIMPLEMENTED("PDU allocation failure");
267 icp = ip_to_icp(ip);
268 } else {
269 /* T5 mode, header is already received. */
270 MPASS(icp->icp_flags == ICPF_RX_HDR);
271 MPASS(icp->ip.ip_data_mbuf == NULL);
272 MPASS(icp->ip.ip_data_len == m->m_pkthdr.len - sizeof(*cpl));
273 }
274
275 /* Trim the cpl header from mbuf. */
276 m_adj(m, sizeof(*cpl));
277
278 icp->icp_flags |= ICPF_RX_FLBUF;
279 icp->ip.ip_data_mbuf = m;
280 toep->ofld_rxq->rx_iscsi_fl_pdus++;
281 toep->ofld_rxq->rx_iscsi_fl_octets += m->m_pkthdr.len;
282
283 /*
284 * For T6, save the icp for further processing in the
285 * completion handler.
286 */
287 if (icp->icp_flags == ICPF_RX_FLBUF) {
288 MPASS(toep->ulpcb2 == NULL);
289 toep->ulpcb2 = icp;
290 }
291
292#if 0
293 CTR4(KTR_CXGBE, "%s: tid %u, cpl->len %u, icp %p", __func__, tid,
294 be16toh(cpl->len), icp);
295#endif
296
297 return (0);
298}
299
300static int
301mbuf_crc32c_helper(void *arg, void *data, u_int len)
302{
303 uint32_t *digestp = arg;
304
305 *digestp = calculate_crc32c(*digestp, data, len);
306 return (0);
307}
308
/*
 * Read and validate one complete iSCSI PDU that arrived on the plain TCP
 * socket before the connection was handed off to the offload engine.
 * Returns a freshly allocated icl_pdu on success, or NULL on any receive,
 * truncation, AHS, or digest error (the caller reports the error on the
 * connection).
 *
 * NOTE(review): some lines appear elided from this extract — the per-queue
 * header/data digest error counters are not bumped in the mismatch paths,
 * and the new PDU is not associated with the connection before return
 * (compare with the icl_cxgbei_new_pdu_set_conn() declaration referenced
 * by this file).  Verify against the upstream source.
 */
static struct icl_pdu *
parse_pdu(struct socket *so, struct toepcb *toep, struct icl_cxgbei_conn *icc,
    struct sockbuf *sb, u_int total_len)
{
	struct uio uio;
	struct iovec iov[2];
	struct iscsi_bhs bhs;
	struct mbuf *m;
	struct icl_pdu *ip;
	u_int ahs_len, data_len, header_len, pdu_len;
	uint32_t calc_digest, wire_digest;
	int error;

	/* Kernel-space read on behalf of the current thread. */
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = curthread;

	/* Wire header is the BHS plus an optional header digest. */
	header_len = sizeof(struct iscsi_bhs);
	if (icc->ic.ic_header_crc32c)
		header_len += ISCSI_HEADER_DIGEST_SIZE;

	if (total_len < header_len) {
		ICL_WARN("truncated pre-offload PDU with len %u", total_len);
		return (NULL);
	}

	iov[0].iov_base = &bhs;
	iov[0].iov_len = sizeof(bhs);
	iov[1].iov_base = &wire_digest;
	iov[1].iov_len = sizeof(wire_digest);
	uio.uio_iov = iov;
	/*
	 * NOTE(review): two iovecs are initialized but uio_iovcnt is 1;
	 * with header digests enabled uio_resid (header_len) exceeds
	 * iov[0].iov_len.  Confirm against upstream whether this should
	 * be 2 (or conditional on ic_header_crc32c).
	 */
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = header_len;
	error = soreceive(so, NULL, &uio, NULL, NULL, NULL);
	if (error != 0) {
		ICL_WARN("failed to read BHS from pre-offload PDU: %d", error);
		return (NULL);
	}

	/* AHS length is in 4-byte words; data segment length is 24 bits. */
	ahs_len = bhs.bhs_total_ahs_len * 4;
	data_len = bhs.bhs_data_segment_len[0] << 16 |
	    bhs.bhs_data_segment_len[1] << 8 |
	    bhs.bhs_data_segment_len[2];
	/* Data is padded to a 4-byte boundary on the wire. */
	pdu_len = header_len + ahs_len + roundup2(data_len, 4);
	if (icc->ic.ic_data_crc32c && data_len != 0)
		pdu_len += ISCSI_DATA_DIGEST_SIZE;

	if (total_len < pdu_len) {
		ICL_WARN("truncated pre-offload PDU len %u vs %u", total_len,
		    pdu_len);
		return (NULL);
	}

	/* AHS is not supported by this driver (see read_pdu_limits). */
	if (ahs_len != 0) {
		ICL_WARN("received pre-offload PDU with AHS");
		return (NULL);
	}

	/* Verify the header digest, if enabled. */
	if (icc->ic.ic_header_crc32c) {
		calc_digest = calculate_crc32c(0xffffffff, (caddr_t)&bhs,
		    sizeof(bhs));
		calc_digest ^= 0xffffffff;
		if (calc_digest != wire_digest) {
			ICL_WARN("received pre-offload PDU 0x%02x with "
			    "invalid header digest (0x%x vs 0x%x)",
			    bhs.bhs_opcode, wire_digest, calc_digest);
			/* NOTE(review): header-digest error counter update elided here. */
			return (NULL);
		}
	}

	m = NULL;
	if (data_len != 0) {
		/* Receive the padded data segment (and digest) into mbufs. */
		uio.uio_iov = NULL;
		uio.uio_resid = roundup2(data_len, 4);
		if (icc->ic.ic_data_crc32c)
			uio.uio_resid += ISCSI_DATA_DIGEST_SIZE;

		error = soreceive(so, NULL, &uio, &m, NULL, NULL);
		if (error != 0) {
			ICL_WARN("failed to read data payload from "
			    "pre-offload PDU: %d", error);
			return (NULL);
		}

		/* Verify the data digest, if enabled. */
		if (icc->ic.ic_data_crc32c) {
			/* The digest trails the padded data in the chain. */
			m_copydata(m, roundup2(data_len, 4),
			    sizeof(wire_digest), (caddr_t)&wire_digest);

			calc_digest = 0xffffffff;
			m_apply(m, 0, roundup2(data_len, 4), mbuf_crc32c_helper,
			    &calc_digest);
			calc_digest ^= 0xffffffff;
			if (calc_digest != wire_digest) {
				ICL_WARN("received pre-offload PDU 0x%02x "
				    "with invalid data digest (0x%x vs 0x%x)",
				    bhs.bhs_opcode, wire_digest, calc_digest);
				/* NOTE(review): data-digest error counter update elided here. */
				m_freem(m);
				return (NULL);
			}
		}
	}

	ip = icl_cxgbei_new_pdu(M_WAITOK);
	/* NOTE(review): a line is elided here — likely icl_cxgbei_new_pdu_set_conn(). */
	*ip->ip_bhs = bhs;
	ip->ip_data_len = data_len;
	ip->ip_data_mbuf = m;
	return (ip);
}
421
422void
423parse_pdus(struct icl_cxgbei_conn *icc, struct sockbuf *sb)
424{
425 struct icl_conn *ic = &icc->ic;
426 struct socket *so = ic->ic_socket;
427 struct toepcb *toep = icc->toep;
428 struct icl_pdu *ip, *lastip;
429 u_int total_len;
430
431 SOCKBUF_LOCK_ASSERT(sb);
432
433 CTR3(KTR_CXGBE, "%s: tid %u, %u bytes in so_rcv", __func__, toep->tid,
434 sbused(sb));
435
436 lastip = NULL;
437 while (sbused(sb) != 0 && (sb->sb_state & SBS_CANTRCVMORE) == 0) {
438 total_len = sbused(sb);
439 SOCKBUF_UNLOCK(sb);
440
441 ip = parse_pdu(so, toep, icc, sb, total_len);
442
443 if (ip == NULL) {
444 ic->ic_error(ic);
445 SOCKBUF_LOCK(sb);
446 return;
447 }
448
449 if (lastip == NULL)
450 STAILQ_INSERT_HEAD(&icc->rcvd_pdus, ip, ip_next);
451 else
452 STAILQ_INSERT_AFTER(&icc->rcvd_pdus, lastip, ip,
453 ip_next);
454 lastip = ip;
455
456 SOCKBUF_LOCK(sb);
457 }
458}
459
/*
 * Handler for CPL_RX_ISCSI_DDP: final status for a PDU whose header (and
 * possibly data, via DDP or freelist) has already been received.  Finishes
 * the PDU being assembled in toep->ulpcb2, advances rcv_nxt, and queues the
 * PDU on the connection's received-PDU list (waking the rx worker).
 *
 * NOTE(review): several statements appear elided from this extract —
 * setting ICPF_RX_STATUS, the padding/header-digest/data-digest error
 * counters, the rx_iscsi_ddp_pdus counter, an icc signature assertion, and
 * the icl_cxgbei_new_pdu_set_conn() call.  Verify against upstream.
 */
static int
do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct socket *so;
	struct sockbuf *sb;
	struct tcpcb *tp;
	struct icl_cxgbei_conn *icc;
	struct icl_conn *ic;
	struct icl_cxgbei_pdu *icp = toep->ulpcb2;
	struct icl_pdu *ip;
	u_int pdu_len, val;
	struct epoch_tracker et;

	MPASS(m == NULL);

	/* Must already be assembling a PDU. */
	MPASS(icp != NULL);
	MPASS(icp->icp_flags & ICPF_RX_HDR);	/* Data is optional. */
	MPASS((icp->icp_flags & ICPF_RX_STATUS) == 0);

	pdu_len = be16toh(cpl->len);	/* includes everything. */
	val = be32toh(cpl->ddpvld);

#if 0
	CTR5(KTR_CXGBE,
	    "%s: tid %u, cpl->len %u, ddpvld 0x%08x, icp_flags 0x%08x",
	    __func__, tid, pdu_len, val, icp->icp_flags);
#endif

	/* NOTE(review): a line is elided here (likely icp->icp_flags |= ICPF_RX_STATUS). */
	ip = &icp->ip;
	if (val & F_DDP_PADDING_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid padding",
		    ip->ip_bhs->bhs_opcode);
		/* NOTE(review): rx_iscsi_padding_errors increment elided here. */
	}
	if (val & F_DDP_HDRCRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid header digest",
		    ip->ip_bhs->bhs_opcode);
		/* NOTE(review): rx_iscsi_header_digest_errors increment elided here. */
	}
	if (val & F_DDP_DATACRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid data digest",
		    ip->ip_bhs->bhs_opcode);
		/* NOTE(review): rx_iscsi_data_digest_errors increment elided here. */
	}
	/* Data was placed directly into the buffer by DDP (no FL mbuf). */
	if (val & F_DDP_PDU && ip->ip_data_mbuf == NULL) {
		MPASS((icp->icp_flags & ICPF_RX_FLBUF) == 0);
		MPASS(ip->ip_data_len > 0);
		icp->icp_flags |= ICPF_RX_DDP;
		/* NOTE(review): rx_iscsi_ddp_pdus increment elided here. */
		toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len;
	}

	INP_WLOCK(inp);
	if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {
		/* Connection is already gone; discard the PDU. */
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, pdu_len, inp->inp_flags);
		INP_WUNLOCK(inp);
		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		return (0);
	}

	/*
	 * T6+ does not report data PDUs received via DDP without F
	 * set.  This can result in gaps in the TCP sequence space.
	 */
	tp = intotcpcb(inp);
	MPASS(chip_id(sc) >= CHELSIO_T6 || icp->icp_seq == tp->rcv_nxt);
	tp->rcv_nxt = icp->icp_seq + pdu_len;
	tp->t_rcvtime = ticks;

	/*
	 * Don't update the window size or return credits since RX
	 * flow control is disabled.
	 */

	so = inp->inp_socket;
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	icc = toep->ulpcb;
	if (__predict_false(icc == NULL || sb->sb_state & SBS_CANTRCVMORE)) {
		/* Receiver is gone: reset the connection and drop the PDU. */
		CTR5(KTR_CXGBE,
		    "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
		    __func__, tid, pdu_len, icc, sb->sb_state);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		CURVNET_SET(so->so_vnet);
		NET_EPOCH_ENTER(et);
		INP_WLOCK(inp);
		/* tcp_drop() unlocks the inp itself when it frees it. */
		tp = tcp_drop(tp, ECONNRESET);
		if (tp)
			INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		return (0);
	}
	/* NOTE(review): an icc signature MPASS appears elided here. */
	ic = &icc->ic;
	if ((val & (F_DDP_PADDING_ERR | F_DDP_HDRCRC_ERR |
	    F_DDP_DATACRC_ERR)) != 0) {
		/* Hardware flagged a digest/padding error: fail the connection. */
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		ic->ic_error(ic);
		return (0);
	}

	/* NOTE(review): icl_cxgbei_new_pdu_set_conn(ip, ic) appears elided here. */

	/* Hand the completed PDU to the rx worker. */
	STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
	if (!icc->rx_active) {
		icc->rx_active = true;
		wakeup(&icc->rx_active);
	}
	SOCKBUF_UNLOCK(sb);
	INP_WUNLOCK(inp);

	toep->ulpcb2 = NULL;

	return (0);
}
595
/*
 * Handler for CPL_RX_ISCSI_CMP (T6+ completion mode): carries the PDU
 * header and completion status together.  If the data arrived via DDP this
 * may also represent a whole burst of Data-In/Data-Out PDUs, which is
 * collapsed into one "large" PDU (ip_additional_pdus) before being queued
 * for the rx worker.
 *
 * NOTE(review): several statements appear elided from this extract —
 * setting ICPF_RX_STATUS, the padding/digest error counters, an icc
 * signature assertion, the subtrahend of the prev_seg_len computation
 * (likely cmp->next_buffer_offset — as written the statement is
 * syntactically incomplete), and the icl_cxgbei_new_pdu_set_conn() call.
 * Verify against upstream.
 */
static int
do_rx_iscsi_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct epoch_tracker et;
	struct adapter *sc = iq->adapter;
	struct cpl_rx_iscsi_cmp *cpl = mtod(m, struct cpl_rx_iscsi_cmp *);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct icl_cxgbei_pdu *icp = toep->ulpcb2;
	struct icl_pdu *ip;
	struct cxgbei_cmp *cmp;
	struct inpcb *inp = toep->inp;
#ifdef INVARIANTS
	uint16_t len = be16toh(cpl->len);
	u_int data_digest_len;
#endif
	struct socket *so;
	struct sockbuf *sb;
	struct tcpcb *tp;
	struct icl_cxgbei_conn *icc;
	struct icl_conn *ic;
	struct iscsi_bhs_data_out *bhsdo;
	u_int val = be32toh(cpl->ddpvld);
	u_int npdus, pdu_len;
	uint32_t prev_seg_len;

	M_ASSERTPKTHDR(m);
	MPASS(m->m_pkthdr.len == len + sizeof(*cpl));

	/* Data came via the freelist: a PDU must already be in progress. */
	if ((val & F_DDP_PDU) == 0) {
		MPASS(icp != NULL);
		MPASS((icp->icp_flags & ICPF_RX_STATUS) == 0);
		ip = &icp->ip;
	}

	/*
	 * NOTE(review): if F_DDP_PDU is set and icp != NULL, 'ip' is not
	 * assigned by either branch — presumably that combination cannot
	 * occur (DDP-placed data generates no CPL_ISCSI_DATA, so ulpcb2
	 * stays NULL).  Confirm.
	 */
	if (icp == NULL) {
		/* T6 completion enabled, start of a new PDU. */
		ip = icl_cxgbei_new_pdu(M_NOWAIT);
		if (ip == NULL)
			CXGBE_UNIMPLEMENTED("PDU allocation failure");
		icp = ip_to_icp(ip);
	}
	pdu_len = G_ISCSI_PDU_LEN(be16toh(cpl->pdu_len_ddp));

#if 0
	CTR5(KTR_CXGBE,
	    "%s: tid %u, cpl->len %u, ddpvld 0x%08x, icp %p",
	    __func__, tid, pdu_len, val, icp);
#endif

	/* Copy header */
	m_copydata(m, sizeof(*cpl), ISCSI_BHS_SIZE, (caddr_t)ip->ip_bhs);
	bhsdo = (struct iscsi_bhs_data_out *)ip->ip_bhs;
	/* 24-bit big-endian data segment length from the BHS. */
	ip->ip_data_len = bhsdo->bhsdo_data_segment_len[0] << 16 |
	    bhsdo->bhsdo_data_segment_len[1] << 8 |
	    bhsdo->bhsdo_data_segment_len[2];
	icp->icp_seq = ntohl(cpl->seq);
	icp->icp_flags |= ICPF_RX_HDR;
	/* NOTE(review): a line is elided here (likely icp->icp_flags |= ICPF_RX_STATUS). */

	if (val & F_DDP_PADDING_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid padding",
		    ip->ip_bhs->bhs_opcode);
		/* NOTE(review): rx_iscsi_padding_errors increment elided here. */
	}
	if (val & F_DDP_HDRCRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid header digest",
		    ip->ip_bhs->bhs_opcode);
		/* NOTE(review): rx_iscsi_header_digest_errors increment elided here. */
	}
	if (val & F_DDP_DATACRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid data digest",
		    ip->ip_bhs->bhs_opcode);
		/* NOTE(review): rx_iscsi_data_digest_errors increment elided here. */
	}

	INP_WLOCK(inp);
	if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {
		/* Connection already gone; discard the PDU. */
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, pdu_len, inp->inp_flags);
		INP_WUNLOCK(inp);
		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	/*
	 * If icc is NULL, the connection is being closed in
	 * icl_cxgbei_conn_close(), just drop this data.
	 */
	icc = toep->ulpcb;
	if (__predict_false(icc == NULL)) {
		CTR4(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes), icc %p",
		    __func__, tid, pdu_len, icc);

		/*
		 * Update rcv_nxt so the sequence number of the FIN
		 * doesn't appear wrong.
		 */
		tp->rcv_nxt = icp->icp_seq + pdu_len;
		tp->t_rcvtime = ticks;
		INP_WUNLOCK(inp);

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		return (0);
	}

	/* NOTE(review): an icc signature MPASS appears elided here. */
	ic = &icc->ic;
	if ((val & (F_DDP_PADDING_ERR | F_DDP_HDRCRC_ERR |
	    F_DDP_DATACRC_ERR)) != 0) {
		/* Hardware flagged a digest/padding error: fail the connection. */
		INP_WUNLOCK(inp);

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		ic->ic_error(ic);
		return (0);
	}

#ifdef INVARIANTS
	data_digest_len = (icc->ulp_submode & ULP_CRC_DATA) ?
	    ISCSI_DATA_DIGEST_SIZE : 0;
	MPASS(roundup2(ip->ip_data_len, 4) == pdu_len - len - data_digest_len);
#endif

	/* Data was placed by DDP: this CPL may complete a burst of PDUs. */
	if (val & F_DDP_PDU && ip->ip_data_mbuf == NULL) {
		MPASS((icp->icp_flags & ICPF_RX_FLBUF) == 0);
		MPASS(ip->ip_data_len > 0);
		icp->icp_flags |= ICPF_RX_DDP;
		bhsdo = (struct iscsi_bhs_data_out *)ip->ip_bhs;

		/* Locate the tracked command for this tag. */
		switch (ip->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) {
		case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
			cmp = cxgbei_find_cmp(icc,
			    be32toh(bhsdo->bhsdo_initiator_task_tag));
			break;
		case ISCSI_BHS_OPCODE_SCSI_DATA_OUT:
			cmp = cxgbei_find_cmp(icc,
			    be32toh(bhsdo->bhsdo_target_transfer_tag));
			break;
		default:
			__assert_unreachable();
		}
		MPASS(cmp != NULL);

		/*
		 * The difference between the end of the last burst
		 * and the offset of the last PDU in this burst is
		 * the additional data received via DDP.
		 */
		/*
		 * NOTE(review): the subtrahend of this expression was
		 * elided (likely cmp->next_buffer_offset); as written
		 * the statement is incomplete.
		 */
		prev_seg_len = be32toh(bhsdo->bhsdo_buffer_offset) -

		if (prev_seg_len != 0) {
			uint32_t orig_datasn;

			/*
			 * Return a "large" PDU representing the burst
			 * of PDUs.  Adjust the offset and length of
			 * this PDU to represent the entire burst.
			 */
			ip->ip_data_len += prev_seg_len;
			bhsdo->bhsdo_data_segment_len[2] = ip->ip_data_len;
			bhsdo->bhsdo_data_segment_len[1] = ip->ip_data_len >> 8;
			bhsdo->bhsdo_data_segment_len[0] = ip->ip_data_len >> 16;
			bhsdo->bhsdo_buffer_offset =
			    htobe32(cmp->next_buffer_offset);

			/* htobe32/be32toh are the same byteswap; used for host order here. */
			orig_datasn = htobe32(bhsdo->bhsdo_datasn);
			npdus = orig_datasn - cmp->last_datasn;
			bhsdo->bhsdo_datasn = htobe32(cmp->last_datasn + 1);
			cmp->last_datasn = orig_datasn;
			ip->ip_additional_pdus = npdus - 1;
		} else {
			MPASS(htobe32(bhsdo->bhsdo_datasn) ==
			    cmp->last_datasn + 1);
			npdus = 1;
			cmp->last_datasn = htobe32(bhsdo->bhsdo_datasn);
		}

		cmp->next_buffer_offset += ip->ip_data_len;
		toep->ofld_rxq->rx_iscsi_ddp_pdus += npdus;
		toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len;
	} else {
		/* Freelist path: the data mbuf chain was attached earlier. */
		MPASS(icp->icp_flags & (ICPF_RX_FLBUF));
		MPASS(ip->ip_data_len == ip->ip_data_mbuf->m_pkthdr.len);
	}

	tp->rcv_nxt = icp->icp_seq + pdu_len;
	tp->t_rcvtime = ticks;

	/*
	 * Don't update the window size or return credits since RX
	 * flow control is disabled.
	 */

	so = inp->inp_socket;
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		/* Receiver is gone: reset the connection and drop the PDU. */
		CTR5(KTR_CXGBE,
		    "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
		    __func__, tid, pdu_len, icc, sb->sb_state);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		CURVNET_SET(so->so_vnet);
		NET_EPOCH_ENTER(et);
		INP_WLOCK(inp);
		/* tcp_drop() unlocks the inp itself when it frees it. */
		tp = tcp_drop(tp, ECONNRESET);
		if (tp != NULL)
			INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		return (0);
	}

	/* NOTE(review): icl_cxgbei_new_pdu_set_conn(ip, ic) appears elided here. */

	/* Enqueue the PDU to the received pdus queue. */
	STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
	if (!icc->rx_active) {
		icc->rx_active = true;
		wakeup(&icc->rx_active);
	}
	SOCKBUF_UNLOCK(sb);
	INP_WUNLOCK(inp);

	toep->ulpcb2 = NULL;
	m_freem(m);

	return (0);
}
839
840static int
841cxgbei_activate(struct adapter *sc)
842{
843 struct cxgbei_data *ci;
844 int rc;
845
847
848 if (uld_active(sc, ULD_ISCSI)) {
849 KASSERT(0, ("%s: iSCSI offload already enabled on adapter %p",
850 __func__, sc));
851 return (0);
852 }
853
854 if (sc->iscsicaps == 0 || sc->vres.iscsi.size == 0) {
855 device_printf(sc->dev,
856 "not iSCSI offload capable, or capability disabled.\n");
857 return (ENOSYS);
858 }
859
860 /* per-adapter softc for iSCSI */
861 ci = malloc(sizeof(*ci), M_CXGBE, M_ZERO | M_WAITOK);
862 if (ci == NULL)
863 return (ENOMEM);
864
865 rc = cxgbei_init(sc, ci);
866 if (rc != 0) {
867 free(ci, M_CXGBE);
868 return (rc);
869 }
870
871 sc->iscsi_ulp_softc = ci;
872
873 return (0);
874}
875
876static int
877cxgbei_deactivate(struct adapter *sc)
878{
879 struct cxgbei_data *ci = sc->iscsi_ulp_softc;
880
882
883 if (ci != NULL) {
884 sysctl_ctx_free(&ci->ctx);
886 free(ci, M_CXGBE);
887 sc->iscsi_ulp_softc = NULL;
888 }
889
890 return (0);
891}
892
893static void
894cxgbei_activate_all(struct adapter *sc, void *arg __unused)
895{
896
897 if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isact") != 0)
898 return;
899
900 /* Activate iSCSI if any port on this adapter has IFCAP_TOE enabled. */
901 if (sc->offload_map && !uld_active(sc, ULD_ISCSI))
902 (void) t4_activate_uld(sc, ULD_ISCSI);
903
904 end_synchronized_op(sc, 0);
905}
906
907static void
908cxgbei_deactivate_all(struct adapter *sc, void *arg __unused)
909{
910
911 if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isdea") != 0)
912 return;
913
914 if (uld_active(sc, ULD_ISCSI))
915 (void) t4_deactivate_uld(sc, ULD_ISCSI);
916
917 end_synchronized_op(sc, 0);
918}
919
/* Upper-layer driver registration record for the iSCSI ULP. */
static struct uld_info cxgbei_uld_info = {
	.uld_id = ULD_ISCSI,
	.activate = cxgbei_activate,
	.deactivate = cxgbei_deactivate,
};
925
926static int
927cxgbei_mod_load(void)
928{
929 int rc;
930
931 t4_register_cpl_handler(CPL_ISCSI_HDR, do_rx_iscsi_hdr);
932 t4_register_cpl_handler(CPL_ISCSI_DATA, do_rx_iscsi_data);
935
936 rc = t4_register_uld(&cxgbei_uld_info);
937 if (rc != 0)
938 return (rc);
939
940 t4_iterate(cxgbei_activate_all, NULL);
941
942 return (rc);
943}
944
945static int
946cxgbei_mod_unload(void)
947{
948
949 t4_iterate(cxgbei_deactivate_all, NULL);
950
951 if (t4_unregister_uld(&cxgbei_uld_info) == EBUSY)
952 return (EBUSY);
953
958
959 return (0);
960}
961#endif
962
963static int
964cxgbei_modevent(module_t mod, int cmd, void *arg)
965{
966 int rc = 0;
967
968#ifdef TCP_OFFLOAD
969 switch (cmd) {
970 case MOD_LOAD:
971 rc = cxgbei_mod_load();
972 if (rc == 0)
973 rc = icl_cxgbei_mod_load();
974 break;
975
976 case MOD_UNLOAD:
978 if (rc == 0)
979 rc = cxgbei_mod_unload();
980 break;
981
982 default:
983 rc = EINVAL;
984 }
985#else
986 printf("cxgbei: compiled without TCP_OFFLOAD support.\n");
987 rc = EOPNOTSUPP;
988#endif
989
990 return (rc);
991}
992
993static moduledata_t cxgbei_mod = {
994 "cxgbei",
996 NULL,
997};
998
999MODULE_VERSION(cxgbei, 1);
1000DECLARE_MODULE(cxgbei, cxgbei_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1001MODULE_DEPEND(cxgbei, t4_tom, 1, 1, 1);
1002MODULE_DEPEND(cxgbei, cxgbe, 1, 1, 1);
1003MODULE_DEPEND(cxgbei, icl, 1, 1, 1);
int begin_synchronized_op(struct adapter *, struct vi_info *, int, char *)
Definition: t4_main.c:6121
static uint32_t t4_read_reg(struct adapter *sc, uint32_t reg)
Definition: adapter.h:1104
void t4_register_cpl_handler(int, cpl_handler_t)
Definition: t4_sge.c:387
struct mp_ring * r
Definition: adapter.h:3
#define KTR_CXGBE
Definition: adapter.h:67
void t4_iterate(void(*)(struct adapter *, void *), void *)
Definition: t4_main.c:12190
#define ASSERT_SYNCHRONIZED_OP(sc)
Definition: adapter.h:1020
struct sge_iq iq
Definition: adapter.h:0
@ SLEEP_OK
Definition: adapter.h:149
@ INTR_OK
Definition: adapter.h:150
void end_synchronized_op(struct adapter *, int)
Definition: t4_main.c:6204
#define CXGBE_UNIMPLEMENTED(s)
Definition: adapter.h:69
#define CHELSIO_T5
Definition: common.h:415
void t4_set_reg_field(struct adapter *adap, unsigned int addr, u32 mask, u32 val)
Definition: t4_hw.c:100
#define CHELSIO_T6
Definition: common.h:416
static int chip_id(struct adapter *adap)
Definition: common.h:512
static moduledata_t cxgbei_mod
Definition: cxgbei.c:993
MODULE_VERSION(cxgbei, 1)
__FBSDID("$FreeBSD$")
DECLARE_MODULE(cxgbei, cxgbei_mod, SI_SUB_EXEC, SI_ORDER_ANY)
static int cxgbei_modevent(module_t mod, int cmd, void *arg)
Definition: cxgbei.c:964
MODULE_DEPEND(cxgbei, t4_tom, 1, 1, 1)
static struct icl_cxgbei_pdu * ip_to_icp(struct icl_pdu *ip)
Definition: cxgbei.h:103
int icl_cxgbei_mod_load(void)
struct icl_pdu * icl_cxgbei_new_pdu(int)
void icl_cxgbei_conn_pdu_free(struct icl_conn *, struct icl_pdu *)
#define CXGBEI_CONN_SIGNATURE
Definition: cxgbei.h:35
int icl_cxgbei_mod_unload(void)
struct cxgbei_cmp * cxgbei_find_cmp(struct icl_cxgbei_conn *, uint32_t)
void parse_pdus(struct icl_cxgbei_conn *, struct sockbuf *)
void icl_cxgbei_new_pdu_set_conn(struct icl_pdu *, struct icl_conn *)
@ ICPF_RX_DDP
Definition: cxgbei.h:83
@ ICPF_RX_HDR
Definition: cxgbei.h:81
@ ICPF_RX_STATUS
Definition: cxgbei.h:84
@ ICPF_RX_FLBUF
Definition: cxgbei.h:82
@ ULD_ISCSI
Definition: offload.h:207
int offload_map
Definition: adapter.h:937
uint16_t iscsicaps
Definition: adapter.h:969
struct t4_virt_res vres
Definition: adapter.h:960
device_t dev
Definition: adapter.h:866
void * iscsi_ulp_softc
Definition: adapter.h:928
__be16 len
Definition: t4_msg.h:1630
__be32 seq
Definition: t4_msg.h:1610
__be16 pdu_len_ddp
Definition: t4_msg.h:1608
__be16 len
Definition: t4_msg.h:1609
__be32 ddpvld
Definition: t4_msg.h:1769
__be16 len
Definition: t4_msg.h:1762
__be32 ddpvld
Definition: t4_msg.h:1826
__be16 pdu_len_ddp
Definition: t4_msg.h:1819
uint32_t next_buffer_offset
Definition: cxgbei.h:42
uint32_t last_datasn
Definition: cxgbei.h:43
u_int max_tx_data_len
Definition: cxgbei.h:110
struct sysctl_ctx_list ctx
Definition: cxgbei.h:116
struct ppod_region pr
Definition: cxgbei.h:114
u_int ddp_threshold
Definition: cxgbei.h:113
u_int max_rx_data_len
Definition: cxgbei.h:111
uint32_t icc_signature
Definition: cxgbei.h:51
int ulp_submode
Definition: cxgbei.h:52
struct toepcb * toep
Definition: cxgbei.h:54
bool rx_active
Definition: cxgbei.h:57
struct icl_conn ic
Definition: cxgbei.h:48
u_int icp_flags
Definition: cxgbei.h:95
uint32_t icp_seq
Definition: cxgbei.h:94
struct icl_pdu ip
Definition: cxgbei.h:90
uint32_t pr_tag_mask
Definition: t4_tom.h:131
u_int pr_page_shift[4]
Definition: t4_tom.h:130
struct adapter * adapter
Definition: adapter.h:422
uint64_t rx_iscsi_fl_pdus
Definition: adapter.h:680
uint64_t rx_iscsi_ddp_pdus
Definition: adapter.h:678
uint64_t rx_iscsi_fl_octets
Definition: adapter.h:681
uint64_t rx_iscsi_data_digest_errors
Definition: adapter.h:684
uint64_t rx_iscsi_ddp_octets
Definition: adapter.h:679
uint64_t rx_iscsi_header_digest_errors
Definition: adapter.h:683
uint64_t rx_iscsi_padding_errors
Definition: adapter.h:682
u_int size
Definition: offload.h:187
struct t4_range iscsi
Definition: offload.h:192
Definition: t4_tom.h:182
void * ulpcb
Definition: t4_tom.h:205
struct sge_ofld_rxq * ofld_rxq
Definition: t4_tom.h:191
int tid
Definition: t4_tom.h:195
struct inpcb * inp
Definition: t4_tom.h:184
void * ulpcb2
Definition: t4_tom.h:206
int uld_id
Definition: offload.h:216
@ CPL_ISCSI_HDR
Definition: t4_msg.h:84
@ CPL_RX_ISCSI_DDP
Definition: t4_msg.h:107
@ CPL_ISCSI_DATA
Definition: t4_msg.h:141
@ CPL_RX_ISCSI_CMP
Definition: t4_msg.h:103
@ ULP_CRC_DATA
Definition: t4_msg.h:242
#define F_DDP_PADDING_ERR
Definition: t4_msg.h:1864
#define F_DDP_PDU
Definition: t4_msg.h:1852
#define F_DDP_HDRCRC_ERR
Definition: t4_msg.h:1868
#define G_ISCSI_PDU_LEN(x)
Definition: t4_msg.h:1620
#define F_DDP_DATACRC_ERR
Definition: t4_msg.h:1872
#define GET_TID(cmd)
Definition: t4_msg.h:330
#define G_PMMAXXFERLEN1(x)
Definition: t4_regs.h:22726
#define M_ISCSITAGMASK
Definition: t4_regs.h:37095
#define A_TP_PARA_REG7
Definition: t4_regs.h:22721
#define G_PMMAXXFERLEN0(x)
Definition: t4_regs.h:22731
#define A_TP_PARA_REG2
Definition: t4_regs.h:22338
#define G_MAXRXDATA(x)
Definition: t4_regs.h:22343
#define A_ULP_RX_ISCSI_PSZ
Definition: t4_regs.h:37099
#define A_TP_PMM_TX_PAGE_SIZE
Definition: t4_regs.h:21772
#define A_ULP_RX_ISCSI_TAGMASK
Definition: t4_regs.h:37092
#define A_TP_PMM_RX_PAGE_SIZE
Definition: t4_regs.h:21760
#define V_ISCSITAGMASK(x)
Definition: t4_regs.h:37096
void t4_free_ppod_region(struct ppod_region *)
void * lookup_tid(struct adapter *, int)
int t4_init_ppod_region(struct ppod_region *, struct t4_range *, u_int, const char *)