FreeBSD kernel amd64 PCI device code
pci_iov.c
1/*-
2 * Copyright (c) 2013-2015 Sandvine Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include "opt_bus.h"
31
32#include <sys/param.h>
33#include <sys/conf.h>
34#include <sys/kernel.h>
35#include <sys/systm.h>
36#include <sys/bus.h>
37#include <sys/fcntl.h>
38#include <sys/ioccom.h>
39#include <sys/iov.h>
40#include <sys/linker.h>
41#include <sys/lock.h>
42#include <sys/malloc.h>
43#include <sys/module.h>
44#include <sys/mutex.h>
45#include <sys/pciio.h>
46#include <sys/queue.h>
47#include <sys/rman.h>
48#include <sys/sysctl.h>
49
50#include <machine/bus.h>
51#include <machine/stdarg.h>
52
53#include <sys/nv.h>
54#include <sys/iov_schema.h>
55
56#include <dev/pci/pcireg.h>
57#include <dev/pci/pcivar.h>
58#include <dev/pci/pci_iov.h>
59#include <dev/pci/pci_private.h>
60#include <dev/pci/pci_iov_private.h>
61#include <dev/pci/schema_private.h>
62
63#include "pcib_if.h"
64
65static MALLOC_DEFINE(M_SRIOV, "sr_iov", "PCI SR-IOV allocations");
66
67static d_ioctl_t pci_iov_ioctl;
68
69static struct cdevsw iov_cdevsw = {
70 .d_version = D_VERSION,
71 .d_name = "iov",
72 .d_ioctl = pci_iov_ioctl
73};
74
75SYSCTL_DECL(_hw_pci);
76
77/*
78 * The maximum amount of memory we will allocate for user configuration of an
79 * SR-IOV device. 1MB ought to be enough for anyone, but leave this
80 * configurable just in case.
81 */
82static u_long pci_iov_max_config = 1024 * 1024;
83SYSCTL_ULONG(_hw_pci, OID_AUTO, iov_max_config, CTLFLAG_RWTUN,
84 &pci_iov_max_config, 0, "Maximum allowed size of SR-IOV configuration.");
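/*
 * Usage note: the knob above surfaces as the sysctl/tunable
 * hw.pci.iov_max_config and, being CTLFLAG_RWTUN, can be raised either at
 * runtime with sysctl(8) or from loader.conf, e.g.
 * hw.pci.iov_max_config=2097152 for a 2MB limit.
 */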
85
86#define IOV_READ(d, r, w) \
87 pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, w)
88
89#define IOV_WRITE(d, r, v, w) \
90 pci_write_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, v, w)
91
92static nvlist_t *pci_iov_build_schema(nvlist_t **pf_schema,
93 nvlist_t **vf_schema);
94static void pci_iov_build_pf_schema(nvlist_t *schema,
95 nvlist_t **driver_schema);
96static void pci_iov_build_vf_schema(nvlist_t *schema,
97 nvlist_t **driver_schema);
98static int pci_iov_delete_iov_children(struct pci_devinfo *dinfo);
99static nvlist_t *pci_iov_get_pf_subsystem_schema(void);
100static nvlist_t *pci_iov_get_vf_subsystem_schema(void);
101
102int
103pci_iov_attach_name(device_t dev, struct nvlist *pf_schema,
104 struct nvlist *vf_schema, const char *fmt, ...)
105{
106 char buf[NAME_MAX + 1];
107 va_list ap;
108
109 va_start(ap, fmt);
110 vsnprintf(buf, sizeof(buf), fmt, ap);
111 va_end(ap);
112 return (PCI_IOV_ATTACH(device_get_parent(dev), dev, pf_schema,
113 vf_schema, buf));
114}
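/*
 * Illustrative sketch of how a PF driver is expected to call the attach
 * function above; the foo_ names and the schema parameters are
 * hypothetical, and a real driver adds whatever parameters its
 * PCI_IOV_INIT/PCI_IOV_ADD_VF methods consume (see sys/iov_schema.h).
 */
#if 0	/* example only, not compiled */
static int
foo_iov_attach(device_t dev)
{
	nvlist_t *pf_schema, *vf_schema;

	pf_schema = pci_iov_schema_alloc_node();
	vf_schema = pci_iov_schema_alloc_node();
	if (pf_schema == NULL || vf_schema == NULL) {
		nvlist_destroy(pf_schema);
		nvlist_destroy(vf_schema);
		return (ENOMEM);
	}

	/* Hypothetical driver-specific parameters. */
	pci_iov_schema_add_uint16(pf_schema, "num-queues",
	    IOV_SCHEMA_HASDEFAULT, 1);
	pci_iov_schema_add_bool(vf_schema, "allow-promisc",
	    IOV_SCHEMA_HASDEFAULT, 0);

	/* The schema nvlists are handed off to the SR-IOV code here. */
	return (pci_iov_attach_name(dev, pf_schema, vf_schema, "%s",
	    device_get_nameunit(dev)));
}
#endif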
115
116int
117pci_iov_attach_method(device_t bus, device_t dev, nvlist_t *pf_schema,
118 nvlist_t *vf_schema, const char *name)
119{
120 struct pci_devinfo *dinfo;
121 struct pcicfg_iov *iov;
122 nvlist_t *schema;
123 uint32_t version;
124 int error;
125 int iov_pos;
126
127 dinfo = device_get_ivars(dev);
128 schema = NULL;
129
130 error = pci_find_extcap(dev, PCIZ_SRIOV, &iov_pos);
131
132 if (error != 0)
133 return (error);
134
135 version = pci_read_config(dev, iov_pos, 4);
136 if (PCI_EXTCAP_VER(version) != 1) {
137 if (bootverbose)
138 device_printf(dev,
139 "Unsupported version of SR-IOV (%d) detected\n",
140 PCI_EXTCAP_VER(version));
141
142 return (ENXIO);
143 }
144
145 iov = malloc(sizeof(*dinfo->cfg.iov), M_SRIOV, M_WAITOK | M_ZERO);
146
147 mtx_lock(&Giant);
148 if (dinfo->cfg.iov != NULL) {
149 error = EBUSY;
150 goto cleanup;
151 }
152 iov->iov_pf = dev;
153 iov->iov_pos = iov_pos;
154
155 schema = pci_iov_build_schema(&pf_schema, &vf_schema);
156 if (schema == NULL) {
157 error = ENOMEM;
158 goto cleanup;
159 }
160
161 error = pci_iov_validate_schema(schema);
162 if (error != 0)
163 goto cleanup;
164 iov->iov_schema = schema;
165
166 iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev),
167 UID_ROOT, GID_WHEEL, 0600, "iov/%s", name);
168
169 if (iov->iov_cdev == NULL) {
170 error = ENOMEM;
171 goto cleanup;
172 }
173
174 dinfo->cfg.iov = iov;
175 iov->iov_cdev->si_drv1 = dinfo;
176 mtx_unlock(&Giant);
177
178 return (0);
179
180cleanup:
181 nvlist_destroy(schema);
182 nvlist_destroy(pf_schema);
183 nvlist_destroy(vf_schema);
184 free(iov, M_SRIOV);
185 mtx_unlock(&Giant);
186 return (error);
187}
188
189int
190pci_iov_detach_method(device_t bus, device_t dev)
191{
192 struct pci_devinfo *dinfo;
193 struct pcicfg_iov *iov;
194 int error;
195
196 mtx_lock(&Giant);
197 dinfo = device_get_ivars(dev);
198 iov = dinfo->cfg.iov;
199
200 if (iov == NULL) {
201 mtx_unlock(&Giant);
202 return (0);
203 }
204
205 if ((iov->iov_flags & IOV_BUSY) != 0) {
206 mtx_unlock(&Giant);
207 return (EBUSY);
208 }
209
210 error = pci_iov_delete_iov_children(dinfo);
211 if (error != 0) {
212 mtx_unlock(&Giant);
213 return (error);
214 }
215
216 dinfo->cfg.iov = NULL;
217
218 if (iov->iov_cdev) {
219 destroy_dev(iov->iov_cdev);
220 iov->iov_cdev = NULL;
221 }
222 nvlist_destroy(iov->iov_schema);
223
224 free(iov, M_SRIOV);
225 mtx_unlock(&Giant);
226
227 return (0);
228}
229
230static nvlist_t *
231pci_iov_build_schema(nvlist_t **pf, nvlist_t **vf)
232{
233 nvlist_t *schema, *pf_driver, *vf_driver;
234
235 /* We always take ownership of the schemas. */
236 pf_driver = *pf;
237 *pf = NULL;
238 vf_driver = *vf;
239 *vf = NULL;
240
241 schema = pci_iov_schema_alloc_node();
242 if (schema == NULL)
243 goto cleanup;
244
245 pci_iov_build_pf_schema(schema, &pf_driver);
246 pci_iov_build_vf_schema(schema, &vf_driver);
247
248 if (nvlist_error(schema) != 0)
249 goto cleanup;
250
251 return (schema);
252
253cleanup:
254 nvlist_destroy(schema);
255 nvlist_destroy(pf_driver);
256 nvlist_destroy(vf_driver);
257 return (NULL);
258}
259
260static void
261pci_iov_build_pf_schema(nvlist_t *schema, nvlist_t **driver_schema)
262{
263 nvlist_t *pf_schema, *iov_schema;
264
265 pf_schema = pci_iov_schema_alloc_node();
266 if (pf_schema == NULL) {
267 nvlist_set_error(schema, ENOMEM);
268 return;
269 }
270
271 iov_schema = pci_iov_get_pf_subsystem_schema();
272
273 /*
274 * Note that if either *driver_schema or iov_schema is NULL, then
275 * nvlist_move_nvlist will put the schema in the error state and
276 * SR-IOV will fail to initialize later, so we don't have to explicitly
277 * handle that case.
278 */
279 nvlist_move_nvlist(pf_schema, DRIVER_CONFIG_NAME, *driver_schema);
280 nvlist_move_nvlist(pf_schema, IOV_CONFIG_NAME, iov_schema);
281 nvlist_move_nvlist(schema, PF_CONFIG_NAME, pf_schema);
282 *driver_schema = NULL;
283}
284
285static void
286pci_iov_build_vf_schema(nvlist_t *schema, nvlist_t **driver_schema)
287{
288 nvlist_t *vf_schema, *iov_schema;
289
290 vf_schema = pci_iov_schema_alloc_node();
291 if (vf_schema == NULL) {
292 nvlist_set_error(schema, ENOMEM);
293 return;
294 }
295
296 iov_schema = pci_iov_get_vf_subsystem_schema();
297
298 /*
299 * Note that if either *driver_schema or iov_schema is NULL, then
300 * nvlist_move_nvlist will put the schema in the error state and
301 * SR-IOV will fail to initialize later, so we don't have to explicitly
302 * handle that case.
303 */
304 nvlist_move_nvlist(vf_schema, DRIVER_CONFIG_NAME, *driver_schema);
305 nvlist_move_nvlist(vf_schema, IOV_CONFIG_NAME, iov_schema);
306 nvlist_move_nvlist(schema, VF_SCHEMA_NAME, vf_schema);
307 *driver_schema = NULL;
308}
309
310static nvlist_t *
311pci_iov_get_pf_subsystem_schema(void)
312{
313 nvlist_t *pf;
314
315 pf = pci_iov_schema_alloc_node();
316 if (pf == NULL)
317 return (NULL);
318
319 pci_iov_schema_add_uint16(pf, "num_vfs", IOV_SCHEMA_REQUIRED, -1);
320 pci_iov_schema_add_string(pf, "device", IOV_SCHEMA_REQUIRED, NULL);
321
322 return (pf);
323}
324
325static nvlist_t *
326pci_iov_get_vf_subsystem_schema(void)
327{
328 nvlist_t *vf;
329
330 vf = pci_iov_schema_alloc_node();
331 if (vf == NULL)
332 return (NULL);
333
334 pci_iov_schema_add_bool(vf, "passthrough", IOV_SCHEMA_HASDEFAULT, 0);
335
336 return (vf);
337}
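/*
 * Taken together, the two subsystem schemas above plus the driver-supplied
 * schemas give the validation tree roughly this shape (macro names as used
 * in this file):
 *
 *	PF_CONFIG_NAME: DRIVER_CONFIG_NAME -> PF driver parameters
 *	                IOV_CONFIG_NAME    -> "num_vfs", "device"
 *	VF_SCHEMA_NAME: DRIVER_CONFIG_NAME -> VF driver parameters
 *	                IOV_CONFIG_NAME    -> "passthrough"
 */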
338
339static int
340pci_iov_alloc_bar(struct pci_devinfo *dinfo, int bar, pci_addr_t bar_shift)
341{
342 struct resource *res;
343 struct pcicfg_iov *iov;
344 device_t dev, bus;
345 rman_res_t start, end;
346 pci_addr_t bar_size;
347 int rid;
348
349 iov = dinfo->cfg.iov;
350 dev = dinfo->cfg.dev;
351 bus = device_get_parent(dev);
352 rid = iov->iov_pos + PCIR_SRIOV_BAR(bar);
353 bar_size = 1 << bar_shift;
354
355 res = pci_alloc_multi_resource(bus, dev, SYS_RES_MEMORY, &rid, 0,
356 ~0, 1, iov->iov_num_vfs, RF_ACTIVE);
357
358 if (res == NULL)
359 return (ENXIO);
360
361 iov->iov_bar[bar].res = res;
362 iov->iov_bar[bar].bar_size = bar_size;
363 iov->iov_bar[bar].bar_shift = bar_shift;
364
365 start = rman_get_start(res);
366 end = rman_get_end(res);
367 return (rman_manage_region(&iov->rman, start, end));
368}
369
370static void
371pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo)
372{
373 struct pci_iov_bar *bar;
374 uint64_t bar_start;
375 int i;
376
377 for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
378 bar = &iov->iov_bar[i];
379 if (bar->res != NULL) {
380 bar_start = rman_get_start(bar->res) +
381 dinfo->cfg.vf.index * bar->bar_size;
382
383 pci_add_bar(dinfo->cfg.dev, PCIR_BAR(i), bar_start,
384 bar->bar_shift);
385 }
386 }
387}
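/*
 * Worked example of the BAR arithmetic above (values hypothetical): if the
 * PF's VF BAR 2 resource starts at 0xd0000000 and bar_size is 16KB, the VF
 * with index 3 gets its BAR 2 programmed to
 * 0xd0000000 + 3 * 0x4000 = 0xd000c000, reflecting the SR-IOV layout in
 * which the per-VF BARs are packed back to back inside the PF allocation.
 */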
388
389static int
390pci_iov_parse_config(struct pcicfg_iov *iov, struct pci_iov_arg *arg,
391 nvlist_t **ret)
392{
393 void *packed_config;
394 nvlist_t *config;
395 int error;
396
397 config = NULL;
398 packed_config = NULL;
399
400 if (arg->len > pci_iov_max_config) {
401 error = EMSGSIZE;
402 goto out;
403 }
404
405 packed_config = malloc(arg->len, M_SRIOV, M_WAITOK);
406
407 error = copyin(arg->config, packed_config, arg->len);
408 if (error != 0)
409 goto out;
410
411 config = nvlist_unpack(packed_config, arg->len, NV_FLAG_IGNORE_CASE);
412 if (config == NULL) {
413 error = EINVAL;
414 goto out;
415 }
416
417 error = pci_iov_schema_validate_config(iov->iov_schema, config);
418 if (error != 0)
419 goto out;
420
421 error = nvlist_error(config);
422 if (error != 0)
423 goto out;
424
425 *ret = config;
426 config = NULL;
427
428out:
429 nvlist_destroy(config);
430 free(packed_config, M_SRIOV);
431 return (error);
432}
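/*
 * Sketch of the userland side that produces the packed buffer parsed
 * above; this is roughly what iovctl(8) does.  The helper name and device
 * path are hypothetical, the nvlist contents must follow the schema the
 * PF driver registered, and the program links against libnv.
 */
#if 0	/* example only, not compiled */
#include <sys/ioctl.h>
#include <sys/iov.h>
#include <sys/nv.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static int
example_iov_config(const char *iov_dev, nvlist_t *config)
{
	struct pci_iov_arg arg;
	int fd, ret;

	memset(&arg, 0, sizeof(arg));
	arg.config = nvlist_pack(config, &arg.len);
	if (arg.config == NULL)
		return (-1);

	fd = open(iov_dev, O_RDWR);	/* e.g. "/dev/iov/foo0" */
	if (fd < 0) {
		free(arg.config);
		return (-1);
	}
	ret = ioctl(fd, IOV_CONFIG, &arg);
	free(arg.config);
	close(fd);
	return (ret);
}
#endif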
433
434/*
435 * Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV
436 * capability. This bit is only writeable on the lowest-numbered PF but
437 * affects all PFs on the device.
438 */
439static int
440pci_iov_set_ari(device_t bus)
441{
442 device_t lowest;
443 device_t *devlist;
444 int i, error, devcount, lowest_func, lowest_pos, iov_pos, dev_func;
445 uint16_t iov_ctl;
446
447 /* If ARI is disabled on the downstream port there is nothing to do. */
448 if (!PCIB_ARI_ENABLED(device_get_parent(bus)))
449 return (0);
450
451 error = device_get_children(bus, &devlist, &devcount);
452
453 if (error != 0)
454 return (error);
455
456 lowest = NULL;
457 for (i = 0; i < devcount; i++) {
458 if (pci_find_extcap(devlist[i], PCIZ_SRIOV, &iov_pos) == 0) {
459 dev_func = pci_get_function(devlist[i]);
460 if (lowest == NULL || dev_func < lowest_func) {
461 lowest = devlist[i];
462 lowest_func = dev_func;
463 lowest_pos = iov_pos;
464 }
465 }
466 }
467 free(devlist, M_TEMP);
468
469 /*
470 * If we called this function some device must have the SR-IOV
471 * capability.
472 */
473 KASSERT(lowest != NULL,
474 ("Could not find child of %s with SR-IOV capability",
475 device_get_nameunit(bus)));
476
477 iov_ctl = pci_read_config(lowest, lowest_pos + PCIR_SRIOV_CTL, 2);
478 iov_ctl |= PCIM_SRIOV_ARI_EN;
479 pci_write_config(lowest, lowest_pos + PCIR_SRIOV_CTL, iov_ctl, 2);
480 if ((pci_read_config(lowest, lowest_pos + PCIR_SRIOV_CTL, 2) &
481 PCIM_SRIOV_ARI_EN) == 0) {
482 device_printf(lowest, "failed to enable ARI\n");
483 return (ENXIO);
484 }
485 return (0);
486}
487
488static int
489pci_iov_config_page_size(struct pci_devinfo *dinfo)
490{
491 uint32_t page_cap, page_size;
492
493 page_cap = IOV_READ(dinfo, PCIR_SRIOV_PAGE_CAP, 4);
494
495 /*
496 * If the system page size is less than the smallest SR-IOV page size
497 * then round up to the smallest SR-IOV page size.
498 */
499 if (PAGE_SHIFT < PCI_SRIOV_BASE_PAGE_SHIFT)
500 page_size = (1 << 0);
501 else
502 page_size = (1 << (PAGE_SHIFT - PCI_SRIOV_BASE_PAGE_SHIFT));
503
504 /* Check that the device supports the system page size. */
505 if (!(page_size & page_cap))
506 return (ENXIO);
507
508 IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, page_size, 4);
509 return (0);
510}
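/*
 * Worked example of the page-size selection above: amd64 has PAGE_SHIFT 12
 * and PCI_SRIOV_BASE_PAGE_SHIFT is 12, so page_size becomes 1 << 0, i.e.
 * bit 0 (4KB pages) of the Supported Page Sizes capability.  A system page
 * size of 64KB would instead select bit 4, and ENXIO is returned above if
 * the device does not advertise the selected bit in page_cap.
 */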
511
512static int
513pci_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *config)
514{
515 const nvlist_t *device, *driver_config;
516
517 device = nvlist_get_nvlist(config, PF_CONFIG_NAME);
518 driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
519 return (PCI_IOV_INIT(dev, num_vfs, driver_config));
520}
521
522static int
523pci_iov_init_rman(device_t pf, struct pcicfg_iov *iov)
524{
525 int error;
526
527 iov->rman.rm_start = 0;
528 iov->rman.rm_end = ~0;
529 iov->rman.rm_type = RMAN_ARRAY;
530 snprintf(iov->rman_name, sizeof(iov->rman_name), "%s VF I/O memory",
531 device_get_nameunit(pf));
532 iov->rman.rm_descr = iov->rman_name;
533
534 error = rman_init(&iov->rman);
535 if (error != 0)
536 return (error);
537
538 iov->iov_flags |= IOV_RMAN_INITED;
539 return (0);
540}
541
542static int
543pci_iov_alloc_bar_ea(struct pci_devinfo *dinfo, int bar)
544{
545 struct pcicfg_iov *iov;
546 rman_res_t start, end;
547 struct resource *res;
548 struct resource_list *rl;
549 struct resource_list_entry *rle;
550
551 rl = &dinfo->resources;
552 iov = dinfo->cfg.iov;
553
554 rle = resource_list_find(rl, SYS_RES_MEMORY,
555 iov->iov_pos + PCIR_SRIOV_BAR(bar));
556 if (rle == NULL)
557 rle = resource_list_find(rl, SYS_RES_IOPORT,
558 iov->iov_pos + PCIR_SRIOV_BAR(bar));
559 if (rle == NULL)
560 return (ENXIO);
561 res = rle->res;
562
563 iov->iov_bar[bar].res = res;
564 iov->iov_bar[bar].bar_size = rman_get_size(res) / iov->iov_num_vfs;
565 iov->iov_bar[bar].bar_shift = pci_mapsize(iov->iov_bar[bar].bar_size);
566
567 start = rman_get_start(res);
568 end = rman_get_end(res);
569
570 return (rman_manage_region(&iov->rman, start, end));
571}
572
573static int
574pci_iov_setup_bars(struct pci_devinfo *dinfo)
575{
576 device_t dev;
577 struct pcicfg_iov *iov;
578 pci_addr_t bar_value, testval;
579 int i, last_64, error;
580
581 iov = dinfo->cfg.iov;
582 dev = dinfo->cfg.dev;
583 last_64 = 0;
584
585 pci_add_resources_ea(device_get_parent(dev), dev, 1);
586
587 for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
588 /* First, try to use BARs allocated with EA */
589 error = pci_iov_alloc_bar_ea(dinfo, i);
590 if (error == 0)
591 continue;
592
593 /* Allocate legacy-BAR only if EA is not enabled */
594 if (!pci_ea_is_enabled(dinfo->cfg.dev, iov->iov_pos + PCIR_SRIOV_BAR(i)))
595 continue;
596
597 /*
598 * If a PCI BAR is a 64-bit wide BAR, then it spans two
599 * consecutive registers. Therefore if the last BAR that
600 * we looked at was a 64-bit BAR, we need to skip this
601 * register as it's the second half of the last BAR.
602 */
603 if (!last_64) {
604 pci_read_bar(dev,
605 iov->iov_pos + PCIR_SRIOV_BAR(i),
606 &bar_value, &testval, &last_64);
607
608 if (testval != 0) {
609 error = pci_iov_alloc_bar(dinfo, i,
610 pci_mapsize(testval));
611 if (error != 0)
612 return (error);
613 }
614 } else
615 last_64 = 0;
616 }
617
618 return (0);
619}
620
621static void
622pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const nvlist_t *config,
623 uint16_t first_rid, uint16_t rid_stride)
624{
625 char device_name[VF_MAX_NAME];
626 const nvlist_t *device, *driver_config, *iov_config;
627 device_t bus, dev, vf;
628 struct pcicfg_iov *iov;
629 struct pci_devinfo *vfinfo;
630 int i, error;
631 uint16_t vid, did, next_rid;
632
633 iov = dinfo->cfg.iov;
634 dev = dinfo->cfg.dev;
635 bus = device_get_parent(dev);
636 next_rid = first_rid;
637 vid = pci_get_vendor(dev);
638 did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2);
639
640 for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) {
641 snprintf(device_name, sizeof(device_name), VF_PREFIX"%d", i);
642 device = nvlist_get_nvlist(config, device_name);
643 iov_config = nvlist_get_nvlist(device, IOV_CONFIG_NAME);
644 driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
645
646 vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did);
647 if (vf == NULL)
648 break;
649
650 /*
651 * If we are creating passthrough devices then force the ppt
652 * driver to attach to prevent a VF driver from claiming the
653 * VFs.
654 */
655 if (nvlist_get_bool(iov_config, "passthrough"))
656 device_set_devclass_fixed(vf, "ppt");
657
658 vfinfo = device_get_ivars(vf);
659
660 vfinfo->cfg.iov = iov;
661 vfinfo->cfg.vf.index = i;
662
663 pci_iov_add_bars(iov, vfinfo);
664
665 error = PCI_IOV_ADD_VF(dev, i, driver_config);
666 if (error != 0) {
667 device_printf(dev, "Failed to add VF %d\n", i);
668 device_delete_child(bus, vf);
669 }
670 }
671
672 bus_generic_attach(bus);
673}
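/*
 * Worked example of the routing-ID arithmetic used above (offset and
 * stride values hypothetical): for a PF at bus 6, slot 0, function 0
 * (RID 0x600) with a First VF Offset of 128 and a VF Stride of 4, VF 0
 * gets RID 0x600 + 128 = 0x680 (bus 6, slot 16, function 0) and every
 * subsequent VF lands 4 routing IDs higher, which is why pci_iov_config()
 * below checks last_rid against the PF's bus number.
 */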
674
675static int
676pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
677{
678 device_t bus, dev;
679 struct pci_devinfo *dinfo;
680 struct pcicfg_iov *iov;
681 nvlist_t *config;
682 int i, error;
683 uint16_t rid_off, rid_stride;
684 uint16_t first_rid, last_rid;
685 uint16_t iov_ctl;
686 uint16_t num_vfs, total_vfs;
687 int iov_inited;
688
689 mtx_lock(&Giant);
690 dinfo = cdev->si_drv1;
691 iov = dinfo->cfg.iov;
692 dev = dinfo->cfg.dev;
693 bus = device_get_parent(dev);
694 iov_inited = 0;
695 config = NULL;
696
697 if ((iov->iov_flags & IOV_BUSY) || iov->iov_num_vfs != 0) {
698 mtx_unlock(&Giant);
699 return (EBUSY);
700 }
701 iov->iov_flags |= IOV_BUSY;
702
703 error = pci_iov_parse_config(iov, arg, &config);
704 if (error != 0)
705 goto out;
706
707 num_vfs = pci_iov_config_get_num_vfs(config);
708 total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
709 if (num_vfs > total_vfs) {
710 error = EINVAL;
711 goto out;
712 }
713
714 error = pci_iov_config_page_size(dinfo);
715 if (error != 0)
716 goto out;
717
718 error = pci_iov_set_ari(bus);
719 if (error != 0)
720 goto out;
721
722 error = pci_iov_init(dev, num_vfs, config);
723 if (error != 0)
724 goto out;
725 iov_inited = 1;
726
727 IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, num_vfs, 2);
728
729 rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2);
730 rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2);
731
732 first_rid = pci_get_rid(dev) + rid_off;
733 last_rid = first_rid + (num_vfs - 1) * rid_stride;
734
735 /* We don't yet support allocating extra bus numbers for VFs. */
736 if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
737 error = ENOSPC;
738 goto out;
739 }
740
741 iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
742 iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
743 IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
744
745 error = pci_iov_init_rman(dev, iov);
746 if (error != 0)
747 goto out;
748
749 iov->iov_num_vfs = num_vfs;
750
751 error = pci_iov_setup_bars(dinfo);
752 if (error != 0)
753 goto out;
754
755 iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
756 iov_ctl |= PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE;
757 IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
758
759 /* Per specification, we must wait 100ms before accessing VFs. */
760 pause("iov", roundup(hz, 10));
761 pci_iov_enumerate_vfs(dinfo, config, first_rid, rid_stride);
762
763 nvlist_destroy(config);
764 iov->iov_flags &= ~IOV_BUSY;
765 mtx_unlock(&Giant);
766
767 return (0);
768out:
769 if (iov_inited)
770 PCI_IOV_UNINIT(dev);
771
772 for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
773 if (iov->iov_bar[i].res != NULL) {
774 pci_release_resource(bus, dev, SYS_RES_MEMORY,
775 iov->iov_pos + PCIR_SRIOV_BAR(i),
776 iov->iov_bar[i].res);
777 pci_delete_resource(bus, dev, SYS_RES_MEMORY,
778 iov->iov_pos + PCIR_SRIOV_BAR(i));
779 iov->iov_bar[i].res = NULL;
780 }
781 }
782
783 if (iov->iov_flags & IOV_RMAN_INITED) {
784 rman_fini(&iov->rman);
785 iov->iov_flags &= ~IOV_RMAN_INITED;
786 }
787
788 nvlist_destroy(config);
789 iov->iov_num_vfs = 0;
790 iov->iov_flags &= ~IOV_BUSY;
791 mtx_unlock(&Giant);
792 return (error);
793}
794
795void
796pci_iov_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
797{
798 struct pcicfg_iov *iov;
799
800 iov = dinfo->cfg.iov;
801
802 IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, iov->iov_page_size, 4);
803 IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, iov->iov_num_vfs, 2);
804 IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov->iov_ctl, 2);
805}
806
807void
808pci_iov_cfg_save(device_t dev, struct pci_devinfo *dinfo)
809{
810 struct pcicfg_iov *iov;
811
812 iov = dinfo->cfg.iov;
813
814 iov->iov_page_size = IOV_READ(dinfo, PCIR_SRIOV_PAGE_SIZE, 4);
815 iov->iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
816}
817
818/* Return true if child is a VF of the given PF. */
819static int
820pci_iov_is_child_vf(struct pcicfg_iov *pf, device_t child)
821{
822 struct pci_devinfo *vfinfo;
823
824 vfinfo = device_get_ivars(child);
825
826 if (!(vfinfo->cfg.flags & PCICFG_VF))
827 return (0);
828
829 return (pf == vfinfo->cfg.iov);
830}
831
832static int
833pci_iov_delete_iov_children(struct pci_devinfo *dinfo)
834{
835 device_t bus, dev, vf, *devlist;
836 struct pcicfg_iov *iov;
837 int i, error, devcount;
838 uint32_t iov_ctl;
839
840 mtx_assert(&Giant, MA_OWNED);
841
842 iov = dinfo->cfg.iov;
843 dev = dinfo->cfg.dev;
844 bus = device_get_parent(dev);
845 devlist = NULL;
846
847 iov->iov_flags |= IOV_BUSY;
848
849 error = device_get_children(bus, &devlist, &devcount);
850
851 if (error != 0)
852 goto out;
853
854 for (i = 0; i < devcount; i++) {
855 vf = devlist[i];
856
857 if (!pci_iov_is_child_vf(iov, vf))
858 continue;
859
860 error = device_detach(vf);
861 if (error != 0) {
862 device_printf(dev,
863 "Could not disable SR-IOV: failed to detach VF %s\n",
864 device_get_nameunit(vf));
865 goto out;
866 }
867 }
868
869 for (i = 0; i < devcount; i++) {
870 vf = devlist[i];
871
872 if (pci_iov_is_child_vf(iov, vf))
873 device_delete_child(bus, vf);
874 }
875 PCI_IOV_UNINIT(dev);
876
877 iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
878 iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
879 IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
880 IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, 0, 2);
881
882 iov->iov_num_vfs = 0;
883
884 for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
885 if (iov->iov_bar[i].res != NULL) {
886 pci_release_resource(bus, dev, SYS_RES_MEMORY,
887 iov->iov_pos + PCIR_SRIOV_BAR(i),
888 iov->iov_bar[i].res);
889 pci_delete_resource(bus, dev, SYS_RES_MEMORY,
890 iov->iov_pos + PCIR_SRIOV_BAR(i));
891 iov->iov_bar[i].res = NULL;
892 }
893 }
894
895 if (iov->iov_flags & IOV_RMAN_INITED) {
896 rman_fini(&iov->rman);
897 iov->iov_flags &= ~IOV_RMAN_INITED;
898 }
899
900 error = 0;
901out:
902 free(devlist, M_TEMP);
903 iov->iov_flags &= ~IOV_BUSY;
904 return (error);
905}
906
907static int
908pci_iov_delete(struct cdev *cdev)
909{
910 struct pci_devinfo *dinfo;
911 struct pcicfg_iov *iov;
912 int error;
913
914 mtx_lock(&Giant);
915 dinfo = cdev->si_drv1;
916 iov = dinfo->cfg.iov;
917
918 if ((iov->iov_flags & IOV_BUSY) != 0) {
919 error = EBUSY;
920 goto out;
921 }
922 if (iov->iov_num_vfs == 0) {
923 error = ECHILD;
924 goto out;
925 }
926
927 error = pci_iov_delete_iov_children(dinfo);
928
929out:
930 mtx_unlock(&Giant);
931 return (error);
932}
933
934static int
935pci_iov_get_schema_ioctl(struct cdev *cdev, struct pci_iov_schema *output)
936{
937 struct pci_devinfo *dinfo;
938 void *packed;
939 size_t output_len, size;
940 int error;
941
942 packed = NULL;
943
944 mtx_lock(&Giant);
945 dinfo = cdev->si_drv1;
946 packed = nvlist_pack(dinfo->cfg.iov->iov_schema, &size);
947 mtx_unlock(&Giant);
948
949 if (packed == NULL) {
950 error = ENOMEM;
951 goto fail;
952 }
953
954 output_len = output->len;
955 output->len = size;
956 if (size <= output_len) {
957 error = copyout(packed, output->schema, size);
958
959 if (error != 0)
960 goto fail;
961
962 output->error = 0;
963 } else
964 /*
965 * If we return an error then the ioctl code won't copyout
966 * output back to userland, so we flag the error in the struct
967 * instead.
968 */
969 output->error = EMSGSIZE;
970
971 error = 0;
972
973fail:
974 free(packed, M_NVLIST);
975
976 return (error);
977}
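/*
 * Sketch of how userland drives the handler above: a first IOV_GET_SCHEMA
 * call with len == 0 reports the packed size through the struct, a second
 * call fills the caller's buffer, and the result can then be unpacked with
 * nvlist_unpack().  The helper name and device path are hypothetical.
 */
#if 0	/* example only, not compiled */
#include <sys/ioctl.h>
#include <sys/iov.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static void *
example_iov_get_schema(const char *iov_dev, size_t *lenp)
{
	struct pci_iov_schema arg;
	int fd;

	fd = open(iov_dev, O_RDWR);	/* e.g. "/dev/iov/foo0" */
	if (fd < 0)
		return (NULL);

	/* First pass only discovers the required buffer size. */
	memset(&arg, 0, sizeof(arg));
	(void)ioctl(fd, IOV_GET_SCHEMA, &arg);

	arg.schema = malloc(arg.len);
	if (arg.schema == NULL || ioctl(fd, IOV_GET_SCHEMA, &arg) != 0 ||
	    arg.error != 0) {
		free(arg.schema);
		close(fd);
		return (NULL);
	}
	close(fd);
	*lenp = arg.len;
	return (arg.schema);
}
#endif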
978
979static int
980pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
981 struct thread *td)
982{
983
984 switch (cmd) {
985 case IOV_CONFIG:
986 return (pci_iov_config(dev, (struct pci_iov_arg *)data));
987 case IOV_DELETE:
988 return (pci_iov_delete(dev));
989 case IOV_GET_SCHEMA:
990 return (pci_iov_get_schema_ioctl(dev,
991 (struct pci_iov_schema *)data));
992 default:
993 return (EINVAL);
994 }
995}
996
997struct resource *
998pci_vf_alloc_mem_resource(device_t dev, device_t child, int *rid,
999 rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
1000{
1001 struct pci_devinfo *dinfo;
1002 struct pcicfg_iov *iov;
1003 struct pci_map *map;
1004 struct resource *res;
1005 struct resource_list_entry *rle;
1006 rman_res_t bar_start, bar_end;
1007 pci_addr_t bar_length;
1008 int error;
1009
1010 dinfo = device_get_ivars(child);
1011 iov = dinfo->cfg.iov;
1012
1013 map = pci_find_bar(child, *rid);
1014 if (map == NULL)
1015 return (NULL);
1016
1017 bar_length = 1 << map->pm_size;
1018 bar_start = map->pm_value;
1019 bar_end = bar_start + bar_length - 1;
1020
1021 /* Make sure that the resource fits the constraints. */
1022 if (bar_start >= end || bar_end <= bar_start || count != 1)
1023 return (NULL);
1024
1025 /* Clamp the resource to the constraints if necessary. */
1026 if (bar_start < start)
1027 bar_start = start;
1028 if (bar_end > end)
1029 bar_end = end;
1030 bar_length = bar_end - bar_start + 1;
1031
1032 res = rman_reserve_resource(&iov->rman, bar_start, bar_end,
1033 bar_length, flags, child);
1034 if (res == NULL)
1035 return (NULL);
1036
1037 rle = resource_list_add(&dinfo->resources, SYS_RES_MEMORY, *rid,
1038 bar_start, bar_end, 1);
1039 if (rle == NULL) {
1040 rman_release_resource(res);
1041 return (NULL);
1042 }
1043
1044 rman_set_rid(res, *rid);
1045
1046 if (flags & RF_ACTIVE) {
1047 error = bus_activate_resource(child, SYS_RES_MEMORY, *rid, res);
1048 if (error != 0) {
1049 resource_list_delete(&dinfo->resources, SYS_RES_MEMORY,
1050 *rid);
1051 rman_release_resource(res);
1052 return (NULL);
1053 }
1054 }
1055 rle->res = res;
1056
1057 return (res);
1058}
1059
1060int
1061pci_vf_release_mem_resource(device_t dev, device_t child, int rid,
1062 struct resource *r)
1063{
1064 struct pci_devinfo *dinfo;
1065 struct resource_list_entry *rle;
1066 int error;
1067
1068 dinfo = device_get_ivars(child);
1069
1070 if (rman_get_flags(r) & RF_ACTIVE) {
1071 error = bus_deactivate_resource(child, SYS_RES_MEMORY, rid, r);
1072 if (error != 0)
1073 return (error);
1074 }
1075
1076 rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY, rid);
1077 if (rle != NULL) {
1078 rle->res = NULL;
1079 resource_list_delete(&dinfo->resources, SYS_RES_MEMORY,
1080 rid);
1081 }
1082
1083 return (rman_release_resource(r));
1084}