36#include <sys/malloc.h>
37#include <sys/kernel.h>
44#include <sys/module.h>
45#include <sys/sysctl.h>
47#include <machine/bus.h>
49#include <machine/resource.h>
50#include <machine/vmparam.h>
52#include <xen/xen-os.h>
53#include <xen/hypervisor.h>
54#include <xen/xen_intr.h>
55#include <xen/gnttab.h>
56#include <contrib/xen/grant_table.h>
57#include <contrib/xen/io/protocols.h>
58#include <xen/xenbus/xenbusvar.h>
60#include <machine/_inttypes.h>
62#include <geom/geom_disk.h>
74#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
76#define DPRINTK(fmt, args...)
79#define XBD_SECTOR_SHFT 9
82static MALLOC_DEFINE(M_XENBLOCKFRONT,
"xbd",
"Xen Block Front driver data");
85SYSCTL_NODE(_hw, OID_AUTO, xbd, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
86 "xbd driver parameters");
108 panic(
"%s: Thaw with flag 0x%x while not frozen.",
151 (
"Freeing command that is still on queue %d.",
163 grant_ref_t * gref_head,
int otherend_id,
int readonly,
164 grant_ref_t * sg_ref,
struct blkif_request_segment *sg)
166 struct blkif_request_segment *last_block_sg = sg + nsegs;
167 vm_paddr_t buffer_ma;
168 uint64_t fsect, lsect;
171 while (sg < last_block_sg) {
173 (
"XEN disk driver I/O must be sector aligned"));
175 (
"XEN disk driver I/Os must be a multiple of "
176 "the sector length"));
177 buffer_ma = segs->ds_addr;
181 KASSERT(lsect <= 7, (
"XEN disk driver data cannot "
182 "cross a page boundary"));
191 KASSERT(ref != ~0, (
"grant_reference failed"));
196 buffer_ma >> PAGE_SHIFT,
200 *sg = (
struct blkif_request_segment) {
222 cm->
cm_bp->bio_error = EIO;
228 KASSERT(nsegs <= sc->xbd_max_request_segments,
229 (
"Too many segments in a blkfront I/O"));
231 if (nsegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST) {
232 blkif_request_t *ring_req;
235 ring_req = (blkif_request_t *)
238 ring_req->id = cm->
cm_id;
241 ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->
xbd_disk;
242 ring_req->nr_segments = nsegs;
245 xenbus_get_otherend_id(sc->
xbd_dev),
249 blkif_request_indirect_t *ring_req;
252 ring_req = (blkif_request_indirect_t *)
255 ring_req->id = cm->
cm_id;
256 ring_req->operation = BLKIF_OP_INDIRECT;
259 ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->
xbd_disk;
260 ring_req->nr_segments = nsegs;
263 xenbus_get_otherend_id(sc->
xbd_dev),
271 op = BUS_DMASYNC_PREREAD;
273 op = BUS_DMASYNC_PREWRITE;
300 if (cm->
cm_bp != NULL)
306 if (error == EINPROGRESS) {
367 switch (bp->bio_cmd) {
373 if ((bp->bio_flags & BIO_ORDERED) != 0) {
399 panic(
"flush request, but no flush support available");
402 biofinish(bp, NULL, EOPNOTSUPP);
421 int error, queued = 0;
449 printf(
"xbd_queue_request returned %d\n", error);
466 if (__predict_false(cm->
cm_status != BLKIF_RSP_OKAY)) {
467 disk_err(bp,
"disk error" , -1, 0);
469 bp->bio_flags |= BIO_ERROR;
472 if (bp->bio_flags & BIO_ERROR)
486 blkif_response_t *bret;
501 for (i = sc->
xbd_ring.rsp_cons; i != rp;) {
502 bret = RING_GET_RESPONSE(&sc->
xbd_ring, i);
511 op = BUS_DMASYNC_POSTREAD;
514 op = BUS_DMASYNC_POSTWRITE;
541 if (i != sc->
xbd_ring.req_prod_pvt) {
543 RING_FINAL_CHECK_FOR_RESPONSES(&sc->
xbd_ring, more_to_do);
547 sc->
xbd_ring.sring->rsp_event = i + 1;
572 RING_FINAL_CHECK_FOR_RESPONSES(&sc->
xbd_ring, mtd);
596xbd_dump(
void *arg,
void *
virtual, vm_offset_t physical, off_t offset,
599 struct disk *dp = arg;
618 for (sbp=0; length > 0; sbp++) {
622 device_printf(sc->
xbd_dev,
"dump: no more commands?\n");
630 device_printf(sc->
xbd_dev,
"no more grant allocs?\n");
646 virtual = (
char *)
virtual + chunk;
660 "Dump I/O failed at sector %jd\n",
677 printf(
"xbd%d: not found", dp->d_unit);
700 if (xenbus_get_otherend_state(sc->
xbd_dev) ==
708xbd_ioctl(
struct disk *dp, u_long cmd,
void *addr,
int flag,
struct thread *td)
725 struct xbd_softc *sc = bp->bio_disk->d_drv1;
729 bp->bio_error = EINVAL;
730 bp->bio_flags |= BIO_ERROR;
731 bp->bio_resid = bp->bio_bcount;
752 blkif_sring_t *sring;
753 uintptr_t sring_page_addr;
760 xenbus_dev_fatal(sc->
xbd_dev, ENOMEM,
"allocating shared ring");
763 SHARED_RING_INIT(sring);
766 for (i = 0, sring_page_addr = (uintptr_t)sring;
768 i++, sring_page_addr += PAGE_SIZE) {
769 error = xenbus_grant_ring(sc->
xbd_dev,
770 (vtophys(sring_page_addr) >> PAGE_SHIFT),
773 xenbus_dev_fatal(sc->
xbd_dev, error,
774 "granting ring_ref(%d)", i);
782 xenbus_dev_fatal(sc->
xbd_dev, error,
783 "writing %s/ring-ref",
789 char ring_ref_name[]=
"ring_refXX";
791 snprintf(ring_ref_name,
sizeof(ring_ref_name),
796 xenbus_dev_fatal(sc->
xbd_dev, error,
805 error = xen_intr_alloc_and_bind_local_port(sc->
xbd_dev,
809 xenbus_dev_fatal(sc->
xbd_dev, error,
810 "xen_intr_alloc_and_bind_local_port failed");
831 free(sc->
xbd_ring.sring, M_XENBLOCKFRONT);
842 sbuf_new(&sb, features, len, SBUF_FIXEDLEN);
846 sbuf_printf(&sb,
"flush");
851 if (feature_cnt != 0)
852 sbuf_printf(&sb,
", ");
853 sbuf_printf(&sb,
"write_barrier");
858 if (feature_cnt != 0)
859 sbuf_printf(&sb,
", ");
860 sbuf_printf(&sb,
"discard");
865 if (feature_cnt != 0)
866 sbuf_printf(&sb,
", ");
867 sbuf_printf(&sb,
"persistent_grants");
871 (void) sbuf_finish(&sb);
872 return (sbuf_len(&sb));
883 error = sysctl_wire_old_buffer(req, 0);
890 return (SYSCTL_OUT(req, features, len + 1));
896 struct sysctl_ctx_list *sysctl_ctx = NULL;
897 struct sysctl_oid *sysctl_tree = NULL;
898 struct sysctl_oid_list *children;
900 sysctl_ctx = device_get_sysctl_ctx(xbd->
xbd_dev);
901 if (sysctl_ctx == NULL)
904 sysctl_tree = device_get_sysctl_tree(xbd->
xbd_dev);
905 if (sysctl_tree == NULL)
908 children = SYSCTL_CHILDREN(sysctl_tree);
909 SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
911 "maximum outstanding requests (negotiated)");
913 SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
914 "max_request_segments", CTLFLAG_RD,
916 "maximum number of pages per requests (negotiated)");
918 SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
920 "maximum size in bytes of a request (negotiated)");
922 SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
924 "communication channel pages (negotiated)");
926 SYSCTL_ADD_PROC(sysctl_ctx, children, OID_AUTO,
927 "features", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, xbd,
939 static struct vdev_info {
977 int major = vdevice >> 8;
978 int minor = vdevice & 0xff;
981 if (vdevice & (1 << 28)) {
982 *unit = (vdevice & ((1 << 28) - 1)) >> 8;
987 for (i = 0; info[i].major; i++) {
988 if (info[i].major == major) {
989 *unit = info[i].base + (minor >> info[i].shift);
990 *name = info[i].name;
1001 int vdevice, uint16_t vdisk_info,
unsigned long sector_size,
1002 unsigned long phys_sector_size)
1005 int unit, error = 0;
1012 if (strcmp(name,
"xbd") != 0)
1013 device_printf(sc->
xbd_dev,
"attaching as %s%d\n", name, unit);
1016 device_printf(sc->
xbd_dev,
"features: %s\n",
1029 sc->
xbd_disk->d_sectorsize = sector_size;
1030 sc->
xbd_disk->d_stripesize = phys_sector_size;
1033 sc->
xbd_disk->d_mediasize = sectors * sector_size;
1035 sc->
xbd_disk->d_flags = DISKFLAG_UNMAPPED_BIO;
1037 sc->
xbd_disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
1039 "synchronize cache commands enabled.\n");
1041 disk_create(sc->
xbd_disk, DISK_VERSION);
1098 const char *otherend_path;
1099 const char *node_path;
1100 uint32_t max_ring_page_order;
1103 if (xenbus_get_state(sc->
xbd_dev) != XenbusStateInitialising) {
1112 max_ring_page_order = 0;
1126 otherend_path = xenbus_get_otherend_path(sc->
xbd_dev);
1127 node_path = xenbus_get_node(sc->
xbd_dev);
1130 (void)
xs_scanf(XST_NIL, otherend_path,
1131 "max-ring-page-order", NULL,
"%" PRIu32,
1132 &max_ring_page_order);
1134 (void)
xs_scanf(XST_NIL, otherend_path,
1135 "max-ring-pages", NULL,
"%" PRIu32,
1142 "Back-end specified ring-pages of %u "
1143 "limited to front-end limit of %u.\n",
1149 uint32_t new_page_limit;
1153 "Back-end specified ring-pages of %u "
1154 "is not a power of 2. Limited to %u.\n",
1163 "Back-end specified max_requests of %u "
1164 "limited to front-end limit of %zu.\n",
1175 "num-ring-pages",
"%u",
1178 xenbus_dev_fatal(sc->
xbd_dev, error,
1179 "writing %s/num-ring-pages",
1185 "ring-page-order",
"%u",
1188 xenbus_dev_fatal(sc->
xbd_dev, error,
1189 "writing %s/ring-page-order",
1195 error =
xs_printf(XST_NIL, node_path,
"event-channel",
1198 xenbus_dev_fatal(sc->
xbd_dev, error,
1199 "writing %s/event-channel",
1204 error =
xs_printf(XST_NIL, node_path,
"protocol",
1205 "%s", XEN_IO_PROTO_ABI_NATIVE);
1207 xenbus_dev_fatal(sc->
xbd_dev, error,
1208 "writing %s/protocol",
1213 xenbus_set_state(sc->
xbd_dev, XenbusStateInitialised);
1224 blkif_sector_t sectors;
1225 unsigned long sector_size, phys_sector_size;
1227 int err, feature_barrier, feature_flush;
1230 DPRINTK(
"blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));
1243 err =
xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
1244 "sectors",
"%"PRIu64, §ors, NULL);
1246 xenbus_dev_error(dev, err,
1247 "reading sectors at %s",
1248 xenbus_get_otherend_path(dev));
1251 disk->d_mediasize = disk->d_sectorsize * sectors;
1252 err = disk_resize(disk, M_NOWAIT);
1254 xenbus_dev_error(dev, err,
1255 "unable to resize disk %s%u",
1256 disk->d_name, disk->d_unit);
1260 "changed capacity to %jd\n",
1261 (intmax_t)disk->d_mediasize);
1265 err =
xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
1266 "sectors",
"%"PRIu64, §ors,
1267 "info",
"%u", &binfo,
1268 "sector-size",
"%lu", §or_size,
1271 xenbus_dev_fatal(dev, err,
1272 "reading backend fields at %s",
1273 xenbus_get_otherend_path(dev));
1276 if ((sectors == 0) || (sector_size == 0)) {
1277 xenbus_dev_fatal(dev, 0,
1278 "invalid parameters from %s:"
1279 " sectors = %"PRIu64
", sector_size = %lu",
1280 xenbus_get_otherend_path(dev),
1281 sectors, sector_size);
1284 err =
xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
1285 "physical-sector-size",
"%lu", &phys_sector_size,
1287 if (err || phys_sector_size <= sector_size)
1288 phys_sector_size = 0;
1289 err =
xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
1290 "feature-barrier",
"%d", &feature_barrier,
1292 if (err == 0 && feature_barrier != 0)
1295 err =
xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
1296 "feature-flush-cache",
"%d", &feature_flush,
1298 if (err == 0 && feature_flush != 0)
1301 err =
xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
1302 "feature-max-indirect-segments",
"%" PRIu32,
1318 err = bus_dma_tag_create(
1332 xenbus_dev_fatal(sc->
xbd_dev, err,
1333 "Cannot allocate parent DMA tag\n");
1339 M_XENBLOCKFRONT, M_NOWAIT|M_ZERO);
1342 xenbus_dev_fatal(sc->
xbd_dev, ENOMEM,
1343 "Cannot allocate request structures\n");
1349 void * indirectpages;
1354 M_XENBLOCKFRONT, M_NOWAIT);
1363 indirectpages = contigmalloc(
1365 M_XENBLOCKFRONT, M_ZERO | M_NOWAIT, 0, ~0,
1367 if (indirectpages == NULL)
1370 indirectpages = NULL;
1374 xenbus_get_otherend_id(sc->
xbd_dev),
1375 (vtophys(indirectpages) >> PAGE_SHIFT) + j,
1380 if (j < sc->xbd_max_request_indirectpages) {
1381 contigfree(indirectpages,
1391 device_printf(dev,
"%juMB <%s> at %s",
1392 (uintmax_t) sectors / (1048576 / sector_size),
1393 device_get_desc(dev),
1394 xenbus_get_node(dev));
1395 bus_print_child_footer(device_get_parent(dev), dev);
1398 sector_size, phys_sector_size);
1401 (void)xenbus_set_state(dev, XenbusStateConnected);
1420 struct xbd_softc *sc = device_get_softc(dev);
1422 xenbus_set_state(dev, XenbusStateClosing);
1424 DPRINTK(
"xbd_closing: %s removed\n", xenbus_get_node(dev));
1431 xenbus_set_state(dev, XenbusStateClosed);
1438 if (strcmp(xenbus_get_type(dev),
"vbd") != 0)
1441 if (xen_pv_disks_disabled())
1444 if (xen_hvm_domain()) {
1462 error =
xs_read(XST_NIL, xenbus_get_node(dev),
1463 "device-type", NULL, (
void **) &type);
1467 if (strncmp(type,
"cdrom", 5) == 0) {
1468 free(type, M_XENSTORE);
1471 free(type, M_XENSTORE);
1474 device_set_desc(dev,
"Virtual Block Device");
1495 error =
xs_scanf(XST_NIL, xenbus_get_node(dev),
1496 "virtual-device", NULL,
"%" PRIu32, &vdevice);
1498 error =
xs_scanf(XST_NIL, xenbus_get_node(dev),
1499 "virtual-device-ext", NULL,
"%" PRIu32, &vdevice);
1501 xenbus_dev_fatal(dev, error,
"reading virtual-device");
1502 device_printf(dev,
"Couldn't determine virtual device.\n");
1507 if (!strcmp(name,
"xbd"))
1508 device_set_unit(dev, unit);
1510 sc = device_get_softc(dev);
1511 mtx_init(&sc->
xbd_io_lock,
"blkfront i/o lock", NULL, MTX_DEF);
1523 xenbus_set_state(dev, XenbusStateInitialising);
1531 struct xbd_softc *sc = device_get_softc(dev);
1533 DPRINTK(
"%s: %s removed\n", __func__, xenbus_get_node(dev));
1544 struct xbd_softc *sc = device_get_softc(dev);
1557 PRIBIO,
"blkf_susp", 30 * hz) == EWOULDBLOCK) {
1573 struct xbd_softc *sc = device_get_softc(dev);
1580 DPRINTK(
"xbd_resume: %s\n", xenbus_get_node(dev));
1593 struct xbd_softc *sc = device_get_softc(dev);
1595 DPRINTK(
"backend_state=%d\n", backend_state);
1597 switch (backend_state) {
1598 case XenbusStateUnknown:
1599 case XenbusStateInitialising:
1600 case XenbusStateReconfigured:
1601 case XenbusStateReconfiguring:
1602 case XenbusStateClosed:
1605 case XenbusStateInitWait:
1606 case XenbusStateInitialised:
1610 case XenbusStateConnected:
1615 case XenbusStateClosing:
1617 device_printf(dev,
"detaching with pending users\n");
1619 (
"NULL disk with pending users\n"));
1634 DEVMETHOD(device_shutdown, bus_generic_shutdown),
static void xbd_connect(struct xbd_softc *sc)
static void xbd_restart_queue_callback(void *arg)
static int xbd_alloc_ring(struct xbd_softc *sc)
static void xbd_cm_freeze(struct xbd_softc *sc, struct xbd_command *cm, xbdc_flag_t cm_flag)
static int xbd_close(struct disk *dp)
static device_method_t xbd_methods[]
static int xbd_enable_indirect
static void xbd_flush_requests(struct xbd_softc *sc)
static int xbd_resume(device_t dev)
static struct xbd_command * xbd_bio_command(struct xbd_softc *sc)
static void xbd_setup_sysctl(struct xbd_softc *xbd)
static int xbd_queue_request(struct xbd_softc *sc, struct xbd_command *cm)
static int xbd_detach(device_t dev)
static void xbd_cm_thaw(struct xbd_softc *sc, struct xbd_command *cm)
static int xbd_open(struct disk *dp)
static void xbd_strategy(struct bio *bp)
static void xbd_bio_complete(struct xbd_softc *sc, struct xbd_command *cm)
static void xbd_backend_changed(device_t dev, XenbusState backend_state)
static void xbd_startio(struct xbd_softc *sc)
static void xbd_free_ring(struct xbd_softc *sc)
SYSCTL_NODE(_hw, OID_AUTO, xbd, CTLFLAG_RD|CTLFLAG_MPSAFE, 0, "xbd driver parameters")
static void xbd_int(void *xsc)
DRIVER_MODULE(xbd, xenbusb_front, xbd_driver, xbd_devclass, 0, 0)
static int xbd_sysctl_features(SYSCTL_HANDLER_ARGS)
static int xbd_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
int xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors, int vdevice, uint16_t vdisk_info, unsigned long sector_size, unsigned long phys_sector_size)
static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data")
static int xbd_attach(device_t dev)
static int xbd_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length)
static void xbd_free(struct xbd_softc *sc)
static void xbd_freeze(struct xbd_softc *sc, xbd_flag_t xbd_flag)
static void xbd_quiesce(struct xbd_softc *sc)
static void xbd_dump_complete(struct xbd_command *cm)
static void xbd_mksegarray(bus_dma_segment_t *segs, int nsegs, grant_ref_t *gref_head, int otherend_id, int readonly, grant_ref_t *sg_ref, struct blkif_request_segment *sg)
static void xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
static void xbd_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name)
static void xbd_thaw(struct xbd_softc *sc, xbd_flag_t xbd_flag)
static void xbd_free_command(struct xbd_command *cm)
#define DPRINTK(fmt, args...)
static int xbd_feature_string(struct xbd_softc *sc, char *features, size_t len)
static int xbd_suspend(device_t dev)
static driver_t xbd_driver
static void xbd_closing(device_t)
static int xbd_probe(device_t dev)
SYSCTL_INT(_hw_xbd, OID_AUTO, xbd_enable_indirect, CTLFLAG_RDTUN, &xbd_enable_indirect, 0, "Enable xbd indirect segments")
static void xbd_initialize(struct xbd_softc *sc)
static void xbd_initq_cm(struct xbd_softc *sc, xbd_q_index_t index)
static struct xbd_command * xbd_dequeue_cm(struct xbd_softc *sc, xbd_q_index_t index)
static void xbd_initqs(struct xbd_softc *sc)
#define XBD_MAX_RING_PAGES
static void xbd_remove_cm(struct xbd_command *cm, xbd_q_index_t expected_index)
static void xbd_enqueue_bio(struct xbd_softc *sc, struct bio *bp)
#define XBD_SEGS_TO_SIZE(segs)
static void xbd_requeue_cm(struct xbd_command *cm, xbd_q_index_t index)
static struct bio * xbd_dequeue_bio(struct xbd_softc *sc)
#define XBD_INDIRECT_SEGS_TO_PAGES(segs)
static void xbd_requeue_bio(struct xbd_softc *sc, struct bio *bp)
static void xbd_enqueue_cm(struct xbd_command *cm, xbd_q_index_t index)
static uint32_t xbd_queue_length(struct xbd_softc *sc, xbd_q_index_t index)
#define XBD_MAX_INDIRECT_SEGMENTS
#define XBD_SIZE_TO_SEGS(size)
bool xen_suspend_cancelled
static void notify(struct notify_data *notify, vm_page_t page)
void gnttab_free_grant_references(grant_ref_t head)
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, unsigned long frame, int readonly)
void gnttab_request_free_callback(struct gnttab_free_callback *callback, void(*fn)(void *), void *arg, uint16_t count)
void gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs)
int gnttab_end_foreign_access_ref(grant_ref_t ref)
int gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head)
int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly, grant_ref_t *result)
int gnttab_claim_grant_reference(grant_ref_t *private_head)
blkif_sector_t cm_sector_number
grant_ref_t cm_indirectionrefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]
void * cm_indirectionpages
struct xbd_command * xbd_shadow
xbd_cm_q_t xbd_cm_q[XBD_Q_COUNT]
grant_ref_t xbd_ring_ref[XBD_MAX_RING_PAGES]
uint32_t xbd_max_request_segments
uint32_t xbd_max_request_indirectpages
uint32_t xbd_max_request_size
uint32_t xbd_max_requests
struct gnttab_free_callback xbd_callback
blkif_front_ring_t xbd_ring
xen_intr_handle_t xen_intr_handle
bus_dma_tag_t xbd_io_dmat
int xs_printf(struct xs_transaction t, const char *dir, const char *node, const char *fmt,...)
int xs_read(struct xs_transaction t, const char *dir, const char *node, u_int *len, void **result)
int xs_scanf(struct xs_transaction t, const char *dir, const char *node, int *scancountp, const char *fmt,...)
int xs_gather(struct xs_transaction t, const char *dir,...)