FreeBSD kernel /amd64 XEN device code
privcmd.c
/*
 * Copyright (c) 2014 Roger Pau Monné <roger.pau@citrix.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/bus.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/selinfo.h>
#include <sys/poll.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/rman.h>
#include <sys/tree.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/bitset.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>

#include <machine/md_var.h>

#include <xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/privcmd.h>
#include <xen/error.h>

MALLOC_DEFINE(M_PRIVCMD, "privcmd_dev", "Xen privcmd user-space device");

#define MAX_DMOP_BUFFERS 16

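/*
 * State kept for each mmap() of the privcmd device: the VM object backing
 * the mapping, its size in pages, the reserved pseudo-physical memory range
 * that backs the foreign frames, whether the foreign frames have been
 * populated yet, and a bitset marking indexes that failed to map.
 */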
struct privcmd_map {
	vm_object_t mem;
	vm_size_t size;
	struct resource *pseudo_phys_res;
	int pseudo_phys_res_id;
	vm_paddr_t phys_base_addr;
	boolean_t mapped;
	BITSET_DEFINE_VAR() *err;
};

static d_ioctl_t privcmd_ioctl;
static d_open_t privcmd_open;
static d_mmap_single_t privcmd_mmap_single;

static struct cdevsw privcmd_devsw = {
	.d_version = D_VERSION,
	.d_ioctl = privcmd_ioctl,
	.d_mmap_single = privcmd_mmap_single,
	.d_open = privcmd_open,
	.d_name = "privcmd",
};

static int privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color);
static void privcmd_pg_dtor(void *handle);
static int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
    int prot, vm_page_t *mres);

static struct cdev_pager_ops privcmd_pg_ops = {
	.cdev_pg_fault = privcmd_pg_fault,
	.cdev_pg_ctor = privcmd_pg_ctor,
	.cdev_pg_dtor = privcmd_pg_dtor,
};

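/*
 * Per-open state: the domain this file descriptor has been restricted to
 * via IOCTL_PRIVCMD_RESTRICT, or DOMID_INVALID if unrestricted.
 */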
struct per_user_data {
	domid_t dom;
};

static device_t privcmd_dev = NULL;

/*------------------------- Privcmd Pager functions --------------------------*/
static int
privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{

	return (0);
}

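/*
 * Pager destructor: drop the foreign mappings from the physmap and release
 * the pseudo-physical memory range that was reserved for them.
 */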
static void
privcmd_pg_dtor(void *handle)
{
	struct xen_remove_from_physmap rm = { .domid = DOMID_SELF };
	struct privcmd_map *map = handle;
	int error __diagused;
	vm_size_t i;
	vm_page_t m;

	/*
	 * Remove the mappings from the used pages. This will remove the
	 * underlying p2m bindings in Xen second stage translation.
	 */
	if (map->mapped == true) {
		VM_OBJECT_WLOCK(map->mem);
retry:
		for (i = 0; i < map->size; i++) {
			m = vm_page_lookup(map->mem, i);
			if (m == NULL)
				continue;
			if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0)
				goto retry;
			cdev_pager_free_page(map->mem, m);
		}
		VM_OBJECT_WUNLOCK(map->mem);

		for (i = 0; i < map->size; i++) {
			rm.gpfn = atop(map->phys_base_addr) + i;
			HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm);
		}
		free(map->err, M_PRIVCMD);
	}

	error = xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
	    map->pseudo_phys_res);
	KASSERT(error == 0, ("Unable to release memory resource: %d", error));

	free(map, M_PRIVCMD);
}

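/*
 * Page fault handler: resolve a fault against the fictitious page backing
 * the pseudo-physical range. The fault fails if the foreign frames have not
 * been populated yet, the index is out of range, or the mapping of that
 * index was reported as broken by Xen.
 */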
static int
privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
    int prot, vm_page_t *mres)
{
	struct privcmd_map *map = object->handle;
	vm_pindex_t pidx;
	vm_page_t page;

	if (map->mapped != true)
		return (VM_PAGER_FAIL);

	pidx = OFF_TO_IDX(offset);
	if (pidx >= map->size || BIT_ISSET(map->size, pidx, map->err))
		return (VM_PAGER_FAIL);

	page = PHYS_TO_VM_PAGE(map->phys_base_addr + offset);
	if (page == NULL)
		return (VM_PAGER_FAIL);

	KASSERT((page->flags & PG_FICTITIOUS) != 0,
	    ("not fictitious %p", page));
	KASSERT(vm_page_wired(page), ("page %p not wired", page));
	KASSERT(!vm_page_busied(page), ("page %p is busy", page));

	vm_page_busy_acquire(page, 0);
	vm_page_valid(page);

	if (*mres != NULL)
		vm_page_replace(page, object, pidx, *mres);
	else
		vm_page_insert(page, object, pidx);
	*mres = page;
	return (VM_PAGER_OK);
}

/*----------------------- Privcmd char device methods ------------------------*/
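/*
 * mmap() handler: reserve a pseudo-physical memory range for the mapping and
 * hand back a managed device pager object. The foreign frames are only
 * populated later by the MMAPBATCH or MMAP_RESOURCE ioctls.
 */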
static int
privcmd_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
    vm_object_t *object, int nprot)
{
	struct privcmd_map *map;

	map = malloc(sizeof(*map), M_PRIVCMD, M_WAITOK | M_ZERO);

	map->size = OFF_TO_IDX(size);
	map->pseudo_phys_res_id = 0;

	map->pseudo_phys_res = xenmem_alloc(privcmd_dev,
	    &map->pseudo_phys_res_id, size);
	if (map->pseudo_phys_res == NULL) {
		free(map, M_PRIVCMD);
		return (ENOMEM);
	}

	map->phys_base_addr = rman_get_start(map->pseudo_phys_res);
	map->mem = cdev_pager_allocate(map, OBJT_MGTDEVICE, &privcmd_pg_ops,
	    size, nprot, *offset, NULL);
	if (map->mem == NULL) {
		xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
		    map->pseudo_phys_res);
		free(map, M_PRIVCMD);
		return (ENOMEM);
	}

	*object = map->mem;

	return (0);
}

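/*
 * Look up the user-space range [addr, addr + num * PAGE_SIZE) and check that
 * it is covered by exactly one map entry backed by a privcmd pager object.
 * On success allocate the per-page error bitset and return the associated
 * privcmd_map, otherwise return NULL.
 */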
static struct privcmd_map *
setup_virtual_area(struct thread *td, unsigned long addr, unsigned long num)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t mem;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;
	struct privcmd_map *umap;
	int error;

	if ((num == 0) || ((addr & PAGE_MASK) != 0))
		return NULL;

	map = &td->td_proc->p_vmspace->vm_map;
	error = vm_map_lookup(&map, addr, VM_PROT_NONE, &entry, &mem, &pindex,
	    &prot, &wired);
	if (error != KERN_SUCCESS || (entry->start != addr) ||
	    (entry->end != addr + (num * PAGE_SIZE)))
		return NULL;

	vm_map_lookup_done(map, entry);
	if ((mem->type != OBJT_MGTDEVICE) ||
	    (mem->un_pager.devp.ops != &privcmd_pg_ops))
		return NULL;

	umap = mem->handle;
	/* Allocate a bitset to store broken page mappings. */
	umap->err = BITSET_ALLOC(num, M_PRIVCMD, M_WAITOK | M_ZERO);

	return umap;
}

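/*
 * ioctl dispatcher: hypercall passthrough, foreign memory and resource
 * mapping, device model operations, and per-descriptor domain restriction.
 */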
static int
privcmd_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
    int mode, struct thread *td)
{
	int error;
	unsigned int i;
	void *data;
	const struct per_user_data *u;

	error = devfs_get_cdevpriv(&data);
	if (error != 0)
		return (EINVAL);
	/*
	 * Constify user-data to prevent unintended changes to the restriction
	 * limits.
	 */
	u = data;

	switch (cmd) {
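	/* Pass an arbitrary hypercall from user-space through to Xen. */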
	case IOCTL_PRIVCMD_HYPERCALL: {
		struct ioctl_privcmd_hypercall *hcall;

		hcall = (struct ioctl_privcmd_hypercall *)arg;

		/* Forbid hypercalls if restricted. */
		if (u->dom != DOMID_INVALID) {
			error = EPERM;
			break;
		}

#ifdef __amd64__
		/*
		 * The hypervisor page table walker will refuse to access
		 * user-space pages if SMAP is enabled, so temporarily disable
		 * it while performing the hypercall.
		 */
		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
			stac();
#endif
		error = privcmd_hypercall(hcall->op, hcall->arg[0],
		    hcall->arg[1], hcall->arg[2], hcall->arg[3], hcall->arg[4]);
#ifdef __amd64__
		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
			clac();
#endif
		if (error >= 0) {
			hcall->retval = error;
			error = 0;
		} else {
			error = xen_translate_error(error);
			hcall->retval = 0;
		}
		break;
	}
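	/*
	 * Map a batch of foreign frames into the pseudo-physical range
	 * backing a previous privcmd mmap(), recording per-page errors.
	 */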
	case IOCTL_PRIVCMD_MMAPBATCH: {
		struct ioctl_privcmd_mmapbatch *mmap;
		struct xen_add_to_physmap_range add;
		xen_ulong_t *idxs;
		xen_pfn_t *gpfns;
		int *errs;
		unsigned int index;
		struct privcmd_map *umap;
		uint16_t num;

		mmap = (struct ioctl_privcmd_mmapbatch *)arg;

		if (u->dom != DOMID_INVALID && u->dom != mmap->dom) {
			error = EPERM;
			break;
		}

		umap = setup_virtual_area(td, mmap->addr, mmap->num);
		if (umap == NULL) {
			error = EINVAL;
			break;
		}

		add.domid = DOMID_SELF;
		add.space = XENMAPSPACE_gmfn_foreign;
		add.foreign_domid = mmap->dom;

		/*
		 * The 'size' field in the xen_add_to_physmap_range only
		 * allows for UINT16_MAX mappings in a single hypercall.
		 */
		num = MIN(mmap->num, UINT16_MAX);

		idxs = malloc(sizeof(*idxs) * num, M_PRIVCMD, M_WAITOK);
		gpfns = malloc(sizeof(*gpfns) * num, M_PRIVCMD, M_WAITOK);
		errs = malloc(sizeof(*errs) * num, M_PRIVCMD, M_WAITOK);

		set_xen_guest_handle(add.idxs, idxs);
		set_xen_guest_handle(add.gpfns, gpfns);
		set_xen_guest_handle(add.errs, errs);

		for (index = 0; index < mmap->num; index += num) {
			num = MIN(mmap->num - index, UINT16_MAX);
			add.size = num;

			error = copyin(&mmap->arr[index], idxs,
			    sizeof(idxs[0]) * num);
			if (error != 0)
				goto mmap_out;

			for (i = 0; i < num; i++)
				gpfns[i] = atop(umap->phys_base_addr +
				    (i + index) * PAGE_SIZE);

			bzero(errs, sizeof(*errs) * num);

			error = HYPERVISOR_memory_op(
			    XENMEM_add_to_physmap_range, &add);
			if (error != 0) {
				error = xen_translate_error(error);
				goto mmap_out;
			}

			for (i = 0; i < num; i++) {
				if (errs[i] != 0) {
					errs[i] = xen_translate_error(errs[i]);

					/* Mark the page as invalid. */
					BIT_SET(mmap->num, index + i,
					    umap->err);
				}
			}

			error = copyout(errs, &mmap->err[index],
			    sizeof(errs[0]) * num);
			if (error != 0)
				goto mmap_out;
		}

		umap->mapped = true;

mmap_out:
		free(idxs, M_PRIVCMD);
		free(gpfns, M_PRIVCMD);
		free(errs, M_PRIVCMD);
		if (!umap->mapped)
			free(umap->err, M_PRIVCMD);

		break;
	}
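	/*
	 * Map a foreign domain resource (XENMEM_acquire_resource) into the
	 * range backing a previous privcmd mmap(), or just report the
	 * resource size when no address is provided.
	 */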
	case IOCTL_PRIVCMD_MMAP_RESOURCE: {
		struct ioctl_privcmd_mmapresource *mmap;
		struct xen_mem_acquire_resource adq;
		xen_pfn_t *gpfns;
		struct privcmd_map *umap;

		mmap = (struct ioctl_privcmd_mmapresource *)arg;

		if (u->dom != DOMID_INVALID && u->dom != mmap->dom) {
			error = EPERM;
			break;
		}

		bzero(&adq, sizeof(adq));

		adq.domid = mmap->dom;
		adq.type = mmap->type;
		adq.id = mmap->id;

		/* Shortcut for getting the resource size. */
		if (mmap->addr == 0 && mmap->num == 0) {
			error = HYPERVISOR_memory_op(XENMEM_acquire_resource,
			    &adq);
			if (error != 0)
				error = xen_translate_error(error);
			else
				mmap->num = adq.nr_frames;
			break;
		}

		umap = setup_virtual_area(td, mmap->addr, mmap->num);
		if (umap == NULL) {
			error = EINVAL;
			break;
		}

		adq.nr_frames = mmap->num;
		adq.frame = mmap->idx;

		gpfns = malloc(sizeof(*gpfns) * mmap->num, M_PRIVCMD, M_WAITOK);
		for (i = 0; i < mmap->num; i++)
			gpfns[i] = atop(umap->phys_base_addr) + i;
		set_xen_guest_handle(adq.frame_list, gpfns);

		error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &adq);
		if (error != 0)
			error = xen_translate_error(error);
		else
			umap->mapped = true;

		free(gpfns, M_PRIVCMD);
		if (!umap->mapped)
			free(umap->err, M_PRIVCMD);

		break;
	}
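	/*
	 * Forward a device model operation to Xen, translating the user
	 * buffer descriptors into hypervisor guest handles.
	 */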
	case IOCTL_PRIVCMD_DM_OP: {
		const struct ioctl_privcmd_dmop *dmop;
		struct privcmd_dmop_buf *bufs;
		struct xen_dm_op_buf *hbufs;

		dmop = (struct ioctl_privcmd_dmop *)arg;

		if (u->dom != DOMID_INVALID && u->dom != dmop->dom) {
			error = EPERM;
			break;
		}

		if (dmop->num == 0)
			break;

		if (dmop->num > MAX_DMOP_BUFFERS) {
			error = E2BIG;
			break;
		}

		bufs = malloc(sizeof(*bufs) * dmop->num, M_PRIVCMD, M_WAITOK);

		error = copyin(dmop->ubufs, bufs, sizeof(*bufs) * dmop->num);
		if (error != 0) {
			free(bufs, M_PRIVCMD);
			break;
		}

		hbufs = malloc(sizeof(*hbufs) * dmop->num, M_PRIVCMD, M_WAITOK);
		for (i = 0; i < dmop->num; i++) {
			set_xen_guest_handle(hbufs[i].h, bufs[i].uptr);
			hbufs[i].size = bufs[i].size;
		}

#ifdef __amd64__
		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
			stac();
#endif
		error = HYPERVISOR_dm_op(dmop->dom, dmop->num, hbufs);
#ifdef __amd64__
		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
			clac();
#endif
		if (error != 0)
			error = xen_translate_error(error);

		free(bufs, M_PRIVCMD);
		free(hbufs, M_PRIVCMD);

		break;
	}
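	/*
	 * Restrict this file descriptor to a single domain: further raw
	 * hypercalls are refused and the mapping/dm_op ioctls must target
	 * that domain.
	 */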
	case IOCTL_PRIVCMD_RESTRICT: {
		struct per_user_data *u;
		domid_t dom;

		dom = *(domid_t *)arg;

		error = devfs_get_cdevpriv((void **)&u);
		if (error != 0)
			break;

		if (u->dom != DOMID_INVALID && u->dom != dom) {
			error = EINVAL;
			break;
		}
		u->dom = dom;

		break;
	}
	default:
		error = ENOSYS;
		break;
	}

	return (error);
}

static void
user_release(void *arg)
{

	free(arg, M_PRIVCMD);
}

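/*
 * Allocate per-open restriction state (initially unrestricted) and attach it
 * to the descriptor; user_release() frees it when the descriptor goes away.
 */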
static int
privcmd_open(struct cdev *dev, int flag, int otyp, struct thread *td)
{
	struct per_user_data *u;
	int error;

	u = malloc(sizeof(*u), M_PRIVCMD, M_WAITOK);
	u->dom = DOMID_INVALID;

	/* Assign the allocated per_user_data to this open instance. */
	error = devfs_set_cdevpriv(u, user_release);
	if (error != 0) {
		free(u, M_PRIVCMD);
	}

	return (error);
}

/*------------------ Private Device Attachment Functions --------------------*/
static void
privcmd_identify(driver_t *driver, device_t parent)
{

	KASSERT(xen_domain(),
	    ("Trying to attach privcmd device on non Xen domain"));

	if (BUS_ADD_CHILD(parent, 0, "privcmd", 0) == NULL)
		panic("unable to attach privcmd user-space device");
}

static int
privcmd_probe(device_t dev)
{

	privcmd_dev = dev;
	device_set_desc(dev, "Xen privileged interface user-space device");
	return (BUS_PROBE_NOWILDCARD);
}

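/* Create the /dev/xen/privcmd node, accessible by root only. */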
static int
privcmd_attach(device_t dev)
{

	make_dev_credf(MAKEDEV_ETERNAL, &privcmd_devsw, 0, NULL, UID_ROOT,
	    GID_WHEEL, 0600, "xen/privcmd");
	return (0);
}

/*-------------------- Private Device Attachment Data -----------------------*/
static device_method_t privcmd_methods[] = {
	DEVMETHOD(device_identify, privcmd_identify),
	DEVMETHOD(device_probe, privcmd_probe),
	DEVMETHOD(device_attach, privcmd_attach),

	DEVMETHOD_END
};

static driver_t privcmd_driver = {
	"privcmd",
	privcmd_methods,
	0,
};

devclass_t privcmd_devclass;

DRIVER_MODULE(privcmd, xenpv, privcmd_driver, privcmd_devclass, 0, 0);
MODULE_DEPEND(privcmd, xenpv, 1, 1, 1);