qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC v3 06/10] virt: Assign a VFIO platform device with -de


From: Eric Auger
Subject: [Qemu-devel] [RFC v3 06/10] virt: Assign a VFIO platform device with -device option
Date: Mon, 2 Jun 2014 08:49:30 +0100

This patch aims at allowing the end-user to specify the device he
wants to directly assign to his mach-virt guest in the QEMU command
line.

The QEMU platform device becomes generic.

Current choice is to reuse the "-device" option.

For example when assigning Calxeda Midway xgmac device this option
is used:
-device vfio-platform,vfio_device="fff51000.ethernet",\
compat="calxeda/hb-xgmac",mmap-timeout-ms=1000

where
- fff51000.ethernet is the name of the device in
 /sys/bus/platform/devices/
- calxeda/hb-xgma is the compatibility where the standard comma
  separator is replaced by "/" since coma is specifically used by
  QEMU command line parser
- mmap-timeout-ms is minimal amount of time (ms) during which the IP
  register space stays MMIO mapped after an IRQ triggers in order to
  trap the end of interrupt (EOI). This is an optional parameter
  (default value set to 1100 ms).

mach-virt was modified to interpret this line and automatically

- map the device at a chosen guest physical address in
  [0xa004000, 0x10000000],
- map the device IRQs after 48,
- create the associated guest device tree with the provided
  compatibility.

The "-device" option underlying implementation is not standard
which can be argued. Indeed normaly it induces the call to the QEMU
device realize function once after the virtual machine init
execution.

In vl.c the following sequence is implemented:
1) machine init
   machine_class->init(&current_machine->init_args);
2) init of devices added with -device option
   qemu_opts_foreach(qemu_find_opts("device"),
                     device_init_func, NULL, 1)

The issue with that sequence is that the device tree is built in
mach-virt and at that stage we miss information about the VFIO device
(IRQ number, region number and size). Those information only are
collected when the VFIO device is realized.

For that reason it is decided to interpret the -device option line in
mach-virt and also to call the VFIO realize function there.

since vl.c is not changed by this patch, this means the VFIO realize
function is called twice, once from mach-virt and once from vl.c
The second call returns immediatly since the QEMU device is recognized
as already attached to the group.

changes v2 -> v3
- retrieve device properties through standard functions
- support of multiple "reg" tuples (not tested)

Acknowledgements:
- a single compatibility currently is supported
- More complex device nodes will request specialized code
- cases where multiple VFIO devices are assigned could not be tested
---
 hw/arm/virt.c         | 222 +++++++++++++++++++++++++++++++++++++++++---------
 hw/vfio/common.c      |  10 +--
 hw/vfio/pci.c         |  17 ++++
 hw/vfio/platform.c    |  39 ++++++---
 hw/vfio/vfio-common.h |   1 +
 5 files changed, 233 insertions(+), 56 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index f5693aa..8de6d1a 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -40,6 +40,8 @@
 #include "exec/address-spaces.h"
 #include "qemu/bitops.h"
 #include "qemu/error-report.h"
+#include "monitor/qdev.h"
+#include "qemu/config-file.h"
 
 #define NUM_VIRTIO_TRANSPORTS 32
 
@@ -65,7 +67,7 @@ enum {
     VIRT_GIC_CPU,
     VIRT_UART,
     VIRT_MMIO,
-    VIRT_ETHERNET,
+    VIRT_VFIO,
 };
 
 typedef struct MemMapEntry {
@@ -77,7 +79,10 @@ typedef struct VirtBoardInfo {
     struct arm_boot_info bootinfo;
     const char *cpu_model;
     const MemMapEntry *memmap;
+    qemu_irq pic[NUM_IRQS];
     const int *irqmap;
+    hwaddr avail_vfio_base;
+    int avail_vfio_irq;
     int smp_cpus;
     void *fdt;
     int fdt_size;
@@ -103,16 +108,16 @@ static const MemMapEntry a15memmap[] = {
     [VIRT_GIC_CPU] = { 0x8010000, 0x10000 },
     [VIRT_UART] = { 0x9000000, 0x1000 },
     [VIRT_MMIO] = { 0xa000000, 0x200 },
+    [VIRT_VFIO] = { 0xa004000, 0x0 }, /* size is dynamically populated */
     /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
     /* 0x10000000 .. 0x40000000 reserved for PCI */
-    [VIRT_MEM] = { 0x40000000, 1ULL * 1024 * 1024 * 1024 },
-    [VIRT_ETHERNET] = { 0xfff51000, 0x1000 },
+    [VIRT_MEM] = { 0x40000000, 30ULL * 1024 * 1024 * 1024 },
 };
 
 static const int a15irqmap[] = {
     [VIRT_UART] = 1,
     [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
-    [VIRT_ETHERNET] = 77,
+    [VIRT_VFIO] = 48,
 };
 
 static VirtBoardInfo machines[] = {
@@ -265,7 +270,7 @@ static void fdt_add_gic_node(const VirtBoardInfo *vbi)
     qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", gic_phandle);
 }
 
-static void create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_gic(VirtBoardInfo *vbi)
 {
     /* We create a standalone GIC v2 */
     DeviceState *gicdev;
@@ -309,13 +314,13 @@ static void create_gic(const VirtBoardInfo *vbi, qemu_irq 
*pic)
     }
 
     for (i = 0; i < NUM_IRQS; i++) {
-        pic[i] = qdev_get_gpio_in(gicdev, i);
+        vbi->pic[i] = qdev_get_gpio_in(gicdev, i);
     }
 
     fdt_add_gic_node(vbi);
 }
 
-static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_uart(const VirtBoardInfo *vbi)
 {
     char *nodename;
     hwaddr base = vbi->memmap[VIRT_UART].base;
@@ -324,7 +329,7 @@ static void create_uart(const VirtBoardInfo *vbi, qemu_irq 
*pic)
     const char compat[] = "arm,pl011\0arm,primecell";
     const char clocknames[] = "uartclk\0apb_pclk";
 
-    sysbus_create_simple("pl011", base, pic[irq]);
+    sysbus_create_simple("pl011", base, vbi->pic[irq]);
 
     nodename = g_strdup_printf("/address@hidden" PRIx64, base);
     qemu_fdt_add_subnode(vbi->fdt, nodename);
@@ -343,36 +348,177 @@ static void create_uart(const VirtBoardInfo *vbi, 
qemu_irq *pic)
     g_free(nodename);
 }
 
-static void create_ethernet(const VirtBoardInfo *vbi, qemu_irq *pic)
+/*
+ * Function called for each vfio-platform device option found in the
+ * qemu user command line:
+ * -device vfio-platform,vfio-device="<device>",compat"<compat>"
+ * for instance <device> can be fff51000.ethernet (device unbound from
+ * original driver and bound to vfio driver)
+ * for instance <compat> can be calxeda/hb-xgmac
+ * note "/" replaces normal ",". Indeed "," would be interpreted by QEMU as
+ * a separator
+ */
+
+static int vfio_init_func(QemuOpts *opts, void *opaque)
 {
+    const char *driver;
+    DeviceState *dev;
+    SysBusDevice *s;
+    VirtBoardInfo *vbi = (VirtBoardInfo *)opaque;
+    driver = qemu_opt_get(opts, "driver");
+    int irq_start = vbi->avail_vfio_irq;
+    hwaddr vfio_base = vbi->avail_vfio_base;
     char *nodename;
-    hwaddr base = vbi->memmap[VIRT_ETHERNET].base;
-    hwaddr size = vbi->memmap[VIRT_ETHERNET].size;
-    const char compat[] = "calxeda,hb-xgmac";
-    int main_irq = vbi->irqmap[VIRT_ETHERNET];
-    int power_irq = main_irq+1;
-    int low_power_irq = main_irq+2;
-
-    sysbus_create_varargs("vfio-platform", base,
-                          pic[main_irq],
-                          pic[power_irq],
-                          pic[low_power_irq], NULL);
-
-    nodename = g_strdup_printf("/address@hidden" PRIx64, base);
-    qemu_fdt_add_subnode(vbi->fdt, nodename);
+    char *corrected_compat, *compat, *name;
+    int num_irqs, num_regions;
+    MemoryRegion *mr;
+    int i, ret;
+    uint32_t *irq_attr;
+    uint64_t *reg_attr;
+    uint64_t size;
+    Error *errp = NULL;
+
+    if (!driver) {
+        qerror_report(QERR_MISSING_PARAMETER, "driver");
+        return -1 ;
+    }
 
-    /* Note that we can't use setprop_string because of the embedded NUL */
-    qemu_fdt_setprop(vbi->fdt, nodename, "compatible", compat, sizeof(compat));
-    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg", 2, base, 2, size);
-    qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
-                                0x0, main_irq, 0x4,
-                                0x0, power_irq, 0x4,
-                                0x0, low_power_irq, 0x4);
+    if (strcasecmp(driver, "vfio-platform") == 0) {
+        dev = qdev_device_add(opts);
+        if (!dev) {
+            return -1;
+        }
+        s = SYS_BUS_DEVICE(dev);
 
-    g_free(nodename);
+        name = object_property_get_str(OBJECT(s), "vfio_device", &errp);
+        if (errp != NULL || (name == NULL)) {
+            error_report("Couldn't retrieve vfio device name: %s\n",
+                         error_get_pretty(errp));
+            exit(1);
+           }
+        compat = object_property_get_str(OBJECT(s), "compat", &errp);
+        if ((errp != NULL) || (name == NULL)) {
+            error_report("Couldn't retrieve VFIO device compat: %s\n",
+                         error_get_pretty(errp));
+            exit(1);
+           }
+        num_irqs = object_property_get_int(OBJECT(s), "num_irqs", &errp);
+        if (errp != NULL) {
+            error_report("Couldn't retrieve VFIO IRQ number: %s\n",
+                         error_get_pretty(errp));
+            exit(1);
+           }
+        num_regions = object_property_get_int(OBJECT(s), "num_regions", &errp);
+        if ((errp != NULL) || (num_regions == 0)) {
+            error_report("Couldn't retrieve VFIO region number: %s\n",
+                         error_get_pretty(errp));
+            exit(1);
+           }
+
+        /*
+         * collect region info and build reg property as tuplets
+         * 2 base 2 size
+         * 2 being the number of cells for base and size
+         */
+        reg_attr = g_new(uint64_t, num_regions*4);
+
+        for (i = 0; i < num_regions; i++) {
+            mr = sysbus_mmio_get_region(s, i);
+            size = memory_region_size(mr);
+            reg_attr[4*i] = 2;
+            reg_attr[4*i+1] = vbi->avail_vfio_base;
+            reg_attr[4*i+2] = 2;
+            reg_attr[4*i+3] = size;
+            vbi->avail_vfio_base += size;
+        }
+
+        if (vbi->avail_vfio_base >= 0x10000000) {
+            /* VFIO region size exceeds remaining VFIO space */
+            qerror_report(QERR_DEVICE_INIT_FAILED, name);
+        } else if (irq_start + num_irqs >= NUM_IRQS) {
+            /* VFIO IRQ number exceeded */
+            qerror_report(QERR_DEVICE_INIT_FAILED, name);
+        }
+
+        /*
+         * process compatibility property string passed by end-user
+         * replaces / by ,
+         * currently a single property compatibility value is supported!
+         */
+        corrected_compat = g_strdup(compat);
+        char *slash = strchr(corrected_compat, '/');
+        if (slash != NULL) {
+            *slash = ',';
+        } else {
+            error_report("Wrong compat string %s, should contain a /\n",
+                         compat);
+            exit(1);
+        }
+
+        sysbus_mmio_map(s, 0, vfio_base);
+        nodename = g_strdup_printf("/address@hidden" PRIx64,
+                                   name, vfio_base);
+
+        qemu_fdt_add_subnode(vbi->fdt, nodename);
+
+        qemu_fdt_setprop(vbi->fdt, nodename, "compatible",
+                             corrected_compat, strlen(corrected_compat));
+
+        ret = qemu_fdt_setprop_sized_cells_from_array(vbi->fdt, nodename, 
"reg",
+                         num_regions*2, reg_attr);
+        if (ret < 0) {
+            error_report("could not set reg property of node %s", nodename);
+        }
+
+        irq_attr = g_new(uint32_t, num_irqs*3);
+        for (i = 0; i < num_irqs; i++) {
+            sysbus_connect_irq(s, i, vbi->pic[irq_start+i]);
+
+            irq_attr[3*i] = cpu_to_be32(0);
+            irq_attr[3*i+1] = cpu_to_be32(irq_start+i);
+            irq_attr[3*i+2] = cpu_to_be32(0x4);
+        }
+
+        ret = qemu_fdt_setprop(vbi->fdt, nodename, "interrupts",
+                         irq_attr, num_irqs*3*sizeof(uint32_t));
+        if (ret < 0) {
+            error_report("could not set interrupts property of node %s",
+                         nodename);
+        }
+
+        vbi->avail_vfio_irq += num_irqs;
+
+        g_free(nodename);
+        g_free(corrected_compat);
+        g_free(irq_attr);
+        g_free(reg_attr);
+
+        object_unref(OBJECT(dev));
+
+    }
+
+  return 0;
 }
 
-static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic)
+/*
+ * parses the option line and look for -device option
+ * for each of time vfio_init_func is called.
+ * this later only applies to -device vfio-platform ones
+ */
+
+static void create_vfio_devices(VirtBoardInfo *vbi)
+{
+    vbi->avail_vfio_base = vbi->memmap[VIRT_VFIO].base;
+    vbi->avail_vfio_irq =  vbi->irqmap[VIRT_VFIO];
+
+    if (qemu_opts_foreach(qemu_find_opts("device"),
+                        vfio_init_func, (void *)vbi, 1) != 0) {
+        exit(1);
+    }
+}
+
+
+static void create_virtio_devices(const VirtBoardInfo *vbi)
 {
     int i;
     hwaddr size = vbi->memmap[VIRT_MMIO].size;
@@ -386,7 +532,7 @@ static void create_virtio_devices(const VirtBoardInfo *vbi, 
qemu_irq *pic)
         int irq = vbi->irqmap[VIRT_MMIO] + i;
         hwaddr base = vbi->memmap[VIRT_MMIO].base + i * size;
 
-        sysbus_create_simple("virtio-mmio", base, pic[irq]);
+        sysbus_create_simple("virtio-mmio", base, vbi->pic[irq]);
     }
 
     for (i = NUM_VIRTIO_TRANSPORTS - 1; i >= 0; i--) {
@@ -417,7 +563,6 @@ static void *machvirt_dtb(const struct arm_boot_info 
*binfo, int *fdt_size)
 
 static void machvirt_init(QEMUMachineInitArgs *args)
 {
-    qemu_irq pic[NUM_IRQS];
     MemoryRegion *sysmem = get_system_memory();
     int n;
     MemoryRegion *ram = g_new(MemoryRegion, 1);
@@ -483,16 +628,17 @@ static void machvirt_init(QEMUMachineInitArgs *args)
     vmstate_register_ram_global(ram);
     memory_region_add_subregion(sysmem, vbi->memmap[VIRT_MEM].base, ram);
 
-    create_gic(vbi, pic);
+    create_gic(vbi);
+
+    create_uart(vbi);
 
-    create_uart(vbi, pic);
-    create_ethernet(vbi, pic);
+    create_vfio_devices(vbi);
 
     /* Create mmio transports, so the user can create virtio backends
      * (which will be automatically plugged in to the transports). If
      * no backend is created the transport will just sit harmlessly idle.
      */
-    create_virtio_devices(vbi, pic);
+    create_virtio_devices(vbi);
 
     vbi->bootinfo.ram_size = args->ram_size;
     vbi->bootinfo.kernel_filename = args->kernel_filename;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 07dc409..28d29de 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -732,7 +732,6 @@ void vfio_put_base_device(VFIODevice *vdev)
 
 int vfio_base_device_init(VFIODevice *vdev, int type)
 {
-    VFIODevice *tmp;
     VFIOGroup *group;
     char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
     ssize_t len;
@@ -792,12 +791,9 @@ int vfio_base_device_init(VFIODevice *vdev, int type)
 
     snprintf(path, sizeof(path), "%s", vdev->name);
 
-    QLIST_FOREACH(tmp, &group->device_list, next) {
-        if (strcmp(tmp->name, vdev->name) == 0) {
-            error_report("vfio: error: device %s is already attached", path);
-            vfio_put_group(group, vfio_reset_handler);
-            return -EBUSY;
-        }
+    if (vdev->ops->vfio_is_device_already_attached(vdev, group)) {
+        vfio_put_group(group, vfio_reset_handler);
+        return -EBUSY;
     }
 
     ret = vfio_get_base_device(group, path, vdev);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 1b49205..c86bef9 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2988,6 +2988,22 @@ static void vfio_unregister_err_notifier(VFIOPCIDevice 
*vdev)
     event_notifier_cleanup(&vdev->err_notifier);
 }
 
+static bool vfio_pci_is_device_already_attached(VFIODevice *vdev,
+                                                VFIOGroup *group)
+{
+    VFIODevice *tmp;
+
+    QLIST_FOREACH(tmp, &group->device_list, next) {
+        if (strcmp(tmp->name, vdev->name) == 0) {
+            error_report("vfio: error: device %s is already attached",
+                         vdev->name);
+            return true;
+        }
+    }
+    return false;
+}
+
+
 
 static VFIODeviceOps vfio_pci_ops = {
     .vfio_eoi = vfio_pci_eoi,
@@ -2996,6 +3012,7 @@ static VFIODeviceOps vfio_pci_ops = {
     .vfio_check_device = vfio_pci_check_device,
     .vfio_get_device_regions = vfio_pci_get_device_regions,
     .vfio_get_device_interrupts = vfio_pci_get_device_interrupts,
+    .vfio_is_device_already_attached = vfio_pci_is_device_already_attached,
 };
 
 static int vfio_initfn(PCIDevice *pdev)
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 5b9451f..377783b 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -43,6 +43,7 @@ typedef struct VFIOPlatformDevice {
     QLIST_HEAD(, VFIOINTp) intp_list; /* list of IRQ */
     /* queue of pending IRQ */
     QSIMPLEQ_HEAD(pending_intp_queue, VFIOINTp) pending_intp_queue;
+    char *compat; /* compatibility string */
 } VFIOPlatformDevice;
 
 
@@ -394,11 +395,6 @@ static int vfio_platform_get_device_interrupts(VFIODevice 
*vdev)
     int i, ret;
     VFIOPlatformDevice *vplatdev = container_of(vdev, VFIOPlatformDevice, 
vdev);
 
-    /*
-     * mmap timeout = 1100 ms, PCI default value
-     * this will become a user-defined value in subsequent patch
-     */
-    vdev->mmap_timeout = 1100;
     vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                     vfio_intp_mmap_enable, vdev);
 
@@ -446,6 +442,19 @@ static void vfio_disable_intp(VFIODevice *vdev)
 
 }
 
+static bool vfio_platform_is_device_already_attached(VFIODevice *vdev,
+                                                     VFIOGroup *group)
+{
+    VFIODevice *tmp;
+
+    QLIST_FOREACH(tmp, &group->device_list, next) {
+        if (strcmp(tmp->name, vdev->name) == 0) {
+            return true;
+        }
+    }
+    return false;
+}
+
 
 static VFIODeviceOps vfio_platform_ops = {
     .vfio_eoi = vfio_platform_eoi,
@@ -454,6 +463,7 @@ static VFIODeviceOps vfio_platform_ops = {
     .vfio_check_device = vfio_platform_check_device,
     .vfio_get_device_regions = vfio_platform_get_device_regions,
     .vfio_get_device_interrupts = vfio_platform_get_device_interrupts,
+    .vfio_is_device_already_attached = 
vfio_platform_is_device_already_attached,
 };
 
 
@@ -466,9 +476,7 @@ static void vfio_platform_realize(DeviceState *dev, Error 
**errp)
 
     vbasedev->ops = &vfio_platform_ops;
 
-    /* TODO: pass device name on command line */
-    vbasedev->name = malloc(PATH_MAX);
-    snprintf(vbasedev->name, PATH_MAX, "%s", "fff51000.ethernet");
+    DPRINTF("vfio device %s, compat = %s\n", vbasedev->name, vdev->compat);
 
     ret = vfio_base_device_init(vbasedev, VFIO_DEVICE_TYPE_PLATFORM);
     if (ret < 0) {
@@ -531,8 +539,8 @@ static const VMStateDescription vfio_platform_vmstate = {
 
 typedef struct VFIOPlatformDeviceClass {
     DeviceClass parent_class;
+    void (*init)(VFIOPlatformDevice *vdev);
 
-    int (*init)(VFIODevice *dev);
 } VFIOPlatformDeviceClass;
 
 #define VFIO_PLATFORM_DEVICE(obj) \
@@ -542,19 +550,28 @@ typedef struct VFIOPlatformDeviceClass {
 #define VFIO_PLATFORM_DEVICE_GET_CLASS(obj) \
      OBJECT_GET_CLASS(VFIOPlatformDeviceClass, (obj), TYPE_VFIO_PLATFORM)
 
+static Property vfio_platform_dev_properties[] = {
+DEFINE_PROP_STRING("vfio_device", VFIOPlatformDevice, vdev.name),
+DEFINE_PROP_STRING("compat", VFIOPlatformDevice, compat),
+DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
+                   vdev.mmap_timeout, 1100),
+DEFINE_PROP_UINT32("num_irqs", VFIOPlatformDevice, vdev.num_irqs, 0),
+DEFINE_PROP_UINT32("num_regions", VFIOPlatformDevice, vdev.num_regions, 0),
+DEFINE_PROP_END_OF_LIST(),
+};
 
 
 static void vfio_platform_dev_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     VFIOPlatformDeviceClass *vdc = VFIO_PLATFORM_DEVICE_CLASS(klass);
-
+    dc->props = vfio_platform_dev_properties;
     dc->realize = vfio_platform_realize;
     dc->unrealize = vfio_platform_unrealize;
     dc->vmsd = &vfio_platform_vmstate;
     dc->desc = "VFIO-based platform device assignment";
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
-
+    dc->cannot_instantiate_with_device_add_yet = false;
     vdc->init = NULL;
 }
 
diff --git a/hw/vfio/vfio-common.h b/hw/vfio/vfio-common.h
index 7139d81..5412acd 100644
--- a/hw/vfio/vfio-common.h
+++ b/hw/vfio/vfio-common.h
@@ -123,6 +123,7 @@ struct VFIODeviceOps {
     int (*vfio_check_device)(VFIODevice *vdev);
     int (*vfio_get_device_regions)(VFIODevice *vdev);
     int (*vfio_get_device_interrupts)(VFIODevice *vdev);
+    bool (*vfio_is_device_already_attached)(VFIODevice *vdev, VFIOGroup*);
 };
 
 
-- 
1.8.3.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]