qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH V1 30/32] vfio-pci: save and restore


From: Steven Sistare
Subject: Re: [PATCH V1 30/32] vfio-pci: save and restore
Date: Fri, 7 Aug 2020 16:38:12 -0400
User-agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Thunderbird/68.11.0

On 8/6/2020 6:22 AM, Jason Zeng wrote:
> Hi Steve,
> 
> On Thu, Jul 30, 2020 at 08:14:34AM -0700, Steve Sistare wrote:
>> @@ -3182,6 +3207,51 @@ static Property vfio_pci_dev_properties[] = {
>>      DEFINE_PROP_END_OF_LIST(),
>>  };
>>  
>> +static int vfio_pci_post_load(void *opaque, int version_id)
>> +{
>> +    int vector;
>> +    MSIMessage msg;
>> +    Error *err = 0;
>> +    VFIOPCIDevice *vdev = opaque;
>> +    PCIDevice *pdev = &vdev->pdev;
>> +
>> +    if (msix_enabled(pdev)) {
>> +        vfio_msix_enable(vdev);
>> +        pdev->msix_function_masked = false;
>> +
>> +        for (vector = 0; vector < vdev->pdev.msix_entries_nr; vector++) {
>> +            if (!msix_is_masked(pdev, vector)) {
>> +                msg = msix_get_message(pdev, vector);
>> +                vfio_msix_vector_use(pdev, vector, msg);
>> +            }
>> +        }
> 
> It looks to me MSIX re-init here may lose device IRQs and impact
> device hardware state?
> 
> The re-init will cause the kernel vfio driver to connect the device
> MSIX vectors to new eventfds and KVM instance. But before that, device
> IRQs will be routed to previous eventfd. Looks these IRQs will be lost.

Thanks Jason, that sounds like a problem.  I could try reading and saving an 
event from eventfd before shutdown, and injecting it into the eventfd after
restart, but that would be racy unless I disable interrupts.  Or, 
unconditionally
inject a spurious interrupt after restart to kick it, in case an interrupt 
was lost.

Do you have any other ideas?

> And the re-init will make the device go through the procedure of
> disabling MSIX, enabling INTX, and re-enabling MSIX and vectors.
> So if the device is active, its hardware state will be impacted?

Again thanks.  vfio_msix_enable() does indeed call vfio_disable_interrupts().
For a quick experiment, I deleted that call in for the post_load code path, and 
it seems to work fine, but I need to study it more.

- Steve
 
>> +
>> +    } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
>> +        vfio_intx_enable(vdev, &err);
>> +        if (err) {
>> +            error_report_err(err);
>> +        }
>> +    }
>> +
>> +    vdev->vbasedev.group->container->reused = false;
>> +    vdev->pdev.reused = false;
>> +
>> +    return 0;
>> +}
>> +
>> +static const VMStateDescription vfio_pci_vmstate = {
>> +    .name = "vfio-pci",
>> +    .unmigratable = 1,
>> +    .mode_mask = VMS_RESTART,
>> +    .version_id = 0,
>> +    .minimum_version_id = 0,
>> +    .post_load = vfio_pci_post_load,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_MSIX(pdev, VFIOPCIDevice),
>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
>> +
>>  static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
>>  {
>>      DeviceClass *dc = DEVICE_CLASS(klass);
>> @@ -3189,6 +3259,7 @@ static void vfio_pci_dev_class_init(ObjectClass 
>> *klass, void *data)
>>  
>>      dc->reset = vfio_pci_reset;
>>      device_class_set_props(dc, vfio_pci_dev_properties);
>> +    dc->vmsd = &vfio_pci_vmstate;
>>      dc->desc = "VFIO-based PCI device assignment";
>>      set_bit(DEVICE_CATEGORY_MISC, dc->categories);
>>      pdc->realize = vfio_realize;
>> diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
>> index ac2cefc..e6e1a5d 100644
>> --- a/hw/vfio/platform.c
>> +++ b/hw/vfio/platform.c
>> @@ -592,7 +592,7 @@ static int vfio_base_device_init(VFIODevice *vbasedev, 
>> Error **errp)
>>              return -EBUSY;
>>          }
>>      }
>> -    ret = vfio_get_device(group, vbasedev->name, vbasedev, errp);
>> +    ret = vfio_get_device(group, vbasedev->name, vbasedev, 0, errp);
>>      if (ret) {
>>          vfio_put_group(group);
>>          return ret;
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index bd07c86..c926a24 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -358,6 +358,7 @@ struct PCIDevice {
>>  
>>      /* ID of standby device in net_failover pair */
>>      char *failover_pair_id;
>> +    bool reused;
>>  };
>>  
>>  void pci_register_bar(PCIDevice *pci_dev, int region_num,
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index c78f3ff..4e2a332 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -73,6 +73,8 @@ typedef struct VFIOContainer {
>>      unsigned iommu_type;
>>      Error *error;
>>      bool initialized;
>> +    bool reused;
>> +    int cid;
>>      unsigned long pgsizes;
>>      QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
>>      QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
>> @@ -177,7 +179,7 @@ void vfio_reset_handler(void *opaque);
>>  VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp);
>>  void vfio_put_group(VFIOGroup *group);
>>  int vfio_get_device(VFIOGroup *group, const char *name,
>> -                    VFIODevice *vbasedev, Error **errp);
>> +                    VFIODevice *vbasedev, bool *reused, Error **errp);
>>  
>>  extern const MemoryRegionOps vfio_region_ops;
>>  typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
>> diff --git a/migration/savevm.c b/migration/savevm.c
>> index 881dc13..2606cf0 100644
>> --- a/migration/savevm.c
>> +++ b/migration/savevm.c
>> @@ -1568,7 +1568,7 @@ static int qemu_savevm_state(QEMUFile *f, VMStateMode 
>> mode, Error **errp)
>>          return -EINVAL;
>>      }
>>  
>> -    if (migrate_use_block()) {
>> +    if ((mode & (VMS_SNAPSHOT | VMS_MIGRATE)) && migrate_use_block()) {
>>          error_setg(errp, "Block migration and snapshots are incompatible");
>>          return -EINVAL;
>>      }
>> -- 
>> 1.8.3.1
>>
>>



reply via email to

[Prev in Thread] Current Thread [Next in Thread]