Re: [Virtio-fs] [RFC 2/2] vhost-user-fs: Implement stateful migration

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Virtio-fs] [RFC 2/2] vhost-user-fs: Implement stateful migration

From:	Anton Kuchin
Subject:	Re: [Virtio-fs] [RFC 2/2] vhost-user-fs: Implement stateful migration
Date:	Fri, 17 Mar 2023 20:37:37 +0200
User-agent:	Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Thunderbird/102.7.1

On 17/03/2023 19:52, Hanna Czenczek wrote:

On 17.03.23 18:19, Anton Kuchin wrote:

On 13/03/2023 19:48, Hanna Czenczek wrote:

A virtio-fs device's VM state consists of:
- the virtio device (vring) state (VMSTATE_VIRTIO_DEVICE)
- the back-end's (virtiofsd's) internal state

We get/set the latter via the new vhost-user operations FS_SET_STATE_FD,

FS_GET_STATE, and FS_SET_STATE.

Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---

hw/virtio/vhost-user-fs.c | 171+++++++++++++++++++++++++++++++++++++-

  1 file changed, 170 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index 83fc20e49e..df1fb02acc 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -20,8 +20,10 @@
  #include "hw/virtio/virtio-bus.h"
  #include "hw/virtio/virtio-access.h"
  #include "qemu/error-report.h"
+#include "qemu/memfd.h"
  #include "hw/virtio/vhost.h"
  #include "hw/virtio/vhost-user-fs.h"
+#include "migration/qemu-file-types.h"
  #include "monitor/monitor.h"
  #include "sysemu/sysemu.h"

@@ -298,9 +300,176 @@ static struct vhost_dev*vuf_get_vhost(VirtIODevice *vdev)

      return &fs->vhost_dev;
  }
  +/**
+ * Fetch the internal state from the back-end (virtiofsd) and save it
+ * to `f`.
+ */
+static int vuf_save_state(QEMUFile *f, void *pv, size_t size,

+ const VMStateField *field, JSONWriter*vmdesc)

+{
+    VirtIODevice *vdev = pv;
+    VHostUserFS *fs = VHOST_USER_FS(vdev);
+    int memfd = -1;

+ /* Size of the shared memory through which to transfer thestate */

+    const size_t chunk_size = 4 * 1024 * 1024;
+    size_t state_offset;
+    ssize_t remaining;
+    void *shm_buf;
+    Error *local_err = NULL;
+    int ret, ret2;
+

+ /* Set up shared memory through which to receive the state fromvirtiofsd */

+    shm_buf = qemu_memfd_alloc("vhost-fs-state", chunk_size,

+ F_SEAL_SEAL | F_SEAL_SHRINK |F_SEAL_GROW,

+                               &memfd, &local_err);
+    if (!shm_buf) {
+        error_report_err(local_err);
+        ret = -ENOMEM;
+        goto early_fail;
+    }
+
+    /* Share the SHM area with virtiofsd */
+    ret = vhost_fs_set_state_fd(&fs->vhost_dev, memfd, chunk_size);
+    if (ret < 0) {
+        goto early_fail;


Don't we need some log message here too?

Sure, why not. There are other places in this patch that just return -errno but print no error, I think they could all use a verbose error message.

+    }
+
+    /* Receive the virtiofsd state in chunks, and write them to `f` */
+    state_offset = 0;
+    do {
+        size_t this_chunk_size;
+
+        remaining = vhost_fs_get_state(&fs->vhost_dev, state_offset,
+                                       chunk_size);
+        if (remaining < 0) {
+            ret = remaining;
+            goto fail;
+        }
+
+        /* Prefix the whole state by its total length */
+        if (state_offset == 0) {
+            qemu_put_be64(f, remaining);
+        }
+
+        this_chunk_size = MIN(remaining, chunk_size);
+        qemu_put_buffer(f, shm_buf, this_chunk_size);
+        state_offset += this_chunk_size;
+    } while (remaining >= chunk_size);
+
+    ret = 0;
+fail:
+    /* Have virtiofsd close the shared memory */
+    ret2 = vhost_fs_set_state_fd(&fs->vhost_dev, -1, 0);
+    if (ret2 < 0) {

+ error_report("Failed to remove state FD from thevhost-user-fs back "

+                     "end: %s", strerror(-ret));
+        if (ret == 0) {
+            ret = ret2;
+        }
+    }
+
+early_fail:
+    if (shm_buf) {
+        qemu_memfd_free(shm_buf, chunk_size, memfd);
+    }
+
+    return ret;
+}
+
+/**
+ * Load the back-end's (virtiofsd's) internal state from `f` and send
+ * it over to that back-end.
+ */
+static int vuf_load_state(QEMUFile *f, void *pv, size_t size,
+                          const VMStateField *field)
+{
+    VirtIODevice *vdev = pv;
+    VHostUserFS *fs = VHOST_USER_FS(vdev);
+    int memfd = -1;

+ /* Size of the shared memory through which to transfer thestate */

+    const size_t chunk_size = 4 * 1024 * 1024;
+    size_t state_offset;
+    uint64_t remaining;
+    void *shm_buf;
+    Error *local_err = NULL;
+    int ret, ret2;
+
+    /* The state is prefixed by its total length, read that first */
+    remaining = qemu_get_be64(f);
+

+ /* Set up shared memory through which to send the state tovirtiofsd */

+    shm_buf = qemu_memfd_alloc("vhost-fs-state", chunk_size,

+ F_SEAL_SEAL | F_SEAL_SHRINK |F_SEAL_GROW,

+                               &memfd, &local_err);
+    if (!shm_buf) {
+        error_report_err(local_err);
+        ret = -ENOMEM;
+        goto early_fail;
+    }
+
+    /* Share the SHM area with virtiofsd */
+    ret = vhost_fs_set_state_fd(&fs->vhost_dev, memfd, chunk_size);
+    if (ret < 0) {
+        goto early_fail;
+    }
+
+    /*
+     * Read the virtiofsd state in chunks from `f`, and send them over
+     * to virtiofsd
+     */
+    state_offset = 0;
+    do {
+        size_t this_chunk_size = MIN(remaining, chunk_size);
+

+ if (qemu_get_buffer(f, shm_buf, this_chunk_size) <this_chunk_size) {

+            ret = -EINVAL;
+            goto fail;
+        }
+

+ ret = vhost_fs_set_state(&fs->vhost_dev, state_offset,this_chunk_size);

+        if (ret < 0) {
+            goto fail;
+        }
+
+        state_offset += this_chunk_size;
+        remaining -= this_chunk_size;
+    } while (remaining > 0);
+
+    ret = 0;
+fail:
+    ret2 = vhost_fs_set_state_fd(&fs->vhost_dev, -1, 0);
+    if (ret2 < 0) {

+ error_report("Failed to remove state FD from thevhost-user-fs back "+ "end -- perhaps it failed to deserialize/applythe state: "

+                     "%s", strerror(-ret2));
+        if (ret == 0) {
+            ret = ret2;
+        }
+    }
+
+early_fail:
+    if (shm_buf) {
+        qemu_memfd_free(shm_buf, chunk_size, memfd);
+    }
+
+    return ret;
+}
+
  static const VMStateDescription vuf_vmstate = {
      .name = "vhost-user-fs",
-    .unmigratable = 1,
+    .version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_VIRTIO_DEVICE,
+        {
+            .name = "back-end",
+            .info = &(const VMStateInfo) {
+                .name = "virtio-fs back-end state",
+                .get = vuf_load_state,
+                .put = vuf_save_state,
+            },
+        },

I've been working on stateless migration patch [1] and there was discussed that we need to keep some kind of blocker by default if orchestrators rely on unmigratable

field in virtio-fs vmstate to block the migration.

For this purpose I've implemented flag that selects "none" or "external" and is checked

in pre_save, so it could be extended with "internal" option.

We didn't come to conclusion if we also need to check incoming migration, the discussion

has stopped for a while but I'm going back to it now.

I would appreciate if you have time to take a look at the discussion and consider the idea proposed there to store internal state as a subsection of vmstate to make it as an option

but not mandatory.

[1] https://patchew.org/QEMU/20230217170038.1273710-1-antonkuchin@yandex-team.ru/

So far I’ve mostly considered these issues orthogonal. If your stateless migration goes in first, then state is optional and I’ll adjust this series. If stateful migration goes in first, then your series can simply make state optional by introducing the external option, no?

Not really. State can be easily extended by subsections but not trimmed. Maybe this can be worked around by defining two types of vmstate and selecting the correct one at migration, but I'm not sure.

But maybe we could also consider making stateless migration a special case of stateful migration; if we had stateful migration, can’t we just implement stateless migration by telling virtiofsd that it should submit a special “I have no state” pseudo-state, i.e. by having a switch on virtiofsd instead?

Sure. Backend can send empty state (as your patch treats 0 length as a valid response and not error) or dummy state that can be recognized as stateless. The only potential problem is that then we need support in backend for new commands even to return dummy state, and if backend can support both types then we'll need some switch in backend to reply with real or empty state.

Off the top of my head, some downsides of that approach would be
(1) it’d need a switch on the virtiofsd side, not on the qemu side (not sure if that’s a downside, but a difference for sure),

Why would you? It seems to me that this affects only how qemu treats the vmstate of device. If the state was requested backend sends it to qemu. If state subsection is present in stream qemu sends it to the backend for loading. Stateless one just doesn't request state from the backend. Or am I missing something?

and (2) we’d need at least some support for this on the virtiofsd side, i.e. practically can’t come quicker than stateful migration support.

Not much, essentially this is just a reconnect. I've sent a draft of a reconnect patch for old C-virtiofsd, for rust version it takes much longer because I'm learning rust and I'm not really good at it yet.


Hanna

[Prev in Thread]

Current Thread

[Next in Thread]

[RFC 0/2] vhost-user-fs: Stateful migration, Hanna Czenczek, 2023/03/13
- [RFC 1/2] vhost-user: Add interface for virtio-fs migration, Hanna Czenczek, 2023/03/13
  - Re: [RFC 1/2] vhost-user: Add interface for virtio-fs migration, Stefan Hajnoczi, 2023/03/15
    - Re: [RFC 1/2] vhost-user: Add interface for virtio-fs migration, Hanna Czenczek, 2023/03/15
    - Re: [RFC 1/2] vhost-user: Add interface for virtio-fs migration, Stefan Hajnoczi, 2023/03/15
  - Re: [RFC 1/2] vhost-user: Add interface for virtio-fs migration, Anton Kuchin, 2023/03/17
    - Re: [Virtio-fs] [RFC 1/2] vhost-user: Add interface for virtio-fs migration, Hanna Czenczek, 2023/03/17
- [RFC 2/2] vhost-user-fs: Implement stateful migration, Hanna Czenczek, 2023/03/13
  - Re: [RFC 2/2] vhost-user-fs: Implement stateful migration, Anton Kuchin, 2023/03/17
    - Re: [Virtio-fs] [RFC 2/2] vhost-user-fs: Implement stateful migration, Hanna Czenczek, 2023/03/17
    - Re: [Virtio-fs] [RFC 2/2] vhost-user-fs: Implement stateful migration, Anton Kuchin <=
    - Re: [Virtio-fs] [RFC 2/2] vhost-user-fs: Implement stateful migration, Hanna Czenczek, 2023/03/20
    - Re: [Virtio-fs] [RFC 2/2] vhost-user-fs: Implement stateful migration, Anton Kuchin, 2023/03/20
    - Re: [Virtio-fs] [RFC 2/2] vhost-user-fs: Implement stateful migration, Hanna Czenczek, 2023/03/21
    - Re: [Virtio-fs] [RFC 2/2] vhost-user-fs: Implement stateful migration, Anton Kuchin, 2023/03/22

Prev by Date: Re: [RFC PATCH v2 03/11] hw/arm/smmuv3: Refactor stage-1 PTW
Next by Date: Re: [PATCH v3 1/1] vhost-user-fs: add migration type property
Previous by thread: Re: [Virtio-fs] [RFC 2/2] vhost-user-fs: Implement stateful migration
Next by thread: Re: [Virtio-fs] [RFC 2/2] vhost-user-fs: Implement stateful migration
Index(es):
- Date
- Thread