[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH V5 05/12] migration: preserve suspended runstate
From: |
Steve Sistare |
Subject: |
[PATCH V5 05/12] migration: preserve suspended runstate |
Date: |
Mon, 13 Nov 2023 10:33:53 -0800 |
A guest that is migrated in the suspended state automaticaly wakes and
continues execution. This is wrong; the guest should end migration in
the same state it started. The root cause is that the outgoing migration
code automatically wakes the guest, then saves the RUNNING runstate in
global_state_store(), hence the incoming migration code thinks the guest is
running and continues the guest if autostart is true.
Simply deleting the call to qemu_system_wakeup_request() on the outgoing
side, to migrate the vm in state suspended, does not solve the problem.
The old vm_stop_force_state does little if the vm is suspended, so the cpu
clock remains running, and runstate notifiers for the stop transition are
not called (and were not called on transition to suspended). Stale cpu
timers_state is saved to the migration stream, causing time errors in the
guest when it wakes from suspend. State that would have been modified by
runstate notifiers is wrong.
The new version of vm_stop_force_state solves the outgoing problems, by
completely stopping a vm in the suspended state.
On the incoming side for precopy, compute the desired new state from global
state received, and call runstate_restore, which will partially
resume the vm if the state is suspended. A future system_wakeup monitor
request will cause the vm to resume running.
On the incoming side for postcopy, apply the the same restore logic found
in precopy.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
migration/migration.c | 33 ++++++++++++++++++---------------
migration/migration.h | 1 +
migration/savevm.c | 8 +-------
3 files changed, 20 insertions(+), 22 deletions(-)
diff --git a/migration/migration.c b/migration/migration.c
index 28a34c9..29ed901 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -592,6 +592,7 @@ static void process_incoming_migration_bh(void *opaque)
{
Error *local_err = NULL;
MigrationIncomingState *mis = opaque;
+ RunState state = migrate_new_runstate();
trace_vmstate_downtime_checkpoint("dst-precopy-bh-enter");
@@ -602,8 +603,7 @@ static void process_incoming_migration_bh(void *opaque)
* unless we really are starting the VM.
*/
if (!migrate_late_block_activate() ||
- (autostart && (!global_state_received() ||
- global_state_get_runstate() == RUN_STATE_RUNNING))) {
+ state == RUN_STATE_RUNNING) {
/* Make sure all file formats throw away their mutable metadata.
* If we get an error here, just don't restart the VM yet. */
bdrv_activate_all(&local_err);
@@ -626,19 +626,13 @@ static void process_incoming_migration_bh(void *opaque)
dirty_bitmap_mig_before_vm_start();
- if (!global_state_received() ||
- global_state_get_runstate() == RUN_STATE_RUNNING) {
- if (autostart) {
- vm_start();
- } else {
- runstate_set(RUN_STATE_PAUSED);
- }
- } else if (migration_incoming_colo_enabled()) {
+ if (migration_incoming_colo_enabled()) {
migration_incoming_disable_colo();
vm_start();
} else {
- runstate_set(global_state_get_runstate());
+ vm_resume(state);
}
+
trace_vmstate_downtime_checkpoint("dst-precopy-bh-vm-started");
/*
* This must happen after any state changes since as soon as an external
@@ -1277,6 +1271,16 @@ void migrate_set_state(int *state, int old_state, int
new_state)
}
}
+RunState migrate_new_runstate(void)
+{
+ RunState state = global_state_get_runstate();
+
+ if (!global_state_received() || state == RUN_STATE_RUNNING) {
+ state = autostart ? RUN_STATE_RUNNING : RUN_STATE_PAUSED;
+ }
+ return state;
+}
+
static void migrate_fd_cleanup(MigrationState *s)
{
qemu_bh_delete(s->cleanup_bh);
@@ -2415,7 +2419,6 @@ static int postcopy_start(MigrationState *ms, Error
**errp)
migration_downtime_start(ms);
- qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
global_state_store();
ret = migration_stop_vm(RUN_STATE_FINISH_MIGRATE);
if (ret < 0) {
@@ -2614,7 +2617,6 @@ static int migration_completion_precopy(MigrationState *s,
qemu_mutex_lock_iothread();
migration_downtime_start(s);
- qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
s->vm_old_state = runstate_get();
global_state_store();
@@ -3135,9 +3137,10 @@ static void migration_iteration_finish(MigrationState *s)
case MIGRATION_STATUS_FAILED:
case MIGRATION_STATUS_CANCELLED:
case MIGRATION_STATUS_CANCELLING:
- if (s->vm_old_state == RUN_STATE_RUNNING) {
+ if (s->vm_old_state == RUN_STATE_RUNNING ||
+ s->vm_old_state == RUN_STATE_SUSPENDED) {
if (!runstate_check(RUN_STATE_SHUTDOWN)) {
- vm_start();
+ vm_resume(s->vm_old_state);
}
} else {
if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
diff --git a/migration/migration.h b/migration/migration.h
index cf2c9c8..5b7aa89 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -473,6 +473,7 @@ struct MigrationState {
};
void migrate_set_state(int *state, int old_state, int new_state);
+RunState migrate_new_runstate(void);
void migration_fd_process_incoming(QEMUFile *f, Error **errp);
void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp);
diff --git a/migration/savevm.c b/migration/savevm.c
index eec5503..78ac2bd 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2163,13 +2163,7 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
dirty_bitmap_mig_before_vm_start();
- if (autostart) {
- /* Hold onto your hats, starting the CPU */
- vm_start();
- } else {
- /* leave it paused and let management decide when to start the CPU */
- runstate_set(RUN_STATE_PAUSED);
- }
+ vm_resume(migrate_new_runstate());
qemu_bh_delete(mis->bh);
--
1.8.3.1
- [PATCH V5 00/12] fix migration of suspended runstate, Steve Sistare, 2023/11/13
- [PATCH V5 02/12] cpus: stop vm in suspended state, Steve Sistare, 2023/11/13
- [PATCH V5 01/12] cpus: refactor vm_stop, Steve Sistare, 2023/11/13
- [PATCH V5 03/12] cpus: pass runstate to vm_prepare_start, Steve Sistare, 2023/11/13
- [PATCH V5 05/12] migration: preserve suspended runstate,
Steve Sistare <=
- [PATCH V5 09/12] tests/qtest: option to suspend during migration, Steve Sistare, 2023/11/13
- [PATCH V5 06/12] migration: preserve suspended for snapshot, Steve Sistare, 2023/11/13
- [PATCH V5 11/12] tests/qtest: postcopy migration with suspend, Steve Sistare, 2023/11/13
- [PATCH V5 04/12] cpus: start vm in suspended state, Steve Sistare, 2023/11/13
- [PATCH V5 07/12] migration: preserve suspended for bg_migration, Steve Sistare, 2023/11/13