[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: Commit "x86/kvm: Move context tracking where it belongs" broke guest
From: |
Wanpeng Li |
Subject: |
Re: Commit "x86/kvm: Move context tracking where it belongs" broke guest time accounting |
Date: |
Fri, 9 Apr 2021 16:13:03 +0800 |
On Thu, 8 Apr 2021 at 21:19, Thomas Gleixner <tglx@linutronix.de> wrote:
>
> On Tue, Apr 06 2021 at 21:47, Sean Christopherson wrote:
> > On Tue, Apr 06, 2021, Michael Tokarev wrote:
> >> broke kvm guest cpu time accounting - after this commit, when running
> >> qemu-system-x86_64 -enable-kvm, the guest time (in /proc/stat and
> >> elsewhere) is always 0.
> >>
> >> I dunno why it happened, but it happened, and all kernels after 5.9
> >> are affected by this.
> >>
> >> This commit is found in a (painful) git bisect between kernel 5.8 and 5.10.
> >
> > Yes :-(
> >
> > There's a bugzilla[1] and two proposed fixes[2][3]. I don't particularly
> > like
> > either of the fixes, but an elegant solution hasn't presented itself.
> >
> > Thomas/Paolo, can you please weigh in?
> >
> > [1] https://bugzilla.kernel.org/show_bug.cgi?id=209831
> > [2]
> > https://lkml.kernel.org/r/1617011036-11734-1-git-send-email-wanpengli@tencent.com
> > [3] https://lkml.kernel.org/r/20210206004218.312023-1-seanjc@google.com
>
> All of the solutions I looked at so far are ugly as hell. The problem is
> that the accounting is plumbed into the context tracking and moving
> context tracking around to a different place is just wrong.
>
> I think the right solution is to seperate the time accounting logic out
> from guest_enter/exit_irqoff() and have virt time specific helpers which
> can be placed at the proper spots in kvm.
For x86 part, how about something like below:
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 48b396f3..7aeb724 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3730,6 +3730,7 @@ static noinstr void svm_vcpu_enter_exit(struct
kvm_vcpu *vcpu)
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
instrumentation_end();
+ account_guest_enter();
guest_enter_irqoff();
lockdep_hardirqs_on(CALLER_ADDR0);
@@ -3759,6 +3760,8 @@ static noinstr void svm_vcpu_enter_exit(struct
kvm_vcpu *vcpu)
*/
lockdep_hardirqs_off(CALLER_ADDR0);
guest_exit_irqoff();
+ if (vtime_accounting_enabled_this_cpu())
+ account_guest_exit();
instrumentation_begin();
trace_hardirqs_off_finish();
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c05e6e2..5f6c30c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6617,6 +6617,7 @@ static noinstr void vmx_vcpu_enter_exit(struct
kvm_vcpu *vcpu,
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
instrumentation_end();
+ account_guest_enter();
guest_enter_irqoff();
lockdep_hardirqs_on(CALLER_ADDR0);
@@ -6648,6 +6649,8 @@ static noinstr void vmx_vcpu_enter_exit(struct
kvm_vcpu *vcpu,
*/
lockdep_hardirqs_off(CALLER_ADDR0);
guest_exit_irqoff();
+ if (vtime_accounting_enabled_this_cpu())
+ account_guest_exit();
instrumentation_begin();
trace_hardirqs_off_finish();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 16fb395..33422c0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9229,6 +9229,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
++vcpu->stat.exits;
local_irq_disable();
kvm_after_interrupt(vcpu);
+ if (!vtime_accounting_enabled_this_cpu())
+ account_guest_exit();
if (lapic_in_kernel(vcpu)) {
s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index bceb064..ff70229 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -104,8 +104,7 @@ static inline void context_tracking_init(void) { }
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-/* must be called with irqs disabled */
-static __always_inline void guest_enter_irqoff(void)
+static __always_inline void account_guest_enter(void)
{
instrumentation_begin();
if (vtime_accounting_enabled_this_cpu())
@@ -113,7 +112,11 @@ static __always_inline void guest_enter_irqoff(void)
else
current->flags |= PF_VCPU;
instrumentation_end();
+}
+/* must be called with irqs disabled */
+static __always_inline void guest_enter_irqoff(void)
+{
if (context_tracking_enabled())
__context_tracking_enter(CONTEXT_GUEST);
@@ -131,11 +134,8 @@ static __always_inline void guest_enter_irqoff(void)
}
}
-static __always_inline void guest_exit_irqoff(void)
+static __always_inline void account_guest_exit(void)
{
- if (context_tracking_enabled())
- __context_tracking_exit(CONTEXT_GUEST);
-
instrumentation_begin();
if (vtime_accounting_enabled_this_cpu())
vtime_guest_exit(current);
@@ -144,8 +144,14 @@ static __always_inline void guest_exit_irqoff(void)
instrumentation_end();
}
+static __always_inline void guest_exit_irqoff(void)
+{
+ if (context_tracking_enabled())
+ __context_tracking_exit(CONTEXT_GUEST);
+}
+
#else
-static __always_inline void guest_enter_irqoff(void)
+static __always_inline void account_guest_enter(void)
{
/*
* This is running in ioctl context so its safe
@@ -155,11 +161,17 @@ static __always_inline void guest_enter_irqoff(void)
instrumentation_begin();
vtime_account_kernel(current);
current->flags |= PF_VCPU;
+ instrumentation_end();
+}
+
+static __always_inline void guest_enter_irqoff(void)
+{
+ instrumentation_begin();
rcu_virt_note_context_switch(smp_processor_id());
instrumentation_end();
}
-static __always_inline void guest_exit_irqoff(void)
+static __always_inline void account_guest_exit(void)
{
instrumentation_begin();
/* Flush the guest cputime we spent on the guest */
@@ -167,6 +179,11 @@ static __always_inline void guest_exit_irqoff(void)
current->flags &= ~PF_VCPU;
instrumentation_end();
}
+
+static __always_inline void guest_exit_irqoff(void)
+{
+
+}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
static inline void guest_exit(void)