qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v14 16/18] plugins: Use different helpers when reading regist


From: Akihiko Odaki
Subject: Re: [PATCH v14 16/18] plugins: Use different helpers when reading registers
Date: Wed, 25 Oct 2023 18:34:55 +0900
User-agent: Mozilla Thunderbird



On 2023/10/25 14:39, Akihiko Odaki wrote:
On 2023/10/25 1:48, Alex Bennée wrote:

Akihiko Odaki <akihiko.odaki@daynix.com> writes:

This avoids optimizations incompatible when reading registers.

Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
  accel/tcg/plugin-helpers.h |  3 ++-
  include/exec/plugin-gen.h  |  4 ++--
  include/hw/core/cpu.h      |  4 ++--
  include/qemu/plugin.h      |  3 +++
  plugins/plugin.h           |  5 +++--
  accel/tcg/plugin-gen.c     | 41 ++++++++++++++++++++++++++++----------
  accel/tcg/translator.c     |  2 +-
  plugins/api.c              | 14 +++++++++++--
  plugins/core.c             | 28 ++++++++++++++++----------
  9 files changed, 72 insertions(+), 32 deletions(-)

diff --git a/accel/tcg/plugin-helpers.h b/accel/tcg/plugin-helpers.h
index 8e685e0654..11796436f3 100644
--- a/accel/tcg/plugin-helpers.h
+++ b/accel/tcg/plugin-helpers.h
@@ -1,4 +1,5 @@
<snip>
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -437,7 +437,7 @@ struct qemu_work_item;
   * @trace_dstate_delayed: Delayed changes to trace_dstate (includes all changes
   *                        to @trace_dstate).
   * @trace_dstate: Dynamic tracing state of events for this vCPU (bitmask).
- * @plugin_mask: Plugin event bitmap. Modified only via async work.
+ * @plugin_flags: Plugin flags. Modified only via async work.
   * @ignore_memory_transaction_failures: Cached copy of the MachineState    *    flag of the same name: allows the board to suppress calling of the
   *    CPU do_transaction_failed hook function.
@@ -529,7 +529,7 @@ struct CPUState {
      /* Use by accel-block: CPU is executing an ioctl() */
      QemuLockCnt in_ioctl_lock;
-    DECLARE_BITMAP(plugin_mask, QEMU_PLUGIN_EV_MAX);
+    unsigned long plugin_flags;

Why are we dropping DECLARE_BITMAP here? It does ensure we will always
have the right size.

  #ifdef CONFIG_PLUGIN
      GArray *plugin_mem_cbs;
diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
index 7fdc3a4849..a534b9127b 100644
--- a/include/qemu/plugin.h
+++ b/include/qemu/plugin.h
@@ -16,6 +16,9 @@
  #include "exec/memopidx.h"
  #include "hw/core/cpu.h"
+#define QEMU_PLUGIN_FLAG_TB_CB_READ QEMU_PLUGIN_EV_MAX
+#define QEMU_PLUGIN_FLAG_INSN_CB_READ (QEMU_PLUGIN_EV_MAX + 1)
+

OK this seems like bad code smell. Are we overloading what was
plugin_mask to indicate two different things? I can see we only really
use the flags to indicate syscalls should generate callbacks so maybe
this could be rationlised but at least split into two patches so the
change to original behaviour isn't mixed up with new added behaviour.

I introduced new "flag" concept in v15 to avoid the overloading.


  /*
   * Option parsing/processing.
   * Note that we can load an arbitrary number of plugins.
diff --git a/plugins/plugin.h b/plugins/plugin.h
index 5eb2fdbc85..ba0417194f 100644
--- a/plugins/plugin.h
+++ b/plugins/plugin.h
@@ -16,6 +16,7 @@
  #include "qemu/qht.h"
  #define QEMU_PLUGIN_MIN_VERSION 0
+#define QEMU_PLUGIN_FLAG_INSIN_CB_READ QEMU_PLUGIN_EV_MAX

Double definition.

Removed in v15.


  /* global state */
  struct qemu_plugin_state {
@@ -31,7 +32,7 @@ struct qemu_plugin_state {
       * but with the HT we avoid adding a field to CPUState.
       */
      GHashTable *cpu_ht;
-    DECLARE_BITMAP(mask, QEMU_PLUGIN_EV_MAX);
+    unsigned long flags;
      /*
       * @lock protects the struct as well as ctx->uninstalling.
       * The lock must be acquired by all API ops.
@@ -86,7 +87,7 @@ plugin_register_cb_udata(qemu_plugin_id_t id, enum qemu_plugin_event ev,
  void
  plugin_register_dyn_cb__udata(GArray **arr,
                                qemu_plugin_vcpu_udata_cb_t cb,
-                              enum qemu_plugin_cb_flags flags, void *udata);
+                              unsigned int flags, void *udata);
  void plugin_register_vcpu_mem_cb(GArray **arr,
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index 78b331b251..3bddd4d3c5 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -90,7 +90,10 @@ enum plugin_gen_cb {
   * These helpers are stubs that get dynamically switched out for calls
   * direct to the plugin if they are subscribed to.
   */
-void HELPER(plugin_vcpu_udata_cb)(uint32_t cpu_index, void *udata)
+void HELPER(plugin_vcpu_udata_cb_no_wg)(uint32_t cpu_index, void *udata)
+{ }
+
+void HELPER(plugin_vcpu_udata_cb_no_rwg)(uint32_t cpu_index, void *udata)
  { }
  void HELPER(plugin_vcpu_mem_cb)(unsigned int vcpu_index,
@@ -98,7 +101,7 @@ void HELPER(plugin_vcpu_mem_cb)(unsigned int vcpu_index,
                                  void *userdata)
  { }
-static void gen_empty_udata_cb(void)
+static void gen_empty_udata_cb(void (*gen_helper)(TCGv_i32, TCGv_ptr))
  {
      TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
      TCGv_ptr udata = tcg_temp_ebb_new_ptr();
@@ -106,12 +109,22 @@ static void gen_empty_udata_cb(void)
      tcg_gen_movi_ptr(udata, 0);
      tcg_gen_ld_i32(cpu_index, tcg_env,
                     -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
-    gen_helper_plugin_vcpu_udata_cb(cpu_index, udata);
+    gen_helper(cpu_index, udata);
      tcg_temp_free_ptr(udata);
      tcg_temp_free_i32(cpu_index);
  }
+static void gen_empty_udata_cb_no_wg(void)
+{
+    gen_empty_udata_cb(gen_helper_plugin_vcpu_udata_cb_no_wg);
+}
+
+static void gen_empty_udata_cb_no_rwg(void)
+{
+    gen_empty_udata_cb(gen_helper_plugin_vcpu_udata_cb_no_rwg);
+}
+
  /*
   * For now we only support addi_i64.
   * When we support more ops, we can generate one empty inline cb for each.
@@ -176,7 +189,7 @@ static void gen_wrapped(enum plugin_gen_from from,
      tcg_gen_plugin_cb_end();
  }
-static void plugin_gen_empty_callback(enum plugin_gen_from from)
+static void plugin_gen_empty_callback(CPUState *cpu, enum plugin_gen_from from)
  {
      switch (from) {
      case PLUGIN_GEN_AFTER_INSN:
@@ -190,9 +203,15 @@ static void plugin_gen_empty_callback(enum plugin_gen_from from)
           */
          gen_wrapped(from, PLUGIN_GEN_ENABLE_MEM_HELPER,
                      gen_empty_mem_helper);
-        /* fall through */
+        gen_wrapped(from, PLUGIN_GEN_CB_UDATA,
+                    cpu->plugin_flags & BIT(QEMU_PLUGIN_FLAG_INSN_CB_READ) ? +                    gen_empty_udata_cb_no_wg : gen_empty_udata_cb_no_rwg);
+        gen_wrapped(from, PLUGIN_GEN_CB_INLINE, gen_empty_inline_cb);
+        break;
      case PLUGIN_GEN_FROM_TB:
-        gen_wrapped(from, PLUGIN_GEN_CB_UDATA, gen_empty_udata_cb);
+        gen_wrapped(from, PLUGIN_GEN_CB_UDATA,
+                    cpu->plugin_flags & BIT(QEMU_PLUGIN_FLAG_TB_CB_READ) ?
+                    gen_empty_udata_cb_no_wg :
gen_empty_udata_cb_no_rwg);

What stops us from generating two empty callbacks (one no_wg one no_rwg)
and just discarding the unused ones after the instrumentation pass?

There is nothing stopping us from doing that, but I see no reason to do so when we can pick one just with a ternary operator.

I think I get the idea. Please check v16 and see if it does what you expect. It is indeed much simpler than the previous version.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]