qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v6 17/21] accel/tcg: Add fast path for translator_ld*


From: Ilya Leoshkevich
Subject: Re: [PATCH v6 17/21] accel/tcg: Add fast path for translator_ld*
Date: Tue, 23 Aug 2022 01:15:44 +0200
User-agent: Evolution 3.36.5-0ubuntu1

On Thu, 2022-08-18 at 20:26 -0700, Richard Henderson wrote:
> Cache the translation from guest to host address, so we may
> use direct loads when we hit on the primary translation page.
> 
> Look up the second translation page only once, during translation.
> This obviates another lookup of the second page within tb_gen_code
> after translation.
> 
> Fixes a bug in that plugin_insn_append should be passed the bytes
> in the original memory order, not bswapped by pieces.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  include/exec/translator.h |  63 +++++++++++--------
>  accel/tcg/translate-all.c |  26 +++-----
>  accel/tcg/translator.c    | 127 +++++++++++++++++++++++++++++-------
> --
>  3 files changed, 144 insertions(+), 72 deletions(-)
> 
> diff --git a/include/exec/translator.h b/include/exec/translator.h
> index 69db0f5c21..329a42fe46 100644
> --- a/include/exec/translator.h
> +++ b/include/exec/translator.h
> @@ -81,24 +81,14 @@ typedef enum DisasJumpType {
>   * Architecture-agnostic disassembly context.
>   */
>  typedef struct DisasContextBase {
> -    const TranslationBlock *tb;
> +    TranslationBlock *tb;
>      target_ulong pc_first;
>      target_ulong pc_next;
>      DisasJumpType is_jmp;
>      int num_insns;
>      int max_insns;
>      bool singlestep_enabled;
> -#ifdef CONFIG_USER_ONLY
> -    /*
> -     * Guest address of the last byte of the last protected page.
> -     *
> -     * Pages containing the translated instructions are made non-
> writable in
> -     * order to achieve consistency in case another thread is
> modifying the
> -     * code while translate_insn() fetches the instruction bytes
> piecemeal.
> -     * Such writer threads are blocked on mmap_lock() in
> page_unprotect().
> -     */
> -    target_ulong page_protect_end;
> -#endif
> +    void *host_addr[2];
>  } DisasContextBase;
>  
>  /**
> @@ -183,24 +173,43 @@ bool translator_use_goto_tb(DisasContextBase
> *db, target_ulong dest);
>   * the relevant information at translation time.
>   */
>  
> -#define GEN_TRANSLATOR_LD(fullname, type, load_fn,
> swap_fn)             \
> -    type fullname ## _swap(CPUArchState *env, DisasContextBase
> *dcbase, \
> -                           abi_ptr pc, bool
> do_swap);                   \
> -    static inline type fullname(CPUArchState
> *env,                      \
> -                                DisasContextBase *dcbase, abi_ptr
> pc)   \
> -    {                                                               
>     \
> -        return fullname ## _swap(env, dcbase, pc,
> false);               \
> +uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc);
> +uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc);
> +uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc);
> +uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc);
> +
> +static inline uint16_t
> +translator_lduw_swap(CPUArchState *env, DisasContextBase *db,
> +                     abi_ptr pc, bool do_swap)
> +{
> +    uint16_t ret = translator_lduw(env, db, pc);
> +    if (do_swap) {
> +        ret = bswap16(ret);
>      }
> +    return ret;
> +}
>  
> -#define
> FOR_EACH_TRANSLATOR_LD(F)                                       \
> -    F(translator_ldub, uint8_t, cpu_ldub_code, /* no swap
> */)           \
> -    F(translator_lduw, uint16_t, cpu_lduw_code,
> bswap16)                \
> -    F(translator_ldl, uint32_t, cpu_ldl_code,
> bswap32)                  \
> -    F(translator_ldq, uint64_t, cpu_ldq_code, bswap64)
> +static inline uint32_t
> +translator_ldl_swap(CPUArchState *env, DisasContextBase *db,
> +                    abi_ptr pc, bool do_swap)
> +{
> +    uint32_t ret = translator_ldl(env, db, pc);
> +    if (do_swap) {
> +        ret = bswap32(ret);
> +    }
> +    return ret;
> +}
>  
> -FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
> -
> -#undef GEN_TRANSLATOR_LD
> +static inline uint64_t
> +translator_ldq_swap(CPUArchState *env, DisasContextBase *db,
> +                    abi_ptr pc, bool do_swap)
> +{
> +    uint64_t ret = translator_ldq_swap(env, db, pc, false);
> +    if (do_swap) {
> +        ret = bswap64(ret);
> +    }
> +    return ret;
> +}
>  
>  /*
>   * Return whether addr is on the same page as where disassembly
> started.
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index b224f856d0..e44f40b234 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1385,10 +1385,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>  {
>      CPUArchState *env = cpu->env_ptr;
>      TranslationBlock *tb, *existing_tb;
> -    tb_page_addr_t phys_pc, phys_page2;
> -    target_ulong virt_page2;
> +    tb_page_addr_t phys_pc;
>      tcg_insn_unit *gen_code_buf;
>      int gen_code_size, search_size, max_insns;
> +    void *host_pc;
>  #ifdef CONFIG_PROFILER
>      TCGProfile *prof = &tcg_ctx->prof;
>      int64_t ti;
> @@ -1397,7 +1397,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>      assert_memory_lock();
>      qemu_thread_jit_write();
>  
> -    phys_pc = get_page_addr_code_hostp(env, pc, false, NULL);
> +    phys_pc = get_page_addr_code_hostp(env, pc, false, &host_pc);
>  
>      if (phys_pc == -1) {
>          /* Generate a one-shot TB with 1 insn in it */
> @@ -1428,6 +1428,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>      tb->flags = flags;
>      tb->cflags = cflags;
>      tb->trace_vcpu_dstate = *cpu->trace_dstate;
> +    tb->page_addr[0] = phys_pc;
> +    tb->page_addr[1] = -1;
>      tcg_ctx->tb_cflags = cflags;
>   tb_overflow:
>  
> @@ -1621,13 +1623,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>      }
>  
>      /*
> -     * If the TB is not associated with a physical RAM page then
> -     * it must be a temporary one-insn TB, and we have nothing to do
> -     * except fill in the page_addr[] fields. Return early before
> -     * attempting to link to other TBs or add to the lookup table.
> +     * If the TB is not associated with a physical RAM page then it
> must be
> +     * a temporary one-insn TB, and we have nothing left to do.
> Return early
> +     * before attempting to link to other TBs or add to the lookup
> table.
>       */
> -    if (phys_pc == -1) {
> -        tb->page_addr[0] = tb->page_addr[1] = -1;
> +    if (tb->page_addr[0] == -1) {
>          return tb;
>      }
>  
> @@ -1638,17 +1638,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>       */
>      tcg_tb_insert(tb);
>  
> -    /* check next page if needed */
> -    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
> -    phys_page2 = -1;
> -    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
> -        phys_page2 = get_page_addr_code(env, virt_page2);
> -    }
>      /*
>       * No explicit memory barrier is required -- tb_link_page()
> makes the
>       * TB visible in a consistent state.
>       */
> -    existing_tb = tb_link_page(tb, phys_pc, phys_page2);
> +    existing_tb = tb_link_page(tb, tb->page_addr[0], tb-
> >page_addr[1]);
>      /* if the TB already exists, discard what we just translated */
>      if (unlikely(existing_tb != tb)) {
>          uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> index 3eef30d93a..c8e9523e52 100644
> --- a/accel/tcg/translator.c
> +++ b/accel/tcg/translator.c
> @@ -42,15 +42,6 @@ bool translator_use_goto_tb(DisasContextBase *db,
> target_ulong dest)
>      return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0;
>  }
>  
> -static inline void translator_page_protect(DisasContextBase *dcbase,
> -                                           target_ulong pc)
> -{
> -#ifdef CONFIG_USER_ONLY
> -    dcbase->page_protect_end = pc | ~TARGET_PAGE_MASK;
> -    page_protect(pc);
> -#endif
> -}
> -
>  void translator_loop(CPUState *cpu, TranslationBlock *tb, int
> max_insns,
>                       target_ulong pc, void *host_pc,
>                       const TranslatorOps *ops, DisasContextBase *db)
> @@ -66,7 +57,12 @@ void translator_loop(CPUState *cpu,
> TranslationBlock *tb, int max_insns,
>      db->num_insns = 0;
>      db->max_insns = max_insns;
>      db->singlestep_enabled = cflags & CF_SINGLE_STEP;
> -    translator_page_protect(db, db->pc_next);
> +    db->host_addr[0] = host_pc;
> +    db->host_addr[1] = NULL;
> +
> +#ifdef CONFIG_USER_ONLY
> +    page_protect(pc);
> +#endif
>  
>      ops->init_disas_context(db, cpu);
>      tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
> @@ -151,31 +147,104 @@ void translator_loop(CPUState *cpu,
> TranslationBlock *tb, int max_insns,
>  #endif
>  }
>  
> -static inline void translator_maybe_page_protect(DisasContextBase
> *dcbase,
> -                                                 target_ulong pc,
> size_t len)
> +static void *translator_access(CPUArchState *env, DisasContextBase
> *db,
> +                               target_ulong pc, size_t len)
>  {
> -#ifdef CONFIG_USER_ONLY
> -    target_ulong end = pc + len - 1;
> +    void *host;
> +    target_ulong base, end;
> +    TranslationBlock *tb;
>  
> -    if (end > dcbase->page_protect_end) {
> -        translator_page_protect(dcbase, end);
> +    tb = db->tb;
> +
> +    /* Use slow path if first page is MMIO. */
> +    if (unlikely(tb->page_addr[0] == -1)) {
> +        return NULL;
>      }
> +
> +    end = pc + len - 1;
> +    if (likely(is_same_page(db, end))) {
> +        host = db->host_addr[0];
> +        base = db->pc_first;
> +    } else {
> +        host = db->host_addr[1];
> +        base = TARGET_PAGE_ALIGN(db->pc_first);
> +        if (host == NULL) {
> +            tb->page_addr[1] =
> +                get_page_addr_code_hostp(env, base, false,
> +                                         &db->host_addr[1]);
> +#ifdef CONFIG_USER_ONLY
> +            page_protect(end);
>  #endif
> +            /* We cannot handle MMIO as second page. */
> +            assert(tb->page_addr[1] != -1);
> +            host = db->host_addr[1];
> +        }
> +
> +        /* Use slow path when crossing pages. */
> +        if (is_same_page(db, pc)) {
> +            return NULL;
> +        }
> +    }
> +
> +    tcg_debug_assert(pc >= base);
> +    return host + (pc - base);
>  }
>  
> -#define GEN_TRANSLATOR_LD(fullname, type, load_fn,
> swap_fn)             \
> -    type fullname ## _swap(CPUArchState *env, DisasContextBase
> *dcbase, \
> -                           abi_ptr pc, bool
> do_swap)                    \
> -    {                                                               
>     \
> -        translator_maybe_page_protect(dcbase, pc,
> sizeof(type));        \
> -        type ret = load_fn(env,
> pc);                                    \
> -        if (do_swap)
> {                                                  \
> -            ret =
> swap_fn(ret);                                         \
> -        }                                                           
>     \
> -        plugin_insn_append(pc, &ret,
> sizeof(ret));                      \
> -        return
> ret;                                                     \
> +uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc)
> +{
> +    uint8_t ret;
> +    void *p = translator_access(env, db, pc, sizeof(ret));
> +
> +    if (p) {
> +        plugin_insn_append(pc, p, sizeof(ret));
> +        return ldub_p(p);
>      }
> +    ret = cpu_ldub_code(env, pc);
> +    plugin_insn_append(pc, &ret, sizeof(ret));
> +    return ret;
> +}
>  
> -FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
> +uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc)
> +{
> +    uint16_t ret, plug;
> +    void *p = translator_access(env, db, pc, sizeof(ret));
>  
> -#undef GEN_TRANSLATOR_LD
> +    if (p) {
> +        plugin_insn_append(pc, p, sizeof(ret));
> +        return lduw_p(p);
> +    }
> +    ret = cpu_lduw_code(env, pc);
> +    plug = tswap16(ret);
> +    plugin_insn_append(pc, &plug, sizeof(ret));
> +    return ret;
> +}
> +
> +uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc)
> +{
> +    uint32_t ret, plug;
> +    void *p = translator_access(env, db, pc, sizeof(ret));
> +
> +    if (p) {
> +        plugin_insn_append(pc, p, sizeof(ret));
> +        return ldl_p(p);
> +    }
> +    ret = cpu_ldl_code(env, pc);
> +    plug = tswap32(ret);
> +    plugin_insn_append(pc, &plug, sizeof(ret));
> +    return ret;
> +}
> +
> +uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db,
> abi_ptr pc)
> +{
> +    uint64_t ret, plug;
> +    void *p = translator_access(env, db, pc, sizeof(ret));
> +
> +    if (p) {
> +        plugin_insn_append(pc, p, sizeof(ret));
> +        return ldq_p(p);
> +    }
> +    ret = cpu_ldq_code(env, pc);
> +    plug = tswap64(ret);
> +    plugin_insn_append(pc, &plug, sizeof(ret));
> +    return ret;
> +}

Hi,

I think you need the following fixup here:

--- a/tests/tcg/multiarch/noexec.c.inc
+++ b/tests/tcg/multiarch/noexec.c.inc
@@ -1,8 +1,5 @@
 /*
  * Common code for arch-specific MMU_INST_FETCH fault testing.
- *
- * Declare struct arch_noexec_test before including this file and
define
- * arch_check_mcontext() after that.
  */
 
 #define _GNU_SOURCE
@@ -13,6 +10,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
+#include <unistd.h>
 #include <sys/mman.h>
 #include <sys/ucontext.h>

After the simplifications the comment is no longer true or useful;
unistd.h is needed for getpagesize().

With that:

Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>

for the series.

Best regards,
Ilya




reply via email to

[Prev in Thread] Current Thread [Next in Thread]