qemu-riscv
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RFC 2/6] target/riscv: rvk: add implementation of instructions for


From: liweiwei
Subject: Re: [RFC 2/6] target/riscv: rvk: add implementation of instructions for Zbk* - reuse partial instructions of Zbb/Zbc extensions - add brev8 packh, unzip, zip, etc.
Date: Wed, 3 Nov 2021 08:56:03 +0800
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.13.0

Thanks for your suggestions.

在 2021/11/2 下午11:44, Richard Henderson 写道:
On 11/1/21 11:11 PM, liweiwei wrote:
Signed-off-by: liweiwei <liweiwei@iscas.ac.cn>
Signed-off-by: wangjunqiang <wangjunqiang@iscas.ac.cn>

You managed to get the whole patch description into the subject line.
Please break it up.

OK.
+target_ulong HELPER(grev)(target_ulong rs1, target_ulong rs2)
+{
+    return do_grev(rs1, rs2, TARGET_LONG_BITS);
+}

Are we expecting to see the full grev instruction at any point? If not, we can certainly implement Zbk with a simpler implementation.
The main idea that I add this helper is that  grev may be added to B-extension later and it can be reused. However, it have no effect currently.  I'll replace this with a simpler implementation.

+target_ulong HELPER(xperm)(target_ulong rs1, target_ulong rs2, uint32_t sz_log2)
+{
+    target_ulong r = 0;
+    target_ulong sz = 1LL << sz_log2;
+    target_ulong mask = (1LL << sz) - 1;
+    for (int i = 0; i < TARGET_LONG_BITS; i += sz) {
+        target_ulong pos = ((rs2 >> i) & mask) << sz_log2;
+        if (pos < sizeof(target_ulong) * 8) {
+            r |= ((rs1 >> pos) & mask) << i;
+        }
+    }
+    return r;
+}

This could become a static inline do_xperm, and provide two specific xperm4 and xperm8 helpers; the compiler would fold all of the sz_log2 stuff into a more efficient implementation.
OK.

+target_ulong HELPER(unshfl)(target_ulong rs1,
+                            target_ulong rs2)
+{
+    target_ulong x = rs1;
+    int i, shift;
+    int bits = TARGET_LONG_BITS >> 1;
+    for (i = 0, shift = 1; shift < bits; i++, shift <<= 1) {
+        if (rs2 & shift) {
+            x = do_shuf_stage(x, shuf_masks[i], shuf_masks[i] >> shift, shift);
+        }
+    }
+    return x;
+}
+
+target_ulong HELPER(shfl)(target_ulong rs1,
+                          target_ulong rs2)
+{
+    target_ulong x = rs1;
+    int i, shift;
+    shift = TARGET_LONG_BITS >> 2;
+    i = (shift == 8) ? 3 : 4;
+    for (; i >= 0; i--, shift >>= 1) {
+        if (rs2 & shift) {
+            x = do_shuf_stage(x, shuf_masks[i], shuf_masks[i] >> shift, shift);
+        }
+    }
+    return x;
+}

Similar comment as for grev.

+# The encoding for zext.h differs between RV32 and RV64.
+# zext_h_32 denotes the RV32 variant.
+{
+  zext_h_32  0000100 00000 ..... 100 ..... 0110011 @r2
+  pack       0000100 ..... ..... 100 ..... 0110011 @r
+}

Note to self: improve tcg_gen_deposit to notice zeros, so that the more general pack compiles to zero-extension.

@@ -556,6 +563,81 @@ static bool gen_unary_per_ol(DisasContext *ctx, arg_r2 *a, DisasExtend ext,
      return gen_unary(ctx, a, ext, f_tl);
  }
  +static bool gen_xperm(DisasContext *ctx, arg_r *a, int32_t size)
+{
+    TCGv dest = dest_gpr(ctx, a->rd);
+    TCGv src1 = get_gpr(ctx, a->rs1, EXT_NONE);
+    TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE);
+
+    TCGv_i32 sz = tcg_const_i32(size);
+    gen_helper_xperm(dest, src1, src2, sz);
+
+    gen_set_gpr(ctx, a->rd, dest);
+    tcg_temp_free_i32(sz);
+    return true;
+}
+
+static bool gen_grevi(DisasContext *ctx, arg_r2 *a, int shamt)
+{
+    TCGv dest = dest_gpr(ctx, a->rd);
+    TCGv src1 = get_gpr(ctx, a->rs1, EXT_NONE);
+
+    if (shamt == (TARGET_LONG_BITS - 8)) {
+        /* rev8, byte swaps */
+        tcg_gen_bswap_tl(dest, src1);
+    } else {
+        TCGv src2 = tcg_temp_new();
+        tcg_gen_movi_tl(src2, shamt);
+        gen_helper_grev(dest, src1, src2);
+        tcg_temp_free(src2);
+    }
+
+    gen_set_gpr(ctx, a->rd, dest);
+    return true;
+}
+
+static void gen_pack(TCGv ret, TCGv src1, TCGv src2)
+{
+    tcg_gen_deposit_tl(ret, src1, src2,
+                       TARGET_LONG_BITS / 2,
+                       TARGET_LONG_BITS / 2);
+}
+
+static void gen_packh(TCGv ret, TCGv src1, TCGv src2)
+{
+    TCGv t = tcg_temp_new();
+    tcg_gen_ext8u_tl(t, src2);
+    tcg_gen_deposit_tl(ret, src1, t, 8, TARGET_LONG_BITS - 8);
+    tcg_temp_free(t);
+}
+
+static void gen_packw(TCGv ret, TCGv src1, TCGv src2)
+{
+    TCGv t = tcg_temp_new();
+    tcg_gen_ext16s_tl(t, src2);
+    tcg_gen_deposit_tl(ret, src1, t, 16, 48);
+    tcg_temp_free(t);
+}
+
+static bool gen_shufi(DisasContext *ctx, arg_r2 *a, int shamt,
+                       void(*func)(TCGv, TCGv, TCGv))
+{
+    if (shamt >= TARGET_LONG_BITS / 2) {
+        return false;
+    }
+
+    TCGv dest = dest_gpr(ctx, a->rd);
+    TCGv src1 = get_gpr(ctx, a->rs1, EXT_NONE);
+    TCGv src2 = tcg_temp_new();
+
+    tcg_gen_movi_tl(src2, shamt);
+    (*func)(dest, src1, src2);
+
+    gen_set_gpr(ctx, a->rd, dest);
+    tcg_temp_free(src2);
+    return true;
+}

All of the gen functions belong in insn_trans/trans_rvb.c.inc.
OK. I'll move them to insn_trans/trans_rvb.c.inc.


r~




reply via email to

[Prev in Thread] Current Thread [Next in Thread]