Re: [PATCH v4 04/21] target/loongarch: Add fixed point arithmetic instru

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v4 04/21] target/loongarch: Add fixed point arithmetic instru

From:	Richard Henderson
Subject:	Re: [PATCH v4 04/21] target/loongarch: Add fixed point arithmetic instruction translation
Date:	Sat, 4 Sep 2021 13:04:24 +0200
User-agent:	Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.13.0

On 9/2/21 2:40 PM, Song Gao wrote:

+static bool gen_r2_si12(DisasContext *ctx, arg_fmt_rdrjsi12 *a,
+                        DisasExtend src_ext, DisasExtend dst_ext,
+                        void (*func)(TCGv, TCGv, TCGv))
+{
+    ctx->dst_ext = dst_ext;
+    TCGv dest = gpr_dst(ctx, a->rd);
+    TCGv src1 = gpr_src(ctx, a->rj, src_ext);
+    TCGv src2 = tcg_constant_tl(a->si12);


Prefer to put declarations before statements, as per old C90 still.

+    func(dest, src1, src2);
+
+    if (ctx->dst_ext) {

Given that func does not receive ctx, why store dst_ext into ctx and then read it back?It seems like you should just use the parameter directly.

+static bool gen_pc(DisasContext *ctx, arg_fmt_rdsi20 *a,
+                   void (*func)(DisasContext *ctx, TCGv, target_long))
+{
+    TCGv dest = gpr_dst(ctx, a->rd);
+
+    func(ctx, dest, a->si20);
+    return true;
+}


Perhaps clearer with:

  target_long (*func)(target_long pc, target_long si20)

  target_long addr = func(ctx->base.pc_next, a->si20);
  TCGv dest = gpr_dst(ctx, a->rd);

  tcg_gen_movi_tl(dest, addr);
  return true;

+static bool gen_mulh(DisasContext *ctx, arg_add_w *a,
+                     void(*func)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    TCGv dest = gpr_dst(ctx, a->rd);
+    TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+    TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+    TCGv_i32 discard = tcg_temp_new_i32();
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
+
+    tcg_gen_trunc_tl_i32(t0, src1);
+    tcg_gen_trunc_tl_i32(t1, src2);
+    func(discard, t0, t0, t1);
+    tcg_gen_ext_i32_tl(dest, t0);
+
+    tcg_temp_free_i32(discard);
+    tcg_temp_free_i32(t0);
+    tcg_temp_free_i32(t1);
+    return true;
+}


You should be able to use gen_r3 for these.

static void gen_mulh_w(TCGv dest, TCGv src1, TCGv src2)
{
    tcg_gen_mul_i64(dest, src1, src2);
    tcg_gen_sari_i64(dest, dest, 32);
}

static void gen_mulh_wu(TCGv dest, TCGv src1, TCGv src2)
{
    tcg_gen_mul_i64(dest, src1, src2);
    tcg_gen_sari_i64(dest, dest, 32);
}

static void gen_mulh_d(TCGv dest, TCGv src1, TCGv src2)
{
    TCGv discard = tcg_temp_new();
    tcg_gen_muls2_tl(discard, dest, src1, src2);
    tcg_temp_free(discard);
}

static void gen_mulh_du(TCGv dest, TCGv src1, TCGv src2)
{
    TCGv discard = tcg_temp_new();
    tcg_gen_mulu2_tl(discard, dest, src1, src2);
    tcg_temp_free(discard);
}

TRANS(mulh_w, gen_r3, EXT_SIGN, EXT_SIGN, EXT_NONE, gen_mulh_w)
TRANS(mulh_wu, gen_r3, EXT_ZERO, EXT_ZERO, EXT_NONE, gen_mulh_wu)
TRANS(mulh_d, gen_r3, EXT_NONE, EXT_NONE, EXT_NONE, gen_mulh_d)
TRANS(mulh_du, gen_r3, EXT_NONE, EXT_NONE, EXT_NONE, gen_mulh_du)

+static bool gen_mulw_d(DisasContext *ctx, arg_add_w *a,
+                     void(*func)(TCGv_i64, TCGv))
+{
+    TCGv dest = gpr_dst(ctx, a->rd);
+    TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+    TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+
+    func(src1, src1);
+    func(src2, src2);
+    tcg_gen_mul_i64(dest, src1, src2);
+    return true;
+}


The func parameter here serves the same purpose as DisasExtend, so again you 
can use gen_r3:

TRANS(mulw_d_w, gen_r3, EXT_SIGN, EXT_SIGN, EXT_NONE, tcg_gen_mul_tl)
TRANS(mulw_d_wu, gen_r3, EXT_ZERO, EXT_ZERO, EXT_NONE, tcg_gen_mul_tl)

+
+static bool gen_div_w(DisasContext *ctx, arg_fmt_rdrjrk *a,
+                      DisasExtend src_ext, DisasExtend dst_ext,
+                      void(*func)(TCGv, TCGv, TCGv))
+{
+    ctx->dst_ext = dst_ext;
+    TCGv dest = gpr_dst(ctx, a->rd);
+    TCGv src1 = gpr_src(ctx, a->rj, src_ext);
+    TCGv src2 = gpr_src(ctx, a->rk, src_ext);
+    TCGv t2 = tcg_temp_new();
+    TCGv t3 = tcg_temp_new();
+
+    tcg_gen_setcondi_tl(TCG_COND_EQ, t2, src1, INT_MIN);
+    tcg_gen_setcondi_tl(TCG_COND_EQ, t3, src2, -1);
+    tcg_gen_and_tl(t2, t2, t3);
+    tcg_gen_setcondi_tl(TCG_COND_EQ, t3, src2, 0);
+    tcg_gen_or_tl(t2, t2, t3);
+    tcg_gen_movi_tl(t3, 0);
+    tcg_gen_movcond_tl(TCG_COND_NE, src2, t2, t3, t2, src2);


Bug, writing back to src2.

+    func(dest, src1, src2);


You can split this out differently so that you can use gen_r3.

static TCGv prep_divisor_d(TCGv src1, TCGv src2)
{
    TCGv t0 = temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv zero = tcg_constant_tl(0);

    /*
     * If min / -1, set the divisor to 1.
     * This avoids potential host overflow trap and produces min.
     * If x / 0, set the divisor to 1.
     * This avoids potential host overflow trap;
     * the required result is undefined.
     */
    tcg_gen_setcondi_tl(TCG_COND_EQ, t0, src1, INT64_MIN);
    tcg_gen_setcondi_tl(TCG_COND_EQ, t1, src2, -1);
    tcg_gen_setcondi_tl(TCG_COND_EQ, t2, src2, 0);
    tcg_gen_and_tl(t0, t0, t1);
    tcg_gen_or_tl(t0, t0, t2);
    tcg_gen_movcond_tl(TCG_COND_NE, t0, t0, zero, t0, src2);

    tcg_temp_free(t1);
    tcg_temp_free(t2);
    return t0;
}

static TCGv prep_divisor_du(TCGv src2)
{
    TCGv t0 = temp_new();
    TCGv zero = tcg_constant_tl(0);
    TCGv one = tcg_constant_tl(1);

    /*
     * If x / 0, set the divisor to 1.
     * This avoids potential host overflow trap;
     * the required result is undefined.
     */
    tcg_gen_movcond_tl(TCG_COND_EQ, t0, src2, zero, one, src2);
    return t0;
}

static void gen_div_d(TCGv dest, TCGv src1, TCGv src2)
{
    src2 = prep_divisor_d(src1, src2);
    tcg_gen_div_tl(dest, src1, src2);
}

static void gen_rem_d(TCGv dest, TCGv src1, TCGv src2)
{
    src2 = prep_divisor_d(src1, src2);
    tcg_gen_rem_tl(dest, src1, src2);
}

static void gen_div_du(TCGv dest, TCGv src1, TCGv src2)
{
    src2 = prep_divisor_du(src2);
    tcg_gen_divu_tl(dest, src1, src2);
}

static void gen_rem_du(TCGv dest, TCGv src1, TCGv src2)
{
    src2 = prep_divisor_du(src2);
    tcg_gen_remu_tl(dest, src1, src2);
}

static void gen_div_w(TCGv dest, TCGv src1, TCGv src2)
{
    /* We need not check for integer overflow for div_w. */
    src2 = prep_divisor_du(src2);
    tcg_gen_div_tl(dest, src1, src2);
}

static void gen_rem_w(TCGv dest, TCGv src1, TCGv src2)
{
    /* We need not check for integer overflow for rem_w. */
    src2 = prep_divisor_du(src2);
    tcg_gen_rem_tl(dest, src1, src2);
}

TRANS(div_w, gen_r3, EXT_SIGN, EXT_SIGN, EXT_SIGN, gen_div_w)
TRANS(mod_w, gen_r3, EXT_SIGN, EXT_SIGN, EXT_SIGN, gen_rem_w)
TRANS(div_wu, gen_r3, EXT_ZERO, EXT_ZERO, EXT_SIGN, gen_div_du)
TRANS(mod_wu, gen_r3, EXT_ZERO, EXT_ZERO, EXT_SIGN, gen_div_du)
TRANS(div_d, gen_r3, EXT_NONE, EXT_NONE, EXT_NONE, gen_div_d)
TRANS(mod_d, gen_r3, EXT_NONE, EXT_NONE, EXT_NONE, gen_rem_d)
TRANS(div_du, gen_r3, EXT_NONE, EXT_NONE, EXT_NONE, gen_div_du)
TRANS(mod_du, gen_r3, EXT_NONE, EXT_NONE, EXT_NONE, gen_rem_du)

--- a/target/loongarch/internals.h
+++ b/target/loongarch/internals.h
@@ -9,7 +9,6 @@
  #ifndef LOONGARCH_INTERNALS_H
  #define LOONGARCH_INTERNALS_H

-

  void loongarch_translate_init(void);

void loongarch_cpu_dump_state(CPUState *cpu, FILE *f, int flags);


Fold this back to a previous patch.

-static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
-{
-    return true;
-}


Yes indeed, fold this patch to a previous patch.


r~

[Prev in Thread]

Current Thread

[Next in Thread]

[PATCH v4 00/21] Add LoongArch linux-user emulation support, Song Gao, 2021/09/02
- [PATCH v4 01/21] target/loongarch: Add README, Song Gao, 2021/09/02
- [PATCH v4 05/21] target/loongarch: Add fixed point shift instruction translation, Song Gao, 2021/09/02
  - Re: [PATCH v4 05/21] target/loongarch: Add fixed point shift instruction translation, Richard Henderson, 2021/09/04
- [PATCH v4 04/21] target/loongarch: Add fixed point arithmetic instruction translation, Song Gao, 2021/09/02
  - Re: [PATCH v4 04/21] target/loongarch: Add fixed point arithmetic instruction translation, Richard Henderson <=
    - Re: [PATCH v4 04/21] target/loongarch: Add fixed point arithmetic instruction translation, Song Gao, 2021/09/07
- [PATCH v4 08/21] target/loongarch: Add fixed point atomic instruction translation, Song Gao, 2021/09/02
  - Re: [PATCH v4 08/21] target/loongarch: Add fixed point atomic instruction translation, Richard Henderson, 2021/09/04
- [PATCH v4 11/21] target/loongarch: Add floating point comparison instruction translation, Song Gao, 2021/09/02
  - Re: [PATCH v4 11/21] target/loongarch: Add floating point comparison instruction translation, Richard Henderson, 2021/09/05
- [PATCH v4 13/21] target/loongarch: Add floating point move instruction translation, Song Gao, 2021/09/02
  - Re: [PATCH v4 13/21] target/loongarch: Add floating point move instruction translation, Richard Henderson, 2021/09/05
  - Re: [PATCH v4 13/21] target/loongarch: Add floating point move instruction translation, Richard Henderson, 2021/09/05
- [PATCH v4 02/21] target/loongarch: Add core definition, Song Gao, 2021/09/02
  - Re: [PATCH v4 02/21] target/loongarch: Add core definition, Richard Henderson, 2021/09/04

Prev by Date: Re: [PATCH v4 03/21] target/loongarch: Add main translation routines
Next by Date: Re: [PATCH v4 05/21] target/loongarch: Add fixed point shift instruction translation
Previous by thread: [PATCH v4 04/21] target/loongarch: Add fixed point arithmetic instruction translation
Next by thread: Re: [PATCH v4 04/21] target/loongarch: Add fixed point arithmetic instruction translation
Index(es):
- Date
- Thread