[RFC PATCH 3/4] target/ppc: Implement instruction caching for muladd

qemu-ppc

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[RFC PATCH 3/4] target/ppc: Implement instruction caching for muladd

From:	Víctor Colombo
Subject:	[RFC PATCH 3/4] target/ppc: Implement instruction caching for muladd
Date:	Wed, 5 Oct 2022 11:37:18 -0300

This patch adds the code necessary to cache muladd instructions
for usage with hardfpu in Power.

muladd is implemented by a function that receives five arguments (three
float64 operands, an int holding the muladd flags, and a pointer to a
float_status) and returns a float64. This information is cached inside
the union in env, which grows as instructions with other signatures are
added.

Hardfpu in QEMU only works when the inexact flag is already set. So,
CACHE_FN_5 checks whether FP_XX is set and, if it is, caches the call
and sets float_flag_inexact to enable the hardfpu behavior. When the
instruction is later re-executed, it runs with float_flag_inexact
cleared, forcing softfloat and correctly updating the relevant flags,
as is done today.

Signed-off-by: Víctor Colombo <victor.colombo@eldorado.org.br>
---
 target/ppc/cpu.h        | 11 +++++++++++
 target/ppc/fpu_helper.c | 34 ++++++++++++++++++++++++++++++++--
 2 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index b423e33a0c..87183de484 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1083,6 +1083,7 @@ struct ppc_radix_page_info {
 enum {
     CACHED_FN_TYPE_NONE,
     CACHED_FN_TYPE_F64_F64_FSTATUS,
+    CACHED_FN_TYPE_F64_F64_F64_F64_I_FSTATUS,
 
 };
 
@@ -1092,6 +1093,15 @@ struct cached_fn_f64_f64_fstatus {
     float_status arg2;
 };
 
+struct cached_fn_f64_f64_f64_f64_i_fstatus {
+    float64 (*fn)(float64, float64, float64, int, float_status*);
+    float64 arg1;
+    float64 arg2;
+    float64 arg3;
+    int arg4;
+    float_status arg5;
+};
+
 struct CPUArchState {
     /* Most commonly used resources during translated code execution first */
     target_ulong gpr[32];  /* general purpose registers */
@@ -1172,6 +1182,7 @@ struct CPUArchState {
     int cached_fn_type;
     union {
         struct cached_fn_f64_f64_fstatus f64_f64_fstatus;
+        struct cached_fn_f64_f64_f64_f64_i_fstatus f64_f64_f64_f64_i_fstatus;
     } cached_fn;
 
     /* Internal devices resources */
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index b68f12a1a9..3d06a0fc1a 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -45,6 +45,23 @@
         }                                                                     \
     } while (0)
 
+#define CACHE_FN_5(env, FN, ARG1, ARG2, ARG3, ARG4, FIELD, TYPE)              \
+    do {                                                                      \
+        if (env->fpscr & FP_XX) {                                             \
+            env->cached_fn_type = TYPE;                                       \
+            env->cached_fn.FIELD.fn = FN;                                     \
+            env->cached_fn.FIELD.arg1 = ARG1;                                 \
+            env->cached_fn.FIELD.arg2 = ARG2;                                 \
+            env->cached_fn.FIELD.arg3 = ARG3;                                 \
+            env->cached_fn.FIELD.arg4 = ARG4;                                 \
+            env->fp_status.float_exception_flags |= float_flag_inexact;       \
+        } else {                                                              \
+            assert(!(env->fp_status.float_exception_flags &                   \
+                     float_flag_inexact));                                    \
+            env->cached_fn_type = CACHED_FN_TYPE_NONE;                        \
+        }                                                                     \
+    } while (0)
+
 static inline float128 float128_snan_to_qnan(float128 x)
 {
     float128 r;
@@ -566,6 +583,17 @@ void helper_execute_fp_cached(CPUPPCState *env)
             env->fpscr |= FP_FI | FP_XX;
         }
         break;
+    case CACHED_FN_TYPE_F64_F64_F64_F64_I_FSTATUS:
+        ; /* hack to allow declaration below */
+        struct cached_fn_f64_f64_f64_f64_i_fstatus args =
+            env->cached_fn.f64_f64_f64_f64_i_fstatus;
+        assert(!(args.arg5.float_exception_flags & float_flag_inexact));
+        args.fn(args.arg1, args.arg2, args.arg3, args.arg4, &args.arg5);
+        env->fpscr &= ~FP_FI;
+        if (args.arg5.float_exception_flags & float_flag_inexact) {
+            env->fpscr |= FP_FI | FP_XX;
+        }
+        break;
     default:
         g_assert_not_reached();
     }
@@ -836,7 +864,8 @@ static void float_invalid_op_madd(CPUPPCState *env, int flags,
 static float64 do_fmadd(CPUPPCState *env, float64 a, float64 b,
                          float64 c, int madd_flags, uintptr_t retaddr)
 {
-    CACHE_FN_NONE(env);
+    CACHE_FN_5(env, float64_muladd, a, b, c, madd_flags,
+        f64_f64_f64_f64_i_fstatus, CACHED_FN_TYPE_F64_F64_F64_F64_I_FSTATUS);
     float64 ret = float64_muladd(a, b, c, madd_flags, &env->fp_status);
     int flags = get_float_exception_flags(&env->fp_status);
 
@@ -849,7 +878,8 @@ static float64 do_fmadd(CPUPPCState *env, float64 a, float64 b,
 static uint64_t do_fmadds(CPUPPCState *env, float64 a, float64 b,
                           float64 c, int madd_flags, uintptr_t retaddr)
 {
-    CACHE_FN_NONE(env);
+    CACHE_FN_5(env, float64r32_muladd, a, b, c, madd_flags,
+        f64_f64_f64_f64_i_fstatus, CACHED_FN_TYPE_F64_F64_F64_F64_I_FSTATUS);
     float64 ret = float64r32_muladd(a, b, c, madd_flags, &env->fp_status);
     int flags = get_float_exception_flags(&env->fp_status);
 
-- 
2.25.1

[Prev in Thread]

Current Thread

[Next in Thread]

[RFC PATCH 0/4] Idea for using hardfloat in PPC, Víctor Colombo, 2022/10/05
- [RFC PATCH 1/4] target/ppc: prepare instructions to work with caching last FP insn, Víctor Colombo, 2022/10/05
- [RFC PATCH 2/4] target/ppc: Implement instruction caching for fsqrt, Víctor Colombo, 2022/10/05
- [RFC PATCH 3/4] target/ppc: Implement instruction caching for muladd, Víctor Colombo <=
- [RFC PATCH 4/4] target/ppc: Enable hardfpu for Power, Víctor Colombo, 2022/10/05
- Re: [RFC PATCH 0/4] Idea for using hardfloat in PPC, Richard Henderson, 2022/10/05
  - Re: [RFC PATCH 0/4] Idea for using hardfloat in PPC, Alex Bennée, 2022/10/07
    - Re: [RFC PATCH 0/4] Idea for using hardfloat in PPC, Richard Henderson, 2022/10/07

Prev by Date: [RFC PATCH 2/4] target/ppc: Implement instruction caching for fsqrt
Next by Date: [RFC PATCH 4/4] target/ppc: Enable hardfpu for Power
Previous by thread: [RFC PATCH 2/4] target/ppc: Implement instruction caching for fsqrt
Next by thread: [RFC PATCH 4/4] target/ppc: Enable hardfpu for Power
Index(es):
- Date
- Thread