[RFC v2 62/76] target/riscv: rvv-0.9: single-width floating-point reduct

qemu-riscv

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[RFC v2 62/76] target/riscv: rvv-0.9: single-width floating-point reduct

From:	frank . chang
Subject:	[RFC v2 62/76] target/riscv: rvv-0.9: single-width floating-point reduction
Date:	Wed, 22 Jul 2020 17:16:25 +0800

From: Frank Chang <frank.chang@sifive.com>

Separate the implementation of vfredsum.vs and vfredosum.vs.

Introduce propagate NaN feature for vfredsum.vs as implementations are
permitted to canonicalize the NaN and, if the NaN is signaling, set
the invalid exception flag.

Signed-off-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/helper.h                   |   3 +
 target/riscv/insn32.decode              |   3 +-
 target/riscv/insn_trans/trans_rvv.inc.c |   1 +
 target/riscv/vector_helper.c            | 144 +++++++++++++++++++-----
 4 files changed, 120 insertions(+), 31 deletions(-)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 318fe643f4..6957a98237 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1066,6 +1066,9 @@ DEF_HELPER_6(vwredsum_vs_w, void, ptr, ptr, ptr, ptr, 
env, i32)
 DEF_HELPER_6(vfredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vfredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vfredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vfredosum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vfredosum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vfredosum_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vfredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vfredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vfredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index e4b36af89e..0fe46c10c2 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -575,7 +575,8 @@ vredmax_vs      000111 . ..... ..... 010 ..... 1010111 @r_vm
 vwredsumu_vs    110000 . ..... ..... 000 ..... 1010111 @r_vm
 vwredsum_vs     110001 . ..... ..... 000 ..... 1010111 @r_vm
 # Vector ordered and unordered reduction sum
-vfredsum_vs     0000-1 . ..... ..... 001 ..... 1010111 @r_vm
+vfredsum_vs     000001 . ..... ..... 001 ..... 1010111 @r_vm
+vfredosum_vs    000011 . ..... ..... 001 ..... 1010111 @r_vm
 vfredmin_vs     000101 . ..... ..... 001 ..... 1010111 @r_vm
 vfredmax_vs     000111 . ..... ..... 001 ..... 1010111 @r_vm
 # Vector widening ordered and unordered float reduction sum
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c 
b/target/riscv/insn_trans/trans_rvv.inc.c
index c1fc168043..37eee6cf97 100644
--- a/target/riscv/insn_trans/trans_rvv.inc.c
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
@@ -3011,6 +3011,7 @@ GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_widen_check)
 
 /* Vector Single-Width Floating-Point Reduction Instructions */
 GEN_OPFVV_TRANS(vfredsum_vs, reduction_check)
+GEN_OPFVV_TRANS(vfredosum_vs, reduction_check)
 GEN_OPFVV_TRANS(vfredmax_vs, reduction_check)
 GEN_OPFVV_TRANS(vfredmin_vs, reduction_check)
 
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index bbd3be527c..8465aec94e 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -4682,43 +4682,127 @@ GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, 
H2, DO_ADD)
 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
 
 /* Vector Single-Width Floating-Point Reduction Instructions */
-#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\
-void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
-                  void *vs2, CPURISCVState *env,           \
-                  uint32_t desc)                           \
-{                                                          \
-    uint32_t vm = vext_vm(desc);                           \
-    uint32_t vta = vext_vta(desc);                         \
-    uint32_t vl = env->vl;                                 \
-    uint32_t i;                                            \
-    uint32_t tot = env_archcpu(env)->cfg.vlen / 8;         \
-    TD s1 =  *((TD *)vs1 + HD(0));                         \
-                                                           \
-    for (i = 0; i < vl; i++) {                             \
-        TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
-        if (!vm && !vext_elem_mask(v0, i)) {               \
-            continue;                                      \
-        }                                                  \
-        s1 = OP(s1, (TD)s2, &env->fp_status);              \
-    }                                                      \
-    *((TD *)vd + HD(0)) = s1;                              \
-    CLEAR_FN(vd, vta, 1, sizeof(TD), tot);                 \
+
+/*
+ * If f is NaN, return SEW-bit canonical NaN.
+ * Set the invalid exception flag if f is a sNaN.
+ */
+static uint64_t propagate_nan(uint64_t f, uint32_t sew, float_status *s)
+{
+    target_ulong ret;
+
+    switch (sew) {
+    case 16:
+        ret = fclass_h(f);
+        /* check if f is NaN */
+        if (ret & 0x300) {
+            /* check if f is a sNaN */
+            if (ret & 0x100) {
+                s->float_exception_flags |= float_flag_invalid;
+            }
+            /* return canonical NaN */
+            return float16_default_nan(s);
+        } else {
+            return f;
+        }
+        break;
+    case 32:
+        ret = fclass_s(f);
+        /* check if f is NaN */
+        if (ret & 0x300) {
+            /* check if f is a sNaN */
+            if (ret & 0x100) {
+                s->float_exception_flags |= float_flag_invalid;
+            }
+            /* return canonical NaN */
+            return float32_default_nan(s);
+        } else {
+            return f;
+        }
+        break;
+    case 64:
+        ret = fclass_d(f);
+        /* check if f is NaN */
+        if (ret & 0x300) {
+            /* check if f is a sNaN */
+            if (ret & 0x100) {
+                s->float_exception_flags |= float_flag_invalid;
+            }
+            /* return canonical NaN */
+            return float64_default_nan(s);
+        } else {
+            return f;
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
 }
 
+#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, PROPAGATE_NAN, OP, CLEAR_FN) \
+void HELPER(NAME)(void *vd, void *v0, void *vs1,                           \
+                  void *vs2, CPURISCVState *env,                           \
+                  uint32_t desc)                                           \
+{                                                                          \
+    uint32_t vm = vext_vm(desc);                                           \
+    uint32_t vta = vext_vta(desc);                                         \
+    uint32_t vl = env->vl;                                                 \
+    uint32_t i;                                                            \
+    uint32_t tot = env_archcpu(env)->cfg.vlen >> 3;                        \
+    bool active = false;                                                   \
+    TD s1 = *((TD *)vs1 + HD(0));                                          \
+                                                                           \
+    for (i = 0; i < vl; i++) {                                             \
+        TS2 s2 = *((TS2 *)vs2 + HS2(i));                                   \
+        if (!vm && !vext_elem_mask(v0, i)) {                               \
+            continue;                                                      \
+        }                                                                  \
+        active = true;                                                     \
+        s1 = OP(s1, (TD)s2, &env->fp_status);                              \
+    }                                                                      \
+                                                                           \
+    if (vl > 0) {                                                          \
+        if (PROPAGATE_NAN && !active) {                                    \
+            *((TD *)vd + HD(0)) = propagate_nan(s1, sizeof(TD) * 8,        \
+                                                &env->fp_status);          \
+        } else {                                                           \
+            *((TD *)vd + HD(0)) = s1;                                      \
+        }                                                                  \
+    }                                                                      \
+    CLEAR_FN(vd, vta, 1, sizeof(TD), tot);                                 \
+}
+
+/* Ordered sum */
+GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, false,
+              float16_add, clearh)
+GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, false,
+              float32_add, clearl)
+GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, false,
+              float64_add, clearq)
+
 /* Unordered sum */
-GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add, clearh)
-GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add, clearl)
-GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add, clearq)
+GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, true,
+              float16_add, clearh)
+GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, true,
+              float32_add, clearl)
+GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, true,
+              float64_add, clearq)
 
 /* Maximum value */
-GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum, 
clearh)
-GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum, 
clearl)
-GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum, 
clearq)
+GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, false,
+              float16_maxnum_noprop, clearh)
+GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, false,
+              float32_maxnum_noprop, clearl)
+GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, false,
+              float64_maxnum_noprop, clearq)
 
 /* Minimum value */
-GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum, 
clearh)
-GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum, 
clearl)
-GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum, 
clearq)
+GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, false,
+              float16_minnum_noprop, clearh)
+GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, false,
+              float32_minnum_noprop, clearl)
+GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, false,
+              float64_minnum_noprop, clearq)
 
 /* Vector Widening Floating-Point Reduction Instructions */
 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
-- 
2.17.1

[Prev in Thread]

Current Thread

[Next in Thread]

Re: [RFC v2 56/76] target/riscv: rvv-0.9: widening integer reduction instructions, (continued)
- [RFC v2 57/76] target/riscv: rvv-0.9: mask-register logical instructions, frank . chang, 2020/07/22
- [RFC v2 58/76] target/riscv: rvv-0.9: slide instructions, frank . chang, 2020/07/22
  - Re: [RFC v2 58/76] target/riscv: rvv-0.9: slide instructions, Richard Henderson, 2020/07/31
- [RFC v2 59/76] target/riscv: rvv-0.9: floating-point slide instructions, frank . chang, 2020/07/22
  - Re: [RFC v2 59/76] target/riscv: rvv-0.9: floating-point slide instructions, Richard Henderson, 2020/07/31
- [RFC v2 60/76] target/riscv: rvv-0.9: narrowing fixed-point clip instructions, frank . chang, 2020/07/22
  - Re: [RFC v2 60/76] target/riscv: rvv-0.9: narrowing fixed-point clip instructions, Richard Henderson, 2020/07/31
- [RFC v2 61/76] target/riscv: rvv-0.9: floating-point/integer type-convert instructions, frank . chang, 2020/07/22
  - Re: [RFC v2 61/76] target/riscv: rvv-0.9: floating-point/integer type-convert instructions, Richard Henderson, 2020/07/31
- [RFC v2 62/76] target/riscv: rvv-0.9: single-width floating-point reduction, frank . chang <=
  - Re: [RFC v2 62/76] target/riscv: rvv-0.9: single-width floating-point reduction, Richard Henderson, 2020/07/31
- [RFC v2 63/76] target/riscv: rvv-0.9: widening floating-point reduction instructions, frank . chang, 2020/07/22
- [RFC v2 64/76] target/riscv: rvv-0.9: single-width scaling shift instructions, frank . chang, 2020/07/22
  - Re: [RFC v2 64/76] target/riscv: rvv-0.9: single-width scaling shift instructions, Richard Henderson, 2020/07/31
- [RFC v2 65/76] target/riscv: rvv-0.9: remove widening saturating scaled multiply-add, frank . chang, 2020/07/22
  - Re: [RFC v2 65/76] target/riscv: rvv-0.9: remove widening saturating scaled multiply-add, Richard Henderson, 2020/07/31
- [RFC v2 66/76] target/riscv: rvv-0.9: remove vmford.vv and vmford.vf, frank . chang, 2020/07/22
  - Re: [RFC v2 66/76] target/riscv: rvv-0.9: remove vmford.vv and vmford.vf, Richard Henderson, 2020/07/31
- [RFC v2 67/76] target/riscv: rvv-0.9: remove integer extract instruction, frank . chang, 2020/07/22
  - Re: [RFC v2 67/76] target/riscv: rvv-0.9: remove integer extract instruction, Richard Henderson, 2020/07/31

Prev by Date: [RFC v2 61/76] target/riscv: rvv-0.9: floating-point/integer type-convert instructions
Next by Date: [RFC v2 63/76] target/riscv: rvv-0.9: widening floating-point reduction instructions
Previous by thread: Re: [RFC v2 61/76] target/riscv: rvv-0.9: floating-point/integer type-convert instructions
Next by thread: Re: [RFC v2 62/76] target/riscv: rvv-0.9: single-width floating-point reduction
Index(es):
- Date
- Thread