[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[RFC v2 62/76] target/riscv: rvv-0.9: single-width floating-point reduction
From: frank . chang
Subject: [RFC v2 62/76] target/riscv: rvv-0.9: single-width floating-point reduction
Date: Wed, 22 Jul 2020 17:16:25 +0800
From: Frank Chang <frank.chang@sifive.com>
Separate the implementation of vfredsum.vs and vfredosum.vs.
Introduce NaN propagation for vfredsum.vs: when no elements are active,
implementations are permitted to canonicalize the NaN and, if the NaN is
signaling, set the invalid exception flag.
Signed-off-by: Frank Chang <frank.chang@sifive.com>
---
target/riscv/helper.h | 3 +
target/riscv/insn32.decode | 3 +-
target/riscv/insn_trans/trans_rvv.inc.c | 1 +
target/riscv/vector_helper.c | 144 +++++++++++++++++++-----
4 files changed, 120 insertions(+), 31 deletions(-)
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 318fe643f4..6957a98237 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1066,6 +1066,9 @@ DEF_HELPER_6(vwredsum_vs_w, void, ptr, ptr, ptr, ptr,
env, i32)
DEF_HELPER_6(vfredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vfredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vfredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vfredosum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vfredosum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vfredosum_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vfredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vfredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_6(vfredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index e4b36af89e..0fe46c10c2 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -575,7 +575,8 @@ vredmax_vs 000111 . ..... ..... 010 ..... 1010111 @r_vm
vwredsumu_vs 110000 . ..... ..... 000 ..... 1010111 @r_vm
vwredsum_vs 110001 . ..... ..... 000 ..... 1010111 @r_vm
# Vector ordered and unordered reduction sum
-vfredsum_vs 0000-1 . ..... ..... 001 ..... 1010111 @r_vm
+vfredsum_vs 000001 . ..... ..... 001 ..... 1010111 @r_vm
+vfredosum_vs 000011 . ..... ..... 001 ..... 1010111 @r_vm
vfredmin_vs 000101 . ..... ..... 001 ..... 1010111 @r_vm
vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm
# Vector widening ordered and unordered float reduction sum
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c
b/target/riscv/insn_trans/trans_rvv.inc.c
index c1fc168043..37eee6cf97 100644
--- a/target/riscv/insn_trans/trans_rvv.inc.c
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
@@ -3011,6 +3011,7 @@ GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_widen_check)
/* Vector Single-Width Floating-Point Reduction Instructions */
GEN_OPFVV_TRANS(vfredsum_vs, reduction_check)
+GEN_OPFVV_TRANS(vfredosum_vs, reduction_check)
GEN_OPFVV_TRANS(vfredmax_vs, reduction_check)
GEN_OPFVV_TRANS(vfredmin_vs, reduction_check)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index bbd3be527c..8465aec94e 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -4682,43 +4682,127 @@ GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4,
H2, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
/* Vector Single-Width Floating-Point Reduction Instructions */
-#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\
-void HELPER(NAME)(void *vd, void *v0, void *vs1, \
- void *vs2, CPURISCVState *env, \
- uint32_t desc) \
-{ \
- uint32_t vm = vext_vm(desc); \
- uint32_t vta = vext_vta(desc); \
- uint32_t vl = env->vl; \
- uint32_t i; \
- uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \
- TD s1 = *((TD *)vs1 + HD(0)); \
- \
- for (i = 0; i < vl; i++) { \
- TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
- if (!vm && !vext_elem_mask(v0, i)) { \
- continue; \
- } \
- s1 = OP(s1, (TD)s2, &env->fp_status); \
- } \
- *((TD *)vd + HD(0)) = s1; \
- CLEAR_FN(vd, vta, 1, sizeof(TD), tot); \
+
+/*
+ * If f is NaN, return SEW-bit canonical NaN.
+ * Set the invalid exception flag if f is a sNaN.
+ */
+static uint64_t propagate_nan(uint64_t f, uint32_t sew, float_status *s)
+{
+ target_ulong ret;
+
+ switch (sew) {
+ case 16:
+ ret = fclass_h(f);
+ /* check if f is NaN */
+ if (ret & 0x300) {
+ /* check if f is a sNaN */
+ if (ret & 0x100) {
+ s->float_exception_flags |= float_flag_invalid;
+ }
+ /* return canonical NaN */
+ return float16_default_nan(s);
+ } else {
+ return f;
+ }
+ break;
+ case 32:
+ ret = fclass_s(f);
+ /* check if f is NaN */
+ if (ret & 0x300) {
+ /* check if f is a sNaN */
+ if (ret & 0x100) {
+ s->float_exception_flags |= float_flag_invalid;
+ }
+ /* return canonical NaN */
+ return float32_default_nan(s);
+ } else {
+ return f;
+ }
+ break;
+ case 64:
+ ret = fclass_d(f);
+ /* check if f is NaN */
+ if (ret & 0x300) {
+ /* check if f is a sNaN */
+ if (ret & 0x100) {
+ s->float_exception_flags |= float_flag_invalid;
+ }
+ /* return canonical NaN */
+ return float64_default_nan(s);
+ } else {
+ return f;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
+#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, PROPAGATE_NAN, OP, CLEAR_FN) \
+void HELPER(NAME)(void *vd, void *v0, void *vs1, \
+ void *vs2, CPURISCVState *env, \
+ uint32_t desc) \
+{ \
+ uint32_t vm = vext_vm(desc); \
+ uint32_t vta = vext_vta(desc); \
+ uint32_t vl = env->vl; \
+ uint32_t i; \
+ uint32_t tot = env_archcpu(env)->cfg.vlen >> 3; \
+ bool active = false; \
+ TD s1 = *((TD *)vs1 + HD(0)); \
+ \
+ for (i = 0; i < vl; i++) { \
+ TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
+ if (!vm && !vext_elem_mask(v0, i)) { \
+ continue; \
+ } \
+ active = true; \
+ s1 = OP(s1, (TD)s2, &env->fp_status); \
+ } \
+ \
+ if (vl > 0) { \
+ if (PROPAGATE_NAN && !active) { \
+ *((TD *)vd + HD(0)) = propagate_nan(s1, sizeof(TD) * 8, \
+ &env->fp_status); \
+ } else { \
+ *((TD *)vd + HD(0)) = s1; \
+ } \
+ } \
+ CLEAR_FN(vd, vta, 1, sizeof(TD), tot); \
+}
+
+/* Ordered sum */
+GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, false,
+ float16_add, clearh)
+GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, false,
+ float32_add, clearl)
+GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, false,
+ float64_add, clearq)
+
/* Unordered sum */
-GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add, clearh)
-GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add, clearl)
-GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add, clearq)
+GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, true,
+ float16_add, clearh)
+GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, true,
+ float32_add, clearl)
+GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, true,
+ float64_add, clearq)
/* Maximum value */
-GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum,
clearh)
-GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum,
clearl)
-GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum,
clearq)
+GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, false,
+ float16_maxnum_noprop, clearh)
+GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, false,
+ float32_maxnum_noprop, clearl)
+GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, false,
+ float64_maxnum_noprop, clearq)
/* Minimum value */
-GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum,
clearh)
-GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum,
clearl)
-GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum,
clearq)
+GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, false,
+ float16_minnum_noprop, clearh)
+GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, false,
+ float32_minnum_noprop, clearl)
+GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, false,
+ float64_minnum_noprop, clearq)
/* Vector Widening Floating-Point Reduction Instructions */
/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
--
2.17.1
- Re: [RFC v2 56/76] target/riscv: rvv-0.9: widening integer reduction instructions, (continued)
- [RFC v2 57/76] target/riscv: rvv-0.9: mask-register logical instructions, frank . chang, 2020/07/22
- [RFC v2 58/76] target/riscv: rvv-0.9: slide instructions, frank . chang, 2020/07/22
- [RFC v2 59/76] target/riscv: rvv-0.9: floating-point slide instructions, frank . chang, 2020/07/22
- [RFC v2 60/76] target/riscv: rvv-0.9: narrowing fixed-point clip instructions, frank . chang, 2020/07/22
- [RFC v2 61/76] target/riscv: rvv-0.9: floating-point/integer type-convert instructions, frank . chang, 2020/07/22
- [RFC v2 62/76] target/riscv: rvv-0.9: single-width floating-point reduction,
frank . chang <=
- [RFC v2 63/76] target/riscv: rvv-0.9: widening floating-point reduction instructions, frank . chang, 2020/07/22
- [RFC v2 64/76] target/riscv: rvv-0.9: single-width scaling shift instructions, frank . chang, 2020/07/22
- [RFC v2 65/76] target/riscv: rvv-0.9: remove widening saturating scaled multiply-add, frank . chang, 2020/07/22
- [RFC v2 66/76] target/riscv: rvv-0.9: remove vmford.vv and vmford.vf, frank . chang, 2020/07/22
- [RFC v2 67/76] target/riscv: rvv-0.9: remove integer extract instruction, frank . chang, 2020/07/22