[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 14/18] i386: Destructive FP helpers for AVX
From: |
Paolo Bonzini |
Subject: |
[PATCH 14/18] i386: Destructive FP helpers for AVX |
Date: |
Fri, 26 Aug 2022 00:14:07 +0200 |
From: Paul Brook <paul@nowt.org>
Prepare the horizontal arithmetic vector helpers for AVX.
These currently use a dummy Reg typed variable to store the result then
assign the whole register. This will cause 128 bit operations to corrupt
the upper half of the register, so replace it with explicit temporaries
and element assignments.
Signed-off-by: Paul Brook <paul@nowt.org>
Message-Id: <20220424220204.2493824-18-paul@nowt.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/ops_sse.h | 68 +++++++++++++++++++++----------------------
1 file changed, 34 insertions(+), 34 deletions(-)
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 0493a26804..7252e03619 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -932,45 +932,45 @@ void helper_insertq_i(CPUX86State *env, ZMMReg *d, int index, int length)
d->ZMM_Q(0) = helper_insertq(d->ZMM_Q(0), index, length);
}
-void glue(helper_haddps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
-
- r.ZMM_S(0) = float32_add(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status);
- r.ZMM_S(1) = float32_add(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
- r.ZMM_S(2) = float32_add(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
- r.ZMM_S(3) = float32_add(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
- MOVE(*d, r);
+#define SSE_HELPER_HPS(name, F) \
+void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \
+{ \
+ Reg *v = d; \
+ float32 r[2 << SHIFT]; \
+ int i, j; \
+ for (i = j = 0; j < 4; i++, j += 2) { \
+ r[i] = F(v->ZMM_S(j), v->ZMM_S(j + 1), &env->sse_status); \
+ } \
+ for (j = 0; j < 4; i++, j += 2) { \
+ r[i] = F(s->ZMM_S(j), s->ZMM_S(j + 1), &env->sse_status); \
+ } \
+ for (i = 0; i < 2 << SHIFT; i++) { \
+ d->ZMM_S(i) = r[i]; \
+ } \
}
-void glue(helper_haddpd, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
+SSE_HELPER_HPS(haddps, float32_add)
+SSE_HELPER_HPS(hsubps, float32_sub)
- r.ZMM_D(0) = float64_add(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
- r.ZMM_D(1) = float64_add(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
- MOVE(*d, r);
+#define SSE_HELPER_HPD(name, F) \
+void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \
+{ \
+ Reg *v = d; \
+ float64 r[2 << SHIFT]; \
+ int i, j; \
+ for (i = j = 0; j < 2; i++, j += 2) { \
+ r[i] = F(v->ZMM_D(j), v->ZMM_D(j + 1), &env->sse_status); \
+ } \
+ for (j = 0; j < 2; i++, j += 2) { \
+ r[i] = F(s->ZMM_D(j), s->ZMM_D(j + 1), &env->sse_status); \
+ } \
+ for (i = 0; i < 1 << SHIFT; i++) { \
+ d->ZMM_D(i) = r[i]; \
+ } \
}
-void glue(helper_hsubps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
-
- r.ZMM_S(0) = float32_sub(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status);
- r.ZMM_S(1) = float32_sub(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
- r.ZMM_S(2) = float32_sub(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
- r.ZMM_S(3) = float32_sub(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
- MOVE(*d, r);
-}
-
-void glue(helper_hsubpd, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
-
- r.ZMM_D(0) = float64_sub(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
- r.ZMM_D(1) = float64_sub(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
- MOVE(*d, r);
-}
+SSE_HELPER_HPD(haddpd, float64_add)
+SSE_HELPER_HPD(hsubpd, float64_sub)
void glue(helper_addsubps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
{
--
2.37.1
- Re: [PATCH 07/18] i386: Rewrite simple integer vector helpers, (continued)
- [PATCH 08/18] i386: Misc integer AVX helper prep, Paolo Bonzini, 2022/08/25
- [PATCH 05/18] i386: Add ZMM_OFFSET macro, Paolo Bonzini, 2022/08/25
- [PATCH 09/18] i386: Destructive vector helpers for AVX, Paolo Bonzini, 2022/08/25
- [PATCH 10/18] i386: Add size suffix to vector FP helpers, Paolo Bonzini, 2022/08/25
- [PATCH 11/18] i386: Floating point arithmetic helper AVX prep, Paolo Bonzini, 2022/08/25
- [PATCH 14/18] i386: Destructive FP helpers for AVX,
Paolo Bonzini <=
- [PATCH 12/18] i386: reimplement AVX comparison helpers, Paolo Bonzini, 2022/08/25
- [PATCH 15/18] i386: Misc AVX helper prep, Paolo Bonzini, 2022/08/25
- [PATCH 13/18] i386: Dot product AVX helper prep, Paolo Bonzini, 2022/08/25
- [PATCH 16/18] i386: Rewrite blendv helpers, Paolo Bonzini, 2022/08/25
- [PATCH 17/18] i386: AVX pclmulqdq prep, Paolo Bonzini, 2022/08/25