qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 4/6] target/i386: Use tcg gvec for p{add,sub}*


From: Richard Henderson
Subject: [PATCH 4/6] target/i386: Use tcg gvec for p{add,sub}*
Date: Mon, 22 Aug 2022 15:37:20 -0700

Convert p{add,sub}* to tcg gvec, since psubb is the second highest
overhead sse operation, at 0.9%.  It's simple to convert add and the
other element sizes at the same time.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/i386/ops_sse.h        | 10 ---------
 target/i386/ops_sse_header.h | 10 ---------
 target/i386/tcg/translate.c  | 39 ++++++++++++++++++++++++++++--------
 3 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 94440a9dc5..6f035b5c16 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -389,16 +389,6 @@ static inline int satsw(int x)
 #define FAVG(a, b) (((a) + (b) + 1) >> 1)
 #endif
 
-SSE_HELPER_B(helper_paddb, FADD)
-SSE_HELPER_W(helper_paddw, FADD)
-SSE_HELPER_L(helper_paddl, FADD)
-SSE_HELPER_Q(helper_paddq, FADD)
-
-SSE_HELPER_B(helper_psubb, FSUB)
-SSE_HELPER_W(helper_psubw, FSUB)
-SSE_HELPER_L(helper_psubl, FSUB)
-SSE_HELPER_Q(helper_psubq, FSUB)
-
 SSE_HELPER_B(helper_paddusb, FADDUB)
 SSE_HELPER_B(helper_paddsb, FADDSB)
 SSE_HELPER_B(helper_psubusb, FSUBUB)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index b9f957daf8..da630fbc40 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -60,16 +60,6 @@ DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg)
 #define SSE_HELPER_Q(name, F)\
     DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg)
 
-SSE_HELPER_B(paddb, FADD)
-SSE_HELPER_W(paddw, FADD)
-SSE_HELPER_L(paddl, FADD)
-SSE_HELPER_Q(paddq, FADD)
-
-SSE_HELPER_B(psubb, FSUB)
-SSE_HELPER_W(psubw, FSUB)
-SSE_HELPER_L(psubl, FSUB)
-SSE_HELPER_Q(psubq, FSUB)
-
 SSE_HELPER_B(paddusb, FADDUB)
 SSE_HELPER_B(paddsb, FADDSB)
 SSE_HELPER_B(psubusb, FSUBUB)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 467d018b68..2a8ea3369a 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2882,7 +2882,7 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
     [0xd1] = MMX_OP2(psrlw),
     [0xd2] = MMX_OP2(psrld),
     [0xd3] = MMX_OP2(psrlq),
-    [0xd4] = MMX_OP2(paddq),
+    [0xd4] = { SSE_DUMMY, SSE_DUMMY },  /* paddq */
     [0xd5] = MMX_OP2(pmullw),
     [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
     [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
@@ -2919,13 +2919,13 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
     [0xf6] = MMX_OP2(psadbw),
     [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
                (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
-    [0xf8] = MMX_OP2(psubb),
-    [0xf9] = MMX_OP2(psubw),
-    [0xfa] = MMX_OP2(psubl),
-    [0xfb] = MMX_OP2(psubq),
-    [0xfc] = MMX_OP2(paddb),
-    [0xfd] = MMX_OP2(paddw),
-    [0xfe] = MMX_OP2(paddl),
+    [0xf8] = { SSE_DUMMY, SSE_DUMMY },  /* psubb */
+    [0xf9] = { SSE_DUMMY, SSE_DUMMY },  /* psubw */
+    [0xfa] = { SSE_DUMMY, SSE_DUMMY },  /* psubl */
+    [0xfb] = { SSE_DUMMY, SSE_DUMMY },  /* psubq */
+    [0xfc] = { SSE_DUMMY, SSE_DUMMY },  /* paddb */
+    [0xfd] = { SSE_DUMMY, SSE_DUMMY },  /* paddw */
+    [0xfe] = { SSE_DUMMY, SSE_DUMMY },  /* paddl */
 };
 
 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
@@ -4551,6 +4551,29 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             tcg_gen_gvec_cmp(TCG_COND_EQ, b - 0x74, op1_offset, op1_offset,
                              op2_offset, vec_len, vec_len);
             break;
+        case 0xf8: /* psubb */
+        case 0xf9: /* psubw */
+        case 0xfa: /* psubl */
+        case 0xfb: /* psubq */
+            op1_offset += xmm_ofs;
+            op2_offset += xmm_ofs;
+            tcg_gen_gvec_sub(b - 0xf8, op1_offset, op1_offset,
+                             op2_offset, vec_len, vec_len);
+            break;
+        case 0xfc: /* paddb */
+        case 0xfd: /* paddw */
+        case 0xfe: /* paddl */
+            op1_offset += xmm_ofs;
+            op2_offset += xmm_ofs;
+            tcg_gen_gvec_add(b - 0xfc, op1_offset, op1_offset,
+                             op2_offset, vec_len, vec_len);
+            break;
+        case 0xd4: /* paddq */
+            op1_offset += xmm_ofs;
+            op2_offset += xmm_ofs;
+            tcg_gen_gvec_add(MO_64, op1_offset, op1_offset,
+                             op2_offset, vec_len, vec_len);
+            break;
         default:
             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
-- 
2.34.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]