bug-gzip
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#39832: [PATCH] Optimized the deflate in aarch64


From: Yikun Jiang
Subject: bug#39832: [PATCH] Optimized the deflate in aarch64
Date: Sat, 29 Feb 2020 17:36:37 +0800

From: Yikun Jiang <address@hidden>

This patch uses a prefetch instruction (PRFM) on aarch64 to preload the
prev[] entry for next_match into the cache, which improves performance.
It also partially unrolls the string-insertion loop in deflate() to
reduce the number of conditional branches.
---
 deflate.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/deflate.c b/deflate.c
index 5ed2a9b..008c032 100644
--- a/deflate.c
+++ b/deflate.c
@@ -378,6 +378,9 @@ longest_match(IPos cur_match)
     register int len;                           /* length of current match
*/
     int best_len = prev_length;                 /* best match length so
far */
     IPos limit = strstart > (IPos)MAX_DIST ? strstart - (IPos)MAX_DIST :
NIL;
+#ifdef __aarch64__
+    IPos next_match;
+#endif
     /* Stop when cur_match becomes <= limit. To simplify the code,
      * we prevent matches with the string of window index 0.
      */
@@ -411,6 +414,10 @@ longest_match(IPos cur_match)
     do {
         Assert(cur_match < strstart, "no future");
         match = window + cur_match;
+#ifdef __aarch64__
+        next_match = prev[cur_match & WMASK];
+        __asm__("PRFM   PLDL1STRM, [%0]"::"r"(&(prev[next_match &
WMASK])));
+#endif

         /* Skip to next match if the match length cannot increase
          * or if the match length is less than 2:
@@ -488,8 +495,14 @@ longest_match(IPos cur_match)
             scan_end   = scan[best_len];
 #endif
         }
-    } while ((cur_match = prev[cur_match & WMASK]) > limit
-             && --chain_length != 0);
+    }
+#ifdef __aarch64__
+    while ((cur_match = next_match) > limit
+            && --chain_length != 0);
+#else
+    while ((cur_match = prev[cur_match & WMASK]) > limit
+            && --chain_length != 0);
+#endif

     return best_len;
 }
@@ -777,7 +790,20 @@ deflate (int pack_level)
             lookahead -= prev_length-1;
             prev_length -= 2;
             RSYNC_ROLL(strstart, prev_length+1);
+
+            while (prev_length >= 4) {
+                prev_length -= 4;
+                strstart++;
+                INSERT_STRING(strstart, hash_head);
+                strstart++;
+                INSERT_STRING(strstart, hash_head);
+                strstart++;
+                INSERT_STRING(strstart, hash_head);
+                strstart++;
+                INSERT_STRING(strstart, hash_head);
+            }
             do {
+                if (prev_length == 0) break;
                 strstart++;
                 INSERT_STRING(strstart, hash_head);
                 /* strstart never exceeds WSIZE-MAX_MATCH, so there are
--
2.17.1


reply via email to

[Prev in Thread] Current Thread [Next in Thread]