bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] (x)memcoll: performance improvement when input is known to be NUL delimited.


From: Chen Guo
Subject: Re: [PATCH] (x)memcoll: performance improvement when input is known to be NUL delimited.
Date: Sun, 7 Mar 2010 23:44:54 -0800 (PST)

Hey all,
I was just browsing the mailing list and for some reason my attachment is 
coming off as a .bin file. I'm gonna attach it again, with a .txt extension, as 
well as include it inline, just to be on the safe side.

>From 3b10b760ffa1674faa6c70d58aa18ae40a8805ea Mon Sep 17 00:00:00 2001
From: Chen Guo <address@hidden>
Date: Sun, 7 Mar 2010 17:07:49 -0800
Subject: [PATCH] (x)memcoll: performance improvement when input is known to be
 NUL delimited.

This is in support of a patch to coreutils' sort, where for each
input line xmemcoll is called several times. If each input line is
NUL delimited when read in, memcoll no longer needs to save the
last byte, NUL delimit, then put the last byte back every time the
line is compared. Sorting a 96MB, 1M line file, performance
improvement is roughly 1%.

* lib/memcoll.c (memcoll_nul): New function.
(strcoll_loop): New function, refactored for use in both memcoll
and memcoll_nul.
* lib/memcoll.h: Add prototypes for memcoll_nul and strcoll_loop.
* lib/xmemcoll.c (xmemcoll_nul): New function.
(collate_error): New function, refactored for use in both xmemcoll
and xmemcoll_nul.
* lib/xmemcoll.h: Add prototypes for xmemcoll_nul and collate_error.
---
 lib/memcoll.c  |   86 ++++++++++++++++++++++++++++++++++++++++---------------
 lib/memcoll.h  |    2 +
 lib/xmemcoll.c |   35 +++++++++++++++++-----
 lib/xmemcoll.h |    2 +
 4 files changed, 93 insertions(+), 32 deletions(-)

diff --git a/lib/memcoll.c b/lib/memcoll.c
index e08ffa5..e09c34f 100644
--- a/lib/memcoll.c
+++ b/lib/memcoll.c
@@ -30,6 +30,7 @@
    adjacent.  Perhaps temporarily modify the bytes after S1 and S2,
    but restore their original contents before returning.  Set errno to an
    error number if there is an error, and to zero otherwise.  */
+
 int
 memcoll (char *s1, size_t s1len, char *s2, size_t s2len)
 {
@@ -54,30 +55,7 @@ memcoll (char *s1, size_t s1len, char *s2, size_t s2len)
       s1[s1len++] = '\0';
       s2[s2len++] = '\0';
 
-      while (! (errno = 0, (diff = strcoll (s1, s2)) || errno))
-        {
-          /* strcoll found no difference, but perhaps it was fooled by NUL
-             characters in the data.  Work around this problem by advancing
-             past the NUL chars.  */
-          size_t size1 = strlen (s1) + 1;
-          size_t size2 = strlen (s2) + 1;
-          s1 += size1;
-          s2 += size2;
-          s1len -= size1;
-          s2len -= size2;
-
-          if (s1len == 0)
-            {
-              if (s2len != 0)
-                diff = -1;
-              break;
-            }
-          else if (s2len == 0)
-            {
-              diff = 1;
-              break;
-            }
-        }
+      diff = strcoll_loop (s1, s1len, s2, s2len);
 
       s1[s1len - 1] = n1;
       s2[s2len - 1] = n2;
@@ -94,3 +72,63 @@ memcoll (char *s1, size_t s1len, char *s2, size_t s2len)
 
   return diff;
 }
+
+/* Like memcoll, but S1 and S2 are known to be NUL delimited, thus no
+   modification to S1 or S2 is needed. */
+int
+memcoll_nul (char *s1, size_t s1len, char *s2, size_t s2len)
+{
+  int diff;
+
+#if HAVE_STRCOLL
+
+  if (s1len == s2len && memcmp (s1, s2, s1len) == 0)
+    {
+      errno = 0;
+      diff = 0;
+    }
+  else
+    diff = strcoll_loop (s1, s1len, s2, s2len);
+
+#else
+
+  diff = memcmp (s1, s2, s1len < s2len ? s1len : s2len);
+  if (! diff)
+    diff = s1len < s2len ? -1 : s1len != s2len;
+  errno = 0;
+
+#endif
+
+  return diff;
+}
+
+static inline int
+strcoll_loop (char *s1, size_t s1len, char *s2, size_t s2len)
+{
+  int diff;
+  while (! (errno = 0, (diff = strcoll (s1, s2)) || errno))
+    {
+      /* strcoll found no difference, but perhaps it was fooled by NUL
+         characters in the data.  Work around this problem by advancing
+         past the NUL chars.  */
+      size_t size1 = strlen (s1) + 1;
+      size_t size2 = strlen (s2) + 1;
+      s1 += size1;
+      s2 += size2;
+      s1len -= size1;
+      s2len -= size2;
+
+      if (s1len == 0)
+        {
+          if (s2len != 0)
+            diff = -1;
+          break;
+        }
+      else if (s2len == 0)
+        {
+          diff = 1;
+          break;
+        }
+    }
+  return diff;
+}
diff --git a/lib/memcoll.h b/lib/memcoll.h
index 8f2e1b1..392484d 100644
--- a/lib/memcoll.h
+++ b/lib/memcoll.h
@@ -23,5 +23,7 @@
 # include <stddef.h>
 
 int memcoll (char *, size_t, char *, size_t);
+int memcoll_nul (char *, size_t, char *, size_t);
+static inline int strcoll_loop (char *, size_t, char *, size_t);
 
 #endif /* MEMCOLL_H_ */
diff --git a/lib/xmemcoll.c b/lib/xmemcoll.c
index 84bbd8c..458bed2 100644
--- a/lib/xmemcoll.c
+++ b/lib/xmemcoll.c
@@ -44,14 +44,33 @@ xmemcoll (char *s1, size_t s1len, char *s2, size_t s2len)
   int collation_errno = errno;
 
   if (collation_errno)
-    {
-      error (0, collation_errno, _("string comparison failed"));
-      error (0, 0, _("Set LC_ALL='C' to work around the problem."));
-      error (exit_failure, 0,
-             _("The strings compared were %s and %s."),
-             quotearg_n_style_mem (0, locale_quoting_style, s1, s1len),
-             quotearg_n_style_mem (1, locale_quoting_style, s2, s2len));
-    }
+    collate_error (collation_errno, s1, s1len, s2, s2len);
 
   return diff;
 }
+
+/* Like xmemcoll, but S1 and S2 are known to be NUL delimited, thus
+   no modifications to S1 and S2 are needed. */
+
+int
+xmemcoll_nul (char *s1, size_t s1len, char *s2, size_t s2len)
+{
+  int diff = memcoll_nul (s1, s1len, s2, s2len);
+  int collation_errno = errno;
+
+  if (collation_errno)
+    collate_error (collation_errno, s1, s1len, s2, s2len);
+  return diff;
+}
+
+static inline void
+collate_error (int collation_errno, char *s1, size_t s1len, char *s2,
+               size_t s2len)
+{
+  error (0, collation_errno, _("string comparison failed"));
+  error (0, 0, _("Set LC_ALL='C' to work around the problem."));
+  error (exit_failure, 0,
+         _("The strings compared were %s and %s."),
+         quotearg_n_style_mem (0, locale_quoting_style, s1, s1len),
+         quotearg_n_style_mem (1, locale_quoting_style, s2, s2len));
+}
diff --git a/lib/xmemcoll.h b/lib/xmemcoll.h
index 2f422e8..df2069d 100644
--- a/lib/xmemcoll.h
+++ b/lib/xmemcoll.h
@@ -1,2 +1,4 @@
 #include <stddef.h>
 int xmemcoll (char *, size_t, char *, size_t);
+int xmemcoll_nul (char *, size_t, char *, size_t);
+static inline void collate_error (int, char *, size_t, char *, size_t);
-- 
1.6.6.1

Attachment: nul_patch.txt
Description: Text document


reply via email to

[Prev in Thread] Current Thread [Next in Thread]