bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

new module memchr2


From: Eric Blake
Subject: new module memchr2
Date: Sat, 01 Mar 2008 07:33:31 -0700
User-agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.12) Gecko/20080213 Thunderbird/2.0.0.12 Mnenhy/0.7.5.666

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

M4 has a situation where it wants to search for the first of two candidate
bytes within an arbitrary string.  In fact, Bruno profiled m4 when running
autoconf on the gettext package, and found that more than 10% of the total
execution time was in a hot loop doing just that (searching for the first
of '[' and ']' to find the end of a quoted string).

strcspn isn't good enough - it doesn't handle embedded NUL (as a side
note, you should never pass an arbitrary string as the second argument of
strcspn, since glibc's implementation is quadratic if called like
strcspn("a","bbbb"..."bbb")).

Two calls to memchr isn't good enough.  You end up traversing the prefix
of the string twice instead of once, and long enough strings can even
trigger cache flushes before the second search.

Byte-wise traversal isn't good enough - the naive loop with a
double-conditional every byte is processor intensive, compared to more
efficient vectorized algorithms present in good memchr implementations.

getndelim2 isn't good enough - it supports the search for two characters,
but must be based on a FILE, and fmemopen isn't portable (and probably not
efficient, either).

So I'm checking in this new module.  At this point, I'm wondering if we
should rewrite getndelim2 to take advantage of memchr2 and
freadahead/freadptr/freadseek, rather than operate a byte at a time.

- --
Don't work too hard, make some time for fun as well!

Eric Blake             address@hidden
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.5 (Cygwin)
Comment: Public key at home.comcast.net/~ericblake/eblake.gpg
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iD8DBQFHyWk484KuGfSFAYARAlEGAJ46eyHP1oPDBh9SLL837XkkiD/73wCgpdiu
wgAS1jo+iKrtRXPSUvn8N8Q=
=5mAE
-----END PGP SIGNATURE-----
>From f91b9f973226f2b434ba24e32845f252a3d6e64b Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Sat, 1 Mar 2008 06:54:29 -0700
Subject: [PATCH] New module 'memchr2'.

* modules/memchr2: New file.
* modules/memchr2-tests: Likewise.
* lib/memchr2.h: Likewise.
* lib/memchr2.c: Likewise, based on memchr.c.
* tests/test-memchr2.c: New test.
* MODULES.html.sh (String handling): Add memchr2.

Signed-off-by: Eric Blake <address@hidden>
---
 ChangeLog             |   10 +++
 MODULES.html.sh       |    1 +
 lib/memchr2.c         |  194 +++++++++++++++++++++++++++++++++++++++++++++++++
 lib/memchr2.h         |   31 ++++++++
 modules/memchr2       |   24 ++++++
 modules/memchr2-tests |   10 +++
 tests/test-memchr2.c  |   89 ++++++++++++++++++++++
 7 files changed, 359 insertions(+), 0 deletions(-)
 create mode 100644 lib/memchr2.c
 create mode 100644 lib/memchr2.h
 create mode 100644 modules/memchr2
 create mode 100644 modules/memchr2-tests
 create mode 100644 tests/test-memchr2.c

diff --git a/ChangeLog b/ChangeLog
index ace7d2a..0d3e4ba 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2008-03-01  Eric Blake  <address@hidden>
+
+       New module 'memchr2'.
+       * modules/memchr2: New file.
+       * modules/memchr2-tests: Likewise.
+       * lib/memchr2.h: Likewise.
+       * lib/memchr2.c: Likewise, based on memchr.c.
+       * tests/test-memchr2.c: New test.
+       * MODULES.html.sh (String handling): Add memchr2.
+
 2008-02-29  Bruno Haible  <address@hidden>
 
        * modules/freadseek-tests: New file.
diff --git a/MODULES.html.sh b/MODULES.html.sh
index 16c3552..683bb69 100755
--- a/MODULES.html.sh
+++ b/MODULES.html.sh
@@ -1653,6 +1653,7 @@ func_all_modules ()
 
   func_begin_table
   func_module bcopy
+  func_module memchr2
   func_module memmem
   func_module memmem-simple
   func_module mempcpy
diff --git a/lib/memchr2.c b/lib/memchr2.c
new file mode 100644
index 0000000..540ed9f
--- /dev/null
+++ b/lib/memchr2.c
@@ -0,0 +1,194 @@
+/* Copyright (C) 1991, 1993, 1996, 1997, 1999, 2000, 2003, 2004, 2006,
+   2008 Free Software Foundation, Inc.
+
+   Based on strlen implementation by Torbjorn Granlund (address@hidden),
+   with help from Dan Sahlin (address@hidden) and
+   commentary by Jim Blandy (address@hidden);
+   adaptation to memchr suggested by Dick Karpinski (address@hidden),
+   and implemented in glibc by Roland McGrath (address@hidden).
+   Extension to memchr2 implemented by Eric Blake (address@hidden).
+
+This program is free software: you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or any
+later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include "memchr2.h"
+
+#include <limits.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Return the first address of either C1 or C2 (treated as unsigned
+   char) that occurs within N bytes of the memory region S.  If
+   neither byte appears, return NULL.  */
+void *
+memchr2 (void const *s, int c1_in, int c2_in, size_t n)
+{
+  const unsigned char *char_ptr;
+  const uintmax_t *longword_ptr;
+  uintmax_t longword1;
+  uintmax_t longword2;
+  uintmax_t magic_bits;
+  uintmax_t charmask1;
+  uintmax_t charmask2;
+  unsigned char c1;
+  unsigned char c2;
+  int i;
+
+  c1 = (unsigned char) c1_in;
+  c2 = (unsigned char) c2_in;
+
+  if (c1 == c2)
+    return memchr (s, c1, n);
+
+  /* Handle the first few characters by reading one character at a time.
+     Do this until CHAR_PTR is aligned on a longword boundary.  */
+  for (char_ptr = (const unsigned char *) s;
+       n > 0 && (size_t) char_ptr % sizeof longword1 != 0;
+       --n, ++char_ptr)
+    if (*char_ptr == c1 || *char_ptr == c2)
+      return (void *) char_ptr;
+
+  /* All these elucidatory comments refer to 4-byte longwords,
+     but the theory applies equally well to any size longwords.  */
+
+  longword_ptr = (const uintmax_t *) char_ptr;
+
+  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
+     the "holes."  Note that there is a hole just to the left of
+     each byte, with an extra at the end:
+
+     bits:  01111110 11111110 11111110 11111111
+     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
+
+     The 1-bits make sure that carries propagate to the next 0-bit.
+     The 0-bits provide holes for carries to fall into.  */
+
+  /* Set MAGIC_BITS to be this pattern of 1 and 0 bits.
+     Set CHARMASK to be a longword, each of whose bytes is C.  */
+
+  magic_bits = 0xfefefefe;
+  charmask1 = c1 | (c1 << 8);
+  charmask2 = c2 | (c2 << 8);
+  charmask1 |= charmask2 << 16;
+  charmask1 |= charmask2 << 16;
+#if 0xffffffffU < UINTMAX_MAX
+  magic_bits |= magic_bits << 32;
+  charmask1 |= charmask1 << 32;
+  charmask2 |= charmask2 << 32;
+  if (8 < sizeof longword1)
+    for (i = 64; i < sizeof longword1 * 8; i *= 2)
+      {
+       magic_bits |= magic_bits << i;
+       charmask1 |= charmask1 << i;
+       charmask2 |= charmask2 << i;
+      }
+#endif
+  magic_bits = (UINTMAX_MAX >> 1) & (magic_bits | 1);
+
+  /* Instead of the traditional loop which tests each character,
+     we will test a longword at a time.  The tricky part is testing
+     if *any of the four* bytes in the longword in question are zero.  */
+  while (n >= sizeof longword1)
+    {
+      /* We tentatively exit the loop if adding MAGIC_BITS to
+        LONGWORD fails to change any of the hole bits of LONGWORD.
+
+        1) Is this safe?  Will it catch all the zero bytes?
+        Suppose there is a byte with all zeros.  Any carry bits
+        propagating from its left will fall into the hole at its
+        least significant bit and stop.  Since there will be no
+        carry from its most significant bit, the LSB of the
+        byte to the left will be unchanged, and the zero will be
+        detected.
+
+        2) Is this worthwhile?  Will it ignore everything except
+        zero bytes?  Suppose every byte of LONGWORD has a bit set
+        somewhere.  There will be a carry into bit 8.  If bit 8
+        is set, this will carry into bit 16.  If bit 8 is clear,
+        one of bits 9-15 must be set, so there will be a carry
+        into bit 16.  Similarly, there will be a carry into bit
+        24.  If one of bits 24-30 is set, there will be a carry
+        into bit 31, so all of the hole bits will be changed.
+
+        The one misfire occurs when bits 24-30 are clear and bit
+        31 is set; in this case, the hole at bit 31 is not
+        changed.  If we had access to the processor carry flag,
+        we could close this loophole by putting the fourth hole
+        at bit 32!
+
+        So it ignores everything except 128's, when they're aligned
+        properly.
+
+        3) But wait!  Aren't we looking for C, not zero?
+        Good point.  So what we do is XOR LONGWORD with a longword,
+        each of whose bytes is C.  This turns each byte that is C
+        into a zero.  */
+
+      longword1 = *longword_ptr ^ charmask1;
+      longword2 = *longword_ptr++ ^ charmask2;
+
+      /* Add MAGIC_BITS to LONGWORD.  */
+      if ((((longword1 + magic_bits)
+
+           /* Set those bits that were unchanged by the addition.  */
+           ^ ~longword1)
+
+          /* Look at only the hole bits.  If any of the hole bits
+             are unchanged, most likely one of the bytes was a
+             zero.  */
+          & ~magic_bits) != 0
+         || (((longword2 + magic_bits) ^ ~longword2) & ~magic_bits) != 0)
+       {
+         /* Which of the bytes was C?  If none of them were, it was
+            a misfire; continue the search.  */
+
+         const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
+
+         if (cp[0] == c1 || cp[0] == c2)
+           return (void *) cp;
+         if (cp[1] == c1 || cp[1] == c2)
+           return (void *) &cp[1];
+         if (cp[2] == c1 || cp[2] == c2)
+           return (void *) &cp[2];
+         if (cp[3] == c1 || cp[3] == c2)
+           return (void *) &cp[3];
+         if (4 < sizeof longword1 && (cp[4] == c1 || cp[4] == c2))
+           return (void *) &cp[4];
+         if (5 < sizeof longword1 && (cp[5] == c1 || cp[5] == c2))
+           return (void *) &cp[5];
+         if (6 < sizeof longword1 && (cp[6] == c1 || cp[6] == c2))
+           return (void *) &cp[6];
+         if (7 < sizeof longword1 && (cp[7] == c1 || cp[7] == c2))
+           return (void *) &cp[7];
+         if (8 < sizeof longword1)
+           for (i = 8; i < sizeof longword1; i++)
+             if (cp[i] == c1 || cp[i] == c2)
+               return (void *) &cp[i];
+       }
+
+      n -= sizeof longword1;
+    }
+
+  char_ptr = (const unsigned char *) longword_ptr;
+
+  while (n-- > 0)
+    {
+      if (*char_ptr == c1 || *char_ptr == c2)
+       return (void *) char_ptr;
+      ++char_ptr;
+    }
+
+  return 0;
+}
diff --git a/lib/memchr2.h b/lib/memchr2.h
new file mode 100644
index 0000000..e35b475
--- /dev/null
+++ b/lib/memchr2.h
@@ -0,0 +1,31 @@
+/* Scan memory for the first of two bytes.
+   Copyright (C) 2008 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Return the first address of either C1 or C2 (treated as unsigned
+   char) that occurs within N bytes of the memory region S.  If
+   neither byte appears, return NULL.  */
+
+extern void *memchr2 (void const *s, int c1, int c2, size_t n);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/modules/memchr2 b/modules/memchr2
new file mode 100644
index 0000000..7e83415
--- /dev/null
+++ b/modules/memchr2
@@ -0,0 +1,24 @@
+Description:
+memchr2() function: scan memory for the first of two bytes.
+
+Files:
+lib/memchr2.h
+lib/memchr2.c
+
+Depends-on:
+stdint
+memchr
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += memchr2.h memchr2.c
+
+Include:
+"memchr2.h"
+
+License:
+GPL
+
+Maintainer:
+Eric Blake
diff --git a/modules/memchr2-tests b/modules/memchr2-tests
new file mode 100644
index 0000000..7485fdc
--- /dev/null
+++ b/modules/memchr2-tests
@@ -0,0 +1,10 @@
+Files:
+tests/test-memchr2.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-memchr2
+check_PROGRAMS += test-memchr2
diff --git a/tests/test-memchr2.c b/tests/test-memchr2.c
new file mode 100644
index 0000000..68e8595
--- /dev/null
+++ b/tests/test-memchr2.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2008 Free Software Foundation
+ * Written by Eric Blake
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include "memchr2.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define ASSERT(expr) \
+  do                                                                        \
+    {                                                                       \
+      if (!(expr))                                                          \
+       {                                                                    \
+         fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+         abort ();                                                          \
+       }                                                                    \
+    }                                                                       \
+  while (0)
+
+int
+main ()
+{
+  size_t n = 0x100000;
+  char *input = malloc (n);
+  ASSERT (input);
+
+  input[0] = 'a';
+  input[1] = 'b';
+  memset (input + 2, 'c', 1024);
+  memset (input + 1026, 'd', n - 1028);
+  input[n - 2] = 'e';
+  input[n - 1] = 'a';
+
+  ASSERT (memchr2 (input, 'a', 'b', n) == input);
+  ASSERT (memchr2 (input, 'b', 'a', n) == input);
+
+  ASSERT (memchr2 (input, 'a', 'b', 0) == NULL);
+  ASSERT (memchr2 (NULL, 'a', 'b', 0) == NULL);
+
+  ASSERT (memchr2 (input, 'b', 'd', n) == input + 1);
+  ASSERT (memchr2 (input + 2, 'b', 'd', n - 2) == input + 1026);
+
+  ASSERT (memchr2 (input, 'd', 'e', n) == input + 1026);
+  ASSERT (memchr2 (input, 'e', 'd', n) == input + 1026);
+
+  ASSERT (memchr2 (input + 1, 'a', 'e', n - 1) == input + n - 2);
+  ASSERT (memchr2 (input + 1, 'e', 'a', n - 1) == input + n - 2);
+
+  ASSERT (memchr2 (input, 'f', 'g', n) == NULL);
+  ASSERT (memchr2 (input, 'f', '\0', n) == NULL);
+
+  ASSERT (memchr2 (input, 'a', 'a', n) == input);
+  ASSERT (memchr2 (input + 1, 'a', 'a', n - 1) == input + n - 1);
+  ASSERT (memchr2 (input, 'f', 'f', n) == NULL);
+
+  /* Check that a very long haystack is handled quickly if one of the
+     two bytes is found near the beginning.  */
+  {
+    size_t repeat = 10000;
+    for (; repeat > 0; repeat--)
+      {
+       ASSERT (memchr2 (input, 'c', 'e', n) == input + 2);
+       ASSERT (memchr2 (input, 'e', 'c', n) == input + 2);
+       ASSERT (memchr2 (input, 'c', '\0', n) == input + 2);
+       ASSERT (memchr2 (input, '\0', 'c', n) == input + 2);
+      }
+  }
+
+  free (input);
+
+  return 0;
+}
-- 
1.5.4


reply via email to

[Prev in Thread] Current Thread [Next in Thread]