bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

new module c-strstr


From: Bruno Haible
Subject: new module c-strstr
Date: Fri, 18 Aug 2006 16:56:40 +0200
User-agent: KMail/1.9.1

The module strstr is internationalized, fine. But there are some cases
where it's overkill and the classical and faster byte-per-byte string
traversal can be used instead. I propose to add a new module c-strstr
in the same philosophy as c-ctype, c-strcase, c-strcasestr.

============================ modules/c-strstr ============================
Description:
Search for a substring in a string in C locale.

Files:
lib/c-strstr.h
lib/c-strstr.c

Depends-on:

configure.ac:

Makefile.am:
lib_SOURCES += c-strstr.h c-strstr.c

Include:
"c-strstr.h"

License:
LGPL

Maintainer:
Bruno Haible

============================= lib/c-strstr.h =============================
/* Searching in a string.
   Copyright (C) 2001-2003, 2006 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */


/* The functions defined in this file assume the "C" locale and a character
   set without diacritics (ASCII-US or EBCDIC-US or something like that).
   Even if the "C" locale on a particular system is an extension of the ASCII
   character set (like on BeOS, where it is UTF-8, or on AmigaOS, where it
   is ISO-8859-1), the functions in this file recognize only the ASCII
   characters.  More precisely, one of the string arguments must be an ASCII
   string with additional restrictions.  */


#ifdef __cplusplus
extern "C" {
#endif

/* Find the first occurrence of NEEDLE in HAYSTACK.
   Return a pointer to the start of that occurrence, HAYSTACK itself if
   NEEDLE is the empty string, or a null pointer if NEEDLE does not occur
   in HAYSTACK.  The comparison is done byte by byte.
   This function is safe to be called, even in a multibyte locale, if NEEDLE
     1. consists solely of printable ASCII characters excluding '\\' and '~'
        [this restriction is needed because of Shift_JIS and JOHAB]
        or of the control ASCII characters '\a' '\b' '\f' '\n' '\r' '\t' '\v'
        [this restriction is needed because of VISCII], and
     2. has at least length 2
        [this restriction is needed because of BIG5, BIG5-HKSCS, GBK, GB18030,
         Shift_JIS, JOHAB], and
     3. does not consist entirely of decimal digits, or has at least length 4
        [this restriction is needed because of GB18030].  */
extern char *c_strstr (const char *haystack, const char *needle);

#ifdef __cplusplus
}
#endif
============================= lib/c-strstr.c =============================
/* Copyright (C) 1994, 1999, 2002-2003, 2005-2006 Free Software Foundation, Inc.
This file is part of the GNU C Library.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

/*
 * My personal strstr() implementation that beats most other algorithms.
 * Until someone tells me otherwise, I assume that this is the
 * fastest implementation of strstr() in C.
 * I deliberately chose not to comment it.  You should have at least
 * as much fun trying to understand it, as I had to write it :-).
 *
 * Stephen R. van den Berg, address@hidden      */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <string.h>

/* Byte values are widened to a plain unsigned int while scanning.  */
typedef unsigned chartype;

/* Find the first occurrence of the string PNEEDLE in the string PHAYSTACK,
   comparing byte by byte (as unsigned char values).

   Return a pointer to the start of the first match, PHAYSTACK itself if
   PNEEDLE is the empty string, or a null pointer if PNEEDLE does not occur.

   Outline of the algorithm: find the first needle byte (b), check that the
   following haystack byte equals the second needle byte (c), and only then
   compare the remainder of the needle.  The scanning and comparing loops
   are unrolled to handle two bytes per iteration.  */
char *
c_strstr (const char *phaystack, const char *pneedle)
{
  const unsigned char *haystack, *needle;
  chartype b, c;

  haystack = (const unsigned char *) phaystack;
  needle = (const unsigned char *) pneedle;

  /* b is the first needle byte.  An empty needle matches at the start.  */
  b = *needle;
  if (b != '\0')
    {
      /* Locate the first occurrence of b.  The scan starts at haystack[0]
         and only advances afterwards, so no pointer before the beginning
         of the caller's array is ever formed.  */
      for (;; ++haystack)
        {
          c = *haystack;
          if (c == '\0')
            goto ret0;
          if (c == b)
            break;
        }

      /* c becomes the second needle byte.  A one-byte needle is already
         matched at haystack.  */
      c = *++needle;
      if (c == '\0')
        goto foundneedle;
      /* From here on, needle points at the third needle byte.  */
      ++needle;
      /* haystack already points at a b, so skip the search for b and go
         straight to testing the byte that follows it.  */
      goto jin;

      for (;;)
        {
          chartype a;
          const unsigned char *rhaystack, *rneedle;

          /* Advance to the next occurrence of b, two bytes per iteration.  */
          do
            {
              a = *++haystack;
              if (a == '\0')
                goto ret0;
              if (a == b)
                break;
              a = *++haystack;
              if (a == '\0')
                goto ret0;
              /* Re-entry point when the byte after a b was not c: that byte
                 is in a and may itself be b, which the loop test checks.  */
shloop:;    }
          while (a != b);

          /* haystack points at a b; the next byte must equal c.  */
jin:      a = *++haystack;
          if (a == '\0')
            goto ret0;

          if (a != c)
            goto shloop;

          /* The first two bytes match.  rhaystack points at the third
             candidate byte, while haystack steps back onto the b, i.e. the
             start of the candidate match.  This decrement is safe because
             haystack was incremented just above.  */
          rhaystack = haystack-- + 1;
          /* Remember the third-byte position so needle can be restored.  */
          rneedle = needle;
          a = *rneedle;

          /* Compare the rest of the needle, two bytes per iteration.
             Reaching the needle's terminating NUL in a means a match.  */
          if (*rhaystack == a)
            do
              {
                if (a == '\0')
                  goto foundneedle;
                ++rhaystack;
                a = *++needle;
                if (*rhaystack != a)
                  break;
                if (a == '\0')
                  goto foundneedle;
                ++rhaystack;
                a = *++needle;
              }
            while (*rhaystack == a);

          needle = rneedle;                /* took the register-poor approach */

          /* The comparison stopped on the needle's NUL: the whole needle
             matched even though the haystack continues.  */
          if (a == '\0')
            break;
        }
    }
foundneedle:
  return (char*) haystack;
ret0:
  return NULL;
}
==========================================================================




reply via email to

[Prev in Thread] Current Thread [Next in Thread]