bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

new module mbsspn


From: Bruno Haible
Subject: new module mbsspn
Date: Mon, 5 Feb 2007 04:31:23 +0100
User-agent: KMail/1.5.4

The function mbsspn() is like strspn(), except that it also works in multibyte
locales.

2007-02-04  Bruno Haible  <address@hidden>

        New module mbsspn.
        * modules/mbsspn: New file.
        * lib/mbsspn.c: New file.
        * lib/string_.h (strspn): Add a conditional link warning.
        (mbsspn): New declaration.
        * m4/mbsspn.m4: New file.
        * m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize
        GNULIB_MBSSPN.
        * modules/string (string.h): Also substitute GNULIB_MBSSPN.
        * MODULES.html.sh (Internationalization functions): Add mbsspn.

=========================== modules/mbsspn ===================================
Description:
mbsspn() function: search a string for any character outside a set of characters.

Files:
lib/mbsspn.c
m4/mbsspn.m4
m4/mbrtowc.m4

Depends-on:
mbuiter
string
mbschr

configure.ac:
gl_FUNC_MBSSPN
gl_STRING_MODULE_INDICATOR([mbsspn])

Makefile.am:
lib_SOURCES += mbsspn.c

Include:
<string.h>

License:
LGPL

Maintainer:
Bruno Haible

============================== lib/mbsspn.c ==================================
/* Searching a string for a character outside a given set of characters.
   Copyright (C) 1999, 2002, 2006-2007 Free Software Foundation, Inc.
   Written by Bruno Haible <address@hidden>, 2007.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

#include <config.h>

/* Specification.  */
#include <string.h>

#if HAVE_MBRTOWC
# include "mbuiter.h"
#endif

/* Find the first occurrence in the character string STRING of any character
   not in the character string REJECT.  Return the number of bytes from the
   beginning of the string to this occurrence, or to the end of the string
   if none exists.
   Note: as with the second argument of strspn(), REJECT is the set of
   characters that are skipped over; the scan stops at the first character
   of STRING that is *not* a member of it.  */
size_t
mbsspn (const char *string, const char *reject)
{
  /* Optimize two cases.  */
  /* Empty set: no character of STRING can belong to it.  */
  if (reject[0] == '\0')
    return 0;
  /* Set consisting of a single byte: compare directly instead of searching
     the set for every character.  */
  if (reject[1] == '\0')
    {
      unsigned char uc = (unsigned char) reject[0];

#if HAVE_MBRTOWC
      if (MB_CUR_MAX > 1)
        {
          mbui_iterator_t iter;

          /* Stop at the first character that is either multibyte or a
             single byte different from UC.  */
          for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
            if (!(mb_len (mbui_cur (iter)) == 1
                  && (unsigned char) * mbui_cur_ptr (iter) == uc))
              return mbui_cur_ptr (iter) - string;
          return strlen (string);
        }
      else
#endif
        {
          const char *ptr;

          for (ptr = string; *ptr != '\0'; ptr++)
            if ((unsigned char) *ptr != uc)
              break;
          return ptr - string;
        }
    }
  /* General case.  */
#if HAVE_MBRTOWC
  if (MB_CUR_MAX > 1)
    {
      mbui_iterator_t iter;

      for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
        {
          if (mb_len (mbui_cur (iter)) == 1)
            {
              /* Single-byte character: look it up in the set.  */
              if (mbschr (reject, (unsigned char) * mbui_cur_ptr (iter))
                  == NULL)
                return mbui_cur_ptr (iter) - string;
            }
          else
            {
              mbui_iterator_t aiter;

              /* Multibyte character: compare it against every character of
                 the set.  The end-of-set test must live inside the loop
                 body (not in the loop condition): reaching the end of
                 REJECT without a match means the current character of
                 STRING is outside the set, so the scan stops here.  */
              for (mbui_init (aiter, reject); ; mbui_advance (aiter))
                {
                  if (!mbui_avail (aiter))
                    return mbui_cur_ptr (iter) - string;
                  if (mb_equal (mbui_cur (aiter), mbui_cur (iter)))
                    break;
                }
            }
        }
      /* Every character of STRING belongs to the set.  */
      return strlen (string);
    }
  else
#endif
    return strspn (string, reject);
}
============================== m4/mbsspn.m4 ==================================
# mbsspn.m4 serial 1
dnl Copyright (C) 2007 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.

# gl_FUNC_MBSSPN
# Configure-time entry point for the mbsspn module.  It performs no checks
# of its own; it expands only to gl_PREREQ_MBSSPN.
AC_DEFUN([gl_FUNC_MBSSPN],
[
  gl_PREREQ_MBSSPN
])

# Prerequisites of lib/mbsspn.c.
# Requires gl_FUNC_MBRTOWC (presumably the check that provides the
# HAVE_MBRTOWC symbol tested in lib/mbsspn.c -- TODO confirm against
# m4/mbrtowc.m4).  The trailing ':' is a shell no-op command.
AC_DEFUN([gl_PREREQ_MBSSPN], [
  AC_REQUIRE([gl_FUNC_MBRTOWC])
  :
])
==============================================================================
--- MODULES.html.sh     5 Feb 2007 03:16:59 -0000       1.187
+++ MODULES.html.sh     5 Feb 2007 03:22:02 -0000
@@ -2167,6 +2167,7 @@
   func_module mbscasestr
   func_module mbscspn
   func_module mbspbrk
+  func_module mbsspn
   func_module mbswidth
   func_module memcasecmp
   func_module memcoll
--- lib/string_.h       5 Feb 2007 03:16:59 -0000       1.15
+++ lib/string_.h       5 Feb 2007 03:22:03 -0000
@@ -233,6 +233,15 @@
 #endif
 
 #if defined GNULIB_POSIXCHECK
+/* strspn() assumes the second argument is a list of single-byte characters.
+   Even in this simple case, it cannot work with multibyte strings.  */
+# undef strspn
+# define strspn(s,a) \
+    (GL_LINK_WARNING ("strspn cannot work correctly on character strings in multibyte locales - use mbsspn if you care about internationalization"), \
+     strspn (s, a))
+#endif
+
+#if defined GNULIB_POSIXCHECK
 /* strrchr() does not work with multibyte strings if the locale encoding is
    GB18030 and the character to be searched is a digit.  */
 # undef strrchr
@@ -391,6 +400,15 @@
 extern char * mbspbrk (const char *string, const char *accept);
 #endif
 
+#if @GNULIB_MBSSPN@
+/* Find the first occurrence in the character string STRING of any character
+   not in the character string REJECT.  Return the number of bytes from the
+   beginning of the string to this occurrence, or to the end of the string
+   if none exists.
+   Unlike strspn(), this function works correctly in multibyte locales.  */
+extern size_t mbsspn (const char *string, const char *reject);
+#endif
+
 
 #ifdef __cplusplus
 }
--- m4/string_h.m4      5 Feb 2007 03:16:59 -0000       1.14
+++ m4/string_h.m4      5 Feb 2007 03:22:03 -0000
@@ -74,4 +74,5 @@
   GNULIB_MBSCASESTR=0;  AC_SUBST([GNULIB_MBSCASESTR])
   GNULIB_MBSCSPN=0;     AC_SUBST([GNULIB_MBSCSPN])
   GNULIB_MBSPBRK=0;     AC_SUBST([GNULIB_MBSPBRK])
+  GNULIB_MBSSPN=0;      AC_SUBST([GNULIB_MBSSPN])
 ])
--- modules/string      5 Feb 2007 03:16:59 -0000       1.13
+++ modules/string      5 Feb 2007 03:22:03 -0000
@@ -28,6 +28,7 @@
              -e 's|@''GNULIB_MBSCASESTR''@|$(GNULIB_MBSCASESTR)|g' \
              -e 's|@''GNULIB_MBSCSPN''@|$(GNULIB_MBSCSPN)|g' \
              -e 's|@''GNULIB_MBSPBRK''@|$(GNULIB_MBSPBRK)|g' \
+             -e 's|@''GNULIB_MBSSPN''@|$(GNULIB_MBSSPN)|g' \
              -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \
              -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \
              -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \





reply via email to

[Prev in Thread] Current Thread [Next in Thread]