bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

new module mbscspn


From: Bruno Haible
Subject: new module mbscspn
Date: Mon, 5 Feb 2007 04:10:39 +0100
User-agent: KMail/1.5.4

The function mbscspn() is a variant of strcspn() which works also with
multibyte strings.

2007-02-04  Bruno Haible  <address@hidden>

        New module mbscspn.
        * modules/mbscspn: New file.
        * lib/mbscspn.c: New file.
        * lib/string_.h (strcspn): Add a conditional link warning.
        (mbscspn): New declaration.
        * m4/mbscspn.m4: New file.
        * m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize
        GNULIB_MBSCSPN.
        * modules/string (string.h): Also substitute GNULIB_MBSCSPN.
        * MODULES.html.sh (Internationalization functions): Add mbscspn.

========================= modules/mbscspn ================================
Description:
mbscspn() function: search a string for any of a set of characters.

Files:
lib/mbscspn.c
m4/mbscspn.m4
m4/mbrtowc.m4

Depends-on:
mbuiter
string
mbschr
strcspn

configure.ac:
gl_FUNC_MBSCSPN
gl_STRING_MODULE_INDICATOR([mbscspn])

Makefile.am:
lib_SOURCES += mbscspn.c

Include:
<string.h>

License:
LGPL

Maintainer:
Bruno Haible

========================== lib/mbscspn.c =================================
/* Searching a string for a character among a given set of characters.
   Copyright (C) 1999, 2002, 2006-2007 Free Software Foundation, Inc.
   Written by Bruno Haible <address@hidden>, 2007.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

#include <config.h>

/* Specification.  */
#include <string.h>

#if HAVE_MBRTOWC
# include "mbuiter.h"
#endif

/* Find the first occurrence in the character string STRING of any character
   in the character string ACCEPT.  Return the number of bytes from the
   beginning of the string to this occurrence, or to the end of the string
   if none exists.

   STRING  is the NUL-terminated string to be scanned.
   ACCEPT  is the NUL-terminated set of characters that stop the scan
           (the same role as the second argument of strcspn()).

   The result is a byte offset into STRING, not a character count.  When
   HAVE_MBRTOWC is set and MB_CUR_MAX > 1, STRING and ACCEPT are compared
   character by character through the mbuiter iterator; otherwise the work
   is delegated to strcspn().  */
size_t
mbscspn (const char *string, const char *accept)
{
  /* Optimize two cases.  */
  /* Empty set: nothing can match, so the whole string is the prefix.  */
  if (accept[0] == '\0')
    return strlen (string);
  /* Set consisting of one single byte: a plain character search suffices.  */
  if (accept[1] == '\0')
    {
      const char *ptr = mbschr (string, accept[0]);
      return (ptr != NULL ? ptr - string : strlen (string));
    }
  /* General case.  */
#if HAVE_MBRTOWC
  if (MB_CUR_MAX > 1)
    {
      mbui_iterator_t iter;

      for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
        {
          if (mb_len (mbui_cur (iter)) == 1)
            {
              /* Single-byte character: look it up in ACCEPT with mbschr().  */
              if (mbschr (accept, (unsigned char) * mbui_cur_ptr (iter)))
                goto done;
            }
          else
            {
              /* Multibyte character: compare it against every character
                 of ACCEPT.  */
              mbui_iterator_t aiter;

              for (mbui_init (aiter, accept);
                   mbui_avail (aiter);
                   mbui_advance (aiter))
                if (mb_equal (mbui_cur (aiter), mbui_cur (iter)))
                  goto done;
            }
        }
      /* Reached either through a match or because the iterator ran into the
         terminating NUL of STRING.  In both cases the current iterator
         position is the wanted offset, so no additional strlen() pass over
         STRING is needed.  */
     done:
      return mbui_cur_ptr (iter) - string;
    }
  else
#endif
    return strcspn (string, accept);
}
========================== m4/mbscspn.m4 =================================
# mbscspn.m4 serial 1
dnl Copyright (C) 2007 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.

# gl_FUNC_MBSCSPN
# ---------------
# Configure-time entry point of the mbscspn module.  It takes no arguments
# and only expands gl_PREREQ_MBSCSPN, i.e. the checks that lib/mbscspn.c
# relies on.
AC_DEFUN([gl_FUNC_MBSCSPN],
[
  gl_PREREQ_MBSCSPN
])

# Prerequisites of lib/mbscspn.c.
# Makes sure gl_FUNC_MBRTOWC has been run; lib/mbscspn.c tests HAVE_MBRTOWC
# (presumably defined by that macro -- confirm in m4/mbrtowc.m4).
# The lone ':' in the body is a shell no-op command.
AC_DEFUN([gl_PREREQ_MBSCSPN], [
  AC_REQUIRE([gl_FUNC_MBRTOWC])
  :
])
==========================================================================
--- MODULES.html.sh     5 Feb 2007 02:42:27 -0000       1.185
+++ MODULES.html.sh     5 Feb 2007 03:02:46 -0000
@@ -2165,6 +2165,7 @@
   func_module mbsstr
   func_module mbscasecmp
   func_module mbscasestr
+  func_module mbscspn
   func_module mbswidth
   func_module memcasecmp
   func_module memcoll
--- lib/string_.h       5 Feb 2007 02:42:27 -0000       1.13
+++ lib/string_.h       5 Feb 2007 03:02:46 -0000
@@ -201,6 +201,17 @@
 # define strnlen strnlen_is_unportable__use_gnulib_module_strnlen_for_portability
 #endif
 
+#if defined GNULIB_POSIXCHECK
+/* strcspn() assumes the second argument is a list of single-byte characters.
+   Even in this simple case, it does not work with multibyte strings if the
+   locale encoding is GB18030 and one of the characters to be searched is a
+   digit.  */
+# undef strcspn
+# define strcspn(s,a) \
+    (GL_LINK_WARNING ("strcspn cannot work correctly on character strings in multibyte locales - use mbscspn if you care about internationalization"), \
+     strcspn (s, a))
+#endif
+
 /* Find the first occurrence in S of any character in ACCEPT.  */
 #if @GNULIB_STRPBRK@
 # if ! @HAVE_STRPBRK@
@@ -352,6 +363,15 @@
 extern char * mbscasestr (const char *haystack, const char *needle);
 #endif
 
+#if @GNULIB_MBSCSPN@
+/* Find the first occurrence in the character string STRING of any character
+   in the character string ACCEPT.  Return the number of bytes from the
+   beginning of the string to this occurrence, or to the end of the string
+   if none exists.
+   Unlike strcspn(), this function works correctly in multibyte locales.  */
+extern size_t mbscspn (const char *string, const char *accept);
+#endif
+
 
 #ifdef __cplusplus
 }
--- m4/string_h.m4      5 Feb 2007 02:42:27 -0000       1.12
+++ m4/string_h.m4      5 Feb 2007 03:02:47 -0000
@@ -72,4 +72,5 @@
   GNULIB_MBSSTR=0;      AC_SUBST([GNULIB_MBSSTR])
   GNULIB_MBSCASECMP=0;  AC_SUBST([GNULIB_MBSCASECMP])
   GNULIB_MBSCASESTR=0;  AC_SUBST([GNULIB_MBSCASESTR])
+  GNULIB_MBSCSPN=0;     AC_SUBST([GNULIB_MBSCSPN])
 ])
--- modules/string      5 Feb 2007 02:42:27 -0000       1.11
+++ modules/string      5 Feb 2007 03:02:47 -0000
@@ -26,6 +26,7 @@
              -e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \
              -e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \
              -e 's|@''GNULIB_MBSCASESTR''@|$(GNULIB_MBSCASESTR)|g' \
+             -e 's|@''GNULIB_MBSCSPN''@|$(GNULIB_MBSCSPN)|g' \
              -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \
              -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \
              -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \





reply via email to

[Prev in Thread] Current Thread [Next in Thread]