bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

new module 'mbssep'


From: Bruno Haible
Subject: new module 'mbssep'
Date: Tue, 6 Feb 2007 03:03:11 +0100
User-agent: KMail/1.5.4

The function mbssep() is like strsep(), except that it also works with
multibyte strings. Although strsep() is not in POSIX, we have a replacement
for it in gnulib; therefore, IMO, we also need to offer an internationalized
variant of it.

2007-02-05  Bruno Haible  <address@hidden>

        New module mbssep.
        * modules/mbssep: New file.
        * lib/mbssep.c: New file.
        * lib/string_.h (strsep): Add a conditional link warning.
        (mbssep): New declaration.
        * m4/mbssep.m4: New file.
        * m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize
        GNULIB_MBSSEP.
        * modules/string (string.h): Also substitute GNULIB_MBSSEP.
        * MODULES.html.sh (Internationalization functions): Add mbssep.

========================= modules/mbssep ======================================
Description:
mbssep() function: split string into tokens, thread safe.

Files:
lib/mbssep.c
m4/mbssep.m4
m4/mbrtowc.m4

Depends-on:
mbuiter
string
mbspbrk
strsep

configure.ac:
gl_FUNC_MBSSEP
gl_STRING_MODULE_INDICATOR([mbssep])

Makefile.am:
lib_SOURCES += mbssep.c

Include:
<string.h>

License:
LGPL

Maintainer:
Bruno Haible

========================= lib/mbssep.c ========================================
/* Tokenizing a string.
   Copyright (C) 2007 Free Software Foundation, Inc.
   Written by Bruno Haible <address@hidden>, 2007.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

#include <config.h>

/* Specification.  */
#include <string.h>

#if HAVE_MBRTOWC
# include "mbuiter.h"
#endif

/* Split off the next token of *STRINGP, where tokens are separated by any
   character listed in DELIM.

   If *STRINGP is NULL, return NULL and leave *STRINGP untouched.
   Otherwise return the old value of *STRINGP.  When a delimiter is found,
   its first byte is overwritten with NUL and *STRINGP is set to the
   character following the delimiter; when none is found, *STRINGP is set
   to NULL.

   When HAVE_MBRTOWC is set and MB_CUR_MAX exceeds 1, delimiters are located
   with mbspbrk and stepped over with an mbui iterator; in every other case
   the call is handed to strsep unchanged.  */
char *
mbssep (char **stringp, const char *delim)
{
#if HAVE_MBRTOWC
  if (MB_CUR_MAX > 1)
    {
      char *token = *stringp;
      char *delim_pos;
      mbui_iterator_t iter;

      /* Nothing left to split.  */
      if (token == NULL)
        return NULL;

      /* mbspbrk already optimizes the cases of 0 or 1 delimiters, so they
         get no special treatment here.  */
      delim_pos = mbspbrk (token, delim);
      if (delim_pos == NULL)
        {
          /* No delimiter: the rest of the string is the final token.  */
          *stringp = NULL;
          return token;
        }

      /* Step over the delimiter character while it is still intact, and
         only then overwrite its first byte.  mbspbrk reported a match at
         DELIM_POS, so a character must be available there; anything else
         is treated as fatal.  */
      mbui_init (iter, delim_pos);
      if (!mbui_avail (iter))
        abort ();
      mbui_advance (iter);

      *delim_pos = '\0';
      *stringp = (char *) mbui_cur_ptr (iter);
      return token;
    }
#endif
  /* Reached when HAVE_MBRTOWC is unset or MB_CUR_MAX is 1.  */
  return strsep (stringp, delim);
}
========================= m4/mbssep.m4 ========================================
# mbssep.m4 serial 1
dnl Copyright (C) 2007 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.

# gl_FUNC_MBSSEP
# Configure-time entry point of the mbssep module; this is the macro that
# the configure.ac section of modules/mbssep invokes.  Its whole body is
# the expansion of gl_PREREQ_MBSSEP.
AC_DEFUN([gl_FUNC_MBSSEP],
[
  gl_PREREQ_MBSSEP
])

# Prerequisites of lib/mbssep.c.
# lib/mbssep.c tests HAVE_MBRTOWC to choose between its multibyte code path
# and a plain strsep call, so gl_FUNC_MBRTOWC is pulled in via AC_REQUIRE,
# which expands it once ahead of this macro.
# NOTE(review): presumably gl_FUNC_MBRTOWC, shipped in m4/mbrtowc.m4, is what
# defines HAVE_MBRTOWC -- confirm there.
# The lone colon is the shell no-op command; presumably it keeps the
# expanded body from being empty.
AC_DEFUN([gl_PREREQ_MBSSEP], [
  AC_REQUIRE([gl_FUNC_MBRTOWC])
  :
])
===============================================================================
*** MODULES.html.sh     5 Feb 2007 03:34:24 -0000       1.189
--- MODULES.html.sh     6 Feb 2007 01:50:51 -0000
***************
*** 2168,2173 ****
--- 2168,2174 ----
    func_module mbscspn
    func_module mbspbrk
    func_module mbsspn
+   func_module mbssep
    func_module mbstok_r
    func_module mbswidth
    func_module memcasecmp
*** lib/string_.h       5 Feb 2007 03:39:57 -0000       1.18
--- lib/string_.h       6 Feb 2007 01:50:51 -0000
***************
*** 270,275 ****
--- 270,281 ----
  # if ! @HAVE_STRSEP@
  extern char *strsep (char **restrict __stringp, char const *restrict __delim);
  # endif
+ # if defined GNULIB_POSIXCHECK
+ #  undef strsep
+ #  define strsep(s,d) \
+      (GL_LINK_WARNING ("strsep cannot work correctly on character strings in multibyte locales - use mbssep if you care about internationalization"), \
+       strsep (s, d))
+ # endif
  #elif defined GNULIB_POSIXCHECK
  # undef strsep
  # define strsep strsep_is_unportable__use_gnulib_module_strsep_for_portability
***************
*** 415,420 ****
--- 421,444 ----
  extern size_t mbsspn (const char *string, const char *reject);
  #endif
  
+ #if @GNULIB_MBSSEP@
+ /* Search for the next delimiter (a multibyte character listed in the
+    character string DELIM) starting at the character string *STRINGP.
+    If one is found, overwrite it with a NUL, and advance *STRINGP to point
+    to the next multibyte character after it.  Otherwise, set *STRINGP to NULL.
+    If *STRINGP was already NULL, nothing happens.
+    Return the old value of *STRINGP.
+ 
+    This is a variant of mbstok_r() that supports empty fields.
+ 
+    Caveat: It modifies the original string.
+    Caveat: These functions cannot be used on constant strings.
+    Caveat: The identity of the delimiting character is lost.
+ 
+    See also mbstok_r().  */
+ extern char * mbssep (char **stringp, const char *delim);
+ #endif
+ 
  #if @GNULIB_MBSTOK_R@
  /* Parse the character string STRING into tokens separated by characters in
     the character string DELIM.
***************
*** 429,435 ****
  
     Caveat: It modifies the original string.
     Caveat: These functions cannot be used on constant strings.
!    Caveat: The identity of the delimiting character is lost.  */
  extern char * mbstok_r (char *string, const char *delim, char **save_ptr);
  #endif
  
--- 453,461 ----
  
     Caveat: It modifies the original string.
     Caveat: These functions cannot be used on constant strings.
!    Caveat: The identity of the delimiting character is lost.
! 
!    See also mbssep().  */
  extern char * mbstok_r (char *string, const char *delim, char **save_ptr);
  #endif
  
*** m4/string_h.m4      5 Feb 2007 03:34:24 -0000       1.16
--- m4/string_h.m4      6 Feb 2007 01:50:51 -0000
***************
*** 75,79 ****
--- 75,80 ----
    GNULIB_MBSCSPN=0;     AC_SUBST([GNULIB_MBSCSPN])
    GNULIB_MBSPBRK=0;     AC_SUBST([GNULIB_MBSPBRK])
    GNULIB_MBSSPN=0;      AC_SUBST([GNULIB_MBSSPN])
+   GNULIB_MBSSEP=0;      AC_SUBST([GNULIB_MBSSEP])
    GNULIB_MBSTOK_R=0;    AC_SUBST([GNULIB_MBSTOK_R])
  ])
*** modules/string      5 Feb 2007 03:34:24 -0000       1.15
--- modules/string      6 Feb 2007 01:50:52 -0000
***************
*** 29,34 ****
--- 29,35 ----
              -e 's|@''GNULIB_MBSCSPN''@|$(GNULIB_MBSCSPN)|g' \
              -e 's|@''GNULIB_MBSPBRK''@|$(GNULIB_MBSPBRK)|g' \
              -e 's|@''GNULIB_MBSSPN''@|$(GNULIB_MBSSPN)|g' \
+             -e 's|@''GNULIB_MBSSEP''@|$(GNULIB_MBSSEP)|g' \
              -e 's|@''GNULIB_MBSTOK_R''@|$(GNULIB_MBSTOK_R)|g' \
              -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \
              -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \





reply via email to

[Prev in Thread] Current Thread [Next in Thread]