>From 80ae5aa1307114f51bb47b1b1a4ac89a31422956 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sun, 5 Jan 2020 02:32:18 +0100 Subject: [PATCH 1/2] mbsnrtoc32s: New module. * lib/uchar.in.h (mbsnrtoc32s): New declaration. * lib/mbsnrtowcs-impl.h: Parameterize: Use macros FUNC, DCHAR_T, INTERNAL_STATE, MBRTOWC. * lib/mbsnrtowcs.c (FUNC, DCHAR_T, INTERNAL_STATE, MBRTOWC): New macros. * lib/mbsnrtoc32s.c: New file. * m4/uchar.m4 (gl_UCHAR_H_DEFAULTS): Initialize GNULIB_MBSNRTOC32S. * modules/uchar (Makefile.am): Substitute GNULIB_MBSNRTOC32S. * modules/mbsnrtoc32s: New file. * tests/test-uchar-c++.cc: Test the signature of mbsnrtoc32s. * doc/posix-functions/mbsnrtowcs.texi: Mention the new module. --- ChangeLog | 14 +++++++++ doc/posix-functions/mbsnrtowcs.texi | 7 +++-- lib/mbsnrtoc32s.c | 63 +++++++++++++++++++++++++++++++++++++ lib/mbsnrtowcs-impl.h | 10 +++--- lib/mbsnrtowcs.c | 4 +++ lib/uchar.in.h | 13 ++++++++ m4/uchar.m4 | 3 +- modules/mbsnrtoc32s | 38 ++++++++++++++++++++++ modules/uchar | 1 + tests/test-uchar-c++.cc | 6 ++++ 10 files changed, 151 insertions(+), 8 deletions(-) create mode 100644 lib/mbsnrtoc32s.c create mode 100644 modules/mbsnrtoc32s diff --git a/ChangeLog b/ChangeLog index 2825437..fb16eee 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,19 @@ 2020-01-04 Bruno Haible + mbsnrtoc32s: New module. + * lib/uchar.in.h (mbsnrtoc32s): New declaration. + * lib/mbsnrtowcs-impl.h: Parameterize: Use macros FUNC, DCHAR_T, + INTERNAL_STATE, MBRTOWC. + * lib/mbsnrtowcs.c (FUNC, DCHAR_T, INTERNAL_STATE, MBRTOWC): New macros. + * lib/mbsnrtoc32s.c: New file. + * m4/uchar.m4 (gl_UCHAR_H_DEFAULTS): Initialize GNULIB_MBSNRTOC32S. + * modules/uchar (Makefile.am): Substitute GNULIB_MBSNRTOC32S. + * modules/mbsnrtoc32s: New file. + * tests/test-uchar-c++.cc: Test the signature of mbsnrtoc32s. + * doc/posix-functions/mbsnrtowcs.texi: Mention the new module. + +2020-01-04 Bruno Haible + mbsrtoc32s tests: Enhance test. 
* tests/test-mbsrtoc32s.c (main): Include a non-BMP character in the test strings for UTF-8 and GB18030. diff --git a/doc/posix-functions/mbsnrtowcs.texi b/doc/posix-functions/mbsnrtowcs.texi index bd7911b..c6defd2 100644 --- a/doc/posix-functions/mbsnrtowcs.texi +++ b/doc/posix-functions/mbsnrtowcs.texi @@ -19,8 +19,11 @@ Solaris 11.4. Portability problems not fixed by Gnulib: @itemize @item -On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and therefore cannot -accommodate all Unicode characters. +On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and +therefore cannot accommodate all Unicode characters. +However, the Gnulib function @code{mbsnrtoc32s}, provided by Gnulib module +@code{mbsnrtoc32s}, operates on 32-bit wide characters and therefore does not +have this limitation. @item The specification is not clear about whether this function should update the conversion state when the first argument (the destination pointer) is NULL. diff --git a/lib/mbsnrtoc32s.c b/lib/mbsnrtoc32s.c new file mode 100644 index 0000000..7ba0415 --- /dev/null +++ b/lib/mbsnrtoc32s.c @@ -0,0 +1,63 @@ +/* Convert string to 32-bit wide string. + Copyright (C) 2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2020. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification.
*/ +#include <uchar.h> + +#include <wchar.h> + +#if _GL_LARGE_CHAR32_T + +/* For Cygwin >= 1.7 it would be possible to speed this up a bit by cutting + the source into chunks, calling mbsnrtowcs on a chunk, then u16_to_u32 on + the result, then proceed with the next chunk, and so on. + But speed is not critical here so far. */ + +/* Reuse the implementation of mbsnrtowcs with a different parameterization. */ + +# include <errno.h> +# include <limits.h> +# include <stdlib.h> + +# include "minmax.h" +# include "strnlen1.h" + +extern mbstate_t _gl_mbsrtoc32s_state; + +# define FUNC mbsnrtoc32s +# define DCHAR_T char32_t +# define INTERNAL_STATE _gl_mbsrtoc32s_state +# define MBRTOWC mbrtoc32 +# include "mbsnrtowcs-impl.h" + +#else +/* char32_t and wchar_t are equivalent. */ + +# include "verify.h" + +verify (sizeof (char32_t) == sizeof (wchar_t)); + +size_t +mbsnrtoc32s (char32_t *dest, const char **srcp, size_t srclen, size_t len, + mbstate_t *ps) +{ + return mbsnrtowcs ((wchar_t *) dest, srcp, srclen, len, ps); +} + +#endif diff --git a/lib/mbsnrtowcs-impl.h b/lib/mbsnrtowcs-impl.h index 29ed870..ede6fea 100644 --- a/lib/mbsnrtowcs-impl.h +++ b/lib/mbsnrtowcs-impl.h @@ -16,16 +16,16 @@ along with this program. If not, see <https://www.gnu.org/licenses/>. */ size_t -mbsnrtowcs (wchar_t *dest, const char **srcp, size_t srclen, size_t len, mbstate_t *ps) +FUNC (DCHAR_T *dest, const char **srcp, size_t srclen, size_t len, mbstate_t *ps) { if (ps == NULL) - ps = &_gl_mbsrtowcs_state; + ps = &INTERNAL_STATE; { const char *src = *srcp; if (dest != NULL) { - wchar_t *destptr = dest; + DCHAR_T *destptr = dest; for (; srclen > 0 && len > 0; destptr++, len--) { @@ -46,7 +46,7 @@ mbsnrtowcs (wchar_t *dest, const char **srcp, size_t srclen, size_t len, mbstate src_avail = 4 + strnlen1 (src + 4, MIN (srclen, MB_LEN_MAX) - 4); /* Parse the next multibyte character.
*/ - ret = mbrtowc (destptr, src, src_avail, ps); + ret = MBRTOWC (destptr, src, src_avail, ps); if (ret == (size_t)(-2)) /* Encountered a multibyte character that extends past a '\0' byte @@ -94,7 +94,7 @@ mbsnrtowcs (wchar_t *dest, const char **srcp, size_t srclen, size_t len, mbstate src_avail = 4 + strnlen1 (src + 4, MIN (srclen, MB_LEN_MAX) - 4); /* Parse the next multibyte character. */ - ret = mbrtowc (NULL, src, src_avail, &state); + ret = MBRTOWC (NULL, src, src_avail, &state); if (ret == (size_t)(-2)) /* Encountered a multibyte character that extends past a '\0' byte diff --git a/lib/mbsnrtowcs.c b/lib/mbsnrtowcs.c index 2fa5c3e..63bac59 100644 --- a/lib/mbsnrtowcs.c +++ b/lib/mbsnrtowcs.c @@ -30,4 +30,8 @@ extern mbstate_t _gl_mbsrtowcs_state; +#define FUNC mbsnrtowcs +#define DCHAR_T wchar_t +#define INTERNAL_STATE _gl_mbsrtowcs_state +#define MBRTOWC mbrtowc #include "mbsnrtowcs-impl.h" diff --git a/lib/uchar.in.h b/lib/uchar.in.h index 318cf8e..f31b18c 100644 --- a/lib/uchar.in.h +++ b/lib/uchar.in.h @@ -108,6 +108,19 @@ _GL_WARN_ON_USE (mbrtoc32, "mbrtoc32 is not portable - " /* Convert a string to a 32-bit wide string. */ +#if @GNULIB_MBSNRTOC32S@ +_GL_FUNCDECL_SYS (mbsnrtoc32s, size_t, + (char32_t *dest, const char **srcp, size_t srclen, size_t len, + mbstate_t *ps) + _GL_ARG_NONNULL ((2))); +_GL_CXXALIAS_SYS (mbsnrtoc32s, size_t, + (char32_t *dest, const char **srcp, size_t srclen, size_t len, + mbstate_t *ps)); +_GL_CXXALIASWARN (mbsnrtoc32s); +#endif + + +/* Convert a string to a 32-bit wide string. */ #if @GNULIB_MBSRTOC32S@ _GL_FUNCDECL_SYS (mbsrtoc32s, size_t, (char32_t *dest, const char **srcp, size_t len, mbstate_t *ps) diff --git a/m4/uchar.m4 b/m4/uchar.m4 index e92f5d6..4e0f43a 100644 --- a/m4/uchar.m4 +++ b/m4/uchar.m4 @@ -1,4 +1,4 @@ -# uchar.m4 serial 6 +# uchar.m4 serial 7 dnl Copyright (C) 2019-2020 Free Software Foundation, Inc. 
dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -50,6 +50,7 @@ AC_DEFUN([gl_UCHAR_H_DEFAULTS], GNULIB_BTOC32=0; AC_SUBST([GNULIB_BTOC32]) GNULIB_C32TOB=0; AC_SUBST([GNULIB_C32TOB]) GNULIB_MBRTOC32=0; AC_SUBST([GNULIB_MBRTOC32]) + GNULIB_MBSNRTOC32S=0; AC_SUBST([GNULIB_MBSNRTOC32S]) GNULIB_MBSRTOC32S=0; AC_SUBST([GNULIB_MBSRTOC32S]) dnl Assume proper GNU behavior unless another module says otherwise. HAVE_MBRTOC32=1; AC_SUBST([HAVE_MBRTOC32]) diff --git a/modules/mbsnrtoc32s b/modules/mbsnrtoc32s new file mode 100644 index 0000000..44784d8 --- /dev/null +++ b/modules/mbsnrtoc32s @@ -0,0 +1,38 @@ +Description: +mbsnrtoc32s() function: convert string to 32-bit wide string. + +Files: +lib/mbsnrtoc32s.c +lib/mbsnrtowcs-impl.h +lib/mbsrtoc32s-state.c + +Depends-on: +uchar +wchar +verify +mbrtoc32 [test $SMALL_WCHAR_T = 1] +minmax [test $SMALL_WCHAR_T = 1] +strnlen1 [test $SMALL_WCHAR_T = 1] +mbsnrtowcs [test $SMALL_WCHAR_T = 0] + +configure.ac: +AC_REQUIRE([gl_UCHAR_H]) +if test $SMALL_WCHAR_T = 1; then + AC_LIBOBJ([mbsrtoc32s-state]) +fi +gl_UCHAR_MODULE_INDICATOR([mbsnrtoc32s]) + +Makefile.am: +lib_SOURCES += mbsnrtoc32s.c + +Include: +<uchar.h> + +Link: +$(LIB_MBRTOWC) + +License: +LGPL + +Maintainer: +Bruno Haible diff --git a/modules/uchar b/modules/uchar index 03101c1..a50eb5a 100644 --- a/modules/uchar +++ b/modules/uchar @@ -31,6 +31,7 @@ uchar.h: uchar.in.h $(top_builddir)/config.status $(CXXDEFS_H) -e 's/@''GNULIB_BTOC32''@/$(GNULIB_BTOC32)/g' \ -e 's/@''GNULIB_C32TOB''@/$(GNULIB_C32TOB)/g' \ -e 's/@''GNULIB_MBRTOC32''@/$(GNULIB_MBRTOC32)/g' \ + -e 's/@''GNULIB_MBSNRTOC32S''@/$(GNULIB_MBSNRTOC32S)/g' \ -e 's/@''GNULIB_MBSRTOC32S''@/$(GNULIB_MBSRTOC32S)/g' \ -e 's|@''HAVE_MBRTOC32''@|$(HAVE_MBRTOC32)|g' \ -e 's|@''REPLACE_MBRTOC32''@|$(REPLACE_MBRTOC32)|g' \ diff --git a/tests/test-uchar-c++.cc b/tests/test-uchar-c++.cc index a7132a1..a630eec 100644 --- a/tests/test-uchar-c++.cc +++
b/tests/test-uchar-c++.cc @@ -37,6 +37,12 @@ SIGNATURE_CHECK (GNULIB_NAMESPACE::mbrtoc32, size_t, (char32_t *, const char *, size_t, mbstate_t *)); #endif +#if GNULIB_TEST_MBSNRTOC32S +SIGNATURE_CHECK (GNULIB_NAMESPACE::mbsnrtoc32s, size_t, + (char32_t *, const char **, size_t, size_t, mbstate_t *)); + +#endif + #if GNULIB_TEST_MBSRTOC32S SIGNATURE_CHECK (GNULIB_NAMESPACE::mbsrtoc32s, size_t, (char32_t *, const char **, size_t, mbstate_t *)); -- 2.7.4