From ef3398710f4b3cff37dcbdb4fdb267f3dcdb9fbe Mon Sep 17 00:00:00 2001 From: Bruno Haible <bruno@clisp.org> Date: Thu, 9 Jan 2020 16:20:10 +0100 Subject: [PATCH 1/2] c32srtombs: New module. * lib/uchar.in.h (c32srtombs): New declaration. * lib/wcsrtombs-impl.h: Parameterize: Use macros FUNC, SCHAR_T, INTERNAL_STATE, WCRTOMB. * lib/wcsrtombs.c (FUNC, SCHAR_T, INTERNAL_STATE, WCRTOMB): New macros. * lib/c32srtombs.c: New file. * lib/c32srtombs-state.c: New file, based on lib/wcsrtombs-state.c. * m4/uchar.m4 (gl_UCHAR_H_DEFAULTS): Initialize GNULIB_C32SRTOMBS. * modules/uchar (Makefile.am): Substitute GNULIB_C32SRTOMBS. * modules/c32srtombs: New file. * tests/test-uchar-c++.cc: Test the signature of c32srtombs. * doc/posix-functions/wcsrtombs.texi: Mention the new module. --- ChangeLog | 15 +++++++++++ doc/posix-functions/wcsrtombs.texi | 7 +++-- lib/c32srtombs-state.c | 37 +++++++++++++++++++++++++ lib/c32srtombs.c | 55 ++++++++++++++++++++++++++++++++++++++ lib/uchar.in.h | 12 +++++++++ lib/wcsrtombs-impl.h | 14 +++++----- lib/wcsrtombs.c | 4 +++ m4/uchar.m4 | 3 ++- modules/c32srtombs | 31 +++++++++++++++++++++ modules/uchar | 1 + tests/test-uchar-c++.cc | 5 ++++ 11 files changed, 174 insertions(+), 10 deletions(-) create mode 100644 lib/c32srtombs-state.c create mode 100644 lib/c32srtombs.c create mode 100644 modules/c32srtombs diff --git a/ChangeLog b/ChangeLog index 9c3f603..9d940e5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2020-01-09 Bruno Haible <bruno@clisp.org> + + c32srtombs: New module. + * lib/uchar.in.h (c32srtombs): New declaration. + * lib/wcsrtombs-impl.h: Parameterize: Use macros FUNC, SCHAR_T, + INTERNAL_STATE, WCRTOMB. + * lib/wcsrtombs.c (FUNC, SCHAR_T, INTERNAL_STATE, WCRTOMB): New macros. + * lib/c32srtombs.c: New file. + * lib/c32srtombs-state.c: New file, based on lib/wcsrtombs-state.c. + * m4/uchar.m4 (gl_UCHAR_H_DEFAULTS): Initialize GNULIB_C32SRTOMBS. + * modules/uchar (Makefile.am): Substitute GNULIB_C32SRTOMBS. + * modules/c32srtombs: New file. 
+ * tests/test-uchar-c++.cc: Test the signature of c32srtombs. + * doc/posix-functions/wcsrtombs.texi: Mention the new module. + 2020-01-08 Bruno Haible <bruno@clisp.org> c32tob: Make consistent with mbrtoc32. diff --git a/doc/posix-functions/wcsrtombs.texi b/doc/posix-functions/wcsrtombs.texi index 975d317..5bb7d8c 100644 --- a/doc/posix-functions/wcsrtombs.texi +++ b/doc/posix-functions/wcsrtombs.texi @@ -22,6 +22,9 @@ HP-UX 11. Portability problems not fixed by Gnulib: @itemize @item -On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and therefore cannot -accommodate all Unicode characters. +On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and +therefore cannot accommodate all Unicode characters. +However, the Gnulib function @code{c32srtombs}, provided by Gnulib module +@code{c32srtombs}, operates on 32-bit wide characters and therefore does not +have this limitation. @end itemize diff --git a/lib/c32srtombs-state.c b/lib/c32srtombs-state.c new file mode 100644 index 0000000..5491b9c --- /dev/null +++ b/lib/c32srtombs-state.c @@ -0,0 +1,37 @@ +/* Convert 32-bit wide string to string. + Copyright (C) 2008-2020 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2020. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +#include <wchar.h> + +/* Internal state used by the functions c32srtombs() and c32snrtombs(). 
*/ +mbstate_t _gl_c32srtombs_state +/* The state must initially be in the "initial state"; so, zero-initialize it. + On most systems, putting it into BSS is sufficient. Not so on Mac OS X 10.3, + see <https://lists.gnu.org/r/bug-gnulib/2009-01/msg00329.html>. + When it needs an initializer, use 0 or {0} as initializer? 0 only works + when mbstate_t is a scalar type (such as when gnulib defines it, or on + AIX, IRIX, mingw). {0} works as an initializer in all cases: for a struct + or union type, but also for a scalar type (ISO C 99, 6.7.8.(11)). */ +#if defined __ELF__ + /* On ELF systems, variables in BSS behave well. */ +#else + /* Use braces, to be on the safe side. */ + = { 0 } +#endif + ; diff --git a/lib/c32srtombs.c b/lib/c32srtombs.c new file mode 100644 index 0000000..a4e0840 --- /dev/null +++ b/lib/c32srtombs.c @@ -0,0 +1,55 @@ +/* Convert 32-bit wide string to string. + Copyright (C) 2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2020. */ + +#include <config.h> + +/* Specification. */ +#include <uchar.h> + +#include <wchar.h> + +#if (HAVE_WORKING_MBRTOC32 && !defined __GLIBC__) || _GL_LARGE_CHAR32_T +/* The char32_t encoding of a multibyte character may be different than its + wchar_t encoding, or char32_t is wider than wchar_t. 
*/ + +# include <errno.h> +# include <stdlib.h> +# include <string.h> + +extern mbstate_t _gl_c32srtombs_state; + +# define FUNC c32srtombs +# define SCHAR_T char32_t +# define INTERNAL_STATE _gl_c32srtombs_state +# define WCRTOMB c32rtomb +# include "wcsrtombs-impl.h" + +#else +/* char32_t and wchar_t are equivalent. */ + +# include "verify.h" + +verify (sizeof (char32_t) == sizeof (wchar_t)); + +size_t +c32srtombs (char *dest, const char32_t **srcp, size_t len, mbstate_t *ps) +{ + return wcsrtombs (dest, (const wchar_t **) srcp, len, ps); +} + +#endif diff --git a/lib/uchar.in.h b/lib/uchar.in.h index dbbfc30..75da254 100644 --- a/lib/uchar.in.h +++ b/lib/uchar.in.h @@ -93,6 +93,18 @@ _GL_WARN_ON_USE (mbrtoc32, "c32rtomb is not portable - " #endif +/* Convert a 32-bit wide string to a string. */ +#if @GNULIB_C32SRTOMBS@ +_GL_FUNCDECL_SYS (c32srtombs, size_t, + (char *dest, const char32_t **srcp, size_t len, mbstate_t *ps) + _GL_ARG_NONNULL ((2))); +_GL_CXXALIAS_SYS (c32srtombs, size_t, + (char *dest, const char32_t **srcp, size_t len, + mbstate_t *ps)); +_GL_CXXALIASWARN (c32srtombs); +#endif + + /* Converts a 32-bit wide character to unibyte character. Returns the single-byte representation of WC if it exists, or EOF otherwise. */ diff --git a/lib/wcsrtombs-impl.h b/lib/wcsrtombs-impl.h index 81a7a7f..d39af07 100644 --- a/lib/wcsrtombs-impl.h +++ b/lib/wcsrtombs-impl.h @@ -16,12 +16,12 @@ along with this program. If not, see <https://www.gnu.org/licenses/>. */ size_t -wcsrtombs (char *dest, const wchar_t **srcp, size_t len, mbstate_t *ps) +FUNC (char *dest, const SCHAR_T **srcp, size_t len, mbstate_t *ps) { if (ps == NULL) - ps = &_gl_wcsrtombs_state; + ps = &INTERNAL_STATE; { - const wchar_t *src = *srcp; + const SCHAR_T *src = *srcp; size_t cur_max = MB_CUR_MAX; char buf[64]; @@ -34,8 +34,8 @@ wcsrtombs (char *dest, const wchar_t **srcp, size_t len, mbstate_t *ps) for (; len > 0; src++) { - wchar_t wc = *src; - size_t ret = wcrtomb (len >= cur_max ? 
destptr : buf, wc, ps); + SCHAR_T wc = *src; + size_t ret = WCRTOMB (len >= cur_max ? destptr : buf, wc, ps); if (ret == (size_t)(-1)) goto bad_input; @@ -66,8 +66,8 @@ wcsrtombs (char *dest, const wchar_t **srcp, size_t len, mbstate_t *ps) for (;; src++) { - wchar_t wc = *src; - size_t ret = wcrtomb (buf, wc, &state); + SCHAR_T wc = *src; + size_t ret = WCRTOMB (buf, wc, &state); if (ret == (size_t)(-1)) goto bad_input2; diff --git a/lib/wcsrtombs.c b/lib/wcsrtombs.c index db8489b..307912f 100644 --- a/lib/wcsrtombs.c +++ b/lib/wcsrtombs.c @@ -51,6 +51,10 @@ rpl_wcsrtombs (char *dest, const wchar_t **srcp, size_t len, mbstate_t *ps) # include <stdlib.h> # include <string.h> +# define FUNC wcsrtombs +# define SCHAR_T wchar_t +# define INTERNAL_STATE _gl_wcsrtombs_state +# define WCRTOMB wcrtomb # include "wcsrtombs-impl.h" #endif diff --git a/m4/uchar.m4 b/m4/uchar.m4 index be71196..4e9b16d 100644 --- a/m4/uchar.m4 +++ b/m4/uchar.m4 @@ -1,4 +1,4 @@ -# uchar.m4 serial 9 +# uchar.m4 serial 10 dnl Copyright (C) 2019-2020 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -49,6 +49,7 @@ AC_DEFUN([gl_UCHAR_H_DEFAULTS], [ GNULIB_BTOC32=0; AC_SUBST([GNULIB_BTOC32]) GNULIB_C32RTOMB=0; AC_SUBST([GNULIB_C32RTOMB]) + GNULIB_C32SRTOMBS=0; AC_SUBST([GNULIB_C32SRTOMBS]) GNULIB_C32TOB=0; AC_SUBST([GNULIB_C32TOB]) GNULIB_MBRTOC32=0; AC_SUBST([GNULIB_MBRTOC32]) GNULIB_MBSNRTOC32S=0; AC_SUBST([GNULIB_MBSNRTOC32S]) diff --git a/modules/c32srtombs b/modules/c32srtombs new file mode 100644 index 0000000..1f36b6c --- /dev/null +++ b/modules/c32srtombs @@ -0,0 +1,31 @@ +Description: +c32srtombs() function: convert 32-bit wide string to string. 
+ +Files: +lib/c32srtombs.c +lib/wcsrtombs-impl.h +lib/c32srtombs-state.c + +Depends-on: +uchar +wchar +verify +c32rtomb +wcsrtombs [test $SMALL_WCHAR_T = 0] + +configure.ac: +AC_REQUIRE([gl_UCHAR_H]) +AC_LIBOBJ([c32srtombs-state]) +gl_UCHAR_MODULE_INDICATOR([c32srtombs]) + +Makefile.am: +lib_SOURCES += c32srtombs.c + +Include: +<uchar.h> + +License: +LGPL + +Maintainer: +Bruno Haible diff --git a/modules/uchar b/modules/uchar index cab4518..7124a67 100644 --- a/modules/uchar +++ b/modules/uchar @@ -30,6 +30,7 @@ uchar.h: uchar.in.h $(top_builddir)/config.status $(CXXDEFS_H) -e 's|@''SMALL_WCHAR_T''@|$(SMALL_WCHAR_T)|g' \ -e 's/@''GNULIB_BTOC32''@/$(GNULIB_BTOC32)/g' \ -e 's/@''GNULIB_C32RTOMB''@/$(GNULIB_C32RTOMB)/g' \ + -e 's/@''GNULIB_C32SRTOMBS''@/$(GNULIB_C32SRTOMBS)/g' \ -e 's/@''GNULIB_C32TOB''@/$(GNULIB_C32TOB)/g' \ -e 's/@''GNULIB_MBRTOC32''@/$(GNULIB_MBRTOC32)/g' \ -e 's/@''GNULIB_MBSNRTOC32S''@/$(GNULIB_MBSNRTOC32S)/g' \ diff --git a/tests/test-uchar-c++.cc b/tests/test-uchar-c++.cc index ed45da2..e202bbc 100644 --- a/tests/test-uchar-c++.cc +++ b/tests/test-uchar-c++.cc @@ -33,6 +33,11 @@ SIGNATURE_CHECK (GNULIB_NAMESPACE::c32rtomb, size_t, (char *, char32_t , mbstate_t *)); #endif +#if GNULIB_TEST_C32SRTOMBS +SIGNATURE_CHECK (GNULIB_NAMESPACE::c32srtombs, size_t, + (char *, const char32_t **, size_t, mbstate_t *)); +#endif + #if GNULIB_TEST_C32TOB SIGNATURE_CHECK (GNULIB_NAMESPACE::c32tob, int, (wint_t)); #endif -- 2.7.4