>From 4ec96253823bde7488bfee4ee5d890792d6b555b Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Thu, 9 Jan 2020 01:56:35 +0100 Subject: [PATCH 2/4] c32rtomb: New module. * lib/uchar.in.h (c32rtomb): New declaration. * lib/c32rtomb.c: New file, based on lib/unistr/u8-uctomb-aux.c. * m4/c32rtomb.m4: New file. * m4/uchar.m4 (gl_UCHAR_H): Test whether c32rtomb is declared. (gl_UCHAR_H_DEFAULTS): Initialize GNULIB_C32RTOMB, HAVE_C32RTOMB, REPLACE_C32RTOMB. * modules/uchar (Makefile.am): Substitute GNULIB_C32RTOMB, HAVE_C32RTOMB, REPLACE_C32RTOMB. * modules/c32rtomb: New file. * tests/test-uchar-c++.cc: Test the signature of c32rtomb. * doc/posix-functions/c32rtomb.texi: Document the new module. * doc/posix-functions/wcrtomb.texi: Mention the new module. --- ChangeLog | 16 +++++ doc/posix-functions/c32rtomb.texi | 11 ++-- doc/posix-functions/wcrtomb.texi | 7 ++- lib/c32rtomb.c | 124 ++++++++++++++++++++++++++++++++++++++ lib/uchar.in.h | 25 ++++++++ m4/c32rtomb.m4 | 55 +++++++++++++++++ m4/uchar.m4 | 7 ++- modules/c32rtomb | 32 ++++++++++ modules/uchar | 3 + tests/test-uchar-c++.cc | 5 ++ 10 files changed, 277 insertions(+), 8 deletions(-) create mode 100644 lib/c32rtomb.c create mode 100644 m4/c32rtomb.m4 create mode 100644 modules/c32rtomb diff --git a/ChangeLog b/ChangeLog index 4b5a419..3ad99ff 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,21 @@ 2020-01-08 Bruno Haible + c32rtomb: New module. + * lib/uchar.in.h (c32rtomb): New declaration. + * lib/c32rtomb.c: New file, based on lib/unistr/u8-uctomb-aux.c. + * m4/c32rtomb.m4: New file. + * m4/uchar.m4 (gl_UCHAR_H): Test whether c32rtomb is declared. + (gl_UCHAR_H_DEFAULTS): Initialize GNULIB_C32RTOMB, HAVE_C32RTOMB, + REPLACE_C32RTOMB. + * modules/uchar (Makefile.am): Substitute GNULIB_C32RTOMB, + HAVE_C32RTOMB, REPLACE_C32RTOMB. + * modules/c32rtomb: New file. + * tests/test-uchar-c++.cc: Test the signature of c32rtomb. + * doc/posix-functions/c32rtomb.texi: Document the new module. 
+ * doc/posix-functions/wcrtomb.texi: Mention the new module. + +2020-01-08 Bruno Haible + mbrtoc32: Use the system's mbrtoc32 if it exists and basically works. * m4/mbrtoc32.m4 (gl_MBRTOC32_SANITYCHECK): New macro. (gl_FUNC_MBRTOC32): Require it. Set REPLACE_MBRTOC32 if mbrtoc32 exists diff --git a/doc/posix-functions/c32rtomb.texi b/doc/posix-functions/c32rtomb.texi index 392bbe9..4a1a617 100644 --- a/doc/posix-functions/c32rtomb.texi +++ b/doc/posix-functions/c32rtomb.texi @@ -2,15 +2,18 @@ @section @code{c32rtomb} @findex c32rtomb -Gnulib module: --- +Gnulib module: c32rtomb Portability problems fixed by Gnulib: @itemize +@item +This function is missing on most non-glibc platforms: +glibc 2.15, Mac OS X 10.5, FreeBSD 6.4, NetBSD 5.0, OpenBSD 3.8, Minix 3.1.8, AIX 7.1, HP-UX 11.31, IRIX 6.5, Solaris 11.3, Cygwin, mingw, MSVC 9, Android 4.4. +@item +This function returns 0 when the first argument is NULL in some locales on some platforms: +AIX 7.2. @end itemize Portability problems not fixed by Gnulib: @itemize -@item -This function is missing on most non-glibc platforms: -glibc 2.15, Mac OS X 10.5, FreeBSD 6.4, NetBSD 5.0, OpenBSD 3.8, Minix 3.1.8, AIX 7.1, HP-UX 11.31, IRIX 6.5, Solaris 11.3, Cygwin, mingw, MSVC 9, Android 4.4. @end itemize diff --git a/doc/posix-functions/wcrtomb.texi b/doc/posix-functions/wcrtomb.texi index 232bea4..28b8dfe 100644 --- a/doc/posix-functions/wcrtomb.texi +++ b/doc/posix-functions/wcrtomb.texi @@ -25,6 +25,9 @@ MSVC 14. Portability problems not fixed by Gnulib: @itemize @item -On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and therefore cannot -accommodate all Unicode characters. +On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and +therefore cannot accommodate all Unicode characters. +However, the ISO C11 function @code{c32rtomb}, provided by Gnulib module +@code{c32rtomb}, operates on 32-bit wide characters and therefore does not have +this limitation. 
@end itemize diff --git a/lib/c32rtomb.c b/lib/c32rtomb.c new file mode 100644 index 0000000..ba39929 --- /dev/null +++ b/lib/c32rtomb.c @@ -0,0 +1,124 @@ +/* Convert 32-bit wide character to multibyte character. + Copyright (C) 2020 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible , 2020. */ + +#include <config.h> + +/* Specification. */ +#include <uchar.h> + +#include <errno.h> +#include <wchar.h> + +#include "localcharset.h" +#include "streq.h" + +#ifndef FALLTHROUGH +# if __GNUC__ < 7 +# define FALLTHROUGH ((void) 0) +# else +# define FALLTHROUGH __attribute__ ((__fallthrough__)) +# endif +#endif + +size_t +c32rtomb (char *s, char32_t wc, mbstate_t *ps) +#undef c32rtomb +{ +#if HAVE_WORKING_MBRTOC32 + +# if C32RTOMB_RETVAL_BUG + if (s == NULL) + /* We know the NUL wide character corresponds to the NUL character. */ + return 1; +# endif + + return c32rtomb (s, wc, ps); + +#elif _GL_LARGE_CHAR32_T + + if (s == NULL) + return wcrtomb (NULL, 0, ps); + else + { + /* Special-case all encodings that may produce wide character values + > WCHAR_MAX. */ + const char *encoding = locale_charset (); + if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0)) + { + /* Special-case the UTF-8 encoding. Assume that the wide-character + encoding in a UTF-8 locale is UCS-2 or, equivalently, UTF-16.
*/ + if (wc < 0x80) + { + s[0] = (unsigned char) wc; + return 1; + } + else + { + int count; + + if (wc < 0x800) + count = 2; + else if (wc < 0x10000) + { + if (wc < 0xd800 || wc >= 0xe000) + count = 3; + else + { + errno = EILSEQ; + return (size_t)(-1); + } + } + else if (wc < 0x110000) + count = 4; + else + { + errno = EILSEQ; + return (size_t)(-1); + } + + switch (count) /* note: code falls through cases! */ + { + case 4: s[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000; + FALLTHROUGH; + case 3: s[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800; + FALLTHROUGH; + case 2: s[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0; + /*case 1:*/ s[0] = wc; + } + return count; + } + } + else + { + if ((wchar_t) wc == wc) + return wcrtomb (s, (wchar_t) wc, ps); + else + { + errno = EILSEQ; + return (size_t)(-1); + } + } + } + +#else + + /* char32_t and wchar_t are equivalent. */ + return wcrtomb (s, (wchar_t) wc, ps); + +#endif +} diff --git a/lib/uchar.in.h b/lib/uchar.in.h index 513fa8c..dbbfc30 100644 --- a/lib/uchar.in.h +++ b/lib/uchar.in.h @@ -68,6 +68,31 @@ _GL_CXXALIASWARN (btoc32); #endif +/* Converts a 32-bit wide character to a multibyte character. */ +#if @GNULIB_C32RTOMB@ +# if @REPLACE_C32RTOMB@ +# if !(defined __cplusplus && defined GNULIB_NAMESPACE) +# undef c32rtomb +# define c32rtomb rpl_c32rtomb +# endif +_GL_FUNCDECL_RPL (c32rtomb, size_t, (char *s, char32_t wc, mbstate_t *ps)); +_GL_CXXALIAS_RPL (c32rtomb, size_t, (char *s, char32_t wc, mbstate_t *ps)); +# else +# if !@HAVE_C32RTOMB@ +_GL_FUNCDECL_SYS (c32rtomb, size_t, (char *s, char32_t wc, mbstate_t *ps)); +# endif +_GL_CXXALIAS_SYS (c32rtomb, size_t, (char *s, char32_t wc, mbstate_t *ps)); +# endif +_GL_CXXALIASWARN (c32rtomb); +#elif defined GNULIB_POSIXCHECK +# undef c32rtomb +# if HAVE_RAW_DECL_C32RTOMB +_GL_WARN_ON_USE (c32rtomb, "c32rtomb is not portable - " + "use gnulib module c32rtomb for portability"); +# endif +#endif + + /* Converts a 32-bit wide character to unibyte character.
Returns the single-byte representation of WC if it exists, or EOF otherwise. */ diff --git a/m4/c32rtomb.m4 b/m4/c32rtomb.m4 new file mode 100644 index 0000000..4cf0e4d --- /dev/null +++ b/m4/c32rtomb.m4 @@ -0,0 +1,55 @@ +# c32rtomb.m4 serial 1 +dnl Copyright (C) 2020 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_FUNC_C32RTOMB], +[ + AC_REQUIRE([gl_UCHAR_H_DEFAULTS]) + + AC_REQUIRE([gl_MBRTOC32_SANITYCHECK]) + + AC_CHECK_FUNCS_ONCE([c32rtomb]) + if test $ac_cv_func_c32rtomb = no; then + HAVE_C32RTOMB=0 + else + dnl When we override mbrtoc32, redefining the meaning of the char32_t + dnl values, we need to override c32rtomb as well, for consistency. + if test $HAVE_WORKING_MBRTOC32 = 0; then + REPLACE_C32RTOMB=1 + fi + AC_CACHE_CHECK([whether c32rtomb return value is correct], + [gl_cv_func_c32rtomb_retval], + [ + dnl Initial guess, used when cross-compiling. +changequote(,)dnl + case "$host_os" in + # Guess no on AIX. + aix*) gl_cv_func_c32rtomb_retval="guessing no" ;; + # Guess yes otherwise. + *) gl_cv_func_c32rtomb_retval="guessing yes" ;; + esac +changequote([,])dnl + AC_RUN_IFELSE( + [AC_LANG_SOURCE([[ +#include <uchar.h> +int main () +{ + int result = 0; + if (c32rtomb (NULL, 0, NULL) != 1) + result |= 1; + return result; +}]])], + [gl_cv_func_c32rtomb_retval=yes], + [gl_cv_func_c32rtomb_retval=no], + [:]) + ]) + case "$gl_cv_func_c32rtomb_retval" in + *yes) ;; + *) AC_DEFINE([C32RTOMB_RETVAL_BUG], [1], + [Define if the c32rtomb function has an incorrect return value.]) + REPLACE_C32RTOMB=1 ;; + esac + fi +]) diff --git a/m4/uchar.m4 b/m4/uchar.m4 index 0b5c662..be71196 100644 --- a/m4/uchar.m4 +++ b/m4/uchar.m4 @@ -1,4 +1,4 @@ -# uchar.m4 serial 8 +# uchar.m4 serial 9 dnl Copyright (C) 2019-2020 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -33,7 +33,7 @@ AC_DEFUN_ONCE([gl_UCHAR_H], dnl corresponding gnulib module is not in use, and which is not dnl guaranteed by C11. gl_WARN_ON_USE_PREPARE([[#include <uchar.h> - ]], [mbrtoc32]) + ]], [c32rtomb mbrtoc32]) ]) AC_DEFUN([gl_UCHAR_MODULE_INDICATOR], @@ -48,12 +48,15 @@ AC_DEFUN([gl_UCHAR_MODULE_INDICATOR], AC_DEFUN([gl_UCHAR_H_DEFAULTS], [ GNULIB_BTOC32=0; AC_SUBST([GNULIB_BTOC32]) + GNULIB_C32RTOMB=0; AC_SUBST([GNULIB_C32RTOMB]) GNULIB_C32TOB=0; AC_SUBST([GNULIB_C32TOB]) GNULIB_MBRTOC32=0; AC_SUBST([GNULIB_MBRTOC32]) GNULIB_MBSNRTOC32S=0; AC_SUBST([GNULIB_MBSNRTOC32S]) GNULIB_MBSRTOC32S=0; AC_SUBST([GNULIB_MBSRTOC32S]) GNULIB_MBSTOC32S=0; AC_SUBST([GNULIB_MBSTOC32S]) dnl Assume proper GNU behavior unless another module says otherwise. + HAVE_C32RTOMB=1; AC_SUBST([HAVE_C32RTOMB]) HAVE_MBRTOC32=1; AC_SUBST([HAVE_MBRTOC32]) + REPLACE_C32RTOMB=0; AC_SUBST([REPLACE_C32RTOMB]) REPLACE_MBRTOC32=0; AC_SUBST([REPLACE_MBRTOC32]) ]) diff --git a/modules/c32rtomb b/modules/c32rtomb new file mode 100644 index 0000000..ea227df --- /dev/null +++ b/modules/c32rtomb @@ -0,0 +1,32 @@ +Description: +c32rtomb() function: convert 32-bit wide character to multibyte character.
+ +Files: +lib/c32rtomb.c +m4/c32rtomb.m4 +m4/mbrtoc32.m4 + +Depends-on: +uchar +wchar [test $HAVE_C32RTOMB = 0 || test $REPLACE_C32RTOMB = 1] +wcrtomb [test $HAVE_C32RTOMB = 0 || test $REPLACE_C32RTOMB = 1] +localcharset [{ test $HAVE_C32RTOMB = 0 || test $REPLACE_C32RTOMB = 1; } && test $SMALL_WCHAR_T = 1] +streq [{ test $HAVE_C32RTOMB = 0 || test $REPLACE_C32RTOMB = 1; } && test $SMALL_WCHAR_T = 1] + +configure.ac: +gl_FUNC_C32RTOMB +if test $HAVE_C32RTOMB = 0 || test $REPLACE_C32RTOMB = 1; then + AC_LIBOBJ([c32rtomb]) +fi +gl_UCHAR_MODULE_INDICATOR([c32rtomb]) + +Makefile.am: + +Include: +<uchar.h> + +License: +LGPLv2+ + +Maintainer: +Bruno Haible diff --git a/modules/uchar b/modules/uchar index 29bc7ae..cab4518 100644 --- a/modules/uchar +++ b/modules/uchar @@ -29,12 +29,15 @@ uchar.h: uchar.in.h $(top_builddir)/config.status $(CXXDEFS_H) -e 's|@''NEXT_UCHAR_H''@|$(NEXT_UCHAR_H)|g' \ -e 's|@''SMALL_WCHAR_T''@|$(SMALL_WCHAR_T)|g' \ -e 's/@''GNULIB_BTOC32''@/$(GNULIB_BTOC32)/g' \ + -e 's/@''GNULIB_C32RTOMB''@/$(GNULIB_C32RTOMB)/g' \ -e 's/@''GNULIB_C32TOB''@/$(GNULIB_C32TOB)/g' \ -e 's/@''GNULIB_MBRTOC32''@/$(GNULIB_MBRTOC32)/g' \ -e 's/@''GNULIB_MBSNRTOC32S''@/$(GNULIB_MBSNRTOC32S)/g' \ -e 's/@''GNULIB_MBSRTOC32S''@/$(GNULIB_MBSRTOC32S)/g' \ -e 's/@''GNULIB_MBSTOC32S''@/$(GNULIB_MBSTOC32S)/g' \ + -e 's|@''HAVE_C32RTOMB''@|$(HAVE_C32RTOMB)|g' \ -e 's|@''HAVE_MBRTOC32''@|$(HAVE_MBRTOC32)|g' \ + -e 's|@''REPLACE_C32RTOMB''@|$(REPLACE_C32RTOMB)|g' \ -e 's|@''REPLACE_MBRTOC32''@|$(REPLACE_MBRTOC32)|g' \ -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \ < $(srcdir)/uchar.in.h; \ diff --git a/tests/test-uchar-c++.cc b/tests/test-uchar-c++.cc index 3e71c89..ed45da2 100644 --- a/tests/test-uchar-c++.cc +++ b/tests/test-uchar-c++.cc @@ -28,6 +28,11 @@ SIGNATURE_CHECK (GNULIB_NAMESPACE::btoc32, wint_t, (int)); #endif +#if GNULIB_TEST_C32RTOMB +SIGNATURE_CHECK (GNULIB_NAMESPACE::c32rtomb, size_t, + (char *, char32_t , mbstate_t *)); +#endif + #if GNULIB_TEST_C32TOB
SIGNATURE_CHECK (GNULIB_NAMESPACE::c32tob, int, (wint_t)); #endif -- 2.7.4