>From 7bd3aacff73e31b213a58170cf1ed6cba6ecd6d1 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sat, 25 Jan 2020 23:36:41 +0100 Subject: [PATCH 02/25] c32isalnum: New module. * lib/c32isalnum.c: New file. * lib/c32is-impl.h: New file. * modules/c32isalnum: New file. * doc/posix-functions/iswalnum.texi: Mention the new module. --- ChangeLog | 8 ++++ doc/posix-functions/iswalnum.texi | 7 ++- lib/c32is-impl.h | 95 +++++++++++++++++++++++++++++++++++++++ lib/c32isalnum.c | 25 +++++++++++ modules/c32isalnum | 33 ++++++++++++++ 5 files changed, 166 insertions(+), 2 deletions(-) create mode 100644 lib/c32is-impl.h create mode 100644 lib/c32isalnum.c create mode 100644 modules/c32isalnum diff --git a/ChangeLog b/ChangeLog index a41e8ac..4ffcc0b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,13 @@ 2020-01-25 Bruno Haible + c32isalnum: New module. + * lib/c32isalnum.c: New file. + * lib/c32is-impl.h: New file. + * modules/c32isalnum: New file. + * doc/posix-functions/iswalnum.texi: Mention the new module. + +2020-01-25 Bruno Haible + uchar: Preparations for modules c32isalnum, ..., c32isxdigit. * lib/uchar.in.h (c32isalnum, c32isalpha, c32isblank, c32iscntrl, c32isdigit, c32isgraph, c32islower, c32isprint, c32ispunct, c32isspace, diff --git a/doc/posix-functions/iswalnum.texi b/doc/posix-functions/iswalnum.texi index 1ccb315..b3b6d24 100644 --- a/doc/posix-functions/iswalnum.texi +++ b/doc/posix-functions/iswalnum.texi @@ -20,6 +20,9 @@ OS X 10.8. Portability problems not fixed by Gnulib: @itemize @item -On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and therefore cannot -accommodate all Unicode characters. +On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and +therefore cannot accommodate all Unicode characters. +However, the Gnulib function @code{c32isalnum}, provided by Gnulib module +@code{c32isalnum}, operates on 32-bit wide characters and therefore does not +have this limitation. 
@end itemize diff --git a/lib/c32is-impl.h b/lib/c32is-impl.h new file mode 100644 index 0000000..fd5f06a --- /dev/null +++ b/lib/c32is-impl.h @@ -0,0 +1,95 @@ +/* Test whether a 32-bit wide character belongs to a specific character class. + Copyright (C) 2020 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible , 2020. */ + +#include <wchar.h> +#include <wctype.h> + +#ifdef __CYGWIN__ +# include <cygwin/version.h> +#endif + +#if GNULIB_defined_mbstate_t +# include "localcharset.h" +# include "streq.h" +#endif + +#include "unictype.h" +#include "verify.h" + +int +FUNC (wint_t wc) +{ + /* The char32_t encoding of a multibyte character is defined by the way + mbrtoc32() is defined. */ + +#if GNULIB_defined_mbstate_t /* AIX, IRIX */ + /* mbrtoc32() is defined on top of mbtowc() for the non-UTF-8 locales + and directly for the UTF-8 locales. */ + if (wc != WEOF) + { + const char *encoding = locale_charset (); + if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0)) + return UCS_FUNC (wc); + else + return WCHAR_FUNC (wc); + } + else + return 0; + +#elif HAVE_WORKING_MBRTOC32 /* glibc */ + /* mbrtoc32() is essentially defined by the system libc. */ + +# if defined __GLIBC__ + /* The char32_t encoding of a multibyte character is known to be the same as + the wchar_t encoding.
*/ + return WCHAR_FUNC (wc); +# else + /* The char32_t encoding of a multibyte character is known to be UCS-4, + different from the wchar_t encoding. */ + if (wc != WEOF) + return UCS_FUNC (wc); + else + return 0; +# endif + +#elif _GL_LARGE_CHAR32_T /* Cygwin, mingw, MSVC */ + /* The wchar_t encoding is UTF-16. + The char32_t encoding is UCS-4. */ + +# if defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007 + /* As an extension to POSIX, the iswalnum() function of Cygwin >= 1.7 + supports also wc arguments outside the Unicode BMP, that is, outside + the 'wchar_t' range. See + + = . */ + return WCHAR_FUNC (wc); +# else + if (wc == WEOF || wc == (wchar_t) wc) + /* wc is in the range for the isw* functions. */ + return WCHAR_FUNC (wc); + else + return UCS_FUNC (wc); +# endif + +#else /* macOS, FreeBSD, NetBSD, OpenBSD, HP-UX, Solaris, Minix, Android */ + /* char32_t and wchar_t are equivalent. */ + verify (sizeof (char32_t) == sizeof (wchar_t)); + + return WCHAR_FUNC (wc); +#endif +} diff --git a/lib/c32isalnum.c b/lib/c32isalnum.c new file mode 100644 index 0000000..c81b833 --- /dev/null +++ b/lib/c32isalnum.c @@ -0,0 +1,25 @@ +/* Test 32-bit wide character for being alphanumeric. + Copyright (C) 2020 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification.
*/ +#include <uchar.h> + +#define FUNC c32isalnum +#define WCHAR_FUNC iswalnum +#define UCS_FUNC uc_is_alnum +#include "c32is-impl.h" diff --git a/modules/c32isalnum b/modules/c32isalnum new file mode 100644 index 0000000..213ffdd --- /dev/null +++ b/modules/c32isalnum @@ -0,0 +1,33 @@ +Description: +c32isalnum() function: test 32-bit wide character for being alphanumeric. + +Files: +lib/c32isalnum.c +lib/c32is-impl.h +m4/mbrtoc32.m4 + +Depends-on: +uchar +wchar +wctype-h +localcharset [test $REPLACE_MBSTATE_T = 1] +streq [test $REPLACE_MBSTATE_T = 1] +unictype/ctype-alnum +verify + +configure.ac: +AC_REQUIRE([gl_UCHAR_H]) +AC_REQUIRE([gl_MBRTOC32_SANITYCHECK]) +gl_UCHAR_MODULE_INDICATOR([c32isalnum]) + +Makefile.am: +lib_SOURCES += c32isalnum.c + +Include: +<uchar.h> + +License: +LGPLv3+ or GPLv2 + +Maintainer: +Bruno Haible -- 2.7.4