bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] mbrtowc: work around glibc bug#19932


From: Paul Eggert
Subject: [PATCH] mbrtowc: work around glibc bug#19932
Date: Sat, 9 Apr 2016 01:29:33 -0700

From: Paul Eggert <address@hidden>

Fix mbrtowc so that it never returns -1 in the C locale,
as this conflicts with a future version of POSIX
http://austingroupbugs.net/view.php?id=663#c2738
and causes problems with GNU grep: http://bugs.gnu.org/23234
See glibc bug 19932:
https://sourceware.org/bugzilla/show_bug.cgi?id=19932
* doc/posix-functions/mbrlen.texi (mbrlen):
* doc/posix-functions/mbrtowc.texi (mbrtowc):
Document the glibc bug.
* lib/mbrtowc.c [C_LOCALE_MAYBE_EILSEQ]:
Include hard-locale.h, locale.h.
(rpl_mbrtowc): Work around the C_LOCALE_MAYBE_EILSEQ bug,
if the bug is possible.
* m4/mbrtowc.m4 (gl_MBRTOWC_C_LOCALE): New macro.
(gl_FUNC_MBRTOWC): Use it, and define C_LOCALE_MAYBE_EILSEQ as needed.
* modules/hard-locale (License): Now LGPLv2+, for mbrtowc.
* modules/mbrtowc (Depends-on): Add hard-locale.
* modules/mbrtowc-tests (Files, TESTS): Add tests/test-mbrtowc5.sh.
* tests/test-mbrtowc.c (main): Test for bug fix if arg is '5'.
* tests/test-mbrtowc5.sh: New file.
---
 ChangeLog                        | 24 ++++++++++++++++++
 doc/posix-functions/mbrlen.texi  |  4 +++
 doc/posix-functions/mbrtowc.texi |  4 +++
 lib/mbrtowc.c                    | 54 ++++++++++++++++++++--------------------
 m4/mbrtowc.m4                    | 50 ++++++++++++++++++++++++++++++++++++-
 modules/hard-locale              |  2 +-
 modules/mbrtowc                  |  1 +
 modules/mbrtowc-tests            |  3 ++-
 tests/test-mbrtowc.c             | 11 +++++++-
 tests/test-mbrtowc5.sh           |  6 +++++
 10 files changed, 128 insertions(+), 31 deletions(-)
 create mode 100755 tests/test-mbrtowc5.sh

diff --git a/ChangeLog b/ChangeLog
index 980cfaa..77f1be9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,27 @@
+2016-04-09  Paul Eggert  <address@hidden>
+
+       mbrtowc: work around glibc bug#19932
+       Fix mbrtowc so that it never returns -1 in the C locale,
+       as this conflicts with a future version of POSIX
+       http://austingroupbugs.net/view.php?id=663#c2738
+       and causes problems with GNU grep: http://bugs.gnu.org/23234
+       See glibc bug 19932:
+       https://sourceware.org/bugzilla/show_bug.cgi?id=19932
+       * doc/posix-functions/mbrlen.texi (mbrlen):
+       * doc/posix-functions/mbrtowc.texi (mbrtowc):
+       Document the glibc bug.
+       * lib/mbrtowc.c [C_LOCALE_MAYBE_EILSEQ]:
+       Include hard-locale.h, locale.h.
+       (rpl_mbrtowc): Work around the C_LOCALE_MAYBE_EILSEQ bug,
+       if the bug is possible.
+       * m4/mbrtowc.m4 (gl_MBRTOWC_C_LOCALE): New macro.
+       (gl_FUNC_MBRTOWC): Use it, and define C_LOCALE_MAYBE_EILSEQ as needed.
+       * modules/hard-locale (License): Now LGPLv2+, for mbrtowc.
+       * modules/mbrtowc (Depends-on): Add hard-locale.
+       * modules/mbrtowc-tests (Files, TESTS): Add tests/test-mbrtowc5.sh.
+       * tests/test-mbrtowc.c (main): Test for bug fix if arg is '5'.
+       * tests/test-mbrtowc5.sh: New file.
+
 2016-04-03  Pedro Alves  <address@hidden>
 
        stdint: detect good enough pre-C++11 stdint.h in C++ mode
diff --git a/doc/posix-functions/mbrlen.texi b/doc/posix-functions/mbrlen.texi
index 7db550e..3f1d472 100644
--- a/doc/posix-functions/mbrlen.texi
+++ b/doc/posix-functions/mbrlen.texi
@@ -12,6 +12,10 @@ Portability problems fixed by Gnulib:
 This function is missing on some platforms:
 Minix 3.1.8, HP-UX 11.00, IRIX 6.5, Solaris 2.6, mingw, Interix 3.5.
 @item
+In the C or POSIX locales, this function can return @code{(size_t) -1}
+and set @code{errno} to @code{EILSEQ}:
+glibc 2.23.
+@item
 This function returns 0 instead of @code{(size_t) -2} when the input
 is empty:
 glibc 2.19.
diff --git a/doc/posix-functions/mbrtowc.texi b/doc/posix-functions/mbrtowc.texi
index 7c7f5fd..ad5c671 100644
--- a/doc/posix-functions/mbrtowc.texi
+++ b/doc/posix-functions/mbrtowc.texi
@@ -12,6 +12,10 @@ Portability problems fixed by Gnulib:
 This function is missing on some platforms:
 Minix 3.1.8, HP-UX 11.00, IRIX 6.5, Solaris 2.6, mingw, Interix 3.5.
 @item
+In the C or POSIX locales, this function can return @code{(size_t) -1}
+and set @code{errno} to @code{EILSEQ}:
+glibc 2.23.
+@item
 This function returns 0 instead of @code{(size_t) -2} when the input
 is empty:
 glibc 2.19.
diff --git a/lib/mbrtowc.c b/lib/mbrtowc.c
index 864e006..cdd874b 100644
--- a/lib/mbrtowc.c
+++ b/lib/mbrtowc.c
@@ -20,6 +20,11 @@
 /* Specification.  */
 #include <wchar.h>
 
+#if C_LOCALE_MAYBE_EILSEQ
+# include "hard-locale.h"
+# include <locale.h>
+#endif
+
 #if GNULIB_defined_mbstate_t
 /* Implement mbrtowc() on top of mbtowc().  */
 
@@ -328,6 +333,9 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
 size_t
 rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
 {
+  size_t ret;
+  wchar_t wc;
+
 # if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG || MBRTOWC_EMPTY_INPUT_BUG
   if (s == NULL)
     {
@@ -342,6 +350,9 @@ rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
     return (size_t) -2;
 # endif
 
+  if (! pwc)
+    pwc = &wc;
+
 # if MBRTOWC_RETVAL_BUG
   {
     static mbstate_t internal_state;
@@ -357,8 +368,7 @@ rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
         size_t count = 0;
         for (; n > 0; s++, n--)
           {
-            wchar_t wc;
-            size_t ret = mbrtowc (&wc, s, 1, ps);
+            ret = mbrtowc (&wc, s, 1, ps);
 
             if (ret == (size_t)(-1))
               return (size_t)(-1);
@@ -366,8 +376,7 @@ rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
             if (ret != (size_t)(-2))
               {
                 /* The multibyte character has been completed.  */
-                if (pwc != NULL)
-                  *pwc = wc;
+                *pwc = wc;
                 return (wc == 0 ? 0 : count);
               }
           }
@@ -376,32 +385,23 @@ rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
   }
 # endif
 
-# if MBRTOWC_NUL_RETVAL_BUG
-  {
-    wchar_t wc;
-    size_t ret = mbrtowc (&wc, s, n, ps);
+  ret = mbrtowc (pwc, s, n, ps);
 
-    if (ret != (size_t)(-1) && ret != (size_t)(-2))
-      {
-        if (pwc != NULL)
-          *pwc = wc;
-        if (wc == 0)
-          ret = 0;
-      }
-    return ret;
-  }
-# else
-  {
-#   if MBRTOWC_NULL_ARG1_BUG
-    wchar_t dummy;
-
-    if (pwc == NULL)
-      pwc = &dummy;
-#   endif
+# if MBRTOWC_NUL_RETVAL_BUG
+  if (ret < (size_t) -2 && !*pwc)
+    return 0;
+# endif
 
-    return mbrtowc (pwc, s, n, ps);
-  }
+# if C_LOCALE_MAYBE_EILSEQ
+  if ((size_t) -2 <= ret && n != 0 && ! hard_locale (LC_CTYPE))
+    {
+      unsigned char uc = *s;
+      *pwc = uc;
+      return 1;
+    }
 # endif
+
+  return ret;
 }
 
 #endif
diff --git a/m4/mbrtowc.m4 b/m4/mbrtowc.m4
index e8c7eeb..d370fcc 100644
--- a/m4/mbrtowc.m4
+++ b/m4/mbrtowc.m4
@@ -1,4 +1,4 @@
-# mbrtowc.m4 serial 26  -*- coding: utf-8 -*-
+# mbrtowc.m4 serial 27  -*- coding: utf-8 -*-
 dnl Copyright (C) 2001-2002, 2004-2005, 2008-2016 Free Software Foundation,
 dnl Inc.
 dnl This file is free software; the Free Software Foundation
@@ -40,6 +40,7 @@ AC_DEFUN([gl_FUNC_MBRTOWC],
       gl_MBRTOWC_RETVAL
       gl_MBRTOWC_NUL_RETVAL
       gl_MBRTOWC_EMPTY_INPUT
+      gl_MBRTOWC_C_LOCALE
       case "$gl_cv_func_mbrtowc_null_arg1" in
         *yes) ;;
         *) AC_DEFINE([MBRTOWC_NULL_ARG1_BUG], [1],
@@ -76,6 +77,13 @@ AC_DEFUN([gl_FUNC_MBRTOWC],
            REPLACE_MBRTOWC=1
            ;;
       esac
+      case $gl_cv_C_locale_sans_EILSEQ in
+        *yes) ;;
+        *) AC_DEFINE([C_LOCALE_MAYBE_EILSEQ], [1],
+             [Define to 1 if the C locale may have encoding errors.])
+           REPLACE_MBRTOWC=1
+           ;;
+      esac
     fi
   fi
 ])
@@ -577,6 +585,46 @@ changequote([,])dnl
     ])
 ])
 
+dnl Test whether mbrtowc reports encoding errors in the C locale.
+dnl Although POSIX was never intended to allow this, the GNU C Library
+dnl and other implementations do it.  See:
+dnl https://sourceware.org/bugzilla/show_bug.cgi?id=19932
+
+AC_DEFUN([gl_MBRTOWC_C_LOCALE],
+[
+  AC_CACHE_CHECK([whether the C locale is free of encoding errors],
+    [gl_cv_C_locale_sans_EILSEQ],
+    [
+     dnl Initial guess, used when cross-compiling or when no suitable locale
+     dnl is present.
+     gl_cv_C_locale_sans_EILSEQ="guessing no"
+
+     AC_RUN_IFELSE(
+       [AC_LANG_PROGRAM(
+          [[#include <limits.h>
+            #include <locale.h>
+            #include <wchar.h>
+          ]], [[
+            int i;
+            char *locale = setlocale (LC_ALL, "C");
+            if (! locale)
+              return 1;
+            for (i = CHAR_MIN; i <= CHAR_MAX; i++)
+              {
+                char c = i;
+                wchar_t wc;
+                mbstate_t mbs = { 0, };
+                size_t ss = mbrtowc (&wc, &c, 1, &mbs);
+                if (1 < ss)
+                  return 1;
+              }
+            return 0;
+          ]])],
+      [gl_cv_C_locale_sans_EILSEQ=yes],
+      [gl_cv_C_locale_sans_EILSEQ=no],
+      [:])])
+])
+
 # Prerequisites of lib/mbrtowc.c.
 AC_DEFUN([gl_PREREQ_MBRTOWC], [
   :
diff --git a/modules/hard-locale b/modules/hard-locale
index 88dff8e..76c6edd 100644
--- a/modules/hard-locale
+++ b/modules/hard-locale
@@ -20,7 +20,7 @@ Include:
 "hard-locale.h"
 
 License:
-GPL
+LGPLv2+
 
 Maintainer:
 Paul Eggert
diff --git a/modules/mbrtowc b/modules/mbrtowc
index 4e90b67..bd951ae 100644
--- a/modules/mbrtowc
+++ b/modules/mbrtowc
@@ -13,6 +13,7 @@ m4/codeset.m4
 Depends-on:
 wchar
 extensions
+hard-locale     [test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1]
 mbsinit         [test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1]
 localcharset    [test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1]
 streq           [test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1]
diff --git a/modules/mbrtowc-tests b/modules/mbrtowc-tests
index bbd2213..fe948c3 100644
--- a/modules/mbrtowc-tests
+++ b/modules/mbrtowc-tests
@@ -3,6 +3,7 @@ tests/test-mbrtowc1.sh
 tests/test-mbrtowc2.sh
 tests/test-mbrtowc3.sh
 tests/test-mbrtowc4.sh
+tests/test-mbrtowc5.sh
 tests/test-mbrtowc.c
 tests/test-mbrtowc-w32-1.sh
 tests/test-mbrtowc-w32-2.sh
@@ -31,6 +32,7 @@ gt_LOCALE_ZH_CN
 Makefile.am:
 TESTS += \
   test-mbrtowc1.sh test-mbrtowc2.sh test-mbrtowc3.sh test-mbrtowc4.sh \
+  test-mbrtowc5.sh \
   test-mbrtowc-w32-1.sh test-mbrtowc-w32-2.sh test-mbrtowc-w32-3.sh \
   test-mbrtowc-w32-4.sh test-mbrtowc-w32-5.sh
 TESTS_ENVIRONMENT += \
@@ -39,4 +41,3 @@ TESTS_ENVIRONMENT += \
   LOCALE_JA='@LOCALE_JA@' \
   LOCALE_ZH_CN='@LOCALE_ZH_CN@'
 check_PROGRAMS += test-mbrtowc test-mbrtowc-w32
-
diff --git a/tests/test-mbrtowc.c b/tests/test-mbrtowc.c
index 831836e..f7fed6a 100644
--- a/tests/test-mbrtowc.c
+++ b/tests/test-mbrtowc.c
@@ -72,6 +72,10 @@ main (int argc, char *argv[])
     for (c = 0; c < 0x100; c++)
       switch (c)
         {
+        default:
+          if (! (c && 1 < argc && argv[1][0] == '5'))
+            break;
+          /* Fall through.  */
         case '\t': case '\v': case '\f':
         case ' ': case '!': case '"': case '#': case '%':
         case '&': case '\'': case '(': case ')': case '*':
@@ -93,7 +97,8 @@ main (int argc, char *argv[])
         case 'p': case 'q': case 'r': case 's': case 't':
         case 'u': case 'v': case 'w': case 'x': case 'y':
         case 'z': case '{': case '|': case '}': case '~':
-          /* c is in the ISO C "basic character set".  */
+          /* c is in the ISO C "basic character set", or argv[1] starts
+             with '5' so we are testing all nonnull bytes.  */
           buf[0] = c;
           wc = (wchar_t) 0xBADFACE;
           ret = mbrtowc (&wc, buf, 1, &state);
@@ -334,6 +339,10 @@ main (int argc, char *argv[])
           ASSERT (mbsinit (&state));
         }
         return 0;
+
+      case '5':
+        /* C locale; tested above.  */
+        return 0;
       }
 
   return 1;
diff --git a/tests/test-mbrtowc5.sh b/tests/test-mbrtowc5.sh
new file mode 100755
index 0000000..c10b228
--- /dev/null
+++ b/tests/test-mbrtowc5.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+# Test whether the POSIX locale has encoding errors.
+LC_ALL=C \
+./test-mbrtowc${EXEEXT} 5 || exit
+LC_ALL=POSIX \
+./test-mbrtowc${EXEEXT} 5
-- 
2.5.5




reply via email to

[Prev in Thread] Current Thread [Next in Thread]