bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: vasnwprintf: Port to older platforms without swprintf


From: Bruno Haible
Subject: Re: vasnwprintf: Port to older platforms without swprintf
Date: Tue, 21 Mar 2023 17:52:43 +0100

Yesterday I did:
>       (VASNPRINTF): In this case, implement %ls and %lc directly. Adjust a
>       couple of #if conditions. For the conversion from TCHAR_T[] to
>       DCHAR_T[], use mbsrtowcs.

Oops, this was buggy:
  - The %lc implementation needs to ignore the precision.
  - The conversion from TCHAR_T[] to DCHAR_T[] must not stop at NUL bytes.
    It needs to convert the entire output of snprintf.
The new unit tests for the %c and %lc directives trigger these bugs.

Fixed as follows.


2023-03-21  Bruno Haible  <bruno@clisp.org>

        vasnwprintf: Fix for older platforms without swprintf.
        * lib/vasnprintf.c (VASNPRINTF): In the %lc handling, ignore the
        precision. Convert the snprintf result to a wchar_t[] not by mbsrtowcs,
        but by a loop that does not stop at NUL characters.
        * tests/test-vasnwprintf-posix.c (test_function): Add more tests for the
        %c and %lc directives.
        * modules/vasnwprintf (Depends-on): Add mbrtowc. Remove mbsrtowcs.

diff --git a/lib/vasnprintf.c b/lib/vasnprintf.c
index 0b349f4b55..bd13002e98 100644
--- a/lib/vasnprintf.c
+++ b/lib/vasnprintf.c
@@ -2432,8 +2432,6 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
                    Instead, just copy the argument wchar_t[] to the result.  */
                 int flags = dp->flags;
                 size_t width;
-                int has_precision;
-                size_t precision;
 
                 width = 0;
                 if (dp->width_start != dp->width_end)
@@ -2464,59 +2462,62 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
                       }
                   }
 
-                has_precision = 0;
-                precision = 6;
-                if (dp->precision_start != dp->precision_end)
-                  {
-                    if (dp->precision_arg_index != ARG_NONE)
-                      {
-                        int arg;
-
-                        if (!(a.arg[dp->precision_arg_index].type == TYPE_INT))
-                          abort ();
-                        arg = a.arg[dp->precision_arg_index].a.a_int;
-                        /* "A negative precision is taken as if the precision
-                            were omitted."  */
-                        if (arg >= 0)
-                          {
-                            precision = arg;
-                            has_precision = 1;
-                          }
-                      }
-                    else
-                      {
-                        const FCHAR_T *digitp = dp->precision_start + 1;
-
-                        precision = 0;
-                        while (digitp != dp->precision_end)
-                          precision = xsum (xtimes (precision, 10), *digitp++ 
- '0');
-                        has_precision = 1;
-                      }
-                  }
-
                 {
-                  const wchar_t *arg;
+                  const wchar_t *ls_arg;
                   wchar_t lc_arg[1];
                   size_t characters;
 
                   if (dp->conversion == 's')
                     {
-                      arg = a.arg[dp->arg_index].a.a_wide_string;
+                      int has_precision;
+                      size_t precision;
+
+                      has_precision = 0;
+                      precision = 6;
+                      if (dp->precision_start != dp->precision_end)
+                        {
+                          if (dp->precision_arg_index != ARG_NONE)
+                            {
+                              int arg;
+
+                              if (!(a.arg[dp->precision_arg_index].type == 
TYPE_INT))
+                                abort ();
+                              arg = a.arg[dp->precision_arg_index].a.a_int;
+                              /* "A negative precision is taken as if the 
precision
+                                  were omitted."  */
+                              if (arg >= 0)
+                                {
+                                  precision = arg;
+                                  has_precision = 1;
+                                }
+                            }
+                          else
+                            {
+                              const FCHAR_T *digitp = dp->precision_start + 1;
+
+                              precision = 0;
+                              while (digitp != dp->precision_end)
+                                precision = xsum (xtimes (precision, 10), 
*digitp++ - '0');
+                              has_precision = 1;
+                            }
+                        }
+
+                      ls_arg = a.arg[dp->arg_index].a.a_wide_string;
 
                       if (has_precision)
                         {
                           /* Use only at most PRECISION wide characters, from
                              the left.  */
-                          const wchar_t *arg_end;
+                          const wchar_t *ls_arg_end;
 
-                          arg_end = arg;
+                          ls_arg_end = ls_arg;
                           characters = 0;
                           for (; precision > 0; precision--)
                             {
-                              if (*arg_end == 0)
+                              if (*ls_arg_end == 0)
                                 /* Found the terminating null wide character.  
*/
                                 break;
-                              arg_end++;
+                              ls_arg_end++;
                               characters++;
                             }
                         }
@@ -2524,17 +2525,14 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
                         {
                           /* Use the entire string, and count the number of 
wide
                              characters.  */
-                          characters = local_wcslen (arg);
+                          characters = local_wcslen (ls_arg);
                         }
                     }
                   else /* dp->conversion == 'c' */
                     {
                       lc_arg[0] = (wchar_t) a.arg[dp->arg_index].a.a_wide_char;
-                      arg = lc_arg;
-                      if (has_precision && precision == 0)
-                        characters = 0;
-                      else
-                        characters = 1;
+                      ls_arg = lc_arg;
+                      characters = 1;
                     }
 
                   {
@@ -2550,7 +2548,7 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
 
                     if (characters > 0)
                       {
-                        DCHAR_CPY (result + length, arg, characters);
+                        DCHAR_CPY (result + length, ls_arg, characters);
                         length += characters;
                       }
 
@@ -5854,21 +5852,59 @@ VASNPRINTF (DCHAR_T *resultbuf, size_t *lengthp,
                         tmpsrc = tmp;
 # endif
 # if WIDE_CHAR_VERSION
-                        const TCHAR_T *tmpsrc2;
+                        /* Convert tmpsrc[0..count-1] to a freshly allocated
+                           wide character array.  */
                         mbstate_t state;
 
-                        tmpsrc2 = tmpsrc;
                         memset (&state, '\0', sizeof (mbstate_t));
-                        tmpdst_len = mbsrtowcs (NULL, &tmpsrc2, 0, &state);
-                        if (tmpdst_len == (size_t) -1)
-                          goto fail_with_errno;
+                        tmpdst_len = 0;
+                        {
+                          const TCHAR_T *src = tmpsrc;
+                          size_t srclen = count;
+
+                          for (; srclen > 0; tmpdst_len++)
+                            {
+                              /* Parse the next multibyte character.  */
+                              size_t ret = mbrtowc (NULL, src, srclen, &state);
+                              if (ret == (size_t)(-2) || ret == (size_t)(-1))
+                                goto fail_with_EILSEQ;
+                              if (ret == 0)
+                                ret = 1;
+                              src += ret;
+                              srclen -= ret;
+                            }
+                        }
+
                         tmpdst =
                           (wchar_t *) malloc ((tmpdst_len + 1) * sizeof 
(wchar_t));
                         if (tmpdst == NULL)
                           goto out_of_memory;
-                        tmpsrc2 = tmpsrc;
+
                         memset (&state, '\0', sizeof (mbstate_t));
-                        (void) mbsrtowcs (tmpdst, &tmpsrc2, tmpdst_len + 1, 
&state);
+                        {
+                          DCHAR_T *destptr = tmpdst;
+                          size_t len = tmpdst_len;
+                          const TCHAR_T *src = tmpsrc;
+                          size_t srclen = count;
+
+                          for (; srclen > 0; destptr++, len--)
+                            {
+                              /* Parse the next multibyte character.  */
+                              size_t ret = mbrtowc (destptr, src, srclen, 
&state);
+                              if (ret == (size_t)(-2) || ret == (size_t)(-1))
+                                /* Should already have been caught in the first
+                                   loop, above.  */
+                                abort ();
+                              if (ret == 0)
+                                ret = 1;
+                              src += ret;
+                              srclen -= ret;
+                            }
+                          /* By the way tmpdst_len was computed, len should now
+                             be 0.  */
+                          if (len != 0)
+                            abort ();
+                        }
 # else
                         tmpdst =
                           DCHAR_CONV_FROM_ENCODING (locale_charset (),
diff --git a/modules/vasnwprintf b/modules/vasnwprintf
index 109c39f1c5..91c4ca64ed 100644
--- a/modules/vasnwprintf
+++ b/modules/vasnwprintf
@@ -35,7 +35,7 @@ errno
 memchr
 assert-h
 wchar
-mbsrtowcs
+mbrtowc
 wmemcpy
 wmemset
 
diff --git a/tests/test-vasnwprintf-posix.c b/tests/test-vasnwprintf-posix.c
index c609a104ff..e53c6a33f3 100644
--- a/tests/test-vasnwprintf-posix.c
+++ b/tests/test-vasnwprintf-posix.c
@@ -3940,6 +3940,26 @@ test_function (wchar_t * (*my_asnwprintf) (wchar_t *, 
size_t *, const wchar_t *,
     free (result);
   }
 
+  { /* Precision is ignored.  */
+    size_t length;
+    wchar_t *result =
+      my_asnwprintf (NULL, &length,
+                     L"%.0c %d", (unsigned char) 'x', 33, 44, 55);
+    ASSERT (wcscmp (result, L"x 33") == 0);
+    ASSERT (length == wcslen (result));
+    free (result);
+  }
+
+  { /* NUL character.  */
+    size_t length;
+    wchar_t *result =
+      my_asnwprintf (NULL, &length,
+                     L"a%cz %d", '\0', 33, 44, 55);
+    ASSERT (wmemcmp (result, L"a\0z 33\0", 6 + 1) == 0);
+    ASSERT (length == 6);
+    free (result);
+  }
+
 #if HAVE_WCHAR_T
   static wint_t L_x = (wchar_t) 'x';
 
@@ -3982,6 +4002,24 @@ test_function (wchar_t * (*my_asnwprintf) (wchar_t *, 
size_t *, const wchar_t *,
     ASSERT (length == wcslen (result));
     free (result);
   }
+
+  { /* Precision is ignored.  */
+    size_t length;
+    wchar_t *result =
+      my_asnwprintf (NULL, &length, L"%.0lc %d", L_x, 33, 44, 55);
+    ASSERT (wcscmp (result, L"x 33") == 0);
+    ASSERT (length == wcslen (result));
+    free (result);
+  }
+
+  { /* NUL character.  */
+    size_t length;
+    wchar_t *result =
+      my_asnwprintf (NULL, &length, L"a%lcz %d", (wint_t) L'\0', 33, 44, 55);
+    ASSERT (wmemcmp (result, L"a\0z 33\0", 6 + 1) == 0);
+    ASSERT (length == 6);
+    free (result);
+  }
 #endif
 
   /* Test the support of the 'b' conversion specifier for binary output of






reply via email to

[Prev in Thread] Current Thread [Next in Thread]