bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

u16-conv-to-enc fix


From: Bruno Haible
Subject: u16-conv-to-enc fix
Date: Sun, 20 Apr 2008 20:02:30 +0200
User-agent: KMail/1.5.4

This fixes a testsuite failure of u16-conv-to-enc on platforms with a
working non-GNU iconv implementation, such as NetBSD.

2008-04-20  Bruno Haible  <address@hidden>

        Fix test failure on platforms with non-GNU iconv.
        * lib/uniconv/u16-conv-to-enc.c (u16_to_u8_lenient): New function.
        (U_TO_U8): Use it, rather than u16_to_u8.
        * lib/uniconv/u-conv-to-enc.h (FUNC): Allow an incomplete sequence of
        units at the end of the input string.
        * modules/uniconv/u16-conv-to-enc (Depends-on): Update.

*** lib/uniconv/u-conv-to-enc.h.orig    2008-04-20 19:58:07.000000000 +0200
--- lib/uniconv/u-conv-to-enc.h 2008-04-20 19:52:59.000000000 +0200
***************
*** 106,112 ****
  
        iunit = 0;
        i8 = 0;
!       while (iunit < srclen)
        {
          int countunit;
          int count8;
--- 106,112 ----
  
        iunit = 0;
        i8 = 0;
!       while (iunit < srclen && i8 < utf8_srclen)
        {
          int countunit;
          int count8;
***************
*** 120,125 ****
--- 120,136 ----
          iunit += countunit;
          i8 += count8;
        }
+       /* Check that utf8_src has been traversed entirely.  */
+       if (i8 < utf8_srclen)
+       abort ();
+       /* Check that src has been traversed entirely, except possibly for an
+        incomplete sequence of units at the end.  */
+       if (iunit < srclen)
+       {
+         offsets[iunit] = *lengthp;
+         if (!(U_MBLEN (src + iunit, srclen - iunit) < 0))
+           abort ();
+       }
        free (scaled_offsets);
      }
    if (utf8_src != tmpbuf)
*** lib/uniconv/u16-conv-to-enc.c.orig  2008-04-20 19:58:07.000000000 +0200
--- lib/uniconv/u16-conv-to-enc.c       2008-04-20 19:53:09.000000000 +0200
***************
*** 1,5 ****
  /* Conversion from UTF-16 to legacy encodings.
!    Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
  
     This program is free software: you can redistribute it and/or modify it
     under the terms of the GNU Lesser General Public License as published
--- 1,5 ----
  /* Conversion from UTF-16 to legacy encodings.
!    Copyright (C) 2002, 2006-2008 Free Software Foundation, Inc.
  
     This program is free software: you can redistribute it and/or modify it
     under the terms of the GNU Lesser General Public License as published
***************
*** 39,47 ****
  # endif
  #endif
  
  #define FUNC u16_conv_to_encoding
  #define UNIT uint16_t
! #define U_TO_U8 u16_to_u8
  #define U_MBLEN u16_mblen
  #if defined UTF16_NAME
  # define UTF_NAME UTF16_NAME
--- 39,174 ----
  # endif
  #endif
  
+ 
+ #if !defined UTF16_NAME
+ 
+ /* A variant of u16_to_u8 that treats an incomplete sequence of units at the
+    end as a harmless no-op, rather than reporting it as an EILSEQ error.  */
+ 
+ #define FUNC u16_to_u8_lenient
+ #define SRC_UNIT uint16_t
+ #define DST_UNIT uint8_t
+ 
+ static DST_UNIT *
+ FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp)
+ {
+   const SRC_UNIT *s_end = s + n;
+   /* Output string accumulator.  */
+   DST_UNIT *result;
+   size_t allocated;
+   size_t length;
+ 
+   if (resultbuf != NULL)
+     {
+       result = resultbuf;
+       allocated = *lengthp;
+     }
+   else
+     {
+       result = NULL;
+       allocated = 0;
+     }
+   length = 0;
+   /* Invariants:
+      result is either == resultbuf or == NULL or malloc-allocated.
+      If length > 0, then result != NULL.  */
+ 
+   while (s < s_end)
+     {
+       ucs4_t uc;
+       int count;
+ 
+       /* Fetch a Unicode character from the input string.  */
+       count = u16_mbtoucr (&uc, s, s_end - s);
+       if (count < 0)
+       {
+         if (count == -2)
+           /* Incomplete sequence of units.  */
+           break;
+         if (!(result == resultbuf || result == NULL))
+           free (result);
+         errno = EILSEQ;
+         return NULL;
+       }
+       s += count;
+ 
+       /* Store it in the output string.  */
+       count = u8_uctomb (result + length, uc, allocated - length);
+       if (count == -1)
+       {
+         if (!(result == resultbuf || result == NULL))
+           free (result);
+         errno = EILSEQ;
+         return NULL;
+       }
+       if (count == -2)
+       {
+         DST_UNIT *memory;
+ 
+         allocated = (allocated > 0 ? 2 * allocated : 12);
+         if (length + 6 > allocated)
+           allocated = length + 6;
+         if (result == resultbuf || result == NULL)
+           memory = (DST_UNIT *) malloc (allocated * sizeof (DST_UNIT));
+         else
+           memory =
+             (DST_UNIT *) realloc (result, allocated * sizeof (DST_UNIT));
+ 
+         if (memory == NULL)
+           {
+             if (!(result == resultbuf || result == NULL))
+               free (result);
+             errno = ENOMEM;
+             return NULL;
+           }
+         if (result == resultbuf && length > 0)
+           memcpy ((char *) memory, (char *) result,
+                   length * sizeof (DST_UNIT));
+         result = memory;
+         count = u8_uctomb (result + length, uc, allocated - length);
+         if (count < 0)
+           abort ();
+       }
+       length += count;
+     }
+ 
+   if (length == 0)
+     {
+       if (result == NULL)
+       {
+         /* Return a non-NULL value.  NULL means error.  */
+         result = (DST_UNIT *) malloc (1);
+         if (result == NULL)
+           {
+             errno = ENOMEM;
+             return NULL;
+           }
+       }
+     }
+   else if (result != resultbuf && length < allocated)
+     {
+       /* Shrink the allocated memory if possible.  */
+       DST_UNIT *memory;
+ 
+       memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT));
+       if (memory != NULL)
+       result = memory;
+     }
+ 
+   *lengthp = length;
+   return result;
+ }
+ 
+ #undef DST_UNIT
+ #undef SRC_UNIT
+ #undef FUNC
+ 
+ #endif
+ 
+ 
  #define FUNC u16_conv_to_encoding
  #define UNIT uint16_t
! #define U_TO_U8 u16_to_u8_lenient
  #define U_MBLEN u16_mblen
  #if defined UTF16_NAME
  # define UTF_NAME UTF16_NAME
*** modules/uniconv/u16-conv-to-enc.orig        2008-04-20 19:58:07.000000000 +0200
--- modules/uniconv/u16-conv-to-enc     2008-04-20 19:17:20.000000000 +0200
***************
*** 9,15 ****
  uniconv/base
  striconveha
  uniconv/u8-conv-to-enc
! unistr/u16-to-u8
  
  configure.ac:
  AC_REQUIRE([AC_C_BIGENDIAN])
--- 9,18 ----
  uniconv/base
  striconveha
  uniconv/u8-conv-to-enc
! unistr/u16-mbtoucr
! unistr/u8-uctomb
! unistr/u16-mblen 
! unistr/u8-mblen 
  
  configure.ac:
  AC_REQUIRE([AC_C_BIGENDIAN])





reply via email to

[Prev in Thread] Current Thread [Next in Thread]