bug-bash
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Fix u32toutf8 so it encodes values > 0xFFFF correctly.


From: John Kearney
Subject: Fix u32toutf8 so it encodes values > 0xFFFF correctly.
Date: Sat, 18 Feb 2012 11:39:43 +0100
User-agent: Mozilla/5.0 (X11; Linux i686; rv:10.0) Gecko/20120129 Thunderbird/10.0

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Configuration Information [Automatically generated, do not change]:
Machine: x86_64
OS: linux-gnu
Compiler: gcc
Compilation CFLAGS:  -DPROGRAM='bash' -DCONF_HOSTTYPE='x86_64'
- -DCONF_OSTYPE='linux-gnu' -DCONF_MACHTYPE='x86_64-pc-linux-gnu'
- -DCONF_VENDOR='pc' -DLOCALEDIR='/usr/share/locale' -DPACKAGE='bash'
- -DSHELL -DHAVE_CONFIG_H   -I.  -I../bash -I../bash/include
- -I../bash/lib   -g -O2 -Wall
uname output: Linux DETH00 3.0.0-15-generic #26-Ubuntu SMP Fri Jan 20
17:23:00 UTC 2012 x86_64 x86_64 x86_64 GNU/Linux
Machine Type: x86_64-pc-linux-gnu

Bash Version: 4.2
Patch Level: 10
Release Status: release

Description:
        The current u32toutf8 only encodes values up to 0xFFFF correctly.
        wchar_t has an ambiguous size, so in my opinion it is better to use
unsigned long, uint32_t, or some other clearly sized type.
Repeat-By:
  -------'

Fix:
diff --git a/lib/sh/unicode.c b/lib/sh/unicode.c
index d34fa08..3f7d378 100644
- --- a/lib/sh/unicode.c
+++ b/lib/sh/unicode.c
@@ -54,7 +54,7 @@ extern const char *locale_charset __P((void));
 extern char *get_locale_var __P((char *));
 #endif

- -static int u32init = 0;
+static int u32init = 0;
 static int utf8locale = 0;
 #if defined (HAVE_ICONV)
 static iconv_t localconv;
@@ -115,26 +115,61 @@ u32tochar (wc, s)
 }

 int
- -u32toutf8 (wc, s)
- -     wchar_t wc;
+u32toutf8 (c, s)
+     unsigned long c;
      char *s;
 {
   int l;

- -  l = (wc < 0x0080) ? 1 : ((wc < 0x0800) ? 2 : 3);
- -
- -  if (wc < 0x0080)
- -    s[0] = (unsigned char)wc;
- -  else if (wc < 0x0800)
+  if (c <= 0x7F)
+    {
+      s[0] = (char)c;
+      l = 1;
+    }
+  else if (c <= 0x7FF)
+    {
+      s[0] = (c >>   6)                | 0xc0; /* 110x xxxx */
+      s[1] = (c                & 0x3f) | 0x80; /* 10xx xxxx */
+      l = 2;
+    }
+  else if (c <= 0xFFFF)
+    {
+      s[0] =  (c >> 12)         | 0xe0; /* 1110 xxxx */
+      s[1] = ((c >>  6) & 0x3f) | 0x80; /* 10xx xxxx */
+      s[2] =  (c        & 0x3f) | 0x80; /* 10xx xxxx */
+      l = 3;
+    }
+  else if (c <= 0x1FFFFF)
     {
- -      s[0] = (wc >> 6) | 0xc0;
- -      s[1] = (wc & 0x3f) | 0x80;
+      s[0] =  (c >> 18)         | 0xf0; /* 1111 0xxx */
+      s[1] = ((c >> 12) & 0x3f) | 0x80; /* 10xx xxxx */
+      s[2] = ((c >>  6) & 0x3f) | 0x80; /* 10xx xxxx */
+      s[3] = ( c        & 0x3f) | 0x80; /* 10xx xxxx */
+      l = 4;
+    }
+  else if (c <= 0x3FFFFFF)
+    {
+      s[0] =  (c >> 24)         | 0xf8; /* 1111 10xx */
+      s[1] = ((c >> 18) & 0x3f) | 0x80; /* 10xx xxxx */
+      s[2] = ((c >> 12) & 0x3f) | 0x80; /* 10xx xxxx */
+      s[3] = ((c >>  6) & 0x3f) | 0x80; /* 10xx xxxx */
+      s[4] = ( c        & 0x3f) | 0x80; /* 10xx xxxx */
+      l = 5;
+    }
+  else if (c <= 0x7FFFFFFF)
+    {
+      s[0] =  (c >> 30)         | 0xfc; /* 1111 110x */
+      s[1] = ((c >> 24) & 0x3f) | 0x80; /* 10xx xxxx */
+      s[2] = ((c >> 18) & 0x3f) | 0x80; /* 10xx xxxx */
+      s[3] = ((c >> 12) & 0x3f) | 0x80; /* 10xx xxxx */
+      s[4] = ((c >>  6) & 0x3f) | 0x80; /* 10xx xxxx */
+      s[5] = ( c        & 0x3f) | 0x80; /* 10xx xxxx */
+      l = 6;
     }
   else
     {
- -      s[0] = (wc >> 12) | 0xe0;
- -      s[1] = ((wc >> 6) & 0x3f) | 0x80;
- -      s[2] = (wc & 0x3f) | 0x80;
+      /* Error Invalid UTF-8 */
+      l = 0;
     }
   s[l] = '\0';
   return l;
@@ -150,7 +185,7 @@ u32cconv (c, s)
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJPP3/tAAoJEKUDtR0WmS059CcH/iIyBOGhf0IgSmnIFyw0YLpA
3ZWSaXWoEZodrDr1fX67hj2424icXm9fTZw70G+rS1YjtCfm86O/Qou4VNROylAv
TbjPUWkHRWVci7IqcDGb1tNWRrulxUvNFA/Uc1xBtKckAO6HHHRTYFa+sCkd5Fnx
dm7e0iMTqMMmL/dUwB+di+hSkGD+ZXS1vY76wizdwG7CteUxAVunse+ffP7TRYbn
K86Whc7p7llG12hruCPGArc9iS7YiBaC/XNIKXmN7fn93dhQTcdzzk/UTGmaZgDk
cQk4R7/NBljP4LtQtKwX4JYAi5XJM5TeSLykL97UFxW/5OGM+SmSVJbKLlHU/mQ=
=EJUb
-----END PGP SIGNATURE-----



reply via email to

[Prev in Thread] Current Thread [Next in Thread]