bug-bash
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: RFE: Please allow unicode ID chars in identifiers


From: dualbus
Subject: Re: RFE: Please allow unicode ID chars in identifiers
Date: Sun, 4 Jun 2017 02:45:38 -0500
User-agent: NeoMutt/20170113 (1.7.2)

On Sun, Jun 04, 2017 at 01:46:23AM +0700, PePa wrote:
[...]
> But the fact that unicode functions are already supported does seem to
> pave the way for allowing variable names in unicode. For consistency, it
[...]

I know I said I wasn't going to reply, but this changed my mind :-)

I hadn't realized that bash already supports Unicode in function names!

FWIW:

  bash-4.4$ 
  Lēv=?
  Φ=0.618033988749894848
  ɸ=1.61803398874989485
  π=3.14159265358979324
  declare -p Lēv Φ ɸ π
  declare -- Lēv="?"
  declare -- Φ="0.618033988749894848"
  declare -- ɸ="1.61803398874989485"
  declare -- π="3.14159265358979324"

With this terrible patch:

dualbus@debian:~/src/gnu/bash$ PAGER= git diff
diff --git a/general.c b/general.c
index 584e7859..40db7b1d 100644
--- a/general.c
+++ b/general.c
@@ -61,6 +61,9 @@ extern int errno;
 #  include <sys/cygwin.h>
 #endif
 
+#define wlegal_variable_starter(c) (iswalpha(c) || (L'_' == c))
+#define wlegal_variable_char(c) (iswalnum(c) || (L'_' == c))
+
 static char *bash_special_tilde_expansions __P((char *));
 static int unquoted_tilde_word __P((const char *));
 static void initialize_group_array __P((void));
@@ -214,15 +217,25 @@ int
 legal_identifier (name)
      const char *name;
 {
-  register const char *s;
-  unsigned char c;
+  wchar_t *s, *wstring;
+  wchar_t c;
+  size_t n;
+
+  if (!name || *name == '\0')
+    return (0);
+
+  n = mbstowcs(NULL, name, 0);
+  if((size_t) -1 == n) return 0;
+  wstring = xmalloc(sizeof(wchar_t) * (n+1));
+  n = mbstowcs(wstring, name, n);
+  if((size_t) -1 == n) return 0;
 
-  if (!name || !(c = *name) || (legal_variable_starter (c) == 0))
+  if (wlegal_variable_starter (*wstring) == 0)
     return (0);
 
-  for (s = name + 1; (c = *s) != 0; s++)
+  for (s = wstring + 1; (c = *s) != 0; s++)
     {
-      if (legal_variable_char (c) == 0)
+      if (wlegal_variable_char (c) == 0)
        return (0);
     }
   return (1);
@@ -357,27 +370,31 @@ assignment (string, flags)
      const char *string;
      int flags;
 {
-  register unsigned char c;
+  wchar_t c;
   register int newi, indx;
+  wchar_t *wstring;
+  int n;
+  size_t len;
 
-  c = string[indx = 0];
-
+  len = strlen(string);
+  if ((n=mbtowc(&c, &string[indx = 0], len)) < 1) return (0);
+  indx += n; len -= n;
 #if defined (ARRAY_VARS)
-  if ((legal_variable_starter (c) == 0) && ((flags&1) == 0 || c != '[')) /* ] */
+  if ((wlegal_variable_starter (c) == 0) && ((flags&1) == 0 || c != L'[')) /* ] */
 #else
-  if (legal_variable_starter (c) == 0)
+  if (wlegal_variable_starter (c) == 0)
 #endif
     return (0);
 
-  while (c = string[indx])
+  while ((n=mbtowc(&c, &string[indx], len)) > 0)
     {
       /* The following is safe.  Note that '=' at the start of a word
         is not an assignment statement. */
-      if (c == '=')
+      if (c == L'=')
        return (indx);
 
 #if defined (ARRAY_VARS)
-      if (c == '[')
+      if (c == L'[')
        {
          newi = skipsubscript (string, indx, (flags & 2) ? 1 : 0);
          if (string[newi++] != ']')
@@ -389,15 +406,15 @@ assignment (string, flags)
 #endif /* ARRAY_VARS */
 
       /* Check for `+=' */
-      if (c == '+' && string[indx+1] == '=')
+      if (c == L'+' && string[indx+1] == '=')
        return (indx + 1);
 
       /* Variable names in assignment statements may contain only letters,
         digits, and `_'. */
-      if (legal_variable_char (c) == 0)
+      if (wlegal_variable_char (c) == 0)
        return (0);
 
-      indx++;
+      indx += n; len -= n;
     }
   return (0);
 }


It seems to have issues with compound assignments though. 

-- 
Eduardo Bustamante
https://dualbus.me/



reply via email to

[Prev in Thread] Current Thread [Next in Thread]