[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: RFE: Please allow unicode ID chars in identifiers
From: dualbus
Subject: Re: RFE: Please allow unicode ID chars in identifiers
Date: Sun, 4 Jun 2017 02:45:38 -0500
User-agent: NeoMutt/20170113 (1.7.2)
On Sun, Jun 04, 2017 at 01:46:23AM +0700, PePa wrote:
[...]
> But the fact that unicode functions are already supported does seem to
> pave the way for allowing variable names in unicode. For consistency, it
[...]
I know I said I wasn't going to reply, but this changed my mind :-)
I hadn't realized that bash already supports Unicode in function names!
FWIW:
bash-4.4$
Lēv=?
Φ=0.618033988749894848
ɸ=1.61803398874989485
π=3.14159265358979324
declare -p Lēv Φ ɸ π
declare -- Lēv="?"
declare -- Φ="0.618033988749894848"
declare -- ɸ="1.61803398874989485"
declare -- π="3.14159265358979324"
With this terrible patch:
dualbus@debian:~/src/gnu/bash$ PAGER= git diff
diff --git a/general.c b/general.c
index 584e7859..40db7b1d 100644
--- a/general.c
+++ b/general.c
@@ -61,6 +61,9 @@ extern int errno;
# include <sys/cygwin.h>
#endif
+#define wlegal_variable_starter(c) (iswalpha(c) || (L'_' == c))
+#define wlegal_variable_char(c) (iswalnum(c) || (L'_' == c))
+
static char *bash_special_tilde_expansions __P((char *));
static int unquoted_tilde_word __P((const char *));
static void initialize_group_array __P((void));
@@ -214,15 +217,25 @@ int
legal_identifier (name)
const char *name;
{
- register const char *s;
- unsigned char c;
+ wchar_t *s, *wstring;
+ wchar_t c;
+ size_t n;
+
+ if (!name || *name == '\0')
+ return (0);
+
+ n = mbstowcs(NULL, name, 0);
+ if((size_t) -1 == n) return 0;
+ wstring = xmalloc(sizeof(wchar_t) * (n+1));
+ n = mbstowcs(wstring, name, n);
+ if((size_t) -1 == n) return 0;
- if (!name || !(c = *name) || (legal_variable_starter (c) == 0))
+ if (wlegal_variable_starter (*wstring) == 0)
return (0);
- for (s = name + 1; (c = *s) != 0; s++)
+ for (s = wstring + 1; (c = *s) != 0; s++)
{
- if (legal_variable_char (c) == 0)
+ if (wlegal_variable_char (c) == 0)
return (0);
}
return (1);
@@ -357,27 +370,31 @@ assignment (string, flags)
const char *string;
int flags;
{
- register unsigned char c;
+ wchar_t c;
register int newi, indx;
+ wchar_t *wstring;
+ int n;
+ size_t len;
- c = string[indx = 0];
-
+ len = strlen(string);
+ if ((n=mbtowc(&c, &string[indx = 0], len)) < 1) return (0);
+ indx += n; len -= n;
#if defined (ARRAY_VARS)
- if ((legal_variable_starter (c) == 0) && ((flags&1) == 0 || c != '[')) /* ]
*/
+ if ((wlegal_variable_starter (c) == 0) && ((flags&1) == 0 || c != L'[')) /*
] */
#else
- if (legal_variable_starter (c) == 0)
+ if (wlegal_variable_starter (c) == 0)
#endif
return (0);
- while (c = string[indx])
+ while ((n=mbtowc(&c, &string[indx], len)) > 0)
{
/* The following is safe. Note that '=' at the start of a word
is not an assignment statement. */
- if (c == '=')
+ if (c == L'=')
return (indx);
#if defined (ARRAY_VARS)
- if (c == '[')
+ if (c == L'[')
{
newi = skipsubscript (string, indx, (flags & 2) ? 1 : 0);
if (string[newi++] != ']')
@@ -389,15 +406,15 @@ assignment (string, flags)
#endif /* ARRAY_VARS */
/* Check for `+=' */
- if (c == '+' && string[indx+1] == '=')
+ if (c == L'+' && string[indx+1] == '=')
return (indx + 1);
/* Variable names in assignment statements may contain only letters,
digits, and `_'. */
- if (legal_variable_char (c) == 0)
+ if (wlegal_variable_char (c) == 0)
return (0);
- indx++;
+ indx += n; len -= n;
}
return (0);
}
It seems to have issues with compound assignments though.
--
Eduardo Bustamante
https://dualbus.me/
- RFE: Please allow unicode ID chars in identifiers, L A Walsh, 2017/06/01
- Re: RFE: Please allow unicode ID chars in identifiers, Eduardo Bustamante, 2017/06/01
- Re: RFE: Please allow unicode ID chars in identifiers, L A Walsh, 2017/06/02
- Re: RFE: Please allow unicode ID chars in identifiers, dualbus, 2017/06/02
- Re: RFE: Please allow unicode ID chars in identifiers, L A Walsh, 2017/06/02
- Re: RFE: Please allow unicode ID chars in identifiers, dualbus, 2017/06/02
- Re: RFE: Please allow unicode ID chars in identifiers, L A Walsh, 2017/06/03
- Re: RFE: Please allow unicode ID chars in identifiers, George, 2017/06/03
- Re: RFE: Please allow unicode ID chars in identifiers, PePa, 2017/06/03
- Re: RFE: Please allow unicode ID chars in identifiers, dualbus <=
- Re: Patch for unicode in varnames..., L A Walsh, 2017/06/04
- Re: Patch for unicode in varnames..., George, 2017/06/05
- Re: Patch for unicode in varnames..., Peter & Kelly Passchier, 2017/06/05
- Re: Patch for unicode in varnames..., George, 2017/06/05
- Re: Patch for unicode in varnames..., L A Walsh, 2017/06/05
- Re: Patch for unicode in varnames..., George, 2017/06/06
- Re: Patch for unicode in varnames..., Peter & Kelly Passchier, 2017/06/06
- Re: Patch for unicode in varnames..., Peter & Kelly Passchier, 2017/06/06
- Re: Patch for unicode in varnames..., Chet Ramey, 2017/06/13
- Re: Patch for unicode in varnames..., L A Walsh, 2017/06/06