diff --git a/lib/localcharset.c b/lib/localcharset.c
index bfd9a03..fd677cf 100644
--- a/lib/localcharset.c
+++ b/lib/localcharset.c
@@ -113,19 +113,38 @@
 #if __STDC__ != 1
 # define volatile /* empty */
 #endif
-/* Pointer to the contents of the charset.alias file, if it has already been
-   read, else NULL.  Its format is:
-   ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'  */
-static const char * volatile charset_aliases;
+/* Sorted array of pointers to the charset aliases that have been read, or
+   NULL if there are none.  Each element points to a pair of strings:
+   ALIAS '\0' CANONICAL '\0'.  The "*" wildcard entry is not included.  */
+static const char ** volatile charset_aliases;
+/* Number of elements in charset_aliases, or -1 until the charset aliases
+   have been read.  */
+static int volatile charset_aliases_count = -1;
+/* Fallback charset name, taken from the "*" entry of the aliases, or NULL
+   if there is no such entry.  */
+static const char * volatile charset_fallback;
+
+/* Comparison function for qsort and bsearch.  A and B each point to a
+   'const char *' that points to an alias name.  */
+static int
+charset_alias_cmp (const void *a, const void *b)
+{
+  return strcmp (*(const char * const *) a, *(const char * const *) b);
+}
 
-/* Return a pointer to the contents of the charset.alias file.  */
-static const char *
+/* Read the contents of the charset.alias file, if not already done, and
+   store the result in charset_aliases, charset_aliases_count and
+   charset_fallback.  */
+static void
 get_charset_aliases (void)
 {
   const char *cp;
+  const char **cps = NULL;
+  int cnt = 0;
+  const char *fb = NULL;
+  const char *aliases;
 
-  cp = charset_aliases;
-  if (cp == NULL)
+  if (charset_aliases_count < 0)
     {
 #if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__)
       const char *dir;
@@ -343,10 +362,76 @@ get_charset_aliases (void)
 # endif
 #endif
 
-      charset_aliases = cp;
+      /* Count the aliases and find the fallback, i.e. the "*" entry.  */
+      cnt = 0;
+      for (aliases = cp; *aliases != '\0';
+           aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
+        {
+          if (aliases[0] == '*' && aliases[1] == '\0')
+            fb = aliases + strlen (aliases) + 1;
+          else
+            cnt++;
+        }
+
+      if (cnt > 0)
+        {
+          cps = (const char **) malloc (cnt * sizeof (*cps));
+          if (cps == NULL)
+            {
+              /* Out of memory.  The alias table cannot be built, but the
+                 fallback needs no allocation, so keep it usable.  */
+              charset_fallback = fb;
+              charset_aliases_count = 0;
+              return;
+            }
+
+          /* Collect the aliases, without the "*" entry, and sort them so
+             that resolve_alias can use bsearch.  */
+          cnt = 0;
+          for (aliases = cp; *aliases != '\0';
+               aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
+            {
+              if (!(aliases[0] == '*' && aliases[1] == '\0'))
+                cps[cnt++] = aliases;
+            }
+
+          qsort (cps, cnt, sizeof (*cps), charset_alias_cmp);
+        }
+
+      /* Store the count last, since a non-negative count means that the
+         aliases have been read.  */
+      charset_aliases = cps;
+      charset_fallback = fb;
+      charset_aliases_count = cnt;
     }
+}
+
+/* Replace CODESET using the alias table.  Return the canonical name of the
+   alias that equals CODESET; else the fallback from the "*" entry, if there
+   is one; else CODESET itself.  An exact match takes precedence over the
+   "*" entry, wherever that entry is located among the aliases.
+   The result must not be freed.  */
+static const char *
+resolve_alias (const char *codeset)
+{
+  const char **alias_ptr = NULL;
+
+  get_charset_aliases ();
+
+  if (charset_aliases_count > 0)
+    alias_ptr = (const char **) bsearch (&codeset, charset_aliases,
+                                         charset_aliases_count,
+                                         sizeof (*charset_aliases),
+                                         charset_alias_cmp);
+
+  /* The canonical name follows the alias name and its terminating NUL.  */
+  if (alias_ptr != NULL)
+    return *alias_ptr + strlen (*alias_ptr) + 1;
+
+  if (charset_fallback != NULL)
+    return charset_fallback;
 
-  return cp;
+  return codeset;
 }
 
 /* Determine the current locale's character encoding, and canonicalize it
@@ -362,7 +447,6 @@ const char *
 locale_charset (void)
 {
   const char *codeset;
-  const char *aliases;
 
 #if !(defined WINDOWS_NATIVE || defined OS2)
 
@@ -530,16 +614,7 @@ locale_charset (void)
     /* The canonical name cannot be determined.  */
     codeset = "";
 
-  /* Resolve alias. */
-  for (aliases = get_charset_aliases ();
-       *aliases != '\0';
-       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
-    if (strcmp (codeset, aliases) == 0
-        || (aliases[0] == '*' && aliases[1] == '\0'))
-      {
-        codeset = aliases + strlen (aliases) + 1;
-        break;
-      }
+  codeset = resolve_alias (codeset);
 
   /* Don't return an empty string.  GNU libc and GNU libiconv interpret
      the empty string as denoting "the locale's character encoding",