Richard Frith-Macdonald wrote on 2003-10-18:
3) If GNUSTEP_STRING_ENCODING is not set, why is the default value
(set in Unicode.m:580) ISO-8859-1? On POSIX systems, all programs
are expected to interpret file names and file contents according to
the encoding given by the current locale (nl_langinfo (CODESET)).
IMO this codeset should be taken and transformed into the GNUstep
specific equivalent name. I'm using a de_DE.UTF-8 locale and all
my local files are UTF-8 encoded.
... I'd be happy to accept a patch to make this change
as long as nobody knows good reason not to.
Here is a patch to that effect. I have tested it in a UTF-8 locale.
You need to regenerate config.h.in and configure after applying the
patch.
2003-12-07 Bruno Haible <bruno@clisp.org>
* Source/Additions/Unicode.m (GetDefEncoding): Use the result of
nl_langinfo(CODESET) before falling back on ISO-8859-1.
* config/codeset.m4: New file, taken from GNU gettext.
* configure.ac: Include it, and invoke AM_LANGINFO_CODESET.
*** gnustep-base-1.8.0/Source/Additions/Unicode.m.bak 2003-09-16 04:56:04.000000000 +0200
--- gnustep-base-1.8.0/Source/Additions/Unicode.m 2003-12-07 23:42:24.000000000 +0100
***************
*** 45,50 ****
--- 45,54 ----
#include <stdlib.h>
#include <string.h>
+ #if HAVE_LANGINFO_CODESET
+ #include <langinfo.h>
+ #endif
+
typedef struct {unichar from; unsigned char to;} _ucc_;
#include "GNUstepBase/unicode/cyrillic.h"
***************
*** 577,583 ****
if (defEnc == GSUndefinedEncoding)
{
/* Encoding not set */
! defEnc = NSISOLatin1StringEncoding;
}
else if (GSEncodingSupported(defEnc) == NO)
{
--- 581,683 ----
if (defEnc == GSUndefinedEncoding)
{
/* Encoding not set */
! #if HAVE_LANGINFO_CODESET
! /* Take it from the system locale information. */
! encoding = nl_langinfo(CODESET);
! if (strcmp(encoding, "ANSI_X3.4-1968") == 0 /* glibc */
! || strcmp(encoding, "ISO_646.IRV:1983") == 0 /* glibc */
! || strcmp(encoding, "646") == 0 /* Solaris NetBSD */)
! defEnc = NSASCIIStringEncoding;
! else if (strcmp(encoding, "EUC-JP") == 0 /* glibc */
! || strcmp(encoding, "eucJP") == 0 /* HP-UX IRIX OSF/1 Solaris
NetBSD */
! || strcmp(encoding, "IBM-eucJP") == 0 /* AIX */)
! defEnc = NSJapaneseEUCStringEncoding;
! else if (strcmp(encoding, "UTF-8") == 0 /* glibc AIX OSF/1
Solaris */
! || strcmp(encoding, "utf8") == 0 /* HP-UX */)
! defEnc = NSUTF8StringEncoding;
! else if (strcmp(encoding, "ISO-8859-1") == 0 /* glibc */
! || strcmp(encoding, "ISO8859-1") == 0 /* AIX IRIX OSF/1 Solaris
NetBSD */
! || strcmp(encoding, "iso88591") == 0 /* HP-UX */)
! defEnc = NSISOLatin1StringEncoding;
! else if (strcmp(encoding, "IBM-932") == 0 /* AIX */
! || strcmp(encoding, "SJIS") == 0 /* HP-UX OSF/1 NetBSD */
! || strcmp(encoding, "PCK") == 0 /* Solaris */)
! defEnc = NSShiftJISStringEncoding;
! else if (strcmp(encoding, "ISO-8859-2") == 0 /* glibc */
! || strcmp(encoding, "ISO8859-2") == 0 /* AIX IRIX OSF/1 Solaris
NetBSD */
! || strcmp(encoding, "iso88592") == 0 /* HP-UX */)
! defEnc = NSISOLatin2StringEncoding;
! else if (strcmp(encoding, "CP1251") == 0 /* glibc */
! || strcmp(encoding, "ansi-1251") == 0 /* Solaris */)
! defEnc = NSWindowsCP1251StringEncoding;
! else if (strcmp(encoding, "CP1252") == 0 /* */
! || strcmp(encoding, "IBM-1252") == 0 /* AIX */)
! defEnc = NSWindowsCP1252StringEncoding;
! else if (strcmp(encoding, "ISO-8859-5") == 0 /* glibc */
! || strcmp(encoding, "ISO8859-5") == 0 /* AIX IRIX OSF/1 Solaris
NetBSD */
! || strcmp(encoding, "iso88595") == 0 /* HP-UX */)
! defEnc = NSISOCyrillicStringEncoding;
! else if (strcmp(encoding, "KOI8-R") == 0 /* glibc */
! || strcmp(encoding, "koi8-r") == 0 /* Solaris */)
! defEnc = NSKOI8RStringEncoding;
! else if (strcmp(encoding, "ISO-8859-3") == 0 /* glibc */
! || strcmp(encoding, "ISO8859-3") == 0 /* Solaris */)
! defEnc = NSISOLatin3StringEncoding;
! else if (strcmp(encoding, "ISO-8859-4") == 0 /* */
! || strcmp(encoding, "ISO8859-4") == 0 /* OSF/1 Solaris NetBSD
*/)
! defEnc = NSISOLatin4StringEncoding;
! else if (strcmp(encoding, "ISO-8859-6") == 0 /* glibc */
! || strcmp(encoding, "ISO8859-6") == 0 /* AIX Solaris */
! || strcmp(encoding, "iso88596") == 0 /* HP-UX */)
! defEnc = NSISOArabicStringEncoding;
! else if (strcmp(encoding, "ISO-8859-7") == 0 /* glibc */
! || strcmp(encoding, "ISO8859-7") == 0 /* AIX IRIX OSF/1 Solaris
*/
! || strcmp(encoding, "iso88597") == 0 /* HP-UX */)
! defEnc = NSISOGreekStringEncoding;
! else if (strcmp(encoding, "ISO-8859-8") == 0 /* glibc */
! || strcmp(encoding, "ISO8859-8") == 0 /* AIX OSF/1 Solaris */
! || strcmp(encoding, "iso88598") == 0 /* HP-UX */)
! defEnc = NSISOHebrewStringEncoding;
! else if (strcmp(encoding, "ISO-8859-9") == 0 /* glibc */
! || strcmp(encoding, "ISO8859-9") == 0 /* AIX IRIX OSF/1 Solaris
*/
! || strcmp(encoding, "iso88599") == 0 /* HP-UX */)
! defEnc = NSISOLatin5StringEncoding;
! else if (strcmp(encoding, "ISO-8859-10") == 0 /* */
! || strcmp(encoding, "ISO8859-10") == 0 /* */)
! defEnc = NSISOLatin6StringEncoding;
! else if (strcmp(encoding, "TIS-620") == 0 /* glibc AIX */
! || strcmp(encoding, "tis620") == 0 /* HP-UX */
! || strcmp(encoding, "TIS620.2533") == 0 /* Solaris */
! || strcmp(encoding, "TACTIS") == 0 /* OSF/1 */)
! defEnc = NSISOThaiStringEncoding;
! else if (strcmp(encoding, "ISO-8859-13") == 0 /* glibc */
! || strcmp(encoding, "ISO8859-13") == 0 /* */
! || strcmp(encoding, "IBM-921") == 0 /* AIX */)
! defEnc = NSISOLatin7StringEncoding;
! else if (strcmp(encoding, "ISO-8859-14") == 0 /* glibc */
! || strcmp(encoding, "ISO8859-14") == 0 /* */)
! defEnc = NSISOLatin8StringEncoding;
! else if (strcmp(encoding, "ISO-8859-15") == 0 /* glibc */
! || strcmp(encoding, "ISO8859-15") == 0 /* AIX OSF/1 Solaris
NetBSD */
! || strcmp(encoding, "iso885915") == 0 /* HP-UX */)
! defEnc = NSISOLatin9StringEncoding;
! else if (strcmp(encoding, "GB2312") == 0 /* glibc */
! || strcmp(encoding, "gb2312") == 0 /* Solaris */
! || strcmp(encoding, "eucCN") == 0 /* IRIX NetBSD */
! || strcmp(encoding, "IBM-eucCN") == 0 /* AIX */
! || strcmp(encoding, "hp15CN") == 0 /* HP-UX */)
! defEnc = NSGB2312StringEncoding;
! else if (strcmp(encoding, "BIG5") == 0 /* glibc Solaris NetBSD */
! || strcmp(encoding, "big5") == 0 /* AIX HP-UX OSF/1 */)
! defEnc = NSBIG5StringEncoding;
! else if (strcmp(encoding, "EUC-KR") == 0 /* glibc */
! || strcmp(encoding, "eucKR") == 0 /* HP-UX IRIX OSF/1 NetBSD
*/
! || strcmp(encoding, "IBM-eucKR") == 0 /* AIX */
! || strcmp(encoding, "5601") == 0 /* Solaris */)
! defEnc = NSKoreanEUCEncoding;
! else
! #endif
! defEnc = NSISOLatin1StringEncoding;
}
else if (GSEncodingSupported(defEnc) == NO)
{
*** gnustep-base-1.8.0/config/codeset.m4.bak 2003-12-07 21:59:42.000000000 +0100
--- gnustep-base-1.8.0/config/codeset.m4 2002-01-13 12:51:27.000000000 +0100
***************
*** 0 ****
--- 1,23 ----
+ # codeset.m4 serial AM1 (gettext-0.10.40)
+ dnl Copyright (C) 2000-2002 Free Software Foundation, Inc.
+ dnl This file is free software, distributed under the terms of the GNU
+ dnl General Public License. As a special exception to the GNU General
+ dnl Public License, this file may be distributed as part of a program
+ dnl that contains a configuration script generated by Autoconf, under
+ dnl the same distribution terms as the rest of that program.
+
+ dnl From Bruno Haible.
+
+ AC_DEFUN([AM_LANGINFO_CODESET],
+ [
+ AC_CACHE_CHECK([for nl_langinfo and CODESET],
am_cv_langinfo_codeset,
+ [AC_TRY_LINK([#include <langinfo.h>],
+ [char* cs = nl_langinfo(CODESET);],
+ am_cv_langinfo_codeset=yes,
+ am_cv_langinfo_codeset=no)
+ ])
+ if test $am_cv_langinfo_codeset = yes; then
+ AC_DEFINE(HAVE_LANGINFO_CODESET, 1,
+ [Define if you have <langinfo.h> and nl_langinfo(CODESET).])
+ fi
+ ])
*** gnustep-base-1.8.0/configure.ac.bak 2003-09-23 01:06:51.000000000 +0200
--- gnustep-base-1.8.0/configure.ac 2003-12-07 22:05:50.000000000 +0100
***************
*** 26,31 ****
--- 26,32 ----
builtin(include, config/procfs-exe-link.m4)dnl
builtin(include, config/procfs.m4)dnl
builtin(include, config/pathxml.m4)dnl
+ builtin(include, config/codeset.m4)dnl
AC_INIT
AC_CONFIG_SRCDIR([Source/NSArray.m])
***************
*** 1007,1012 ****
--- 1008,1018 ----
AC_SUBST(USE_GMP)
#--------------------------------------------------------------------
+ # Check whether nl_langinfo(CODESET) is supported, needed by Unicode.m.
+ #--------------------------------------------------------------------
+ AM_LANGINFO_CODESET
+
+ #--------------------------------------------------------------------
# Check for iconv support (for Unicode conversion).
#--------------------------------------------------------------------
# Check in the glibc library
_______________________________________________
Discuss-gnustep mailing list
Discuss-gnustep@gnu.org
http://mail.gnu.org/mailman/listinfo/discuss-gnustep