bug-diffutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [bug-diffutils] Bug#704182: diffutils: Diff -r will confusion betwee


From: Paul Eggert
Subject: Re: [bug-diffutils] Bug#704182: diffutils: Diff -r will confusion between asian characters in filenames, when locale are non asian - UTF-8. (fwd)
Date: Wed, 03 Apr 2013 08:26:34 -0700
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130308 Thunderbird/17.0.4

Thanks for reminding me about that problem.  I've pushed the following patches.
The first two are merely tuning and reorganization; the third one does the
real work.

From 37bffc430560df85029b2cacda65893542f0d455 Mon Sep 17 00:00:00 2001
From: Paul Eggert <address@hidden>
Date: Wed, 3 Apr 2013 07:48:22 -0700
Subject: [PATCH 1/3] diff: tune compare_names_for_qsort

* src/dir.c (compare_collated): New function.
(compare_names): Use it.
(compare_names_for_qsort): Use it.  This is a bit more efficient
as it can avoid a double invocation of file_name_cmp when
file_name_cmp returns zero.
---
 src/dir.c | 49 +++++++++++++++++++++++++++++++------------------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/src/dir.c b/src/dir.c
index 7f647b0..fc42f62 100644
--- a/src/dir.c
+++ b/src/dir.c
@@ -140,28 +140,34 @@ dir_read (struct file_data const *dir, struct dirdata *dirdata)
   return true;
 }
 
-/* Compare file names, returning a value compatible with strcmp.  */
+/* Compare strings in a locale-specific way, returning a value
+   compatible with strcmp.  */
 
 static int
-compare_names (char const *name1, char const *name2)
+compare_collated (char const *name1, char const *name2)
 {
-  if (locale_specific_sorting)
+  int r;
+  errno = 0;
+  if (ignore_file_name_case)
+    r = strcasecoll (name1, name2);
+  else
+    r = strcoll (name1, name2);
+  if (errno)
     {
-      int r;
-      errno = 0;
-      if (ignore_file_name_case)
-       r = strcasecoll (name1, name2);
-      else
-       r = strcoll (name1, name2);
-      if (errno)
-       {
-         error (0, errno, _("cannot compare file names '%s' and '%s'"),
-                name1, name2);
-         longjmp (failed_locale_specific_sorting, 1);
-       }
-      return r;
+      error (0, errno, _("cannot compare file names '%s' and '%s'"),
+            name1, name2);
+      longjmp (failed_locale_specific_sorting, 1);
     }
+  return r;
+}
+
+/* Compare file names, returning a value compatible with strcmp.  */
 
+static int
+compare_names (char const *name1, char const *name2)
+{
+  if (locale_specific_sorting)
+    return compare_collated (name1, name2);
   return file_name_cmp (name1, name2);
 }
 
@@ -173,8 +179,15 @@ compare_names_for_qsort (void const *file1, void const *file2)
 {
   char const *const *f1 = file1;
   char const *const *f2 = file2;
-  int diff = compare_names (*f1, *f2);
-  return diff ? diff : file_name_cmp (*f1, *f2);
+  char const *name1 = *f1;
+  char const *name2 = *f2;
+  if (locale_specific_sorting)
+    {
+      int diff = compare_collated (name1, name2);
+      if (diff)
+       return diff;
+    }
+  return file_name_cmp (name1, name2);
 }
 
 /* Compare the contents of two directories named in CMP.
-- 
1.7.11.7


From 73482f40100760b276d383ed0a588ce13a3d52b4 Mon Sep 17 00:00:00 2001
From: Paul Eggert <address@hidden>
Date: Wed, 3 Apr 2013 07:51:33 -0700
Subject: [PATCH 2/3] diff: remove unnecessary decl

* src/dir.c (compare_names_for_qsort): Remove declaration.
Not needed now that we assume C89.
---
 src/dir.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/dir.c b/src/dir.c
index fc42f62..21b1935 100644
--- a/src/dir.c
+++ b/src/dir.c
@@ -45,7 +45,6 @@ static bool locale_specific_sorting;
 static jmp_buf failed_locale_specific_sorting;
 
 static bool dir_loop (struct comparison const *, int);
-static int compare_names_for_qsort (void const *, void const *);
 
 
 /* Read a directory and get its vector of names.  */
-- 
1.7.11.7


From e82f540d1134ba3d30434024e6fc9aea8ec71cf1 Mon Sep 17 00:00:00 2001
From: Paul Eggert <address@hidden>
Date: Wed, 3 Apr 2013 08:20:31 -0700
Subject: [PATCH 3/3] diff: fix bug with Asian file names

Problem reported by Errembault Philippe in:
http://lists.gnu.org/archive/html/bug-diffutils/2013-03/msg00012.html
* NEWS: Document this.
* src/dir.c (compare_names): Fall back on file_name_cmp if
compare_collated returns 0, unless ignoring file name case.
(diff_dirs): Don't bother with the O(N**2) stuff unless ignoring
file name case.
* tests/Makefile.am (TESTS): Add strcoll-0-names.
* tests/strcoll-0-names: New file.
---
 NEWS                  |  7 +++++++
 src/dir.c             |  8 ++++++--
 tests/Makefile.am     |  1 +
 tests/strcoll-0-names | 25 +++++++++++++++++++++++++
 4 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100755 tests/strcoll-0-names

diff --git a/NEWS b/NEWS
index ac7a75e..79517f2 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,13 @@ GNU diffutils NEWS                                    -*- outline -*-
 
 * Noteworthy changes in release ?.? (????-??-??) [?]
 
+** Bug fixes
+
+  Unless the --ignore-file-name-case option is used, diff now
+  considers file names to be equal only if they are byte-for-byte
+  equivalent.  This fixes a bug where diff in an English locale might
+  consider two Asian file names to be the same merely because they
+  contain no English characters.
 
 * Noteworthy changes in release 3.3 (2013-03-24) [stable]
 
diff --git a/src/dir.c b/src/dir.c
index 21b1935..d3b0a2d 100644
--- a/src/dir.c
+++ b/src/dir.c
@@ -166,7 +166,11 @@ static int
 compare_names (char const *name1, char const *name2)
 {
   if (locale_specific_sorting)
-    return compare_collated (name1, name2);
+    {
+      int diff = compare_collated (name1, name2);
+      if (diff || ignore_file_name_case)
+       return diff;
+    }
   return file_name_cmp (name1, name2);
 }
 
@@ -271,7 +275,7 @@ diff_dirs (struct comparison const *cmp,
             O(N**2), where N is the number of names in a directory
             that compare_names says are all equal, but in practice N
             is so small it's not worth tuning.  */
-         if (nameorder == 0)
+         if (nameorder == 0 && ignore_file_name_case)
            {
              int raw_order = file_name_cmp (*names[0], *names[1]);
              if (raw_order != 0)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 5cbcfb4..dd2d514 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -12,6 +12,7 @@ TESTS = \
   no-dereference \
   no-newline-at-eof \
   stdin \
+  strcoll-0-names \
   filename-quoting
 
 EXTRA_DIST = \
diff --git a/tests/strcoll-0-names b/tests/strcoll-0-names
new file mode 100755
index 0000000..33c4a3c
--- /dev/null
+++ b/tests/strcoll-0-names
@@ -0,0 +1,25 @@
+#!/bin/sh
+# Check that diff responds well with two different file names
+# that compare equal with strcoll.  See:
+# http://lists.gnu.org/archive/html/bug-diffutils/2013-03/msg00012.html
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+# These two names compare equal in the en_US.UTF-8 locale
+# in current (2013) versions of glibc.
+# On systems where the names do not compare equal,
+# this diff test should still do the right thing.
+LC_ALL=en_US.UTF-8
+export LC_ALL
+name1='エンドカード1'
+name2='ブックレット1'
+
+mkdir d1 d2 || fail=1
+echo x >d1/"$name1" || fail=1
+echo x >d2/"$name2" || fail=1
+
+# This should report a difference, but on the affected systems
+# diffutils 3.3 does not.
+diff d1 d2 && fail=1
+
+Exit $fail
-- 
1.7.11.7




reply via email to

[Prev in Thread] Current Thread [Next in Thread]