From e4f2a5f97771c4a74e0bdef1c1e4a0d2735cef15 Mon Sep 17 00:00:00 2001
From: Paul Eggert
Date: Sun, 22 Jul 2018 09:50:20 -0700
Subject: [PATCH 1/2] df: avoid multibyte character corruption on macOS
Problem reported by Chih-Hsuan Yen (Bug#32236).
* NEWS: Mention the bug fix.
* src/df.c: Include wchar.h and wctype.h.
(hide_problematic_chars): Respect multibyte encodings
when replacing problematic characters or bytes with '?'.
---
NEWS | 4 ++++
src/df.c | 35 +++++++++++++++++++++++++----------
2 files changed, 29 insertions(+), 10 deletions(-)
diff --git a/NEWS b/NEWS
index af1a990..aa3b4f9 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,10 @@ GNU coreutils NEWS -*- outline -*-
* Noteworthy changes in release ?.? (????-??-??) [?]
+** Bug fixes
+
+ df no longer corrupts displayed multibyte characters on macOS.
+
* Noteworthy changes in release 8.30 (2018-07-01) [stable]
diff --git a/src/df.c b/src/df.c
index 1178865..52be414b 100644
--- a/src/df.c
+++ b/src/df.c
@@ -23,6 +23,8 @@
#include <sys/types.h>
#include <getopt.h>
#include <assert.h>
+#include <wchar.h>
+#include <wctype.h>
#include "system.h"
#include "canonicalize.h"
@@ -271,21 +273,41 @@ static struct option const long_options[] =
{NULL, 0, NULL, 0}
};
-/* Replace problematic chars with '?'.
- Since only control characters are currently considered,
- this should work in all encodings. */
+/* Replace problematic chars in CELL with '?', in place.
+ A char is problematic if it is a control character or if its
+ bytes cannot be decoded as a multibyte character. */
-static char*
+static void
hide_problematic_chars (char *cell)
{
- char *p = cell;
- while (*p)
+ char *srcend = cell + strlen (cell);
+ char *dst = cell;
+ mbstate_t mbstate = { 0, };
+ size_t n;
+
+ for (char *src = cell; src != srcend; src += n)
{
- if (iscntrl (to_uchar (*p)))
- *p = '?';
- p++;
+ wchar_t wc;
+ size_t srcbytes = srcend - src;
+ n = mbrtowc (&wc, src, srcbytes, &mbstate);
+ if (0 < n && n <= srcbytes && !iswcntrl (wc))
+ {
+ /* DST can equal SRC, so memcpy's no-overlap rule would not hold. */
+ memmove (dst, src, n);
+ dst += n;
+ }
+ else
+ {
+ /* After a decoding failure N is (size_t) -1 or -2, which must
+ not be added to SRC; consume a single byte instead. */
+ if (n == 0 || srcbytes < n)
+ n = 1;
+ *dst++ = '?';
+ memset (&mbstate, 0, sizeof mbstate);
+ }
}
- return cell;
+
+ *dst = '\0';
}
/* Dynamically allocate a row of pointers in TABLE, which
/* Dynamically allocate a row of pointers in TABLE, which
--
2.7.4