From 2cf5d730690dad600f8b6d74d0b5fde522804e43 Mon Sep 17 00:00:00 2001
From: Paul Eggert
Date: Sun, 22 Jul 2018 09:50:20 -0700
Subject: [PATCH] df: avoid multibyte character corruption on macOS
This improves on the earlier fix for the problem reported by
Chih-Hsuan Yen (Bug#32236), by also looking for other control
characters and for encoding errors.
* src/df.c: Include wchar.h and wctype.h instead of c-ctype.h.
(hide_problematic_chars): Process the string as multibyte.
Use iswcntrl, not c_iscntrl.
---
src/df.c | 43 ++++++++++++++++++++++++++++++++-----------
1 file changed, 32 insertions(+), 11 deletions(-)
diff --git a/src/df.c b/src/df.c
index c851fcc..d27ba02 100644
--- a/src/df.c
+++ b/src/df.c
@@ -23,7 +23,8 @@
#include <sys/types.h>
#include <getopt.h>
#include <assert.h>
-#include <c-ctype.h>
+#include <wchar.h>
+#include <wctype.h>
#include "system.h"
#include "canonicalize.h"
@@ -272,21 +273,41 @@ static struct option const long_options[] =
{NULL, 0, NULL, 0}
};
-/* Replace problematic chars with '?'.
- Since only control characters are currently considered,
- this should work in all encodings. */
+/* Replace problematic chars with '?'. */
-static char*
+static void
hide_problematic_chars (char *cell)
{
- char *p = cell;
- while (*p)
+ char *srcend = cell + strlen (cell);
+ char *dst = cell;
+ mbstate_t mbstate = { 0, };
+ size_t n;
+
+ for (char *src = cell; src != srcend; src += n)
{
- if (c_iscntrl (to_uchar (*p)))
- *p = '?';
- p++;
+ wchar_t wc;
+ size_t srcbytes = srcend - src;
+ n = mbrtowc (&wc, src, srcbytes, &mbstate);
+ bool ok = 0 < n && n <= srcbytes;
+
+ if (ok)
+ ok = !iswcntrl (wc);
+ else
+ n = 1;
+
+ if (ok)
+ {
+ memmove (dst, src, n);
+ dst += n;
+ }
+ else
+ {
+ *dst++ = '?';
+ memset (&mbstate, 0, sizeof mbstate);
+ }
}
- return cell;
+
+ *dst = '\0';
}
/* Dynamically allocate a row of pointers in TABLE, which
--
2.7.4