/* Provenance: git commit e4f2a5f97771c4a74e0bdef1c1e4a0d2735cef15
   Author:  Paul Eggert
   Date:    Sun, 22 Jul 2018 09:50:20 -0700
   Subject: [PATCH 1/2] df: avoid multibyte character corruption on macOS

   Problem reported by Chih-Hsuan Yen (Bug#32236).
   * NEWS: Mention the bug fix.
   * src/df.c: Include wchar.h and wctype.h.
   (hide_problematic_chars): Respect multibyte encodings when
   replacing problematic characters or bytes with '?'.

   NEWS entry added by the patch, under "** Bug fixes":
     df no longer corrupts displayed multibyte characters on macOS.

   The code below is the post-patch hide_problematic_chars from
   src/df.c, with two corrections relative to the patch text:
   decode failures now always consume exactly one byte, and the
   in-place copy uses memmove instead of memcpy.  */

#include <string.h>
#include <wchar.h>
#include <wctype.h>

/* Replace problematic chars in CELL with '?', in place.

   CELL is a NUL-terminated multibyte string in the encoding of the
   current LC_CTYPE locale.  Each character for which iswcntrl is
   true, and each byte that does not start a valid, complete
   multibyte character, is replaced by a single '?'.  All other
   characters are kept byte-for-byte.  The result is never longer
   than the input, and is always NUL-terminated.  */

static void
hide_problematic_chars (char *cell)
{
  char *srcend = cell + strlen (cell);
  char *dst = cell;
  mbstate_t mbstate = { 0, };
  size_t n;

  for (char *src = cell; src != srcend; src += n)
    {
      wchar_t wc;
      size_t srcbytes = srcend - src;
      int keep;

      n = mbrtowc (&wc, src, srcbytes, &mbstate);

      if (n == 0 || srcbytes < n)
        {
          /* mbrtowc failed: (size_t) -1 for an invalid sequence,
             (size_t) -2 for an incomplete one (both exceed SRCBYTES),
             or 0, which would not advance SRC.  Treat the current
             byte alone as problematic so that the loop always makes
             progress and SRC never leaves the buffer.  */
          keep = 0;
          n = 1;
        }
      else
        keep = !iswcntrl (wc);

      if (keep)
        {
          /* DST may trail SRC by fewer than N bytes once an earlier
             multibyte character has been collapsed to '?', so the
             regions can overlap; memcpy would be undefined here.  */
          memmove (dst, src, n);
          dst += n;
        }
      else
        {
          *dst++ = '?';
          /* Restart decoding from the initial shift state; the state
             is unspecified after a decoding error.  */
          memset (&mbstate, 0, sizeof mbstate);
        }
    }

  *dst = '\0';
}