From 07acb017c26ce6a9477c119ca693ec49a5b77752 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Thu, 6 Mar 2014 21:53:50 -0800 Subject: [PATCH] regex: port to OS X 10.8.5 en_US.UTF-8 locale MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a bug when ignoring case and when comparing the titlecase letter 'Lj' (U+01C8 LATIN CAPITAL LETTER L WITH SMALL LETTER J) to the corresponding uppercase letter 'LJ' (U+01C7 LATIN CAPITAL LETTER LJ). In the OS X 10.8.5 en_US.UTF-8 locale, the titlecase letter is neither lowercase nor uppercase, but uppercasing the titlecase letter (via towupper) yields the uppercase letter, so the two letters should match when ignoring case. Problem reported by Jim Meyering in . * lib/regex_internal.c (build_wcs_upper_buffer, build_upper_buffer): Don't test whether a character is lowercase before uppercasing it. --- ChangeLog | 14 ++++++++++++++ lib/regex_internal.c | 15 +++++---------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2f847e6..fe35263 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2014-03-06 Paul Eggert + + regex: port to OS X 10.8.5 en_US.UTF-8 locale + This fixes a bug when ignoring case and when comparing the + titlecase letter 'Lj' (U+01C8 LATIN CAPITAL LETTER L WITH SMALL + LETTER J) to the corresponding uppercase letter 'LJ' (U+01C7 LATIN + CAPITAL LETTER LJ). In the OS X 10.8.5 en_US.UTF-8 locale, the + titlecase letter is neither lowercase nor uppercase, but + uppercasing the titlecase letter (via towupper) yields the + uppercase letter, so the two letters should match when ignoring case. + Problem reported by Jim Meyering in . + * lib/regex_internal.c (build_wcs_upper_buffer, build_upper_buffer): + Don't test whether a character is lowercase before uppercasing it. + 2014-03-04 Kevin Cernekee stdint, read-file: fix missing SIZE_MAX on Android (tiny change) diff --git a/lib/regex_internal.c b/lib/regex_internal.c index 0343ee6..79181a3 100644 --- a/lib/regex_internal.c +++ b/lib/regex_internal.c @@ -311,12 +311,11 @@ build_wcs_upper_buffer (re_string_t *pstr) + byte_idx), remain_len, &pstr->cur_state); if (BE (mbclen < (size_t) -2, 1)) { - wchar_t wcu = wc; - if (iswlower (wc)) + wchar_t wcu = towupper (wc); + if (wcu != wc) { size_t mbcdlen; - wcu = towupper (wc); mbcdlen = wcrtomb (buf, wcu, &prev_st); if (BE (mbclen == mbcdlen, 1)) memcpy (pstr->mbs + byte_idx, buf, mbclen); @@ -381,12 +380,11 @@ build_wcs_upper_buffer (re_string_t *pstr) mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); if (BE (mbclen < (size_t) -2, 1)) { - wchar_t wcu = wc; - if (iswlower (wc)) + wchar_t wcu = towupper (wc); + if (wcu != wc) { size_t mbcdlen; - wcu = towupper (wc); mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st); if (BE (mbclen == mbcdlen, 1)) memcpy (pstr->mbs + byte_idx, buf, mbclen); @@ -538,10 +536,7 @@ build_upper_buffer (re_string_t *pstr) int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; if (BE (pstr->trans != NULL, 0)) ch = pstr->trans[ch]; - if (islower (ch)) - pstr->mbs[char_idx] = toupper (ch); - else - pstr->mbs[char_idx] = ch; + pstr->mbs[char_idx] = toupper (ch); } pstr->valid_len = char_idx; pstr->valid_raw_len = char_idx; -- 1.8.5.3