bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 1/2] regex: assume RE_ENABLE_I18N


From: Paul Eggert
Subject: [PATCH 1/2] regex: assume RE_ENABLE_I18N
Date: Wed, 24 Nov 2021 09:35:29 -0800

These days there is no longer any need to port to platforms
lacking iswctype etc., since Gnulib now has substitutes.
* config/srclist.txt: Comment out regex_internal.c and
regex_internal.h for now, since they no longer match glibc.
The intent is to merge them again soon.
* lib/regex_internal.h (RE_ENABLE_I18N): Remove.
All uses changed to assume that RE_ENABLE_I18N is 1.
* modules/regex (Depends-on): Add iswctype.
---
 ChangeLog            |  12 ++
 config/srclist.txt   |   4 +-
 lib/regcomp.c        | 297 +++++++++++--------------------------------
 lib/regex_internal.c |  40 ------
 lib/regex_internal.h |  44 ++-----
 lib/regexec.c        |  73 +++--------
 modules/regex        |   1 +
 7 files changed, 121 insertions(+), 350 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index efc3a3887e..85674b1058 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2021-11-23  Paul Eggert  <eggert@cs.ucla.edu>
+
+       regex: assume RE_ENABLE_I18N
+       These days there is no longer any need to port to platforms
+       lacking iswctype etc., since Gnulib now has substitutes.
+       * config/srclist.txt: Comment out regex_internal.c and
+       regex_internal.h for now, since they no longer match glibc.
+       The intent is to merge them again soon.
+       * lib/regex_internal.h (RE_ENABLE_I18N): Remove.
+       All uses changed to assume that RE_ENABLE_I18N is 1.
+       * modules/regex (Depends-on): Add iswctype.
+
 2021-11-15  Paul Eggert  <eggert@cs.ucla.edu>
 
        lseek: port around macOS SEEK_HOLE glitch
diff --git a/config/srclist.txt b/config/srclist.txt
index e365012392..557b5dbaec 100644
--- a/config/srclist.txt
+++ b/config/srclist.txt
@@ -68,8 +68,8 @@ $LIBCSRC misc/sys/cdefs.h             lib
 #$LIBCSRC posix/regcomp.c              lib
 $LIBCSRC posix/regex.c                 lib
 $LIBCSRC posix/regex.h                 lib
-$LIBCSRC posix/regex_internal.c                lib
-$LIBCSRC posix/regex_internal.h                lib
+#$LIBCSRC posix/regex_internal.c       lib
+#$LIBCSRC posix/regex_internal.h       lib
 #$LIBCSRC posix/regexec.c              lib
 #$LIBCSRC stdlib/canonicalize           lib/canonicalize-lgpl.c
 #$LIBCSRC sysdeps/generic/eloop-threshold.h    lib
diff --git a/lib/regcomp.c b/lib/regcomp.c
index 887e5b5068..658de1318e 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -27,14 +27,10 @@ static void re_compile_fastmap_iter (regex_t *bufp,
                                     const re_dfastate_t *init_state,
                                     char *fastmap);
 static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
-#ifdef RE_ENABLE_I18N
 static void free_charset (re_charset_t *cset);
-#endif /* RE_ENABLE_I18N */
 static void free_workarea_compile (regex_t *preg);
 static reg_errcode_t create_initial_state (re_dfa_t *dfa);
-#ifdef RE_ENABLE_I18N
 static void optimize_utf8 (re_dfa_t *dfa);
-#endif
 static reg_errcode_t analyze (regex_t *preg);
 static reg_errcode_t preorder (bin_tree_t *root,
                               reg_errcode_t (fn (void *, bin_tree_t *)),
@@ -89,7 +85,6 @@ static reg_errcode_t parse_bracket_element (bracket_elem_t 
*elem,
 static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
                                          re_string_t *regexp,
                                          re_token_t *token);
-#ifdef RE_ENABLE_I18N
 static reg_errcode_t build_equiv_class (bitset_t sbcset,
                                        re_charset_t *mbcset,
                                        Idx *equiv_class_alloc,
@@ -100,14 +95,6 @@ static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE 
trans,
                                      Idx *char_class_alloc,
                                      const char *class_name,
                                      reg_syntax_t syntax);
-#else  /* not RE_ENABLE_I18N */
-static reg_errcode_t build_equiv_class (bitset_t sbcset,
-                                       const unsigned char *name);
-static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
-                                     bitset_t sbcset,
-                                     const char *class_name,
-                                     reg_syntax_t syntax);
-#endif /* not RE_ENABLE_I18N */
 static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
                                       RE_TRANSLATE_TYPE trans,
                                       const char *class_name,
@@ -306,7 +293,6 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t 
*init_state,
       if (type == CHARACTER)
        {
          re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
-#ifdef RE_ENABLE_I18N
          if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
            {
              unsigned char buf[MB_LEN_MAX];
@@ -327,7 +313,6 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t 
*init_state,
                      != (size_t) -1))
                re_set_fastmap (fastmap, false, buf[0]);
            }
-#endif
        }
       else if (type == SIMPLE_BRACKET)
        {
@@ -341,13 +326,12 @@ re_compile_fastmap_iter (regex_t *bufp, const 
re_dfastate_t *init_state,
                  re_set_fastmap (fastmap, icase, ch);
            }
        }
-#ifdef RE_ENABLE_I18N
       else if (type == COMPLEX_BRACKET)
        {
          re_charset_t *cset = dfa->nodes[node].opr.mbcset;
          Idx i;
 
-# ifdef _LIBC
+#ifdef _LIBC
          /* See if we have to try all bytes which start multiple collation
             elements.
             e.g. In da_DK, we want to catch 'a' since "aa" is a valid
@@ -363,7 +347,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t 
*init_state,
                    if (table[i] < 0)
                      re_set_fastmap (fastmap, icase, i);
                }
-# endif /* _LIBC */
+#endif /* _LIBC */
 
          /* See if we have to start the match at all multibyte characters,
             i.e. where we would not find an invalid sequence.  This only
@@ -371,9 +355,9 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t 
*init_state,
             sets, the SIMPLE_BRACKET again suffices.  */
          if (dfa->mb_cur_max > 1
              && (cset->nchar_classes || cset->non_match || cset->nranges
-# ifdef _LIBC
+#ifdef _LIBC
                  || cset->nequiv_classes
-# endif /* _LIBC */
+#endif /* _LIBC */
                 ))
            {
              unsigned char c = 0;
@@ -406,12 +390,7 @@ re_compile_fastmap_iter (regex_t *bufp, const 
re_dfastate_t *init_state,
                }
            }
        }
-#endif /* RE_ENABLE_I18N */
-      else if (type == OP_PERIOD
-#ifdef RE_ENABLE_I18N
-              || type == OP_UTF8_PERIOD
-#endif /* RE_ENABLE_I18N */
-              || type == END_OF_RE)
+      else if (type == OP_PERIOD || type == OP_UTF8_PERIOD || type == 
END_OF_RE)
        {
          memset (fastmap, '\1', sizeof (char) * SBC_MAX);
          if (type == END_OF_RE)
@@ -550,7 +529,6 @@ regerror (int errcode, const regex_t *__restrict preg, char 
*__restrict errbuf,
 weak_alias (__regerror, regerror)
 
 
-#ifdef RE_ENABLE_I18N
 /* This static array is used for the map to single-byte characters when
    UTF-8 is used.  Otherwise we would allocate memory just to initialize
    it the same all the time.  UTF-8 is the preferred encoding so this is
@@ -558,25 +536,24 @@ weak_alias (__regerror, regerror)
 static const bitset_t utf8_sb_map =
 {
   /* Set the first 128 bits.  */
-# if (defined __GNUC__ || __clang_major__ >= 4) && !defined __STRICT_ANSI__
+#if (defined __GNUC__ || __clang_major__ >= 4) && !defined __STRICT_ANSI__
   [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
-# else
-#  if 4 * BITSET_WORD_BITS < ASCII_CHARS
-#   error "bitset_word_t is narrower than 32 bits"
-#  elif 3 * BITSET_WORD_BITS < ASCII_CHARS
+#else
+# if 4 * BITSET_WORD_BITS < ASCII_CHARS
+#  error "bitset_word_t is narrower than 32 bits"
+# elif 3 * BITSET_WORD_BITS < ASCII_CHARS
   BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,
-#  elif 2 * BITSET_WORD_BITS < ASCII_CHARS
+# elif 2 * BITSET_WORD_BITS < ASCII_CHARS
   BITSET_WORD_MAX, BITSET_WORD_MAX,
-#  elif 1 * BITSET_WORD_BITS < ASCII_CHARS
+# elif 1 * BITSET_WORD_BITS < ASCII_CHARS
   BITSET_WORD_MAX,
-#  endif
+# endif
   (BITSET_WORD_MAX
    >> (SBC_MAX % BITSET_WORD_BITS == 0
        ? 0
        : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
-# endif
-};
 #endif
+};
 
 
 static void
@@ -614,10 +591,8 @@ free_dfa_content (re_dfa_t *dfa)
        re_free (entry->array);
       }
   re_free (dfa->state_table);
-#ifdef RE_ENABLE_I18N
   if (dfa->sb_char != utf8_sb_map)
     re_free (dfa->sb_char);
-#endif
   re_free (dfa->subexp_map);
 #ifdef DEBUG
   re_free (dfa->re_str);
@@ -796,11 +771,9 @@ re_compile_internal (regex_t *preg, const char * pattern, 
size_t length,
   if (__glibc_unlikely (err != REG_NOERROR))
     goto re_compile_internal_free_return;
 
-#ifdef RE_ENABLE_I18N
   /* If possible, do searching in single byte encoding to speed things up.  */
   if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
     optimize_utf8 (dfa);
-#endif
 
   /* Then create the initial state of the dfa.  */
   err = create_initial_state (dfa);
@@ -830,11 +803,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
 #ifndef _LIBC
   const char *codeset_name;
 #endif
-#ifdef RE_ENABLE_I18N
   size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
-#else
-  size_t max_i18n_object_size = 0;
-#endif
   size_t max_object_size =
     MAX (sizeof (struct re_state_table_entry),
         MAX (sizeof (re_token_t),
@@ -886,7 +855,6 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
   dfa->map_notascii = 0;
 #endif
 
-#ifdef RE_ENABLE_I18N
   if (dfa->mb_cur_max > 1)
     {
       if (dfa->is_utf8)
@@ -906,14 +874,13 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
                wint_t wch = __btowc (ch);
                if (wch != WEOF)
                  dfa->sb_char[i] |= (bitset_word_t) 1 << j;
-# ifndef _LIBC
+#ifndef _LIBC
                if (isascii (ch) && wch != ch)
                  dfa->map_notascii = 1;
-# endif
+#endif
              }
        }
     }
-#endif
 
   if (__glibc_unlikely (dfa->nodes == NULL || dfa->state_table == NULL))
     return REG_ESPACE;
@@ -1074,7 +1041,6 @@ create_initial_state (re_dfa_t *dfa)
   return REG_NOERROR;
 }
 
-#ifdef RE_ENABLE_I18N
 /* If it is possible to do searching in single byte encoding instead of UTF-8
    to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
    DFA nodes where needed.  */
@@ -1154,7 +1120,6 @@ optimize_utf8 (re_dfa_t *dfa)
   dfa->is_utf8 = 0;
   dfa->has_mb_node = dfa->nbackref > 0 || has_period;
 }
-#endif
 
 /* Analyze the structure tree, and calculate "first", "next", "edest",
    "eclosure", and "inveclosure".  */
@@ -1792,7 +1757,6 @@ peek_token (re_token_t *token, re_string_t *input, 
reg_syntax_t syntax)
   token->opr.c = c;
 
   token->word_char = 0;
-#ifdef RE_ENABLE_I18N
   token->mb_partial = 0;
   if (input->mb_cur_max > 1
       && !re_string_first_byte (input, re_string_cur_idx (input)))
@@ -1801,7 +1765,6 @@ peek_token (re_token_t *token, re_string_t *input, 
reg_syntax_t syntax)
       token->mb_partial = 1;
       return 1;
     }
-#endif
   if (c == '\\')
     {
       unsigned char c2;
@@ -1814,7 +1777,6 @@ peek_token (re_token_t *token, re_string_t *input, 
reg_syntax_t syntax)
       c2 = re_string_peek_byte_case (input, 1);
       token->opr.c = c2;
       token->type = CHARACTER;
-#ifdef RE_ENABLE_I18N
       if (input->mb_cur_max > 1)
        {
          wint_t wc = re_string_wchar_at (input,
@@ -1822,7 +1784,6 @@ peek_token (re_token_t *token, re_string_t *input, 
reg_syntax_t syntax)
          token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
        }
       else
-#endif
        token->word_char = IS_WORD_CHAR (c2) != 0;
 
       switch (c2)
@@ -1928,14 +1889,12 @@ peek_token (re_token_t *token, re_string_t *input, 
reg_syntax_t syntax)
     }
 
   token->type = CHARACTER;
-#ifdef RE_ENABLE_I18N
   if (input->mb_cur_max > 1)
     {
       wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
       token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
     }
   else
-#endif
     token->word_char = IS_WORD_CHAR (token->opr.c);
 
   switch (c)
@@ -2027,14 +1986,12 @@ peek_token_bracket (re_token_t *token, re_string_t 
*input, reg_syntax_t syntax)
   c = re_string_peek_byte (input, 0);
   token->opr.c = c;
 
-#ifdef RE_ENABLE_I18N
   if (input->mb_cur_max > 1
       && !re_string_first_byte (input, re_string_cur_idx (input)))
     {
       token->type = CHARACTER;
       return 1;
     }
-#endif /* RE_ENABLE_I18N */
 
   if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
       && re_string_cur_idx (input) + 1 < re_string_length (input))
@@ -2256,7 +2213,6 @@ parse_expression (re_string_t *regexp, regex_t *preg, 
re_token_t *token,
          *err = REG_ESPACE;
          return NULL;
        }
-#ifdef RE_ENABLE_I18N
       if (dfa->mb_cur_max > 1)
        {
          while (!re_string_eoi (regexp)
@@ -2273,7 +2229,6 @@ parse_expression (re_string_t *regexp, regex_t *preg, 
re_token_t *token,
                }
            }
        }
-#endif
       break;
 
     case OP_OPEN_SUBEXP:
@@ -2666,7 +2621,6 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, 
re_dfa_t *dfa,
 
 #ifndef _LIBC
 
-# ifdef RE_ENABLE_I18N
 /* Convert the byte B to the corresponding wide character.  In a
    unibyte locale, treat B as itself.  In a multibyte locale, return
    WEOF if B is an encoding error.  */
@@ -2675,7 +2629,6 @@ parse_byte (unsigned char b, re_charset_t *mbcset)
 {
   return mbcset == NULL ? b : __btowc (b);
 }
-# endif
 
   /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
      Build the range expression which starts from START_ELEM, and ends
@@ -2685,21 +2638,13 @@ parse_byte (unsigned char b, re_charset_t *mbcset)
      update it.  */
 
 static reg_errcode_t
-# ifdef RE_ENABLE_I18N
 build_range_exp (const reg_syntax_t syntax,
                  bitset_t sbcset,
                  re_charset_t *mbcset,
                  Idx *range_alloc,
                  const bracket_elem_t *start_elem,
                  const bracket_elem_t *end_elem)
-# else /* not RE_ENABLE_I18N */
-build_range_exp (const reg_syntax_t syntax,
-                 bitset_t sbcset,
-                 const bracket_elem_t *start_elem,
-                 const bracket_elem_t *end_elem)
-# endif /* not RE_ENABLE_I18N */
 {
-  unsigned int start_ch, end_ch;
   /* Equivalence Classes and Character Classes can't be a range start/end.  */
   if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
                        || start_elem->type == CHAR_CLASS
@@ -2715,92 +2660,74 @@ build_range_exp (const reg_syntax_t syntax,
                            && strlen ((char *) end_elem->opr.name) > 1)))
     return REG_ECOLLATE;
 
-# ifdef RE_ENABLE_I18N
-  {
-    wchar_t wc;
-    wint_t start_wc;
-    wint_t end_wc;
-
+  unsigned int
     start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
                : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
                   : 0));
+  unsigned int
     end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
              : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
                 : 0));
+  wint_t
     start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
                ? parse_byte (start_ch, mbcset) : start_elem->opr.wch);
+  wint_t
     end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
              ? parse_byte (end_ch, mbcset) : end_elem->opr.wch);
-    if (start_wc == WEOF || end_wc == WEOF)
-      return REG_ECOLLATE;
-    else if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
-                              && start_wc > end_wc))
-      return REG_ERANGE;
-
-    /* Got valid collation sequence values, add them as a new entry.
-       However, for !_LIBC we have no collation elements: if the
-       character set is single byte, the single byte character set
-       that we build below suffices.  parse_bracket_exp passes
-       no MBCSET if dfa->mb_cur_max == 1.  */
-    if (mbcset)
-      {
-       /* Check the space of the arrays.  */
-       if (__glibc_unlikely (*range_alloc == mbcset->nranges))
-         {
-           /* There is not enough space, need realloc.  */
-           wchar_t *new_array_start, *new_array_end;
-           Idx new_nranges;
-
-           /* +1 in case of mbcset->nranges is 0.  */
-           new_nranges = 2 * mbcset->nranges + 1;
-           /* Use realloc since mbcset->range_starts and mbcset->range_ends
-              are NULL if *range_alloc == 0.  */
-           new_array_start = re_realloc (mbcset->range_starts, wchar_t,
-                                         new_nranges);
-           new_array_end = re_realloc (mbcset->range_ends, wchar_t,
-                                       new_nranges);
-
-           if (__glibc_unlikely (new_array_start == NULL
-                                 || new_array_end == NULL))
-             {
-               re_free (new_array_start);
-               re_free (new_array_end);
-               return REG_ESPACE;
-             }
 
-           mbcset->range_starts = new_array_start;
-           mbcset->range_ends = new_array_end;
-           *range_alloc = new_nranges;
-         }
+  if (start_wc == WEOF || end_wc == WEOF)
+    return REG_ECOLLATE;
+  else if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
+                             && start_wc > end_wc))
+    return REG_ERANGE;
 
-       mbcset->range_starts[mbcset->nranges] = start_wc;
-       mbcset->range_ends[mbcset->nranges++] = end_wc;
-      }
+  /* Got valid collation sequence values, add them as a new entry.
+     However, for !_LIBC we have no collation elements: if the
+     character set is single byte, the single byte character set
+     that we build below suffices.  parse_bracket_exp passes
+     no MBCSET if dfa->mb_cur_max == 1.  */
+  if (mbcset)
+    {
+      /* Check the space of the arrays.  */
+      if (__glibc_unlikely (*range_alloc == mbcset->nranges))
+        {
+          /* There is not enough space, need realloc.  */
+          wchar_t *new_array_start, *new_array_end;
+          Idx new_nranges;
+
+          /* +1 in case of mbcset->nranges is 0.  */
+          new_nranges = 2 * mbcset->nranges + 1;
+          /* Use realloc since mbcset->range_starts and mbcset->range_ends
+             are NULL if *range_alloc == 0.  */
+          new_array_start = re_realloc (mbcset->range_starts, wchar_t,
+                                        new_nranges);
+          new_array_end = re_realloc (mbcset->range_ends, wchar_t,
+                                      new_nranges);
+
+          if (__glibc_unlikely (new_array_start == NULL
+                                || new_array_end == NULL))
+            {
+              re_free (new_array_start);
+              re_free (new_array_end);
+              return REG_ESPACE;
+            }
+
+          mbcset->range_starts = new_array_start;
+          mbcset->range_ends = new_array_end;
+          *range_alloc = new_nranges;
+        }
+
+      mbcset->range_starts[mbcset->nranges] = start_wc;
+      mbcset->range_ends[mbcset->nranges++] = end_wc;
+    }
+
+  /* Build the table for single byte characters.  */
+  for (wchar_t wc = 0; wc < SBC_MAX; ++wc)
+    {
+      if (start_wc <= wc && wc <= end_wc)
+        bitset_set (sbcset, wc);
+    }
 
-    /* Build the table for single byte characters.  */
-    for (wc = 0; wc < SBC_MAX; ++wc)
-      {
-       if (start_wc <= wc && wc <= end_wc)
-         bitset_set (sbcset, wc);
-      }
-  }
-# else /* not RE_ENABLE_I18N */
-  {
-    unsigned int ch;
-    start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
-               : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
-                  : 0));
-    end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
-             : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
-                : 0));
-    if (start_ch > end_ch)
-      return REG_ERANGE;
-    /* Build the table for single byte characters.  */
-    for (ch = 0; ch < SBC_MAX; ++ch)
-      if (start_ch <= ch  && ch <= end_ch)
-       bitset_set (sbcset, ch);
-  }
-# endif /* not RE_ENABLE_I18N */
   return REG_NOERROR;
 }
 #endif /* not _LIBC */
@@ -2813,12 +2740,8 @@ build_range_exp (const reg_syntax_t syntax,
    pointer argument since we may update it.  */
 
 static reg_errcode_t
-# ifdef RE_ENABLE_I18N
 build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
                        Idx *coll_sym_alloc, const unsigned char *name)
-# else /* not RE_ENABLE_I18N */
-build_collating_symbol (bitset_t sbcset, const unsigned char *name)
-# endif /* not RE_ENABLE_I18N */
 {
   size_t name_len = strlen ((const char *) name);
   if (__glibc_unlikely (name_len != 1))
@@ -3091,11 +3014,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, 
re_token_t *token,
 
   re_token_t br_token;
   re_bitset_ptr_t sbcset;
-#ifdef RE_ENABLE_I18N
   re_charset_t *mbcset;
   Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
   Idx equiv_class_alloc = 0, char_class_alloc = 0;
-#endif /* not RE_ENABLE_I18N */
   bool non_match = false;
   bin_tree_t *work_tree;
   int token_len;
@@ -3118,19 +3039,11 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, 
re_token_t *token,
     }
 #endif
   sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
-#ifdef RE_ENABLE_I18N
   mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
-#endif /* RE_ENABLE_I18N */
-#ifdef RE_ENABLE_I18N
   if (__glibc_unlikely (sbcset == NULL || mbcset == NULL))
-#else
-  if (__glibc_unlikely (sbcset == NULL))
-#endif /* RE_ENABLE_I18N */
     {
       re_free (sbcset);
-#ifdef RE_ENABLE_I18N
       re_free (mbcset);
-#endif
       *err = REG_ESPACE;
       return NULL;
     }
@@ -3143,9 +3056,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, 
re_token_t *token,
     }
   if (token->type == OP_NON_MATCH_LIST)
     {
-#ifdef RE_ENABLE_I18N
       mbcset->non_match = 1;
-#endif /* not RE_ENABLE_I18N */
       non_match = true;
       if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
        bitset_set (sbcset, '\n');
@@ -3232,14 +3143,10 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, 
re_token_t *token,
          *err = build_range_exp (sbcset, mbcset, &range_alloc,
                                  &start_elem, &end_elem);
 #else
-# ifdef RE_ENABLE_I18N
          *err = build_range_exp (syntax, sbcset,
                                  dfa->mb_cur_max > 1 ? mbcset : NULL,
                                  &range_alloc, &start_elem, &end_elem);
-# else
-         *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem);
-# endif
-#endif /* RE_ENABLE_I18N */
+#endif
          if (__glibc_unlikely (*err != REG_NOERROR))
            goto parse_bracket_exp_free_return;
        }
@@ -3250,7 +3157,6 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, 
re_token_t *token,
            case SB_CHAR:
              bitset_set (sbcset, start_elem.opr.ch);
              break;
-#ifdef RE_ENABLE_I18N
            case MB_CHAR:
              /* Check whether the array has enough space.  */
              if (__glibc_unlikely (mbchar_alloc == mbcset->nmbchars))
@@ -3268,30 +3174,23 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, 
re_token_t *token,
                }
              mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
              break;
-#endif /* RE_ENABLE_I18N */
            case EQUIV_CLASS:
              *err = build_equiv_class (sbcset,
-#ifdef RE_ENABLE_I18N
                                        mbcset, &equiv_class_alloc,
-#endif /* RE_ENABLE_I18N */
                                        start_elem.opr.name);
              if (__glibc_unlikely (*err != REG_NOERROR))
                goto parse_bracket_exp_free_return;
              break;
            case COLL_SYM:
              *err = build_collating_symbol (sbcset,
-#ifdef RE_ENABLE_I18N
                                             mbcset, &coll_sym_alloc,
-#endif /* RE_ENABLE_I18N */
                                             start_elem.opr.name);
              if (__glibc_unlikely (*err != REG_NOERROR))
                goto parse_bracket_exp_free_return;
              break;
            case CHAR_CLASS:
              *err = build_charclass (regexp->trans, sbcset,
-#ifdef RE_ENABLE_I18N
                                      mbcset, &char_class_alloc,
-#endif /* RE_ENABLE_I18N */
                                      (const char *) start_elem.opr.name,
                                      syntax);
              if (__glibc_unlikely (*err != REG_NOERROR))
@@ -3317,7 +3216,6 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, 
re_token_t *token,
   if (non_match)
     bitset_not (sbcset);
 
-#ifdef RE_ENABLE_I18N
   /* Ensure only single byte characters are set.  */
   if (dfa->mb_cur_max > 1)
     bitset_mask (sbcset, dfa->sb_char);
@@ -3361,11 +3259,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, 
re_token_t *token,
        }
     }
   else
-#endif /* not RE_ENABLE_I18N */
     {
-#ifdef RE_ENABLE_I18N
       free_charset (mbcset);
-#endif
       /* Build a tree for simple bracket.  */
       br_token.type = SIMPLE_BRACKET;
       br_token.opr.sbcset = sbcset;
@@ -3379,9 +3274,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, 
re_token_t *token,
   *err = REG_ESPACE;
  parse_bracket_exp_free_return:
   re_free (sbcset);
-#ifdef RE_ENABLE_I18N
   free_charset (mbcset);
-#endif /* RE_ENABLE_I18N */
   return NULL;
 }
 
@@ -3392,7 +3285,6 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t 
*regexp,
                       re_token_t *token, int token_len, re_dfa_t *dfa,
                       reg_syntax_t syntax, bool accept_hyphen)
 {
-#ifdef RE_ENABLE_I18N
   int cur_char_size;
   cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
   if (cur_char_size > 1)
@@ -3402,7 +3294,6 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t 
*regexp,
       re_string_skip_bytes (regexp, cur_char_size);
       return REG_NOERROR;
     }
-#endif /* RE_ENABLE_I18N */
   re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
   if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
       || token->type == OP_OPEN_EQUIV_CLASS)
@@ -3475,12 +3366,8 @@ parse_bracket_symbol (bracket_elem_t *elem, re_string_t 
*regexp,
      is a pointer argument since we may update it.  */
 
 static reg_errcode_t
-#ifdef RE_ENABLE_I18N
 build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
                   Idx *equiv_class_alloc, const unsigned char *name)
-#else /* not RE_ENABLE_I18N */
-build_equiv_class (bitset_t sbcset, const unsigned char *name)
-#endif /* not RE_ENABLE_I18N */
 {
 #ifdef _LIBC
   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
@@ -3560,14 +3447,9 @@ build_equiv_class (bitset_t sbcset, const unsigned char 
*name)
      is a pointer argument since we may update it.  */
 
 static reg_errcode_t
-#ifdef RE_ENABLE_I18N
 build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
                 re_charset_t *mbcset, Idx *char_class_alloc,
                 const char *class_name, reg_syntax_t syntax)
-#else /* not RE_ENABLE_I18N */
-build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
-                const char *class_name, reg_syntax_t syntax)
-#endif /* not RE_ENABLE_I18N */
 {
   int i;
   const char *name = class_name;
@@ -3578,7 +3460,6 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
       && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
     name = "alpha";
 
-#ifdef RE_ENABLE_I18N
   /* Check the space of the arrays.  */
   if (__glibc_unlikely (*char_class_alloc == mbcset->nchar_classes))
     {
@@ -3594,7 +3475,6 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
       *char_class_alloc = new_char_class_alloc;
     }
   mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
-#endif /* RE_ENABLE_I18N */
 
 #define BUILD_CHARCLASS_LOOP(ctype_func)       \
   do {                                         \
@@ -3649,10 +3529,8 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE 
trans,
                    reg_errcode_t *err)
 {
   re_bitset_ptr_t sbcset;
-#ifdef RE_ENABLE_I18N
   re_charset_t *mbcset;
   Idx alloc = 0;
-#endif /* not RE_ENABLE_I18N */
   reg_errcode_t ret;
   bin_tree_t *tree;
 
@@ -3662,7 +3540,6 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE 
trans,
       *err = REG_ESPACE;
       return NULL;
     }
-#ifdef RE_ENABLE_I18N
   mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
   if (__glibc_unlikely (mbcset == NULL))
     {
@@ -3671,21 +3548,14 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE 
trans,
       return NULL;
     }
   mbcset->non_match = non_match;
-#endif /* RE_ENABLE_I18N */
 
   /* We don't care the syntax in this case.  */
-  ret = build_charclass (trans, sbcset,
-#ifdef RE_ENABLE_I18N
-                        mbcset, &alloc,
-#endif /* RE_ENABLE_I18N */
-                        class_name, 0);
+  ret = build_charclass (trans, sbcset, mbcset, &alloc, class_name, 0);
 
   if (__glibc_unlikely (ret != REG_NOERROR))
     {
       re_free (sbcset);
-#ifdef RE_ENABLE_I18N
       free_charset (mbcset);
-#endif /* RE_ENABLE_I18N */
       *err = ret;
       return NULL;
     }
@@ -3697,11 +3567,9 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE 
trans,
   if (non_match)
     bitset_not (sbcset);
 
-#ifdef RE_ENABLE_I18N
   /* Ensure only single byte characters are set.  */
   if (dfa->mb_cur_max > 1)
     bitset_mask (sbcset, dfa->sb_char);
-#endif
 
   /* Build a tree for simple bracket.  */
   re_token_t br_token = { .type = SIMPLE_BRACKET, .opr.sbcset = sbcset };
@@ -3709,7 +3577,6 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE 
trans,
   if (__glibc_unlikely (tree == NULL))
     goto build_word_op_espace;
 
-#ifdef RE_ENABLE_I18N
   if (dfa->mb_cur_max > 1)
     {
       bin_tree_t *mbc_tree;
@@ -3730,15 +3597,10 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE 
trans,
       free_charset (mbcset);
       return tree;
     }
-#else /* not RE_ENABLE_I18N */
-  return tree;
-#endif /* not RE_ENABLE_I18N */
 
  build_word_op_espace:
   re_free (sbcset);
-#ifdef RE_ENABLE_I18N
   free_charset (mbcset);
-#endif /* RE_ENABLE_I18N */
   *err = REG_ESPACE;
   return NULL;
 }
@@ -3771,21 +3633,19 @@ fetch_number (re_string_t *input, re_token_t *token, 
reg_syntax_t syntax)
   return num;
 }
 
-#ifdef RE_ENABLE_I18N
 static void
 free_charset (re_charset_t *cset)
 {
   re_free (cset->mbchars);
-# ifdef _LIBC
+#ifdef _LIBC
   re_free (cset->coll_syms);
   re_free (cset->equiv_classes);
-# endif
+#endif
   re_free (cset->range_starts);
   re_free (cset->range_ends);
   re_free (cset->char_classes);
   re_free (cset);
 }
-#endif /* RE_ENABLE_I18N */
 
 /* Functions for binary tree operation.  */
 
@@ -3851,13 +3711,10 @@ mark_opt_subexp (void *extra, bin_tree_t *node)
 static void
 free_token (re_token_t *node)
 {
-#ifdef RE_ENABLE_I18N
   if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
     free_charset (node->opr.mbcset);
-  else
-#endif /* RE_ENABLE_I18N */
-    if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
-      re_free (node->opr.sbcset);
+  else if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
+    re_free (node->opr.sbcset);
 }
 
 /* Worker function for tree walking.  Free the allocated memory inside NODE
diff --git a/lib/regex_internal.c b/lib/regex_internal.c
index aefcfa2f52..9767cd0d07 100644
--- a/lib/regex_internal.c
+++ b/lib/regex_internal.c
@@ -30,10 +30,8 @@ static re_dfastate_t *create_cd_newstate (const re_dfa_t 
*dfa,
                                          re_hashval_t hash);
 static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
                                                Idx new_buf_len);
-#ifdef RE_ENABLE_I18N
 static void build_wcs_buffer (re_string_t *pstr);
 static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr);
-#endif /* RE_ENABLE_I18N */
 static void build_upper_buffer (re_string_t *pstr);
 static void re_string_translate_buffer (re_string_t *pstr);
 static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
@@ -91,7 +89,6 @@ re_string_construct (re_string_t *pstr, const char *str, Idx 
len,
 
   if (icase)
     {
-#ifdef RE_ENABLE_I18N
       if (dfa->mb_cur_max > 1)
        {
          while (1)
@@ -109,16 +106,13 @@ re_string_construct (re_string_t *pstr, const char *str, 
Idx len,
            }
        }
       else
-#endif /* RE_ENABLE_I18N  */
        build_upper_buffer (pstr);
     }
   else
     {
-#ifdef RE_ENABLE_I18N
       if (dfa->mb_cur_max > 1)
        build_wcs_buffer (pstr);
       else
-#endif /* RE_ENABLE_I18N  */
        {
          if (trans != NULL)
            re_string_translate_buffer (pstr);
@@ -139,7 +133,6 @@ static reg_errcode_t
 __attribute_warn_unused_result__
 re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
 {
-#ifdef RE_ENABLE_I18N
   if (pstr->mb_cur_max > 1)
     {
       wint_t *new_wcs;
@@ -162,7 +155,6 @@ re_string_realloc_buffers (re_string_t *pstr, Idx 
new_buf_len)
          pstr->offsets = new_offsets;
        }
     }
-#endif /* RE_ENABLE_I18N  */
   if (pstr->mbs_allocated)
     {
       unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
@@ -194,7 +186,6 @@ re_string_construct_common (const char *str, Idx len, 
re_string_t *pstr,
   pstr->raw_stop = pstr->stop;
 }
 
-#ifdef RE_ENABLE_I18N
 
 /* Build wide character buffer PSTR->WCS.
    If the byte sequence of the string are:
@@ -530,7 +521,6 @@ re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, 
wint_t *last_wc)
   *last_wc = wc;
   return rawbuf_idx;
 }
-#endif /* RE_ENABLE_I18N  */
 
 /* Build the buffer PSTR->MBS, and apply the translation if we need.
    This function is used in case of REG_ICASE.  */
@@ -585,10 +575,8 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int 
eflags)
   else
     {
       /* Reset buffer.  */
-#ifdef RE_ENABLE_I18N
       if (pstr->mb_cur_max > 1)
        memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
-#endif /* RE_ENABLE_I18N */
       pstr->len = pstr->raw_len;
       pstr->stop = pstr->raw_stop;
       pstr->valid_len = 0;
@@ -608,7 +596,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int 
eflags)
       if (__glibc_likely (offset < pstr->valid_raw_len))
        {
          /* Yes, move them to the front of the buffer.  */
-#ifdef RE_ENABLE_I18N
          if (__glibc_unlikely (pstr->offsets_needed))
            {
              Idx low = 0, high = pstr->valid_len, mid;
@@ -672,15 +659,12 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int 
eflags)
                }
            }
          else
-#endif
            {
              pstr->tip_context = re_string_context_at (pstr, offset - 1,
                                                        eflags);
-#ifdef RE_ENABLE_I18N
              if (pstr->mb_cur_max > 1)
                memmove (pstr->wcs, pstr->wcs + offset,
                         (pstr->valid_len - offset) * sizeof (wint_t));
-#endif /* RE_ENABLE_I18N */
              if (__glibc_unlikely (pstr->mbs_allocated))
                memmove (pstr->mbs, pstr->mbs + offset,
                         pstr->valid_len - offset);
@@ -691,7 +675,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int 
eflags)
        }
       else
        {
-#ifdef RE_ENABLE_I18N
          /* No, skip all characters until IDX.  */
          Idx prev_valid_len = pstr->valid_len;
 
@@ -701,9 +684,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int 
eflags)
              pstr->stop = pstr->raw_stop - idx + offset;
              pstr->offsets_needed = 0;
            }
-#endif
          pstr->valid_len = 0;
-#ifdef RE_ENABLE_I18N
          if (pstr->mb_cur_max > 1)
            {
              Idx wcs_idx;
@@ -787,7 +768,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int 
eflags)
              pstr->valid_raw_len = pstr->valid_len;
            }
          else
-#endif /* RE_ENABLE_I18N */
            {
              int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
              pstr->valid_raw_len = 0;
@@ -807,7 +787,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int 
eflags)
   pstr->stop -= offset;
 
   /* Then build the buffers.  */
-#ifdef RE_ENABLE_I18N
   if (pstr->mb_cur_max > 1)
     {
       if (pstr->icase)
@@ -820,7 +799,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int 
eflags)
        build_wcs_buffer (pstr);
     }
   else
-#endif /* RE_ENABLE_I18N */
     if (__glibc_unlikely (pstr->mbs_allocated))
       {
        if (pstr->icase)
@@ -846,28 +824,22 @@ re_string_peek_byte_case (const re_string_t *pstr, Idx 
idx)
   if (__glibc_likely (!pstr->mbs_allocated))
     return re_string_peek_byte (pstr, idx);
 
-#ifdef RE_ENABLE_I18N
   if (pstr->mb_cur_max > 1
       && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
     return re_string_peek_byte (pstr, idx);
-#endif
 
   off = pstr->cur_idx + idx;
-#ifdef RE_ENABLE_I18N
   if (pstr->offsets_needed)
     off = pstr->offsets[off];
-#endif
 
   ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
 
-#ifdef RE_ENABLE_I18N
   /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
      this function returns CAPITAL LETTER I instead of first byte of
      DOTLESS SMALL LETTER I.  The latter would confuse the parser,
      since peek_byte_case doesn't advance cur_idx in any way.  */
   if (pstr->offsets_needed && !isascii (ch))
     return re_string_peek_byte (pstr, idx);
-#endif
 
   return ch;
 }
@@ -878,7 +850,6 @@ re_string_fetch_byte_case (re_string_t *pstr)
   if (__glibc_likely (!pstr->mbs_allocated))
     return re_string_fetch_byte (pstr);
 
-#ifdef RE_ENABLE_I18N
   if (pstr->offsets_needed)
     {
       Idx off;
@@ -904,7 +875,6 @@ re_string_fetch_byte_case (re_string_t *pstr)
                            re_string_char_size_at (pstr, pstr->cur_idx));
       return ch;
     }
-#endif
 
   return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
 }
@@ -912,10 +882,8 @@ re_string_fetch_byte_case (re_string_t *pstr)
 static void
 re_string_destruct (re_string_t *pstr)
 {
-#ifdef RE_ENABLE_I18N
   re_free (pstr->wcs);
   re_free (pstr->offsets);
-#endif /* RE_ENABLE_I18N  */
   if (pstr->mbs_allocated)
     re_free (pstr->mbs);
 }
@@ -933,7 +901,6 @@ re_string_context_at (const re_string_t *input, Idx idx, 
int eflags)
   if (__glibc_unlikely (idx == input->len))
     return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
            : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
-#ifdef RE_ENABLE_I18N
   if (input->mb_cur_max > 1)
     {
       wint_t wc;
@@ -953,7 +920,6 @@ re_string_context_at (const re_string_t *input, Idx idx, 
int eflags)
              ? CONTEXT_NEWLINE : 0);
     }
   else
-#endif
     {
       c = re_string_byte_at (input, idx);
       if (bitset_contain (input->word_char, c))
@@ -1451,11 +1417,9 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
     }
   dfa->nodes[dfa->nodes_len] = token;
   dfa->nodes[dfa->nodes_len].constraint = 0;
-#ifdef RE_ENABLE_I18N
   dfa->nodes[dfa->nodes_len].accept_mb =
     ((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
      || token.type == COMPLEX_BRACKET);
-#endif
   dfa->nexts[dfa->nodes_len] = -1;
   re_node_set_init_empty (dfa->edests + dfa->nodes_len);
   re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
@@ -1651,9 +1615,7 @@ create_ci_newstate (const re_dfa_t *dfa, const 
re_node_set *nodes,
       re_token_type_t type = node->type;
       if (type == CHARACTER && !node->constraint)
        continue;
-#ifdef RE_ENABLE_I18N
       newstate->accept_mb |= node->accept_mb;
-#endif /* RE_ENABLE_I18N */
 
       /* If the state has the halt node, the state is a halt state.  */
       if (type == END_OF_RE)
@@ -1705,9 +1667,7 @@ create_cd_newstate (const re_dfa_t *dfa, const 
re_node_set *nodes,
 
       if (type == CHARACTER && !constraint)
        continue;
-#ifdef RE_ENABLE_I18N
       newstate->accept_mb |= node->accept_mb;
-#endif /* RE_ENABLE_I18N */
 
       /* If the state has the halt node, the state is a halt state.  */
       if (type == END_OF_RE)
diff --git a/lib/regex_internal.h b/lib/regex_internal.h
index 1245e782ff..b1bc2ee62f 100644
--- a/lib/regex_internal.h
+++ b/lib/regex_internal.h
@@ -116,10 +116,6 @@
 # define gettext_noop(String) String
 #endif
 
-#if (defined MB_CUR_MAX && HAVE_WCTYPE_H && HAVE_ISWCTYPE) || _LIBC
-# define RE_ENABLE_I18N
-#endif
-
 /* Number of ASCII characters.  */
 #define ASCII_CHARS 0x80
 
@@ -246,10 +242,8 @@ typedef enum
   SIMPLE_BRACKET = 3,
   OP_BACK_REF = 4,
   OP_PERIOD = 5,
-#ifdef RE_ENABLE_I18N
   COMPLEX_BRACKET = 6,
   OP_UTF8_PERIOD = 7,
-#endif /* RE_ENABLE_I18N */
 
   /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
      when the debugger shows values of this enum type.  */
@@ -287,30 +281,29 @@ typedef enum
 
 } re_token_type_t;
 
-#ifdef RE_ENABLE_I18N
 typedef struct
 {
   /* Multibyte characters.  */
   wchar_t *mbchars;
 
+#ifdef _LIBC
   /* Collating symbols.  */
-# ifdef _LIBC
   int32_t *coll_syms;
-# endif
+#endif
 
+#ifdef _LIBC
   /* Equivalence classes. */
-# ifdef _LIBC
   int32_t *equiv_classes;
-# endif
+#endif
 
   /* Range expressions. */
-# ifdef _LIBC
+#ifdef _LIBC
   uint32_t *range_starts;
   uint32_t *range_ends;
-# else /* not _LIBC */
+#else
   wchar_t *range_starts;
   wchar_t *range_ends;
-# endif /* not _LIBC */
+#endif
 
   /* Character classes. */
   wctype_t *char_classes;
@@ -333,7 +326,6 @@ typedef struct
   /* # of character classes. */
   Idx nchar_classes;
 } re_charset_t;
-#endif /* RE_ENABLE_I18N */
 
 typedef struct
 {
@@ -341,9 +333,7 @@ typedef struct
   {
     unsigned char c;           /* for CHARACTER */
     re_bitset_ptr_t sbcset;    /* for SIMPLE_BRACKET */
-#ifdef RE_ENABLE_I18N
     re_charset_t *mbcset;      /* for COMPLEX_BRACKET */
-#endif /* RE_ENABLE_I18N */
     Idx idx;                   /* for BACK_REF */
     re_context_type ctx_type;  /* for ANCHOR */
   } opr;
@@ -355,12 +345,10 @@ typedef struct
   unsigned int constraint : 10;        /* context constraint */
   unsigned int duplicated : 1;
   unsigned int opt_subexp : 1;
-#ifdef RE_ENABLE_I18N
   unsigned int accept_mb : 1;
   /* These 2 bits can be moved into the union if needed (e.g. if running out
      of bits; move opr.c to opr.c.c and move the flags to opr.c.flags).  */
   unsigned int mb_partial : 1;
-#endif
   unsigned int word_char : 1;
 } re_token_t;
 
@@ -375,12 +363,10 @@ struct re_string_t
      REG_ICASE, upper cases of the string are stored, otherwise MBS points
      the same address that RAW_MBS points.  */
   unsigned char *mbs;
-#ifdef RE_ENABLE_I18N
   /* Store the wide character string which is corresponding to MBS.  */
   wint_t *wcs;
   Idx *offsets;
   mbstate_t cur_state;
-#endif
   /* Index in RAW_MBS.  Each character mbs[i] corresponds to
      raw_mbs[raw_mbs_idx + i].  */
   Idx raw_mbs_idx;
@@ -779,7 +765,6 @@ bitset_mask (bitset_t dest, const bitset_t src)
     dest[bitset_i] &= src[bitset_i];
 }
 
-#ifdef RE_ENABLE_I18N
 /* Functions for re_string.  */
 static int
 __attribute__ ((pure, unused))
@@ -803,15 +788,15 @@ re_string_wchar_at (const re_string_t *pstr, Idx idx)
   return (wint_t) pstr->wcs[idx];
 }
 
-# ifdef _LIBC
-#  include <locale/weight.h>
-# endif
+#ifdef _LIBC
+# include <locale/weight.h>
+#endif
 
 static int
 __attribute__ ((pure, unused))
 re_string_elem_size_at (const re_string_t *pstr, Idx idx)
 {
-# ifdef _LIBC
+#ifdef _LIBC
   const unsigned char *p, *extra;
   const int32_t *table, *indirect;
   uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
@@ -827,11 +812,10 @@ re_string_elem_size_at (const re_string_t *pstr, Idx idx)
       findidx (table, indirect, extra, &p, pstr->len - idx);
       return p - pstr->mbs - idx;
     }
-  else
-# endif /* _LIBC */
-    return 1;
+#endif /* _LIBC */
+
+  return 1;
 }
-#endif /* RE_ENABLE_I18N */
 
 #ifdef _LIBC
 # if __GNUC__ >= 7
diff --git a/lib/regexec.c b/lib/regexec.c
index 6aeba3c0b4..1495ffd3e5 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -67,11 +67,9 @@ static reg_errcode_t set_regs (const regex_t *preg,
                               bool fl_backtrack);
 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
 
-#ifdef RE_ENABLE_I18N
 static int sift_states_iter_mb (const re_match_context_t *mctx,
                                re_sift_context_t *sctx,
                                Idx node_idx, Idx str_idx, Idx max_str_idx);
-#endif /* RE_ENABLE_I18N */
 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
                                           re_sift_context_t *sctx);
 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
@@ -123,10 +121,8 @@ static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
                                        re_match_context_t *mctx,
                                        re_dfastate_t *pstate);
 #endif
-#ifdef RE_ENABLE_I18N
 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
                                       re_dfastate_t *pstate);
-#endif /* RE_ENABLE_I18N */
 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
                                          const re_node_set *nodes);
 static reg_errcode_t get_subexp (re_match_context_t *mctx,
@@ -156,14 +152,12 @@ static reg_errcode_t expand_bkref_cache 
(re_match_context_t *mctx,
                                         re_node_set *cur_nodes, Idx cur_str,
                                         Idx subexp_num, int type);
 static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
-#ifdef RE_ENABLE_I18N
 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
                                    const re_string_t *input, Idx idx);
-# ifdef _LIBC
+#ifdef _LIBC
 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
                                                   size_t name_len);
-# endif /* _LIBC */
-#endif /* RE_ENABLE_I18N */
+#endif
 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
                                       const re_dfastate_t *state,
                                       re_node_set *states_node,
@@ -779,12 +773,10 @@ re_search_internal (const regex_t *preg, const char 
*string, Idx length,
       if (__glibc_unlikely (err != REG_NOERROR))
        goto free_return;
 
-#ifdef RE_ENABLE_I18N
-     /* Don't consider this char as a possible match start if it part,
-       yet isn't the head, of a multibyte character.  */
+      /* Don't consider this char as a possible match start if it part,
+         yet isn't the head, of a multibyte character.  */
       if (!sb && !re_string_first_byte (&mctx.input, 0))
        continue;
-#endif
 
       /* It seems to be appropriate one, then use the matcher.  */
       /* We assume that the matching starts from 0.  */
@@ -858,7 +850,6 @@ re_search_internal (const regex_t *preg, const char 
*string, Idx length,
       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
        if (pmatch[reg_idx].rm_so != -1)
          {
-#ifdef RE_ENABLE_I18N
            if (__glibc_unlikely (mctx.input.offsets_needed != 0))
              {
                pmatch[reg_idx].rm_so =
@@ -870,9 +861,6 @@ re_search_internal (const regex_t *preg, const char 
*string, Idx length,
                   ? mctx.input.valid_raw_len
                   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
              }
-#else
-           DEBUG_ASSERT (mctx.input.offsets_needed == 0);
-#endif
            pmatch[reg_idx].rm_so += match_first;
            pmatch[reg_idx].rm_eo += match_first;
          }
@@ -1261,12 +1249,9 @@ proceed_next_node (const re_match_context_t *mctx, Idx 
nregs, regmatch_t *regs,
       Idx naccepted = 0;
       re_token_type_t type = dfa->nodes[node].type;
 
-#ifdef RE_ENABLE_I18N
       if (dfa->nodes[node].accept_mb)
        naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
-      else
-#endif /* RE_ENABLE_I18N */
-      if (type == OP_BACK_REF)
+      else if (type == OP_BACK_REF)
        {
          Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
          if (subexp_idx < nregs)
@@ -1634,12 +1619,10 @@ build_sifted_states (const re_match_context_t *mctx, 
re_sift_context_t *sctx,
       bool ok;
       DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type));
 
-#ifdef RE_ENABLE_I18N
       /* If the node may accept "multi byte".  */
       if (dfa->nodes[prev_node].accept_mb)
        naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
                                         str_idx, sctx->last_str_idx);
-#endif /* RE_ENABLE_I18N */
 
       /* We don't check backreferences here.
         See update_cur_sifted_state().  */
@@ -2176,7 +2159,6 @@ sift_states_bkref (const re_match_context_t *mctx, 
re_sift_context_t *sctx,
 }
 
 
-#ifdef RE_ENABLE_I18N
 static int
 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
                     Idx node_idx, Idx str_idx, Idx max_str_idx)
@@ -2196,8 +2178,6 @@ sift_states_iter_mb (const re_match_context_t *mctx, 
re_sift_context_t *sctx,
      'naccepted' bytes input.  */
   return naccepted;
 }
-#endif /* RE_ENABLE_I18N */
-
 
 /* Functions for state transition.  */
 
@@ -2215,7 +2195,6 @@ transit_state (reg_errcode_t *err, re_match_context_t 
*mctx,
   re_dfastate_t **trtable;
   unsigned char ch;
 
-#ifdef RE_ENABLE_I18N
   /* If the current state can accept multibyte.  */
   if (__glibc_unlikely (state->accept_mb))
     {
@@ -2223,7 +2202,6 @@ transit_state (reg_errcode_t *err, re_match_context_t 
*mctx,
       if (__glibc_unlikely (*err != REG_NOERROR))
        return NULL;
     }
-#endif /* RE_ENABLE_I18N */
 
   /* Then decide the next state with the single byte.  */
 #if 0
@@ -2444,7 +2422,6 @@ transit_state_sb (reg_errcode_t *err, re_match_context_t 
*mctx,
 }
 #endif
 
-#ifdef RE_ENABLE_I18N
 static reg_errcode_t
 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
 {
@@ -2512,7 +2489,6 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t 
*pstate)
     }
   return REG_NOERROR;
 }
-#endif /* RE_ENABLE_I18N */
 
 static reg_errcode_t
 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
@@ -3002,9 +2978,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, 
Idx str_idx,
   const re_dfa_t *const dfa = mctx->dfa;
   bool ok;
   Idx cur_idx;
-#ifdef RE_ENABLE_I18N
   reg_errcode_t err = REG_NOERROR;
-#endif
   re_node_set union_set;
   re_node_set_init_empty (&union_set);
   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
@@ -3013,7 +2987,6 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, 
Idx str_idx,
       Idx cur_node = cur_nodes->elems[cur_idx];
       DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type));
 
-#ifdef RE_ENABLE_I18N
       /* If the node may accept "multi byte".  */
       if (dfa->nodes[cur_node].accept_mb)
        {
@@ -3051,7 +3024,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, 
Idx str_idx,
                }
            }
        }
-#endif /* RE_ENABLE_I18N */
+
       if (naccepted
          || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
        {
@@ -3475,18 +3448,15 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const 
re_dfastate_t *state,
        }
       else if (type == OP_PERIOD)
        {
-#ifdef RE_ENABLE_I18N
          if (dfa->mb_cur_max > 1)
            bitset_merge (accepts, dfa->sb_char);
          else
-#endif
            bitset_set_all (accepts);
          if (!(dfa->syntax & RE_DOT_NEWLINE))
            bitset_clear (accepts, '\n');
          if (dfa->syntax & RE_DOT_NOT_NULL)
            bitset_clear (accepts, '\0');
        }
-#ifdef RE_ENABLE_I18N
       else if (type == OP_UTF8_PERIOD)
        {
          if (ASCII_CHARS % BITSET_WORD_BITS == 0)
@@ -3498,7 +3468,6 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const 
re_dfastate_t *state,
          if (dfa->syntax & RE_DOT_NOT_NULL)
            bitset_clear (accepts, '\0');
        }
-#endif
       else
        continue;
 
@@ -3529,12 +3498,10 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const 
re_dfastate_t *state,
                  bitset_empty (accepts);
                  continue;
                }
-#ifdef RE_ENABLE_I18N
              if (dfa->mb_cur_max > 1)
                for (j = 0; j < BITSET_WORDS; ++j)
                  any_set |= (accepts[j] &= (dfa->word_char[j] | 
~dfa->sb_char[j]));
              else
-#endif
                for (j = 0; j < BITSET_WORDS; ++j)
                  any_set |= (accepts[j] &= dfa->word_char[j]);
              if (!any_set)
@@ -3548,12 +3515,10 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const 
re_dfastate_t *state,
                  bitset_empty (accepts);
                  continue;
                }
-#ifdef RE_ENABLE_I18N
              if (dfa->mb_cur_max > 1)
                for (j = 0; j < BITSET_WORDS; ++j)
                  any_set |= (accepts[j] &= ~(dfa->word_char[j] & 
dfa->sb_char[j]));
              else
-#endif
                for (j = 0; j < BITSET_WORDS; ++j)
                  any_set |= (accepts[j] &= ~dfa->word_char[j]);
              if (!any_set)
@@ -3630,7 +3595,6 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const 
re_dfastate_t *state,
   return -1;
 }
 
-#ifdef RE_ENABLE_I18N
 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
    Return the number of the bytes the node accepts.
    STR_IDX is the current index of the input string.
@@ -3639,9 +3603,9 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const 
re_dfastate_t *state,
    one collating element like '.', '[a-z]', opposite to the other nodes
    can only accept one byte.  */
 
-# ifdef _LIBC
-#  include <locale/weight.h>
-# endif
+#ifdef _LIBC
+# include <locale/weight.h>
+#endif
 
 static int
 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
@@ -3725,12 +3689,12 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx 
node_idx,
   if (node->type == COMPLEX_BRACKET)
     {
       const re_charset_t *cset = node->opr.mbcset;
-# ifdef _LIBC
+#ifdef _LIBC
       const unsigned char *pin
        = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
       Idx j;
       uint32_t nrules;
-# endif /* _LIBC */
+#endif
       int match_len = 0;
       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
                    ? re_string_wchar_at (input, str_idx) : 0);
@@ -3753,7 +3717,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx 
node_idx,
            }
        }
 
-# ifdef _LIBC
+#ifdef _LIBC
       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
       if (nrules != 0)
        {
@@ -3842,7 +3806,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx 
node_idx,
            }
        }
       else
-# endif /* _LIBC */
+#endif /* _LIBC */
        {
          /* match with range expression?  */
          for (i = 0; i < cset->nranges; ++i)
@@ -3868,7 +3832,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx 
node_idx,
   return 0;
 }
 
-# ifdef _LIBC
+#ifdef _LIBC
 static unsigned int
 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
 {
@@ -3926,8 +3890,7 @@ find_collation_sequence_value (const unsigned char *mbs, 
size_t mbs_len)
       return UINT_MAX;
     }
 }
-# endif /* _LIBC */
-#endif /* RE_ENABLE_I18N */
+#endif /* _LIBC */
 
 /* Check whether the node accepts the byte which is IDX-th
    byte of the INPUT.  */
@@ -3950,12 +3913,10 @@ check_node_accept (const re_match_context_t *mctx, 
const re_token_t *node,
         return false;
       break;
 
-#ifdef RE_ENABLE_I18N
     case OP_UTF8_PERIOD:
       if (ch >= ASCII_CHARS)
         return false;
       FALLTHROUGH;
-#endif
     case OP_PERIOD:
       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
          || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
@@ -4016,7 +3977,6 @@ extend_buffers (re_match_context_t *mctx, int min_len)
   /* Then reconstruct the buffers.  */
   if (pstr->icase)
     {
-#ifdef RE_ENABLE_I18N
       if (pstr->mb_cur_max > 1)
        {
          ret = build_wcs_upper_buffer (pstr);
@@ -4024,16 +3984,13 @@ extend_buffers (re_match_context_t *mctx, int min_len)
            return ret;
        }
       else
-#endif /* RE_ENABLE_I18N  */
        build_upper_buffer (pstr);
     }
   else
     {
-#ifdef RE_ENABLE_I18N
       if (pstr->mb_cur_max > 1)
        build_wcs_buffer (pstr);
       else
-#endif /* RE_ENABLE_I18N  */
        {
          if (pstr->trans != NULL)
            re_string_translate_buffer (pstr);
diff --git a/modules/regex b/modules/regex
index a32c46e183..866ade60ce 100644
--- a/modules/regex
+++ b/modules/regex
@@ -21,6 +21,7 @@ btowc           [test $ac_use_included_regex = yes]
 builtin-expect  [test $ac_use_included_regex = yes]
 dynarray        [test $ac_use_included_regex = yes]
 intprops        [test $ac_use_included_regex = yes]
+iswctype        [test $ac_use_included_regex = yes]
 langinfo        [test $ac_use_included_regex = yes]
 libc-config     [test $ac_use_included_regex = yes]
 lock            [test $ac_use_included_regex = yes]
-- 
2.33.1





reply via email to

[Prev in Thread] Current Thread [Next in Thread]