bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 5/6] regex: implement rational ranges


From: Paul Eggert
Subject: [PATCH 5/6] regex: implement rational ranges
Date: Sun, 30 Dec 2012 00:17:01 -0800
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/17.0 Thunderbird/17.0

Reported by Aharon Robbins in
<http://sourceware.org/ml/libc-alpha/2012-12/msg00456.html>.
* lib/regcomp.c (build_range_exp) [!_LIBC]:
* lib/regexec.c (check_node_accept_bytes) [!_LIBC]:
Implement rational ranges.
---
 ChangeLog     |  7 +++++++
 lib/regcomp.c | 12 +++---------
 lib/regexec.c | 12 ++----------
 3 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 0824b08..8c61adb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
 2012-12-29  Paul Eggert  <address@hidden>
 
+       regex: implement rational ranges
+       Reported by Aharon Robbins in
+       <http://sourceware.org/ml/libc-alpha/2012-12/msg00456.html>.
+       * lib/regcomp.c (build_range_exp) [!_LIBC]:
+       * lib/regexec.c (check_node_accept_bytes) [!_LIBC]:
+       Implement rational ranges.
+
        regex: avoid redefining __wctype
        Reported by Aharon Robbins in
        <http://sourceware.org/ml/libc-alpha/2012-12/msg00456.html>.
diff --git a/lib/regcomp.c b/lib/regcomp.c
index 398df66..5cc2c67 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -2712,7 +2712,6 @@ build_range_exp (const reg_syntax_t syntax,
     wchar_t wc;
     wint_t start_wc;
     wint_t end_wc;
-    wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
 
     start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
                : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
@@ -2726,11 +2725,7 @@ build_range_exp (const reg_syntax_t syntax,
              ? __btowc (end_ch) : end_elem->opr.wch);
     if (start_wc == WEOF || end_wc == WEOF)
       return REG_ECOLLATE;
-    cmp_buf[0] = start_wc;
-    cmp_buf[4] = end_wc;
-
-    if (BE ((syntax & RE_NO_EMPTY_RANGES)
-            && wcscoll (cmp_buf, cmp_buf + 4) > 0, 0))
+    else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0))
       return REG_ERANGE;
 
     /* Got valid collation sequence values, add them as a new entry.
@@ -2771,9 +2766,7 @@ build_range_exp (const reg_syntax_t syntax,
     /* Build the table for single byte characters.  */
     for (wc = 0; wc < SBC_MAX; ++wc)
       {
-       cmp_buf[2] = wc;
-       if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
-           && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+       if (start_wc <= wc && wc <= end_wc)
          bitset_set (sbcset, wc);
       }
   }
@@ -2970,6 +2963,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, 
re_token_t *token,
              0))
        return REG_ERANGE;
 
+      /* FIXME: Implement rational ranges here, too.  */
       start_collseq = lookup_collation_sequence_value (start_elem);
       end_collseq = lookup_collation_sequence_value (end_elem);
       /* Check start/end collation sequence values.  */
diff --git a/lib/regexec.c b/lib/regexec.c
index 4e4b295..22e8dd6 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -3936,6 +3936,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx 
node_idx,
                in_collseq = find_collation_sequence_value (pin, elem_len);
            }
          /* match with range expression?  */
+         /* FIXME: Implement rational ranges here, too.  */
          for (i = 0; i < cset->nranges; ++i)
            if (cset->range_starts[i] <= in_collseq
                && in_collseq <= cset->range_ends[i])
@@ -3987,18 +3988,9 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx 
node_idx,
 # endif /* _LIBC */
        {
          /* match with range expression?  */
-#if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && defined __STRICT_ANSI__)
-         wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
-#else
-         wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
-         cmp_buf[2] = wc;
-#endif
          for (i = 0; i < cset->nranges; ++i)
            {
-             cmp_buf[0] = cset->range_starts[i];
-             cmp_buf[4] = cset->range_ends[i];
-             if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
-                 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+             if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
                {
                  match_len = char_len;
                  goto check_node_accept_bytes_match;
-- 
1.7.11.7




reply via email to

[Prev in Thread] Current Thread [Next in Thread]