bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 2/2] regex: make it closer to libc


From: Paul Eggert
Subject: [PATCH 2/2] regex: make it closer to libc
Date: Fri, 19 Feb 2016 10:34:33 -0800

Make Idx a signed type, rather than possibly unsigned.
The unsignedness was not really buying us anything, since the code
overflows for other reasons before getting to PTRDIFF_MAX.  Making
it signed allows us to use -1 and -2 with abandon, like libc does,
thus lessening the number of differences between gnulib and libc.
Also, it should help avoid gratuitous warnings like the one
reported by Nelson H. F. Beebe in: http://bugs.gnu.org/22702
* lib/regex.h (__re_idx_t): Remove.  All uses changed to regoff_t.
* lib/regex_internal.h (SSIZE_MAX): Define if <limits.h> doesn't.
(IDX_MAX) [_REGEX_LARGE_OFFSETS]: Now SSIZE_MAX.
(REG_MISSING, REG_ERROR, REG_VALID_INDEX, REG_VALID_NONZERO_INDEX):
Remove.  Revert all uses to their libc versions.
---
 ChangeLog            | 13 ++++++++
 lib/regcomp.c        | 65 ++++++++++++++++++------------------
 lib/regex.h          | 33 ++++++++----------
 lib/regex_internal.c | 41 +++++++++++------------
 lib/regex_internal.h | 33 +++++++-----------
 lib/regexec.c        | 94 ++++++++++++++++++++++++++--------------------------
 6 files changed, 138 insertions(+), 141 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 1f8f1b0..2692bd3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,18 @@
 2016-02-19  Paul Eggert  <address@hidden>
 
+       * lib/regex_internal.h (IDX_MAX) [_REGEX_LARGE_OFFSETS]: Now SSIZE_MAX.
+
+       regex: make it closer to libc
+       Make Idx a signed type, rather than possibly unsigned.
+       The unsignedness was not really buying us anything, since the code
+       overflows for other reasons before getting to PTRDIFF_MAX.  Making
+       it signed allows us to use -1 and -2 with abandon, like libc does,
+       thus lessening the number of differences between gnulib and libc.
+       Also, it should help avoid gratuitous warnings like the one
+       reported by Nelson H. F. Beebe in: http://bugs.gnu.org/22702
+       * lib/regex.h (__re_idx_t): Remove.  All uses changed to regoff_t.
+
+
        regex: merge patches from libc
 
        2015-10-21  Joseph Myers  <address@hidden>
diff --git a/lib/regcomp.c b/lib/regcomp.c
index 6285050..6de9b72 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -1397,7 +1397,7 @@ calc_first (void *extra, bin_tree_t *node)
     {
       node->first = node;
       node->node_idx = re_dfa_add_node (dfa, node->token);
-      if (BE (node->node_idx == REG_MISSING, 0))
+      if (BE (node->node_idx == -1, 0))
        return REG_ESPACE;
       if (node->token.type == ANCHOR)
        dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
@@ -1458,8 +1458,8 @@ link_nfa_nodes (void *extra, bin_tree_t *node)
          right = node->right->first->node_idx;
        else
          right = node->next->node_idx;
-       assert (REG_VALID_INDEX (left));
-       assert (REG_VALID_INDEX (right));
+       assert (left > -1);
+       assert (right > -1);
        err = re_node_set_init_2 (dfa->edests + idx, left, right);
       }
       break;
@@ -1509,7 +1509,7 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, 
Idx top_clone_node,
          org_dest = dfa->nexts[org_node];
          re_node_set_empty (dfa->edests + clone_node);
          clone_dest = duplicate_node (dfa, org_dest, constraint);
-         if (BE (clone_dest == REG_MISSING, 0))
+         if (BE (clone_dest == -1, 0))
            return REG_ESPACE;
          dfa->nexts[clone_node] = dfa->nexts[org_node];
          ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
@@ -1542,7 +1542,7 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, 
Idx top_clone_node,
          /* In case the node has another constraint, append it.  */
          constraint |= dfa->nodes[org_node].constraint;
          clone_dest = duplicate_node (dfa, org_dest, constraint);
-         if (BE (clone_dest == REG_MISSING, 0))
+         if (BE (clone_dest == -1, 0))
            return REG_ESPACE;
          ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
          if (BE (! ok, 0))
@@ -1556,12 +1556,12 @@ duplicate_node_closure (re_dfa_t *dfa, Idx 
top_org_node, Idx top_clone_node,
          re_node_set_empty (dfa->edests + clone_node);
          /* Search for a duplicated node which satisfies the constraint.  */
          clone_dest = search_duplicated_node (dfa, org_dest, constraint);
-         if (clone_dest == REG_MISSING)
+         if (clone_dest == -1)
            {
              /* There is no such duplicated node, create a new one.  */
              reg_errcode_t err;
              clone_dest = duplicate_node (dfa, org_dest, constraint);
-             if (BE (clone_dest == REG_MISSING, 0))
+             if (BE (clone_dest == -1, 0))
                return REG_ESPACE;
              ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
              if (BE (! ok, 0))
@@ -1582,7 +1582,7 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, 
Idx top_clone_node,
 
          org_dest = dfa->edests[org_node].elems[1];
          clone_dest = duplicate_node (dfa, org_dest, constraint);
-         if (BE (clone_dest == REG_MISSING, 0))
+         if (BE (clone_dest == -1, 0))
            return REG_ESPACE;
          ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
          if (BE (! ok, 0))
@@ -1608,18 +1608,18 @@ search_duplicated_node (const re_dfa_t *dfa, Idx 
org_node,
          && constraint == dfa->nodes[idx].constraint)
        return idx; /* Found.  */
     }
-  return REG_MISSING; /* Not found.  */
+  return -1; /* Not found.  */
 }
 
 /* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
-   Return the index of the new node, or REG_MISSING if insufficient storage is
+   Return the index of the new node, or -1 if insufficient storage is
    available.  */
 
 static Idx
 duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint)
 {
   Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
-  if (BE (dup_idx != REG_MISSING, 1))
+  if (BE (dup_idx != -1, 1))
     {
       dfa->nodes[dup_idx].constraint = constraint;
       dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
@@ -1678,7 +1678,7 @@ calc_eclosure (re_dfa_t *dfa)
        }
 
 #ifdef DEBUG
-      assert (dfa->eclosures[node_idx].nelem != REG_MISSING);
+      assert (dfa->eclosures[node_idx].nelem != -1);
 #endif
 
       /* If we have already calculated, skip it.  */
@@ -1714,7 +1714,7 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, 
Idx node, bool root)
 
   /* This indicates that we are calculating this node now.
      We reference this value to avoid infinite loop.  */
-  dfa->eclosures[node].nelem = REG_MISSING;
+  dfa->eclosures[node].nelem = -1;
 
   /* If the current node has constraints, duplicate all nodes
      since they must inherit the constraints.  */
@@ -1736,7 +1736,7 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, 
Idx node, bool root)
        Idx edest = dfa->edests[node].elems[i];
        /* If calculating the epsilon closure of 'edest' is in progress,
           return intermediate result.  */
-       if (dfa->eclosures[edest].nelem == REG_MISSING)
+       if (dfa->eclosures[edest].nelem == -1)
          {
            incomplete = true;
            continue;
@@ -2533,7 +2533,7 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, 
re_dfa_t *dfa,
     {
       end = 0;
       start = fetch_number (regexp, token, syntax);
-      if (start == REG_MISSING)
+      if (start == -1)
        {
          if (token->type == CHARACTER && token->opr.c == ',')
            start = 0; /* We treat "{,m}" as "{0,m}".  */
@@ -2543,14 +2543,14 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, 
re_dfa_t *dfa,
              return NULL;
            }
        }
-      if (BE (start != REG_ERROR, 1))
+      if (BE (start != -2, 1))
        {
          /* We treat "{n}" as "{n,n}".  */
          end = ((token->type == OP_CLOSE_DUP_NUM) ? start
                 : ((token->type == CHARACTER && token->opr.c == ',')
-                   ? fetch_number (regexp, token, syntax) : REG_ERROR));
+                   ? fetch_number (regexp, token, syntax) : -2));
        }
-      if (BE (start == REG_ERROR || end == REG_ERROR, 0))
+      if (BE (start == -2 || end == -2, 0))
        {
          /* Invalid sequence.  */
          if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
@@ -2572,7 +2572,7 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, 
re_dfa_t *dfa,
          return elem;
        }
 
-      if (BE ((end != REG_MISSING && start > end)
+      if (BE ((end != -1 && start > end)
              || token->type != OP_CLOSE_DUP_NUM, 0))
        {
          /* First number greater than second.  */
@@ -2580,7 +2580,7 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, 
re_dfa_t *dfa,
          return NULL;
        }
 
-      if (BE (RE_DUP_MAX < (end == REG_MISSING ? start : end), 0))
+      if (BE (RE_DUP_MAX < (end == -1 ? start : end), 0))
        {
          *err = REG_ESIZE;
          return NULL;
@@ -2589,7 +2589,7 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, 
re_dfa_t *dfa,
   else
     {
       start = (token->type == OP_DUP_PLUS) ? 1 : 0;
-      end = (token->type == OP_DUP_QUESTION) ? 1 : REG_MISSING;
+      end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
     }
 
   fetch_token (token, regexp, syntax);
@@ -2633,7 +2633,7 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, 
re_dfa_t *dfa,
     }
 
   tree = create_tree (dfa, elem, NULL,
-                     (end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT));
+                     (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
   if (BE (tree == NULL, 0))
     goto parse_dup_op_espace;
 
@@ -2641,10 +2641,10 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, 
re_dfa_t *dfa,
    True if the arithmetic type T is signed.  */
 #define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
 
-  /* This loop is actually executed only when end != REG_MISSING,
+  /* This loop is actually executed only when end != -1,
      to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?...  We have
      already created the start+1-th copy.  */
-  if (TYPE_SIGNED (Idx) || end != REG_MISSING)
+  if (TYPE_SIGNED (Idx) || end != -1)
     for (i = start + 2; i <= end; ++i)
       {
        elem = duplicate_tree (elem, dfa);
@@ -3761,27 +3761,26 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE 
trans,
 
 /* This is intended for the expressions like "a{1,3}".
    Fetch a number from 'input', and return the number.
-   Return REG_MISSING if the number field is empty like "{,1}".
+   Return -1 if the number field is empty like "{,1}".
    Return RE_DUP_MAX + 1 if the number field is too large.
-   Return REG_ERROR if an error occurred.  */
+   Return -2 if an error occurred.  */
 
 static Idx
 fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
 {
-  Idx num = REG_MISSING;
+  Idx num = -1;
   unsigned char c;
   while (1)
     {
       fetch_token (token, input, syntax);
       c = token->opr.c;
       if (BE (token->type == END_OF_RE, 0))
-       return REG_ERROR;
+       return -2;
       if (token->type == OP_CLOSE_DUP_NUM || c == ',')
        break;
-      num = ((token->type != CHARACTER || c < '0' || '9' < c
-             || num == REG_ERROR)
-            ? REG_ERROR
-            : num == REG_MISSING
+      num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
+            ? -2
+            : num == -1
             ? c - '0'
             : MIN (RE_DUP_MAX + 1, num * 10 + c - '0'));
     }
@@ -3845,7 +3844,7 @@ create_token_tree (re_dfa_t *dfa, bin_tree_t *left, 
bin_tree_t *right,
   tree->token.opt_subexp = 0;
   tree->first = NULL;
   tree->next = NULL;
-  tree->node_idx = REG_MISSING;
+  tree->node_idx = -1;
 
   if (left != NULL)
     left->parent = tree;
diff --git a/lib/regex.h b/lib/regex.h
index 8e2cca5..6b969e3 100644
--- a/lib/regex.h
+++ b/lib/regex.h
@@ -42,11 +42,6 @@ extern "C" {
    supported within glibc itself, and glibc users should not define
    _REGEX_LARGE_OFFSETS.  */
 
-/* The type of nonnegative object indexes.  Traditionally, GNU regex
-   uses 'int' for these.  Code that uses __re_idx_t should work
-   regardless of whether the type is signed.  */
-typedef size_t __re_idx_t;
-
 /* The type of object sizes.  */
 typedef size_t __re_size_t;
 
@@ -58,7 +53,6 @@ typedef size_t __re_long_size_t;
 
 /* The traditional GNU regex implementation mishandles strings longer
    than INT_MAX.  */
-typedef int __re_idx_t;
 typedef unsigned int __re_size_t;
 typedef unsigned long int __re_long_size_t;
 
@@ -488,7 +482,8 @@ typedef struct re_pattern_buffer regex_t;
 #ifdef _REGEX_LARGE_OFFSETS
 /* POSIX 1003.1-2008 requires that regoff_t be at least as wide as
    ptrdiff_t and ssize_t.  We don't know of any hosts where ptrdiff_t
-   is wider than ssize_t, so ssize_t is safe.  */
+   is wider than ssize_t, so ssize_t is safe.  ptrdiff_t is not
+   visible here, so use ssize_t.  */
 typedef ssize_t regoff_t;
 #else
 /* The traditional GNU regex implementation mishandles strings longer
@@ -557,34 +552,34 @@ extern int re_compile_fastmap (struct re_pattern_buffer 
*__buffer);
    match, or -2 for an internal error.  Also return register
    information in REGS (if REGS and BUFFER->no_sub are nonzero).  */
 extern regoff_t re_search (struct re_pattern_buffer *__buffer,
-                          const char *__string, __re_idx_t __length,
-                          __re_idx_t __start, regoff_t __range,
+                          const char *__string, regoff_t __length,
+                          regoff_t __start, regoff_t __range,
                           struct re_registers *__regs);
 
 
 /* Like 're_search', but search in the concatenation of STRING1 and
    STRING2.  Also, stop searching at index START + STOP.  */
 extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer,
-                            const char *__string1, __re_idx_t __length1,
-                            const char *__string2, __re_idx_t __length2,
-                            __re_idx_t __start, regoff_t __range,
+                            const char *__string1, regoff_t __length1,
+                            const char *__string2, regoff_t __length2,
+                            regoff_t __start, regoff_t __range,
                             struct re_registers *__regs,
-                            __re_idx_t __stop);
+                            regoff_t __stop);
 
 
 /* Like 're_search', but return how many characters in STRING the regexp
    in BUFFER matched, starting at position START.  */
 extern regoff_t re_match (struct re_pattern_buffer *__buffer,
-                         const char *__string, __re_idx_t __length,
-                         __re_idx_t __start, struct re_registers *__regs);
+                         const char *__string, regoff_t __length,
+                         regoff_t __start, struct re_registers *__regs);
 
 
 /* Relates to 're_match' as 're_search_2' relates to 're_search'.  */
 extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer,
-                           const char *__string1, __re_idx_t __length1,
-                           const char *__string2, __re_idx_t __length2,
-                           __re_idx_t __start, struct re_registers *__regs,
-                           __re_idx_t __stop);
+                           const char *__string1, regoff_t __length1,
+                           const char *__string2, regoff_t __length2,
+                           regoff_t __start, struct re_registers *__regs,
+                           regoff_t __stop);
 
 
 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
diff --git a/lib/regex_internal.c b/lib/regex_internal.c
index 9dc7124..986aae2 100644
--- a/lib/regex_internal.c
+++ b/lib/regex_internal.c
@@ -922,7 +922,7 @@ internal_function
 re_string_context_at (const re_string_t *input, Idx idx, int eflags)
 {
   int c;
-  if (BE (! REG_VALID_INDEX (idx), 0))
+  if (BE (idx < 0, 0))
     /* In this case, we use the value stored in input->tip_context,
        since we can't know the character in input->mbs[-1] here.  */
     return input->tip_context;
@@ -938,10 +938,10 @@ re_string_context_at (const re_string_t *input, Idx idx, 
int eflags)
        {
 #if defined DEBUG && DEBUG
          /* It must not happen.  */
-         assert (REG_VALID_INDEX (wc_idx));
+         assert (wc_idx >= 0);
 #endif
          --wc_idx;
-         if (! REG_VALID_INDEX (wc_idx))
+         if (wc_idx < 0)
            return input->tip_context;
        }
       wc = input->wcs[wc_idx];
@@ -1077,25 +1077,25 @@ re_node_set_add_intersect (re_node_set *dest, const 
re_node_set *src1,
       if (src1->elems[i1] == src2->elems[i2])
        {
          /* Try to find the item in DEST.  Maybe we could binary search?  */
-         while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
+         while (id >= 0 && dest->elems[id] > src1->elems[i1])
            --id;
 
-          if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
+         if (id < 0 || dest->elems[id] != src1->elems[i1])
             dest->elems[--sbase] = src1->elems[i1];
 
-         if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
+         if (--i1 < 0 || --i2 < 0)
            break;
        }
 
       /* Lower the highest of the two items.  */
       else if (src1->elems[i1] < src2->elems[i2])
        {
-         if (! REG_VALID_INDEX (--i2))
+         if (--i2 < 0)
            break;
        }
       else
        {
-         if (! REG_VALID_INDEX (--i1))
+         if (--i1 < 0)
            break;
        }
     }
@@ -1108,7 +1108,7 @@ re_node_set_add_intersect (re_node_set *dest, const 
re_node_set *src1,
      DEST elements are already in place; this is more or
      less the same loop that is in re_node_set_merge.  */
   dest->nelem += delta;
-  if (delta > 0 && REG_VALID_INDEX (id))
+  if (delta > 0 && id >= 0)
     for (;;)
       {
        if (dest->elems[is] > dest->elems[id])
@@ -1122,7 +1122,7 @@ re_node_set_add_intersect (re_node_set *dest, const 
re_node_set *src1,
          {
            /* Slide from the bottom.  */
            dest->elems[id + delta] = dest->elems[id];
-           if (! REG_VALID_INDEX (--id))
+           if (--id < 0)
              break;
          }
       }
@@ -1216,8 +1216,7 @@ re_node_set_merge (re_node_set *dest, const re_node_set 
*src)
   /* Copy into the top of DEST the items of SRC that are not
      found in DEST.  Maybe we could binary search in DEST?  */
   for (sbase = dest->nelem + 2 * src->nelem,
-       is = src->nelem - 1, id = dest->nelem - 1;
-       REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
+       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
     {
       if (dest->elems[id] == src->elems[is])
        is--, id--;
@@ -1227,7 +1226,7 @@ re_node_set_merge (re_node_set *dest, const re_node_set 
*src)
        --id;
     }
 
-  if (REG_VALID_INDEX (is))
+  if (is >= 0)
     {
       /* If DEST is exhausted, the remaining items of SRC must be unique.  */
       sbase -= is + 1;
@@ -1256,7 +1255,7 @@ re_node_set_merge (re_node_set *dest, const re_node_set 
*src)
        {
          /* Slide from the bottom.  */
          dest->elems[id + delta] = dest->elems[id];
-         if (! REG_VALID_INDEX (--id))
+         if (--id < 0)
            {
              /* Copy remaining SRC elements.  */
              memcpy (dest->elems, dest->elems + sbase,
@@ -1355,7 +1354,7 @@ re_node_set_compare (const re_node_set *set1, const 
re_node_set *set2)
   Idx i;
   if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
     return false;
-  for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
+  for (i = set1->nelem ; --i >= 0 ; )
     if (set1->elems[i] != set2->elems[i])
       return false;
   return true;
@@ -1368,7 +1367,7 @@ internal_function __attribute__ ((pure))
 re_node_set_contains (const re_node_set *set, Idx elem)
 {
   __re_size_t idx, right, mid;
-  if (! REG_VALID_NONZERO_INDEX (set->nelem))
+  if (set->nelem <= 0)
     return 0;
 
   /* Binary search the element.  */
@@ -1398,7 +1397,7 @@ re_node_set_remove_at (re_node_set *set, Idx idx)
 
 
 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
-   Or return REG_MISSING if an error occurred.  */
+   Or return -1 if an error occurred.  */
 
 static Idx
 internal_function
@@ -1416,11 +1415,11 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
                                          MAX (sizeof (re_node_set),
                                               sizeof (Idx)));
       if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_nodes_alloc, 0))
-       return REG_MISSING;
+       return -1;
 
       new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
       if (BE (new_nodes == NULL, 0))
-       return REG_MISSING;
+       return -1;
       dfa->nodes = new_nodes;
       new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
       new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
@@ -1433,7 +1432,7 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
           re_free (new_indices);
           re_free (new_edests);
           re_free (new_eclosures);
-          return REG_MISSING;
+          return -1;
        }
       dfa->nexts = new_nexts;
       dfa->org_indices = new_indices;
@@ -1448,7 +1447,7 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
     ((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
      || token.type == COMPLEX_BRACKET);
 #endif
-  dfa->nexts[dfa->nodes_len] = REG_MISSING;
+  dfa->nexts[dfa->nodes_len] = -1;
   re_node_set_init_empty (dfa->edests + dfa->nodes_len);
   re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
   return dfa->nodes_len++;
diff --git a/lib/regex_internal.h b/lib/regex_internal.h
index 598ac4d..56a315a 100644
--- a/lib/regex_internal.h
+++ b/lib/regex_internal.h
@@ -151,31 +151,22 @@
 # define __attribute__(arg)
 #endif
 
-typedef __re_idx_t Idx;
-#ifdef _REGEX_LARGE_OFFSETS
-# define IDX_MAX (SIZE_MAX - 2)
-#else
-# define IDX_MAX INT_MAX
-#endif
-
-/* Special return value for failure to match.  */
-#define REG_MISSING ((Idx) -1)
-
-/* Special return value for internal error.  */
-#define REG_ERROR ((Idx) -2)
-
-/* Test whether N is a valid index, and is not one of the above.  */
-#ifdef _REGEX_LARGE_OFFSETS
-# define REG_VALID_INDEX(n) ((Idx) (n) < REG_ERROR)
-#else
-# define REG_VALID_INDEX(n) (0 <= (n))
+#ifndef SSIZE_MAX
+# define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2))
 #endif
 
-/* Test whether N is a valid nonzero index.  */
+/* The type of indexes into strings.  This is signed, not size_t,
+   since the API requires indexes to fit in regoff_t anyway, and using
+   signed integers makes the code a bit smaller and presumably faster.
+   The traditional GNU regex implementation uses int for indexes.
+   The POSIX-compatible implementation uses a possibly-wider type.
+   The name 'Idx' is three letters to minimize the hassle of
+   reindenting a lot of regex code that formerly used 'int'.  */
+typedef regoff_t Idx;
 #ifdef _REGEX_LARGE_OFFSETS
-# define REG_VALID_NONZERO_INDEX(n) ((Idx) ((n) - 1) < (Idx) (REG_ERROR - 1))
+# define IDX_MAX SSIZE_MAX
 #else
-# define REG_VALID_NONZERO_INDEX(n) (0 < (n))
+# define IDX_MAX INT_MAX
 #endif
 
 /* A hash value, suitable for computing hash tables.  */
diff --git a/lib/regexec.c b/lib/regexec.c
index fbde8e3..afdc173 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -621,7 +621,7 @@ re_search_internal (const regex_t *preg, const char 
*string, Idx length,
   bool fl_longest_match;
   int match_kind;
   Idx match_first;
-  Idx match_last = REG_MISSING;
+  Idx match_last = -1;
   Idx extra_nmatch;
   bool sb;
   int ch;
@@ -830,9 +830,9 @@ re_search_internal (const regex_t *preg, const char 
*string, Idx length,
       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
       match_last = check_matching (&mctx, fl_longest_match,
                                   start <= last_start ? &match_first : NULL);
-      if (match_last != REG_MISSING)
+      if (match_last != -1)
        {
-         if (BE (match_last == REG_ERROR, 0))
+         if (BE (match_last == -2, 0))
            {
              err = REG_ESPACE;
              goto free_return;
@@ -854,7 +854,7 @@ re_search_internal (const regex_t *preg, const char 
*string, Idx length,
                    break;
                  if (BE (err != REG_NOMATCH, 0))
                    goto free_return;
-                 match_last = REG_MISSING;
+                 match_last = -1;
                }
              else
                break; /* We found a match.  */
@@ -865,7 +865,7 @@ re_search_internal (const regex_t *preg, const char 
*string, Idx length,
     }
 
 #ifdef DEBUG
-  assert (match_last != REG_MISSING);
+  assert (match_last != -1);
   assert (err == REG_NOERROR);
 #endif
 
@@ -991,7 +991,7 @@ prune_impossible_nodes (re_match_context_t *mctx)
          do
            {
              --match_last;
-             if (! REG_VALID_INDEX (match_last))
+             if (match_last < 0)
                {
                  ret = REG_NOMATCH;
                  goto free_return;
@@ -1072,8 +1072,8 @@ acquire_init_state_context (reg_errcode_t *err, const 
re_match_context_t *mctx,
 }
 
 /* Check whether the regular expression match input string INPUT or not,
-   and return the index where the matching end.  Return REG_MISSING if
-   there is no match, and return REG_ERROR in case of an error.
+   and return the index where the matching end.  Return -1 if
+   there is no match, and return -2 in case of an error.
    FL_LONGEST_MATCH means we want the POSIX longest matching.
    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
    next place where we may want to try matching.
@@ -1088,7 +1088,7 @@ check_matching (re_match_context_t *mctx, bool 
fl_longest_match,
   const re_dfa_t *const dfa = mctx->dfa;
   reg_errcode_t err;
   Idx match = 0;
-  Idx match_last = REG_MISSING;
+  Idx match_last = -1;
   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
   re_dfastate_t *cur_state;
   bool at_init_state = p_match_first != NULL;
@@ -1100,7 +1100,7 @@ check_matching (re_match_context_t *mctx, bool 
fl_longest_match,
   if (BE (cur_state == NULL, 0))
     {
       assert (err == REG_ESPACE);
-      return REG_ERROR;
+      return -2;
     }
 
   if (mctx->state_log != NULL)
@@ -1155,7 +1155,7 @@ check_matching (re_match_context_t *mctx, bool 
fl_longest_match,
          if (BE (err != REG_NOERROR, 0))
            {
              assert (err == REG_ESPACE);
-             return REG_ERROR;
+             return -2;
            }
        }
 
@@ -1169,7 +1169,7 @@ check_matching (re_match_context_t *mctx, bool 
fl_longest_match,
             state using the state log, if available and if we have not
             already found a valid (even if not the longest) match.  */
          if (BE (err != REG_NOERROR, 0))
-           return REG_ERROR;
+           return -2;
 
          if (mctx->state_log == NULL
              || (match && !fl_longest_match)
@@ -1252,7 +1252,7 @@ check_halt_state_context (const re_match_context_t *mctx,
 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
    corresponding to the DFA).
    Return the destination node, and update EPS_VIA_NODES;
-   return REG_MISSING in case of errors.  */
+   return -1 in case of errors.  */
 
 static Idx
 internal_function
@@ -1270,15 +1270,15 @@ proceed_next_node (const re_match_context_t *mctx, Idx 
nregs, regmatch_t *regs,
       Idx dest_node;
       ok = re_node_set_insert (eps_via_nodes, node);
       if (BE (! ok, 0))
-       return REG_ERROR;
-      /* Pick up a valid destination, or return REG_MISSING if none
+       return -2;
+      /* Pick up a valid destination, or return -1 if none
         is found.  */
-      for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
+      for (dest_node = -1, i = 0; i < edests->nelem; ++i)
        {
          Idx candidate = edests->elems[i];
          if (!re_node_set_contains (cur_nodes, candidate))
            continue;
-          if (dest_node == REG_MISSING)
+          if (dest_node == -1)
            dest_node = candidate;
 
          else
@@ -1292,7 +1292,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx 
nregs, regmatch_t *regs,
              else if (fs != NULL
                       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
                                           eps_via_nodes))
-               return REG_ERROR;
+               return -2;
 
              /* We know we are going to exit.  */
              break;
@@ -1317,13 +1317,13 @@ proceed_next_node (const re_match_context_t *mctx, Idx 
nregs, regmatch_t *regs,
          if (fs != NULL)
            {
              if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
-               return REG_MISSING;
+               return -1;
              else if (naccepted)
                {
                  char *buf = (char *) re_string_get_buffer (&mctx->input);
                  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
                              naccepted) != 0)
-                   return REG_MISSING;
+                   return -1;
                }
            }
 
@@ -1332,7 +1332,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx 
nregs, regmatch_t *regs,
              Idx dest_node;
              ok = re_node_set_insert (eps_via_nodes, node);
              if (BE (! ok, 0))
-               return REG_ERROR;
+               return -2;
              dest_node = dfa->edests[node].elems[0];
              if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
                                        dest_node))
@@ -1348,12 +1348,12 @@ proceed_next_node (const re_match_context_t *mctx, Idx 
nregs, regmatch_t *regs,
          if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
                     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
                                               dest_node)))
-           return REG_MISSING;
+           return -1;
          re_node_set_empty (eps_via_nodes);
          return dest_node;
        }
     }
-  return REG_MISSING;
+  return -1;
 }
 
 static reg_errcode_t
@@ -1389,7 +1389,7 @@ pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, 
Idx nregs,
                regmatch_t *regs, re_node_set *eps_via_nodes)
 {
   Idx num = --fs->num;
-  assert (REG_VALID_INDEX (num));
+  assert (num >= 0);
   *pidx = fs->stack[num].idx;
   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
   re_node_set_free (eps_via_nodes);
@@ -1482,9 +1482,9 @@ set_regs (const regex_t *preg, const re_match_context_t 
*mctx, size_t nmatch,
       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
                                    &eps_via_nodes, fs);
 
-      if (BE (! REG_VALID_INDEX (cur_node), 0))
+      if (BE (cur_node < 0, 0))
        {
-         if (BE (cur_node == REG_ERROR, 0))
+         if (BE (cur_node == -2, 0))
            {
              re_node_set_free (&eps_via_nodes);
              if (prev_idx_match_malloced)
@@ -1868,10 +1868,10 @@ sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, 
re_node_set *dest_nodes,
          {
            Idx edst1 = dfa->edests[cur_node].elems[0];
            Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
-                        ? dfa->edests[cur_node].elems[1] : REG_MISSING);
+                        ? dfa->edests[cur_node].elems[1] : -1);
            if ((!re_node_set_contains (inv_eclosure, edst1)
                 && re_node_set_contains (dest_nodes, edst1))
-               || (REG_VALID_NONZERO_INDEX (edst2)
+               || (edst2 > 0
                    && !re_node_set_contains (inv_eclosure, edst2)
                    && re_node_set_contains (dest_nodes, edst2)))
              {
@@ -1951,7 +1951,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t 
*mctx, int boundaries,
       switch (dfa->nodes[node].type)
        {
        case OP_BACK_REF:
-         if (bkref_idx != REG_MISSING)
+         if (bkref_idx != -1)
            {
              struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
              do
@@ -2067,8 +2067,8 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set 
*dest_nodes,
       subexp_idx = dfa->nodes[ent->node].opr.idx;
       if (ent->subexp_to == str_idx)
        {
-         Idx ops_node = REG_MISSING;
-         Idx cls_node = REG_MISSING;
+         Idx ops_node = -1;
+         Idx cls_node = -1;
          for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
            {
              Idx node = dest_nodes->elems[node_idx];
@@ -2083,7 +2083,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set 
*dest_nodes,
 
          /* Check the limitation of the open subexpression.  */
          /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
-         if (REG_VALID_INDEX (ops_node))
+         if (ops_node >= 0)
            {
              err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
                                           candidates);
@@ -2092,7 +2092,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set 
*dest_nodes,
            }
 
          /* Check the limitation of the close subexpression.  */
-         if (REG_VALID_INDEX (cls_node))
+         if (cls_node >= 0)
            for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
              {
                Idx node = dest_nodes->elems[node_idx];
@@ -2145,7 +2145,7 @@ sift_states_bkref (const re_match_context_t *mctx, 
re_sift_context_t *sctx,
   re_sift_context_t local_sctx;
   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
 
-  if (first_idx == REG_MISSING)
+  if (first_idx == -1)
     return REG_NOERROR;
 
   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
@@ -2549,7 +2549,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t 
*pstate)
       if (BE (err != REG_NOERROR, 0))
        return err;
 #ifdef DEBUG
-      assert (dfa->nexts[cur_node_idx] != REG_MISSING);
+      assert (dfa->nexts[cur_node_idx] != -1);
 #endif
       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
 
@@ -2615,7 +2615,7 @@ transit_state_bkref (re_match_context_t *mctx, const 
re_node_set *nodes)
       /* And add the epsilon closures (which is 'new_dest_nodes') of
         the backreference to appropriate state_log.  */
 #ifdef DEBUG
-      assert (dfa->nexts[node_idx] != REG_MISSING);
+      assert (dfa->nexts[node_idx] != -1);
 #endif
       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
        {
@@ -2699,7 +2699,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx 
bkref_str_idx)
   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
-  if (cache_idx != REG_MISSING)
+  if (cache_idx != -1)
     {
       const struct re_backref_cache_entry *entry
        = mctx->bkref_ents + cache_idx;
@@ -2804,7 +2804,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx 
bkref_str_idx)
          nodes = &mctx->state_log[sl_str]->nodes;
          cls_node = find_subexp_node (dfa, nodes, subexp_num,
                                       OP_CLOSE_SUBEXP);
-         if (cls_node == REG_MISSING)
+         if (cls_node == -1)
            continue; /* No.  */
          if (sub_top->path == NULL)
            {
@@ -2883,7 +2883,7 @@ find_subexp_node (const re_dfa_t *dfa, const re_node_set 
*nodes,
          && node->opr.idx == subexp_idx)
        return cls_node;
     }
-  return REG_MISSING;
+  return -1;
 }
 
 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
@@ -3159,7 +3159,7 @@ check_arrival_expand_ecl (const re_dfa_t *dfa, 
re_node_set *cur_nodes,
       Idx cur_node = cur_nodes->elems[idx];
       const re_node_set *eclosure = dfa->eclosures + cur_node;
       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
-      if (outside_node == REG_MISSING)
+      if (outside_node == -1)
        {
          /* There are no problematic nodes, just merge them.  */
          err = re_node_set_merge (&new_nodes, eclosure);
@@ -3245,7 +3245,7 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set 
*cur_nodes,
   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
   struct re_backref_cache_entry *ent;
 
-  if (cache_idx_start == REG_MISSING)
+  if (cache_idx_start == -1)
     return REG_NOERROR;
 
  restart:
@@ -3370,7 +3370,7 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
   /* At first, group all nodes belonging to 'state' into several
      destinations.  */
   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
-  if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
+  if (BE (ndests <= 0, 0))
     {
       if (dests_node_malloced)
        free (dests_alloc);
@@ -3432,7 +3432,7 @@ out_free:
       for (j = 0; j < dests_node[i].nelem; ++j)
        {
          next_node = dfa->nexts[dests_node[i].elems[j]];
-         if (next_node != REG_MISSING)
+         if (next_node != -1)
            {
              err = re_node_set_merge (&follows, dfa->eclosures + next_node);
              if (BE (err != REG_NOERROR, 0))
@@ -3743,7 +3743,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const 
re_dfastate_t *state,
  error_return:
   for (j = 0; j < ndests; ++j)
     re_node_set_free (dests_node + j);
-  return REG_MISSING;
+  return -1;
 }
 
 #ifdef RE_ENABLE_I18N
@@ -4174,7 +4174,7 @@ internal_function __attribute_warn_unused_result__
 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
 {
   mctx->eflags = eflags;
-  mctx->match_last = REG_MISSING;
+  mctx->match_last = -1;
   if (n > 0)
     {
       /* Avoid overflow.  */
@@ -4295,7 +4295,7 @@ match_ctx_add_entry (re_match_context_t *mctx, Idx node, 
Idx str_idx, Idx from,
   return REG_NOERROR;
 }
 
-/* Return the first entry with the same str_idx, or REG_MISSING if none is
+/* Return the first entry with the same str_idx, or -1 if none is
    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
 
 static Idx
@@ -4315,7 +4315,7 @@ search_cur_bkref_entry (const re_match_context_t *mctx, 
Idx str_idx)
   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
     return left;
   else
-    return REG_MISSING;
+    return -1;
 }
 
 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
-- 
2.5.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]