bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] dfa: new options DFA_STAR_WARN, DFA_PLUS_WARN


From: Paul Eggert
Subject: [PATCH] dfa: new options DFA_STAR_WARN, DFA_PLUS_WARN
Date: Tue, 24 May 2022 16:04:23 -0700

This lets ‘grep -E '(*a|+b)'’ warn about the * and the +.
* lib/dfa.h (DFA_STAR_WARN, DFA_PLUS_WARN): New flags.
* lib/dfa.c (lex): Support them.
---
 ChangeLog |  7 +++++++
 lib/dfa.c | 51 ++++++++++++++++++++++++++++++++++-----------------
 lib/dfa.h |  8 ++++++++
 3 files changed, 49 insertions(+), 17 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 088e3b3134..5b20aa58e7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2022-05-24  Paul Eggert  <eggert@cs.ucla.edu>
+
+       dfa: new options DFA_STAR_WARN, DFA_PLUS_WARN
+       This lets ‘grep -E '(*a|+b)'’ warn about the * and the +.
+       * lib/dfa.h (DFA_STAR_WARN, DFA_PLUS_WARN): New flags.
+       * lib/dfa.c (lex): Support them.
+
 2022-05-23  Paul Eggert  <eggert@cs.ucla.edu>
 
        dfa: '\n' is not governed by RE_LIMITED_OPS
diff --git a/lib/dfa.c b/lib/dfa.c
index 5d92b38b4c..bd4c5f0582 100644
--- a/lib/dfa.c
+++ b/lib/dfa.c
@@ -1311,17 +1311,25 @@ lex (struct dfa *dfa)
             goto default_case;
           if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
             goto normal_char;
-          if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
-              && dfa->lex.laststart)
-            goto normal_char;
+          if (dfa->lex.laststart)
+            {
+              if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS))
+                goto default_case;
+              if (dfa->syntax.dfaopts & DFA_PLUS_WARN)
+                dfawarn (_("? at start of expression"));
+            }
           return dfa->lex.lasttok = QMARK;
 
         case '*':
           if (backslash)
             goto normal_char;
-          if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
-              && dfa->lex.laststart)
-            goto normal_char;
+          if (dfa->lex.laststart)
+            {
+              if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS))
+                goto default_case;
+              if (dfa->syntax.dfaopts & DFA_STAR_WARN)
+                dfawarn (_("* at start of expression"));
+            }
           return dfa->lex.lasttok = STAR;
 
         case '+':
@@ -1329,9 +1337,13 @@ lex (struct dfa *dfa)
             goto default_case;
           if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
             goto normal_char;
-          if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
-              && dfa->lex.laststart)
-            goto normal_char;
+          if (dfa->lex.laststart)
+            {
+              if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS))
+                goto default_case;
+              if (dfa->syntax.dfaopts & DFA_PLUS_WARN)
+                dfawarn (_("+ at start of expression"));
+            }
           return dfa->lex.lasttok = PLUS;
 
         case '{':
@@ -1339,9 +1351,6 @@ lex (struct dfa *dfa)
             goto default_case;
           if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_BRACES) == 0))
             goto normal_char;
-          if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
-              && dfa->lex.laststart)
-            goto normal_char;
 
           /* Cases:
              {M} - exact count
@@ -1374,16 +1383,24 @@ lex (struct dfa *dfa)
                                   dfa->lex.maxrep * 10 + *p - '0'));
                   }
               }
-            if (! ((! backslash || (p != lim && *p++ == '\\'))
+            bool invalid_content
+              = ! ((! backslash || (p != lim && *p++ == '\\'))
                    && p != lim && *p++ == '}'
                    && 0 <= dfa->lex.minrep
                    && (dfa->lex.maxrep < 0
-                       || dfa->lex.minrep <= dfa->lex.maxrep)))
+                       || dfa->lex.minrep <= dfa->lex.maxrep));
+            if (invalid_content
+                && (dfa->syntax.syntax_bits & RE_INVALID_INTERVAL_ORD))
+              goto normal_char;
+            if (dfa->lex.laststart)
               {
-                if (dfa->syntax.syntax_bits & RE_INVALID_INTERVAL_ORD)
-                  goto normal_char;
-                dfaerror (_("invalid content of \\{\\}"));
+                if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS))
+                  goto default_case;
+                if (dfa->syntax.dfaopts & DFA_PLUS_WARN)
+                  dfawarn (_("{...} at start of expression"));
               }
+            if (invalid_content)
+              dfaerror (_("invalid content of \\{\\}"));
             if (RE_DUP_MAX < dfa->lex.maxrep)
               dfaerror (_("regular expression too big"));
             dfa->lex.ptr = p;
diff --git a/lib/dfa.h b/lib/dfa.h
index 8674929e90..91ec1d809f 100644
--- a/lib/dfa.h
+++ b/lib/dfa.h
@@ -81,6 +81,14 @@ enum
 
     /* Warn about stray backslashes before ordinary characters.  */
     DFA_STRAY_BACKSLASH_WARN = 1 << 3,
+
+    /* Warn about * appearing out of context at the start of an
+       expression or subexpression.  */
+    DFA_STAR_WARN = 1 << 4,
+
+    /* Warn about +, ?, {...} appearing out of context at the start of
+       an expression or subexpression.  */
+    DFA_PLUS_WARN = 1 << 5,
   };
 
 /* Initialize or reinitialize a DFA.  The arguments are:
-- 
2.36.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]