bison-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 08/13] yacc.c: escape trigraphs in detailed parse.error


From: Akim Demaille
Subject: [PATCH 08/13] yacc.c: escape trigraphs in detailed parse.error
Date: Sat, 18 Jan 2020 16:40:07 +0100

* src/output.c (escape_trigraphs, xescape_trigraphs): New.
(prepare_symbol_names): Use it.
* tests/regression.at: Check the handling of trigraphs with
parse.error = detailed.
---
 src/output.c        | 52 ++++++++++++++++++++++++++++++++++++++++++++-
 tests/regression.at | 21 +++++++++++++-----
 2 files changed, 67 insertions(+), 6 deletions(-)

diff --git a/src/output.c b/src/output.c
index 9d597e2d..8de00ed2 100644
--- a/src/output.c
+++ b/src/output.c
@@ -132,6 +132,56 @@ string_output (FILE *out, char const *string)
 }
 
 
+/* Store in BUFFER (of size BUFFERSIZE) a copy of SRC where "??X" trigraphs
+   become "?""?X"; return the size of the full result, final NUL included,
+   even when it does not fit.  With BUFFERSIZE = 0, BUFFER is not written.  */
+static ptrdiff_t
+escape_trigraphs (char *buffer, ptrdiff_t buffersize, const char *src)
+{
+#define STORE(c)                                \
+  do                                            \
+    {                                           \
+      if (res < buffersize)                     \
+        buffer[res] = (c);                      \
+      ++res;                                    \
+    }                                           \
+  while (0)
+  ptrdiff_t res = 0;
+  for (ptrdiff_t i = 0, len = strlen (src); i < len; ++i)
+    {
+      if (i + 2 < len           /* A trigraph is three characters long.  */
+          && src[i] == '?' && src[i+1] == '?')
+        {
+          switch (src[i+2])
+            {
+            case '!': case '\'':
+            case '(': case ')': case '-': case '/':
+            case '<': case '=': case '>':
+              i += 1;           /* With the loop's ++i, skips both '?'.  */
+              STORE ('?');
+              STORE ('"');      /* Close the string literal...  */
+              STORE ('"');      /* ... and open a new one.  */
+              STORE ('?');
+              continue;
+            }
+        }
+      STORE (src[i]);
+    }
+  STORE ('\0');
+#undef STORE
+  return res;
+}
+
+/* Same as xstrdup, except that trigraphs are escaped (see escape_trigraphs).  */
+static char *
+xescape_trigraphs (const char *src)
+{
+  ptrdiff_t bufsize = escape_trigraphs (NULL, 0, src);  /* Size only.  */
+  char *buf = xcharalloc (bufsize);
+  escape_trigraphs (buf, bufsize, src);                 /* Fill BUF.  */
+  return buf;
+}
+
 /* Generate the b4_<MUSCLE_NAME> (e.g., b4_tname) table with the
    symbol names (aka tags). */
 
@@ -148,7 +198,7 @@ prepare_symbol_names (char const *muscle_name)
     {
       char *cp =
         symbols[i]->tag[0] == '"' && !quote
-        ? xstrdup (symbols[i]->tag)
+        ? xescape_trigraphs (symbols[i]->tag)
         : quotearg_alloc (symbols[i]->tag, -1, qo);
       /* Width of the next token, including the two quotes, the
          comma and the space.  */
diff --git a/tests/regression.at b/tests/regression.at
index 7d304614..b293fb35 100644
--- a/tests/regression.at
+++ b/tests/regression.at
@@ -366,17 +366,17 @@ AT_CLEANUP
 ## Token definitions.  ##
 ## ------------------- ##
 
+m4_pushdef([AT_TEST],
+[AT_SETUP([Token definitions: $1])
 
-AT_SETUP([Token definitions])
-
-AT_BISON_OPTION_PUSHDEFS
+AT_BISON_OPTION_PUSHDEFS([$1])
 
 AT_DATA_GRAMMAR([input.y],
 [%{
 ]AT_YYERROR_DECLARE[
 ]AT_YYLEX_DECLARE[
 %}
-[%define parse.error verbose
+[$1
 %token MYEOF 0 "end of file"
 %token 'a' "a"  // Bison managed, when fed with '%token 'f' "f"' to #define 'f'!
 %token B_TOKEN "b"
@@ -391,7 +391,6 @@ exp: "a" "\\\'\?\"\a\b\f\n\r\t\v\001\201\x001\x000081??!";
 ]AT_YYLEX_DEFINE([{ SPECIAL }])[
 ]AT_MAIN_DEFINE[
 ]])
-AT_BISON_OPTION_POPDEFS
 
 # Checking the warning message guarantees that the trigraph "??!" isn't
 # unnecessarily escaped here even though it would need to be if encoded in a
@@ -411,6 +410,7 @@ input.y:22.16-63: warning: symbol "\\'?\"\a\b\f\n\r\t\v\001\201\001\201??!" used
 
 AT_COMPILE([input])
 
+AT_ERROR_VERBOSE_IF([
 # Checking the error message here guarantees that yytname, which does contain
 # C-string literals, does have the trigraph escaped correctly.  Thus, the
 # symbol name reported by the parser is exactly the same as that reported by
@@ -419,9 +419,20 @@ AT_DATA([experr],
 [[syntax error, unexpected "\\'?\"\a\b\f\n\r\t\v\001\201\001\201??!", expecting a
 ]])
 AT_PARSER_CHECK([input], 1, [], [experr])
+])
+
+# We don't check the error message in "detailed" parse.error, since
+# the special characters are no longer escaped, and it produces
+# invalid UTF-8.
+
+AT_BISON_OPTION_POPDEFS
 AT_CLEANUP
+])
 
+AT_TEST([%define parse.error detailed])
+AT_TEST([%define parse.error verbose])
 
+m4_popdef([AT_TEST])
 
 ## -------------------- ##
 ## Characters Escapes.  ##
-- 
2.24.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]