[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 08/13] yacc.c: escape trigraphs in detailed parse.error
From: |
Akim Demaille |
Subject: |
[PATCH 08/13] yacc.c: escape trigraphs in detailed parse.error |
Date: |
Sat, 18 Jan 2020 16:40:07 +0100 |
* src/output.c (escape_trigraphs, xescape_trigraphs): New.
(prepare_symbol_names): Use it.
* tests/regression.at: Check the handling of trigraphs with
parse.error = detailed.
---
src/output.c | 52 ++++++++++++++++++++++++++++++++++++++++++++-
tests/regression.at | 21 +++++++++++++-----
2 files changed, 67 insertions(+), 6 deletions(-)
diff --git a/src/output.c b/src/output.c
index 9d597e2d..8de00ed2 100644
--- a/src/output.c
+++ b/src/output.c
@@ -132,6 +132,56 @@ string_output (FILE *out, char const *string)
}
+/* Store in BUFFER a copy of SRC where each trigraph ??X is emitted as ?""?X, and return
+ the size of the result (including the final NUL). Bytes beyond BUFFERSIZE are counted but
+ not stored, so a call with BUFFERSIZE = 0 returns the needed size for BUFFER. */
+static ptrdiff_t
+escape_trigraphs (char *buffer, ptrdiff_t buffersize, const char *src)
+{
+#define STORE(c) \
+ do \
+ { \
+ if (res < buffersize) \
+ buffer[res] = (c); \
+ ++res; \
+ } \
+ while (0)
+ ptrdiff_t res = 0; /* Bytes produced so far, whether stored or not. */
+ for (ptrdiff_t i = 0, len = strlen (src); i < len; ++i)
+ {
+ if (i + 2 < len /* A trigraph needs three characters. */
+ && src[i] == '?' && src[i+1] == '?')
+ {
+ switch (src[i+2]) /* Only these nine third characters form a trigraph. */
+ {
+ case '!': case '\'':
+ case '(': case ')': case '-': case '/':
+ case '<': case '=': case '>':
+ i += 1; /* Skip the second '?'; the loop's ++i then lands on the third character, copied verbatim. */
+ STORE ('?');
+ STORE ('"');
+ STORE ('"');
+ STORE ('?');
+ continue;
+ }
+ }
+ STORE (src[i]);
+ }
+ STORE ('\0'); /* Like any other byte, not stored if BUFFER is already full. */
+#undef STORE
+ return res;
+}
+
+/* Same as xstrdup, except that trigraphs are escaped (see escape_trigraphs); the result comes from xcharalloc. */
+static char *
+xescape_trigraphs (const char *src)
+{
+ ptrdiff_t bufsize = escape_trigraphs (NULL, 0, src); /* First pass: measure only, nothing is written. */
+ char *buf = xcharalloc (bufsize);
+ escape_trigraphs (buf, bufsize, src); /* Second pass: fill BUF, final NUL included. */
+ return buf;
+}
+
/* Generate the b4_<MUSCLE_NAME> (e.g., b4_tname) table with the
symbol names (aka tags). */
@@ -148,7 +198,7 @@ prepare_symbol_names (char const *muscle_name)
{
char *cp =
symbols[i]->tag[0] == '"' && !quote
- ? xstrdup (symbols[i]->tag)
+ ? xescape_trigraphs (symbols[i]->tag)
: quotearg_alloc (symbols[i]->tag, -1, qo);
/* Width of the next token, including the two quotes, the
comma and the space. */
diff --git a/tests/regression.at b/tests/regression.at
index 7d304614..b293fb35 100644
--- a/tests/regression.at
+++ b/tests/regression.at
@@ -366,17 +366,17 @@ AT_CLEANUP
## Token definitions. ##
## ------------------- ##
+m4_pushdef([AT_TEST],
+[AT_SETUP([Token definitions: $1])
-AT_SETUP([Token definitions])
-
-AT_BISON_OPTION_PUSHDEFS
+AT_BISON_OPTION_PUSHDEFS([$1])
AT_DATA_GRAMMAR([input.y],
[%{
]AT_YYERROR_DECLARE[
]AT_YYLEX_DECLARE[
%}
-[%define parse.error verbose
+[$1
%token MYEOF 0 "end of file"
%token 'a' "a" // Bison managed, when fed with '%token 'f' "f"' to #define
'f'!
%token B_TOKEN "b"
@@ -391,7 +391,6 @@ exp: "a" "\\\'\?\"\a\b\f\n\r\t\v\001\201\x001\x000081??!";
]AT_YYLEX_DEFINE([{ SPECIAL }])[
]AT_MAIN_DEFINE[
]])
-AT_BISON_OPTION_POPDEFS
# Checking the warning message guarantees that the trigraph "??!" isn't
# unnecessarily escaped here even though it would need to be if encoded in a
@@ -411,6 +410,7 @@ input.y:22.16-63: warning: symbol
"\\'?\"\a\b\f\n\r\t\v\001\201\001\201??!" used
AT_COMPILE([input])
+AT_ERROR_VERBOSE_IF([
# Checking the error message here guarantees that yytname, which does contain
# C-string literals, does have the trigraph escaped correctly. Thus, the
# symbol name reported by the parser is exactly the same as that reported by
@@ -419,9 +419,20 @@ AT_DATA([experr],
[[syntax error, unexpected "\\'?\"\a\b\f\n\r\t\v\001\201\001\201??!",
expecting a
]])
AT_PARSER_CHECK([input], 1, [], [experr])
+])
+
+# We don't check the error message in "detailed" parse.error, since
+# the special characters are no longer escaped, and it produces
+# invalid UTF-8.
+
+AT_BISON_OPTION_POPDEFS
AT_CLEANUP
+])
+AT_TEST([%define parse.error detailed])
+AT_TEST([%define parse.error verbose])
+m4_popdef([AT_TEST])
## -------------------- ##
## Characters Escapes. ##
--
2.24.1
- [PATCH 00/13] Better error messages, Akim Demaille, 2020/01/18
- [PATCH 01/13] glr.c: move some functions after the definition of types, Akim Demaille, 2020/01/18
- [PATCH 02/13] c: use yysymbol_name in traces, Akim Demaille, 2020/01/18
- [PATCH 03/13] yacc.c: add support for parse.error detailed, Akim Demaille, 2020/01/18
- [PATCH 04/13] yacc.c: tests: check detailed error messages, Akim Demaille, 2020/01/18
- [PATCH 05/13] regen, Akim Demaille, 2020/01/18
- [PATCH 06/13] bison: use detailed error messages, Akim Demaille, 2020/01/18
- [PATCH 07/13] regen, Akim Demaille, 2020/01/18
- [PATCH 09/13] tests: check that detailed error messages preserve UTF-8 characters, Akim Demaille, 2020/01/18
- [PATCH 08/13] yacc.c: escape trigraphs in detailed parse.error,
Akim Demaille <=
- [PATCH 10/13] parsers: support translatable token aliases, Akim Demaille, 2020/01/18
- [PATCH 12/13] tests: check token internationalization, Akim Demaille, 2020/01/18
- [PATCH 13/13] parsers: issue tname with i18n markup, Akim Demaille, 2020/01/18
- [PATCH 11/13] regen, Akim Demaille, 2020/01/18
- Re: [PATCH 00/13] Better error messages, Akim Demaille, 2020/01/20