[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 3/4] java: add support for lookahead correction
From: |
Akim Demaille |
Subject: |
[PATCH 3/4] java: add support for lookahead correction |
Date: |
Sun, 1 Nov 2020 19:33:23 +0100 |
Shamelessly stolen from Adrian Vogelsgesang's implementation in
lalr1.cc.
* data/skeletons/lalr1.java (yycdebugNnl, yyLacCheck, yyLacEstablish)
(yyLacDiscard, yylacStack, yylacEstablished): New.
(Context): Use it.
* examples/java/calc/Calc.y: Enable LAC.
---
data/skeletons/lalr1.java | 231 ++++++++++++++++++++++++++++++++++----
examples/java/calc/Calc.y | 8 +-
2 files changed, 218 insertions(+), 21 deletions(-)
diff --git a/data/skeletons/lalr1.java b/data/skeletons/lalr1.java
index e8664901..e56378cb 100644
--- a/data/skeletons/lalr1.java
+++ b/data/skeletons/lalr1.java
@@ -1,4 +1,4 @@
-# Java skeleton for Bison -*- autoconf -*-
+# Java skeleton for Bison -*- java -*-
# Copyright (C) 2007-2015, 2018-2020 Free Software Foundation, Inc.
@@ -80,6 +80,15 @@ m4_define([b4_define_state],
]b4_yystype[ yylval = null;
]])
+# parse.lac
+b4_percent_define_default([[parse.lac]], [[none]])
+b4_percent_define_check_values([[[[parse.lac]], [[full]], [[none]]]])
+b4_define_flag_if([lac])
+m4_define([b4_lac_flag],
+ [m4_if(b4_percent_define_get([[parse.lac]]),
+ [none], [[0]], [[1]])])
+
+
## ------------- ##
## Parser File. ##
## ------------- ##
@@ -93,6 +102,7 @@ b4_output_begin([b4_parser_file_name])[
]b4_user_pre_prologue[
]b4_user_post_prologue[
import java.text.MessageFormat;
+import java.util.Vector;
]b4_percent_code_get([[imports]])[
/**
* A Bison parser, automatically generated from
<tt>]m4_bpatsubst(b4_file_name, [^"\(.*\)"$], [\1])[</tt>.
@@ -257,8 +267,10 @@ import java.text.MessageFormat;
*/
public
]b4_parser_class[(]b4_parse_param_decl([b4_lex_param_decl])[)]b4_maybe_throws([b4_init_throws])[
{
-]b4_percent_code_get([[init]])[
- this.yylexer = new YYLexer (]b4_lex_param_call[);
+]b4_percent_code_get([[init]])[]b4_lac_if([[
+ this.yylacStack = new Vector<Integer>();
+ this.yylacEstablished = false;]])[
+ this.yylexer = new YYLexer(]b4_lex_param_call[);
]b4_parse_param_cons[
}
]])[
@@ -269,7 +281,9 @@ import java.text.MessageFormat;
*/
]b4_lexer_if([[protected]], [[public]])
b4_parser_class[(]b4_parse_param_decl([[Lexer
yylexer]])[)]b4_maybe_throws([b4_init_throws])[
{
-]b4_percent_code_get([[init]])[
+]b4_percent_code_get([[init]])[]b4_lac_if([[
+ this.yylacStack = new Vector<Integer>();
+ this.yylacEstablished = false;]])[
this.yylexer = yylexer;
]b4_parse_param_cons[
}
@@ -338,6 +352,11 @@ import java.text.MessageFormat;
yylexer.yyerror(new ]b4_location_type[ (pos), msg);
}]])[
]b4_parse_trace_if([[
+ protected final void yycdebugNnl(String s) {
+ if (0 < yydebug)
+ yyDebugStream.print(s);
+ }
+
protected final void yycdebug(String s) {
if (0 < yydebug)
yyDebugStream.println(s);
@@ -541,7 +560,12 @@ import java.text.MessageFormat;
/* @@$. */
]b4_location_type[ yyloc;]])[
]b4_push_if([],[[
-]b4_define_state[]b4_parse_trace_if([[
+]b4_define_state[
+]b4_lac_if([[
+ /// Discard the LAC context in case there still is one left from a
+ /// previous invocation.
+ yylacDiscard("init");]])[
+]b4_parse_trace_if([[
yycdebug ("Starting parse");]])[
yyerrstatus_ = 0;
yynerrs = 0;
@@ -635,19 +659,24 @@ b4_dollar_popdef[]dnl
/* If the proper action on seeing token YYTOKEN is to reduce or to
detect an error, take that action. */
yyn += yytoken.getCode();
- if (yyn < 0 || YYLAST_ < yyn || yycheck_[yyn] != yytoken.getCode())
+ if (yyn < 0 || YYLAST_ < yyn || yycheck_[yyn] !=
yytoken.getCode()) {]b4_lac_if([[
+ if (!yylacEstablish(yystack, yytoken)) {
+ label = YYERRLAB;
+ } else]])[
label = YYDEFAULT;
+ }
/* <= 0 means reduce or error. */
else if ((yyn = yytable_[yyn]) <= 0)
{
- if (yyTableValueIsError (yyn))
+ if (yyTableValueIsError(yyn)) {
label = YYERRLAB;
- else
- {
- yyn = -yyn;
- label = YYREDUCE;
- }
+ }]b4_lac_if([[ else if (!yylacEstablish(yystack, yytoken)) {
+ label = YYERRLAB;
+ }]])[ else {
+ yyn = -yyn;
+ label = YYREDUCE;
+ }
}
else
@@ -665,7 +694,8 @@ b4_dollar_popdef[]dnl
--yyerrstatus_;
yystate = yyn;
- yystack.push (yystate, yylval]b4_locations_if([, yylloc])[);
+ yystack.push(yystate, yylval]b4_locations_if([,
yylloc])[);]b4_lac_if([[
+ yylacDiscard("shift");]])[
label = YYNEWSTATE;
}
}
@@ -701,7 +731,7 @@ b4_dollar_popdef[]dnl
++yynerrs;
if (yychar == YYEMPTY_)
yytoken = null;
- yyreportSyntaxError(new Context(yystack,
yytoken]b4_locations_if([[, yylloc]])[));
+ yyreportSyntaxError(new Context(this, yystack,
yytoken]b4_locations_if([[, yylloc]])[));
}
]b4_locations_if([[
yyerrloc = yylloc;]])[
@@ -784,7 +814,8 @@ b4_dollar_popdef[]dnl
yyloc = yylloc (yystack, 2);
yystack.pop (2);]])[
- /* Shift the error token. */]b4_parse_trace_if([[
+ /* Shift the error token. */]b4_lac_if([[
+ yylacDiscard("error recovery");]])[]b4_parse_trace_if([[
yySymbolPrint("Shifting", SymbolKind.get(yystos_[yyn]),
yylval]b4_locations_if([, yyloc])[);]])[
@@ -820,7 +851,9 @@ b4_dollar_popdef[]dnl
this.yyn = 0;
this.yylen = 0;
this.yystate = 0;
- this.yystack = new YYStack ();
+ this.yystack = new YYStack();]b4_lac_if([[
+ this.yylacStack = new Vector<Integer>();
+ this.yylacEstablished = false;]])[
this.label = YYNEWSTATE;
/* Error handling. */
@@ -881,13 +914,14 @@ b4_dollar_popdef[]dnl
* a syntax error diagnostic.
*/
public static final class Context {
- Context (YYStack stack, SymbolKind token]b4_locations_if([[,
]b4_location_type[ loc]])[)
- {
+ Context(]b4_parser_class[ parser, YYStack stack, SymbolKind
token]b4_locations_if([[, ]b4_location_type[ loc]])[) {
+ yyparser = parser;
yystack = stack;
yytoken = token;]b4_locations_if([[
yylocation = loc;]])[
}
+ private ]b4_parser_class[ yyparser;
private YYStack yystack;
@@ -921,7 +955,27 @@ b4_dollar_popdef[]dnl
}
int getExpectedTokens(SymbolKind yyarg[], int yyoffset, int yyargn) {
- int yycount = yyoffset;
+ int yycount = yyoffset;]b4_lac_if([b4_parse_trace_if([[
+ // Execute LAC once. We don't care if it is successful, we
+ // only do it for the sake of debugging output.
+ if (!yyparser.yylacEstablished)
+ yyparser.yylacCheck(yystack, yytoken);
+]])[
+ for (int yyx = 0; yyx < YYNTOKENS_; ++yyx)
+ {
+ SymbolKind yysym = SymbolKind.get(yyx);
+ if (yysym != ]b4_symbol(1, kind)[
+ && yysym != ]b4_symbol(2, kind)[
+ && yyparser.yylacCheck(yystack, yysym))
+ {
+ if (yyarg == null)
+ yycount += 1;
+ else if (yycount == yyargn)
+ return 0;
+ else
+ yyarg[yycount++] = yysym;
+ }
+ }]], [[
int yyn = yypact_[this.yystack.stateAt(0)];
if (!yyPactValueIsDefault(yyn))
{
@@ -944,13 +998,150 @@ b4_dollar_popdef[]dnl
else
yyarg[yycount++] = SymbolKind.get(yyx);
}
- }
+ }]])[
if (yyarg != null && yycount == yyoffset && yyoffset < yyargn)
yyarg[yycount] = null;
return yycount - yyoffset;
}
}
+]b4_lac_if([[
+ /// Check the lookahead yytoken.
+ /// \returns true iff the token will be eventually shifted.
+ boolean yylacCheck(YYStack yystack, SymbolKind yytoken)
+ {
+ // Logically, the yylacStack's lifetime is confined to this function.
+ // Clear it, to get rid of potential left-overs from previous call.
+ yylacStack.clear();
+ // Reduce until we encounter a shift and thereby accept the token.
+ yycdebugNnl("LAC: checking lookahead " + yytoken.getName() + ":");
+ int lacTop = 0;
+ while (true)
+ {
+ int topState = (yylacStack.isEmpty()
+ ? yystack.stateAt(lacTop)
+ : yylacStack.lastElement());
+ int yyrule = yypact_[topState];
+ if (yyPactValueIsDefault(yyrule)
+ || (yyrule += yytoken.getCode()) < 0 || YYLAST_ < yyrule
+ || yycheck_[yyrule] != yytoken.getCode())
+ {
+ // Use the default action.
+ yyrule = yydefact_[+topState];
+ if (yyrule == 0) {
+ yycdebug(" Err");
+ return false;
+ }
+ }
+ else
+ {
+ // Use the action from yytable.
+ yyrule = yytable_[yyrule];
+ if (yyTableValueIsError(yyrule)) {
+ yycdebug(" Err");
+ return false;
+ }
+ if (0 < yyrule) {
+ yycdebug(" S" + yyrule);
+ return true;
+ }
+ yyrule = -yyrule;
+ }
+ // By now we know we have to simulate a reduce.
+ yycdebugNnl(" R" + (yyrule - 1));
+ // Pop the corresponding number of values from the stack.
+ {
+ int yylen = yyr2_[yyrule];
+ // First pop from the LAC stack as many tokens as possible.
+ int lacSize = yylacStack.size();
+ if (yylen < lacSize) {
+ yylacStack.setSize(lacSize - yylen);
+ yylen = 0;
+ } else if (lacSize != 0) {
+ yylacStack.clear();
+ yylen -= lacSize;
+ }
+ // Only afterwards look at the main stack.
+ // We simulate popping elements by incrementing lacTop.
+ lacTop += yylen;
+ }
+ // Keep topState in sync with the updated stack.
+ topState = (yylacStack.isEmpty()
+ ? yystack.stateAt(lacTop)
+ : yylacStack.lastElement());
+ // Push the resulting state of the reduction.
+ int state = yyLRGotoState(topState, yyr1_[yyrule]);
+ yycdebugNnl(" G" + state);
+ yylacStack.add(state);
+ }
+ }
+
+ /// Establish the initial context if no initial context currently exists.
+ /// \returns true iff the token will be eventually shifted.
+ boolean yylacEstablish(YYStack yystack, SymbolKind yytoken) {
+ /* Establish the initial context for the current lookahead if no initial
+ context is currently established.
+
+ We define a context as a snapshot of the parser stacks. We define
+ the initial context for a lookahead as the context in which the
+ parser initially examines that lookahead in order to select a
+ syntactic action. Thus, if the lookahead eventually proves
+ syntactically unacceptable (possibly in a later context reached via a
+ series of reductions), the initial context can be used to determine
+ the exact set of tokens that would be syntactically acceptable in the
+ lookahead's place. Moreover, it is the context after which any
+ further semantic actions would be erroneous because they would be
+ determined by a syntactically unacceptable token.
+
+ yylacEstablish should be invoked when a reduction is about to be
+ performed in an inconsistent state (which, for the purposes of LAC,
+ includes consistent states that don't know they're consistent because
+ their default reductions have been disabled).
+
+ For parse.lac=full, the implementation of yylacEstablish is as
+ follows. If no initial context is currently established for the
+ current lookahead, then check if that lookahead can eventually be
+ shifted if syntactic actions continue from the current context. */
+ if (yylacEstablished) {
+ return true;
+ } else {
+ yycdebug("LAC: initial context established for " + yytoken.getName());
+ yylacEstablished = true;
+ return yylacCheck(yystack, yytoken);
+ }
+ }
+
+ /// Discard any previous initial lookahead context because of event.
+ /// \param event the event which caused the lookahead to be discarded.
+ /// Only used for debbuging output.
+ void yylacDiscard(String event) {
+ /* Discard any previous initial lookahead context because of Event,
+ which may be a lookahead change or an invalidation of the currently
+ established initial context for the current lookahead.
+
+ The most common example of a lookahead change is a shift. An example
+ of both cases is syntax error recovery. That is, a syntax error
+ occurs when the lookahead is syntactically erroneous for the
+ currently established initial context, so error recovery manipulates
+ the parser stacks to try to find a new initial context in which the
+ current lookahead is syntactically acceptable. If it fails to find
+ such a context, it discards the lookahead. */
+ if (yylacEstablished) {
+ yycdebug("LAC: initial context discarded due to " + event);
+ yylacEstablished = false;
+ }
+ }
+
+ /// The stack for LAC.
+ /// Logically, the yylacStack's lifetime is confined to the function
+ /// yylacCheck. We just store it as a member of this class to hold
+ /// on to the memory and to avoid frequent reallocations.
+ /// Since yylacCheck is const, this member must be mutable.
+ Vector<Integer> yylacStack;
+ /// Whether an initial LAC context was established.
+ boolean yylacEstablished;
+]])[
+
]b4_parse_error_bmatch(
[detailed\|verbose], [[
private int yysyntaxErrorArguments(Context yyctx, SymbolKind[] yyarg, int
yyargn) {
diff --git a/examples/java/calc/Calc.y b/examples/java/calc/Calc.y
index 1caa48b7..b0c2ba7d 100644
--- a/examples/java/calc/Calc.y
+++ b/examples/java/calc/Calc.y
@@ -23,11 +23,17 @@
%define api.parser.public
%define api.push-pull push
+// Customized syntax error messages (see reportSyntaxError)...
%define parse.error custom
-%define parse.trace
+// ... with locations...
%locations
+// ... and accurate list of expected tokens.
+%define parse.lac full
+
+%define parse.trace
+
%code imports {
import java.io.BufferedReader;
import java.io.IOException;
--
2.28.0
[PATCH 4/4] java: lac: check it, Akim Demaille, 2020/11/01