bison-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH for Dlang support] d: add support for lookahead correction


From: Adela Vais
Subject: [PATCH for Dlang support] d: add support for lookahead correction
Date: Tue, 17 Nov 2020 20:09:09 +0200
User-agent: Mutt/1.9.4 (2018-02-28)

When using lookahead correction, the method YYParser.Context.getExpectedTokens
is not annotated with const, because the method calls yylacCheck, which is not
const. Also, because of yylacStack and yylacEstablished, yylacCheck needs to
be called from the context of the parser class, which is sent as parameter to
the Context's constructor.

* data/skeletons/lalr1.d (yylacCheck, yylacEstablish, yylacDiscard,
yylacStack, yylacEstablished): New.
(Context): Use it.
* doc/bison.texi: Document it.
* tests/calc.at: Check it.
---
 data/skeletons/lalr1.d | 248 +++++++++++++++++++++++++++++++++++++----
 doc/bison.texi         |   2 +-
 tests/calc.at          |   4 +
 3 files changed, 229 insertions(+), 25 deletions(-)

diff --git a/data/skeletons/lalr1.d b/data/skeletons/lalr1.d
index 73ece8ff..74eac5de 100644
--- a/data/skeletons/lalr1.d
+++ b/data/skeletons/lalr1.d
@@ -17,6 +17,13 @@
 
 m4_include(b4_skeletonsdir/[d.m4])
 
+# parse.lac
+b4_percent_define_default([[parse.lac]], [[none]])
+b4_percent_define_check_values([[[[parse.lac]], [[full]], [[none]]]])
+b4_define_flag_if([lac])
+m4_define([b4_lac_flag],
+          [m4_if(b4_percent_define_get([[parse.lac]]),
+                 [none], [[0]], [[1]])])
 
 b4_output_begin([b4_parser_file_name])
 b4_copyright([Skeleton implementation for Bison LALR(1) parsers in D],
@@ -227,6 +234,9 @@ b4_user_union_members
    * Instantiate the Bison-generated parser.
    */
   public this] (b4_parse_param_decl([b4_lex_param_decl])[) {
+]b4_percent_code_get([[init]])[]b4_lac_if([[
+    this.yylacStack = new int[];
+    this.yylacEstablished = false;]])[
     this (new YYLexer(]b4_lex_param_call[));
   }
 ]])[
@@ -329,6 +339,18 @@ b4_user_union_members
     return yyerrstatus_ == 0;
   }
 
+  /** Compute post-reduction state.
+   * @@param yystate   the current state
+   * @@param yysym     the nonterminal to push on the stack
+   */
+  private int yyLRGotoState(int yystate, int yysym) {
+    int yyr = yypgoto_[yysym - yyntokens_] + yystate;
+    if (0 <= yyr && yyr <= yylast_ && yycheck_[yyr] == yystate)
+      return yytable_[yyr];
+    else
+      return yydefgoto_[yysym - yyntokens_];
+  }
+
   private int yyaction (int yyn, ref YYStack yystack, int yylen)
   {
     ]b4_yystype[ yyval;]b4_locations_if([[
@@ -362,14 +384,7 @@ b4_user_union_members
     yylen = 0;
 
     /* Shift the result of the reduction.  */
-    yyn = yyr1_[yyn];
-    int yystate = yypgoto_[yyn - yyntokens_] + yystack.stateAt (0);
-    if (0 <= yystate && yystate <= yylast_
-        && yycheck_[yystate] == yystack.stateAt (0))
-      yystate = yytable_[yystate];
-    else
-      yystate = yydefgoto_[yyn - yyntokens_];
-
+    int yystate = yyLRGotoState(yystack.stateAt(0), yyr1_[yyn]);
     yystack.push (yystate, yyval]b4_locations_if([, yyloc])[);
     return YYNEWSTATE;
   }
@@ -432,7 +447,10 @@ b4_locations_if([, ref ]b4_location_type[ yylocationp])[)
     /// Semantic value of the lookahead.
     ]b4_yystype[ yylval;
 
-    bool yyresult;]b4_parse_trace_if([[
+    bool yyresult;]b4_lac_if([[
+    // Discard the LAC context in case there still is one left from a
+    // previous invocation.
+    yylacDiscard("init");]])[]b4_parse_trace_if([[
 
     yycdebugln ("Starting parse");]])[
     yyerrstatus_ = 0;
@@ -504,14 +522,19 @@ m4_popdef([b4_at_dollar])])dnl
           /* If the proper action on seeing token YYTOKEN is to reduce or to
              detect an error, take that action.  */
           yyn += yytoken;
-          if (yyn < 0 || yylast_ < yyn || yycheck_[yyn] != yytoken)
-            label = YYDEFAULT;
-
+          if (yyn < 0 || yylast_ < yyn || yycheck_[yyn] != yytoken) 
{]b4_lac_if([[
+            if (!yylacEstablish(yystack, yytoken))
+              label = YYERRLAB;
+            else]])[
+              label = YYDEFAULT;
+          }
           /* <= 0 means reduce or error.  */
           else if ((yyn = yytable_[yyn]) <= 0)
           {
             if (yyTableValueIsError(yyn))
-              label = YYERRLAB;
+              label = YYERRLAB;]b4_lac_if([[
+            else if (!yylacEstablish(yystack, yytoken))
+              label = YYERRLAB;]])[
             else
             {
               yyn = -yyn;
@@ -532,7 +555,8 @@ m4_popdef([b4_at_dollar])])dnl
               --yyerrstatus_;
 
             yystate = yyn;
-            yystack.push (yystate, yylval]b4_locations_if([, yylloc])[);
+            yystack.push (yystate, yylval]b4_locations_if([, 
yylloc])[);]b4_lac_if([[
+            yylacDiscard("shift");]])[
             label = YYNEWSTATE;
           }
         }
@@ -568,7 +592,7 @@ m4_popdef([b4_at_dollar])])dnl
           ++yynerrs_;
           if (yychar == TokenKind.]b4_symbol(empty, id)[)
             yytoken = ]b4_symbol(empty, kind)[;
-          yyreportSyntaxError(new Context(yystack, yytoken]b4_locations_if([[, 
yylloc]])[));
+          yyreportSyntaxError(new Context(]b4_lac_if([[this, ]])[yystack, 
yytoken]b4_locations_if([[, yylloc]])[));
         }
 ]b4_locations_if([
         yyerrloc = yylloc;])[
@@ -644,7 +668,8 @@ m4_popdef([b4_at_dollar])])dnl
         yyloc = yylloc_from_stack (yystack, 2);
         yystack.pop (2);])[
 
-        /* Shift the error token.  */]b4_parse_trace_if([[
+        /* Shift the error token.  */]b4_lac_if([[
+        yylacDiscard("error recovery");]])[]b4_parse_trace_if([[
         import std.conv : to;
         yy_symbol_print ("Shifting", to!SymbolKind (yystos_[yyn]), 
yylval]b4_locations_if([, yyloc])[);]])[
         yystate = yyn;
@@ -747,14 +772,15 @@ m4_popdef([b4_at_dollar])])dnl
    * a syntax error diagnostic.
    */
   public static final class Context
-  {
-
+  {]b4_lac_if([[
+    private ]b4_parser_class[ yyparser;]])[
     private const(YYStack) yystack;
     private SymbolKind yytoken;]b4_locations_if([[
     private const(]b4_location_type[) yylocation;]])[
 
-    this(YYStack stack, SymbolKind kind]b4_locations_if([[, ]b4_location_type[ 
loc]])[)
-    {
+    this(]b4_lac_if([[]b4_parser_class[ parser, ]])[YYStack stack, SymbolKind 
kind]b4_locations_if([[, ]b4_location_type[ loc]])[)
+    {]b4_lac_if([[
+        yyparser = parser;]])[
       yystack = stack;
       yytoken = kind;]b4_locations_if([[
       yylocation = loc;]])[
@@ -775,14 +801,35 @@ m4_popdef([b4_at_dollar])])dnl
      * YYARG is null, return the number of expected tokens (guaranteed to
      * be less than YYNTOKENS).
      */
-    int getExpectedTokens(SymbolKind[] yyarg, int yyargn) const
+    int getExpectedTokens(SymbolKind[] yyarg, int yyargn)]b4_lac_if([[]], [[ 
const]])[
     {
       return getExpectedTokens(yyarg, 0, yyargn);
     }
 
-    int getExpectedTokens(SymbolKind[] yyarg, int yyoffset, int yyargn) const
+    int getExpectedTokens(SymbolKind[] yyarg, int yyoffset, int 
yyargn)]b4_lac_if([[]], [[ const]])[
     {
-      int yycount = yyoffset;
+      int yycount = yyoffset;]b4_lac_if([b4_parse_trace_if([[
+      // Execute LAC once. We don't care if it is successful, we
+      // only do it for the sake of debugging output.
+
+      if (!yyparser.yylacEstablished)
+        yyparser.yylacCheck(yystack, yytoken);
+]])[
+      for (int yyx = 0; yyx < yyntokens_; ++yyx)
+        {
+          SymbolKind yysym = SymbolKind(yyx);
+          if (yysym != ]b4_symbol(error, kind)[
+              && yysym != ]b4_symbol(undef, kind)[
+              && yyparser.yylacCheck(yystack, yysym))
+            {
+              if (yyarg == null)
+                yycount += 1;
+              else if (yycount == yyargn)
+                return 0;
+              else
+                yyarg[yycount++] = yysym;
+            }
+        }]], [[
       int yyn = yypact_[this.yystack.stateAt(0)];
       if (!yyPactValueIsDefault(yyn))
       {
@@ -805,13 +852,166 @@ m4_popdef([b4_at_dollar])])dnl
             else
               yyarg[yycount++] = SymbolKind(yyx);
           }
-      }
+      }]])[
       if (yyarg !is null && yycount == yyoffset && yyoffset < yyargn)
         yyarg[yyoffset] = ]b4_symbol(empty, kind)[;
       return yycount - yyoffset;
     }
   }
 
+]b4_lac_if([[
+  /** Check the lookahead yytoken.
+   * \returns  true iff the token will be eventually shifted.
+   */
+  bool yylacCheck(const YYStack yystack, SymbolKind yytoken)
+  {
+    // Logically, the yylacStack's lifetime is confined to this function.
+    // Clear it, to get rid of potential left-overs from previous call.
+    destroy(yylacStack);
+    // Reduce until we encounter a shift and thereby accept the token.
+]b4_parse_trace_if([[
+    import std.conv;
+    yycdebug("LAC: checking lookahead " ~ format("%s", yytoken) ~ ":");]])[
+    int lacTop = 0;
+    while (true)
+    {
+      int topState = (yylacStack.length == 0
+                      ? yystack.stateAt(lacTop)
+                      : yylacStack[$ - 1]);
+      int yyrule = yypact_[topState];
+      if (yyPactValueIsDefault(yyrule)
+          || (yyrule += yytoken) < 0 || yylast_ < yyrule
+          || yycheck_[yyrule] != yytoken)
+      {
+        // Use the default action.
+        yyrule = yydefact_[+topState];
+        if (yyrule == 0)
+        {]b4_parse_trace_if([[
+          yycdebugln(" Err");]])[
+          return false;
+        }
+      }
+      else
+      {
+        // Use the action from yytable.
+        yyrule = yytable_[yyrule];
+        if (yyTableValueIsError(yyrule))
+        {]b4_parse_trace_if([[
+          yycdebugln(" Err");]])[
+          return false;
+        }
+        if (0 < yyrule)
+        {]b4_parse_trace_if([[
+          yycdebugln(" S" ~ to!string(yyrule));]])[
+          return true;
+        }
+        yyrule = -yyrule;
+      }
+      // By now we know we have to simulate a reduce.
+]b4_parse_trace_if([[
+      yycdebug(" R" ~ to!string(yyrule - 1));]])[
+      // Pop the corresponding number of values from the stack.
+      {
+        int yylen = yyr2_[yyrule];
+        // First pop from the LAC stack as many tokens as possible.
+        int lacSize = cast (int) yylacStack.length;
+        if (yylen < lacSize)
+        {
+          yylacStack.length -= yylen;
+          yylen = 0;
+        }
+        else if (lacSize != 0)
+        {
+          destroy(yylacStack);
+          yylen -= lacSize;
+        }
+        // Only afterwards look at the main stack.
+        // We simulate popping elements by incrementing lacTop.
+        lacTop += yylen;
+      }
+      // Keep topState in sync with the updated stack.
+      topState = (yylacStack.length == 0
+                  ? yystack.stateAt(lacTop)
+                  : yylacStack[$ - 1]);
+      // Push the resulting state of the reduction.
+      int state = yyLRGotoState(topState, yyr1_[yyrule]);]b4_parse_trace_if([[
+      yycdebug(" G" ~ to!string(state));]])[
+      yylacStack.length++;
+      yylacStack[$ - 1] = state;
+    }
+  }
+
+  /** Establish the initial context if no initial context currently exists.
+   * \returns  true iff the token will be eventually shifted.
+   */
+  bool yylacEstablish(YYStack yystack, SymbolKind yytoken)
+  {
+  /* Establish the initial context for the current lookahead if no initial
+     context is currently established.
+
+     We define a context as a snapshot of the parser stacks.  We define
+     the initial context for a lookahead as the context in which the
+     parser initially examines that lookahead in order to select a
+     syntactic action.  Thus, if the lookahead eventually proves
+     syntactically unacceptable (possibly in a later context reached via a
+     series of reductions), the initial context can be used to determine
+     the exact set of tokens that would be syntactically acceptable in the
+     lookahead's place.  Moreover, it is the context after which any
+     further semantic actions would be erroneous because they would be
+     determined by a syntactically unacceptable token.
+
+     yylacEstablish should be invoked when a reduction is about to be
+     performed in an inconsistent state (which, for the purposes of LAC,
+     includes consistent states that don't know they're consistent because
+     their default reductions have been disabled).
+
+     For parse.lac=full, the implementation of yylacEstablish is as
+     follows.  If no initial context is currently established for the
+     current lookahead, then check if that lookahead can eventually be
+     shifted if syntactic actions continue from the current context.  */
+    if (yylacEstablished)
+      return true;
+    else
+    {]b4_parse_trace_if([[
+        yycdebugln("LAC: initial context established for " ~ format("%s", 
yytoken));]])[
+        yylacEstablished = true;
+        return yylacCheck(yystack, yytoken);
+    }
+  }
+
+  /** Discard any previous initial lookahead context because of event.
+   * \param event  the event which caused the lookahead to be discarded.
+   *               Only used for debbuging output.  */
+  void yylacDiscard(string event)
+  {
+  /* Discard any previous initial lookahead context because of Event,
+     which may be a lookahead change or an invalidation of the currently
+     established initial context for the current lookahead.
+
+     The most common example of a lookahead change is a shift.  An example
+     of both cases is syntax error recovery.  That is, a syntax error
+     occurs when the lookahead is syntactically erroneous for the
+     currently established initial context, so error recovery manipulates
+     the parser stacks to try to find a new initial context in which the
+     current lookahead is syntactically acceptable.  If it fails to find
+     such a context, it discards the lookahead.  */
+    if (yylacEstablished)
+    {]b4_parse_trace_if([[
+      yycdebugln("LAC: initial context discarded due to " ~ event);]])[
+      yylacEstablished = false;
+    }
+  }
+
+  /** The stack for LAC.
+   * Logically, the yylacStack's lifetime is confined to the function
+   * yylacCheck. We just store it as a member of this class to hold
+   * on to the memory and to avoid frequent reallocations.
+   */
+  int[] yylacStack;
+  /**  Whether an initial LAC context was established. */
+  bool yylacEstablished;
+]])[
+
   /**
    * Whether the given <code>yypact_</code> value indicates a defaulted state.
    * @@param yyvalue   the value to check
diff --git a/doc/bison.texi b/doc/bison.texi
index 8e9472e2..14611e2b 100644
--- a/doc/bison.texi
+++ b/doc/bison.texi
@@ -6885,7 +6885,7 @@ introduced in 3.0 with support for @code{simple} and 
@code{verbose}.  Values
 @deffn Directive {%define parse.lac} @var{when}
 
 @itemize
-@item Languages(s): C/C++ (deterministic parsers only), and Java.
+@item Languages(s): C/C++ (deterministic parsers only), D and Java.
 
 @item Purpose: Enable LAC (lookahead correction) to improve
 syntax error handling.  @xref{LAC}.
diff --git a/tests/calc.at b/tests/calc.at
index fd42a40b..57dc2a14 100644
--- a/tests/calc.at
+++ b/tests/calc.at
@@ -1461,6 +1461,10 @@ AT_CHECK_CALC_LALR1_D([%define parse.error verbose 
%debug %define api.token.pref
 AT_CHECK_CALC_LALR1_D([%define parse.error verbose %debug %define 
api.symbol.prefix {SYMB_} %verbose])
 AT_CHECK_CALC_LALR1_D([%define parse.error verbose %debug %define 
api.symbol.prefix {SYMB_} %define api.token.prefix {TOK_} %verbose])
 
+AT_CHECK_CALC_LALR1_D([%locations %define parse.lac full %define parse.error 
verbose])
+AT_CHECK_CALC_LALR1_D([%locations %define parse.lac full %define parse.error 
custom])
+AT_CHECK_CALC_LALR1_D([%locations %define parse.lac full %define parse.error 
detailed %define parse.trace])
+
 #AT_CHECK_CALC_LALR1_D([%locations %define parse.error verbose %debug %verbose 
%parse-param {semantic_value *result}{int *count}{int *nerrs}])
 #AT_CHECK_CALC_LALR1_D([%locations %define parse.error verbose %debug %define 
api.prefix {calc} %verbose %parse-param {semantic_value *result}{int 
*count}{int *nerrs}])
 
-- 
2.17.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]