bison-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Java push parser


From: Akim Demaille
Subject: Re: Java push parser
Date: Mon, 21 Jan 2013 14:28:57 +0100

Le 15 janv. 2013 à 20:31, Dennis Heimbigner <address@hidden> a écrit :

> I am submitting code to
> add push parsing support for
> Java through Bison.
> 
> The relevant code can be obtained
> using this url:
>    http://www.unidata.ucar.edu/staff/dmh/push.tar
> That tar file contains three files:
> 1. lalr1.java - revised skeleton to add push parsing support

Hi Dennis,

it will be much easier to discuss your patch if it is
included in your message.  It is also better for the mailing
list archive.

Paolo, I'm cc'ing this to you as the original author of
lalr1.java, you might want to drop in some comments too.

--- data/lalr1.java     2013-01-15 15:16:54.000000000 +0100
+++ /Users/akim/Downloads/push/lalr1.java       2013-01-15 20:10:31.000000000 
+0100
@@ -15,6 +15,34 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+dnl Modified by Dennis Heimbigner (address@hidden)
+dnl to support push parsing.
+dnl 
+dnl Changes:
+dnl
+dnl 1. capture the declarations as m4 macros.
+dnl This was done to avoid duplication.
+dnl When push parsing, the declarations occur at
+dnl the class instance level rather than within the parse() function.
+dnl 
+dnl 2. Initialization of the declarations occurs in a function
+dnl called push_parse_initialize() that is called on the first
+dnl invocation of push_parse().
+dnl 
+dnl 3. The body of the parse loop is modified to return values at
+dnl appropriate points when doing push parsing.  In order to
+dnl make push parsing work, it was necessary to divide
+dnl YYNEWSTATE into two states: YYNEWSTATE and YYGETTOKEN. On
+dnl the first call to push_parse, the state is YYNEWSTATE. On
+dnl all later entries, the state is set to YYGETTOKEN. The
+dnl YYNEWSTATE switch arm falls through into
+dnl YYGETTOKEN. YYGETTOKEN indicates that a new token is
+dnl potentially needed. Normally, with a pull parser, this new
+dnl token would be obtained by calling yylex(). In the push
+dnl parser, the value YYMORE is returned to the caller. On the
+dnl next call to push_parse(), the parser will return to the
+dnl YYGETTOKEN state and continue operation.
+
 m4_include(b4_pkgdatadir/[java.m4])
 
 b4_defines_if([b4_fatal([%s: %%defines does not make sense in Java],
@@ -29,6 +57,47 @@
                         [b4_skeleton],
                         [b4_symbol_action_location([$1], [destructor])])])])
 b4_symbol_foreach([b4_symbol_no_destructor_assert])
+dnl
+dnl # Check the value of %define api.push-pull.
+b4_percent_define_default([[api.push-pull]], [[pull]])dnl
+b4_percent_define_check_values([[[[api.push-pull]],
+                                 [[pull]], [[push]], [[both]]]])dnl
+b4_define_flag_if([pull]) m4_define([b4_pull_flag], [[1]])dnl
+b4_define_flag_if([push]) m4_define([b4_push_flag], [[1]])dnl
+m4_case(b4_percent_define_get([[api.push-pull]]),
+        [pull], [m4_define([b4_push_flag], [[0]])],
+        [push], [m4_define([b4_pull_flag], [[0]])])dnl
+dnl
+dnl # Handle BISON_USE_PUSH_FOR_PULL for the test suite.  So that push parsing
+dnl # tests function as written, do not let BISON_USE_PUSH_FOR_PULL modify the
+dnl # behavior of Bison at all when push parsing is already requested.
+b4_define_flag_if([use_push_for_pull])dnl
+b4_use_push_for_pull_if([b4_push_if([m4_define([b4_use_push_for_pull_flag], 
[[0]])],[m4_define([b4_push_flag], [[1]])])])dnl
+m4_define([b4_both_if],[b4_push_if([b4_pull_if([$1],[$2])],[$2])])dnl
+m4_define([b4_throw_exception],[m4_ifval($1,[throw new $1 ( $2 );],[$3;])])
+
+m4_define([b4_define_state],[[
+    /// Lookahead and lookahead in internal form.
+    int yychar = yyempty_;
+    int yytoken = 0;
+
+    /* State.  */
+    int yyn = 0;
+    int yylen = 0;
+    int yystate = 0;
+    YYStack yystack = new YYStack ();
+
+    /* Error handling.  */
+    int yynerrs_ = 0;
+    ]b4_locations_if([/// The location where the error started.
+    b4_location_type yyerrloc = null;
+
+    /// b4_location_type of the lookahead.
+    b4_location_type yylloc = new b4_location_type (null, null);])[
+
+    /// Semantic value of the lookahead.
+    ]b4_yystype[ yylval = null;
+]])
 
 b4_output_begin([b4_parser_file_name])
 b4_copyright([Skeleton implementation for Bison LALR(1) parsers in Java],
@@ -344,6 +413,12 @@
    * return failure (<tt>false</tt>).  */
   public static final int YYABORT = 1;
 
+]b4_push_if([
+  /**
+   * Returned by a Bison action in order to stop the parsing process and
+   * return failure (<tt>false</tt>).  */
+  public static final int YYMORE = 4;])[
+
   /**
    * Returned by a Bison action in order to start error recovery without
    * printing an error message.  */
@@ -357,9 +432,13 @@
   private static final int YYREDUCE = 6;
   private static final int YYERRLAB1 = 7;
   private static final int YYRETURN = 8;
+]b4_push_if([  private static final int YYGETTOKEN = 9;])[
 
   private int yyerrstatus_ = 0;
 
+]b4_push_if([dnl
+  // if using push parser, define state as instance variables
+b4_define_state])[
   /**
    * Return whether error recovery is being done.  In this state, the parser
    * reads token until it reaches a known state, and then restarts normal
@@ -463,6 +542,8 @@
               + (yyvaluep == null ? "(null)" : yyvaluep.toString ()) + ")");
   }
 
+]b4_push_if([],[
+dnl Define the core pull parse procedure header when pull only
   /**
    * Parse input from the scanner that was specified at object construction
    * time.  Return whether the end of the input was reached successfully.
@@ -470,46 +551,58 @@
    * @@return <tt>true</tt> if the parsing succeeds.  Note that this does not
    *          imply that there were no syntax errors.
    */
-  public boolean parse () ]b4_maybe_throws([b4_list2([b4_lex_throws], 
[b4_throws])])[
+   public boolean parse () b4_maybe_throws([b4_list2([b4_lex_throws], 
[b4_throws])])])[
+]b4_push_if([
+  /**
+   * Push Parse input from external lexer
+   *
+   * @@param yylextoken current token 
+   * @@param yylexval current lval
+b4_locations_if([   * @@param yylexloc current position])
+   *
+   * @@return <tt>YYACCEPT, YYABORT, YYMORE</tt>
+   */
+b4_locations_if([
+  public int push_parse (int yylextoken, b4_yystype yylexval, b4_location_type 
yylexloc)],[dnl
+  public int push_parse (int yylextoken, b4_yystype yylexval)])
+      b4_maybe_throws([b4_list2([b4_lex_throws], [b4_throws])])])[
   {
-    /// Lookahead and lookahead in internal form.
-    int yychar = yyempty_;
-    int yytoken = 0;
-
-    /* State.  */
-    int yyn = 0;
-    int yylen = 0;
-    int yystate = 0;
-
-    YYStack yystack = new YYStack ();
-
-    /* Error handling.  */
-    int yynerrs_ = 0;
-    ]b4_locations_if([/// The location where the error started.
-    ]b4_location_type[ yyerrloc = null;
-
-    /// ]b4_location_type[ of the lookahead.
-    ]b4_location_type[ yylloc = new ]b4_location_type[ (null, null);
-
-    /// @@$.
-    ]b4_location_type[ yyloc;])
-
-    /// Semantic value of the lookahead.
-    b4_yystype[ yylval = null;
-
+    ]b4_locations_if([/// @@$.
+    b4_location_type yyloc;])[
+]b4_push_if([],[dnl
+    // if using pull parser only, define state as method variables
+b4_define_state
+    int label = YYNEWSTATE;
     yycdebug ("Starting parse\n");
     yyerrstatus_ = 0;
 
-]m4_ifdef([b4_initial_action], [
+    /* Initialize the stack.  */
+    yystack.push (yystate, yylval b4_locations_if([, yylloc]));
+m4_ifdef([b4_initial_action], [
 b4_dollar_pushdef([yylval], [], [yylloc])dnl
     /* User initialization code.  */
     b4_user_initial_action
-b4_dollar_popdef])[]dnl
-
-  [  /* Initialize the stack.  */
-    yystack.push (yystate, yylval]b4_locations_if([, yylloc])[);
-
-    int label = YYNEWSTATE;
+b4_dollar_popdef[]dnl
+])
+])[
+]b4_push_if([
+    int label;
+    boolean havenexttoken = true;
+
+    if(!push_parse_initialized) {
+      push_parse_initialize();
+      label = YYNEWSTATE;
+      yycdebug ("Starting parse\n");
+      yyerrstatus_ = 0;
+m4_ifdef([b4_initial_action], [
+b4_dollar_pushdef([yylval], [], [yylloc])dnl
+    /* User initialization code.  */
+    b4_user_initial_action
+b4_dollar_popdef[]dnl
+])
+    } else
+      label = YYGETTOKEN;
+])[
     for (;;)
       switch (label)
       {
@@ -522,7 +615,7 @@
 
         /* Accept?  */
         if (yystate == yyfinal_)
-          return true;
+]b4_push_if([         {label = YYACCEPT; break;}],[           return true;])[
 
         /* Take a decision.  First try without lookahead.  */
         yyn = yypact_[yystate];
@@ -531,16 +624,29 @@
             label = YYDEFAULT;
             break;
           }
+]b4_push_if([        /* Fall Through */
 
+      case YYGETTOKEN:])[
         /* Read a lookahead token.  */
         if (yychar == yyempty_)
           {
+]b4_push_if([
+            if(!havenexttoken) {
+              return YYMORE;
+            }
             yycdebug ("Reading a token: ");
-            yychar = yylexer.yylex ();]
-            b4_locations_if([[
-            yylloc = new ]b4_location_type[(yylexer.getStartPos (),
-                            yylexer.getEndPos ());]])
-            yylval = yylexer.getLVal ();[
+            yychar = yylextoken;
+            yylval = yylexval;
+            b4_locations_if([yylloc = yylexloc;])
+            havenexttoken = false;])[
+]b4_push_if([],[dnl else !push_if
+            yycdebug ("Reading a token: ");
+            yychar = yylexer.yylex ();
+            yylval = yylexer.getLVal ();
+            b4_locations_if([dnl
+            yylloc = new b4_location_type (yylexer.getStartPos (),
+                            yylexer.getEndPos ());])
+])[
           }
 
         /* Convert token to internal form.  */
@@ -633,13 +739,13 @@
         /* If just tried and failed to reuse lookahead token after an
          error, discard it.  */
 
-        if (yychar <= Lexer.EOF)
-          {
-          /* Return failure if at end of input.  */
-          if (yychar == Lexer.EOF)
-            return false;
-          }
-        else
+          if (yychar <= Lexer.EOF)
+            {
+            /* Return failure if at end of input.  */
+            if (yychar == Lexer.EOF)
+]b4_push_if([              {label = YYABORT; break;}],[                 return 
false;])[
+            }
+          else
               yychar = yyempty_;
           }
 
@@ -684,7 +790,7 @@
 
             /* Pop the current state because it cannot handle the error token. 
 */
             if (yystack.height == 0)
-              return false;
+]b4_push_if([              {label = YYABORT; break;}],[                 return 
false;])[
 
             ]b4_locations_if([yyerrloc = yystack.locationAt (0);])[
             yystack.pop ();
@@ -693,6 +799,9 @@
               yystack.print (yyDebugStream);
           }
 
+        if(label == YYABORT)
+            break;/* leave the switch */
+
         ]b4_locations_if([
         /* Muck with the stack to setup for yylloc.  */
         yystack.push (0, null, yylloc);
@@ -711,13 +820,85 @@
 
         /* Accept.  */
       case YYACCEPT:
-        return true;
+]b4_push_if([        push_parse_initialized = false; return YYACCEPT;],[       
 return true;])[
 
         /* Abort.  */
       case YYABORT:
-        return false;
+]b4_push_if([        push_parse_initialized = false; return YYABORT;],[        
return false;])[
       }
+}
+]b4_push_if([[
+  boolean push_parse_initialized = false;
+
+  public void push_parse_initialize()
+  {
+    // (Re-)Initialize the state
+    /// Lookahead and lookahead in internal form.
+    this.yychar = yyempty_;
+    this.yytoken = 0;
+
+    /* State.  */
+    this.yyn = 0;
+    this.yylen = 0;
+    this.yystate = 0;
+    this.yystack = new YYStack ();
+
+    /* Error handling.  */
+    this.yynerrs_ = 0;
+    ]b4_locations_if([/// The location where the error started.
+    this.yyerrloc = null;
+    this.yylloc = new b4_location_type (null, null);])[
+
+    /// Semantic value of the lookahead.
+    this.yylval = null;
+
+    yystack.push (this.yystate, this.yylval]b4_locations_if([, this.yylloc])[);
+    push_parse_initialized = true;
+  }
+]b4_locations_if([
+  /**
+   * Push Parse input from external lexer
+   *
+   * @@param yylextoken current token 
+   * @@param yylexval current lval
+   * @@param yyylexpos current position
+   *
+   * @@return <tt>YYACCEPT, YYABORT, YYMORE</tt>
+   */
+  public int push_parse (int yylextoken, b4_yystype yylexval, b4_position_type 
yylexpos)
+      b4_maybe_throws([b4_list2([b4_lex_throws], [b4_throws])])
+  {
+    return push_parse (yylextoken, yylexval, new b4_location_type (yylexpos));
   }
+])[]])
+
+b4_both_if([
+dnl Define the core pull parse procedure header when api.push-push=both
+  /**
+   * Parse input from the scanner that was specified at object construction
+   * time.  Return whether the end of the input was reached successfully.
+   *
+   * @@return <tt>true</tt> if the parsing succeeds.  Note that this does not
+   *          imply that there were no syntax errors.
+   */
+   public boolean parse () b4_maybe_throws([b4_list2([b4_lex_throws], 
[b4_throws])])
+   {
+      int status;
+      if(yylexer == null)
+        throw new NullPointerException("Null Lexer");
+      do {
+        int token = yylexer.yylex();
+        b4_yystype lval = yylexer.getLVal();    
+b4_locations_if([dnl
+        b4_location_type yyloc = new b4_location_type (yylexer.getStartPos (),
+                                              yylexer.getEndPos ());])
+        this.yyerrstatus_ = 0;
+        b4_locations_if([status = push_parse(token,lval,yyloc);],[dnl else 
!locations_if
+        status = push_parse(token,lval);])
+      } while (status == YYMORE);
+      return (status == YYACCEPT);
+  }
+])[
 
   // Generate an error message.
   private String yysyntax_error (int yystate, int tok)
@@ -825,6 +1006,7 @@
   ]b4_integral_parser_table_define([rline], [b4_rline],
   [[YYRLINE[YYN] -- Source line where rule number YYN was defined.]])[
 
+
   // Report on the debug stream that the rule yyrule is going to be reduced.
   private void yy_reduce_print (int yyrule, YYStack yystack)
   {





reply via email to

[Prev in Thread] Current Thread [Next in Thread]