bison-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 07/10] doc: java: SymbolKind, etc.


From: Akim Demaille
Subject: [PATCH 07/10] doc: java: SymbolKind, etc.
Date: Mon, 13 Apr 2020 17:43:38 +0200

Why didn't I think about this before???  symbolName should be a method
of SymbolKind.

* data/skeletons/lalr1.java (YYParser::yysymbolName): Move as...
* data/skeletons/java.m4 (SymbolKind::getName): this.
Make the table a static final table, not a local variable.
Adjust dependencies.
* doc/bison.texi (Java Parser Interface): Document i18n.
(Java Parser Context Interface): Document SymbolKind.
* examples/java/calc/Calc.y, tests/local.at: Adjust.
---
 TODO                      | 25 +++++++------
 data/skeletons/java.m4    | 63 ++++++++++++++++++++++++++++++---
 data/skeletons/lalr1.java | 74 +++------------------------------------
 doc/bison.texi            | 72 ++++++++++++++++++++++++++++---------
 examples/java/calc/Calc.y |  4 +--
 tests/local.at            | 34 +++++++++---------
 6 files changed, 151 insertions(+), 121 deletions(-)

diff --git a/TODO b/TODO
index a1dec320..73b89db6 100644
--- a/TODO
+++ b/TODO
@@ -1,8 +1,21 @@
 * Bison 3.6
+** Questions
+*** Java
+- Should i18n be part of the Lexer?  Currently it's a static method of
+  Lexer.
+
+- is there a migration path that would allow to use TokenKinds in
+  yylex?
+
+*** D
+- is there a way to attach yysymbol_name to the enum itself?  As we did
+  in Java.
+
+- It would be better to have TokenKind as return value.  Can we use
+  reflection to support both output types?
+
 ** Documentation
-- yyexpected_tokens/expected_tokens/expectedTokens in all the languages.
 - YYERRCODE, YYUNDEF, YYEOF
-- i18n in Java
 - symbol.type_get should be kind_get, and it's not documented.
 - YYERRCODE and "end of file" and translation
 
@@ -11,9 +24,6 @@ You can explicitly specify the numeric code for a token type...
 
 The token numbered as 0.
 
-Therefore each time the scanner returns an (external) token number,
-it must be mapped to the (internal) symbol number.
-
 ** Java: EOF
 We should be able to redefine EOF like we do in C.
 
@@ -120,11 +130,6 @@ https://www.cs.tufts.edu/~nr/cs257/archive/clinton-jefferey/lr-error-messages.pd
 https://research.swtch.com/yyerror
 http://gallium.inria.fr/~fpottier/publis/fpottier-reachability-cc2016.pdf
 
-* D
-** yylex
-It would be better to have TokenKind as return value.  Can we use reflexion
-to support both output types?
-
 * Modernization
 Fix data/skeletons/yacc.c so that it defines YYPTRDIFF_T properly for modern
 and older C++ compilers.  Currently the code defaults to defining it to
diff --git a/data/skeletons/java.m4 b/data/skeletons/java.m4
index d09890a1..ba44ce8e 100644
--- a/data/skeletons/java.m4
+++ b/data/skeletons/java.m4
@@ -174,10 +174,10 @@ m4_define([b4_declare_symbol_enum],
   {
 ]b4_symbol_foreach([b4_symbol_enum])[
 
-    private final int code_;
+    private final int yycode_;
 
     SymbolKind (int n) {
-      this.code_ = n;
+      this.yycode_ = n;
     }
 
     private static final SymbolKind[] values_ = {
@@ -185,13 +185,66 @@ m4_define([b4_declare_symbol_enum],
       ], b4_symbol_numbers)[
     };
 
-    static final SymbolKind get (int code) {
+    static final SymbolKind get(int code) {
       return values_[code];
     }
 
-    public final int getCode () {
-      return this.code_;
+    public final int getCode() {
+      return this.yycode_;
     }
+
+]b4_parse_error_bmatch(
+[simple\|verbose],
+[[    /* Return YYSTR after stripping away unnecessary quotes and
+       backslashes, so that it's suitable for yyerror.  The heuristic is
+       that double-quoting is unnecessary unless the string contains an
+       apostrophe, a comma, or backslash (other than backslash-backslash).
+       YYSTR is taken from yytname.  */
+    private static String yytnamerr_(String yystr)
+    {
+      if (yystr.charAt (0) == '"')
+        {
+          StringBuffer yyr = new StringBuffer();
+          strip_quotes: for (int i = 1; i < yystr.length(); i++)
+            switch (yystr.charAt(i))
+              {
+              case '\'':
+              case ',':
+                break strip_quotes;
+
+              case '\\':
+                if (yystr.charAt(++i) != '\\')
+                  break strip_quotes;
+                /* Fall through.  */
+              default:
+                yyr.append(yystr.charAt(i));
+                break;
+
+              case '"':
+                return yyr.toString();
+              }
+        }
+      return yystr;
+    }
+
+    /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
+       First, the terminals, then, starting at \a YYNTOKENS_, nonterminals.  */
+    ]b4_typed_parser_table_define([String], [tname], [b4_tname])[
+
+    /* The user-facing name of this symbol.  */
+    public final String getName() {
+      return yytnamerr_(yytname_[yycode_]);
+    }
+]],
+[custom\|detailed],
+[[    /* YYNAMES_[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
+       First, the terminals, then, starting at \a YYNTOKENS_, nonterminals.  */
+    ]b4_typed_parser_table_define([String], [names], [b4_symbol_names])[
+
+    /* The user-facing name of this symbol.  */
+    public final String getName() {
+      return yynames_[yycode_];
+    }]])[
   };
 ]])])
 
diff --git a/data/skeletons/lalr1.java b/data/skeletons/lalr1.java
index e5b0f729..813c7757 100644
--- a/data/skeletons/lalr1.java
+++ b/data/skeletons/lalr1.java
@@ -507,7 +507,7 @@ import java.text.MessageFormat;
                               b4_locations_if([, Object yylocationp])[)
   {
     yycdebug (s + (yykind.getCode () < YYNTOKENS_ ? " token " : " nterm ")
-              + yysymbolName (yykind) + " ("]b4_locations_if([
+              + yykind.getName() + " ("]b4_locations_if([
               + yylocationp + ": "])[
               + (yyvaluep == null ? "(null)" : yyvaluep.toString ()) + ")");
   }]])[
@@ -880,7 +880,7 @@ b4_dollar_popdef[]dnl
     /**
      * The symbol kind of the lookahead token.
      */
-    public SymbolKind getToken ()
+    public final SymbolKind getToken ()
     {
       return yytoken;
     }
@@ -890,7 +890,7 @@ b4_dollar_popdef[]dnl
     /**
      * The location of the lookahead.
      */
-    public ]b4_location_type[ getLocation ()
+    public final ]b4_location_type[ getLocation ()
     {
       return yylocation;
     }
@@ -937,15 +937,6 @@ b4_dollar_popdef[]dnl
         }
       return yycount - yyoffset;
     }
-
-    /**
-     * The user-facing name of the symbol whose (internal) number is
-     * YYSYMBOL.  No bounds checking.
-     */
-    static String yysymbolName (SymbolKind yysymbol)
-    {
-      return ]b4_parser_class[.yysymbolName (yysymbol);
-    }
   }
 
 ]b4_parse_error_bmatch(
@@ -1005,7 +996,7 @@ b4_dollar_popdef[]dnl
         int yycount = yysyntaxErrorArguments (yyctx, yyarg, argmax);
         String[] yystr = new String[yycount];
         for (int yyi = 0; yyi < yycount; ++yyi)
-          yystr[yyi] = yysymbolName (yyarg[yyi]);
+          yystr[yyi] = yyarg[yyi].getName();
         String yyformat;
         switch (yycount)
           {
@@ -1049,63 +1040,6 @@ b4_dollar_popdef[]dnl
 
 ]b4_parser_tables_define[
 
-]b4_parse_error_bmatch(
-           [simple\|verbose],
-[[  /* Return YYSTR after stripping away unnecessary quotes and
-     backslashes, so that it's suitable for yyerror.  The heuristic is
-     that double-quoting is unnecessary unless the string contains an
-     apostrophe, a comma, or backslash (other than backslash-backslash).
-     YYSTR is taken from yytname.  */
-  private static String yytnamerr_ (String yystr)
-  {
-    if (yystr.charAt (0) == '"')
-      {
-        StringBuffer yyr = new StringBuffer ();
-        strip_quotes: for (int i = 1; i < yystr.length (); i++)
-          switch (yystr.charAt (i))
-            {
-            case '\'':
-            case ',':
-              break strip_quotes;
-
-            case '\\':
-              if (yystr.charAt(++i) != '\\')
-                break strip_quotes;
-              /* Fall through.  */
-            default:
-              yyr.append (yystr.charAt (i));
-              break;
-
-            case '"':
-              return yyr.toString ();
-            }
-      }
-    return yystr;
-  }
-
-  /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
-     First, the terminals, then, starting at \a YYNTOKENS_, nonterminals.  */
-  ]b4_typed_parser_table_define([String], [tname], [b4_tname])[
-
-  /* The user-facing name of the symbol whose (internal) number is
-     YYSYMBOL.  No bounds checking.  */
-  static String yysymbolName (SymbolKind yysymbol)
-  {
-    return yytnamerr_ (yytname_[yysymbol.getCode ()]);
-  }
-]],
-        [custom\|detailed],
-[[  /* The user-facing name of the symbol whose (internal) number is
-     YYSYMBOL.  No bounds checking.  */
-  static String yysymbolName (SymbolKind yysymbol)
-  {
-    String[] yy_sname =
-    {
-    ]b4_symbol_names[
-    };
-    return yy_sname[yysymbol.getCode ()];
-  }]])[
-
 ]b4_parse_trace_if([[
   ]b4_integral_parser_table_define([rline], [b4_rline],
   [[YYRLINE[YYN] -- Source line where rule number YYN was defined.]])[
diff --git a/doc/bison.texi b/doc/bison.texi
index c7084032..abf49b5d 100644
--- a/doc/bison.texi
+++ b/doc/bison.texi
@@ -13125,6 +13125,22 @@ or nonzero, full tracing.
 Identify the Bison version and skeleton used to generate this parser.
 @end deftypecv
 
+If you enabled token internationalization (@pxref{Token I18n}), you must
+provide the parser with the following function:
+
+@deftypecv {Static Method} {YYParser} {String} {i18n} (@code{String} @var{s})
+Return the translation of @var{s} in the user's language.  As an example:
+
+@example
+%code @{
+  static ResourceBundle myResources
+    = ResourceBundle.getBundle("domain-name");
+  static final String i18n(String s) @{
+    return myResources.getString(s);
+  @}
+@}
+@end example
+@end deftypecv
 
 @node Java Parser Context Interface
 @subsection Java Parser Context Interface
@@ -13132,9 +13148,35 @@ Identify the Bison version and skeleton used to generate this parser.
 The parser context provides information to build error reports when you
 invoke @samp{%define parse.error custom}.
 
+@defcv {Type} {YYParser} {SymbolKind}
+An enum that includes all the grammar symbols, tokens and nonterminals.  Its
+enumerators are forged from the symbol names:
+
+@example
+public enum SymbolKind
+@{
+  S_YYEOF(0),          /* "end of file"  */
+  S_YYERROR(1),        /* error  */
+  S_YYUNDEF(2),        /* "invalid token"  */
+  S_BANG(3),           /* "!"  */
+  S_PLUS(4),           /* "+"  */
+  S_MINUS(5),          /* "-"  */
+  [...]
+  S_NUM(13),           /* "number"  */
+  S_NEG(14),           /* NEG  */
+  S_YYACCEPT(15),      /* $accept  */
+  S_input(16),         /* input  */
+  S_line(17);          /* line  */
+@};
+@end example
+@end defcv
+
+@deftypemethod {YYParser.SymbolKind} {String} getName ()
+The name of this symbol, possibly translated.
+@end deftypemethod
+
 @deftypemethod {YYParser.Context} {YYParser.SymbolKind} getToken ()
-The kind of the lookahead.  Maybe return @code{null} when there is no
-lookahead.
+The kind of the lookahead.  Return @code{null} iff there is no lookahead.
 @end deftypemethod
 
 @deftypemethod {YYParser.Context} {YYParser.Location} getLocation ()
@@ -13143,14 +13185,12 @@ The location of the lookahead.
 
 @deftypemethod {YYParser.Context} {int} getExpectedTokens (@code{YYParser.SymbolKind[]} @var{argv}, @code{int} @var{argc})
 Fill @var{argv} with the expected tokens, which never includes
-@code{YYSYMBOL_YYEMPTY}, @code{YYSYMBOL_YYERROR}, or
-@code{YYSYMBOL_YYUNDEF}.
+@code{SymbolKind.S_YYERROR}, or @code{SymbolKind.S_YYUNDEF}.
 
 Never put more than @var{argc} elements into @var{argv}, and on success
 return the effective number of tokens stored in @var{argv}.  Return 0 if
 there are more than @var{argc} expected tokens, yet fill @var{argv} up to
-@var{argc}.  When LAC is enabled, may return a negative number on errors,
-such as @code{YYENOMEM} on memory exhaustion.
+@var{argc}.
 
 If @var{argv} is null, return the size needed to store all the possible
 values, which is always less than @code{YYNTOKENS}.
@@ -13227,28 +13267,28 @@ Declarations}), then the parser no longer passes syntax error messages to
 
 Whether it uses @code{yyerror} is up to the user.
 
-Here is a typical example of a reporting function.
+Here is an example of a reporting function (@pxref{Java Parser Context
+Interface}).
 
 @example
-public void yyreportSyntaxError (YYParser.Context ctx)
-@{
-  System.err.print (ctx.getLocation () + ": syntax error");
+public void reportSyntaxError(YYParser.Context ctx) @{
+  System.err.print(ctx.getLocation() + ": syntax error");
   // Report the expected tokens.
   @{
     final int TOKENMAX = 5;
     YYParser.SymbolKind[] arg = new YYParser.SymbolKind[TOKENMAX];
-    int n = ctx.getExpectedTokens (arg, TOKENMAX);
+    int n = ctx.getExpectedTokens(arg, TOKENMAX);
     for (int i = 0; i < n; ++i)
-      System.err.print ((i == 0 ? ": expected " : " or ")
-                        + ctx.yysymbolName (arg[i]));
+      System.err.print((i == 0 ? ": expected " : " or ")
+                       + arg[i].getName());
   @}
   // Report the unexpected token which triggered the error.
   @{
-    YYParser.SymbolKind lookahead = ctx.getToken ();
+    YYParser.SymbolKind lookahead = ctx.getToken();
     if (lookahead != null)
-      System.err.print (" before " + ctx.yysymbolName (lookahead));
+      System.err.print(" before " + lookahead.getName());
   @}
-  System.err.println ("");
+  System.err.println("");
 @}
 @end example
 @end deftypemethod
diff --git a/examples/java/calc/Calc.y b/examples/java/calc/Calc.y
index 948c7f1c..8070e0c1 100644
--- a/examples/java/calc/Calc.y
+++ b/examples/java/calc/Calc.y
@@ -121,12 +121,12 @@ class CalcLexer implements Calc.Lexer {
       int n = ctx.getExpectedTokens(arg, TOKENMAX);
       for (int i = 0; i < n; ++i)
         System.err.print((i == 0 ? ": expected " : " or ")
-                         + ctx.yysymbolName(arg[i]));
+                         + arg[i].getName());
     }
     {
       Calc.SymbolKind lookahead = ctx.getToken();
       if (lookahead != null)
-        System.err.print(" before " + ctx.yysymbolName(lookahead));
+        System.err.print(" before " + lookahead.getName());
     }
     System.err.println("");
   }
diff --git a/tests/local.at b/tests/local.at
index 7d63142c..d3ded8f4 100644
--- a/tests/local.at
+++ b/tests/local.at
@@ -958,13 +958,13 @@ class PositionReader extends BufferedReader {
 # FIXME: We should not hard-code "Calc".
 m4_define([AT_YYERROR_DEFINE(java)],
 [AT_LOCATION_IF([[public void yyerror (Calc.Location l, String m)
-  {]m4_bmatch(m4_defn([AT_PARSE_PARAMS]), [nerrs],[[
+  {]m4_bmatch(m4_defn([AT_PARSE_PARAMS]), [nerrs], [[
     ++global_nerrs;
     ++*nerrs;]])[
     if (l == null)
-      System.err.println (m);
+      System.err.println(m);
     else
-      System.err.println (l + ": " + m);
+      System.err.println(l + ": " + m);
   }
 ]], [[
   public void yyerror (String m)
@@ -976,27 +976,25 @@ m4_define([AT_YYERROR_DEFINE(java)],
 ]])[
 
 ]AT_ERROR_CUSTOM_IF([[
-  public void reportSyntaxError (Calc.Context ctx)
-  {
-    System.err.print (]AT_LOCATION_IF([[ctx.getLocation () + ": "]]
-                      + )["syntax error");
+  public void reportSyntaxError(Calc.Context ctx) {
+    System.err.print(]AT_LOCATION_IF([[ctx.getLocation() + ": "
+                     + ]])["syntax error");
     {
-      Calc.SymbolKind token = ctx.getToken ();
+      Calc.SymbolKind token = ctx.getToken();
       if (token != null)
-        System.err.print (" on token @<:@" + ctx.yysymbolName (token) + "@:>@");
+        System.err.print(" on token @<:@" + token.getName() + "@:>@");
     }
     {
       Calc.SymbolKind[] arg = new Calc.SymbolKind[ctx.NTOKENS];
-      int n = ctx.getExpectedTokens (arg, ctx.NTOKENS);
-      if (0 < n)
-        {
-          System.err.print (" (expected:");
-          for (int i = 0; i < n; ++i)
-            System.err.print (" @<:@" + ctx.yysymbolName (arg[i]) + "@:>@");
-          System.err.print (")");
-        }
+      int n = ctx.getExpectedTokens(arg, ctx.NTOKENS);
+      if (0 < n) {
+        System.err.print(" (expected:");
+        for (int i = 0; i < n; ++i)
+          System.err.print(" @<:@" + arg[i].getName() + "@:>@");
+        System.err.print(")");
+      }
     }
-    System.err.println ("");
+    System.err.println("");
   }
 ]])
 ])
-- 
2.26.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]