[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH for Dlang support 1/2] d: change the return value of yylex fr
From: |
Akim Demaille |
Subject: |
Re: [PATCH for Dlang support 1/2] d: change the return value of yylex from TokenKind to YYParser.Symbol |
Date: |
Fri, 20 Nov 2020 19:18:00 +0100 |
Hi Adela,
This looks great! I'll install it as is. The suggested changes
below are for forthcoming commits.
> Le 20 nov. 2020 à 15:40, Adela Vais <adela.vais99@gmail.com> a écrit :
>
> +# b4_symbol_type_define
> +# ---------------------
> +# Define symbol_type, the external type for symbols used for symbol
> +# constructors.
> +m4_define([b4_symbol_type_define],
> +[[
> + /**
> + * A complete symbol
> + */
> + struct Symbol
> + {
> + private SymbolKind kind;
> + private ]b4_yystype[ value;]b4_locations_if([[
> + private YYLocation location_;]])[
You should introduce type aliases for b4_yystype and YYLocation.
In the C++ parser, you have value_type and location_type which
are defined to whatever they are actually. The code is nicer
to read, with fewer occurrences of ugly YYnames.
> + SymbolKind token() { return kind; }
> + ]b4_yystype[ semanticValue() { return value; }]b4_locations_if([[
Make this value instead of semanticValue.
> @@ -75,7 +75,7 @@ public interface Lexer
> * to the next token and prepares to return the semantic value
> * ]b4_locations_if([and beginning/ending positions ])[of the token.
> * @@return the token identifier corresponding to the next token. */
> - TokenKind yylex ();
> + ]b4_parser_class[.Symbol yylex ();
Can't you provide an alias to avoid the need for the full path?
> @@ -411,7 +411,9 @@ b4_locations_if([, ref ]b4_location_type[ yylocationp])[)
> yycdebugln (message);
> }
> }
> -]])[
> +]])
> +b4_symbol_type_define
> +[
I would prefer
> ]])[
> ]b4_symbol_type_define[
>
for consistency.
> if (yychar == TokenKind.]b4_symbol(empty, id)[)
> {]b4_parse_trace_if([[
> yycdebugln ("Reading a token");]])[
> - yychar = yylex ();]b4_locations_if([[
> - static if (yy_location_is_class) {
> - yylloc = new ]b4_location_type[(yylexer.startPos,
> yylexer.endPos);
> - } else {
> - yylloc = ]b4_location_type[(yylexer.startPos, yylexer.endPos);
> - }]])
> - yylval = yylexer.semanticVal;[
> + Symbol yysymbol = yylex();
> + yychar = yysymbol.token();
Maybe you don't need yychar, but only need yytoken. You probably
can avoid dealing with the token-kinds here, and deal only with
the symbol kinds.
> + yylval = yysymbol.semanticValue();]b4_locations_if([[
> + yylloc = yysymbol.location();]])[
> }
>
> /* Convert token to internal form. */
> +@deftypemethod {Lexer} {YYParser.Symbol} yylex()
> +Return the next token. The return value is of type YYParser.Symbol,
Use @code{YYParser.Symbol}.
> which
> +binds together the TokenKind, the semantic value and the location.
@code for TokenKind too.
> @end deftypemethod
>
> @deftypemethod {Lexer} {YYPosition} getStartPos()
> diff --git a/examples/d/calc/calc.y b/examples/d/calc/calc.y
> index bee3fc94..3f41f048 100644
> --- a/examples/d/calc/calc.y
> +++ b/examples/d/calc/calc.y
> @@ -114,7 +114,7 @@ if (isInputRange!R && is(ElementType!R : dchar))
> return semanticVal_;
> }
>
> - TokenKind yylex()
> + Calc.Symbol yylex()
> {
> import std.uni : isWhite, isNumber;
>
> @@ -127,7 +127,7 @@ if (isInputRange!R && is(ElementType!R : dchar))
> }
>
> if (input.empty)
> - return TokenKind.YYEOF;
> + return Calc.Symbol(TokenKind.YYEOF, new YYLocation(startPos, endPos));
>
> // Numbers.
> if (input.front.isNumber)
> @@ -143,7 +143,7 @@ if (isInputRange!R && is(ElementType!R : dchar))
> }
> start = end;
> end.column += lenChars;
> - return TokenKind.NUM;
> + return Calc.Symbol(TokenKind.NUM, semanticVal_.ival, new
> YYLocation(startPos, endPos));
> }
>
> // Individual characters
> @@ -153,17 +153,17 @@ if (isInputRange!R && is(ElementType!R : dchar))
> end.column++;
> switch (ch)
> {
> - case '+': return TokenKind.PLUS;
> - case '-': return TokenKind.MINUS;
> - case '*': return TokenKind.STAR;
> - case '/': return TokenKind.SLASH;
> - case '(': return TokenKind.LPAR;
> - case ')': return TokenKind.RPAR;
> + case '+': return Calc.Symbol(TokenKind.PLUS, new YYLocation(startPos,
> endPos));
> + case '-': return Calc.Symbol(TokenKind.MINUS, new
> YYLocation(startPos, endPos));
> + case '*': return Calc.Symbol(TokenKind.STAR, new YYLocation(startPos,
> endPos));
> + case '/': return Calc.Symbol(TokenKind.SLASH, new
> YYLocation(startPos, endPos));
> + case '(': return Calc.Symbol(TokenKind.LPAR, new YYLocation(startPos,
> endPos));
> + case ')': return Calc.Symbol(TokenKind.RPAR, new YYLocation(startPos,
> endPos));
This is super verbose. Can't you factor some 'loc' variable and use it?
> @@ -133,13 +132,13 @@ if (isInputRange!R && is(ElementType!R : dchar))
> input.popFront;
> switch (ch)
> {
> - case '+': return TokenKind.PLUS;
> - case '-': return TokenKind.MINUS;
> - case '*': return TokenKind.STAR;
> - case '/': return TokenKind.SLASH;
> - case '(': return TokenKind.LPAR;
> - case ')': return TokenKind.RPAR;
> - case '\n': return TokenKind.EOL;
> + case '+': return Calc.Symbol(TokenKind.PLUS);
> + case '-': return Calc.Symbol(TokenKind.MINUS);
> + case '*': return Calc.Symbol(TokenKind.STAR);
> + case '/': return Calc.Symbol(TokenKind.SLASH);
> + case '(': return Calc.Symbol(TokenKind.LPAR);
> + case ')': return Calc.Symbol(TokenKind.RPAR);
> + case '\n': return Calc.Symbol(TokenKind.EOL);
In modern C++ there's a feature I like: the constructor can be
called implicitly. So for instance
MyStruct foo() { return 42; }
actually means
MyStruct foo() { return MyStruct(42); }
If D has something equivalent, you can probably simplify all this.
> switch (c)
> {
> - case '+': return TokenKind.]AT_TOKEN_PREFIX[PLUS;
> - case '-': return TokenKind.]AT_TOKEN_PREFIX[MINUS;
> - case '*': return TokenKind.]AT_TOKEN_PREFIX[STAR;
> - case '/': return TokenKind.]AT_TOKEN_PREFIX[SLASH;
> - case '(': return TokenKind.]AT_TOKEN_PREFIX[LPAR;
> - case ')': return TokenKind.]AT_TOKEN_PREFIX[RPAR;
> - case '\n': return TokenKind.]AT_TOKEN_PREFIX[EOL;
> - case '=': return TokenKind.]AT_TOKEN_PREFIX[EQUAL;
> - case '^': return TokenKind.]AT_TOKEN_PREFIX[POW;
> - case '!': return TokenKind.]AT_TOKEN_PREFIX[NOT;
> - default: return TokenKind.]AT_TOKEN_PREFIX[YYUNDEF;
> + case '+': return
> YYParser.Symbol(TokenKind.]AT_TOKEN_PREFIX[PLUS]AT_LOCATION_IF([[, new
> YYLocation(startPos, endPos)]])[);
> + case '-': return
> YYParser.Symbol(TokenKind.]AT_TOKEN_PREFIX[MINUS]AT_LOCATION_IF([[, new
> YYLocation(startPos, endPos)]])[);
> + case '*': return
> YYParser.Symbol(TokenKind.]AT_TOKEN_PREFIX[STAR]AT_LOCATION_IF([[, new
> YYLocation(startPos, endPos)]])[);
> + case '/': return
> YYParser.Symbol(TokenKind.]AT_TOKEN_PREFIX[SLASH]AT_LOCATION_IF([[, new
> YYLocation(startPos, endPos)]])
Same comments about verbosity and redundancy.
Great job Adela!