[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
double-reads with push parser
From: |
Ryan |
Subject: |
double-reads with push parser |
Date: |
Sat, 6 Mar 2021 17:33:31 -0700 |
I've noticed a strange behavior with my push parser on bison 3.7+, where
when I push multiple identical lines through it, the first one parses
correctly, but all subsequent lines parse the first token of the line twice.
I'm not sure if this is a bug in bison or if there's something wrong
with my push loop, but it definitely worked as I expect it to as of
3.6.4.
Code to reproduce and the output of that code is below.
Ryan
// makefile ///////
BISON := bison
# works:
# BISON := $(HOME)/code/source/bison/bison-3.6.4/install/bin/bison
# does not work:
# BISON := $(HOME)/code/source/bison/bison-3.7/install/bin/bison

# `all` names no file, so mark it phony.
.PHONY: all
all: bug

# Generate the parser (bug.tab.c, plus bug.tab.h because of -d) and compile it.
# Recipe lines must begin with a tab character.
bug: bug.y makefile
	$(BISON) -d -o bug.tab.c bug.y
	gcc -Werror -Wall -Wextra -o bug bug.tab.c
// bug.y //////////
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Error-reporting hook for the generated parser.
//
// token: the string handed to the parser through %parse-param
// s:     the diagnostic message to report
//
// Writes both, on a single line, to standard output.
void yyerror(const char *token, char const *s)
{
    fprintf(stdout, "yyerror: %s: %s\n", token, s);
}
// Semantic value type of the parser; selected below with
// `%define api.value.type {bug_expr_t}`.
typedef union {
char *string; // heap-allocated text of a `text` nonterminal; freed by the <string> %destructor
} bug_expr_t;
#include "bug.tab.h"
// Join two heap-allocated strings into one newly allocated string.
//
// Ownership: both a and b are consumed -- they are freed on every path,
// success or failure -- so the caller must not touch them afterwards.
//
// Returns the malloc'd concatenation (caller frees it), or NULL when either
// input is NULL (e.g. a failed strdup at the call site) or when the
// allocation of the result fails.
static char *string_concat(char *a, char *b){
    char *joined = NULL;

    if(a != NULL && b != NULL){
        size_t la = strlen(a);
        size_t lb = strlen(b);
        joined = malloc(la + lb + 1);
        if(joined != NULL){
            memcpy(joined, a, la);
            // lb + 1 also copies b's terminating NUL
            memcpy(joined + la, b, lb + 1);
        }
    }

    // free(NULL) is a no-op, so no guards are needed here
    free(a);
    free(b);
    return joined;
}
// Push one token into the push parser and report whether parsing may go on.
//
// yyps:  parser state the token is pushed into
// type:  token kind (RAW or EOL)
// token: the token's text; it is forwarded as the %parse-param, while the
//        pushed semantic value itself is NULL
//
// Returns 0 if the parser accepted the input or is waiting for more tokens,
// and 1 (after printing a message) for any other result.
static int parse(void *yyps, int type, const char *token){
    const int status = yypush_parse(yyps, type, NULL, token);

    // accepted, or parsing incomplete but valid (parser not reset)
    if(status == 0 || status == YYPUSH_MORE){
        return 0;
    }

    if(status == 1){
        // YYABORT or syntax invalid; parser is reset
        printf("invalid input, but no error was thrown\n");
    }else if(status == 2){
        // memory exhaustion; parser is reset
        printf("memory exhaustion during yypush_parse\n");
    }else{
        // any other value is unexpected; report it verbatim
        printf("yypush_parse() returned %d\n", status);
    }
    return 1;
}
// Reproducer driver: pushes the same three-token line through one parser
// state twice, announcing each push before handing it to parse().
//
// Returns 0 when every push succeeds, 1 if the parser state cannot be
// allocated or any push fails.  The parser state is released on every path.
int main(void){
    // The pushes, in order.  Each entry is the token text and its kind.
    static const struct {
        const char *str;
        int typ;
    } pushes[] = {
        /* Feed the parser with a "text" token that is broken into two
           pushes, as if the token arrived over two separate network
           packets. */
        {"te", RAW},
        {"xt", RAW},
        {"\r\n", EOL},
        // Do it again, but notice the result is different.
        {"te", RAW},
        {"xt", RAW},
        {"\r\n", EOL},
    };
    const size_t npushes = sizeof pushes / sizeof pushes[0];

    void *yyps = yypstate_new();
    if(yyps == NULL){
        // yypstate_new() yields a null pointer when memory is unavailable
        printf("memory exhaustion during yypstate_new\n");
        return 1;
    }

    int ret = 0;
    for(size_t i = 0; i < npushes; i++){
        printf("parsing: '%s' (%d)\n", pushes[i].str, pushes[i].typ);
        if(parse(yyps, pushes[i].typ, pushes[i].str)){
            // stop at the first failure, but still fall through to cleanup
            ret = 1;
            break;
        }
    }

    yypstate_delete(yyps);
    return ret;
}
%}
/* Semantic values use the bug_expr_t union declared in the prologue. */
%define api.value.type {bug_expr_t}
/* Pure (reentrant) push parser: tokens are handed in one at a time through
   yypush_parse() instead of being pulled from yylex(). */
%define api.pure full
%define api.push-pull push
/* Extra argument of yypush_parse() and yyerror(); the actions read the
   current token's text from it. */
%parse-param { const char *token }
/* The grammar is expected to have no conflicts. */
%expect 0
/* RAW: a fragment of text; EOL: end of line. */
%token RAW
%token EOL
/* `text` carries the `string` member of the value union. */
%type <string> text
/* Free any <string> value that the parser discards. */
%destructor { free($$); } <string>
%%
/* A complete line: the accumulated text followed by EOL.  The action prints
   the text, frees it, and calls YYACCEPT. */
line: text EOL { printf("text: %s\n", $text); free($text); YYACCEPT; };
/* Text is built up from one or more RAW pushes.  The characters come from the
   %parse-param `token`, because parse() pushes NULL as the semantic value. */
text: RAW {
/* first fragment: start the string with a copy of the pushed token */
$$ = strdup(token);
printf(" RAW: '%s'\n", token);
} | text[t] RAW {
/* later fragment: string_concat() frees both $t and the new copy and returns
   the joined string */
printf(" text[t] RAW: '%s' + '%s'\n", $t, token);
$$ = string_concat($t, strdup(token));
}
;
// output (using bison 3.7 or later) /////////////////
parsing: 'te' (258)
RAW: 'te'
parsing: 'xt' (258)
text[t] RAW: 'te' + 'xt'
parsing: '
' (259)
text: text
parsing: 'te' (258)
RAW: 'te'
text[t] RAW: 'te' + 'te'
parsing: 'xt' (258)
text[t] RAW: 'tete' + 'xt'
parsing: '
' (259)
text: tetext
//////////////////////////
- double-reads with push parser,
Ryan <=