[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH] master: deterministic user-token-number redeclaration errors
From: |
Joel E. Denny |
Subject: |
Re: [PATCH] master: deterministic user-token-number redeclaration errors. |
Date: |
Tue, 11 Aug 2009 00:02:09 -0400 (EDT) |
User-agent: |
Alpine 1.00 (DEB 882 2007-12-20) |
On Sat, 8 Aug 2009, Joel E. Denny wrote:
> In my offline work, I just realized that some of my test cases are broken
> by this nondeterminism. I'd like to eliminate it. What do you think of
> the following patch, written for master but not yet pushed?
>
> >From 7be421fe9c41514ff2bd8774282b2e544e4e33f7 Mon Sep 17 00:00:00 2001
> From: Joel E. Denny <address@hidden>
> Date: Sat, 8 Aug 2009 20:19:01 -0400
> Subject: [PATCH] Make it easier to write deterministic tests.
>
> Continues Akim's work from his 2009-06-10 commits.
> * src/reader.c (check_and_convert_grammar): Don't add any
> symbols after the first symbols_do invocation.
> * src/symtab.c (symbols_sorted): New static global.
> (user_token_number_redeclaration): Update comments.
> (symbol_from_uniqstr): Assert that symbols_sorted hasn't been
> allocated yet.
> (symbols_free): Free symbols_sorted.
> (symbols_cmp, symbols_cmp_qsort): New functions.
> (symbols_do): Sort symbol_table into symbols_sorted on first
> invocation.
> * tests/input.at (Numbered tokens): Recombine tests now that the
> output should be deterministic across multiple numbers.
Here it is again, rebased to the current master but not yet pushed. I've
also made the assert in symbol_from_uniqstr a little more reasonable, but
I don't believe this change affects Bison's current behavior.
>From cc26a3aa064647f7a46aa6d5f3ad0932c8a6b835 Mon Sep 17 00:00:00 2001
From: Joel E. Denny <address@hidden>
Date: Sat, 8 Aug 2009 20:19:01 -0400
Subject: [PATCH] Make it easier to write deterministic tests.
Continues Akim's work from his 2009-06-10 commits.
* src/reader.c (check_and_convert_grammar): Don't add any
symbols after the first symbols_do invocation.
* src/symtab.c (symbols_sorted): New static global.
(user_token_number_redeclaration): Update comments.
(symbol_from_uniqstr): If a new symbol is being created, assert
that symbols_sorted hasn't been allocated yet.
(symbols_free): Free symbols_sorted.
(symbols_cmp, symbols_cmp_qsort): New functions.
(symbols_do): Sort symbol_table into symbols_sorted on first
invocation.
* tests/input.at (Numbered tokens): Recombine tests now that the
output should be deterministic across multiple numbers.
---
ChangeLog | 17 +++++++++++++++++
src/reader.c | 6 +++---
src/symtab.c | 42 +++++++++++++++++++++++++++++++++++++-----
tests/input.at | 40 +++++++++++++++-------------------------
4 files changed, 72 insertions(+), 33 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 25bd09c..e34da20 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2009-08-08 Joel E. Denny <address@hidden>
+
+ Make it easier to write deterministic tests.
+ Continues Akim's work from his 2009-06-10 commits.
+ * src/reader.c (check_and_convert_grammar): Don't add any
+ symbols after the first symbols_do invocation.
+ * src/symtab.c (symbols_sorted): New static global.
+ (user_token_number_redeclaration): Update comments.
+ (symbol_from_uniqstr): If a new symbol is being created, assert
+ that symbols_sorted hasn't been allocated yet.
+ (symbols_free): Free symbols_sorted.
+ (symbols_cmp, symbols_cmp_qsort): New functions.
+ (symbols_do): Sort symbol_table into symbols_sorted on first
+ invocation.
+ * tests/input.at (Numbered tokens): Recombine tests now that the
+ output should be deterministic across multiple numbers.
+
2009-08-10 Joel E. Denny <address@hidden>
* tests/local.mk (TESTSUITE_AT): Add named-refs.at.
diff --git a/src/reader.c b/src/reader.c
index 14624df..05fa9ee 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -641,9 +641,6 @@ check_and_convert_grammar (void)
if (nrules == 0)
fatal (_("no rules in the input grammar"));
- /* Report any undefined symbols and consider them nonterminals. */
- symbols_check_defined ();
-
/* If the user did not define her ENDTOKEN, do it now. */
if (!endtoken)
{
@@ -654,6 +651,9 @@ check_and_convert_grammar (void)
endtoken->user_token_number = 0;
}
+ /* Report any undefined symbols and consider them nonterminals. */
+ symbols_check_defined ();
+
/* Find the start symbol if no %start. */
if (!start_flag)
{
diff --git a/src/symtab.c b/src/symtab.c
index f702054..cac9f80 100644
--- a/src/symtab.c
+++ b/src/symtab.c
@@ -29,6 +29,13 @@
#include "gram.h"
#include "symtab.h"
+/*-------------------------------------------------------------------.
+| Symbols sorted by tag. Allocated by the first invocation of |
+| symbols_do, after which no more symbols should be created. |
+`-------------------------------------------------------------------*/
+
+static symbol **symbols_sorted = NULL;
+
/*------------------------.
| Distinguished symbols. |
`------------------------*/
@@ -583,10 +590,10 @@ static void
user_token_number_redeclaration (int num, symbol *first, symbol *second)
{
/* User token numbers are not assigned during the parsing, but in a
- second step, via a (nondeterministic) traversal of the symbol
- hash table.
+ second step, via a traversal of the symbol table sorted on tag.
- Make errors deterministic: keep the first declaration first. */
+ However, error messages make more sense if we keep the first
+ declaration first. */
if (location_cmp (first->location, second->location) > 0)
{
symbol* tmp = first;
@@ -730,6 +737,7 @@ symbol_from_uniqstr (const uniqstr key, location loc)
if (!entry)
{
/* First insertion in the hash. */
+ aver (!symbols_sorted);
entry = symbol_new (key, loc);
if (!hash_insert (symbol_table, entry))
xalloc_die ();
@@ -824,6 +832,7 @@ symbols_free (void)
hash_free (symbol_table);
hash_free (semantic_type_table);
free (symbols);
+ free (symbols_sorted);
}
@@ -832,13 +841,36 @@ symbols_free (void)
| terminals. |
`---------------------------------------------------------------*/
+static int
+symbols_cmp (symbol const *a, symbol const *b)
+{
+ return strcmp (a->tag, b->tag);
+}
+
+static int
+symbols_cmp_qsort (void const *a, void const *b)
+{
+ return symbols_cmp (*(symbol * const *)a, *(symbol * const *)b);
+}
+
static void
symbols_do (Hash_processor processor, void *processor_data)
{
- hash_do_for_each (symbol_table, processor, processor_data);
+ size_t count = hash_get_n_entries (symbol_table);
+ if (!symbols_sorted)
+ {
+ symbols_sorted = xnmalloc (count, sizeof *symbols_sorted);
+ hash_get_entries (symbol_table, (void**)symbols_sorted, count);
+ qsort (symbols_sorted, count, sizeof *symbols_sorted,
+ symbols_cmp_qsort);
+ }
+ {
+ size_t i;
+ for (i = 0; i < count; ++i)
+ processor (symbols_sorted[i], processor_data);
+ }
}
-
/*--------------------------------------------------------------.
| Check that all the symbols are defined. Report any undefined |
| symbols and consider them nonterminals. |
diff --git a/tests/input.at b/tests/input.at
index 9c62f9e..146d581 100644
--- a/tests/input.at
+++ b/tests/input.at
@@ -675,33 +675,23 @@ AT_CLEANUP
AT_SETUP([Numbered tokens])
-AT_DATA_GRAMMAR([1.y],
-[[%token DECIMAL 11259375
- HEXADECIMAL 0xabcdef
+AT_DATA_GRAMMAR([redecl.y],
+[[%token DECIMAL_1 11259375
+ HEXADECIMAL_1 0xabcdef
+ HEXADECIMAL_2 0xFEDCBA
+ DECIMAL_2 16702650
%%
-start: DECIMAL;
+start: DECIMAL_1 HEXADECIMAL_2;
]])
-AT_BISON_CHECK([1.y], [1], [],
-[[1.y:10.10-20: user token number 11259375 redeclaration for HEXADECIMAL
-1.y:9.8-14: previous declaration for DECIMAL
+AT_BISON_CHECK([redecl.y], [1], [],
+[[redecl.y:10.10-22: user token number 11259375 redeclaration for HEXADECIMAL_1
+redecl.y:9.8-16: previous declaration for DECIMAL_1
+redecl.y:12.10-18: user token number 16702650 redeclaration for DECIMAL_2
+redecl.y:11.10-22: previous declaration for HEXADECIMAL_2
]])
-
-AT_DATA_GRAMMAR([2.y],
-[[%token HEXADECIMAL 0xabcdef
- DECIMAL 11259375
-%%
-start: HEXADECIMAL;
-]])
-
-AT_BISON_CHECK([2.y], [1], [],
-[[2.y:10.10-16: user token number 11259375 redeclaration for DECIMAL
-2.y:9.8-18: previous declaration for HEXADECIMAL
-]])
-
-
-AT_DATA_GRAMMAR([3.y],
+AT_DATA_GRAMMAR([too-large.y],
[[%token TOO_LARGE_DEC 999999999999999999999
TOO_LARGE_HEX 0xFFFFFFFFFFFFFFFFFFF
%%
@@ -709,9 +699,9 @@ start: TOO_LARGE_DEC TOO_LARGE_HEX
%%
]])
-AT_BISON_CHECK([3.y], [1], [],
-[[3.y:9.22-42: integer out of range: `999999999999999999999'
-3.y:10.24-44: integer out of range: `0xFFFFFFFFFFFFFFFFFFF'
+AT_BISON_CHECK([too-large.y], [1], [],
+[[too-large.y:9.22-42: integer out of range: `999999999999999999999'
+too-large.y:10.24-44: integer out of range: `0xFFFFFFFFFFFFFFFFFFF'
]])
AT_CLEANUP
--
1.5.4.3