bison-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Complain about hidden left recursion in GLR parsing


From: Sylvain Schmitz
Subject: Complain about hidden left recursion in GLR parsing
Date: Mon, 13 Mar 2006 15:09:00 +0100
User-agent: Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.7) Gecko/20040616

Hi,

I recently read in <http://wiki.di.uminho.pt/wiki/pub/Joao/--TwikiJoaoPublications/technicalReport.pdf> that bison did not handle hidden left recursion in GLR mode, and indeed, it loops on the provided example.

Here is a simple modification so that bison complains about hidden left recursion, avoiding such cases until someone into GLR parsing implements one of the existing solutions.

--
Hope that helps,

  Sylvain

--- bison/src/closure.c 2005-12-10 00:51:25.000000000 +0100
+++ bison-cvs20060222/src/closure.c     2006-03-13 14:48:15.000000000 +0100
@@ -29,9 +29,11 @@
 #include <quotearg.h>
 
 #include "closure.h"
+#include "complain.h"
 #include "derives.h"
 #include "getargs.h"
 #include "gram.h"
+#include "nullable.h"
 #include "reader.h"
 #include "symtab.h"
 
@@ -44,6 +46,7 @@
 /* internal data.  See comments before set_fderives and set_firsts.  */
 static bitsetv fderives = NULL;
 static bitsetv firsts = NULL;
+static bitsetv next_vars = NULL;
 
 /* Retrieve the FDERIVES/FIRSTS sets of the nonterminals numbered Var.  */
 #define FDERIVES(Var)   fderives[(Var) - ntokens]
@@ -120,6 +123,10 @@
 | symbols 8 3 20, the symbol 8 can be the beginning of the data for |
 | symbol 5, so the bit [8 - ntokens] in first[5 - ntokens] (= FIRST |
 | (5)) is set.                                                      |
+|                                                                   |
+| NEXT_VARS is similar but also takes nullable symbols into         |
+| account.  It is used for the detection of hidden left recursion   |
+| in GLR parsing.                                                   |
 `------------------------------------------------------------------*/
 
 static void
@@ -128,23 +135,63 @@
   symbol_number i, j;
 
   firsts = bitsetv_create (nvars, nvars, BITSET_FIXED);
+  next_vars = bitsetv_create (nvars, nvars, BITSET_FIXED);
 
   for (i = ntokens; i < nsyms; i++)
     for (j = 0; derives[i - ntokens][j]; ++j)
       {
        item_number sym = derives[i - ntokens][j]->rhs[0];
        if (ISVAR (sym))
-         bitset_set (FIRSTS (i), sym - ntokens);
+          {
+            int k = 1;
+            bitset_set (FIRSTS (i), sym - ntokens);
+            bitset_set (next_vars [i - ntokens], sym - ntokens);
+            /* Keep going in the rule right hand side if the current
+               symbol is nullable. */
+            while (nullable[sym - ntokens] 
+                   && (sym = derives[i - ntokens][j]->rhs[k]) >= 0
+                   && ISVAR (sym))
+              {
+                bitset_set (next_vars [i - ntokens], sym - ntokens);
+                k++;
+              }
+            if (ISVAR (sym))
+              bitset_set (next_vars [i - ntokens], sym - ntokens);            
+          }
       }
 
   if (trace_flag & trace_sets)
     bitsetv_matrix_dump (stderr, "RTC: Firsts Input", firsts);
-  bitsetv_reflexive_transitive_closure (firsts);
+  bitsetv_transitive_closure (firsts);
+  bitsetv_transitive_closure (next_vars);
+
+  if (glr_parser)
+    {
+      for (i = ntokens; i < nsyms; i++)
+        {
+          /* Compare the results of the two closures: if there is a
+             recursion appearing in NEXT_VARS that did not appear in
+             FIRSTS, then it is a hidden left recursion. */
+          if (bitset_test (next_vars[i - ntokens], i - ntokens)
+              && !bitset_test (FIRSTS (i), i - ntokens))
+            {
+              complain_at (symbols[i]->location,
+                           _("hidden left recursion on nonterminal: %s"),
+                           symbols[i]->tag);
+            }
+        }
+    }
+  for (i = ntokens; i < nsyms; i++)
+    {  
+      bitset_set (FIRSTS (i), i - ntokens);
+    }
   if (trace_flag & trace_sets)
     bitsetv_matrix_dump (stderr, "RTC: Firsts Output", firsts);
 
   if (trace_flag & trace_sets)
     print_firsts ();
+
+  bitsetv_free (next_vars);
 }
 
 /*-------------------------------------------------------------------.
%glr-parser
%{
  #include <stdio.h>
  char *yystring;
  int nbs;

  int yylex (void);
  void yyerror (const char *);
%}

%%
s:
    a s 'b'                            { nbs++; }
  | 'x'
  ;

a:
    /* empty */
  ;

%%

int
yylex (void)
{
  return *yystring++;
}

void
yyerror (const char *msg)
{
  fprintf (stderr, "%s\n", msg);
}

int
main (int argc, char *argv[])
{
  int ret;
  nbs = 0;
  yystring = argv[1];
  ret = yyparse ();
  if (!ret)
    printf ("Number of b's: %d\n", nbs);
  return ret;
}

reply via email to

[Prev in Thread] Current Thread [Next in Thread]