Implement midrule actions, and fix bug which suppressed actions not using $$/$n
author Nick Downing <downing.nick@gmail.com>
Fri, 20 Jul 2018 07:21:31 +0000 (17:21 +1000)
committer Nick Downing <downing.nick@gmail.com>
Fri, 20 Jul 2018 07:21:31 +0000 (17:21 +1000)
ast.py
bison_lr1dfa.py
bootstrap_pyacc.py
tests/cal.y

diff --git a/ast.py b/ast.py
index 45c61df..81bd6fa 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -310,7 +310,14 @@ class PYACC(element.Element):
         last_action
       ):
         raise NotImplementedException
-      def add_to_symbols(self, pyacc, production, last_action, symbols):
+      def add_to_symbols(
+        self,
+        pyacc,
+        production,
+        last_action,
+        _lr1,
+        symbols
+      ):
         return last_action
 
     class Action(Item):
@@ -340,6 +347,7 @@ class PYACC(element.Element):
         self.repr_serialize(params)
         return 'ast.PYACC.Production.Action({0:s})'.format(', '.join(params))
       # GENERATE END
+
       def post_process(
         self,
         pyacc,
@@ -349,10 +357,32 @@ class PYACC(element.Element):
         name_to_symbol,
         last_action
       ):
-        production.n_symbols += int(last_action)
+        pyacc.n_productions += int(last_action) # midrule action production
+        production.n_symbols += int(last_action) # midrule action symbol
         return True
-      def add_to_symbols(self, pyacc, production, last_action, symbols):
-        assert last_action is None # no mid-rule actions for now
+
+      def add_to_symbols(
+        self,
+        pyacc,
+        production,
+        last_action,
+        _lr1,
+        symbols
+      ):
+        if last_action is not None:
+          symbols.append(
+            ([], [len(_lr1.productions), len(_lr1.productions) + 1])
+          )
+          _lr1.productions.append(
+            (
+              # symbols (list of terminal_set, nonterminal_set)
+              [],
+              # lookaheads (list of initial_set, can_be_empty)
+              [([], True)],
+              # ref_data
+              last_action
+            )
+          )
         for i in self[0]:
           if isinstance(i, PYACC.Text.StackLocation):
             i.offset = -production.n_symbols
@@ -557,6 +587,7 @@ class PYACC(element.Element):
         self.repr_serialize(params)
         return 'ast.PYACC.Production.SymbolRef({0:s})'.format(', '.join(params))
       # GENERATE END
+
       def post_process(
         self,
         pyacc,
@@ -566,7 +597,8 @@ class PYACC(element.Element):
         name_to_symbol,
         last_action
       ):
-        production.n_symbols += int(last_action) + 1
+        pyacc.n_productions += int(last_action) # midrule action production
+        production.n_symbols += int(last_action) + 1 # midrule action symbol
         if isinstance(self[0], PYACC.Char):
           character = ord(self[0][0].get_text())
           assert character != 0 # would conflict with YYEOF
@@ -594,8 +626,29 @@ class PYACC(element.Element):
         if self.symbol >= 0:
           production.last_terminal = self.symbol
         return False
-      def add_to_symbols(self, pyacc, production, last_action, symbols):
-        assert last_action is None # no mid-rule actions for now
+
+      def add_to_symbols(
+        self,
+        pyacc,
+        production,
+        last_action,
+        _lr1,
+        symbols
+      ):
+        if last_action is not None:
+          symbols.append(
+            ([], [len(_lr1.productions), len(_lr1.productions) + 1])
+          )
+          _lr1.productions.append(
+            (
+              # symbols (list of terminal_set, nonterminal_set)
+              [],
+              # lookaheads (list of initial_set, can_be_empty)
+              [([], True)],
+              # ref_data
+              last_action
+            )
+          )
         symbols.append(
           (pyacc.terminals[self.symbol].character_set, [])
         if self.symbol >= 0 else
@@ -698,7 +751,7 @@ class PYACC(element.Element):
     ):
       self.lhs_nonterminal = lhs_nonterminal
 
-      self.n_symbols = 0
+      self.n_symbols = 0 # includes midrule actions
       self.last_terminal = -1
       self.precedence_terminal = -1
       last_action = False
@@ -712,14 +765,12 @@ class PYACC(element.Element):
           last_action
         )
 
-      character_set = pyacc.nonterminals[
-        self.lhs_nonterminal
-      ].character_set
-      character = 1 + len(pyacc.productions)
-      if len(character_set) and character_set[-1] == character:
-        character_set[-1] = character + 1
+      i = pyacc.nonterminals[self.lhs_nonterminal]
+      if len(i.character_set) and i.character_set[-1] == pyacc.n_productions:
+        i.character_set[-1] = pyacc.n_productions + 1
       else:
-        character_set.extend([character, character + 1])
+        i.character_set.extend([pyacc.n_productions, pyacc.n_productions + 1])
+      pyacc.n_productions += 1
       pyacc.productions.append(self)
 
     def add_to_lr1(self, pyacc, _lr1):
@@ -727,7 +778,14 @@ class PYACC(element.Element):
       symbols = []
       lookaheads = []
       for i in self:
-        last_action = i.add_to_symbols(pyacc, self, last_action, symbols)
+        last_action = i.add_to_symbols(
+          pyacc,
+          self,
+          last_action,
+          _lr1,
+          symbols
+        )
+      assert len(symbols) == self.n_symbols
       _lr1.productions.append(
         (
           # symbols (list of terminal_set, nonterminal_set)
@@ -735,7 +793,7 @@ class PYACC(element.Element):
           # lookaheads (list of initial_set, can_be_empty)
           [([], False) for i in range(len(symbols))] + [([], True)],
           # ref_data
-          last_action if last_action is not None else PYACC.BracedCode()
+          last_action
         )
       )
       precedence = (
@@ -2530,7 +2588,7 @@ class PYACC(element.Element):
     ):
       pass
 
-  # GENERATE ELEMENT(list(ref) prologue_text, list(ref) terminals, list(ref) nonterminals, list(ref) productions, int first_nonterminal, int start_nonterminal, list(int) associativities) BEGIN
+  # GENERATE ELEMENT(list(ref) prologue_text, list(ref) terminals, list(ref) nonterminals, int n_productions, list(ref) productions, int first_nonterminal, int start_nonterminal, list(int) associativities) BEGIN
   def __init__(
     self,
     tag = 'PYACC',
@@ -2540,6 +2598,7 @@ class PYACC(element.Element):
     prologue_text = [],
     terminals = [],
     nonterminals = [],
+    n_productions = -1,
     productions = [],
     first_nonterminal = -1,
     start_nonterminal = -1,
@@ -2555,6 +2614,11 @@ class PYACC(element.Element):
     self.prologue_text = prologue_text
     self.terminals = terminals
     self.nonterminals = nonterminals
+    self.n_productions = (
+      element.deserialize_int(n_productions)
+    if isinstance(n_productions, str) else
+      n_productions
+    )
     self.productions = productions
     self.first_nonterminal = (
       element.deserialize_int(first_nonterminal)
@@ -2585,6 +2649,7 @@ class PYACC(element.Element):
       'nonterminals',
       ' '.join([element.serialize_ref(i, ref_list) for i in self.nonterminals])
     )
+    self.set('n_productions', element.serialize_int(self.n_productions))
     self.set(
       'productions',
       ' '.join([element.serialize_ref(i, ref_list) for i in self.productions])
@@ -2609,6 +2674,7 @@ class PYACC(element.Element):
       element.deserialize_ref(i, ref_list)
       for i in self.get('nonterminals', '').split()
     ]
+    self.n_productions = element.deserialize_int(self.get('n_productions', '-1'))
     self.productions = [
       element.deserialize_ref(i, ref_list)
       for i in self.get('productions', '').split()
@@ -2627,6 +2693,7 @@ class PYACC(element.Element):
     result.prologue_text = self.prologue_text
     result.terminals = self.terminals
     result.nonterminals = self.nonterminals
+    result.n_productions = self.n_productions
     result.productions = self.productions
     result.first_nonterminal = self.first_nonterminal
     result.start_nonterminal = self.start_nonterminal
@@ -2652,6 +2719,10 @@ class PYACC(element.Element):
           ', '.join([repr(i) for i in self.nonterminals])
         )
       )
+    if self.n_productions != -1:
+      params.append(
+        'n_productions = {0:s}'.format(repr(self.n_productions))
+      )
     if len(self.productions):
       params.append(
         'productions = [{0:s}]'.format(
@@ -2688,6 +2759,7 @@ class PYACC(element.Element):
       PYACC.Terminal(name = '$undefined')
     ]
     self.nonterminals = []
+    self.n_productions = 1 # includes start and midrule action productions
     self.productions = []
 
     # variables that won't be serialized
@@ -2767,6 +2839,7 @@ class PYACC(element.Element):
     # compute productions and nonterminals precedence table
     for i in self.productions:
       i.add_to_lr1(self, _lr1)
+    assert len(_lr1.productions) == self.n_productions
 
     # propagate lookaheads
     modified = True
diff --git a/bison_lr1dfa.py b/bison_lr1dfa.py
index ddcf5ad..6466812 100644 (file)
--- a/bison_lr1dfa.py
+++ b/bison_lr1dfa.py
@@ -278,8 +278,11 @@ def generate(pyacc, skel_file, out_file):
   # generate translate table for nonterminal symbols
   # this is effectively a map from productions back to nonterminal symbols
   # we do not generate an entry for the first production (start production)
-  translate_nonterminals = numpy.zeros(
+  # we generate extra fake entries after end of pyacc.nonterminals for fake
+  # productions due to midrule actions (which leave gaps in the numbering)
+  translate_nonterminals = numpy.full(
     (len(lr1dfa.productions) - 1,),
+    -1,
     numpy.int16
   )
   for i in range(len(pyacc.nonterminals)):
@@ -288,13 +291,20 @@ def generate(pyacc, skel_file, out_file):
         pyacc.nonterminals[i].character_set[j] - 1:
         pyacc.nonterminals[i].character_set[j + 1] - 1
       ] = i
+  midrule_actions = [translate_nonterminals == -1]
+  n_midrule_actions = numpy.sum(midrule_actions)
+  translate_nonterminals[midrule_actions] = numpy.arange(
+    len(pyacc.nonterminals),
+    len(pyacc.nonterminals) + n_midrule_actions,
+    dtype = numpy.int16
+  )
 
   # translate and compress the tables
   bison_lr1dfa = BisonLR1DFA(
     lr1dfa,
     len(pyacc.terminals),
     translate_terminals,
-    len(pyacc.nonterminals),
+    len(pyacc.nonterminals) + n_midrule_actions,
     translate_nonterminals
   )
 
@@ -370,6 +380,7 @@ def generate(pyacc, skel_file, out_file):
               for i in pyacc.terminals
             ] +
             ['"{0:s}"'.format(i.name) for i in pyacc.nonterminals] +
+            ['"$@{0:d}"'.format(i) for i in range(n_midrule_actions)] +
             ['YY_NULLPTR']
           ):
             if x + len(i) >= 70:
@@ -717,7 +728,7 @@ static const yytype_int16 yyr2[] =
                     lr1dfa.productions[i][1].get_text()
                   )
                   for i in range(len(lr1dfa.productions))
-                  if len(lr1dfa.productions[i][1])
+                  if lr1dfa.productions[i][1] is not None
                 ]
               )
             )
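diff --git a/bootstrap_pyacc.py b/bootstrap_pyacc.py
index 2db2f45..7aa87b2 100755 (executable)
--- a/bootstrap_pyacc.py
+++ b/bootstrap_pyacc.py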
@@ -35,9 +35,9 @@ in_file = args[0]
 
 with open(in_file) as fin:
   pyacc = element.deserialize(fin, ast.factory)
-element.serialize(pyacc, 'a.xml', 'utf-8')
-pyacc = element.deserialize('a.xml', ast.factory, 'utf-8')
+#element.serialize(pyacc, 'a.xml', 'utf-8')
+#pyacc = element.deserialize('a.xml', ast.factory, 'utf-8')
 pyacc.post_process()
-element.serialize(pyacc, 'b.xml', 'utf-8')
-pyacc = element.deserialize('b.xml', ast.factory, 'utf-8')
+#element.serialize(pyacc, 'b.xml', 'utf-8')
+#pyacc = element.deserialize('b.xml', ast.factory, 'utf-8')
 bison_lr1dfa.generate(pyacc, skel_file, out_file)
diff --git a/tests/cal.y b/tests/cal.y
index 637cb82..771fa19 100644 (file)
--- a/tests/cal.y
+++ b/tests/cal.y
@@ -21,7 +21,8 @@ E : E '+' E { $$ = $1 + $3; }
   | E '*' E { $$ = $1 * $3; }
   | E '/' E { $$ = $1 / $3; }
   | '(' E ')' { $$ = $2; }
-  | '-' E %prec UMINUS { $$ = -$2; }
+  /*| '-' E %prec UMINUS { $$ = -$2; }*/
+  | '-' { printf("unary minus\n"); } E %prec UMINUS { $$ = -$3; }
   | NUM
   ;
 %%