Improve how production lengths are tracked and subtracted from StackReferences
author Nick Downing <downing.nick@gmail.com>
Fri, 20 Jul 2018 04:55:54 +0000 (14:55 +1000)
committer Nick Downing <downing.nick@gmail.com>
Fri, 20 Jul 2018 04:55:54 +0000 (14:55 +1000)
ast.py
bison_lr1dfa.py

diff --git a/ast.py b/ast.py
index f2e9919..45c61df 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -306,10 +306,11 @@ class PYACC(element.Element):
         section,
         production,
         character_to_symbol,
-        name_to_symbol
+        name_to_symbol,
+        last_action
       ):
         raise NotImplementedException
-      def add_to_symbols(self, pyacc, last_action, symbols):
+      def add_to_symbols(self, pyacc, production, last_action, symbols):
         return last_action
 
     class Action(Item):
@@ -345,11 +346,18 @@ class PYACC(element.Element):
         section,
         production,
         character_to_symbol,
-        name_to_symbol
+        name_to_symbol,
+        last_action
       ):
-        pass
-      def add_to_symbols(self, pyacc, last_action, symbols):
-        assert last_action is None
+        production.n_symbols += int(last_action)
+        return True
+      def add_to_symbols(self, pyacc, production, last_action, symbols):
+        assert last_action is None # no mid-rule actions for now
+        for i in self[0]:
+          if isinstance(i, PYACC.Text.StackLocation):
+            i.offset = -production.n_symbols
+          elif isinstance(i, PYACC.Text.StackReference):
+            i.offset = -production.n_symbols
         return self[0]
 
     class DPrec(Item):
@@ -488,7 +496,8 @@ class PYACC(element.Element):
         section,
         production,
         character_to_symbol,
-        name_to_symbol
+        name_to_symbol,
+        last_action
       ):
         self[0].post_process(
           pyacc,
@@ -500,6 +509,7 @@ class PYACC(element.Element):
         )
         assert production.precedence_terminal == -1
         production.precedence_terminal = self[0].terminal
+        return last_action
 
     class SymbolRef(Item):
       # GENERATE ELEMENT(int symbol) BEGIN
@@ -553,8 +563,10 @@ class PYACC(element.Element):
         section,
         production,
         character_to_symbol,
-        name_to_symbol
+        name_to_symbol,
+        last_action
       ):
+        production.n_symbols += int(last_action) + 1
         if isinstance(self[0], PYACC.Char):
           character = ord(self[0][0].get_text())
           assert character != 0 # would conflict with YYEOF
@@ -581,8 +593,9 @@ class PYACC(element.Element):
           assert False
         if self.symbol >= 0:
           production.last_terminal = self.symbol
-      def add_to_symbols(self, pyacc, last_action, symbols):
-        assert last_action is None
+        return False
+      def add_to_symbols(self, pyacc, production, last_action, symbols):
+        assert last_action is None # no mid-rule actions for now
         symbols.append(
           (pyacc.terminals[self.symbol].character_set, [])
         if self.symbol >= 0 else
@@ -590,7 +603,7 @@ class PYACC(element.Element):
         )
         return None
 
-    # GENERATE ELEMENT(int lhs_nonterminal, int last_terminal, int precedence_terminal) BEGIN
+    # GENERATE ELEMENT(int lhs_nonterminal, int n_symbols, int last_terminal, int precedence_terminal) BEGIN
     def __init__(
       self,
       tag = 'PYACC_Production',
@@ -598,6 +611,7 @@ class PYACC(element.Element):
       text = '',
       children = [],
       lhs_nonterminal = -1,
+      n_symbols = -1,
       last_terminal = -1,
       precedence_terminal = -1
     ):
@@ -613,6 +627,11 @@ class PYACC(element.Element):
       if isinstance(lhs_nonterminal, str) else
         lhs_nonterminal
       )
+      self.n_symbols = (
+        element.deserialize_int(n_symbols)
+      if isinstance(n_symbols, str) else
+        n_symbols
+      )
       self.last_terminal = (
         element.deserialize_int(last_terminal)
       if isinstance(last_terminal, str) else
@@ -626,11 +645,13 @@ class PYACC(element.Element):
     def serialize(self, ref_list):
       element.Element.serialize(self, ref_list)
       self.set('lhs_nonterminal', element.serialize_int(self.lhs_nonterminal))
+      self.set('n_symbols', element.serialize_int(self.n_symbols))
       self.set('last_terminal', element.serialize_int(self.last_terminal))
       self.set('precedence_terminal', element.serialize_int(self.precedence_terminal))
     def deserialize(self, ref_list):
       element.Element.deserialize(self, ref_list)
       self.lhs_nonterminal = element.deserialize_int(self.get('lhs_nonterminal', '-1'))
+      self.n_symbols = element.deserialize_int(self.get('n_symbols', '-1'))
       self.last_terminal = element.deserialize_int(self.get('last_terminal', '-1'))
       self.precedence_terminal = element.deserialize_int(self.get('precedence_terminal', '-1'))
     def copy(self, factory = None):
@@ -639,6 +660,7 @@ class PYACC(element.Element):
         Production if factory is None else factory
       )
       result.lhs_nonterminal = self.lhs_nonterminal
+      result.n_symbols = self.n_symbols
       result.last_terminal = self.last_terminal
       result.precedence_terminal = self.precedence_terminal
       return result
@@ -648,6 +670,10 @@ class PYACC(element.Element):
         params.append(
           'lhs_nonterminal = {0:s}'.format(repr(self.lhs_nonterminal))
         )
+      if self.n_symbols != -1:
+        params.append(
+          'n_symbols = {0:s}'.format(repr(self.n_symbols))
+        )
       if self.last_terminal != -1:
         params.append(
           'last_terminal = {0:s}'.format(repr(self.last_terminal))
@@ -672,15 +698,18 @@ class PYACC(element.Element):
     ):
       self.lhs_nonterminal = lhs_nonterminal
 
+      self.n_symbols = 0
       self.last_terminal = -1
       self.precedence_terminal = -1
+      last_action = False
       for i in self:
-        i.post_process(
+        last_action = i.post_process(
           pyacc,
           section,
           self,
           character_to_symbol,
-          name_to_symbol
+          name_to_symbol,
+          last_action
         )
 
       character_set = pyacc.nonterminals[
@@ -698,7 +727,7 @@ class PYACC(element.Element):
       symbols = []
       lookaheads = []
       for i in self:
-        last_action = i.add_to_symbols(pyacc, last_action, symbols)
+        last_action = i.add_to_symbols(pyacc, self, last_action, symbols)
       _lr1.productions.append(
         (
           # symbols (list of terminal_set, nonterminal_set)
@@ -2115,7 +2144,37 @@ class PYACC(element.Element):
       return tag
 
   class Text(element.Element):
-    class Escape(element.Element):
+    class Item(element.Element):
+      # GENERATE ELEMENT() BEGIN
+      def __init__(
+        self,
+        tag = 'PYACC_Text_Item',
+        attrib = {},
+        text = '',
+        children = []
+      ):
+        element.Element.__init__(
+          self,
+          tag,
+          attrib,
+          text,
+          children
+        )
+      def copy(self, factory = None):
+        result = element.Element.copy(
+          self,
+          Item if factory is None else factory
+        )
+        return result
+      def __repr__(self):
+        params = []
+        self.repr_serialize(params)
+        return 'ast.PYACC.Text.Item({0:s})'.format(', '.join(params))
+      # GENERATE END
+      def get_text(self):
+        raise NotImplementedException
+
+    class Escape(Item):
       # GENERATE ELEMENT(int character) BEGIN
       def __init__(
         self,
@@ -2125,7 +2184,7 @@ class PYACC(element.Element):
         children = [],
         character = -1
       ):
-        element.Element.__init__(
+        PYACC.Text.Item.__init__(
           self,
           tag,
           attrib,
@@ -2138,20 +2197,20 @@ class PYACC(element.Element):
           character
         )
       def serialize(self, ref_list):
-        element.Element.serialize(self, ref_list)
+        PYACC.Text.Item.serialize(self, ref_list)
         self.set('character', element.serialize_int(self.character))
       def deserialize(self, ref_list):
-        element.Element.deserialize(self, ref_list)
+        PYACC.Text.Item.deserialize(self, ref_list)
         self.character = element.deserialize_int(self.get('character', '-1'))
       def copy(self, factory = None):
-        result = element.Element.copy(
+        result = PYACC.Text.Item.copy(
           self,
           Escape if factory is None else factory
         )
         result.character = self.character
         return result
       def repr_serialize(self, params):
-        element.Element.repr_serialize(self, params)
+        PYACC.Text.Item.repr_serialize(self, params)
         if self.character != -1:
           params.append(
             'character = {0:s}'.format(repr(self.character))
@@ -2161,44 +2220,59 @@ class PYACC(element.Element):
         self.repr_serialize(params)
         return 'ast.PYACC.Text.Escape({0:s})'.format(', '.join(params))
       # GENERATE END
+      def get_text(self):
+        return chr(self.character)
 
-    class StackLocation(element.Element):
-      # GENERATE ELEMENT(int index) BEGIN
+    class StackLocation(Item):
+      # GENERATE ELEMENT(int offset, int index) BEGIN
       def __init__(
         self,
         tag = 'PYACC_Text_StackLocation',
         attrib = {},
         text = '',
         children = [],
+        offset = -1,
         index = -1
       ):
-        element.Element.__init__(
+        PYACC.Text.Item.__init__(
           self,
           tag,
           attrib,
           text,
           children
         )
+        self.offset = (
+          element.deserialize_int(offset)
+        if isinstance(offset, str) else
+          offset
+        )
         self.index = (
           element.deserialize_int(index)
         if isinstance(index, str) else
           index
         )
       def serialize(self, ref_list):
-        element.Element.serialize(self, ref_list)
+        PYACC.Text.Item.serialize(self, ref_list)
+        self.set('offset', element.serialize_int(self.offset))
         self.set('index', element.serialize_int(self.index))
       def deserialize(self, ref_list):
-        element.Element.deserialize(self, ref_list)
+        PYACC.Text.Item.deserialize(self, ref_list)
+        self.offset = element.deserialize_int(self.get('offset', '-1'))
         self.index = element.deserialize_int(self.get('index', '-1'))
       def copy(self, factory = None):
-        result = element.Element.copy(
+        result = PYACC.Text.Item.copy(
           self,
           StackLocation if factory is None else factory
         )
+        result.offset = self.offset
         result.index = self.index
         return result
       def repr_serialize(self, params):
-        element.Element.repr_serialize(self, params)
+        PYACC.Text.Item.repr_serialize(self, params)
+        if self.offset != -1:
+          params.append(
+            'offset = {0:s}'.format(repr(self.offset))
+          )
         if self.index != -1:
           params.append(
             'index = {0:s}'.format(repr(self.index))
@@ -2209,43 +2283,56 @@ class PYACC(element.Element):
         return 'ast.PYACC.Text.StackLocation({0:s})'.format(', '.join(params))
       # GENERATE END
 
-    class StackReference(element.Element):
-      # GENERATE ELEMENT(int index) BEGIN
+    class StackReference(Item):
+      # GENERATE ELEMENT(int offset, int index) BEGIN
       def __init__(
         self,
         tag = 'PYACC_Text_StackReference',
         attrib = {},
         text = '',
         children = [],
+        offset = -1,
         index = -1
       ):
-        element.Element.__init__(
+        PYACC.Text.Item.__init__(
           self,
           tag,
           attrib,
           text,
           children
         )
+        self.offset = (
+          element.deserialize_int(offset)
+        if isinstance(offset, str) else
+          offset
+        )
         self.index = (
           element.deserialize_int(index)
         if isinstance(index, str) else
           index
         )
       def serialize(self, ref_list):
-        element.Element.serialize(self, ref_list)
+        PYACC.Text.Item.serialize(self, ref_list)
+        self.set('offset', element.serialize_int(self.offset))
         self.set('index', element.serialize_int(self.index))
       def deserialize(self, ref_list):
-        element.Element.deserialize(self, ref_list)
+        PYACC.Text.Item.deserialize(self, ref_list)
+        self.offset = element.deserialize_int(self.get('offset', '-1'))
         self.index = element.deserialize_int(self.get('index', '-1'))
       def copy(self, factory = None):
-        result = element.Element.copy(
+        result = PYACC.Text.Item.copy(
           self,
           StackReference if factory is None else factory
         )
+        result.offset = self.offset
         result.index = self.index
         return result
       def repr_serialize(self, params):
-        element.Element.repr_serialize(self, params)
+        PYACC.Text.Item.repr_serialize(self, params)
+        if self.offset != -1:
+          params.append(
+            'offset = {0:s}'.format(repr(self.offset))
+          )
         if self.index != -1:
           params.append(
             'index = {0:s}'.format(repr(self.index))
@@ -2255,8 +2342,10 @@ class PYACC(element.Element):
         self.repr_serialize(params)
         return 'ast.PYACC.Text.StackReference({0:s})'.format(', '.join(params))
       # GENERATE END
+      def get_text(self):   
+        return '(yyvsp[{0:d}])'.format(self.offset + self.index)
 
-    class ValueLocation(element.Element):
+    class ValueLocation(Item):
       # GENERATE ELEMENT() BEGIN
       def __init__(
         self,
@@ -2265,7 +2354,7 @@ class PYACC(element.Element):
         text = '',
         children = []
       ):
-        element.Element.__init__(
+        PYACC.Text.Item.__init__(
           self,
           tag,
           attrib,
@@ -2273,7 +2362,7 @@ class PYACC(element.Element):
           children
         )
       def copy(self, factory = None):
-        result = element.Element.copy(
+        result = PYACC.Text.Item.copy(
           self,
           ValueLocation if factory is None else factory
         )
@@ -2284,7 +2373,7 @@ class PYACC(element.Element):
         return 'ast.PYACC.Text.ValueLocation({0:s})'.format(', '.join(params))
       # GENERATE END
 
-    class ValueReference(element.Element):
+    class ValueReference(Item):
       # GENERATE ELEMENT() BEGIN
       def __init__(
         self,
@@ -2293,7 +2382,7 @@ class PYACC(element.Element):
         text = '',
         children = []
       ):
-        element.Element.__init__(
+        PYACC.Text.Item.__init__(
           self,
           tag,
           attrib,
@@ -2301,7 +2390,7 @@ class PYACC(element.Element):
           children
         )
       def copy(self, factory = None):
-        result = element.Element.copy(
+        result = PYACC.Text.Item.copy(
           self,
           ValueReference if factory is None else factory
         )
@@ -2311,6 +2400,8 @@ class PYACC(element.Element):
         self.repr_serialize(params)
         return 'ast.PYACC.Text.ValueReference({0:s})'.format(', '.join(params))
       # GENERATE END
+      def get_text(self):
+        return '(yyval)'
 
     # GENERATE ELEMENT() BEGIN
     def __init__(
@@ -2338,21 +2429,12 @@ class PYACC(element.Element):
       self.repr_serialize(params)
       return 'ast.PYACC.Text({0:s})'.format(', '.join(params))
     # GENERATE END
-    def get_text(self, len_production = 0):
+    def get_text(self):
       return ''.join(
         [
           j
           for i in range(len(self))
-          for j in [
-            element.get_text(self, i),
-            (
-              '(yyvsp[{0:d}])'.format(self[i].index - len_production)
-            if isinstance(self[i], PYACC.Text.StackReference) else
-              '(yyval)'
-            if isinstance(self[i], PYACC.Text.ValueReference) else
-              chr(self[i].character)
-            )
-          ]
+          for j in [element.get_text(self, i), self[i].get_text()]
         ] +
         [element.get_text(self, len(self))]
       )
@@ -2774,6 +2856,7 @@ tag_to_class = {
   'PYACC_TerminalRef': PYACC.TerminalRef,
   'PYACC_NonterminalRef': PYACC.NonterminalRef,
   'PYACC_Text': PYACC.Text,
+  'PYACC_Text_Item': PYACC.Text.Item,
   'PYACC_Text_Escape': PYACC.Text.Escape,
   'PYACC_Text_StackLocation': PYACC.Text.StackLocation,
   'PYACC_Text_StackReference': PYACC.Text.StackReference,
diff --git a/bison_lr1dfa.py b/bison_lr1dfa.py
index 23c39a6..ddcf5ad 100644 (file)
--- a/bison_lr1dfa.py
+++ b/bison_lr1dfa.py
@@ -307,7 +307,7 @@ def generate(pyacc, skel_file, out_file):
             '''/* GENERATE SECTION1FIRST BEGIN */
 {0:s}/* GENERATE SECTION1FIRST END*/
 '''.format(
-              ''.join([element.get_text(i, 0) for i in pyacc.prologue_text])
+              ''.join([i.get_text() for i in pyacc.prologue_text])
             )
           )
         elif line == '/* GENERATE TOKENSEQUAL */\n':
@@ -699,7 +699,7 @@ static const yytype_int16 yyr2[] =
             '''/* GENERATE SECTION2INITIAL BEGIN */
 {0:s}/* GENERATE SECTION2INITIAL END */
 '''.format(
-              '' #.join([element.get_text(i, 0) for i in pyacc[1].code_blocks])
+              '' #.join([i.get_text() for i in pyacc[1].code_blocks])
             )
           )
         elif line == '/* GENERATE SECTION2 */\n':
@@ -714,9 +714,7 @@ static const yytype_int16 yyr2[] =
     break;
 '''.format(
                     i,
-                    lr1dfa.productions[i][1].get_text(
-                      lr1dfa.productions[i][0] # length of production
-                    )
+                    lr1dfa.productions[i][1].get_text()
                   )
                   for i in range(len(lr1dfa.productions))
                   if len(lr1dfa.productions[i][1])