Avoid use of the Grammar and Grammar.Production objects, instead merge Grammar logic...
authorNick Downing <downing.nick@gmail.com>
Tue, 17 Jul 2018 13:31:06 +0000 (23:31 +1000)
committerNick Downing <downing.nick@gmail.com>
Tue, 17 Jul 2018 13:31:06 +0000 (23:31 +1000)
ast.py
bison_lr1dfa.py

diff --git a/ast.py b/ast.py
index 3333fa4..9ae080d 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -1,5 +1,6 @@
+import bisect_set
 import element
-import grammar
+import lr1
 
 class Item(element.Element):
   # GENERATE ELEMENT() BEGIN
@@ -328,34 +329,6 @@ class PYACC(element.Element):
       return 'ast.PYACC.ID({0:s})'.format(', '.join(params))
     # GENERATE END
 
-  class IDNone(element.Element):
-    # GENERATE ELEMENT() BEGIN
-    def __init__(
-      self,
-      tag = 'PYACC_IDNone',
-      attrib = {},
-      text = '',
-      children = []
-    ):
-      element.Element.__init__(
-        self,
-        tag,
-        attrib,
-        text,
-        children
-      )
-    def copy(self, factory = None):
-      result = element.Element.copy(
-        self,
-        IDNone if factory is None else factory
-      )
-      return result
-    def __repr__(self):
-      params = []
-      self.repr_serialize(params)
-      return 'ast.PYACC.IDNone({0:s})'.format(', '.join(params))
-    # GENERATE END
-
   class Int(element.Element):
     # GENERATE ELEMENT() BEGIN
     def __init__(
@@ -384,14 +357,363 @@ class PYACC(element.Element):
       return 'ast.PYACC.Int({0:s})'.format(', '.join(params))
     # GENERATE END
 
-  class IntNone(element.Element):
-    # GENERATE ELEMENT() BEGIN
+  class Production(element.Element):
+    class Item(element.Element):
+      # GENERATE ELEMENT() BEGIN
+      def __init__(
+        self,
+        tag = 'PYACC_Production_Item',
+        attrib = {},
+        text = '',
+        children = []
+      ):
+        element.Element.__init__(
+          self,
+          tag,
+          attrib,
+          text,
+          children
+        )
+      def copy(self, factory = None):
+        result = element.Element.copy(
+          self,
+          Item if factory is None else factory
+        )
+        return result
+      def __repr__(self):
+        params = []
+        self.repr_serialize(params)
+        return 'ast.PYACC.Production.Item({0:s})'.format(', '.join(params))
+      # GENERATE END
+      def post_process(
+        self,
+        pyacc,
+        production,
+        character_to_symbol,
+        name_to_symbol,
+        last_terminal
+      ):
+        raise NotImplementedException
+      def add_to_symbols(self, pyacc, action, symbols):
+        return action
+
+    class Action(Item):
+      # GENERATE ELEMENT() BEGIN
+      def __init__(
+        self,
+        tag = 'PYACC_Production_Action',
+        attrib = {},
+        text = '',
+        children = []
+      ):
+        PYACC.Production.Item.__init__(
+          self,
+          tag,
+          attrib,
+          text,
+          children
+        )
+      def copy(self, factory = None):
+        result = PYACC.Production.Item.copy(
+          self,
+          Action if factory is None else factory
+        )
+        return result
+      def __repr__(self):
+        params = []
+        self.repr_serialize(params)
+        return 'ast.PYACC.Production.Action({0:s})'.format(', '.join(params))
+      # GENERATE END
+      def post_process(
+        self,
+        pyacc,
+        production,
+        character_to_symbol,
+        name_to_symbol,
+        last_terminal
+      ):
+        return last_terminal
+      def add_to_symbols(self, pyacc, action, symbols):
+        assert action is None
+        return self[0]
+
+    class DPrec(Item):
+      # GENERATE ELEMENT() BEGIN
+      def __init__(
+        self,
+        tag = 'PYACC_Production_DPrec',
+        attrib = {},
+        text = '',
+        children = []
+      ):
+        PYACC.Production.Item.__init__(
+          self,
+          tag,
+          attrib,
+          text,
+          children
+        )
+      def copy(self, factory = None):
+        result = PYACC.Production.Item.copy(
+          self,
+          DPrec if factory is None else factory
+        )
+        return result
+      def __repr__(self):
+        params = []
+        self.repr_serialize(params)
+        return 'ast.PYACC.Production.DPrec({0:s})'.format(', '.join(params))
+      # GENERATE END
+
+    class Empty(Item):
+      # GENERATE ELEMENT() BEGIN
+      def __init__(
+        self,
+        tag = 'PYACC_Production_Empty',
+        attrib = {},
+        text = '',
+        children = []
+      ):
+        PYACC.Production.Item.__init__(
+          self,
+          tag,
+          attrib,
+          text,
+          children
+        )
+      def copy(self, factory = None):
+        result = PYACC.Production.Item.copy(
+          self,
+          Empty if factory is None else factory
+        )
+        return result
+      def __repr__(self):
+        params = []
+        self.repr_serialize(params)
+        return 'ast.PYACC.Production.Empty({0:s})'.format(', '.join(params))
+      # GENERATE END
+
+    class Merge(Item):
+      # GENERATE ELEMENT() BEGIN
+      def __init__(
+        self,
+        tag = 'PYACC_Production_Merge',
+        attrib = {},
+        text = '',
+        children = []
+      ):
+        PYACC.Production.Item.__init__(
+          self,
+          tag,
+          attrib,
+          text,
+          children
+        )
+      def copy(self, factory = None):
+        result = PYACC.Production.Item.copy(
+          self,
+          Merge if factory is None else factory
+        )
+        return result
+      def __repr__(self):
+        params = []
+        self.repr_serialize(params)
+        return 'ast.PYACC.Production.Merge({0:s})'.format(', '.join(params))
+      # GENERATE END
+
+    class Prec(Item):
+      # GENERATE ELEMENT(int symbol) BEGIN
+      def __init__(
+        self,
+        tag = 'PYACC_Production_Prec',
+        attrib = {},
+        text = '',
+        children = [],
+        symbol = -1
+      ):
+        PYACC.Production.Item.__init__(
+          self,
+          tag,
+          attrib,
+          text,
+          children
+        )
+        self.symbol = (
+          element.deserialize_int(symbol)
+        if isinstance(symbol, str) else
+          symbol
+        )
+      def serialize(self, ref_list, indent = 0):
+        PYACC.Production.Item.serialize(self, ref_list, indent)
+        self.set('symbol', element.serialize_int(self.symbol))
+      def deserialize(self, ref_list):
+        PYACC.Production.Item.deserialize(self, ref_list)
+        self.symbol = element.deserialize_int(self.get('symbol', '-1'))
+      def copy(self, factory = None):
+        result = PYACC.Production.Item.copy(
+          self,
+          Prec if factory is None else factory
+        )
+        result.symbol = self.symbol
+        return result
+      def repr_serialize(self, params):
+        PYACC.Production.Item.repr_serialize(self, params)
+        if self.symbol != -1:
+          params.append(
+            'symbol = {0:s}'.format(repr(self.symbol))
+          )
+      def __repr__(self):
+        params = []
+        self.repr_serialize(params)
+        return 'ast.PYACC.Production.Prec({0:s})'.format(', '.join(params))
+      # GENERATE END
+      def post_process(
+        self,
+        pyacc,
+        production,
+        character_to_symbol,
+        name_to_symbol,
+        last_terminal
+      ):
+        # make this contain Section1Or2.Symbol eventually, duplicate for now
+        if isinstance(self[0], PYACC.Char):
+          character = ord(self[0].get_text())
+          assert character != 0 # would conflict with YYEOF
+          if character in character_to_symbol:
+            self.symbol = character_to_symbol[character]
+            assert self.symbol >= 0
+          else:
+            self.symbol = len(pyacc.terminal_symbols)
+            character_to_symbol[character] = self.symbol
+            pyacc.terminal_symbols.append(
+              PYACC.Symbol(
+                token = character,
+                character_set = [self.symbol, self.symbol + 1]
+              )
+            )
+        elif isinstance(self[0], PYACC.ID):
+          name = element.get_text(self[0], 0)
+          if name in name_to_symbol:
+            self.symbol = name_to_symbol[name]
+            assert self.symbol >= 0
+          else:
+            self.symbol = len(pyacc.terminal_symbols)
+            name_to_symbol[name] = self.symbol
+            pyacc.terminal_symbols.append(
+              PYACC.Symbol(
+                name = name,
+                character_set = [self.symbol, self.symbol + 1]
+              )
+            )
+        else:
+          assert False
+        assert production.precedence_terminal == -1
+        production.precedence_terminal = self.symbol
+        return last_terminal
+
+    class Symbol(Item):
+      # GENERATE ELEMENT(int symbol) BEGIN
+      def __init__(
+        self,
+        tag = 'PYACC_Production_Symbol',
+        attrib = {},
+        text = '',
+        children = [],
+        symbol = -1
+      ):
+        PYACC.Production.Item.__init__(
+          self,
+          tag,
+          attrib,
+          text,
+          children
+        )
+        self.symbol = (
+          element.deserialize_int(symbol)
+        if isinstance(symbol, str) else
+          symbol
+        )
+      def serialize(self, ref_list, indent = 0):
+        PYACC.Production.Item.serialize(self, ref_list, indent)
+        self.set('symbol', element.serialize_int(self.symbol))
+      def deserialize(self, ref_list):
+        PYACC.Production.Item.deserialize(self, ref_list)
+        self.symbol = element.deserialize_int(self.get('symbol', '-1'))
+      def copy(self, factory = None):
+        result = PYACC.Production.Item.copy(
+          self,
+          Symbol if factory is None else factory
+        )
+        result.symbol = self.symbol
+        return result
+      def repr_serialize(self, params):
+        PYACC.Production.Item.repr_serialize(self, params)
+        if self.symbol != -1:
+          params.append(
+            'symbol = {0:s}'.format(repr(self.symbol))
+          )
+      def __repr__(self):
+        params = []
+        self.repr_serialize(params)
+        return 'ast.PYACC.Production.Symbol({0:s})'.format(', '.join(params))
+      # GENERATE END
+      def post_process(
+        self,
+        pyacc,
+        production,
+        character_to_symbol,
+        name_to_symbol,
+        last_terminal
+      ):
+        if isinstance(self[0], PYACC.Char):
+          character = ord(self[0].get_text())
+          assert character != 0 # would conflict with YYEOF
+          if character in character_to_symbol:
+            self.symbol = character_to_symbol[character]
+            assert self.symbol >= 0
+          else:
+            self.symbol = len(pyacc.terminal_symbols)
+            character_to_symbol[character] = self.symbol
+            pyacc.terminal_symbols.append(
+              PYACC.Symbol(
+                token = character,
+                character_set = [self.symbol, self.symbol + 1]
+              )
+            )
+        elif isinstance(self[0], PYACC.ID):
+          name = element.get_text(self[0], 0)
+          if name in name_to_symbol:
+            self.symbol = name_to_symbol[name]
+          else:
+            self.symbol = ~len(pyacc.nonterminal_symbols)
+            name_to_symbol[name] = self.symbol
+            pyacc.nonterminal_symbols.append(
+              PYACC.Symbol(
+                name = name,
+                character_set = []
+              )
+            )
+        else:
+          assert False
+        return self.symbol if self.symbol >= 0 else last_terminal
+      def add_to_symbols(self, pyacc, action, symbols):
+        assert action is None
+        symbols.append(
+          (pyacc.terminal_symbols[self.symbol].character_set, [])
+        if self.symbol >= 0 else
+          ([], pyacc.nonterminal_symbols[~self.symbol].character_set)
+        )
+        return None
+
+    # GENERATE ELEMENT(int lhs_nonterminal, int precedence_terminal) BEGIN
     def __init__(
       self,
-      tag = 'PYACC_IntNone',
+      tag = 'PYACC_Production',
       attrib = {},
       text = '',
-      children = []
+      children = [],
+      lhs_nonterminal = -1,
+      precedence_terminal = -1
     ):
       element.Element.__init__(
         self,
@@ -400,18 +722,105 @@ class PYACC(element.Element):
         text,
         children
       )
+      self.lhs_nonterminal = (
+        element.deserialize_int(lhs_nonterminal)
+      if isinstance(lhs_nonterminal, str) else
+        lhs_nonterminal
+      )
+      self.precedence_terminal = (
+        element.deserialize_int(precedence_terminal)
+      if isinstance(precedence_terminal, str) else
+        precedence_terminal
+      )
+    def serialize(self, ref_list, indent = 0):
+      element.Element.serialize(self, ref_list, indent)
+      self.set('lhs_nonterminal', element.serialize_int(self.lhs_nonterminal))
+      self.set('precedence_terminal', element.serialize_int(self.precedence_terminal))
+    def deserialize(self, ref_list):
+      element.Element.deserialize(self, ref_list)
+      self.lhs_nonterminal = element.deserialize_int(self.get('lhs_nonterminal', '-1'))
+      self.precedence_terminal = element.deserialize_int(self.get('precedence_terminal', '-1'))
     def copy(self, factory = None):
       result = element.Element.copy(
         self,
-        IntNone if factory is None else factory
+        Production if factory is None else factory
       )
+      result.lhs_nonterminal = self.lhs_nonterminal
+      result.precedence_terminal = self.precedence_terminal
       return result
+    def repr_serialize(self, params):
+      element.Element.repr_serialize(self, params)
+      if self.lhs_nonterminal != -1:
+        params.append(
+          'lhs_nonterminal = {0:s}'.format(repr(self.lhs_nonterminal))
+        )
+      if self.precedence_terminal != -1:
+        params.append(
+          'precedence_terminal = {0:s}'.format(repr(self.precedence_terminal))
+        )
     def __repr__(self):
       params = []
       self.repr_serialize(params)
-      return 'ast.PYACC.IntNone({0:s})'.format(', '.join(params))
+      return 'ast.PYACC.Production({0:s})'.format(', '.join(params))
     # GENERATE END
 
+    def post_process(
+      self,
+      pyacc,
+      lhs_nonterminal,
+      character_to_symbol,
+      name_to_symbol
+    ):
+      self.lhs_nonterminal = lhs_nonterminal
+
+      self.precedence_terminal = -1
+      last_terminal = -1
+      for i in self:
+        last_terminal = i.post_process(
+          pyacc,
+          self,
+          character_to_symbol,
+          name_to_symbol,
+          last_terminal
+        )
+      if self.precedence_terminal == -1:
+        self.precedence_terminal = last_terminal
+
+      character_set = pyacc.nonterminal_symbols[
+        self.lhs_nonterminal
+      ].character_set
+      character = 1 + len(pyacc.productions)
+      if len(character_set) and character_set[-1] == character:
+        character_set[-1] = character + 1
+      else:
+        character_set.extend([character, character + 1])
+      pyacc.productions.append(self)
+
+    def to_lr1_production(self, pyacc):
+      action = None
+      symbols = []
+      lookaheads = []
+      for i in self:
+        action = i.add_to_symbols(pyacc, action, symbols)
+      return (
+        (
+          # precedence
+          (
+            (pyacc.terminal_symbols[self.precedence_terminal].precedence << 2) |
+            (pyacc.terminal_symbols[self.precedence_terminal].associativity & 3)
+          if self.precedence_terminal != -1 else
+            -1
+          ),
+          # symbols (list of terminal_set, nonterminal_set)
+          symbols,
+          # lookaheads (list of initial_set, can_be_empty)
+          [([], False) for i in range(len(symbols))] + [([], True)],
+          # group_bounds
+          []
+        ),
+        action if action is not None else PYACC.BracedCode()
+      )
+
   class Section(element.Element):
     # GENERATE ELEMENT() BEGIN
     def __init__(
@@ -779,6 +1188,7 @@ class PYACC(element.Element):
           section,
           character_to_symbol,
           name_to_symbol,
+          False, # is_nonterminal
           pyacc.precedences, # precedence
           self.type # associativity
         )
@@ -811,6 +1221,23 @@ class PYACC(element.Element):
         self.repr_serialize(params)
         return 'ast.PYACC.Section1Or2.Start({0:s})'.format(', '.join(params))
       # GENERATE END
+      def post_process(
+        self,
+        pyacc,
+        section,
+        character_to_symbol,
+        name_to_symbol
+      ):
+        self[0].post_process(
+          pyacc,
+          section,
+          character_to_symbol,
+          name_to_symbol,
+          True, # is_nonterminal
+          -1, # precedence
+          -1 # associativity
+        )
+        pyacc.start_nonterminal = ~self[0].symbol
 
     class TagOrSymbol(element.Element):
       # GENERATE ELEMENT() BEGIN
@@ -845,6 +1272,7 @@ class PYACC(element.Element):
         section,
         character_to_symbol,
         name_to_symbol,
+        is_nonterminal,
         precedence,
         associativity,
         tag
@@ -903,6 +1331,7 @@ class PYACC(element.Element):
         section,
         character_to_symbol,
         name_to_symbol,
+        is_terminal,
         precedence,
         associativity,
         tag
@@ -910,14 +1339,15 @@ class PYACC(element.Element):
         return self
 
     class Symbol(TagOrSymbol):
-      # GENERATE ELEMENT(int user_token) BEGIN
+      # GENERATE ELEMENT(int symbol, int token) BEGIN
       def __init__(
         self,
         tag = 'PYACC_Section1Or2_Symbol',
         attrib = {},
         text = '',
         children = [],
-        user_token = -1
+        symbol = -1,
+        token = -1
       ):
         PYACC.Section1Or2.TagOrSymbol.__init__(
           self,
@@ -926,29 +1356,41 @@ class PYACC(element.Element):
           text,
           children
         )
-        self.user_token = (
-          element.deserialize_int(user_token)
-        if isinstance(user_token, str) else
-          user_token
+        self.symbol = (
+          element.deserialize_int(symbol)
+        if isinstance(symbol, str) else
+          symbol
+        )
+        self.token = (
+          element.deserialize_int(token)
+        if isinstance(token, str) else
+          token
         )
       def serialize(self, ref_list, indent = 0):
         PYACC.Section1Or2.TagOrSymbol.serialize(self, ref_list, indent)
-        self.set('user_token', element.serialize_int(self.user_token))
+        self.set('symbol', element.serialize_int(self.symbol))
+        self.set('token', element.serialize_int(self.token))
       def deserialize(self, ref_list):
         PYACC.Section1Or2.TagOrSymbol.deserialize(self, ref_list)
-        self.user_token = element.deserialize_int(self.get('user_token', '-1'))
+        self.symbol = element.deserialize_int(self.get('symbol', '-1'))
+        self.token = element.deserialize_int(self.get('token', '-1'))
       def copy(self, factory = None):
         result = PYACC.Section1Or2.TagOrSymbol.copy(
           self,
           Symbol if factory is None else factory
         )
-        result.user_token = self.user_token
+        result.symbol = self.symbol
+        result.token = self.token
         return result
       def repr_serialize(self, params):
         PYACC.Section1Or2.TagOrSymbol.repr_serialize(self, params)
-        if self.user_token != -1:
+        if self.symbol != -1:
           params.append(
-            'user_token = {0:s}'.format(repr(self.user_token))
+            'symbol = {0:s}'.format(repr(self.symbol))
+          )
+        if self.token != -1:
+          params.append(
+            'token = {0:s}'.format(repr(self.token))
           )
       def __repr__(self):
         params = []
@@ -961,51 +1403,70 @@ class PYACC(element.Element):
         section,
         character_to_symbol,
         name_to_symbol,
+        is_nonterminal,
         precedence,
         associativity,
         tag
       ):
-        if isinstance(self[0], PYACC.Char):
-          character = ord(self[0].get_text())
-          assert character != 0 # would conflict with YYEOF
-          if character in character_to_symbol:
-            symbol = character_to_symbol[character]
-            assert symbol >= 0
-          else:
-            symbol = len(pyacc.terminal_symbols)
-            character_to_symbol[character] = symbol
-            pyacc.terminal_symbols.append(
-              PYACC.Symbol(
-                token = character,
-                character_set = [symbol, symbol + 1]
+        if is_nonterminal:
+          if isinstance(self[0], PYACC.ID):
+            name = element.get_text(self[0], 0)
+            if name in name_to_symbol:
+              self.symbol = name_to_symbol[name]
+              assert self.symbol < 0
+            else:
+              self.symbol = ~len(pyacc.nonterminal_symbols)
+              name_to_symbol[name] = self.symbol
+              pyacc.nonterminal_symbols.append(
+                PYACC.Symbol(
+                  name = name,
+                  character_set = []
+                )
               )
-            )
-            print('symbol', symbol, 'token', character)
-        elif isinstance(self[0], PYACC.ID):
-          name = element.get_text(self[0], 0)
-          if name in name_to_symbol:
-            symbol = name_to_symbol[name]
-            assert symbol >= 0
           else:
-            symbol = len(pyacc.terminal_symbols)
-            name_to_symbol[name] = symbol
-            pyacc.terminal_symbols.append(
-              PYACC.Symbol(
-                name = name,
-                character_set = [symbol, symbol + 1]
-              )
-            )
+            assert False
+          # in this case ignore token, precedence and associativity
         else:
-          assert False
-        if self.user_token != -1:
-          assert pyacc.terminal_symbols[symbol].token == -1
-          pyacc.terminal_symbols[symbol].token = self.user_token
-        if precedence != -1:
-          assert pyacc.terminal_symbols[symbol].precedence == -1
-          pyacc.terminal_symbols[symbol].precedence = precedence
-        if associativity != -1:
-          assert pyacc.terminal_symbols[symbol].associativity == -1
-          pyacc.terminal_symbols[symbol].associativity = associativity
+          if isinstance(self[0], PYACC.Char):
+            character = ord(self[0].get_text())
+            assert character != 0 # would conflict with YYEOF
+            if character in character_to_symbol:
+              self.symbol = character_to_symbol[character]
+              assert self.symbol >= 0
+            else:
+              self.symbol = len(pyacc.terminal_symbols)
+              character_to_symbol[character] = self.symbol
+              pyacc.terminal_symbols.append(
+                PYACC.Symbol(
+                  token = character,
+                  character_set = [self.symbol, self.symbol + 1]
+                )
+              )
+          elif isinstance(self[0], PYACC.ID):
+            name = element.get_text(self[0], 0)
+            if name in name_to_symbol:
+              self.symbol = name_to_symbol[name]
+              assert self.symbol >= 0
+            else:
+              self.symbol = len(pyacc.terminal_symbols)
+              name_to_symbol[name] = self.symbol
+              pyacc.terminal_symbols.append(
+                PYACC.Symbol(
+                  name = name,
+                  character_set = [self.symbol, self.symbol + 1]
+                )
+              )
+          else:
+            assert False
+          if self.token != -1:
+            assert pyacc.terminal_symbols[self.symbol].token == -1
+            pyacc.terminal_symbols[self.symbol].token = self.token
+          if precedence != -1:
+            assert pyacc.terminal_symbols[self.symbol].precedence == -1
+            pyacc.terminal_symbols[self.symbol].precedence = precedence
+          if associativity != -1:
+            assert pyacc.terminal_symbols[self.symbol].associativity == -1
+            pyacc.terminal_symbols[self.symbol].associativity = associativity
         return tag
 
     class TaggedSymbols(element.Element):
@@ -1041,6 +1502,7 @@ class PYACC(element.Element):
         section,
         character_to_symbol,
         name_to_symbol,
+        is_terminal,
         precedence,
         associativity
       ):
@@ -1051,6 +1513,7 @@ class PYACC(element.Element):
             section,
             character_to_symbol,
             name_to_symbol,
+            is_terminal,
             precedence,
             associativity,
             tag
@@ -1095,6 +1558,7 @@ class PYACC(element.Element):
           section,
           character_to_symbol,
           name_to_symbol,
+          False, # is_nonterminal
           -1, # precedence
           -1 # associativity
         )
@@ -1794,258 +2258,6 @@ class PYACC(element.Element):
 
   class Section2(Section1Or2):
     class Rules(Item):
-      class RHS(element.Element):
-        class Action(element.Element):
-          # GENERATE ELEMENT() BEGIN
-          def __init__(
-            self,
-            tag = 'PYACC_Section2_Rules_RHS_Action',
-            attrib = {},
-            text = '',
-            children = []
-          ):
-            element.Element.__init__(
-              self,
-              tag,
-              attrib,
-              text,
-              children
-            )
-          def copy(self, factory = None):
-            result = element.Element.copy(
-              self,
-              Action if factory is None else factory
-            )
-            return result
-          def __repr__(self):
-            params = []
-            self.repr_serialize(params)
-            return 'ast.PYACC.Section2.Rules.RHS.Action({0:s})'.format(', '.join(params))
-          # GENERATE END
-
-        class DPrec(element.Element):
-          # GENERATE ELEMENT() BEGIN
-          def __init__(
-            self,
-            tag = 'PYACC_Section2_Rules_RHS_DPrec',
-            attrib = {},
-            text = '',
-            children = []
-          ):
-            element.Element.__init__(
-              self,
-              tag,
-              attrib,
-              text,
-              children
-            )
-          def copy(self, factory = None):
-            result = element.Element.copy(
-              self,
-              DPrec if factory is None else factory
-            )
-            return result
-          def __repr__(self):
-            params = []
-            self.repr_serialize(params)
-            return 'ast.PYACC.Section2.Rules.RHS.DPrec({0:s})'.format(', '.join(params))
-          # GENERATE END
-
-        class Empty(element.Element):
-          # GENERATE ELEMENT() BEGIN
-          def __init__(
-            self,
-            tag = 'PYACC_Section2_Rules_RHS_Empty',
-            attrib = {},
-            text = '',
-            children = []
-          ):
-            element.Element.__init__(
-              self,
-              tag,
-              attrib,
-              text,
-              children
-            )
-          def copy(self, factory = None):
-            result = element.Element.copy(
-              self,
-              Empty if factory is None else factory
-            )
-            return result
-          def __repr__(self):
-            params = []
-            self.repr_serialize(params)
-            return 'ast.PYACC.Section2.Rules.RHS.Empty({0:s})'.format(', '.join(params))
-          # GENERATE END
-
-        class Merge(element.Element):
-          # GENERATE ELEMENT() BEGIN
-          def __init__(
-            self,
-            tag = 'PYACC_Section2_Rules_RHS_Merge',
-            attrib = {},
-            text = '',
-            children = []
-          ):
-            element.Element.__init__(
-              self,
-              tag,
-              attrib,
-              text,
-              children
-            )
-          def copy(self, factory = None):
-            result = element.Element.copy(
-              self,
-              Merge if factory is None else factory
-            )
-            return result
-          def __repr__(self):
-            params = []
-            self.repr_serialize(params)
-            return 'ast.PYACC.Section2.Rules.RHS.Merge({0:s})'.format(', '.join(params))
-          # GENERATE END
-
-        class Prec(element.Element):
-          # GENERATE ELEMENT() BEGIN
-          def __init__(
-            self,
-            tag = 'PYACC_Section2_Rules_RHS_Prec',
-            attrib = {},
-            text = '',
-            children = []
-          ):
-            element.Element.__init__(
-              self,
-              tag,
-              attrib,
-              text,
-              children
-            )
-          def copy(self, factory = None):
-            result = element.Element.copy(
-              self,
-              Prec if factory is None else factory
-            )
-            return result
-          def __repr__(self):
-            params = []
-            self.repr_serialize(params)
-            return 'ast.PYACC.Section2.Rules.RHS.Prec({0:s})'.format(', '.join(params))
-          # GENERATE END
-
-        class Symbol(element.Element):
-          # GENERATE ELEMENT() BEGIN
-          def __init__(
-            self,
-            tag = 'PYACC_Section2_Rules_RHS_Symbol',
-            attrib = {},
-            text = '',
-            children = []
-          ):
-            element.Element.__init__(
-              self,
-              tag,
-              attrib,
-              text,
-              children
-            )
-          def copy(self, factory = None):
-            result = element.Element.copy(
-              self,
-              Symbol if factory is None else factory
-            )
-            return result
-          def __repr__(self):
-            params = []
-            self.repr_serialize(params)
-            return 'ast.PYACC.Section2.Rules.RHS.Symbol({0:s})'.format(', '.join(params))
-          # GENERATE END
-
-        # GENERATE ELEMENT() BEGIN
-        def __init__(
-          self,
-          tag = 'PYACC_Section2_Rules_RHS',
-          attrib = {},
-          text = '',
-          children = []
-        ):
-          element.Element.__init__(
-            self,
-            tag,
-            attrib,
-            text,
-            children
-          )
-        def copy(self, factory = None):
-          result = element.Element.copy(
-            self,
-            RHS if factory is None else factory
-          )
-          return result
-        def __repr__(self):
-          params = []
-          self.repr_serialize(params)
-          return 'ast.PYACC.Section2.Rules.RHS({0:s})'.format(', '.join(params))
-        # GENERATE END
-        def post_process(
-          self,
-          pyacc,
-          lhs_symbol,
-          character_to_symbol,
-          name_to_symbol
-        ):
-          production = grammar.Grammar.Production(
-            nonterminal = len(pyacc.grammar)
-          )
-          for i in range(len(self)):
-            if isinstance(self[i], PYACC.Section2.Rules.RHS.Symbol):
-              if isinstance(self[i][0], PYACC.Char):
-                character = ord(self[i][0].get_text())
-                assert character != 0 # would conflict with YYEOF
-                if character in character_to_symbol:
-                  symbol = character_to_symbol[character]
-                  assert symbol >= 0
-                else:
-                  symbol = len(pyacc.terminal_symbols)
-                  character_to_symbol[character] = symbol
-                  pyacc.terminal_symbols.append(
-                    PYACC.Symbol(
-                      token = character,
-                      character_set = [symbol, symbol + 1]
-                    )
-                  )
-                production.append(
-                  grammar.Grammar.Production.Symbol(
-                    terminal_set = pyacc.terminal_symbols[symbol].character_set
-                  )
-                )
-              elif isinstance(self[i][0], PYACC.ID):
-                production.append(
-                  grammar.Grammar.Production.NamedSymbol(
-                    # (non)terminal_set will be filled in later once assigned
-                    name = element.get_text(self[i][0], 0)
-                  )
-                )
-              else:
-                assert False
-            elif isinstance(self[i], PYACC.Section2.Rules.RHS.Action):
-              assert i == len(self) - 1
-              assert isinstance(self[i][0], PYACC.BracedCode)
-              pyacc.actions_braced_code.append(self[i][0])
-              break
-          else:
-            pyacc.actions_braced_code.append(PYACC.BracedCode())
-
-          character_set = pyacc.nonterminal_symbols[lhs_symbol].character_set
-          character = len(pyacc.grammar)
-          if len(character_set) and character_set[-1] == character:
-            character_set[-1] = character + 1
-          else:
-            character_set.extend([character, character + 1])
-          pyacc.grammar.append(production)
-
       # GENERATE ELEMENT() BEGIN
       def __init__(
         self,
@@ -2079,23 +2291,20 @@ class PYACC(element.Element):
         character_to_symbol,
         name_to_symbol
       ):
-        assert isinstance(self[0], PYACC.ID)
-        lhs_name = element.get_text(self[0], 0)
-        if lhs_name in name_to_symbol:
-          i = name_to_symbol[lhs_name]
-          assert i < 0
-          lhs_symbol = ~i
-        else:
-          lhs_symbol = len(pyacc.nonterminal_symbols)
+        self[0].post_process(
+          pyacc,
+          section,
           character_to_symbol,
-          name_to_symbol[lhs_name] = ~lhs_symbol
-          pyacc.nonterminal_symbols.append(
-            PYACC.Symbol(name = lhs_name, character_set = [])
-          )
+          name_to_symbol,
+          True, # is_nonterminal
+          -1, # precedence
+          -1, # associativity
+          None # tag
+        )
         for i in self[1:]:
           i.post_process(
             pyacc,
-            lhs_symbol,
+            ~self[0].symbol,
             character_to_symbol,
             name_to_symbol
           )
@@ -2218,7 +2427,7 @@ class PYACC(element.Element):
       return 'ast.PYACC.ValueReference({0:s})'.format(', '.join(params))
     # GENERATE END
 
-  # GENERATE ELEMENT(list(ref) prologue_text, int precedences, list(ref) terminal_symbols, list(ref) nonterminal_symbols, ref grammar, list(ref) actions_braced_code) BEGIN
+  # GENERATE ELEMENT(list(ref) prologue_text, int precedences, list(ref) terminal_symbols, list(ref) nonterminal_symbols, int start_nonterminal, list(ref) productions) BEGIN
   def __init__(
     self,
     tag = 'PYACC',
@@ -2229,8 +2438,8 @@ class PYACC(element.Element):
     precedences = -1,
     terminal_symbols = [],
     nonterminal_symbols = [],
-    grammar = None,
-    actions_braced_code = []
+    start_nonterminal = -1,
+    productions = []
   ):
     element.Element.__init__(
       self,
@@ -2247,8 +2456,12 @@ class PYACC(element.Element):
     )
     self.terminal_symbols = terminal_symbols
     self.nonterminal_symbols = nonterminal_symbols
-    self.grammar = grammar
-    self.actions_braced_code = actions_braced_code
+    self.start_nonterminal = (
+      element.deserialize_int(start_nonterminal)
+    if isinstance(start_nonterminal, str) else
+      start_nonterminal
+    )
+    self.productions = productions
   def serialize(self, ref_list, indent = 0):
     element.Element.serialize(self, ref_list, indent)
     self.set(
@@ -2264,10 +2477,10 @@ class PYACC(element.Element):
       'nonterminal_symbols',
       ' '.join([element.serialize_ref(i, ref_list) for i in self.nonterminal_symbols])
     )
-    self.set('grammar', element.serialize_ref(self.grammar, ref_list))
+    self.set('start_nonterminal', element.serialize_int(self.start_nonterminal))
     self.set(
-      'actions_braced_code',
-      ' '.join([element.serialize_ref(i, ref_list) for i in self.actions_braced_code])
+      'productions',
+      ' '.join([element.serialize_ref(i, ref_list) for i in self.productions])
     )
   def deserialize(self, ref_list):
     element.Element.deserialize(self, ref_list)
@@ -2284,10 +2497,10 @@ class PYACC(element.Element):
       element.deserialize_ref(i, ref_list)
       for i in self.get('nonterminal_symbols', '').split()
     ]
-    self.grammar = element.deserialize_ref(self.get('grammar', '-1'), ref_list)
-    self.actions_braced_code = [
+    self.start_nonterminal = element.deserialize_int(self.get('start_nonterminal', '-1'))
+    self.productions = [
       element.deserialize_ref(i, ref_list)
-      for i in self.get('actions_braced_code', '').split()
+      for i in self.get('productions', '').split()
     ]
   def copy(self, factory = None):
     result = element.Element.copy(
@@ -2298,8 +2511,8 @@ class PYACC(element.Element):
     result.precedences = self.precedences
     result.terminal_symbols = self.terminal_symbols
     result.nonterminal_symbols = self.nonterminal_symbols
-    result.grammar = self.grammar
-    result.actions_braced_code = self.actions_braced_code
+    result.start_nonterminal = self.start_nonterminal
+    result.productions = self.productions
     return result
   def repr_serialize(self, params):
     element.Element.repr_serialize(self, params)
@@ -2325,14 +2538,14 @@ class PYACC(element.Element):
           ', '.join([repr(i) for i in self.nonterminal_symbols])
         )
       )
-    if self.grammar != None:
+    if self.start_nonterminal != -1:
       params.append(
-        'grammar = {0:s}'.format(repr(self.grammar))
+        'start_nonterminal = {0:s}'.format(repr(self.start_nonterminal))
       )
-    if len(self.actions_braced_code):
+    if len(self.productions):
       params.append(
-        'actions_braced_code = [{0:s}]'.format(
-          ', '.join([repr(i) for i in self.actions_braced_code])
+        'productions = [{0:s}]'.format(
+          ', '.join([repr(i) for i in self.productions])
         )
       )
   def __repr__(self):
@@ -2340,6 +2553,7 @@ class PYACC(element.Element):
     self.repr_serialize(params)
     return 'ast.PYACC({0:s})'.format(', '.join(params))
   # GENERATE END
+
   def post_process(self):
     # variables that will be serialized
     self.prologue_text = []
@@ -2350,24 +2564,14 @@ class PYACC(element.Element):
       PYACC.Symbol(name = '$undefined', character_set = [2, 3])
     ]
     self.nonterminal_symbols = []
-    self.grammar = grammar.Grammar(
-      children = [
-        grammar.Grammar.Production(
-          children = [
-            grammar.Grammar.Production.NamedSymbol()
-          ],
-          nonterminal = 0
-        )
-      ],
-      eof_terminal = 0
-    )
-    self.actions_braced_code = []
+    self.start_nonterminal = -1
+    self.productions = []
 
     # variables that won't be serialized
     character_to_symbol = {} # indexed by int, always >= 0 (terminal)
     # note: in name_to_symbol, >= 0 is terminal, < 0 is ~nonterminal
     # (don't bother storing the '$undefined', it can't be looked up)
-    name_to_symbol = {'error': 0}
+    name_to_symbol = {'error': 1}
 
     # perform the semantic analysis pass
     for i in self:
@@ -2384,27 +2588,55 @@ class PYACC(element.Element):
         i.token = token
         token += 1
 
-    # if start symbol not specified, use first nonterminal defined in file
-    if len(self.grammar[0][0].name) == 0:
-      self.grammar[0][0].name = self.nonterminal_symbols[0].name
-
-    # look up rule names and substitute appropriate character_set for each
-    self.grammar.n_terminals = len(self.terminal_symbols)
-    self.grammar.post_process(
-      dict(
-        [
-          (i.name, (i.character_set, []))
-          for i in self.terminal_symbols
-          if len(i.name) # fix this later
-        ] +
-        [
-          (i.name, ([], i.character_set))
-          for i in self.nonterminal_symbols
-        ]
+    # fill in start nonterminal if not overridden by user
+    if self.start_nonterminal == -1:
+      assert len(self.nonterminal_symbols) != 0
+      self.start_nonterminal = 0
+
+  def to_lr1(self):
+    lr1_productions = [
+      (
+        # precedence
+        -1,
+        # symbols (list of terminal_set, nonterminal_set)
+        [([], self.nonterminal_symbols[self.start_nonterminal].character_set)],
+        # lookaheads (list of initial_set, can_be_empty)
+        [([], False), ([], True)],
+        # group_bounds
+        []
       )
-    )
-
-# GENERATE FACTORY(grammar.factory) BEGIN
+    ]
+    actions = []
+    for i in self.productions:
+      lr1_production, action = i.to_lr1_production(self)
+      lr1_productions.append(lr1_production)
+      actions.append(action)
+
+    # propagate lookaheads
+    modified = True
+    while modified:
+      modified = False
+      for _, symbols, lookaheads, _ in lr1_productions:
+        for i in range(len(symbols) - 1, -1, -1):
+          initial_set, nonterminal_set = symbols[i]
+          can_be_empty = False
+          for j in range(0, len(nonterminal_set), 2):
+            for k in range(nonterminal_set[j], nonterminal_set[j + 1]):
+              child_initial_set, child_can_be_empty = lr1_productions[k][2][0]
+              initial_set = bisect_set.bisect_set_or(initial_set, child_initial_set)
+              can_be_empty = can_be_empty or child_can_be_empty
+          # at this point can_be_empty refers to current symbol only
+          if can_be_empty:
+            next_initial_set, can_be_empty = lookaheads[i + 1]
+            initial_set = bisect_set.bisect_set_or(initial_set, next_initial_set)
+          # at this point can_be_empty refers to all remaining symbols
+          if (initial_set, can_be_empty) != lookaheads[i]:
+            lookaheads[i] = (initial_set, can_be_empty)
+            modified = True
+
+    return (lr1.LR1(lr1_productions, len(self.terminal_symbols), 0), actions)
+
+# GENERATE FACTORY(element.Element) BEGIN
 tag_to_class = {
   'Item': Item,
   'PYACC': PYACC,
@@ -2414,9 +2646,15 @@ tag_to_class = {
   'PYACC_Char': PYACC.Char,
   'PYACC_Escape': PYACC.Escape,
   'PYACC_ID': PYACC.ID,
-  'PYACC_IDNone': PYACC.IDNone,
   'PYACC_Int': PYACC.Int,
-  'PYACC_IntNone': PYACC.IntNone,
+  'PYACC_Production': PYACC.Production,
+  'PYACC_Production_Item': PYACC.Production.Item,
+  'PYACC_Production_Action': PYACC.Production.Action,
+  'PYACC_Production_DPrec': PYACC.Production.DPrec,
+  'PYACC_Production_Empty': PYACC.Production.Empty,
+  'PYACC_Production_Merge': PYACC.Production.Merge,
+  'PYACC_Production_Prec': PYACC.Production.Prec,
+  'PYACC_Production_Symbol': PYACC.Production.Symbol,
   'PYACC_Section': PYACC.Section,
   'PYACC_StackLocation': PYACC.StackLocation,
   'PYACC_StackReference': PYACC.StackReference,
@@ -2459,17 +2697,10 @@ tag_to_class = {
   'PYACC_Section1_YACC': PYACC.Section1.YACC,
   'PYACC_Section2': PYACC.Section2,
   'PYACC_Section2_Rules': PYACC.Section2.Rules,
-  'PYACC_Section2_Rules_RHS': PYACC.Section2.Rules.RHS,
-  'PYACC_Section2_Rules_RHS_Action': PYACC.Section2.Rules.RHS.Action,
-  'PYACC_Section2_Rules_RHS_DPrec': PYACC.Section2.Rules.RHS.DPrec,
-  'PYACC_Section2_Rules_RHS_Empty': PYACC.Section2.Rules.RHS.Empty,
-  'PYACC_Section2_Rules_RHS_Merge': PYACC.Section2.Rules.RHS.Merge,
-  'PYACC_Section2_Rules_RHS_Prec': PYACC.Section2.Rules.RHS.Prec,
-  'PYACC_Section2_Rules_RHS_Symbol': PYACC.Section2.Rules.RHS.Symbol,
   'PYACC_Section3': PYACC.Section3,
   'PYACC_ValueLocation': PYACC.ValueLocation,
   'PYACC_ValueReference': PYACC.ValueReference
 }
 def factory(tag, attrib = {}, *args, **kwargs):
-  return tag_to_class.get(tag, grammar.factory)(tag, attrib, *args, **kwargs)
+  return tag_to_class.get(tag, element.Element)(tag, attrib, *args, **kwargs)
 # GENERATE END
index d55f12e..bc6d4d0 100644 (file)
@@ -251,7 +251,8 @@ class BisonLR1DFA:
     self.n_terminals = n_terminals
 
 def generate(pyacc, skel_file, out_file):
-  lr1dfa = pyacc.grammar.to_lr1().to_lalr1()
+  lr1, actions = pyacc.to_lr1()
+  lr1dfa = lr1.to_lalr1()
 
   # generate translate table for terminal symbols
   # this undoes yacc/bison's rather wasteful mapping of 0x00..0xff to literal
@@ -713,12 +714,12 @@ static const yytype_int16 yyr2[] =
     break;
 '''.format(
                     i + 1,
-                    pyacc.actions_braced_code[i].get_text(
+                    actions[i].get_text(
                       bison_lr1dfa.rule_data[i + 1, 1] # length of production
                     )
                   )
-                  for i in range(len(pyacc.actions_braced_code))
-                  if len(pyacc.actions_braced_code[i])
+                  for i in range(len(actions))
+                  if len(actions[i])
                 ]
               )
             )