Make *_to_symbol contain symbol index rather than positive terminal index or negative...
author Nick Downing <downing.nick@gmail.com>
Sat, 28 Jul 2018 23:39:29 +0000 (09:39 +1000)
committer Nick Downing <downing.nick@gmail.com>
Sat, 28 Jul 2018 23:39:29 +0000 (09:39 +1000)
ast.py

diff --git a/ast.py b/ast.py
index 387f42c..2a04c33 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -717,9 +717,11 @@ class PYACC(element.Element):
           assert character != 0 # would conflict with YYEOF
           if character in character_to_symbol:
             self.symbol = character_to_symbol[character]
-            assert self.symbol >= 0
+            assert (
+              pyacc.symbols[self.symbol]._type == PYACC.Symbol.TYPE_TERMINAL
+            )
           else:
-            self.symbol = len(pyacc.terminals)
+            self.symbol = len(pyacc.symbols)
             character_to_symbol[character] = self.symbol
             pyacc.symbols.append(
               PYACC.Symbol(
@@ -728,13 +730,12 @@ class PYACC(element.Element):
                 character_set = [character, character + 1]
               )
             )
-            pyacc.terminals.append(pyacc.symbols[-1])
         elif isinstance(self[0], PYACC.ID):
           name = self[0].get_text()
           if name in name_to_symbol:
             self.symbol = name_to_symbol[name]
           else:
-            self.symbol = ~len(pyacc.nonterminals)
+            self.symbol = len(pyacc.symbols)
             name_to_symbol[name] = self.symbol
             pyacc.symbols.append(
               PYACC.Symbol(
@@ -744,13 +745,12 @@ class PYACC(element.Element):
                 character_set = []
               )
             )
-            pyacc.nonterminals.append(pyacc.symbols[-1])
         elif isinstance(self[0], PYACC.String):
           string = self[0][0].get_text()
           self.symbol = string_to_symbol[string] # must already exist
         else:
           assert False
-        if self.symbol >= 0:
+        if pyacc.symbols[self.symbol]._type == PYACC.Symbol.TYPE_TERMINAL:
           production.last_terminal = self.symbol
         return False
 
@@ -778,20 +778,16 @@ class PYACC(element.Element):
               last_action
             )
           )
-        if self.symbol >= 0:
-          symbols.append((pyacc.terminals[self.symbol].character_set, []))
-          tag_names.append(
-            ''
-          if pyacc.terminals[self.symbol]._tag == -1 else
-            pyacc.tags[pyacc.terminals[self.symbol]._tag].name
-          )
-        else:
-          symbols.append(([], pyacc.nonterminals[~self.symbol].character_set))
-          tag_names.append(
-            ''
-          if pyacc.nonterminals[~self.symbol]._tag == -1 else
-            pyacc.tags[pyacc.nonterminals[~self.symbol]._tag].name
-          )
+        symbols.append(
+          (pyacc.symbols[self.symbol].character_set, [])
+        if pyacc.symbols[self.symbol]._type == PYACC.Symbol.TYPE_TERMINAL else
+          ([], pyacc.symbols[self.symbol].character_set)
+        )
+        tag_names.append(
+          ''
+        if pyacc.symbols[self.symbol]._tag == -1 else
+          pyacc.tags[pyacc.symbols[self.symbol]._tag].name
+        )
         return None
 
     # GENERATE ELEMENT(int lhs_nonterminal, int n_symbols, int last_terminal, int precedence_terminal) BEGIN
@@ -907,7 +903,7 @@ class PYACC(element.Element):
           last_action
         )
 
-      i = pyacc.nonterminals[self.lhs_nonterminal]
+      i = pyacc.symbols[self.lhs_nonterminal]
       if len(i.character_set) and i.character_set[-1] == pyacc.n_productions:
         i.character_set[-1] = pyacc.n_productions + 1
       else:
@@ -938,8 +934,8 @@ class PYACC(element.Element):
           ):
             i.tag_name = (
               ''
-            if pyacc.nonterminals[self.lhs_nonterminal]._tag == -1 else
-              pyacc.tags[pyacc.nonterminals[self.lhs_nonterminal]._tag].name
+            if pyacc.symbols[self.lhs_nonterminal]._tag == -1 else
+              pyacc.tags[pyacc.symbols[self.lhs_nonterminal]._tag].name
             )
 
       _lr1.productions.append(
@@ -954,9 +950,9 @@ class PYACC(element.Element):
       )
 
       precedence = (
-        pyacc.terminals[self.precedence_terminal].precedence
+        pyacc.symbols[self.precedence_terminal].precedence
       if self.precedence_terminal != -1 else
-        pyacc.terminals[self.last_terminal].precedence
+        pyacc.symbols[self.last_terminal].precedence
       if self.last_terminal != -1 else
         -1
       )
@@ -1250,7 +1246,7 @@ class PYACC(element.Element):
           name_to_tag,
           -1 # precedence
         )
-        pyacc.start_nonterminal = ~self[0].symbol
+        pyacc.start_nonterminal = self[0].symbol
 
     class TaggedSymbols(element.Element):
       # GENERATE ELEMENT() BEGIN
@@ -2434,12 +2430,12 @@ class PYACC(element.Element):
           -1 # precedence
         )
         if pyacc.first_nonterminal == -1:
-          pyacc.first_nonterminal = ~self[0].symbol
+          pyacc.first_nonterminal = self[0].symbol
         for i in self[1:]:
           i.post_process(
             pyacc,
             section,
-            ~self[0].symbol,
+            self[0].symbol,
             character_to_symbol,
             name_to_symbol,
             string_to_symbol,
@@ -2705,9 +2701,12 @@ class PYACC(element.Element):
         assert character != 0 # would conflict with YYEOF
         if character in character_to_symbol:
           self.symbol = character_to_symbol[character]
-          assert self.symbol >= 0
+          assert (
+            pyacc.symbols[self.symbol]._type ==
+            PYACC.Symbol.TYPE_TERMINAL
+          )
         else:
-          self.symbol = len(pyacc.terminals)
+          self.symbol = len(pyacc.symbols)
           character_to_symbol[character] = self.symbol
           pyacc.symbols.append(
             PYACC.Symbol(
@@ -2716,7 +2715,6 @@ class PYACC(element.Element):
               character_set = [character, character + 1]
             )
           )
-          pyacc.terminals.append(pyacc.symbols[-1])
       elif isinstance(self[0], PYACC.ID):
         name = self[0].get_text()
         self.symbol = name_to_symbol[name] # must already exist
@@ -2725,12 +2723,8 @@ class PYACC(element.Element):
         self.symbol = string_to_symbol[string] # must already exist
       else:
         assert False
-      if self.symbol >= 0:
-        assert pyacc.terminals[self.symbol].code_props[_type] is None
-        pyacc.terminals[self.symbol].code_props[_type] = code
-      else:
-        assert pyacc.nonterminals[~self.symbol].code_props[_type] is None
-        pyacc.nonterminals[~self.symbol].code_props[_type] = code
+      assert pyacc.symbols[self.symbol].code_props[_type] is None
+      pyacc.symbols[self.symbol].code_props[_type] = code
 
   class TerminalRef(SymbolRef):
     # GENERATE ELEMENT(int user_token) BEGIN
@@ -2796,9 +2790,12 @@ class PYACC(element.Element):
         assert character != 0 # would conflict with YYEOF
         if character in character_to_symbol:
           self.symbol = character_to_symbol[character]
-          assert self.symbol >= 0
+          assert (
+            pyacc.symbols[self.symbol]._type ==
+            PYACC.Symbol.TYPE_TERMINAL
+          )
         else:
-          self.symbol = len(pyacc.terminals)
+          self.symbol = len(pyacc.symbols)
           character_to_symbol[character] = self.symbol
           pyacc.symbols.append(
             PYACC.Symbol(
@@ -2807,14 +2804,16 @@ class PYACC(element.Element):
               character_set = [character, character + 1]
             )
           )
-          pyacc.terminals.append(pyacc.symbols[-1])
       elif isinstance(self[0], PYACC.ID):
         name = self[0].get_text()
         if name in name_to_symbol:
           self.symbol = name_to_symbol[name]
-          assert self.symbol >= 0
+          assert (
+            pyacc.symbols[self.symbol]._type ==
+            PYACC.Symbol.TYPE_TERMINAL
+          )
         else:
-          self.symbol = len(pyacc.terminals)
+          self.symbol = len(pyacc.symbols)
           name_to_symbol[name] = self.symbol
           pyacc.symbols.append(
             PYACC.Symbol(
@@ -2823,16 +2822,18 @@ class PYACC(element.Element):
               code_props = [None, None]
             )
           )
-          pyacc.terminals.append(pyacc.symbols[-1])
       elif isinstance(self[0], PYACC.String):
         string = self[0][0].get_text()
         self.symbol = string_to_symbol[string] # must already exist
-        assert self.symbol >= 0
+        assert (
+          pyacc.symbols[self.symbol]._type ==
+          PYACC.Symbol.TYPE_TERMINAL
+        )
       else:
         assert False
       if self.user_token != -1:
-        assert len(pyacc.terminals[self.symbol].character_set) == 0
-        pyacc.terminals[self.symbol].character_set = (
+        assert len(pyacc.symbols[self.symbol].character_set) == 0
+        pyacc.symbols[self.symbol].character_set = (
           [self.user_token, self.user_token + 1]
         )
       if len(self) >= 2:
@@ -2840,11 +2841,11 @@ class PYACC(element.Element):
         assert string not in string_to_symbol
         string_to_symbol[string] = self.symbol
       if _tag != -1:
-        assert pyacc.terminals[self.symbol]._tag == -1
-        pyacc.terminals[self.symbol]._tag = _tag
+        assert pyacc.symbols[self.symbol]._tag == -1
+        pyacc.symbols[self.symbol]._tag = _tag
       if precedence != -1:
-        assert pyacc.terminals[self.symbol].precedence == -1
-        pyacc.terminals[self.symbol].precedence = precedence
+        assert pyacc.symbols[self.symbol].precedence == -1
+        pyacc.symbols[self.symbol].precedence = precedence
       return _tag
 
   class NonterminalRef(SymbolRef):
@@ -2910,9 +2911,12 @@ class PYACC(element.Element):
         name = self[0].get_text()
         if name in name_to_symbol:
           self.symbol = name_to_symbol[name]
-          assert self.symbol < 0
+          assert (
+            pyacc.symbols[self.symbol]._type ==
+            PYACC.Symbol.TYPE_NONTERMINAL
+          )
         else:
-          self.symbol = ~len(pyacc.nonterminals)
+          self.symbol = len(pyacc.symbols)
           name_to_symbol[name] = self.symbol
           pyacc.symbols.append(
             PYACC.Symbol(
@@ -2922,11 +2926,13 @@ class PYACC(element.Element):
               character_set = []
             )
           )
-          pyacc.nonterminals.append(pyacc.symbols[-1])
       elif isinstance(self[0], PYACC.String):
         string = self[0][0].get_text()
         self.symbol = string_to_symbol[string] # must already exist
-        assert self.symbol < 0
+        assert (
+          pyacc.symbols[self.symbol]._type ==
+          PYACC.Symbol.TYPE_NONTERMINAL
+        )
       else:
         assert False
       assert self.user_token == -1
@@ -2935,8 +2941,8 @@ class PYACC(element.Element):
         assert string not in string_to_symbol
         string_to_symbol[string] = self.symbol
       if _tag != -1:
-        assert pyacc.nonterminals[~self.symbol]._tag == -1
-        pyacc.nonterminals[~self.symbol]._tag = _tag
+        assert pyacc.symbols[self.symbol]._tag == -1
+        pyacc.symbols[self.symbol]._tag = _tag
       assert precedence == -1
       return _tag
 
@@ -3366,7 +3372,7 @@ class PYACC(element.Element):
     ):
       pass
 
-  # GENERATE ELEMENT(list(ref) top_code, list(ref) before_union_code, list(ref) requires_code, str union_name, ref union_code, list(ref) after_union_code, ref initial_action_code, list(ref) tags, list(ref) symbols, list(ref) terminals, list(ref) nonterminals, int n_productions, list(ref) productions, int first_nonterminal, int start_nonterminal, list(int) associativities) BEGIN
+  # GENERATE ELEMENT(list(ref) top_code, list(ref) before_union_code, list(ref) requires_code, str union_name, ref union_code, list(ref) after_union_code, ref initial_action_code, list(ref) tags, list(ref) symbols, int n_productions, list(ref) productions, int first_nonterminal, int start_nonterminal, list(int) associativities) BEGIN
   def __init__(
     self,
     tag = 'PYACC',
@@ -3382,8 +3388,6 @@ class PYACC(element.Element):
     initial_action_code = None,
     tags = [],
     symbols = [],
-    terminals = [],
-    nonterminals = [],
     n_productions = -1,
     productions = [],
     first_nonterminal = -1,
@@ -3406,8 +3410,6 @@ class PYACC(element.Element):
     self.initial_action_code = initial_action_code
     self.tags = tags
     self.symbols = symbols
-    self.terminals = terminals
-    self.nonterminals = nonterminals
     self.n_productions = (
       element.deserialize_int(n_productions)
     if isinstance(n_productions, str) else
@@ -3458,14 +3460,6 @@ class PYACC(element.Element):
       'symbols',
       ' '.join([element.serialize_ref(i, ref_list) for i in self.symbols])
     )
-    self.set(
-      'terminals',
-      ' '.join([element.serialize_ref(i, ref_list) for i in self.terminals])
-    )
-    self.set(
-      'nonterminals',
-      ' '.join([element.serialize_ref(i, ref_list) for i in self.nonterminals])
-    )
     self.set('n_productions', element.serialize_int(self.n_productions))
     self.set(
       'productions',
@@ -3506,14 +3500,6 @@ class PYACC(element.Element):
       element.deserialize_ref(i, ref_list)
       for i in self.get('symbols', '').split()
     ]
-    self.terminals = [
-      element.deserialize_ref(i, ref_list)
-      for i in self.get('terminals', '').split()
-    ]
-    self.nonterminals = [
-      element.deserialize_ref(i, ref_list)
-      for i in self.get('nonterminals', '').split()
-    ]
     self.n_productions = element.deserialize_int(self.get('n_productions', '-1'))
     self.productions = [
       element.deserialize_ref(i, ref_list)
@@ -3539,8 +3525,6 @@ class PYACC(element.Element):
     result.initial_action_code = self.initial_action_code
     result.tags = self.tags
     result.symbols = self.symbols
-    result.terminals = self.terminals
-    result.nonterminals = self.nonterminals
     result.n_productions = self.n_productions
     result.productions = self.productions
     result.first_nonterminal = self.first_nonterminal
@@ -3597,18 +3581,6 @@ class PYACC(element.Element):
           ', '.join([repr(i) for i in self.symbols])
         )
       )
-    if len(self.terminals):
-      params.append(
-        'terminals = [{0:s}]'.format(
-          ', '.join([repr(i) for i in self.terminals])
-        )
-      )
-    if len(self.nonterminals):
-      params.append(
-        'nonterminals = [{0:s}]'.format(
-          ', '.join([repr(i) for i in self.nonterminals])
-        )
-      )
     if self.n_productions != -1:
       params.append(
         'n_productions = {0:s}'.format(repr(self.n_productions))
@@ -3667,8 +3639,6 @@ class PYACC(element.Element):
         code_props = [None, None]
       )
     ]
-    self.terminals = self.symbols[:]
-    self.nonterminals = []
     # the following stores the destructor and printer for each tag
     self.tags = [
       PYACC.Tag(name = '', code_props = [None, None]),
@@ -3678,11 +3648,8 @@ class PYACC(element.Element):
     self.productions = []
 
     # variables that won't be serialized
-    character_to_symbol = {} # indexed by int, always >= 0 (terminal)
-    # note: in name_to_symbol, >= 0 is terminal, < 0 is ~nonterminal
-    # (don't bother storing the '$undefined', it can't be looked up)
-    name_to_symbol = {'error': 1}
-    # note: in string_to_symbol, >= 0 is terminal, < 0 is ~nonterminal
+    character_to_symbol = {} # indexed by ord(character)
+    name_to_symbol = {'error': 1} # don't bother storing $-prefixed names
     string_to_symbol = {}
     name_to_tag = {'': 0, '*': 1}
 
@@ -3706,8 +3673,8 @@ class PYACC(element.Element):
 
     # fill in token numbers that are not characters or overridden by user
     token = 0x100
-    for i in self.terminals:
-      if len(i.character_set) == 0:
+    for i in self.symbols:
+      if i._type == PYACC.Symbol.TYPE_TERMINAL and len(i.character_set) == 0:
         i.character_set = [token, token + 1]
         token += 1
 
@@ -3720,7 +3687,7 @@ class PYACC(element.Element):
           [
             (
               [],
-              self.nonterminals[
+              self.symbols[
                 self.start_nonterminal
               if self.start_nonterminal != -1 else
                 self.first_nonterminal
@@ -3739,7 +3706,14 @@ class PYACC(element.Element):
       # associativities (indexed by *_prec value)
       self.associativities,
       # n_terminals
-      max([0] + [i.character_set[-1] for i in self.terminals]),
+      max(
+        [0] +
+        [
+          i.character_set[-1]
+          for i in self.symbols
+          if i._type == PYACC.Symbol.TYPE_TERMINAL
+        ]
+      ),
       # eof_terminal
       0
     )
@@ -3749,7 +3723,8 @@ class PYACC(element.Element):
     for character1, _, precedence in sorted(
       [
         k
-        for i in self.terminals
+        for i in self.symbols
+        if i._type == PYACC.Symbol.TYPE_TERMINAL
         for j in range(0, len(i.character_set), 2)
         for k in [
           (i.character_set[j], True, -1),