Implement pyacc.symbols instead of pyacc.(non)terminals, combine PYACC.Terminal and...
author Nick Downing <downing.nick@gmail.com>
Sat, 28 Jul 2018 00:30:29 +0000 (10:30 +1000)
committer Nick Downing <downing.nick@gmail.com>
Sat, 28 Jul 2018 02:18:21 +0000 (12:18 +1000)
ast.py
bison_lr1dfa.py

diff --git a/ast.py b/ast.py
index 85ad092..387f42c 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -136,7 +136,11 @@ class PYACC(element.Element):
     # GENERATE END
 
   class Symbol(TagOrSymbol):
-    # GENERATE ELEMENT(int _tag, list(int) character_set) BEGIN
+    TYPE_NONE = -1
+    TYPE_TERMINAL = 0
+    TYPE_NONTERMINAL = 1
+
+    # GENERATE ELEMENT(int _type, int _tag, list(int) character_set, int precedence) BEGIN
     def __init__(
       self,
       tag = 'PYACC_Symbol',
@@ -145,8 +149,10 @@ class PYACC(element.Element):
       children = [],
       name = '',
       code_props = [],
+      _type = -1,
       _tag = -1,
-      character_set = []
+      character_set = [],
+      precedence = -1
     ):
       PYACC.TagOrSymbol.__init__(
         self,
@@ -157,6 +163,11 @@ class PYACC(element.Element):
         name,
         code_props
       )
+      self._type = (
+        element.deserialize_int(_type)
+      if isinstance(_type, str) else
+        _type
+      )
       self._tag = (
         element.deserialize_int(_tag)
       if isinstance(_tag, str) else
@@ -167,30 +178,45 @@ class PYACC(element.Element):
       if isinstance(character_set, str) else
         character_set
       )
+      self.precedence = (
+        element.deserialize_int(precedence)
+      if isinstance(precedence, str) else
+        precedence
+      )
     def serialize(self, ref_list):
       PYACC.TagOrSymbol.serialize(self, ref_list)
+      self.set('_type', element.serialize_int(self._type))
       self.set('_tag', element.serialize_int(self._tag))
       self.set(
         'character_set',
         ' '.join([element.serialize_int(i) for i in self.character_set])
       )
+      self.set('precedence', element.serialize_int(self.precedence))
     def deserialize(self, ref_list):
       PYACC.TagOrSymbol.deserialize(self, ref_list)
+      self._type = element.deserialize_int(self.get('_type', '-1'))
       self._tag = element.deserialize_int(self.get('_tag', '-1'))
       self.character_set = [
         element.deserialize_int(i)
         for i in self.get('character_set', '').split()
       ]
+      self.precedence = element.deserialize_int(self.get('precedence', '-1'))
     def copy(self, factory = None):
       result = PYACC.TagOrSymbol.copy(
         self,
         Symbol if factory is None else factory
       )
+      result._type = self._type
       result._tag = self._tag
       result.character_set = self.character_set
+      result.precedence = self.precedence
       return result
     def repr_serialize(self, params):
       PYACC.TagOrSymbol.repr_serialize(self, params)
+      if self._type != -1:
+        params.append(
+          '_type = {0:s}'.format(repr(self._type))
+        )
       if self._tag != -1:
         params.append(
           '_tag = {0:s}'.format(repr(self._tag))
@@ -201,57 +227,6 @@ class PYACC(element.Element):
             ', '.join([repr(i) for i in self.character_set])
           )
         )
-    def __repr__(self):
-      params = []
-      self.repr_serialize(params)
-      return 'ast.PYACC.Symbol({0:s})'.format(', '.join(params))
-    # GENERATE END
-
-  class Terminal(Symbol):
-    # GENERATE ELEMENT(int precedence) BEGIN
-    def __init__(
-      self,
-      tag = 'PYACC_Terminal',
-      attrib = {},
-      text = '',
-      children = [],
-      name = '',
-      code_props = [],
-      _tag = -1,
-      character_set = [],
-      precedence = -1
-    ):
-      PYACC.Symbol.__init__(
-        self,
-        tag,
-        attrib,
-        text,
-        children,
-        name,
-        code_props,
-        _tag,
-        character_set
-      )
-      self.precedence = (
-        element.deserialize_int(precedence)
-      if isinstance(precedence, str) else
-        precedence
-      )
-    def serialize(self, ref_list):
-      PYACC.Symbol.serialize(self, ref_list)
-      self.set('precedence', element.serialize_int(self.precedence))
-    def deserialize(self, ref_list):
-      PYACC.Symbol.deserialize(self, ref_list)
-      self.precedence = element.deserialize_int(self.get('precedence', '-1'))
-    def copy(self, factory = None):
-      result = PYACC.Symbol.copy(
-        self,
-        Terminal if factory is None else factory
-      )
-      result.precedence = self.precedence
-      return result
-    def repr_serialize(self, params):
-      PYACC.Symbol.repr_serialize(self, params)
       if self.precedence != -1:
         params.append(
           'precedence = {0:s}'.format(repr(self.precedence))
@@ -259,43 +234,7 @@ class PYACC(element.Element):
     def __repr__(self):
       params = []
       self.repr_serialize(params)
-      return 'ast.PYACC.Terminal({0:s})'.format(', '.join(params))
-    # GENERATE END
-
-  class Nonterminal(Symbol):
-    # GENERATE ELEMENT() BEGIN
-    def __init__(
-      self,
-      tag = 'PYACC_Nonterminal',
-      attrib = {},
-      text = '',
-      children = [],
-      name = '',
-      code_props = [],
-      _tag = -1,
-      character_set = []
-    ):
-      PYACC.Symbol.__init__(
-        self,
-        tag,
-        attrib,
-        text,
-        children,
-        name,
-        code_props,
-        _tag,
-        character_set
-      )
-    def copy(self, factory = None):
-      result = PYACC.Symbol.copy(
-        self,
-        Nonterminal if factory is None else factory
-      )
-      return result
-    def __repr__(self):
-      params = []
-      self.repr_serialize(params)
-      return 'ast.PYACC.Nonterminal({0:s})'.format(', '.join(params))
+      return 'ast.PYACC.Symbol({0:s})'.format(', '.join(params))
     # GENERATE END
 
   # syntax classes
@@ -782,12 +721,14 @@ class PYACC(element.Element):
           else:
             self.symbol = len(pyacc.terminals)
             character_to_symbol[character] = self.symbol
-            pyacc.terminals.append(
-              PYACC.Terminal(
+            pyacc.symbols.append(
+              PYACC.Symbol(
                 code_props = [None, None],
+                _type = PYACC.Symbol.TYPE_TERMINAL,
                 character_set = [character, character + 1]
               )
             )
+            pyacc.terminals.append(pyacc.symbols[-1])
         elif isinstance(self[0], PYACC.ID):
           name = self[0].get_text()
           if name in name_to_symbol:
@@ -795,13 +736,15 @@ class PYACC(element.Element):
           else:
             self.symbol = ~len(pyacc.nonterminals)
             name_to_symbol[name] = self.symbol
-            pyacc.nonterminals.append(
-              PYACC.Nonterminal(
+            pyacc.symbols.append(
+              PYACC.Symbol(
                 name = name,
                 code_props = [None, None],
+                _type = PYACC.Symbol.TYPE_NONTERMINAL,
                 character_set = []
               )
             )
+            pyacc.nonterminals.append(pyacc.symbols[-1])
         elif isinstance(self[0], PYACC.String):
           string = self[0][0].get_text()
           self.symbol = string_to_symbol[string] # must already exist
@@ -2766,12 +2709,14 @@ class PYACC(element.Element):
         else:
           self.symbol = len(pyacc.terminals)
           character_to_symbol[character] = self.symbol
-          pyacc.terminals.append(
-            PYACC.Terminal(
+          pyacc.symbols.append(
+            PYACC.Symbol(
               code_props = [None, None],
+              _type = PYACC.Symbol.TYPE_TERMINAL,
               character_set = [character, character + 1]
             )
           )
+          pyacc.terminals.append(pyacc.symbols[-1])
       elif isinstance(self[0], PYACC.ID):
         name = self[0].get_text()
         self.symbol = name_to_symbol[name] # must already exist
@@ -2855,12 +2800,14 @@ class PYACC(element.Element):
         else:
           self.symbol = len(pyacc.terminals)
           character_to_symbol[character] = self.symbol
-          pyacc.terminals.append(
-            PYACC.Terminal(
+          pyacc.symbols.append(
+            PYACC.Symbol(
               code_props = [None, None],
+              _type = PYACC.Symbol.TYPE_TERMINAL,
               character_set = [character, character + 1]
             )
           )
+          pyacc.terminals.append(pyacc.symbols[-1])
       elif isinstance(self[0], PYACC.ID):
         name = self[0].get_text()
         if name in name_to_symbol:
@@ -2869,12 +2816,14 @@ class PYACC(element.Element):
         else:
           self.symbol = len(pyacc.terminals)
           name_to_symbol[name] = self.symbol
-          pyacc.terminals.append(
-            PYACC.Terminal(
+          pyacc.symbols.append(
+            PYACC.Symbol(
               name = name,
+              _type = PYACC.Symbol.TYPE_TERMINAL,
               code_props = [None, None]
             )
           )
+          pyacc.terminals.append(pyacc.symbols[-1])
       elif isinstance(self[0], PYACC.String):
         string = self[0][0].get_text()
         self.symbol = string_to_symbol[string] # must already exist
@@ -2965,13 +2914,15 @@ class PYACC(element.Element):
         else:
           self.symbol = ~len(pyacc.nonterminals)
           name_to_symbol[name] = self.symbol
-          pyacc.nonterminals.append(
-            PYACC.Nonterminal(
+          pyacc.symbols.append(
+            PYACC.Symbol(
               name = name,
               code_props = [None, None],
+              _type = PYACC.Symbol.TYPE_NONTERMINAL,
               character_set = []
             )
           )
+          pyacc.nonterminals.append(pyacc.symbols[-1])
       elif isinstance(self[0], PYACC.String):
         string = self[0][0].get_text()
         self.symbol = string_to_symbol[string] # must already exist
@@ -3415,7 +3366,7 @@ class PYACC(element.Element):
     ):
       pass
 
-  # GENERATE ELEMENT(list(ref) top_code, list(ref) before_union_code, list(ref) requires_code, str union_name, ref union_code, list(ref) after_union_code, ref initial_action_code, list(ref) tags, list(ref) terminals, list(ref) nonterminals, int n_productions, list(ref) productions, int first_nonterminal, int start_nonterminal, list(int) associativities) BEGIN
+  # GENERATE ELEMENT(list(ref) top_code, list(ref) before_union_code, list(ref) requires_code, str union_name, ref union_code, list(ref) after_union_code, ref initial_action_code, list(ref) tags, list(ref) symbols, list(ref) terminals, list(ref) nonterminals, int n_productions, list(ref) productions, int first_nonterminal, int start_nonterminal, list(int) associativities) BEGIN
   def __init__(
     self,
     tag = 'PYACC',
@@ -3430,6 +3381,7 @@ class PYACC(element.Element):
     after_union_code = [],
     initial_action_code = None,
     tags = [],
+    symbols = [],
     terminals = [],
     nonterminals = [],
     n_productions = -1,
@@ -3453,6 +3405,7 @@ class PYACC(element.Element):
     self.after_union_code = after_union_code
     self.initial_action_code = initial_action_code
     self.tags = tags
+    self.symbols = symbols
     self.terminals = terminals
     self.nonterminals = nonterminals
     self.n_productions = (
@@ -3501,6 +3454,10 @@ class PYACC(element.Element):
       'tags',
       ' '.join([element.serialize_ref(i, ref_list) for i in self.tags])
     )
+    self.set(
+      'symbols',
+      ' '.join([element.serialize_ref(i, ref_list) for i in self.symbols])
+    )
     self.set(
       'terminals',
       ' '.join([element.serialize_ref(i, ref_list) for i in self.terminals])
@@ -3545,6 +3502,10 @@ class PYACC(element.Element):
       element.deserialize_ref(i, ref_list)
       for i in self.get('tags', '').split()
     ]
+    self.symbols = [
+      element.deserialize_ref(i, ref_list)
+      for i in self.get('symbols', '').split()
+    ]
     self.terminals = [
       element.deserialize_ref(i, ref_list)
       for i in self.get('terminals', '').split()
@@ -3577,6 +3538,7 @@ class PYACC(element.Element):
     result.after_union_code = self.after_union_code
     result.initial_action_code = self.initial_action_code
     result.tags = self.tags
+    result.symbols = self.symbols
     result.terminals = self.terminals
     result.nonterminals = self.nonterminals
     result.n_productions = self.n_productions
@@ -3629,6 +3591,12 @@ class PYACC(element.Element):
           ', '.join([repr(i) for i in self.tags])
         )
       )
+    if len(self.symbols):
+      params.append(
+        'symbols = [{0:s}]'.format(
+          ', '.join([repr(i) for i in self.symbols])
+        )
+      )
     if len(self.terminals):
       params.append(
         'terminals = [{0:s}]'.format(
@@ -3681,21 +3649,25 @@ class PYACC(element.Element):
     self.after_union_code = []
     self.initial_action_code = None
     self.precedences = 0
-    self.terminals = [
-      PYACC.Terminal(
+    self.symbols = [
+      PYACC.Symbol(
         name = '$eof',
         code_props = [None, None],
+        _type = PYACC.Symbol.TYPE_TERMINAL,
         character_set = [0, 1]
       ),
-      PYACC.Terminal(
+      PYACC.Symbol(
         name = 'error',
+        _type = PYACC.Symbol.TYPE_TERMINAL,
         code_props = [None, None]
       ),
-      PYACC.Terminal(
+      PYACC.Symbol(
         name = '$undefined',
+        _type = PYACC.Symbol.TYPE_TERMINAL,
         code_props = [None, None]
       )
     ]
+    self.terminals = self.symbols[:]
     self.nonterminals = []
     # the following stores the destructor and printer for each tag
     self.tags = [
@@ -3836,8 +3808,6 @@ tag_to_class = {
   'PYACC_TagOrSymbol': PYACC.TagOrSymbol,
   'PYACC_Tag': PYACC.Tag,
   'PYACC_Symbol': PYACC.Symbol,
-  'PYACC_Terminal': PYACC.Terminal,
-  'PYACC_Nonterminal': PYACC.Nonterminal,
   'PYACC_BracedPredicate': PYACC.BracedPredicate,
   'PYACC_Char': PYACC.Char,
   'PYACC_Int': PYACC.Int,
diff --git a/bison_lr1dfa.py b/bison_lr1dfa.py
index 0523fae..d9526df 100644 (file)
--- a/bison_lr1dfa.py
+++ b/bison_lr1dfa.py
@@ -1,3 +1,4 @@
+import ast
 import element
 import numpy
 import sys
@@ -274,48 +275,52 @@ def generate(pyacc, skel_file, out_file, defines_file = None):
   # this undoes yacc/bison's rather wasteful mapping of 0x00..0xff to literal
   # characters, and also accommodates any token value overrides given by the
   # user, yielding a consecutive set of terminal numbers that are really used
+  n_terminals = 0
   translate_terminals = numpy.full(
     (lr1dfa.n_terminals,),
     2, # '$undefined'
     numpy.int16
   )
-  for i in range(len(pyacc.terminals)):
-    for j in range(0, len(pyacc.terminals[i].character_set), 2):
-      translate_terminals[
-        pyacc.terminals[i].character_set[j]:
-        pyacc.terminals[i].character_set[j + 1]
-      ] = i
+  for i in pyacc.symbols:
+    if i._type == ast.PYACC.Symbol.TYPE_TERMINAL:
+      for j in range(0, len(i.character_set), 2):
+        translate_terminals[
+          i.character_set[j]:i.character_set[j + 1]
+        ] = n_terminals
+      n_terminals += 1
 
   # generate translate table for nonterminal symbols
   # this is effectively a map from productions back to nonterminal symbols
   # we do not generate an entry for the first production (start production)
-  # we generate extra fake entries after end of pyacc.nonterminals for fake
+  # we generate extra fake entries after end of the nonterminals for fake
   # productions due to midrule actions (which leave gaps in the numbering)
+  n_nonterminals = 0
   translate_nonterminals = numpy.full(
     (len(lr1dfa.productions) - 1,),
     -1,
     numpy.int16
   )
-  for i in range(len(pyacc.nonterminals)):
-    for j in range(0, len(pyacc.nonterminals[i].character_set), 2):
-      translate_nonterminals[
-        pyacc.nonterminals[i].character_set[j] - 1:
-        pyacc.nonterminals[i].character_set[j + 1] - 1
-      ] = i
+  for i in pyacc.symbols:
+    if i._type == ast.PYACC.Symbol.TYPE_NONTERMINAL: 
+      for j in range(0, len(i.character_set), 2):
+        translate_nonterminals[
+          i.character_set[j] - 1:i.character_set[j + 1] - 1
+        ] = n_nonterminals
+      n_nonterminals += 1
   midrule_actions = [translate_nonterminals == -1]
   n_midrule_actions = numpy.sum(midrule_actions)
   translate_nonterminals[midrule_actions] = numpy.arange(
-    len(pyacc.nonterminals),
-    len(pyacc.nonterminals) + n_midrule_actions,
+    n_nonterminals,
+    n_nonterminals + n_midrule_actions,
     dtype = numpy.int16
   )
 
   # translate and compress the tables
   bison_lr1dfa = BisonLR1DFA(
     lr1dfa,
-    len(pyacc.terminals),
+    n_terminals,
     translate_terminals,
-    len(pyacc.nonterminals) + n_midrule_actions,
+    n_nonterminals + n_midrule_actions,
     translate_nonterminals
   )
 
@@ -356,8 +361,11 @@ def generate(pyacc, skel_file, out_file, defines_file = None):
                 ','.join(
                   [
                     '\n    {0:s} = {1:d}'.format(i.name, i.character_set[0])
-                    for i in pyacc.terminals[3:]
-                    if len(i.name)
+                    for i in pyacc.symbols[3:]
+                    if (
+                      i._type == ast.PYACC.Symbol.TYPE_TERMINAL and
+                      len(i.name)
+                    )
                   ]
                 )
               )
@@ -370,8 +378,11 @@ def generate(pyacc, skel_file, out_file, defines_file = None):
                 ''.join(
                   [
                     '#define {0:s} {1:d}\n'.format(i.name, i.character_set[0])
-                    for i in pyacc.terminals[3:]
-                    if len(i.name)
+                    for i in pyacc.symbols[3:]
+                    if (
+                      i._type == ast.PYACC.Symbol.TYPE_TERMINAL and
+                      len(i.name)
+                    )
                   ]
                 )
               )
@@ -423,9 +434,14 @@ typedef union YYSTYPE YYSTYPE;
                     '\\\\x{0:02x}'.format(i.character_set[0])
                   )
                 )
-                for i in pyacc.terminals
+                for i in pyacc.symbols
+                if i._type == ast.PYACC.Symbol.TYPE_TERMINAL
+              ] +
+              [
+                '"{0:s}"'.format(i.name)
+                for i in pyacc.symbols
+                if i._type == ast.PYACC.Symbol.TYPE_NONTERMINAL
               ] +
-              ['"{0:s}"'.format(i.name) for i in pyacc.nonterminals] +
               ['"$@{0:d}"'.format(i) for i in range(n_midrule_actions)] +
               ['YY_NULLPTR']
             ):