First cut at making the semantic analysis accept what it needs to build bison
author Nick Downing <downing.nick@gmail.com>
Thu, 26 Jul 2018 14:36:32 +0000 (00:36 +1000)
committer Nick Downing <downing.nick@gmail.com>
Sat, 28 Jul 2018 00:07:20 +0000 (10:07 +1000)
ast.py
bison_lr1dfa.py
skel/Makefile
skel/y.tab.c.patch
skel/y.tab.h.patch

diff --git a/ast.py b/ast.py
index 0bbe302..85ad092 100644
--- a/ast.py
+++ b/ast.py
@@ -1,6 +1,7 @@
 import bisect_set
 import element
 import lr1
+import sys
 
 class Item(element.Element):
   # GENERATE ELEMENT() BEGIN
@@ -34,23 +35,24 @@ class Item(element.Element):
     pyacc,
     section,
     character_to_symbol,
-    name_to_symbol
+    name_to_symbol,
+    string_to_symbol,
+    name_to_tag
   ):
-    raise NotImplementedException
+    raise NotImplementedError
  
 class PYACC(element.Element):
   # internal classes
-  class Symbol(element.Element):
-    # GENERATE ELEMENT(str name, str tag_name, list(int) character_set) BEGIN
+  class TagOrSymbol(element.Element):
+    # GENERATE ELEMENT(str name, list(ref) code_props) BEGIN
     def __init__(
       self,
-      tag = 'PYACC_Symbol',
+      tag = 'PYACC_TagOrSymbol',
       attrib = {},
       text = '',
       children = [],
       name = '',
-      tag_name = '',
-      character_set = []
+      code_props = []
     ):
       element.Element.__init__(
         self,
@@ -60,46 +62,138 @@ class PYACC(element.Element):
         children
       )
       self.name = name
-      self.tag_name = tag_name
+      self.code_props = code_props
+    def serialize(self, ref_list):
+      element.Element.serialize(self, ref_list)
+      self.set('name', element.serialize_str(self.name))
+      self.set(
+        'code_props',
+        ' '.join([element.serialize_ref(i, ref_list) for i in self.code_props])
+      )
+    def deserialize(self, ref_list):
+      element.Element.deserialize(self, ref_list)
+      self.name = element.deserialize_str(self.get('name', ''))
+      self.code_props = [
+        element.deserialize_ref(i, ref_list)
+        for i in self.get('code_props', '').split()
+      ]
+    def copy(self, factory = None):
+      result = element.Element.copy(
+        self,
+        TagOrSymbol if factory is None else factory
+      )
+      result.name = self.name
+      result.code_props = self.code_props
+      return result
+    def repr_serialize(self, params):
+      element.Element.repr_serialize(self, params)
+      if self.name != '':
+        params.append(
+          'name = {0:s}'.format(repr(self.name))
+        )
+      if len(self.code_props):
+        params.append(
+          'code_props = [{0:s}]'.format(
+            ', '.join([repr(i) for i in self.code_props])
+          )
+        )
+    def __repr__(self):
+      params = []
+      self.repr_serialize(params)
+      return 'ast.PYACC.TagOrSymbol({0:s})'.format(', '.join(params))
+    # GENERATE END
+
+  class Tag(TagOrSymbol):
+    # GENERATE ELEMENT() BEGIN
+    def __init__(
+      self,
+      tag = 'PYACC_Tag',
+      attrib = {},
+      text = '',
+      children = [],
+      name = '',
+      code_props = []
+    ):
+      PYACC.TagOrSymbol.__init__(
+        self,
+        tag,
+        attrib,
+        text,
+        children,
+        name,
+        code_props
+      )
+    def copy(self, factory = None):
+      result = PYACC.TagOrSymbol.copy(
+        self,
+        Tag if factory is None else factory
+      )
+      return result
+    def __repr__(self):
+      params = []
+      self.repr_serialize(params)
+      return 'ast.PYACC.Tag({0:s})'.format(', '.join(params))
+    # GENERATE END
+
+  class Symbol(TagOrSymbol):
+    # GENERATE ELEMENT(int _tag, list(int) character_set) BEGIN
+    def __init__(
+      self,
+      tag = 'PYACC_Symbol',
+      attrib = {},
+      text = '',
+      children = [],
+      name = '',
+      code_props = [],
+      _tag = -1,
+      character_set = []
+    ):
+      PYACC.TagOrSymbol.__init__(
+        self,
+        tag,
+        attrib,
+        text,
+        children,
+        name,
+        code_props
+      )
+      self._tag = (
+        element.deserialize_int(_tag)
+      if isinstance(_tag, str) else
+        _tag
+      )
       self.character_set = (
         [element.deserialize_int(i) for i in character_set.split()]
       if isinstance(character_set, str) else
         character_set
       )
     def serialize(self, ref_list):
-      element.Element.serialize(self, ref_list)
-      self.set('name', element.serialize_str(self.name))
-      self.set('tag_name', element.serialize_str(self.tag_name))
+      PYACC.TagOrSymbol.serialize(self, ref_list)
+      self.set('_tag', element.serialize_int(self._tag))
       self.set(
         'character_set',
         ' '.join([element.serialize_int(i) for i in self.character_set])
       )
     def deserialize(self, ref_list):
-      element.Element.deserialize(self, ref_list)
-      self.name = element.deserialize_str(self.get('name', ''))
-      self.tag_name = element.deserialize_str(self.get('tag_name', ''))
+      PYACC.TagOrSymbol.deserialize(self, ref_list)
+      self._tag = element.deserialize_int(self.get('_tag', '-1'))
       self.character_set = [
         element.deserialize_int(i)
         for i in self.get('character_set', '').split()
       ]
     def copy(self, factory = None):
-      result = element.Element.copy(
+      result = PYACC.TagOrSymbol.copy(
         self,
         Symbol if factory is None else factory
       )
-      result.name = self.name
-      result.tag_name = self.tag_name
+      result._tag = self._tag
       result.character_set = self.character_set
       return result
     def repr_serialize(self, params):
-      element.Element.repr_serialize(self, params)
-      if self.name != '':
-        params.append(
-          'name = {0:s}'.format(repr(self.name))
-        )
-      if self.tag_name != '':
+      PYACC.TagOrSymbol.repr_serialize(self, params)
+      if self._tag != -1:
         params.append(
-          'tag_name = {0:s}'.format(repr(self.tag_name))
+          '_tag = {0:s}'.format(repr(self._tag))
         )
       if len(self.character_set):
         params.append(
@@ -122,7 +216,8 @@ class PYACC(element.Element):
       text = '',
       children = [],
       name = '',
-      tag_name = '',
+      code_props = [],
+      _tag = -1,
       character_set = [],
       precedence = -1
     ):
@@ -133,7 +228,8 @@ class PYACC(element.Element):
         text,
         children,
         name,
-        tag_name,
+        code_props,
+        _tag,
         character_set
       )
       self.precedence = (
@@ -175,7 +271,8 @@ class PYACC(element.Element):
       text = '',
       children = [],
       name = '',
-      tag_name = '',
+      code_props = [],
+      _tag = -1,
       character_set = []
     ):
       PYACC.Symbol.__init__(
@@ -185,7 +282,8 @@ class PYACC(element.Element):
         text,
         children,
         name,
-        tag_name,
+        code_props,
+        _tag,
         character_set
       )
     def copy(self, factory = None):
@@ -320,9 +418,11 @@ class PYACC(element.Element):
         production,
         character_to_symbol,
         name_to_symbol,
+        string_to_symbol,
+        name_to_tag,
         last_action
       ):
-        raise NotImplementedException
+        raise NotImplementedError
       def add_to_symbols(
         self,
         pyacc,
@@ -369,6 +469,8 @@ class PYACC(element.Element):
         production,
         character_to_symbol,
         name_to_symbol,
+        string_to_symbol,
+        name_to_tag,
         last_action
       ):
         pyacc.n_productions += int(last_action) # midrule action production
@@ -413,13 +515,14 @@ class PYACC(element.Element):
         return self[0]
 
     class DPrec(Item):
-      # GENERATE ELEMENT() BEGIN
+      # GENERATE ELEMENT(int value) BEGIN
       def __init__(
         self,
         tag = 'PYACC_Production_DPrec',
         attrib = {},
         text = '',
-        children = []
+        children = [],
+        value = -1
       ):
         PYACC.Production.Item.__init__(
           self,
@@ -428,12 +531,30 @@ class PYACC(element.Element):
           text,
           children
         )
+        self.value = (
+          element.deserialize_int(value)
+        if isinstance(value, str) else
+          value
+        )
+      def serialize(self, ref_list):
+        PYACC.Production.Item.serialize(self, ref_list)
+        self.set('value', element.serialize_int(self.value))
+      def deserialize(self, ref_list):
+        PYACC.Production.Item.deserialize(self, ref_list)
+        self.value = element.deserialize_int(self.get('value', '-1'))
       def copy(self, factory = None):
         result = PYACC.Production.Item.copy(
           self,
           DPrec if factory is None else factory
         )
+        result.value = self.value
         return result
+      def repr_serialize(self, params):
+        PYACC.Production.Item.repr_serialize(self, params)
+        if self.value != -1:
+          params.append(
+            'value = {0:s}'.format(repr(self.value))
+          )
       def __repr__(self):
         params = []
         self.repr_serialize(params)
@@ -467,6 +588,31 @@ class PYACC(element.Element):
         self.repr_serialize(params)
         return 'ast.PYACC.Production.Empty({0:s})'.format(', '.join(params))
       # GENERATE END
+      def post_process(
+        self,
+        pyacc,
+        section,
+        production,
+        character_to_symbol,
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag,
+        last_action
+      ):
+        # just skip %empty for now (fix this later)
+        return last_action
+
+      def add_to_symbols(
+        self,
+        pyacc,
+        production,
+        last_action,
+        _lr1,
+        symbols,
+        tag_names
+      ):
+        # just skip %empty for now (fix this later)
+        return last_action
 
     class Merge(Item):
       # GENERATE ELEMENT() BEGIN
@@ -549,6 +695,8 @@ class PYACC(element.Element):
         production,
         character_to_symbol,
         name_to_symbol,
+        string_to_symbol,
+        name_to_tag,
         last_action
       ):
         self[0].post_process(
@@ -556,11 +704,13 @@ class PYACC(element.Element):
           section,
           character_to_symbol,
           name_to_symbol,
-          '', # tag_name
+          string_to_symbol,
+          name_to_tag,
+          -1, # _tag
           -1 # precedence
         )
         assert production.precedence_terminal == -1
-        production.precedence_terminal = self[0].terminal
+        production.precedence_terminal = self[0].symbol
         return last_action
 
     class SymbolRef(Item):
@@ -617,6 +767,8 @@ class PYACC(element.Element):
         production,
         character_to_symbol,
         name_to_symbol,
+        string_to_symbol,
+        name_to_tag,
         last_action
       ):
         pyacc.n_productions += int(last_action) # midrule action production
@@ -631,7 +783,10 @@ class PYACC(element.Element):
             self.symbol = len(pyacc.terminals)
             character_to_symbol[character] = self.symbol
             pyacc.terminals.append(
-              PYACC.Terminal(character_set = [character, character + 1])
+              PYACC.Terminal(
+                code_props = [None, None],
+                character_set = [character, character + 1]
+              )
             )
         elif isinstance(self[0], PYACC.ID):
           name = self[0].get_text()
@@ -641,8 +796,15 @@ class PYACC(element.Element):
             self.symbol = ~len(pyacc.nonterminals)
             name_to_symbol[name] = self.symbol
             pyacc.nonterminals.append(
-              PYACC.Nonterminal(name = name, character_set = [])
+              PYACC.Nonterminal(
+                name = name,
+                code_props = [None, None],
+                character_set = []
+              )
             )
+        elif isinstance(self[0], PYACC.String):
+          string = self[0][0].get_text()
+          self.symbol = string_to_symbol[string] # must already exist
         else:
           assert False
         if self.symbol >= 0:
@@ -675,10 +837,18 @@ class PYACC(element.Element):
           )
         if self.symbol >= 0:
           symbols.append((pyacc.terminals[self.symbol].character_set, []))
-          tag_names.append(pyacc.terminals[self.symbol].tag_name)
+          tag_names.append(
+            ''
+          if pyacc.terminals[self.symbol]._tag == -1 else
+            pyacc.tags[pyacc.terminals[self.symbol]._tag].name
+          )
         else:
           symbols.append(([], pyacc.nonterminals[~self.symbol].character_set))
-          tag_names.append(pyacc.nonterminals[~self.symbol].tag_name)
+          tag_names.append(
+            ''
+          if pyacc.nonterminals[~self.symbol]._tag == -1 else
+            pyacc.tags[pyacc.nonterminals[~self.symbol]._tag].name
+          )
         return None
 
     # GENERATE ELEMENT(int lhs_nonterminal, int n_symbols, int last_terminal, int precedence_terminal) BEGIN
@@ -772,7 +942,9 @@ class PYACC(element.Element):
       section,
       lhs_nonterminal,
       character_to_symbol,
-      name_to_symbol
+      name_to_symbol,
+      string_to_symbol,
+      name_to_tag
     ):
       self.lhs_nonterminal = lhs_nonterminal
 
@@ -787,6 +959,8 @@ class PYACC(element.Element):
           self,
           character_to_symbol,
           name_to_symbol,
+          string_to_symbol,
+          name_to_tag,
           last_action
         )
 
@@ -819,7 +993,11 @@ class PYACC(element.Element):
             isinstance(i, PYACC.Text.ValueReference) and
             len(i.tag_name) == 0
           ):
-            i.tag_name = pyacc.nonterminals[self.lhs_nonterminal].tag_name
+            i.tag_name = (
+              ''
+            if pyacc.nonterminals[self.lhs_nonterminal]._tag == -1 else
+              pyacc.tags[pyacc.nonterminals[self.lhs_nonterminal]._tag].name
+            )
 
       _lr1.productions.append(
         (
@@ -873,15 +1051,38 @@ class PYACC(element.Element):
         self.repr_serialize(params)
         return 'ast.PYACC.Section1Or2.Code({0:s})'.format(', '.join(params))
       # GENERATE END
-
+      def post_process(
+        self,
+        pyacc,
+        section,
+        character_to_symbol,
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag
+      ):
+        name = self[0].get_text()
+        if len(name) == 0: # do the same as Section1.Prologue
+          (
+            pyacc.before_union_code
+          if pyacc.union_code is None else
+            pyacc.after_union_code
+          ).append(self[1])
+        elif name == 'top':
+          pyacc.top_code.append(self[1])
+        elif name == 'requires':
+          pyacc.requires_code.append(self[1])
+        else:
+          assert False
     class CodeProps(Item):
-      # GENERATE ELEMENT() BEGIN
+      # GENERATE ELEMENT(int _type) BEGIN
       def __init__(
         self,
         tag = 'PYACC_Section1Or2_CodeProps',
         attrib = {},
         text = '',
-        children = []
+        children = [],
+        _type = -1
       ):
         Item.__init__(
           self,
@@ -890,17 +1091,54 @@ class PYACC(element.Element):
           text,
           children
         )
+        self._type = (
+          element.deserialize_int(_type)
+        if isinstance(_type, str) else
+          _type
+        )
+      def serialize(self, ref_list):
+        Item.serialize(self, ref_list)
+        self.set('_type', element.serialize_int(self._type))
+      def deserialize(self, ref_list):
+        Item.deserialize(self, ref_list)
+        self._type = element.deserialize_int(self.get('_type', '-1'))
       def copy(self, factory = None):
         result = Item.copy(
           self,
           CodeProps if factory is None else factory
         )
+        result._type = self._type
         return result
+      def repr_serialize(self, params):
+        Item.repr_serialize(self, params)
+        if self._type != -1:
+          params.append(
+            '_type = {0:s}'.format(repr(self._type))
+          )
       def __repr__(self):
         params = []
         self.repr_serialize(params)
         return 'ast.PYACC.Section1Or2.CodeProps({0:s})'.format(', '.join(params))
       # GENERATE END
+      def post_process(
+        self,
+        pyacc,
+        section,
+        character_to_symbol,
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag
+      ):
+        self[1].set_code_props(
+          pyacc,
+          section,
+          character_to_symbol,
+          name_to_symbol,
+          string_to_symbol,
+          name_to_tag,
+          self._type,
+          self[0]
+        )
 
     class DefaultPrec(Item):
       # GENERATE ELEMENT() BEGIN
@@ -959,14 +1197,14 @@ class PYACC(element.Element):
       # GENERATE END
 
     class Precedence(Item):
-      # GENERATE ELEMENT(int type) BEGIN
+      # GENERATE ELEMENT(int _type) BEGIN
       def __init__(
         self,
         tag = 'PYACC_Section1Or2_Precedence',
         attrib = {},
         text = '',
         children = [],
-        type = -1
+        _type = -1
       ):
         Item.__init__(
           self,
@@ -975,29 +1213,29 @@ class PYACC(element.Element):
           text,
           children
         )
-        self.type = (
-          element.deserialize_int(type)
-        if isinstance(type, str) else
-          type
+        self._type = (
+          element.deserialize_int(_type)
+        if isinstance(_type, str) else
+          _type
         )
       def serialize(self, ref_list):
         Item.serialize(self, ref_list)
-        self.set('type', element.serialize_int(self.type))
+        self.set('_type', element.serialize_int(self._type))
       def deserialize(self, ref_list):
         Item.deserialize(self, ref_list)
-        self.type = element.deserialize_int(self.get('type', '-1'))
+        self._type = element.deserialize_int(self.get('_type', '-1'))
       def copy(self, factory = None):
         result = Item.copy(
           self,
           Precedence if factory is None else factory
         )
-        result.type = self.type
+        result._type = self._type
         return result
       def repr_serialize(self, params):
         Item.repr_serialize(self, params)
-        if self.type != -1:
+        if self._type != -1:
           params.append(
-            'type = {0:s}'.format(repr(self.type))
+            '_type = {0:s}'.format(repr(self._type))
           )
       def __repr__(self):
         params = []
@@ -1009,16 +1247,20 @@ class PYACC(element.Element):
         pyacc,
         section,
         character_to_symbol,
-        name_to_symbol
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag
       ):
         self[0].post_process(
           pyacc,
           section,
           character_to_symbol,
           name_to_symbol,
+          string_to_symbol,
+          name_to_tag,
           len(pyacc.associativities) # precedence
         )
-        pyacc.associativities.append(self.type)
+        pyacc.associativities.append(self._type)
 
     class Start(Item):
       # GENERATE ELEMENT() BEGIN
@@ -1052,16 +1294,20 @@ class PYACC(element.Element):
         pyacc,
         section,
         character_to_symbol,
-        name_to_symbol
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag
       ):
         self[0].post_process(
           pyacc,
           section,
           character_to_symbol,
           name_to_symbol,
+          string_to_symbol,
+          name_to_tag,
           -1 # precedence
         )
-        pyacc.start_nonterminal = self[0].nonterminal
+        pyacc.start_nonterminal = ~self[0].symbol
 
     class TaggedSymbols(element.Element):
       # GENERATE ELEMENT() BEGIN
@@ -1096,18 +1342,44 @@ class PYACC(element.Element):
         section,
         character_to_symbol,
         name_to_symbol,
+        string_to_symbol,
+        name_to_tag,
         precedence
       ):
-        tag_name = ''
+        _tag = -1
         for i in self:
-          tag_name = i.post_process(
+          _tag = i.post_process(
             pyacc,
             section,
             character_to_symbol,
             name_to_symbol,
-            tag_name,
+            string_to_symbol,
+            name_to_tag,
+            _tag,
             precedence
           )
+      def set_code_props(
+        self,
+        pyacc,
+        section,
+        character_to_symbol,
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag,
+        _type,
+        code
+      ):
+        for i in self:
+          i.set_code_props(
+            pyacc,
+            section,
+            character_to_symbol,
+            name_to_symbol,
+            string_to_symbol,
+            name_to_tag,
+            _type,
+            code
+          )
 
     class Token(Item):
       # GENERATE ELEMENT() BEGIN
@@ -1141,13 +1413,17 @@ class PYACC(element.Element):
         pyacc,
         section,
         character_to_symbol,
-        name_to_symbol
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag
       ):
         self[0].post_process(
           pyacc,
           section,
           character_to_symbol,
           name_to_symbol,
+          string_to_symbol,
+          name_to_tag,
           -1 # precedence
         )
 
@@ -1178,6 +1454,24 @@ class PYACC(element.Element):
         self.repr_serialize(params)
         return 'ast.PYACC.Section1Or2.Type({0:s})'.format(', '.join(params))
       # GENERATE END
+      def post_process(
+        self,
+        pyacc,
+        section,
+        character_to_symbol,
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag
+      ):
+        self[0].post_process(
+          pyacc,
+          section,
+          character_to_symbol,
+          name_to_symbol,
+          string_to_symbol,
+          name_to_tag,
+          -1 # precedence
+        )
 
     class Union(Item):
       # GENERATE ELEMENT() BEGIN
@@ -1211,7 +1505,9 @@ class PYACC(element.Element):
         pyacc,
         section,
         character_to_symbol,
-        name_to_symbol
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag
       ):
         pyacc.union_name = self[0].get_text()
         pyacc.union_code = self[1] 
@@ -1246,14 +1542,18 @@ class PYACC(element.Element):
       self,
       pyacc,
       character_to_symbol,
-      name_to_symbol
+      name_to_symbol,
+      string_to_symbol,
+      name_to_tag
     ):
       for i in self:
         i.post_process(
           pyacc,
           self,
           character_to_symbol,
-          name_to_symbol
+          name_to_symbol,
+          string_to_symbol,
+          name_to_tag
         )
  
   class Section1(Section1Or2):
@@ -1284,7 +1584,73 @@ class PYACC(element.Element):
         self.repr_serialize(params)
         return 'ast.PYACC.Section1.Define({0:s})'.format(', '.join(params))
       # GENERATE END
+      def post_process(
+        self,
+        pyacc,
+        section,
+        character_to_symbol,
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag
+      ):
+        name = self[0].get_text()
+        value = self[1].get_text() if len(self) >= 2 else ''
+        if name == 'api.prefix':
+          section.api_prefix = value
+        elif name == 'api.pure':
+          section.api_pure = value
+        elif name == 'locations':
+          assert value == '' or value == 'true' or value == 'false'
+          section.locations = value != 'false'
+        elif name == 'parse.error':
+          section.parse_error = value
+        elif name == 'parse.lac':
+          section.parse_lac = value
+        elif name == 'parse.trace':
+          assert value == '' or value == 'true' or value == 'false'
+          section.parse_trace = value != 'false'
+        else:
+          assert False
 
+    class Defines(Item):
+      # GENERATE ELEMENT() BEGIN
+      def __init__(
+        self,
+        tag = 'PYACC_Section1_Defines',
+        attrib = {},
+        text = '',
+        children = []
+      ):
+        Item.__init__(
+          self,
+          tag,
+          attrib,
+          text,
+          children
+        )
+      def copy(self, factory = None):
+        result = Item.copy(
+          self,
+          Defines if factory is None else factory
+        )
+        return result
+      def __repr__(self):
+        params = []
+        self.repr_serialize(params)
+        return 'ast.PYACC.Section1.Defines({0:s})'.format(', '.join(params))
+      # GENERATE END
+      def post_process(
+        self,
+        pyacc,
+        section,
+        character_to_symbol,
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag
+      ):
+        assert len(self) == 0
+        section.defines = True
     class ErrorVerbose(Item):
       # GENERATE ELEMENT() BEGIN
       def __init__(
@@ -1314,13 +1680,14 @@ class PYACC(element.Element):
       # GENERATE END
 
     class Expect(Item):
-      # GENERATE ELEMENT() BEGIN
+      # GENERATE ELEMENT(int value) BEGIN
       def __init__(
         self,
         tag = 'PYACC_Section1_Expect',
         attrib = {},
         text = '',
-        children = []
+        children = [],
+        value = -1
       ):
         Item.__init__(
           self,
@@ -1329,26 +1696,55 @@ class PYACC(element.Element):
           text,
           children
         )
+        self.value = (
+          element.deserialize_int(value)
+        if isinstance(value, str) else
+          value
+        )
+      def serialize(self, ref_list):
+        Item.serialize(self, ref_list)
+        self.set('value', element.serialize_int(self.value))
+      def deserialize(self, ref_list):
+        Item.deserialize(self, ref_list)
+        self.value = element.deserialize_int(self.get('value', '-1'))
       def copy(self, factory = None):
         result = Item.copy(
           self,
           Expect if factory is None else factory
         )
+        result.value = self.value
         return result
+      def repr_serialize(self, params):
+        Item.repr_serialize(self, params)
+        if self.value != -1:
+          params.append(
+            'value = {0:s}'.format(repr(self.value))
+          )
       def __repr__(self):
         params = []
         self.repr_serialize(params)
         return 'ast.PYACC.Section1.Expect({0:s})'.format(', '.join(params))
       # GENERATE END
+      def post_process(
+        self,
+        pyacc,
+        section,
+        character_to_symbol,
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag
+      ):
+        section.expect = self.value
 
     class ExpectRR(Item):
-      # GENERATE ELEMENT() BEGIN
+      # GENERATE ELEMENT(int value) BEGIN
       def __init__(
         self,
         tag = 'PYACC_Section1_ExpectRR',
         attrib = {},
         text = '',
-        children = []
+        children = [],
+        value = -1
       ):
         Item.__init__(
           self,
@@ -1357,12 +1753,30 @@ class PYACC(element.Element):
           text,
           children
         )
+        self.value = (
+          element.deserialize_int(value)
+        if isinstance(value, str) else
+          value
+        )
+      def serialize(self, ref_list):
+        Item.serialize(self, ref_list)
+        self.set('value', element.serialize_int(self.value))
+      def deserialize(self, ref_list):
+        Item.deserialize(self, ref_list)
+        self.value = element.deserialize_int(self.get('value', '-1'))
       def copy(self, factory = None):
         result = Item.copy(
           self,
           ExpectRR if factory is None else factory
         )
+        result.value = self.value
         return result
+      def repr_serialize(self, params):
+        Item.repr_serialize(self, params)
+        if self.value != -1:
+          params.append(
+            'value = {0:s}'.format(repr(self.value))
+          )
       def __repr__(self):
         params = []
         self.repr_serialize(params)
@@ -1480,6 +1894,17 @@ class PYACC(element.Element):
         self.repr_serialize(params)
         return 'ast.PYACC.Section1.InitialAction({0:s})'.format(', '.join(params))
       # GENERATE END
+      def post_process(
+        self,
+        pyacc,
+        section,
+        character_to_symbol,
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag
+      ):
+        assert pyacc.initial_action_code is None
+        pyacc.initial_action_code = self[0]
 
     class Language(Item):
       # GENERATE ELEMENT() BEGIN
@@ -1681,7 +2106,9 @@ class PYACC(element.Element):
         pyacc,
         section,
         character_to_symbol,
-        name_to_symbol
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag
       ):
         (
           pyacc.before_union_code
@@ -1800,6 +2227,16 @@ class PYACC(element.Element):
         self.repr_serialize(params)
         return 'ast.PYACC.Section1.Verbose({0:s})'.format(', '.join(params))
       # GENERATE END
+      def post_process(
+        self,
+        pyacc,
+        section,
+        character_to_symbol,
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag
+      ):
+        section.verbose = True
 
     class YACC(Item):
       # GENERATE ELEMENT() BEGIN
@@ -1829,13 +2266,22 @@ class PYACC(element.Element):
         return 'ast.PYACC.Section1.YACC({0:s})'.format(', '.join(params))
       # GENERATE END
 
-    # GENERATE ELEMENT() BEGIN
+    # GENERATE ELEMENT(str api_prefix, str api_pure, bool locations, str parse_error, str parse_lac, bool parse_trace, bool defines, int expect, bool verbose) BEGIN
     def __init__(
       self,
       tag = 'PYACC_Section1',
       attrib = {},
       text = '',
-      children = []
+      children = [],
+      api_prefix = '',
+      api_pure = '',
+      locations = False,
+      parse_error = '',
+      parse_lac = '',
+      parse_trace = False,
+      defines = False,
+      expect = -1,
+      verbose = False
     ):
       PYACC.Section1Or2.__init__(
         self,
@@ -1844,17 +2290,158 @@ class PYACC(element.Element):
         text,
         children
       )
+      self.api_prefix = api_prefix
+      self.api_pure = api_pure
+      self.locations = (
+        element.deserialize_bool(locations)
+      if isinstance(locations, str) else
+        locations
+      )
+      self.parse_error = parse_error
+      self.parse_lac = parse_lac
+      self.parse_trace = (
+        element.deserialize_bool(parse_trace)
+      if isinstance(parse_trace, str) else
+        parse_trace
+      )
+      self.defines = (
+        element.deserialize_bool(defines)
+      if isinstance(defines, str) else
+        defines
+      )
+      self.expect = (
+        element.deserialize_int(expect)
+      if isinstance(expect, str) else
+        expect
+      )
+      self.verbose = (
+        element.deserialize_bool(verbose)
+      if isinstance(verbose, str) else
+        verbose
+      )
+    def serialize(self, ref_list):
+      PYACC.Section1Or2.serialize(self, ref_list)
+      self.set('api_prefix', element.serialize_str(self.api_prefix))
+      self.set('api_pure', element.serialize_str(self.api_pure))
+      self.set('locations', element.serialize_bool(self.locations))
+      self.set('parse_error', element.serialize_str(self.parse_error))
+      self.set('parse_lac', element.serialize_str(self.parse_lac))
+      self.set('parse_trace', element.serialize_bool(self.parse_trace))
+      self.set('defines', element.serialize_bool(self.defines))
+      self.set('expect', element.serialize_int(self.expect))
+      self.set('verbose', element.serialize_bool(self.verbose))
+    def deserialize(self, ref_list):
+      PYACC.Section1Or2.deserialize(self, ref_list)
+      self.api_prefix = element.deserialize_str(self.get('api_prefix', ''))
+      self.api_pure = element.deserialize_str(self.get('api_pure', ''))
+      self.locations = element.deserialize_bool(self.get('locations', 'false'))
+      self.parse_error = element.deserialize_str(self.get('parse_error', ''))
+      self.parse_lac = element.deserialize_str(self.get('parse_lac', ''))
+      self.parse_trace = element.deserialize_bool(self.get('parse_trace', 'false'))
+      self.defines = element.deserialize_bool(self.get('defines', 'false'))
+      self.expect = element.deserialize_int(self.get('expect', '-1'))
+      self.verbose = element.deserialize_bool(self.get('verbose', 'false'))
     def copy(self, factory = None):
       result = PYACC.Section1Or2.copy(
         self,
         Section1 if factory is None else factory
       )
+      result.api_prefix = self.api_prefix
+      result.api_pure = self.api_pure
+      result.locations = self.locations
+      result.parse_error = self.parse_error
+      result.parse_lac = self.parse_lac
+      result.parse_trace = self.parse_trace
+      result.defines = self.defines
+      result.expect = self.expect
+      result.verbose = self.verbose
       return result
+    def repr_serialize(self, params):
+      PYACC.Section1Or2.repr_serialize(self, params)
+      if self.api_prefix != '':
+        params.append(
+          'api_prefix = {0:s}'.format(repr(self.api_prefix))
+        )
+      if self.api_pure != '':
+        params.append(
+          'api_pure = {0:s}'.format(repr(self.api_pure))
+        )
+      if self.locations != False:
+        params.append(
+          'locations = {0:s}'.format(repr(self.locations))
+        )
+      if self.parse_error != '':
+        params.append(
+          'parse_error = {0:s}'.format(repr(self.parse_error))
+        )
+      if self.parse_lac != '':
+        params.append(
+          'parse_lac = {0:s}'.format(repr(self.parse_lac))
+        )
+      if self.parse_trace != False:
+        params.append(
+          'parse_trace = {0:s}'.format(repr(self.parse_trace))
+        )
+      if self.defines != False:
+        params.append(
+          'defines = {0:s}'.format(repr(self.defines))
+        )
+      if self.expect != -1:
+        params.append(
+          'expect = {0:s}'.format(repr(self.expect))
+        )
+      if self.verbose != False:
+        params.append(
+          'verbose = {0:s}'.format(repr(self.verbose))
+        )
     def __repr__(self):
       params = []
       self.repr_serialize(params)
       return 'ast.PYACC.Section1({0:s})'.format(', '.join(params))
     # GENERATE END
+    def post_process(
+      self,
+      pyacc,
+      character_to_symbol,
+      name_to_symbol,
+      string_to_symbol,
+      name_to_tag
+    ): # semantic pass for section 1: reset option fields, run the base-class pass, then warn about options that were set
+      self.api_prefix = '' # every option field goes back to its constructor default first
+      self.api_pure = ''
+      self.locations = False
+      self.parse_error = ''
+      self.parse_lac = ''
+      self.parse_trace = False
+      self.defines = False
+      self.expect = -1
+      self.verbose = False
+      PYACC.Section1Or2.post_process( # presumably visits the child items, which may set the fields above -- confirm in Section1Or2
+        self,
+        pyacc,
+        character_to_symbol,
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag
+      )
+      if len(self.api_prefix): # from here on: one stderr warning per option left at a non-default value
+        sys.stderr.write('warning: ignoring %define api.prefix\n')
+      if len(self.api_pure):
+        sys.stderr.write('warning: ignoring %define api.pure\n')
+      if self.locations:
+        sys.stderr.write('warning: ignoring %define locations\n')
+      if len(self.parse_error):
+        sys.stderr.write('warning: ignoring %define parse.error\n')
+      if len(self.parse_lac):
+        sys.stderr.write('warning: ignoring %define parse.lac\n')
+      if self.parse_trace:
+        sys.stderr.write('warning: ignoring %define parse.trace\n')
+      if self.defines:
+        sys.stderr.write('warning: ignoring %defines\n')
+      if self.expect != -1: # -1 is the "not specified" default
+        sys.stderr.write('warning: ignoring %expect\n')
+      if self.verbose:
+        sys.stderr.write('warning: ignoring %verbose\n')
 
   class Section2(Section1Or2):
     class Rules(Item):
@@ -1889,25 +2476,31 @@ class PYACC(element.Element):
         pyacc,
         section,
         character_to_symbol,
-        name_to_symbol
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag
       ):
         self[0].post_process(
           pyacc,
           section,
           character_to_symbol,
           name_to_symbol,
-          '', # tag_name
+          string_to_symbol,
+          name_to_tag,
+          -1, # _tag
           -1 # precedence
         )
         if pyacc.first_nonterminal == -1:
-          pyacc.first_nonterminal = self[0].nonterminal
+          pyacc.first_nonterminal = ~self[0].symbol
         for i in self[1:]:
           i.post_process(
             pyacc,
             section,
-            self[0].nonterminal,
+            ~self[0].symbol,
             character_to_symbol,
-            name_to_symbol
+            name_to_symbol,
+            string_to_symbol,
+            name_to_tag
           )
 
     # GENERATE ELEMENT() BEGIN
@@ -1998,20 +2591,34 @@ class PYACC(element.Element):
       section,
       character_to_symbol,
       name_to_symbol,
-      tag_name,
+      string_to_symbol,
+      name_to_tag,
+      _tag,
       precedence
     ):
-      raise NotImplementedException
+      raise NotImplementedError
+    def set_code_props(
+      self,
+      pyacc,
+      section,
+      character_to_symbol,
+      name_to_symbol,
+      string_to_symbol,
+      name_to_tag,
+      _type,
+      code
+    ): # abstract hook: TagRef and SymbolRef override it to store code in a code_props[_type] slot
+      raise NotImplementedError
 
-  class Tag(TagOrSymbolRef):
-    # GENERATE ELEMENT(int type) BEGIN
+  class TagRef(TagOrSymbolRef):
+    # GENERATE ELEMENT(int _tag) BEGIN
     def __init__(
       self,
-      tag = 'PYACC_Tag',
+      tag = 'PYACC_TagRef',
       attrib = {},
       text = '',
       children = [],
-      type = -1
+      _tag = -1
     ):
       PYACC.TagOrSymbolRef.__init__(
         self,
@@ -2020,34 +2627,34 @@ class PYACC(element.Element):
         text,
         children
       )
-      self.type = (
-        element.deserialize_int(type)
-      if isinstance(type, str) else
-        type
+      self._tag = (
+        element.deserialize_int(_tag)
+      if isinstance(_tag, str) else
+        _tag
       )
     def serialize(self, ref_list):
       PYACC.TagOrSymbolRef.serialize(self, ref_list)
-      self.set('type', element.serialize_int(self.type))
+      self.set('_tag', element.serialize_int(self._tag))
     def deserialize(self, ref_list):
       PYACC.TagOrSymbolRef.deserialize(self, ref_list)
-      self.type = element.deserialize_int(self.get('type', '-1'))
+      self._tag = element.deserialize_int(self.get('_tag', '-1'))
     def copy(self, factory = None):
       result = PYACC.TagOrSymbolRef.copy(
         self,
-        Tag if factory is None else factory
+        TagRef if factory is None else factory
       )
-      result.type = self.type
+      result._tag = self._tag
       return result
     def repr_serialize(self, params):
       PYACC.TagOrSymbolRef.repr_serialize(self, params)
-      if self.type != -1:
+      if self._tag != -1:
         params.append(
-          'type = {0:s}'.format(repr(self.type))
+          '_tag = {0:s}'.format(repr(self._tag))
         )
     def __repr__(self):
       params = []
       self.repr_serialize(params)
-      return 'ast.PYACC.Tag({0:s})'.format(', '.join(params))
+      return 'ast.PYACC.TagRef({0:s})'.format(', '.join(params))
     # GENERATE END
     def post_process(
       self,
@@ -2055,21 +2662,53 @@ class PYACC(element.Element):
       section,
       character_to_symbol,
       name_to_symbol,
-      tag_name,
+      string_to_symbol,
+      name_to_tag,
+      _tag,
       precedence
     ):
-      return self[0].get_text()
+      name = self[0].get_text()
+      if name in name_to_tag:
+        self._tag = name_to_tag[name]
+      else:
+        self._tag = len(pyacc.tags)
+        name_to_tag[name] = self._tag
+        pyacc.tags.append(
+          PYACC.Tag(name = name, code_props = [None, None])
+        )
+      return self._tag
+    def set_code_props(
+      self,
+      pyacc,
+      section,
+      character_to_symbol,
+      name_to_symbol,
+      string_to_symbol,
+      name_to_tag,
+      _type,
+      code
+    ): # look up (or create) the tag named by the first child, then store code in its code_props[_type] slot
+      name = self[0].get_text()
+      if name in name_to_tag:
+        self._tag = name_to_tag[name]
+      else:
+        self._tag = len(pyacc.tags) # index the newly appended tag will occupy
+        name_to_tag[name] = self._tag
+        pyacc.tags.append(
+          PYACC.Tag(name = name, code_props = [None, None])
+        )
+      assert pyacc.tags[self._tag].code_props[_type] is None # a slot may only be filled once
+      pyacc.tags[self._tag].code_props[_type] = code
 
-  class TerminalRef(TagOrSymbolRef):
-    # GENERATE ELEMENT(int terminal, int user_token) BEGIN
+  class SymbolRef(TagOrSymbolRef):
+    # GENERATE ELEMENT(int symbol) BEGIN
     def __init__(
       self,
-      tag = 'PYACC_TerminalRef',
+      tag = 'PYACC_SymbolRef',
       attrib = {},
       text = '',
       children = [],
-      terminal = -1,
-      user_token = -1
+      symbol = -1
     ):
       PYACC.TagOrSymbolRef.__init__(
         self,
@@ -2078,10 +2717,94 @@ class PYACC(element.Element):
         text,
         children
       )
-      self.terminal = (
-        element.deserialize_int(terminal)
-      if isinstance(terminal, str) else
-        terminal
+      self.symbol = (
+        element.deserialize_int(symbol)
+      if isinstance(symbol, str) else
+        symbol
+      )
+    def serialize(self, ref_list):
+      PYACC.TagOrSymbolRef.serialize(self, ref_list)
+      self.set('symbol', element.serialize_int(self.symbol))
+    def deserialize(self, ref_list):
+      PYACC.TagOrSymbolRef.deserialize(self, ref_list)
+      self.symbol = element.deserialize_int(self.get('symbol', '-1'))
+    def copy(self, factory = None):
+      result = PYACC.TagOrSymbolRef.copy(
+        self,
+        PYACC.SymbolRef if factory is None else factory # qualified: a bare nested-class name is not resolvable from method scope
+      )
+      result.symbol = self.symbol
+      return result
+    def repr_serialize(self, params):
+      PYACC.TagOrSymbolRef.repr_serialize(self, params)
+      if self.symbol != -1:
+        params.append(
+          'symbol = {0:s}'.format(repr(self.symbol))
+        )
+    def __repr__(self):
+      params = []
+      self.repr_serialize(params)
+      return 'ast.PYACC.SymbolRef({0:s})'.format(', '.join(params))
+    # GENERATE END
+    def set_code_props(
+      self,
+      pyacc,
+      section,
+      character_to_symbol,
+      name_to_symbol,
+      string_to_symbol,
+      name_to_tag,
+      _type,
+      code
+    ): # resolve self.symbol from the first child (Char, ID or String), then store code in that symbol's code_props[_type] slot
+      if isinstance(self[0], PYACC.Char):
+        character = ord(self[0][0].get_text())
+        assert character != 0 # would conflict with YYEOF
+        if character in character_to_symbol:
+          self.symbol = character_to_symbol[character]
+          assert self.symbol >= 0
+        else:
+          self.symbol = len(pyacc.terminals) # character not seen before: append a terminal for it
+          character_to_symbol[character] = self.symbol
+          pyacc.terminals.append(
+            PYACC.Terminal(
+              code_props = [None, None],
+              character_set = [character, character + 1]
+            )
+          )
+      elif isinstance(self[0], PYACC.ID):
+        name = self[0].get_text()
+        self.symbol = name_to_symbol[name] # must already exist
+      elif isinstance(self[0], PYACC.String):
+        string = self[0][0].get_text()
+        self.symbol = string_to_symbol[string] # must already exist
+      else:
+        assert False
+      if self.symbol >= 0: # >= 0 indexes pyacc.terminals, < 0 is ~index into pyacc.nonterminals
+        assert pyacc.terminals[self.symbol].code_props[_type] is None # a slot may only be filled once
+        pyacc.terminals[self.symbol].code_props[_type] = code
+      else:
+        assert pyacc.nonterminals[~self.symbol].code_props[_type] is None
+        pyacc.nonterminals[~self.symbol].code_props[_type] = code
+
+  class TerminalRef(SymbolRef):
+    # GENERATE ELEMENT(int user_token) BEGIN
+    def __init__(
+      self,
+      tag = 'PYACC_TerminalRef',
+      attrib = {},
+      text = '',
+      children = [],
+      symbol = -1,
+      user_token = -1
+    ):
+      PYACC.SymbolRef.__init__(
+        self,
+        tag,
+        attrib,
+        text,
+        children,
+        symbol
       )
       self.user_token = (
         element.deserialize_int(user_token)
@@ -2089,27 +2812,20 @@ class PYACC(element.Element):
         user_token
       )
     def serialize(self, ref_list):
-      PYACC.TagOrSymbolRef.serialize(self, ref_list)
-      self.set('terminal', element.serialize_int(self.terminal))
+      PYACC.SymbolRef.serialize(self, ref_list)
       self.set('user_token', element.serialize_int(self.user_token))
     def deserialize(self, ref_list):
-      PYACC.TagOrSymbolRef.deserialize(self, ref_list)
-      self.terminal = element.deserialize_int(self.get('terminal', '-1'))
+      PYACC.SymbolRef.deserialize(self, ref_list)
       self.user_token = element.deserialize_int(self.get('user_token', '-1'))
     def copy(self, factory = None):
-      result = PYACC.TagOrSymbolRef.copy(
+      result = PYACC.SymbolRef.copy(
         self,
         TerminalRef if factory is None else factory
       )
-      result.terminal = self.terminal
       result.user_token = self.user_token
       return result
     def repr_serialize(self, params):
-      PYACC.TagOrSymbolRef.repr_serialize(self, params)
-      if self.terminal != -1:
-        params.append(
-          'terminal = {0:s}'.format(repr(self.terminal))
-        )
+      PYACC.SymbolRef.repr_serialize(self, params)
       if self.user_token != -1:
         params.append(
           'user_token = {0:s}'.format(repr(self.user_token))
@@ -2125,69 +2841,81 @@ class PYACC(element.Element):
       section,
       character_to_symbol,
       name_to_symbol,
-      tag_name,
+      string_to_symbol,
+      name_to_tag,
+      _tag,
       precedence
     ):
       if isinstance(self[0], PYACC.Char):
         character = ord(self[0][0].get_text())
         assert character != 0 # would conflict with YYEOF
         if character in character_to_symbol:
-          self.terminal = character_to_symbol[character]
-          assert self.terminal >= 0
+          self.symbol = character_to_symbol[character]
+          assert self.symbol >= 0
         else:
-          self.terminal = len(pyacc.terminals)
-          character_to_symbol[character] = self.terminal
+          self.symbol = len(pyacc.terminals)
+          character_to_symbol[character] = self.symbol
           pyacc.terminals.append(
-            PYACC.Terminal(character_set = [character, character + 1])
+            PYACC.Terminal(
+              code_props = [None, None],
+              character_set = [character, character + 1]
+            )
           )
       elif isinstance(self[0], PYACC.ID):
         name = self[0].get_text()
         if name in name_to_symbol:
-          self.terminal = name_to_symbol[name]
-          assert self.terminal >= 0
+          self.symbol = name_to_symbol[name]
+          assert self.symbol >= 0
         else:
-          self.terminal = len(pyacc.terminals)
-          name_to_symbol[name] = self.terminal
+          self.symbol = len(pyacc.terminals)
+          name_to_symbol[name] = self.symbol
           pyacc.terminals.append(
-            PYACC.Terminal(name = name)
+            PYACC.Terminal(
+              name = name,
+              code_props = [None, None]
+            )
           )
+      elif isinstance(self[0], PYACC.String):
+        string = self[0][0].get_text()
+        self.symbol = string_to_symbol[string] # must already exist
+        assert self.symbol >= 0
       else:
         assert False
       if self.user_token != -1:
-        assert len(pyacc.terminals[self.terminal].character_set) == 0
-        pyacc.terminals[self.terminal].character_set = (
+        assert len(pyacc.terminals[self.symbol].character_set) == 0
+        pyacc.terminals[self.symbol].character_set = (
           [self.user_token, self.user_token + 1]
         )
-      if len(tag_name):
-        assert len(pyacc.terminals[self.terminal].tag_name) == 0
-        pyacc.terminals[self.terminal].tag_name = tag_name
+      if len(self) >= 2:
+        string = self[1][0].get_text()
+        assert string not in string_to_symbol
+        string_to_symbol[string] = self.symbol
+      if _tag != -1:
+        assert pyacc.terminals[self.symbol]._tag == -1
+        pyacc.terminals[self.symbol]._tag = _tag
       if precedence != -1:
-        assert pyacc.terminals[self.terminal].precedence == -1
-        pyacc.terminals[self.terminal].precedence = precedence
-      return tag_name
+        assert pyacc.terminals[self.symbol].precedence == -1
+        pyacc.terminals[self.symbol].precedence = precedence
+      return _tag
 
-  class NonterminalRef(TagOrSymbolRef):
-    # GENERATE ELEMENT(int nonterminal, int user_token) BEGIN
+  class NonterminalRef(SymbolRef):
+    # GENERATE ELEMENT(int user_token) BEGIN
     def __init__(
       self,
       tag = 'PYACC_NonterminalRef',
       attrib = {},
       text = '',
       children = [],
-      nonterminal = -1,
+      symbol = -1,
       user_token = -1
     ):
-      PYACC.TagOrSymbolRef.__init__(
+      PYACC.SymbolRef.__init__(
         self,
         tag,
         attrib,
         text,
-        children
-      )
-      self.nonterminal = (
-        element.deserialize_int(nonterminal)
-      if isinstance(nonterminal, str) else
-        nonterminal
+        children,
+        symbol
       )
       self.user_token = (
         element.deserialize_int(user_token)
@@ -2195,27 +2923,20 @@ class PYACC(element.Element):
         user_token
       )
     def serialize(self, ref_list):
-      PYACC.TagOrSymbolRef.serialize(self, ref_list)
-      self.set('nonterminal', element.serialize_int(self.nonterminal))
+      PYACC.SymbolRef.serialize(self, ref_list)
       self.set('user_token', element.serialize_int(self.user_token))
     def deserialize(self, ref_list):
-      PYACC.TagOrSymbolRef.deserialize(self, ref_list)
-      self.nonterminal = element.deserialize_int(self.get('nonterminal', '-1'))
+      PYACC.SymbolRef.deserialize(self, ref_list)
       self.user_token = element.deserialize_int(self.get('user_token', '-1'))
     def copy(self, factory = None):
-      result = PYACC.TagOrSymbolRef.copy(
+      result = PYACC.SymbolRef.copy(
         self,
         NonterminalRef if factory is None else factory
       )
-      result.nonterminal = self.nonterminal
       result.user_token = self.user_token
       return result
     def repr_serialize(self, params):
-      PYACC.TagOrSymbolRef.repr_serialize(self, params)
-      if self.nonterminal != -1:
-        params.append(
-          'nonterminal = {0:s}'.format(repr(self.nonterminal))
-        )
+      PYACC.SymbolRef.repr_serialize(self, params)
       if self.user_token != -1:
         params.append(
           'user_token = {0:s}'.format(repr(self.user_token))
@@ -2231,29 +2952,42 @@ class PYACC(element.Element):
       section,
       character_to_symbol,
       name_to_symbol,
-      tag_name,
+      string_to_symbol,
+      name_to_tag,
+      _tag,
       precedence
     ):
       if isinstance(self[0], PYACC.ID):
         name = self[0].get_text()
         if name in name_to_symbol:
-          i = name_to_symbol[name]
-          assert i < 0
-          self.nonterminal = ~i
+          self.symbol = name_to_symbol[name]
+          assert self.symbol < 0
         else:
-          self.nonterminal = len(pyacc.nonterminals)
-          name_to_symbol[name] = ~self.nonterminal
+          self.symbol = ~len(pyacc.nonterminals)
+          name_to_symbol[name] = self.symbol
           pyacc.nonterminals.append(
-            PYACC.Nonterminal(name = name, character_set = [])
+            PYACC.Nonterminal(
+              name = name,
+              code_props = [None, None],
+              character_set = []
+            )
           )
+      elif isinstance(self[0], PYACC.String):
+        string = self[0][0].get_text()
+        self.symbol = string_to_symbol[string] # must already exist
+        assert self.symbol < 0
       else:
         assert False
-      if len(tag_name):
-        assert len(pyacc.terminals[self.terminal].tag_name) == 0
-        pyacc.terminals[self.terminal].tag_name = tag_name
       assert self.user_token == -1
+      if len(self) >= 2:
+        string = self[1][0].get_text()
+        assert string not in string_to_symbol
+        string_to_symbol[string] = self.symbol
+      if _tag != -1:
+        assert pyacc.nonterminals[~self.symbol]._tag == -1
+        pyacc.nonterminals[~self.symbol]._tag = _tag
       assert precedence == -1
-      return tag_name
+      return _tag
 
   class Text(element.Element):
     class Item(element.Element):
@@ -2284,7 +3018,7 @@ class PYACC(element.Element):
         return 'ast.PYACC.Text.Item({0:s})'.format(', '.join(params))
       # GENERATE END
       def get_text(self):
-        raise NotImplementedException
+        raise NotImplementedError
 
     class Escape(Item):
       # GENERATE ELEMENT(int character) BEGIN
@@ -2675,21 +3409,27 @@ class PYACC(element.Element):
       self,
       pyacc,
       character_to_symbol,
-      name_to_symbol
+      name_to_symbol,
+      string_to_symbol,
+      name_to_tag
     ):
       pass
 
-  # GENERATE ELEMENT(list(ref) before_union_code, str union_name, ref union_code, list(ref) after_union_code, list(ref) terminals, list(ref) nonterminals, int n_productions, list(ref) productions, int first_nonterminal, int start_nonterminal, list(int) associativities) BEGIN
+  # GENERATE ELEMENT(list(ref) top_code, list(ref) before_union_code, list(ref) requires_code, str union_name, ref union_code, list(ref) after_union_code, ref initial_action_code, list(ref) tags, list(ref) terminals, list(ref) nonterminals, int n_productions, list(ref) productions, int first_nonterminal, int start_nonterminal, list(int) associativities) BEGIN
   def __init__(
     self,
     tag = 'PYACC',
     attrib = {},
     text = '',
     children = [],
+    top_code = [],
     before_union_code = [],
+    requires_code = [],
     union_name = '',
     union_code = None,
     after_union_code = [],
+    initial_action_code = None,
+    tags = [],
     terminals = [],
     nonterminals = [],
     n_productions = -1,
@@ -2705,10 +3445,14 @@ class PYACC(element.Element):
       text,
       children
     )
+    self.top_code = top_code
     self.before_union_code = before_union_code
+    self.requires_code = requires_code
     self.union_name = union_name
     self.union_code = union_code
     self.after_union_code = after_union_code
+    self.initial_action_code = initial_action_code
+    self.tags = tags
     self.terminals = terminals
     self.nonterminals = nonterminals
     self.n_productions = (
@@ -2734,16 +3478,29 @@ class PYACC(element.Element):
     )
   def serialize(self, ref_list):
     element.Element.serialize(self, ref_list)
+    self.set(
+      'top_code',
+      ' '.join([element.serialize_ref(i, ref_list) for i in self.top_code])
+    )
     self.set(
       'before_union_code',
       ' '.join([element.serialize_ref(i, ref_list) for i in self.before_union_code])
     )
+    self.set(
+      'requires_code',
+      ' '.join([element.serialize_ref(i, ref_list) for i in self.requires_code])
+    )
     self.set('union_name', element.serialize_str(self.union_name))
     self.set('union_code', element.serialize_ref(self.union_code, ref_list))
     self.set(
       'after_union_code',
       ' '.join([element.serialize_ref(i, ref_list) for i in self.after_union_code])
     )
+    self.set('initial_action_code', element.serialize_ref(self.initial_action_code, ref_list))
+    self.set(
+      'tags',
+      ' '.join([element.serialize_ref(i, ref_list) for i in self.tags])
+    )
     self.set(
       'terminals',
       ' '.join([element.serialize_ref(i, ref_list) for i in self.terminals])
@@ -2765,16 +3522,29 @@ class PYACC(element.Element):
     )
   def deserialize(self, ref_list):
     element.Element.deserialize(self, ref_list)
+    self.top_code = [
+      element.deserialize_ref(i, ref_list)
+      for i in self.get('top_code', '').split()
+    ]
     self.before_union_code = [
       element.deserialize_ref(i, ref_list)
       for i in self.get('before_union_code', '').split()
     ]
+    self.requires_code = [
+      element.deserialize_ref(i, ref_list)
+      for i in self.get('requires_code', '').split()
+    ]
     self.union_name = element.deserialize_str(self.get('union_name', ''))
     self.union_code = element.deserialize_ref(self.get('union_code', '-1'), ref_list)
     self.after_union_code = [
       element.deserialize_ref(i, ref_list)
       for i in self.get('after_union_code', '').split()
     ]
+    self.initial_action_code = element.deserialize_ref(self.get('initial_action_code', '-1'), ref_list)
+    self.tags = [
+      element.deserialize_ref(i, ref_list)
+      for i in self.get('tags', '').split()
+    ]
     self.terminals = [
       element.deserialize_ref(i, ref_list)
       for i in self.get('terminals', '').split()
@@ -2799,10 +3569,14 @@ class PYACC(element.Element):
       self,
       PYACC if factory is None else factory
     )
+    result.top_code = self.top_code
     result.before_union_code = self.before_union_code
+    result.requires_code = self.requires_code
     result.union_name = self.union_name
     result.union_code = self.union_code
     result.after_union_code = self.after_union_code
+    result.initial_action_code = self.initial_action_code
+    result.tags = self.tags
     result.terminals = self.terminals
     result.nonterminals = self.nonterminals
     result.n_productions = self.n_productions
@@ -2813,12 +3587,24 @@ class PYACC(element.Element):
     return result
   def repr_serialize(self, params):
     element.Element.repr_serialize(self, params)
+    if len(self.top_code):
+      params.append(
+        'top_code = [{0:s}]'.format(
+          ', '.join([repr(i) for i in self.top_code])
+        )
+      )
     if len(self.before_union_code):
       params.append(
         'before_union_code = [{0:s}]'.format(
           ', '.join([repr(i) for i in self.before_union_code])
         )
       )
+    if len(self.requires_code):
+      params.append(
+        'requires_code = [{0:s}]'.format(
+          ', '.join([repr(i) for i in self.requires_code])
+        )
+      )
     if self.union_name != '':
       params.append(
         'union_name = {0:s}'.format(repr(self.union_name))
@@ -2833,6 +3619,16 @@ class PYACC(element.Element):
           ', '.join([repr(i) for i in self.after_union_code])
         )
       )
+    if self.initial_action_code != None:
+      params.append(
+        'initial_action_code = {0:s}'.format(repr(self.initial_action_code))
+      )
+    if len(self.tags):
+      params.append(
+        'tags = [{0:s}]'.format(
+          ', '.join([repr(i) for i in self.tags])
+        )
+      )
     if len(self.terminals):
       params.append(
         'terminals = [{0:s}]'.format(
@@ -2877,17 +3673,35 @@ class PYACC(element.Element):
 
   def post_process(self):
     # variables that will be serialized
+    self.top_code = []
     self.before_union_code = []
+    self.requires_code = []
     self.union_name = ''
     self.union_code = None
     self.after_union_code = []
+    self.initial_action_code = None
     self.precedences = 0
     self.terminals = [
-      PYACC.Terminal(name = '$eof', character_set = [0, 1]),
-      PYACC.Terminal(name = 'error'),
-      PYACC.Terminal(name = '$undefined')
+      PYACC.Terminal(
+        name = '$eof',
+        code_props = [None, None],
+        character_set = [0, 1]
+      ),
+      PYACC.Terminal(
+        name = 'error',
+        code_props = [None, None]
+      ),
+      PYACC.Terminal(
+        name = '$undefined',
+        code_props = [None, None]
+      )
     ]
     self.nonterminals = []
+    # the following stores the destructor and printer for each tag
+    self.tags = [
+      PYACC.Tag(name = '', code_props = [None, None]),
+      PYACC.Tag(name = '*', code_props = [None, None]),
+    ]
     self.n_productions = 1 # includes start and midrule action productions
     self.productions = []
 
@@ -2896,12 +3710,27 @@ class PYACC(element.Element):
     # note: in name_to_symbol, >= 0 is terminal, < 0 is ~nonterminal
     # (don't bother storing the '$undefined', it can't be looked up)
     name_to_symbol = {'error': 1}
+    # note: in string_to_symbol, >= 0 is terminal, < 0 is ~nonterminal
+    string_to_symbol = {}
+    name_to_tag = {'': 0, '*': 1}
 
     # perform the semantic analysis pass
     self.first_nonterminal = -1
     self.start_nonterminal = -1
-    self[0].post_process(self, character_to_symbol, name_to_symbol)
-    self[1].post_process(self, character_to_symbol, name_to_symbol)
+    self[0].post_process(
+      self,
+      character_to_symbol,
+      name_to_symbol,
+      string_to_symbol,
+      name_to_tag
+    )
+    self[1].post_process(
+      self,
+      character_to_symbol,
+      name_to_symbol,
+      string_to_symbol,
+      name_to_tag
+    )
 
     # fill in token numbers that are not characters or overridden by user
     token = 0x100
@@ -3004,6 +3833,8 @@ class PYACC(element.Element):
 tag_to_class = {
   'Item': Item,
   'PYACC': PYACC,
+  'PYACC_TagOrSymbol': PYACC.TagOrSymbol,
+  'PYACC_Tag': PYACC.Tag,
   'PYACC_Symbol': PYACC.Symbol,
   'PYACC_Terminal': PYACC.Terminal,
   'PYACC_Nonterminal': PYACC.Nonterminal,
@@ -3031,6 +3862,7 @@ tag_to_class = {
   'PYACC_Section1Or2_Union': PYACC.Section1Or2.Union,
   'PYACC_Section1': PYACC.Section1,
   'PYACC_Section1_Define': PYACC.Section1.Define,
+  'PYACC_Section1_Defines': PYACC.Section1.Defines,
   'PYACC_Section1_ErrorVerbose': PYACC.Section1.ErrorVerbose,
   'PYACC_Section1_Expect': PYACC.Section1.Expect,
   'PYACC_Section1_ExpectRR': PYACC.Section1.ExpectRR,
@@ -3054,7 +3886,8 @@ tag_to_class = {
   'PYACC_Section2_Rules': PYACC.Section2.Rules,
   'PYACC_String': PYACC.String,
   'PYACC_TagOrSymbolRef': PYACC.TagOrSymbolRef,
-  'PYACC_Tag': PYACC.Tag,
+  'PYACC_TagRef': PYACC.TagRef,
+  'PYACC_SymbolRef': PYACC.SymbolRef,
   'PYACC_TerminalRef': PYACC.TerminalRef,
   'PYACC_NonterminalRef': PYACC.NonterminalRef,
   'PYACC_Text': PYACC.Text,
index 9395d3b..0523fae 100644 (file)
@@ -59,7 +59,7 @@ class BisonLR1DFA:
         goto_table[nonterminal0:nonterminal1, i] = gotos[j]
         nonterminal0 = nonterminal1
       assert nonterminal0 == len(lr1dfa.productions)
-
     # permute and combine columns/rows on the basis of the translate vectors
     new_action_table = numpy.zeros(
       (len(lr1dfa.states), n_terminals),
@@ -324,7 +324,15 @@ def generate(pyacc, skel_file, out_file, defines_file = None):
       with open(out_file, 'w+') as fout:
         line = fin.readline()
         while len(line):
-          if line == '/* GENERATE SECTION1BEFOREUNION */\n':
+          if line == '/* GENERATE SECTION1TOP */\n':
+            fout.write(
+              '''/* GENERATE SECTION1TOP BEGIN */
+{0:s}/* GENERATE SECTION1TOP END */
+'''.format(
+                ''.join([i.get_text() for i in pyacc.top_code])
+              )
+            )
+          elif line == '/* GENERATE SECTION1BEFOREUNION */\n':
             fout.write(
               '''/* GENERATE SECTION1BEFOREUNION BEGIN */
 {0:s}/* GENERATE SECTION1BEFOREUNION END */
@@ -332,6 +340,14 @@ def generate(pyacc, skel_file, out_file, defines_file = None):
                 ''.join([i.get_text() for i in pyacc.before_union_code])
               )
             )
+          elif line == '/* GENERATE SECTION1REQUIRES */\n':
+            fout.write(
+              '''/* GENERATE SECTION1REQUIRES BEGIN */
+{0:s}/* GENERATE SECTION1REQUIRES END */
+'''.format(
+                ''.join([i.get_text() for i in pyacc.requires_code])
+              )
+            )
           elif line == '/* GENERATE TOKENSEQUAL */\n':
             fout.write(
               '''/* GENERATE TOKENSEQUAL BEGIN */{0:s}
index 4ce396a..5791e6a 100644 (file)
@@ -1,6 +1,7 @@
 y.tab.c: skel.y
        ../../bootstrap_bison.git/src/bison -dy $<
-       cp $@ $@.orig
+       grep -v "^#line " <$@ >$@.orig
+       cp $@.orig $@
        patch $@ <$@.patch
        cp y.tab.h y.tab.h.orig
        patch y.tab.h <y.tab.h.patch
index b475840..957dcee 100644 (file)
@@ -1,15 +1,20 @@
---- y.tab.c.orig       2018-07-20 21:54:15.781339387 +1000
-+++ y.tab.c    2018-07-20 21:55:37.481335930 +1000
-@@ -63,7 +63,7 @@
+--- y.tab.c.orig       2018-07-26 21:54:50.154480530 +1000
++++ y.tab.c    2018-07-26 22:00:46.558496008 +1000
+@@ -58,11 +58,11 @@
+ /* Pull parsers.  */
+ #define YYPULL 1
+-
+-
++/* GENERATE SECTION1TOP */
  
  /* Copy the first part of user declarations.  */
  
--#line 67 "y.tab.c" /* yacc.c:339  */
 +/* GENERATE SECTION1BEFOREUNION */
  
  # ifndef YY_NULLPTR
  #  if defined __cplusplus && 201103L <= __cplusplus
-@@ -98,24 +98,15 @@
+@@ -97,22 +97,17 @@
  # define YYTOKENTYPE
    enum yytokentype
    {
  /* Tokens.  */
 -#define TOKEN 258
 +/* GENERATE TOKENS */
++
++/* GENERATE SECTION1REQUIRES */
  
  /* Value type.  */
  #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 -
 -union YYSTYPE
 -{
--#line 3 "skel.y" /* yacc.c:355  */
 - /* something */ 
 -
--#line 116 "y.tab.c" /* yacc.c:355  */
 -};
 -
 -typedef union YYSTYPE YYSTYPE;
  # define YYSTYPE_IS_TRIVIAL 1
  # define YYSTYPE_IS_DECLARED 1
  #endif
-@@ -129,7 +120,7 @@
+@@ -126,6 +121,7 @@
  
  /* Copy the second part of user declarations.  */
  
--#line 133 "y.tab.c" /* yacc.c:358  */
 +/* GENERATE SECTION1AFTERUNION */
  
  #ifdef short
  # undef short
-@@ -368,155 +359,7 @@
+@@ -364,155 +360,7 @@
  # endif
  #endif /* !YYCOPY_NEEDED */
  
  
  #define yyerrok         (yyerrstatus = 0)
  #define yyclearin       (yychar = YYEMPTY)
-@@ -1191,20 +1034,8 @@
+@@ -1187,14 +1035,7 @@
    YY_REDUCE_PRINT (yyn);
    switch (yyn)
      {
 -        case 2:
--#line 5 "skel.y" /* yacc.c:1648  */
 -    { /* do something */ }
--#line 1198 "y.tab.c" /* yacc.c:1648  */
 -    break;
 -
 -  case 3:
--#line 5 "skel.y" /* yacc.c:1648  */
 -    { /* do something else */ }
--#line 1204 "y.tab.c" /* yacc.c:1648  */
 -    break;
 -
 +/* GENERATE SECTION2 */
  
--#line 1208 "y.tab.c" /* yacc.c:1648  */
        default: break;
      }
-   /* User semantic actions sometimes alter yychar, and that requires
-@@ -1432,3 +1263,5 @@
+@@ -1423,3 +1264,5 @@
  #endif
    return yyresult;
  }
index 21c8d73..ee0a7ca 100644 (file)
@@ -1,6 +1,6 @@
---- y.tab.h.orig       2018-07-20 21:53:34.541341132 +1000
-+++ y.tab.h    2018-07-20 21:54:00.765340023 +1000
-@@ -45,7 +45,7 @@
+--- y.tab.h.orig       2018-07-26 21:54:50.166480531 +1000
++++ y.tab.h    2018-07-26 21:55:18.658481768 +1000
+@@ -45,24 +45,17 @@
  # define YYTOKENTYPE
    enum yytokentype
    {
@@ -9,8 +9,10 @@
    };
  #endif
  /* Tokens.  */
-@@ -53,16 +53,7 @@
+ #define TOKEN 258
  
++/* GENERATE SECTION1REQUIRES */
++
  /* Value type.  */
  #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 -