Implement semantic analysis pass to build a regex.Grammar object, then LR1, etc
author Nick Downing <downing.nick@gmail.com>
Tue, 3 Jul 2018 12:32:52 +0000 (22:32 +1000)
committer Nick Downing <downing.nick@gmail.com>
Tue, 3 Jul 2018 12:32:52 +0000 (22:32 +1000)
ast.py
bison_lr1dfa.py
bootstrap_pyacc.py
regex.py

diff --git a/ast.py b/ast.py
index e487015..70465ab 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -32,12 +32,141 @@ class Item(element.Element):
     self,
     pyacc,
     section,
-    name_to_terminal,
-    name_to_nonterminal
+    name_to_symbol
   ):
     raise NotImplementedException
  
 class PYACC(element.Element):
+  # internal classes
+  class Symbol(element.Element):
+    # A named grammar symbol together with its char_set, a flat list of ints.
+    # NOTE(review): callers in this file build char_set as consecutive
+    # [start, end) pairs -- presumably half-open ranges; confirm in regex.py.
+    # NOTE(review): the section between the GENERATE markers looks
+    # tool-generated; the generator template presumably needs the same copy()
+    # fix as made below -- confirm.
+    # GENERATE ELEMENT(str name, list(int) char_set) BEGIN
+    def __init__(
+      self,
+      tag = 'PYACC_Symbol',
+      attrib = {},
+      text = '',
+      children = [],
+      name = '',
+      char_set = []
+    ):
+      element.Element.__init__(
+        self,
+        tag,
+        attrib,
+        text,
+        children
+      )
+      self.name = name
+      # char_set may arrive as a whitespace-separated string or as a list
+      self.char_set = (
+        [element.deserialize_int(i) for i in char_set.split()]
+      if isinstance(char_set, str) else
+        char_set
+      )
+    def serialize(self, ref_list, indent = 0):
+      # store name and char_set as attributes (char_set space-separated)
+      element.Element.serialize(self, ref_list, indent)
+      self.set('name', element.serialize_str(self.name))
+      self.set(
+        'char_set',
+        ' '.join([element.serialize_int(i) for i in self.char_set])
+      )
+    def deserialize(self, ref_list):
+      # inverse of serialize(); missing attributes give '' and []
+      element.Element.deserialize(self, ref_list)
+      self.name = element.deserialize_str(self.get('name', ''))
+      self.char_set = [
+        element.deserialize_int(i)
+        for i in self.get('char_set', '').split()
+      ]
+    def copy(self, factory = None):
+      # the class is nested inside PYACC, so its bare name is not visible from
+      # a method body; it has to be reached through PYACC or the default
+      # factory raises NameError
+      result = element.Element.copy(
+        self,
+        PYACC.Symbol if factory is None else factory
+      )
+      result.name = self.name
+      # note: the list object is shared with the original, not duplicated
+      result.char_set = self.char_set
+      return result
+    def repr_serialize(self, params):
+      # append 'field = value' strings for fields that differ from defaults
+      element.Element.repr_serialize(self, params)
+      if self.name != '':
+        params.append(
+          'name = {0:s}'.format(repr(self.name))
+        )
+      if len(self.char_set):
+        params.append(
+          'char_set = [{0:s}]'.format(
+            ', '.join([repr(i) for i in self.char_set])
+          )
+        )
+    def __repr__(self):
+      params = []
+      self.repr_serialize(params)
+      return 'ast.PYACC.Symbol({0:s})'.format(', '.join(params))
+    # GENERATE END
+
+  class TerminalSymbol(Symbol):
+    # Marker subclass of Symbol for terminals; adds no fields of its own.
+    # NOTE(review): the section between the GENERATE markers looks
+    # tool-generated; the generator template presumably needs the same copy()
+    # fix as made below -- confirm.
+    # GENERATE ELEMENT() BEGIN
+    def __init__(
+      self,
+      tag = 'PYACC_TerminalSymbol',
+      attrib = {},
+      text = '',
+      children = [],
+      name = '',
+      char_set = []
+    ):
+      PYACC.Symbol.__init__(
+        self,
+        tag,
+        attrib,
+        text,
+        children,
+        name,
+        char_set
+      )
+    def copy(self, factory = None):
+      # the class is nested inside PYACC, so its bare name is not visible from
+      # a method body; qualify it or the default factory raises NameError
+      result = PYACC.Symbol.copy(
+        self,
+        PYACC.TerminalSymbol if factory is None else factory
+      )
+      return result
+    def __repr__(self):
+      params = []
+      self.repr_serialize(params)
+      return 'ast.PYACC.TerminalSymbol({0:s})'.format(', '.join(params))
+    # GENERATE END
+
+  class NonterminalSymbol(Symbol):
+    # Marker subclass of Symbol for nonterminals; adds no fields of its own.
+    # NOTE(review): the section between the GENERATE markers looks
+    # tool-generated; the generator template presumably needs the same copy()
+    # fix as made below -- confirm.
+    # GENERATE ELEMENT() BEGIN
+    def __init__(
+      self,
+      tag = 'PYACC_NonterminalSymbol',
+      attrib = {},
+      text = '',
+      children = [],
+      name = '',
+      char_set = []
+    ):
+      PYACC.Symbol.__init__(
+        self,
+        tag,
+        attrib,
+        text,
+        children,
+        name,
+        char_set
+      )
+    def copy(self, factory = None):
+      # the class is nested inside PYACC, so its bare name is not visible from
+      # a method body; qualify it or the default factory raises NameError
+      result = PYACC.Symbol.copy(
+        self,
+        PYACC.NonterminalSymbol if factory is None else factory
+      )
+      return result
+    def __repr__(self):
+      params = []
+      self.repr_serialize(params)
+      return 'ast.PYACC.NonterminalSymbol({0:s})'.format(', '.join(params))
+    # GENERATE END
+
+  # syntax classes
   class BracedCode(element.Element):
     # GENERATE ELEMENT() BEGIN
     def __init__(
@@ -94,7 +223,6 @@ class PYACC(element.Element):
       return 'ast.PYACC.BracedPredicate({0:s})'.format(', '.join(params))
     # GENERATE END
 
-
   class Char(element.Element):
     # GENERATE ELEMENT() BEGIN
     def __init__(
@@ -122,15 +250,27 @@ class PYACC(element.Element):
       self.repr_serialize(params)
       return 'ast.PYACC.Char({0:s})'.format(', '.join(params))
     # GENERATE END
+    def get_text(self):
+      # Join the text of every child in order: an Escape child contributes
+      # the character whose code is stored in its char field, any other child
+      # contributes whatever element.get_text(child, 0) returns.
+      pieces = []
+      for child in self:
+        if isinstance(child, PYACC.Escape):
+          pieces.append(chr(child.char))
+        else:
+          pieces.append(element.get_text(child, 0))
+      return ''.join(pieces)
 
   class Escape(element.Element):
-    # GENERATE ELEMENT() BEGIN
+    # GENERATE ELEMENT(int char) BEGIN
     def __init__(
       self,
       tag = 'PYACC_Escape',
       attrib = {},
       text = '',
-      children = []
+      children = [],
+      char = -1
     ):
       element.Element.__init__(
         self,
@@ -139,12 +279,30 @@ class PYACC(element.Element):
         text,
         children
       )
+      self.char = (
+        element.deserialize_int(char)
+      if isinstance(char, str) else
+        char
+      )
+    def serialize(self, ref_list, indent = 0):
+      element.Element.serialize(self, ref_list, indent)
+      self.set('char', element.serialize_int(self.char))
+    def deserialize(self, ref_list):
+      element.Element.deserialize(self, ref_list)
+      self.char = element.deserialize_int(self.get('char', '-1'))
     def copy(self, factory = None):
       result = element.Element.copy(
         self,
         Escape if factory is None else factory
       )
+      result.char = self.char
       return result
+    def repr_serialize(self, params):
+      element.Element.repr_serialize(self, params)
+      if self.char != -1:
+        params.append(
+          'char = {0:s}'.format(repr(self.char))
+        )
     def __repr__(self):
       params = []
       self.repr_serialize(params)
@@ -293,16 +451,13 @@ class PYACC(element.Element):
     def post_process(
       self,
       pyacc,
-      name_to_terminal,
-      name_to_nonterminal
+      name_to_symbol
     ):
       for i in self:
-        print(i.tag)
         i.post_process(
           pyacc,
           self,
-          name_to_terminal,
-          name_to_nonterminal
+          name_to_symbol
         )
 
   class String(element.Element):
@@ -389,6 +544,34 @@ class PYACC(element.Element):
       return 'ast.PYACC.TagNone({0:s})'.format(', '.join(params))
     # GENERATE END
 
+  class Text(element.Element):
+    # Plain element with no extra fields; other code in this file reads its
+    # content through element.get_text(node, 0).
+    # NOTE(review): the section between the GENERATE markers looks
+    # tool-generated; the generator template presumably needs the same copy()
+    # fix as made below -- confirm.
+    # GENERATE ELEMENT() BEGIN
+    def __init__(
+      self,
+      tag = 'PYACC_Text',
+      attrib = {},
+      text = '',
+      children = []
+    ):
+      element.Element.__init__(
+        self,
+        tag,
+        attrib,
+        text,
+        children
+      )
+    def copy(self, factory = None):
+      # the class is nested inside PYACC, so its bare name is not visible from
+      # a method body; qualify it or the default factory raises NameError
+      result = element.Element.copy(
+        self,
+        PYACC.Text if factory is None else factory
+      )
+      return result
+    def __repr__(self):
+      params = []
+      self.repr_serialize(params)
+      return 'ast.PYACC.Text({0:s})'.format(', '.join(params))
+    # GENERATE END
+
   class Section1Or2(Section):
     # GENERATE ELEMENT() BEGIN
     def __init__(
@@ -724,7 +907,28 @@ class PYACC(element.Element):
         self.repr_serialize(params)
         return 'ast.PYACC.Section1Or2.Token({0:s})'.format(', '.join(params))
       # GENERATE END
-
+      def post_process(
+        self,
+        pyacc,
+        section,
+        name_to_symbol
+      ):
+        # Register every child (each must be a PYACC.ID) as a new terminal:
+        # the name maps to the next free index in pyacc.symbols, the symbol
+        # gets char_set [t, t + 1] where t is the current
+        # pyacc.grammar.terminal_thres, and terminal_thres then moves up by
+        # one.  A name that is already known fails the assertion.
+        for child in self:
+          assert isinstance(child, PYACC.ID)
+          token_name = element.get_text(child, 0)
+          assert token_name not in name_to_symbol
+          token_value = pyacc.grammar.terminal_thres
+          name_to_symbol[token_name] = len(pyacc.symbols)
+          pyacc.symbols.append(
+            PYACC.TerminalSymbol(
+              name = token_name,
+              char_set = [token_value, token_value + 1]
+            )
+          )
+          pyacc.grammar.terminal_thres = token_value + 1
     class Type(Item):
       class Symbols(element.Element):
         # GENERATE ELEMENT() BEGIN
@@ -1211,7 +1415,7 @@ class PYACC(element.Element):
         text = '',
         children = []
       ):
-        element.Element.__init__(
+        Item.__init__(
           self,
           tag,
           attrib,
@@ -1219,7 +1423,7 @@ class PYACC(element.Element):
           children
         )
       def copy(self, factory = None):
-        result = element.Element.copy(
+        result = Item.copy(
           self,
           Prologue if factory is None else factory
         )
@@ -1229,6 +1433,14 @@ class PYACC(element.Element):
         self.repr_serialize(params)
         return 'ast.PYACC.Section1.Prologue({0:s})'.format(', '.join(params))
       # GENERATE END
+      def post_process(
+        self,
+        pyacc,
+        section,
+        name_to_symbol
+      ):
+        # The first child must be a PYACC.Text; its text is appended to
+        # pyacc.prologue_text.  section and name_to_symbol are not used here.
+        text_node = self[0]
+        assert isinstance(text_node, PYACC.Text)
+        pyacc.prologue_text.append(element.get_text(text_node, 0))
 
     class Require(Item):
       # GENERATE ELEMENT() BEGIN
@@ -1399,180 +1611,152 @@ class PYACC(element.Element):
 
   class Section2(Section1Or2):
     class Rules(Item):
-      class RHSes(element.Element):
-        class RHS(element.Element):
-          class Action(element.Element):
-            # GENERATE ELEMENT() BEGIN
-            def __init__(
+      class RHS(element.Element):
+        class Action(element.Element):
+          # GENERATE ELEMENT() BEGIN
+          def __init__(
+            self,
+            tag = 'PYACC_Section2_Rules_RHS_Action',
+            attrib = {},
+            text = '',
+            children = []
+          ):
+            element.Element.__init__(
               self,
-              tag = 'PYACC_Section2_Rules_RHSes_RHS_Action',
-              attrib = {},
-              text = '',
-              children = []
-            ):
-              element.Element.__init__(
-                self,
-                tag,
-                attrib,
-                text,
-                children
-              )
-            def copy(self, factory = None):
-              result = element.Element.copy(
-                self,
-                Action if factory is None else factory
-              )
-              return result
-            def __repr__(self):
-              params = []
-              self.repr_serialize(params)
-              return 'ast.PYACC.Section2.Rules.RHSes.RHS.Action({0:s})'.format(', '.join(params))
-            # GENERATE END
-
-          class DPrec(element.Element):
-            # GENERATE ELEMENT() BEGIN
-            def __init__(
+              tag,
+              attrib,
+              text,
+              children
+            )
+          def copy(self, factory = None):
+            result = element.Element.copy(
               self,
-              tag = 'PYACC_Section2_Rules_RHSes_RHS_DPrec',
-              attrib = {},
-              text = '',
-              children = []
-            ):
-              element.Element.__init__(
-                self,
-                tag,
-                attrib,
-                text,
-                children
-              )
-            def copy(self, factory = None):
-              result = element.Element.copy(
-                self,
-                DPrec if factory is None else factory
-              )
-              return result
-            def __repr__(self):
-              params = []
-              self.repr_serialize(params)
-              return 'ast.PYACC.Section2.Rules.RHSes.RHS.DPrec({0:s})'.format(', '.join(params))
-            # GENERATE END
-
-          class Empty(element.Element):
-            # GENERATE ELEMENT() BEGIN
-            def __init__(
+              Action if factory is None else factory
+            )
+            return result
+          def __repr__(self):
+            params = []
+            self.repr_serialize(params)
+            return 'ast.PYACC.Section2.Rules.RHS.Action({0:s})'.format(', '.join(params))
+          # GENERATE END
+
+        class DPrec(element.Element):
+          # GENERATE ELEMENT() BEGIN
+          def __init__(
+            self,
+            tag = 'PYACC_Section2_Rules_RHS_DPrec',
+            attrib = {},
+            text = '',
+            children = []
+          ):
+            element.Element.__init__(
+              self,
+              tag,
+              attrib,
+              text,
+              children
+            )
+          def copy(self, factory = None):
+            result = element.Element.copy(
+              self,
+              DPrec if factory is None else factory
+            )
+            return result
+          def __repr__(self):
+            params = []
+            self.repr_serialize(params)
+            return 'ast.PYACC.Section2.Rules.RHS.DPrec({0:s})'.format(', '.join(params))
+          # GENERATE END
+
+        class Empty(element.Element):
+          # GENERATE ELEMENT() BEGIN
+          def __init__(
+            self,
+            tag = 'PYACC_Section2_Rules_RHS_Empty',
+            attrib = {},
+            text = '',
+            children = []
+          ):
+            element.Element.__init__(
+              self,
+              tag,
+              attrib,
+              text,
+              children
+            )
+          def copy(self, factory = None):
+            result = element.Element.copy(
               self,
-              tag = 'PYACC_Section2_Rules_RHSes_RHS_Empty',
-              attrib = {},
-              text = '',
-              children = []
-            ):
-              element.Element.__init__(
-                self,
-                tag,
-                attrib,
-                text,
-                children
-              )
-            def copy(self, factory = None):
-              result = element.Element.copy(
-                self,
-                Empty if factory is None else factory
-              )
-              return result
-            def __repr__(self):
-              params = []
-              self.repr_serialize(params)
-              return 'ast.PYACC.Section2.Rules.RHSes.RHS.Empty({0:s})'.format(', '.join(params))
-            # GENERATE END
-
-          class Merge(element.Element):
-            # GENERATE ELEMENT() BEGIN
-            def __init__(
+              Empty if factory is None else factory
+            )
+            return result
+          def __repr__(self):
+            params = []
+            self.repr_serialize(params)
+            return 'ast.PYACC.Section2.Rules.RHS.Empty({0:s})'.format(', '.join(params))
+          # GENERATE END
+
+        class Merge(element.Element):
+          # GENERATE ELEMENT() BEGIN
+          def __init__(
+            self,
+            tag = 'PYACC_Section2_Rules_RHS_Merge',
+            attrib = {},
+            text = '',
+            children = []
+          ):
+            element.Element.__init__(
+              self,
+              tag,
+              attrib,
+              text,
+              children
+            )
+          def copy(self, factory = None):
+            result = element.Element.copy(
               self,
-              tag = 'PYACC_Section2_Rules_RHSes_RHS_Merge',
-              attrib = {},
-              text = '',
-              children = []
-            ):
-              element.Element.__init__(
-                self,
-                tag,
-                attrib,
-                text,
-                children
-              )
-            def copy(self, factory = None):
-              result = element.Element.copy(
-                self,
-                Merge if factory is None else factory
-              )
-              return result
-            def __repr__(self):
-              params = []
-              self.repr_serialize(params)
-              return 'ast.PYACC.Section2.Rules.RHSes.RHS.Merge({0:s})'.format(', '.join(params))
-            # GENERATE END
-
-          class Prec(element.Element):
-            # GENERATE ELEMENT() BEGIN
-            def __init__(
+              Merge if factory is None else factory
+            )
+            return result
+          def __repr__(self):
+            params = []
+            self.repr_serialize(params)
+            return 'ast.PYACC.Section2.Rules.RHS.Merge({0:s})'.format(', '.join(params))
+          # GENERATE END
+
+        class Prec(element.Element):
+          # GENERATE ELEMENT() BEGIN
+          def __init__(
+            self,
+            tag = 'PYACC_Section2_Rules_RHS_Prec',
+            attrib = {},
+            text = '',
+            children = []
+          ):
+            element.Element.__init__(
               self,
-              tag = 'PYACC_Section2_Rules_RHSes_RHS_Prec',
-              attrib = {},
-              text = '',
-              children = []
-            ):
-              element.Element.__init__(
-                self,
-                tag,
-                attrib,
-                text,
-                children
-              )
-            def copy(self, factory = None):
-              result = element.Element.copy(
-                self,
-                Prec if factory is None else factory
-              )
-              return result
-            def __repr__(self):
-              params = []
-              self.repr_serialize(params)
-              return 'ast.PYACC.Section2.Rules.RHSes.RHS.Prec({0:s})'.format(', '.join(params))
-            # GENERATE END
-
-          class Symbol(element.Element):
-            # GENERATE ELEMENT() BEGIN
-            def __init__(
+              tag,
+              attrib,
+              text,
+              children
+            )
+          def copy(self, factory = None):
+            result = element.Element.copy(
               self,
-              tag = 'PYACC_Section2_Rules_RHSes_RHS_Symbol',
-              attrib = {},
-              text = '',
-              children = []
-            ):
-              element.Element.__init__(
-                self,
-                tag,
-                attrib,
-                text,
-                children
-              )
-            def copy(self, factory = None):
-              result = element.Element.copy(
-                self,
-                Symbol if factory is None else factory
-              )
-              return result
-            def __repr__(self):
-              params = []
-              self.repr_serialize(params)
-              return 'ast.PYACC.Section2.Rules.RHSes.RHS.Symbol({0:s})'.format(', '.join(params))
-            # GENERATE END
+              Prec if factory is None else factory
+            )
+            return result
+          def __repr__(self):
+            params = []
+            self.repr_serialize(params)
+            return 'ast.PYACC.Section2.Rules.RHS.Prec({0:s})'.format(', '.join(params))
+          # GENERATE END
 
+        class Symbol(element.Element):
           # GENERATE ELEMENT() BEGIN
           def __init__(
             self,
-            tag = 'PYACC_Section2_Rules_RHSes_RHS',
+            tag = 'PYACC_Section2_Rules_RHS_Symbol',
             attrib = {},
             text = '',
             children = []
@@ -1587,19 +1771,19 @@ class PYACC(element.Element):
           def copy(self, factory = None):
             result = element.Element.copy(
               self,
-              RHS if factory is None else factory
+              Symbol if factory is None else factory
             )
             return result
           def __repr__(self):
             params = []
             self.repr_serialize(params)
-            return 'ast.PYACC.Section2.Rules.RHSes.RHS({0:s})'.format(', '.join(params))
+            return 'ast.PYACC.Section2.Rules.RHS.Symbol({0:s})'.format(', '.join(params))
           # GENERATE END
 
         # GENERATE ELEMENT() BEGIN
         def __init__(
           self,
-          tag = 'PYACC_Section2_Rules_RHSes',
+          tag = 'PYACC_Section2_Rules_RHS',
           attrib = {},
           text = '',
           children = []
@@ -1614,14 +1798,59 @@ class PYACC(element.Element):
         def copy(self, factory = None):
           result = element.Element.copy(
             self,
-            RHSes if factory is None else factory
+            RHS if factory is None else factory
           )
           return result
         def __repr__(self):
           params = []
           self.repr_serialize(params)
-          return 'ast.PYACC.Section2.Rules.RHSes({0:s})'.format(', '.join(params))
+          return 'ast.PYACC.Section2.Rules.RHS({0:s})'.format(', '.join(params))
         # GENERATE END
+        def post_process(
+          self,
+          pyacc,
+          lhs_symbol,
+          name_to_symbol
+        ):
+          # Convert this right-hand side into one production of pyacc.grammar.
+          #   pyacc: root object; its grammar, symbols and actions_text are
+          #     updated in place
+          #   lhs_symbol: index into pyacc.symbols of the left-hand side
+          #   name_to_symbol: not used by this method
+          # Exactly one entry is appended to pyacc.actions_text per call: the
+          # action's text, or '' when the loop ends without meeting an action.
+          expr = regex.RegexEmpty()
+          for index, child in enumerate(self):
+            if isinstance(child, PYACC.Section2.Rules.RHS.Symbol):
+              operand = child[0]
+              if isinstance(operand, PYACC.Char):
+                # character literal: matches exactly that one character code
+                text = operand.get_text()
+                term = regex.RegexCharacter(
+                  char_set = [ord(text), ord(text) + 1]
+                )
+              elif isinstance(operand, PYACC.ID):
+                term = regex.RegexCharacterRule(
+                  rule_name = element.get_text(operand, 0)
+                  # char_set will be filled in later once assigned
+                )
+              else:
+                assert False
+              expr = regex.RegexSequence(children = [expr, term])
+            elif isinstance(child, PYACC.Section2.Rules.RHS.Action):
+              # the type test has to look at the child itself; testing the
+              # integer loop index can never match, which silently dropped
+              # every action and recorded '' instead
+              assert index == len(self) - 1
+              assert isinstance(child[0], PYACC.Text)
+              pyacc.actions_text.append(element.get_text(child[0], 0))
+              break
+          else:
+            pyacc.actions_text.append('')
+
+          # add the index of the production about to be appended to the
+          # left-hand side's char_set, widening the last pair when the new
+          # index directly follows it
+          production = len(pyacc.grammar)
+          char_set = pyacc.symbols[lhs_symbol].char_set
+          if len(char_set) and char_set[-1] == production:
+            char_set[-1] = production + 1
+          else:
+            char_set.extend([production, production + 1])
+          pyacc.grammar.append(regex.Grammar.Production(children = [expr]))
 
       # GENERATE ELEMENT() BEGIN
       def __init__(
@@ -1649,6 +1878,32 @@ class PYACC(element.Element):
         self.repr_serialize(params)
         return 'ast.PYACC.Section2.Rules({0:s})'.format(', '.join(params))
       # GENERATE END
+      def post_process(
+        self,
+        pyacc,
+        section,
+        name_to_symbol
+      ):
+        # Resolve the rule's left-hand side (first child, a PYACC.ID) to an
+        # index into pyacc.symbols, creating a NonterminalSymbol with an empty
+        # char_set the first time the name is seen, then hand every remaining
+        # child that index via its own post_process().
+        #   pyacc: root object whose symbols list may grow
+        #   section: not used by this method
+        #   name_to_symbol: name -> index map, updated for new names
+        assert isinstance(self[0], PYACC.ID)
+        lhs_name = element.get_text(self[0], 0)
+        if lhs_name in name_to_symbol:
+          lhs_symbol = name_to_symbol[lhs_name]
+          # an already known name must be a nonterminal (the index variable
+          # is lhs_symbol; 'lhs.symbol' was a typo that raised NameError)
+          assert isinstance(
+            pyacc.symbols[lhs_symbol],
+            PYACC.NonterminalSymbol
+          )
+        else:
+          lhs_symbol = len(pyacc.symbols)
+          name_to_symbol[lhs_name] = lhs_symbol
+          pyacc.symbols.append(
+            PYACC.NonterminalSymbol(name = lhs_name, char_set = [])
+          )
+        for i in self[1:]:
+          i.post_process(
+            pyacc,
+            lhs_symbol,
+            name_to_symbol
+          )
 
     # GENERATE ELEMENT() BEGIN
     def __init__(
@@ -1705,13 +1960,17 @@ class PYACC(element.Element):
       return 'ast.PYACC.Section3({0:s})'.format(', '.join(params))
     # GENERATE END
 
-  # GENERATE ELEMENT() BEGIN
+  # GENERATE ELEMENT(list(str) prologue_text, list(ref) symbols, ref grammar, list(str) actions_text) BEGIN
   def __init__(
     self,
     tag = 'PYACC',
     attrib = {},
     text = '',
-    children = []
+    children = [],
+    prologue_text = [],
+    symbols = [],
+    grammar = None,
+    actions_text = []
   ):
     element.Element.__init__(
       self,
@@ -1720,12 +1979,82 @@ class PYACC(element.Element):
       text,
       children
     )
+    self.prologue_text = (
+      [element.deserialize_str(i) for i in prologue_text.split()]
+    if isinstance(prologue_text, str) else
+      prologue_text
+    )
+    self.symbols = symbols
+    self.grammar = grammar
+    self.actions_text = (
+      [element.deserialize_str(i) for i in actions_text.split()]
+    if isinstance(actions_text, str) else
+      actions_text
+    )
+  def serialize(self, ref_list, indent = 0):
+    element.Element.serialize(self, ref_list, indent)
+    self.set(
+      'prologue_text',
+      ' '.join([element.serialize_str(i) for i in self.prologue_text])
+    )
+    self.set(
+      'symbols',
+      ' '.join([element.serialize_ref(i, ref_list) for i in self.symbols])
+    )
+    self.set('grammar', element.serialize_ref(self.grammar, ref_list))
+    self.set(
+      'actions_text',
+      ' '.join([element.serialize_str(i) for i in self.actions_text])
+    )
+  def deserialize(self, ref_list):
+    element.Element.deserialize(self, ref_list)
+    self.prologue_text = [
+      element.deserialize_str(i)
+      for i in self.get('prologue_text', '').split()
+    ]
+    self.symbols = [
+      element.deserialize_ref(i, ref_list)
+      for i in self.get('symbols', '').split()
+    ]
+    self.grammar = element.deserialize_ref(self.get('grammar', '-1'), ref_list)
+    self.actions_text = [
+      element.deserialize_str(i)
+      for i in self.get('actions_text', '').split()
+    ]
   def copy(self, factory = None):
     result = element.Element.copy(
       self,
       PYACC if factory is None else factory
     )
+    result.prologue_text = self.prologue_text
+    result.symbols = self.symbols
+    result.grammar = self.grammar
+    result.actions_text = self.actions_text
     return result
+  def repr_serialize(self, params):
+    element.Element.repr_serialize(self, params)
+    if len(self.prologue_text):
+      params.append(
+        'prologue_text = [{0:s}]'.format(
+          ', '.join([repr(i) for i in self.prologue_text])
+        )
+      )
+    if len(self.symbols):
+      params.append(
+        'symbols = [{0:s}]'.format(
+          ', '.join([repr(i) for i in self.symbols])
+        )
+      )
+    if self.grammar != None:
+      params.append(
+        'grammar = {0:s}'.format(repr(self.grammar))
+      )
+    if len(self.actions_text):
+      params.append(
+        'actions_text = [{0:s}]'.format(
+          ', '.join([repr(i) for i in self.actions_text])
+        )
+      )
   def __repr__(self):
     params = []
     self.repr_serialize(params)
@@ -1733,28 +2062,55 @@ class PYACC(element.Element):
   # GENERATE END
   def post_process(self):
     # variables that will be serialized
-    self.lr1 = regex.LR1()
+    self.prologue_text = []
+    self.symbols = [
+      PYACC.NonterminalSymbol(name = 'error', char_set = [])
+    ]
+    self.grammar = regex.Grammar(
+      children = [
+        regex.Grammar.Production(
+          children = [
+            regex.RegexCharacterRule()
+          ]
+        )
+      ],
+      terminal_thres = 0x102
+    )
     self.actions_text = []
 
     # variables that won't be serialized
-    name_to_terminal = {}
-    name_to_nonterminal = {}
+    name_to_symbol = {'error': 0}
 
+    # perform the semantic analysis pass
     for i in self:
       i.post_process(
         self,
-        name_to_terminal,
-        name_to_nonterminal
+        name_to_symbol
       )
 
-    # do something to lr1 here
+    # if start symbol not specified, use first nonterminal defined in file
+    if len(self.grammar[0][0].rule_name) == 0:
+      for i in self.symbols:
+        if isinstance(i, PYACC.NonterminalSymbol):
+          self.grammar[0][0].rule_name = i.name
+
+    # make nonterminal char_set offset by terminal_thres which is now known
+    for i in self.symbols:
+      if isinstance(i, PYACC.NonterminalSymbol):
+        i.char_set = [j + self.grammar.terminal_thres for j in i.char_set]
 
-    self.actions_text.append(PYACC.Text(text = 'ECHO;\n'))
+    # look up all rule names and substitute appropriate char_set for each
+    self.grammar.post_process(
+      dict([(i.name, i.char_set) for i in self.symbols])
+    )
 
 # GENERATE FACTORY(regex.factory) BEGIN
 tag_to_class = {
   'Item': Item,
   'PYACC': PYACC,
+  'PYACC_Symbol': PYACC.Symbol,
+  'PYACC_TerminalSymbol': PYACC.TerminalSymbol,
+  'PYACC_NonterminalSymbol': PYACC.NonterminalSymbol,
   'PYACC_BracedCode': PYACC.BracedCode,
   'PYACC_BracedPredicate': PYACC.BracedPredicate,
   'PYACC_Char': PYACC.Char,
@@ -1767,6 +2123,7 @@ tag_to_class = {
   'PYACC_String': PYACC.String,
   'PYACC_Tag': PYACC.Tag,
   'PYACC_TagNone': PYACC.TagNone,
+  'PYACC_Text': PYACC.Text,
   'PYACC_Section1Or2': PYACC.Section1Or2,
   'PYACC_Section1Or2_Code': PYACC.Section1Or2.Code,
   'PYACC_Section1Or2_CodeProps': PYACC.Section1Or2.CodeProps,
@@ -1805,14 +2162,13 @@ tag_to_class = {
   'PYACC_Section1_YACC': PYACC.Section1.YACC,
   'PYACC_Section2': PYACC.Section2,
   'PYACC_Section2_Rules': PYACC.Section2.Rules,
-  'PYACC_Section2_Rules_RHSes': PYACC.Section2.Rules.RHSes,
-  'PYACC_Section2_Rules_RHSes_RHS': PYACC.Section2.Rules.RHSes.RHS,
-  'PYACC_Section2_Rules_RHSes_RHS_Action': PYACC.Section2.Rules.RHSes.RHS.Action,
-  'PYACC_Section2_Rules_RHSes_RHS_DPrec': PYACC.Section2.Rules.RHSes.RHS.DPrec,
-  'PYACC_Section2_Rules_RHSes_RHS_Empty': PYACC.Section2.Rules.RHSes.RHS.Empty,
-  'PYACC_Section2_Rules_RHSes_RHS_Merge': PYACC.Section2.Rules.RHSes.RHS.Merge,
-  'PYACC_Section2_Rules_RHSes_RHS_Prec': PYACC.Section2.Rules.RHSes.RHS.Prec,
-  'PYACC_Section2_Rules_RHSes_RHS_Symbol': PYACC.Section2.Rules.RHSes.RHS.Symbol,
+  'PYACC_Section2_Rules_RHS': PYACC.Section2.Rules.RHS,
+  'PYACC_Section2_Rules_RHS_Action': PYACC.Section2.Rules.RHS.Action,
+  'PYACC_Section2_Rules_RHS_DPrec': PYACC.Section2.Rules.RHS.DPrec,
+  'PYACC_Section2_Rules_RHS_Empty': PYACC.Section2.Rules.RHS.Empty,
+  'PYACC_Section2_Rules_RHS_Merge': PYACC.Section2.Rules.RHS.Merge,
+  'PYACC_Section2_Rules_RHS_Prec': PYACC.Section2.Rules.RHS.Prec,
+  'PYACC_Section2_Rules_RHS_Symbol': PYACC.Section2.Rules.RHS.Symbol,
   'PYACC_Section3': PYACC.Section3
 }
 def factory(tag, attrib = {}, *args, **kwargs):
diff --git a/bison_lr1dfa.py b/bison_lr1dfa.py
index ea0164b..e358d3c 100644 (file)
--- a/bison_lr1dfa.py
+++ b/bison_lr1dfa.py
@@ -3,50 +3,9 @@ import numpy
 import regex
 
 class BisonLR1DFA:
-  YY_TRAILING_MASK = 0x2000
-  YY_TRAILING_HEAD_MASK = 0x4000
-
   def __init__(self, lr1dfa):
-    # we use a modified version of the transition routine, we do not know
-    # how many threads are active, so we just create null threads as they
-    # are referred to (resulting threads have current marks but no history),
-    # each thread is a list in forward order, not a stack in reverse order
-    def transit(transition):
-      nonlocal threads0, threads1, prefix_slop # note: also uses i
-      j = prefix_slop
-      for trans in transition:
-        if len(threads0) < j + trans[1]:
-          threads0.extend([[] for k in range(j + trans[1] - len(threads0))])
-        if trans[0] == regex.DFA.TRANSITION_POP:
-          j += trans[1]
-        elif trans[0] == regex.DFA.TRANSITION_DUP:
-          while j < trans[1]:
-            threads0[:0] = [None] * prefix_slop
-            threads1[:0] = [None] * prefix_slop
-            j += prefix_slop
-            prefix_slop *= 2
-          threads0[j - trans[1]:j] = [
-            list(k)
-            for k in threads0[j:j + trans[1]]
-          ]
-          j -= trans[1]
-        elif trans[0] == regex.DFA.TRANSITION_MARK:
-          for k in range(j, j + trans[1]):
-            threads0[j].append(trans[2])
-        elif trans[0] == regex.DFA.TRANSITION_MOVE:
-          threads1.extend(threads0[j:j + trans[1]])
-          j += trans[1]
-        #elif trans[0] == regex.DFA.TRANSITION_DEL:
-        #  del threads1[-trans[1]:]
-        else:
-          assert False
-      assert j == len(threads0)
-      threads0, threads1 = threads1, threads0
-      del threads1[prefix_slop:]
-
-    threads0 = [None]
-    threads1 = [None]
-    prefix_slop = 1
+    print(repr(lr1dfa))
+    assert False
 
     # this is basically just a renumbering
 
@@ -195,11 +154,7 @@ class BisonLR1DFA:
     #print(flex_state_to_action)
 
 def generate(pyacc, skel_file, out_file):
-  lr1 = pyacc.to_lr1()
-  eob_expr = regex.RegexGroup(children = [regex.RegexEmpty()])
-  eob_expr.post_process(len(pyacc.actions))
-  eob_expr.add_to_lr1(lr1)
-  bison_lr1dfa = BisonLR1DFA(lr1.to_lalr1())
+  bison_lr1dfa = BisonLR1DFA(pyacc.grammar.to_lr1().to_lalr1())
 
   with open(skel_file, 'r') as fin:
     with open(out_file, 'w+') as fout:
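diff --git a/bootstrap_pyacc.py b/bootstrap_pyacc.py
index 32e16db..3ce5754 100755 (executable)
--- a/bootstrap_pyacc.py
+++ b/bootstrap_pyacc.py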
@@ -15,8 +15,8 @@ except getopt.GetoptError as err:
   sys.stderr.write(str(err))
   sys.exit(1)
 
-out_file = 'lex.yy.c'
-skel_file = os.path.join(home_dir, 'skel/lex.yy.c')
+out_file = 'y.tab.c'
+skel_file = os.path.join(home_dir, 'skel/y.tab.c')
 for opt, arg in opts:
   if opt == '-o' or opt == '--outfile':
     out_file = arg
@@ -26,7 +26,7 @@ for opt, arg in opts:
     assert False
 if len(args) < 1:
   sys.stdout.write(
-    'usage: {0:s} rules.l\n'.format(
+    'usage: {0:s} rules.y\n'.format(
       sys.argv[0]
     )
   )
@@ -34,6 +34,6 @@ if len(args) < 1:
 in_file = args[0]
 
 with open(in_file) as fin:
-  plex = element.deserialize(fin, ast.factory)
-plex.post_process()
-flex_dfa.generate(plex, skel_file, out_file)
+  pyacc = element.deserialize(fin, ast.factory)
+pyacc.post_process()
+bison_lr1dfa.generate(pyacc, skel_file, out_file)
index 080b4c3..7e9cc72 100644 (file)
--- a/regex.py
+++ b/regex.py
@@ -918,6 +918,28 @@ class Grammar(element.Element):
       self.repr_serialize(params)
       return 'regex.Grammar.Production({0:s})'.format(', '.join(params))
     # GENERATE END
+    def post_process(self, nonterminal, rule_char_sets):
+      # Remember which nonterminal index this production was given, then
+      # let the production's expression (first child) resolve itself against
+      # rule_char_sets.
+      self.nonterminal = nonterminal
+      expr = self[0]
+      expr.post_process(0, rule_char_sets)
+    def add_to_lr1(self, lr1):
+      # Have the production's expression (first child) fill three fresh
+      # lists, then append one tuple
+      #   (priority * 2 + right_to_left, symbols, lookaheads, group_bounds)
+      # to lr1.productions.
+      symbols, lookaheads, group_bounds = [], [], []
+      self[0].to_lr1_symbols(
+        lr1.terminal_thres,
+        symbols,
+        lookaheads,
+        group_bounds
+      )
+      lookaheads.append(([], True)) # initial_set, can_be_empty (sentinel)
+      precedence = self.priority * 2 + int(self.right_to_left)
+      lr1.productions.append(
+        (precedence, symbols, lookaheads, group_bounds)
+      )
 
   # GENERATE ELEMENT(int terminal_thres) BEGIN
   def __init__(
@@ -966,29 +988,11 @@ class Grammar(element.Element):
   # GENERATE END
   def post_process(self, rule_char_sets):
     for i in range(len(self)):
-      self[i].nonterminal = i
-      self[i][0].post_process(0, rule_char_sets)
+      self[i].post_process(i, rule_char_sets)
   def to_lr1(self):
     lr1 = LR1([], self.terminal_thres)
     for i in self:
-      symbols = []
-      lookaheads = []
-      group_bounds = []
-      i[0].to_lr1_symbols(
-        self.terminal_thres,
-        symbols,
-        lookaheads,
-        group_bounds
-      )
-      lookaheads.append(([], True)) # initial_set, can_be_empty (sentinel)
-      lr1.productions.append(
-        (
-          i.priority * 2 + int(i.right_to_left),
-          symbols,
-          lookaheads,
-          group_bounds
-        )
-      )
+      i.add_to_lr1(lr1)
     # propagate lookaheads
     modified = True
     while modified: