Implement %space, which controls how element groups capture inter-token space [origin/master]
author: Nick Downing <downing.nick@gmail.com>
Thu, 27 Sep 2018 05:15:34 +0000 (15:15 +1000)
committer: Nick Downing <downing.nick@gmail.com>
Thu, 27 Sep 2018 05:15:34 +0000 (15:15 +1000)
ast.py
generate_py.py
piyacc.py
skel/skel_py_element.py
tests_ast/cal_py.y

diff --git a/ast.py b/ast.py
index e505ff1..e3ba5e5 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -358,7 +358,9 @@ class AST(element.Element):
         name_to_symbol,
         string_to_symbol,
         name_to_tag,
-        last_action
+        last_action,
+        has_space,
+        last_grouped_pos
       ):
         raise NotImplementedError
       def add_to_symbols(
@@ -375,12 +377,12 @@ class AST(element.Element):
         self,
         _ast,
         production,
-        first_action,
-        add_space,
         groups,
-        pos
+        first_action,
+        pos,
+        last_grouped_pos
       ):
-        return first_action, add_space, pos
+        return first_action, pos, last_grouped_pos
 
     class Action(Item):
       # GENERATE ELEMENT() BEGIN
@@ -418,11 +420,13 @@ class AST(element.Element):
         name_to_symbol,
         string_to_symbol,
         name_to_tag,
-        last_action
+        last_action,
+        has_space,
+        last_grouped_pos
       ):
         _ast.n_productions += int(last_action) # midrule action production
         production.n_symbols += int(last_action) # midrule action symbol
-        return True
+        return True, has_space, last_grouped_pos
       def add_to_symbols(
         self,
         _ast,
@@ -444,7 +448,7 @@ class AST(element.Element):
               # lookaheads (list of initial_set, can_be_empty)
               [([], True)],
               # ref_data
-              ([], None, last_action)
+              (False, [], True, 0, last_action)
             )
           )
         assert isinstance(self[0], AST.Text) # temporary
@@ -464,14 +468,12 @@ class AST(element.Element):
         self,
         _ast,
         production,
-        first_action,
-        add_space,
         groups,
-        pos
+        first_action,
+        pos,
+        last_grouped_pos
       ):
-        if first_action: # had first action, treat this as symbol
-          return True, False, pos - 2
-        return True, add_space, pos
+        return True, pos - 2 * int(first_action), last_grouped_pos
 
     class DPrec(Item):
       # GENERATE ELEMENT(int value) BEGIN
@@ -556,33 +558,13 @@ class AST(element.Element):
         name_to_symbol,
         string_to_symbol,
         name_to_tag,
-        last_action
-      ):
-        # just skip %empty for now (fix this later)
-        return last_action
-
-      def add_to_symbols(
-        self,
-        _ast,
-        production,
         last_action,
-        _lr1,
-        symbols,
-        tag_names
-      ):
-        # just skip %empty for now (fix this later)
-        return last_action
-      def add_to_groups(
-        self,
-        _ast,
-        production,
-        first_action,
-        add_space,
-        groups,
-        pos
+        has_space,
+        last_grouped_pos
       ):
-        # just skip %empty for now (fix this later)
-        return first_action, add_space, pos
+        assert not production.has_empty
+        production.has_empty = True
+        return last_action, has_space, last_grouped_pos
 
     class Merge(Item):
       # GENERATE ELEMENT() BEGIN
@@ -667,7 +649,9 @@ class AST(element.Element):
         name_to_symbol,
         string_to_symbol,
         name_to_tag,
-        last_action
+        last_action,
+        has_space,
+        last_grouped_pos
       ):
         assert production.precedence_terminal == -1
         _, production.precedence_terminal = self[0].post_process(
@@ -681,17 +665,20 @@ class AST(element.Element):
           -1, # _tag
           -1 # precedence
         )
-        return last_action
+        return last_action, has_space, last_grouped_pos
 
+    # has_space says whether there's a %space between this and previous
+    # SymbolRef, if not we treat this as having a built-in %space prefix
     class SymbolRef(Item):
-      # GENERATE ELEMENT(int symbol) BEGIN
+      # GENERATE ELEMENT(int symbol, bool has_space) BEGIN
       def __init__(
         self,
         tag = 'AST_Production_SymbolRef',
         attrib = {},
         text = '',
         children = [],
-        symbol = -1
+        symbol = -1,
+        has_space = False
       ):
         AST.Production.Item.__init__(
           self,
@@ -705,18 +692,26 @@ class AST(element.Element):
         if isinstance(symbol, str) else
           symbol
         )
+        self.has_space = (
+          element.deserialize_bool(has_space)
+        if isinstance(has_space, str) else
+          has_space
+        )
       def serialize(self, ref_list):
         AST.Production.Item.serialize(self, ref_list)
         self.set('symbol', element.serialize_int(self.symbol))
+        self.set('has_space', element.serialize_bool(self.has_space))
       def deserialize(self, ref_list):
         AST.Production.Item.deserialize(self, ref_list)
         self.symbol = element.deserialize_int(self.get('symbol', '-1'))
+        self.has_space = element.deserialize_bool(self.get('has_space', 'false'))
       def copy(self, factory = None):
         result = AST.Production.Item.copy(
           self,
           SymbolRef if factory is None else factory
         )
         result.symbol = self.symbol
+        result.has_space = self.has_space
         return result
       def repr_serialize(self, params):
         AST.Production.Item.repr_serialize(self, params)
@@ -724,6 +719,10 @@ class AST(element.Element):
           params.append(
             'symbol = {0:s}'.format(repr(self.symbol))
           )
+        if self.has_space != False:
+          params.append(
+            'has_space = {0:s}'.format(repr(self.has_space))
+          )
       def __repr__(self):
         params = []
         self.repr_serialize(params)
@@ -738,7 +737,9 @@ class AST(element.Element):
         name_to_symbol,
         string_to_symbol,
         name_to_tag,
-        last_action
+        last_action,
+        has_space,
+        last_grouped_pos
       ):
         _ast.n_productions += int(last_action) # midrule action production
         production.n_symbols += int(last_action) + 1 # midrule action symbol
@@ -755,7 +756,8 @@ class AST(element.Element):
         )
         if _ast.symbols[self.symbol]._type == AST.Symbol.TYPE_TERMINAL:
           production.last_terminal = self.symbol
-        return False
+        self.has_space = has_space
+        return False, False, last_grouped_pos
       def add_to_symbols(
         self,
         _ast,
@@ -777,7 +779,7 @@ class AST(element.Element):
               # lookaheads (list of initial_set, can_be_empty)
               [([], True)],
               # ref_data
-              ([], None, last_action)
+              (False, [], True, 0, last_action)
             )
           )
         symbols.append(
@@ -795,12 +797,12 @@ class AST(element.Element):
         self,
         _ast,
         production,
-        first_action,
-        add_space,
         groups,
-        pos
+        first_action,
+        pos,
+        last_grouped_pos
       ):
-        return True, False, pos - 2
+        return True, pos - 1 - int(not self.has_space), last_grouped_pos
 
     class GroupElement(Item):
       # GENERATE ELEMENT() BEGIN
@@ -838,10 +840,12 @@ class AST(element.Element):
         name_to_symbol,
         string_to_symbol,
         name_to_tag,
-        last_action
+        last_action,
+        has_space,
+        last_grouped_pos
       ):
         for i in self[1:]:
-          last_action = i.post_process(
+          last_action, has_space, last_grouped_pos = i.post_process(
             _ast,
             section,
             production,
@@ -849,9 +853,12 @@ class AST(element.Element):
             name_to_symbol,
             string_to_symbol,
             name_to_tag,
-            last_action
+            last_action,
+            has_space,
+            last_grouped_pos
           )
-        return last_action
+        last_grouped_pos = production.n_symbols * 2 + int(has_space)
+        return last_action, has_space, last_grouped_pos
       def add_to_symbols(
         self,
         _ast,
@@ -875,25 +882,79 @@ class AST(element.Element):
         self,
         _ast,
         production,
-        first_action,
-        add_space,
         groups,
-        pos
+        first_action,
+        pos,
+        last_grouped_pos
       ):
-        pos0 = pos
+        pos1 = pos
         for i in self[:0:-1]:
-          first_action, add_space, pos0 = i.add_to_groups(
+          first_action, pos, last_grouped_pos = i.add_to_groups(
             _ast,
             production,
-            add_space,
-            first_action,
             groups,
-            pos0
+            first_action,
+            pos,
+            last_grouped_pos
           )
-        groups.append((pos0, pos, self[0]))
-        return first_action, True, pos - 1
+        groups.append((pos, pos1, self[0]))
+        pos = pos1 - 1
+        return first_action, pos, pos
  
-    # GENERATE ELEMENT(int lhs_nonterminal, int n_symbols, int last_terminal, int precedence_terminal) BEGIN
+    class Space(Item):
+      # GENERATE ELEMENT() BEGIN
+      def __init__(
+        self,
+        tag = 'AST_Production_Space',
+        attrib = {},
+        text = '',
+        children = []
+      ):
+        AST.Production.Item.__init__(
+          self,
+          tag,
+          attrib,
+          text,
+          children
+        )
+      def copy(self, factory = None):
+        result = AST.Production.Item.copy(
+          self,
+          Space if factory is None else factory
+        )
+        return result
+      def __repr__(self):
+        params = []
+        self.repr_serialize(params)
+        return 'ast.AST.Production.Space({0:s})'.format(', '.join(params))
+      # GENERATE END
+      def post_process(
+        self,
+        _ast,
+        section,
+        production,
+        character_to_symbol,
+        name_to_symbol,
+        string_to_symbol,
+        name_to_tag,
+        last_action,
+        has_space,
+        last_grouped_pos
+      ):
+        assert not has_space
+        return last_action, True, last_grouped_pos
+      def add_to_groups(
+        self,
+        _ast,
+        production,
+        groups,
+        first_action,
+        pos,
+        last_grouped_pos
+      ):
+        return first_action, pos - 1, last_grouped_pos
+
+    # GENERATE ELEMENT(int lhs_nonterminal, int n_symbols, int last_terminal, int precedence_terminal, bool has_empty, bool has_space, bool take_space_right) BEGIN
     def __init__(
       self,
       tag = 'AST_Production',
@@ -903,7 +964,10 @@ class AST(element.Element):
       lhs_nonterminal = -1,
       n_symbols = -1,
       last_terminal = -1,
-      precedence_terminal = -1
+      precedence_terminal = -1,
+      has_empty = False,
+      has_space = False,
+      take_space_right = False
     ):
       element.Element.__init__(
         self,
@@ -932,18 +996,39 @@ class AST(element.Element):
       if isinstance(precedence_terminal, str) else
         precedence_terminal
       )
+      self.has_empty = (
+        element.deserialize_bool(has_empty)
+      if isinstance(has_empty, str) else
+        has_empty
+      )
+      self.has_space = (
+        element.deserialize_bool(has_space)
+      if isinstance(has_space, str) else
+        has_space
+      )
+      self.take_space_right = (
+        element.deserialize_bool(take_space_right)
+      if isinstance(take_space_right, str) else
+        take_space_right
+      )
     def serialize(self, ref_list):
       element.Element.serialize(self, ref_list)
       self.set('lhs_nonterminal', element.serialize_int(self.lhs_nonterminal))
       self.set('n_symbols', element.serialize_int(self.n_symbols))
       self.set('last_terminal', element.serialize_int(self.last_terminal))
       self.set('precedence_terminal', element.serialize_int(self.precedence_terminal))
+      self.set('has_empty', element.serialize_bool(self.has_empty))
+      self.set('has_space', element.serialize_bool(self.has_space))
+      self.set('take_space_right', element.serialize_bool(self.take_space_right))
     def deserialize(self, ref_list):
       element.Element.deserialize(self, ref_list)
       self.lhs_nonterminal = element.deserialize_int(self.get('lhs_nonterminal', '-1'))
       self.n_symbols = element.deserialize_int(self.get('n_symbols', '-1'))
       self.last_terminal = element.deserialize_int(self.get('last_terminal', '-1'))
       self.precedence_terminal = element.deserialize_int(self.get('precedence_terminal', '-1'))
+      self.has_empty = element.deserialize_bool(self.get('has_empty', 'false'))
+      self.has_space = element.deserialize_bool(self.get('has_space', 'false'))
+      self.take_space_right = element.deserialize_bool(self.get('take_space_right', 'false'))
     def copy(self, factory = None):
       result = element.Element.copy(
         self,
@@ -953,6 +1038,9 @@ class AST(element.Element):
       result.n_symbols = self.n_symbols
       result.last_terminal = self.last_terminal
       result.precedence_terminal = self.precedence_terminal
+      result.has_empty = self.has_empty
+      result.has_space = self.has_space
+      result.take_space_right = self.take_space_right
       return result
     def repr_serialize(self, params):
       element.Element.repr_serialize(self, params)
@@ -972,6 +1060,18 @@ class AST(element.Element):
         params.append(
           'precedence_terminal = {0:s}'.format(repr(self.precedence_terminal))
         )
+      if self.has_empty != False:
+        params.append(
+          'has_empty = {0:s}'.format(repr(self.has_empty))
+        )
+      if self.has_space != False:
+        params.append(
+          'has_space = {0:s}'.format(repr(self.has_space))
+        )
+      if self.take_space_right != False:
+        params.append(
+          'take_space_right = {0:s}'.format(repr(self.take_space_right))
+        )
     def __repr__(self):
       params = []
       self.repr_serialize(params)
@@ -993,9 +1093,12 @@ class AST(element.Element):
       self.n_symbols = 0 # includes midrule actions
       self.last_terminal = -1
       self.precedence_terminal = -1
+      self.has_empty = False
       last_action = False
+      has_space = False
+      last_grouped_pos = 0
       for i in self:
-        last_action = i.post_process(
+        last_action, has_space, last_grouped_pos = i.post_process(
           _ast,
           section,
           self,
@@ -1003,8 +1106,16 @@ class AST(element.Element):
           name_to_symbol,
           string_to_symbol,
           name_to_tag,
-          last_action
+          last_action,
+          has_space,
+          last_grouped_pos
         )
+      assert not self.has_empty or self.n_symbols == 0
+      self.has_space = has_space # if %space after last symbol
+      self.take_space_right = last_grouped_pos > self.n_symbols * 2
+      #print('self.take_space_right', self.take_space_right)
+      #print('last_grouped_pos', last_grouped_pos)
+      #print('self.n_symbols', self.n_symbols)
 
       i = _ast.symbols[self.lhs_nonterminal]
       if len(i.character_set) and i.character_set[-1] == _ast.n_productions:
@@ -1043,19 +1154,23 @@ class AST(element.Element):
 
       # go backwards collecting negative indices of element group start/end
       # here we ignore the first action, rather than holding the most recent
-      first_action = False # have not had first action yet
-      add_space = False # didn't absorb inter-token space
       groups = []
-      pos = 0
+      first_action = False # have not had first action yet
+      pos = int(self.has_space) - int(self.take_space_right)
+      last_grouped_pos = pos # says last position where a group has started
       for i in self[::-1]:
-        first_action, add_space, pos = i.add_to_groups(
+        first_action, pos, last_grouped_pos = i.add_to_groups(
           _ast,
           self,
-          first_action,
-          add_space,
           groups,
-          pos
+          first_action,
+          pos,
+          last_grouped_pos
         )
+      take_space_left = last_grouped_pos == pos
+      #print('take_space_left', take_space_left)
+      #print('last_grouped_pos', last_grouped_pos)
+      #print('pos', pos)
 
       _lr1.productions.append(
         (
@@ -1064,7 +1179,7 @@ class AST(element.Element):
           # lookaheads (list of initial_set, can_be_empty)
           [([], False) for i in range(len(symbols))] + [([], True)],
           # ref_data
-          (groups, pos if add_space else None, last_action)
+          (self.take_space_right, groups, take_space_left, pos, last_action)
         )
       )
 
@@ -3760,7 +3875,7 @@ class AST(element.Element):
           # lookaheads (list of initial_set, can_be_empty)
           [([], False), ([], True)],
           # ref_data
-          ([], None, None) # temporary
+          (False, [], True, 0, None) # temporary
         )
       ],
       # precedences
@@ -3858,6 +3973,7 @@ tag_to_class = {
   'AST_Production_Prec': AST.Production.Prec,
   'AST_Production_SymbolRef': AST.Production.SymbolRef,
   'AST_Production_GroupElement': AST.Production.GroupElement,
+  'AST_Production_Space': AST.Production.Space,
   'AST_Section1Or2': AST.Section1Or2,
   'AST_Section1Or2_Code': AST.Section1Or2.Code,
   'AST_Section1Or2_CodeProps': AST.Section1Or2.CodeProps,
diff --git a/generate_py.py b/generate_py.py
index cde45c5..db3f8a0 100644 (file)
--- a/generate_py.py
+++ b/generate_py.py
@@ -108,9 +108,25 @@ YYERROR_VERBOSE = {1:s}
               ''.join(
                 [
                   '''def yy_action{0:d}():
-  global yyval, yyloc
-{1:s}{2:s}{3:s}'''.format(
+  global {1:s}yyval, yyloc
+{2:s}{3:s}{4:s}{5:s}'''.format(
                     i,
+                    (
+                      'yytoken, yylval, yylloc, '
+                    if actions[i][0] else # take_space_right
+                      ''
+                    ),
+                    (
+                      '''  if yytoken == -1:
+    yylval = None
+    yylloc = None
+    yytoken = yylex()
+  yy_element_stack.append(lex_yy.yy_element_space)
+  lex_yy.yy_element_space = element.Element('root')
+'''
+                    if actions[i][0] else # take_space_right
+                      ''
+                    ),
                     ''.join(
                       [
                         '''  yy_element_stack[{0:s}:{1:s}] = [
@@ -130,32 +146,32 @@ YYERROR_VERBOSE = {1:s}
                           str(pos1) if pos1 else '',
                           ast_text_to_python(factory_text, '          ')
                         )
-                        for pos0, pos1, factory_text in actions[i][0]
+                        for pos0, pos1, factory_text in actions[i][1]
                       ]
                     ),
                     (
-                      ''
-                    if actions[i][1] is None else
                       '''  yy_element_stack[{0:s}:{1:s}] = [
     element.Element('root')
   ]
 '''.format(
                         (
-                          str(actions[i][1])
-                        if actions[i][1] else
+                          str(actions[i][3])
+                        if actions[i][3] else # pos
                           'len(yy_element_stack)'
                         ),
                         (
-                          str(actions[i][1])
-                        if actions[i][1] else
+                          str(actions[i][3])
+                        if actions[i][3] else # pos
                           ''
                         )
                       )
+                    if actions[i][2] else # take_space_left
+                      ''
                     ),
                     (
                       '  pass\n'
-                    if actions[i][2] is None else
-                      ast_text_to_python(actions[i][2], '  ')
+                    if actions[i][4] is None else # last_action
+                      ast_text_to_python(actions[i][4], '  ')
                     )
                   )
                   for i in range(len(actions))
index cdb66f2..4876d28 100755 (executable)
--- a/piyacc.py
+++ b/piyacc.py
@@ -53,8 +53,8 @@ with open(in_file) as fin:
 #element.serialize(_ast, 'a.xml', 'utf-8')
 #_ast = element.deserialize('a.xml', ast.factory, 'utf-8')
 _ast.post_process()
-#element.serialize(_ast, 'b.xml', 'utf-8')
-#_ast = element.deserialize('b.xml', ast.factory, 'utf-8')
+element.serialize(_ast, 'b.xml', 'utf-8')
+_ast = element.deserialize('b.xml', ast.factory, 'utf-8')
 (generate_py.generate_py if python else generate_bison.generate_bison)(
   _ast,
   _element,
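diff --git a/skel/skel_py_element.py b/skel/skel_py_element.py
index 382df56..380e629 100644 (file)
--- a/skel/skel_py_element.py
+++ b/skel/skel_py_element.py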
@@ -75,31 +75,17 @@ def yyparse(factory, *args, **kwargs):
       break
     yystack.append((state, yyval, yyloc))
 
-    # concatenate yy_element_stack[base * 2:] to space then AST element
-    i = base * 2
-    #print('i', i, 'len(yy_element_stack)', len(yy_element_stack))
-    if i >= len(yy_element_stack):
-      yy_element_stack.extend(
-        [element.Element('root'), element.Element('root')]
+    # action creates empty space in yy_element_stack[base * 2] if needed
+    assert len(yy_element_stack) > base * 2
+
+    # concatenate yy_element_stack[base * 2 + 1:] to a single AST element
+    yy_element_stack[base * 2 + 1:] = [
+      element.concatenate(
+        yy_element_stack[base * 2 + 1:],
+        element.Element,
+        'root'
       )
-    else:
-      #print('yy_element_stack[i]')
-      #xml.etree.ElementTree.dump(yy_element_stack[i])
-      #print('yy_element_stack[i + 1]')
-      #xml.etree.ElementTree.dump(yy_element_stack[i + 1])
-      for j in range(i + 2, len(yy_element_stack)):
-        #print('j', j)
-        #print('yy_element_stack[j]')
-        #xml.etree.ElementTree.dump(yy_element_stack[j])
-        k = len(yy_element_stack[i + 1])
-        element.set_text(
-          yy_element_stack[i + 1],
-          k,
-          element.get_text(yy_element_stack[i + 1], k) +
-          element.get_text(yy_element_stack[j], 0)
-        )
-        yy_element_stack[i + 1][k:] = yy_element_stack[j][:]
-      del yy_element_stack[i + 2:]
+    ]
 
     state = yy_lr1dfa_states[state][3][
       bisect.bisect_right(yy_lr1dfa_states[state][2], reduce)
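diff --git a/tests_ast/cal_py.y b/tests_ast/cal_py.y
index 0b574a2..d3cc85e 100644 (file)
--- a/tests_ast/cal_py.y
+++ b/tests_ast/cal_py.y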
@@ -20,22 +20,22 @@ S : S E '\n' {
   yyerrok()
 }
   ;
-E : (?E{ast.AST.Add}E '+' E) {
+E : %space (?E{ast.AST.Add}E '+' E) {
     $$ = $1 + $3
   }
-  | (?E{ast.AST.Sub}E '-' E) {
+  | %space (?E{ast.AST.Sub}E '-' E) {
     $$ = $1 - $3
   }
-  | (?E{ast.AST.Mul}E '*' E) {
+  | %space (?E{ast.AST.Mul}E '*' E) {
     $$ = $1 * $3
   }
-  | (?E{ast.AST.Div}E '/' E) {
+  | %space (?E{ast.AST.Div}E '/' E) {
     $$ = $1 / $3
   }
   | '(' E ')' {
     $$ = $2
   }
-  | (?E{ast.AST.Neg}'-' E) %prec UMINUS {
+  | %space (?E{ast.AST.Neg}'-' E) %prec UMINUS {
     $$ = -$2
   }
   | NUM