First cut at automatic AST generation with pilex.py --element switch
author Nick Downing <downing.nick@gmail.com>
Wed, 12 Sep 2018 13:01:53 +0000 (23:01 +1000)
committer Nick Downing <downing.nick@gmail.com>
Wed, 12 Sep 2018 13:01:53 +0000 (23:01 +1000)
generate_flex.py
generate_py.py
pilex.py
skel/skel_py_element.py [new file with mode: 0644]

index 549225a..a9eb4fb 100644 (file)
@@ -1,7 +1,7 @@
 import os
 import regex
 
-def generate_flex(_ast, home_dir, skel_file, out_file):
+def generate_flex(_ast, _element, home_dir, skel_file, out_file):
   _nfa = _ast.to_nfa()
 
   # end of buffer expression (do it here because only necessary for flex)
index 0d78c06..21c9c5c 100644 (file)
@@ -1,8 +1,8 @@
 import os
 import wrap_repr
 
-def ast_text_to_python(_ast_text, indent):
-  text = _ast_text.get_text()
+def ast_text_to_python(ast_text, indent):
+  text = ast_text.get_text()
   text_strip = text.strip()
   if text_strip[:1] == '{' and text_strip[-1:] == '}':
     text = text_strip[1:-1]
@@ -30,11 +30,14 @@ def ast_text_to_python(_ast_text, indent):
       lines[j] = '{0:s}{1:s}\n'.format(indent, lines[j][len(prefix):])
   return ''.join(lines)
 
-def generate_py(_ast, home_dir, skel_file, out_file):
+def generate_py(_ast, _element, home_dir, skel_file, out_file):
   _dfa = _ast.to_nfa().to_dfa()
 
   if skel_file is None:
-    skel_file = os.path.join(home_dir, 'skel/skel_py.py')
+    skel_file = os.path.join(
+      home_dir,
+      'skel/skel_py_element.py' if _element else 'skel/skel_py.py'
+    )
   if out_file is None:
     out_file = (
       _ast[0].outfile
index b255bae..c3be236 100755 (executable)
--- a/pilex.py
+++ b/pilex.py
@@ -12,18 +12,21 @@ home_dir = os.path.dirname(sys.argv[0])
 try:
   opts, args = getopt.getopt(
     sys.argv[1:],
-    'o:pS:',
-    ['outfile=', 'python', 'skel=']
+    'eo:pS:',
+    ['element', 'outfile=', 'python', 'skel=']
   )
 except getopt.GetoptError as err:
   sys.stderr.write('{0:s}\n'.format(str(err)))
   sys.exit(1)
 
 out_file = None
+_element = False
 python = False
 skel_file = None
 for opt, arg in opts:
-  if opt == '-o' or opt == '--outfile':
+  if opt == '-e' or opt == '--element':
+    _element = True
+  elif opt == '-o' or opt == '--outfile':
     out_file = arg
   elif opt == '-p' or opt == '--python':
     python = True
@@ -49,6 +52,7 @@ _ast.post_process()
 #_ast = element.deserialize('b.xml', ast.factory, 'utf-8')
 (generate_py.generate_py if python else generate_flex.generate_flex)(
   _ast,
+  _element,
   home_dir,
   skel_file,
   out_file
diff --git a/skel/skel_py_element.py b/skel/skel_py_element.py
new file mode 100644 (file)
index 0000000..59a73b2
--- /dev/null
@@ -0,0 +1,232 @@
+import bisect
+import element
+import sys
+import y_tab
+
+# GENERATE SECTION1
+
+# GENERATE STARTCONDDECL
+
+class YYReject(Exception):
+  # raised by REJECT(); yylex() catches it and goes on to try the next
+  # candidate in the accepting list of the current DFA state
+  pass
+
+class YYContinue(Exception):
+  # caught by yylex(), which then stops trying candidates for this match,
+  # appends the token to yy_element_space and scans for the next token
+  # without returning; nothing in this skeleton raises it (presumably the
+  # generated action code does -- confirm against the generator)
+  pass
+
+class YYTerminate(Exception):
+  # raised by yyterminate(); yylex() catches it and returns 0
+  pass
+
+class YYBufferList:
+  # base class for singly linked buffer nodes: holds only the "next" link
+  # (None marks the end of the chain)
+  def __init__(self, next = None):
+    self.next = next
+
+class YYBufferBlock(YYBufferList):
+  # a chunk of input text in a buffer chain
+  #   pos: index into text of the next character not yet consumed
+  #   text: the characters of this chunk
+  def __init__(self, next = None, pos = 0, text = ''):
+    YYBufferList.__init__(self, next)
+    self.pos = pos
+    self.text = text
+
+class YYBufferState(YYBufferList):
+  # an entry of yy_buffer_stack: "next" heads the chain of YYBufferBlock
+  # objects already read for this buffer, file_in is the file associated
+  # with the buffer (yylex() and unput() copy it into yyin when they pop
+  # back to this entry)
+  def __init__(self, next = None, file_in = None):
+    YYBufferList.__init__(self, next)
+    self.file_in = file_in
+
+# module-level scanner state shared by yylex(), yyless() and unput()
+
+# yyin: file that yylex() reads lines from once the buffered blocks run out
+# yyout: not referenced anywhere else in this skeleton
+# yy_buffer_stack: stack of YYBufferState, the last entry is the active one
+yyin = sys.stdin
+yyout = sys.stdout
+yy_buffer_stack = [YYBufferState(None, None)]
+
+# yystart: current start condition, indexes yy_dfa_start_action and
+# yy_eof_actions; INITIAL is not defined in this skeleton (presumably
+# emitted at the GENERATE STARTCONDDECL marker -- confirm)
+# yy_threads0, yy_threads1: thread lists that yylex() swaps after each DFA
+# action; the first yy_prefix_slop entries are padding
+yystart = INITIAL
+yy_threads0 = [None]
+yy_threads1 = [None]
+yy_prefix_slop = 1
+
+# yytext: text of the current match
+# len_yytext: number of characters of the current match; while yylex() is
+# scanning it can run ahead of len(yytext)
+yytext = ''
+len_yytext = 0
+
+def REJECT():
+  # abandon the current action by raising YYReject, which yylex() catches in
+  # order to try the next candidate match
+  raise YYReject()
+
+def yyterminate():
+  # stop scanning by raising YYTerminate, which makes yylex() return 0
+  raise YYTerminate()
+
+def yyless(i):
+  # Truncate the current match to its first i characters. The characters
+  # beyond i stay in the buffer, because yylex() only skips len_yytext
+  # characters before it starts the next match.
+  #
+  # yytext and len_yytext are module-level scanner state and are assigned
+  # here, so they must be declared global: without the declaration Python
+  # treats them as locals and the first read raises UnboundLocalError (and
+  # the module state would never be updated anyway).
+  global yytext, len_yytext
+  assert len_yytext >= i
+  yytext = yytext[:i]
+  len_yytext = i
+
+def unput(text):
+  # Push text back onto the input so that it is scanned next.
+  #
+  # The current match is consumed first (same loop as at the top of
+  # yylex()), leaving len_yytext at 0, then a new YYBufferBlock holding text
+  # is linked in at the head of the active buffer's chain.
+  #
+  # yyin and len_yytext are module-level scanner state and are assigned
+  # here, so they must be declared global: without the declaration the
+  # "while len_yytext" test raises UnboundLocalError. yy_buffer_stack is
+  # only mutated, never rebound, so it needs no declaration.
+  global yyin, len_yytext
+  while len_yytext:
+    block = yy_buffer_stack[-1].next
+    while block is None or block.pos >= len(block.text):
+      if block is None:
+        # active buffer has no blocks left, fall back to the one beneath it
+        yy_buffer_stack.pop()
+        block = yy_buffer_stack[-1].next
+        yyin = yy_buffer_stack[-1].file_in
+      else:
+        # block fully consumed, unlink it from the chain
+        block = block.next
+        yy_buffer_stack[-1].next = block
+    i = min(len_yytext, len(block.text) - block.pos)
+    block.pos += i
+    len_yytext -= i
+  yy_buffer_stack[-1].next = YYBufferBlock(yy_buffer_stack[-1].next, 0, text)
+
+# GENERATE SECTION2
+
+def yylex():
+  # Scan the next token from the buffer stack / yyin and run its action.
+  #
+  # Returns whatever the selected entry of yy_actions (or, at end of input,
+  # yy_eof_actions) returns, or 0 if the action raises YYTerminate. Raises
+  # Exception('scanner jammed') when no candidate action accepts the match.
+  # The tables yy_dfa_start_action, yy_dfa_actions, yy_dfa_states, yy_actions
+  # and yy_eof_actions are not defined in this skeleton (presumably emitted
+  # at the GENERATE SECTION2 marker -- confirm against the generator).
+  global yyin, yy_threads0, yy_threads1, yy_prefix_slop, yytext, len_yytext
+
+  # GENERATE SECTION2INITIAL
+
+  y_tab.yy_element_space = element.Element()
+  y_tab.yy_element_token = element.Element()
+  while True:
+    # consume the previous match: advance the buffer by len_yytext characters,
+    # unlinking exhausted blocks and popping exhausted buffer states
+    while len_yytext:
+      block = yy_buffer_stack[-1].next
+      while block is None or block.pos >= len(block.text):
+        if block is None:
+          yy_buffer_stack.pop()
+          block = yy_buffer_stack[-1].next
+          yyin = yy_buffer_stack[-1].file_in
+        else:
+          block = block.next
+          yy_buffer_stack[-1].next = block
+      i = min(len_yytext, len(block.text) - block.pos)
+      block.pos += i
+      len_yytext -= i
+    yytext = ''
+
+    # reset the thread list to the padding plus a single empty thread
+    del yy_threads0[yy_prefix_slop:]
+    yy_threads0.append(None)
+
+    # lookahead cursor: walks the blocks via block_pos without touching
+    # block.pos, so nothing is consumed until the next call / iteration
+    buffer_ptr = len(yy_buffer_stack) - 1
+    block_prev = yy_buffer_stack[buffer_ptr]
+    block = block_prev.next
+    if block is not None:
+      block_pos = block.pos
+    file_in = yyin
+
+    # run the DFA until it yields action -1
+    action = yy_dfa_start_action[yystart]
+    while action != -1:
+      state, transition = yy_dfa_actions[action]
+      #print('i', i, 'action', action, 'state', state, 'transition', transition)
+
+      # apply the action's transition ops, building yy_threads1 from
+      # yy_threads0; i is the read position in yy_threads0
+      i = yy_prefix_slop
+      assert len(yy_threads1) == yy_prefix_slop
+      for trans in transition:
+        if trans[0] == 0: #DFA.TRANSITION_POP:
+          i += trans[1]
+        elif trans[0] == 1: #DFA.TRANSITION_DUP:
+          # double the padding at the front of both lists until there is
+          # room for trans[1] entries before position i
+          while i < trans[1]:
+            yy_threads0[:0] = [None] * yy_prefix_slop
+            yy_threads1[:0] = [None] * yy_prefix_slop
+            i += yy_prefix_slop
+            yy_prefix_slop *= 2
+          yy_threads0[i - trans[1]:i] = yy_threads0[i:i + trans[1]]
+          i -= trans[1]
+        elif trans[0] == 2: #DFA.TRANSITION_MARK:
+          # wrap each thread in a (position, mark, previous thread) tuple
+          yy_threads0[i:i + trans[1]] = [
+            (len_yytext, trans[2], thread)
+            for thread in yy_threads0[i:i + trans[1]]
+          ]
+        elif trans[0] == 3: #DFA.TRANSITION_MOVE:
+          yy_threads1.extend(yy_threads0[i:i + trans[1]])
+          i += trans[1]
+        #elif trans[0] == DFA.TRANSITION_DEL:
+        #  del yy_threads1[-trans[1]:]
+        else:
+          assert False
+      assert i == len(yy_threads0)
+      # the new list becomes current, the old one is cut back to its padding
+      yy_threads0, yy_threads1 = yy_threads1, yy_threads0
+      del yy_threads1[yy_prefix_slop:]
+
+      if state == 0:
+        # there is only one match, which is complete
+        assert len(yy_threads0) == yy_prefix_slop + 1
+        assert yy_dfa_states[state][2] == [0]
+        break
+
+      # fetch the next character; this is a while/else: the else branch runs
+      # once a character is available, the break below (end of input) skips it
+      while block is None or block_pos >= len(block.text):
+        if block is None:
+          # chain exhausted, read another line from the file
+          text = file_in.readline()
+          if len(text):
+            block = YYBufferBlock(None, 0, text)
+            block_pos = 0
+            block_prev.next = block
+          else:
+            # file exhausted, look ahead into the buffer state beneath
+            buffer_ptr -= 1
+            if buffer_ptr < 0:
+              break # EOF
+            block_prev = yy_buffer_stack[buffer_ptr]
+            block = block_prev.next
+            if block is not None:
+              block_pos = block.pos
+            file_in = yy_buffer_stack[buffer_ptr].file_in
+        else:
+          # leaving this block: copy the characters scanned from it that are
+          # not yet in yytext, then step to the next block
+          i = len_yytext - len(yytext)
+          if i:
+            yytext += block.text[block_pos - i:]
+          block_prev = block
+          block = block_prev.next
+          if block is not None:
+            block_pos = block.pos
+      else: 
+        #print('block_pos', block_pos, 'block.text', block.text)
+        # look up the next action for this character in the state's
+        # breakpoint table
+        action = yy_dfa_states[state][1][
+          bisect.bisect_right(
+            yy_dfa_states[state][0],
+            ord(block.text[block_pos])
+          )
+        ]
+        block_pos += 1
+        len_yytext += 1
+        continue
+      # EOF
+      # NOTE(review): i holds whatever was assigned last (the transition loop
+      # above or the block-boundary code) -- confirm this is the intended
+      # "nothing scanned" test
+      if i == 0:
+        try:
+          return yy_eof_actions[yystart]()
+        except YYTerminate:
+          return 0
+      break
+
+    # copy any scanned characters of the current block not yet in yytext
+    i = len_yytext - len(yytext)
+    if i:
+      assert block is not None
+      yytext += block.text[block_pos - i:]
+
+    # try the accepting threads of the final state in order; this is a
+    # for/else: the else branch runs only if no action returned or continued
+    for i in yy_dfa_states[state][2]:
+      _, _, thread = yy_threads0[yy_prefix_slop + i]
+      #print('thread', thread)
+      len_yytext, mark, thread = thread
+      assert thread is None
+
+      # cut yytext back to the length recorded in the thread
+      assert len(yytext) >= len_yytext
+      yytext = yytext[:len_yytext]
+      #print('yytext', yytext)
+      element.set_text(y_tab.yy_element_token, 0, yytext)
+
+      try:
+        return yy_actions[mark >> 1]()
+      except YYReject:
+        pass
+      except YYContinue:
+        break
+      except YYTerminate:
+        return 0
+    else:
+      raise Exception('scanner jammed')
+
+    # append yy_element_token contents onto yy_element_space
+    i = len(y_tab.yy_element_space)
+    element.set_text(
+      y_tab.yy_element_space,
+      i,
+      element.get_text(y_tab.yy_element_space, i) +
+      element.get_text(y_tab.yy_element_token, 0)
+    )
+    y_tab.yy_element_space[i:] = y_tab.yy_element_token[:]
+    # clear yy_element_token for next yytext or EOF action
+    element.set_text(y_tab.yy_element_token, 0, '')
+    y_tab.yy_element_token[:] = []
+
+# GENERATE SECTION3