l_to_python.py

   1 #!/usr/bin/env python3
   2
   3 import ast
   4 import element
   5 import lex_yy
   6 import os
   7 import sys
   8 import xml.etree.ElementTree
   9 import y_tab
  10
  11 def my_rstrip(text, indent):
  12   i = len(text)
  13   while i > 0 and text[i - 1] == '}':
  14     i -= 1
  15     assert i > 0
  16     while text[i - 1] != '{':
  17       i -= 1
  18       assert i > 0
  19     i -= 1
  20   return text[:i].rstrip('\t ') + indent + text[i:]
  21
  22 def c_to_python(context, text):
  23   lex_yy.yyin = None
  24   lex_yy.yy_buffer_stack = [lex_yy.YYBufferState(None, None)]
  25   lex_yy.yytext_len = 0
  26   lex_yy.unput(text)
  27   root = y_tab.yyparse(ast.AST.TranslationUnit)
  28   context.lines = []
  29   root.translate_translation_unit(context)
  30   return ''.join(context.lines)
  31
  32 root = xml.etree.ElementTree.parse(
  33   sys.stdin,
  34   xml.etree.ElementTree.XMLParser(
  35     target = xml.etree.ElementTree.TreeBuilder(element.Element),
  36     encoding = 'unicode'
  37   )
  38 ).getroot()
  39
  40 context = ast.Context()
  41 #context.translate_identifier['BEGIN'] = 'self.BEGIN'
  42 #context.translate_identifier['yylval'] = 'ref_data.yylval'
  43 #context.translate_identifier['yytext'] = 'self.yytext'
  44 #context.translate_identifier['yy_pop_state'] = 'self.yy_pop_state'
  45 #context.translate_identifier['yy_push_state'] = 'self.yy_push_state'
  46
  47 actions = []
  48 with open('a.c', 'w') as fout:
  49   def extract(i, parent, indent):
  50     if i.tag == 'AST_Section1Or2_CodeBlock':
  51       node = i[0]
  52       assert node.tag == 'AST_Text'
  53       indent += '  '
  54       initial = True
  55     elif i.tag == 'AST_Section2_Rule_Action':
  56       if len(i) == 0: # continued actions
  57         assert parent.tag == 'AST_Section2_Rule'
  58         assert len(parent) == 3
  59         element.set_text(
  60           parent,
  61           2,
  62           element.get_text(parent, 2).rstrip('\t ') + ' /*COLUMN32*/ '
  63         )
  64         return
  65       node = i[0]
  66       assert node.tag == 'AST_Text'
  67       initial = False
  68     elif i.tag == 'AST_Section3':
  69       node = i
  70       initial = True
  71     else:
  72       child_indent = indent
  73       if i.tag == 'AST':
  74         for j in range(1, len(i) + 1):
  75           element.set_text(i, j, element.get_text(i, j).rstrip() + '\n')
  76       elif i.tag == 'AST_Section1_StartConditions':
  77         for j in i:
  78           assert j.tag == 'AST_Name'
  79           text = element.get_text(j, 0)
  80           context.translate_identifier[text] = 'DFA.{0:s}'.format(text)
  81       elif (
  82         i.tag == 'AST_Section2_Rule' or
  83         i.tag == 'AST_Section2_Rule_FLexRule'
  84       ):
  85         element.set_text(i, 0, element.get_text(i, 0).lstrip('\t '))
  86       elif i.tag == 'AST_Section2_CompoundRule':
  87         child_indent += '  '
  88         element.set_text(
  89           i,
  90           0,
  91           indent + element.get_text(i, 0).lstrip('\t ')
  92         )
  93         for j in range(1, len(i)):
  94           element.set_text(
  95             i,
  96             j,
  97             #element.get_text(i, j).rstrip('\t ') + child_indent
  98             my_rstrip(element.get_text(i, j), child_indent)
  99           )
 100         element.set_text(
 101           i,
 102           len(i),
 103           indent + element.get_text(i, len(i)).lstrip('\t ')
 104         )
 105       for j in i:
 106         extract(j, i, child_indent)
 107       return
 108     assert len(node) == 0
 109     text = element.get_text(node, 0)
 110
 111     j = 0
 112     while j < len(text):
 113       k = text.index('\n', j) + 1
 114       line = text[j:k]
 115       j = k
 116
 117       if (
 118         (line[:10] == '#include <' and line[-4:] == '.h>\n') or
 119         (line[:10] == '#include "' and line[-4:] == '.h"\n')
 120       ):
 121         name = line[10:-4].replace('/', '.')
 122         fout.write(
 123           '''@@@ IMPORT({0:s})
 124 {1:s}#undef NULL
 125 #undef bool
 126 #undef false
 127 #undef true
 128 @@@ IMPORT END\n'''.format(
 129             line[10:-4].replace('/', '.'),
 130             line
 131           )
 132         )
 133       else:
 134         fout.write(line)
 135     fout.write('@@@\n')
 136
 137     actions.append((node, parent, indent, initial))
 138   extract(root, None, '')
 139
 140 os.system('gcc -I tests/flex_h -E a.c >a.i')
 141 with open('a.i') as fin:
 142   for node, parent, indent, initial in actions:
 143     lines = []
 144     line = fin.readline()
 145     while line != '@@@\n':
 146       assert len(line)
 147       if (
 148         line[:1] == '#' or
 149         (line == '\n' and len(lines) and lines[-1] == '\n')
 150       ):
 151         pass
 152       elif line[:11] == '@@@ IMPORT(' and line[-2:] == ')\n':
 153         # make the importing look like a function call in the C code:
 154         #lines.append('import("{0:s}");\n'.format(line[11:-2]))
 155         line = fin.readline()
 156         while line != '@@@ IMPORT END\n':
 157           assert len(line)
 158           line = fin.readline()
 159       else:
 160         lines.append(line)
 161       line = fin.readline()
 162     text = ''.join(lines)
 163
 164     if initial:
 165       context.indent = indent
 166       text = c_to_python(context, text)
 167     else:
 168       assert parent.tag == 'AST_Section2_Rule'
 169       assert len(parent) == 3
 170       prefix = element.get_text(parent, 2).rstrip('\t ')
 171       context.indent = indent
 172       text = c_to_python(
 173         context,
 174         'void a(void) {{\n{0:s}}}\n'.format(text)
 175       )
 176       assert text[:len(indent) + 10] == '\n{0:s}def a():\n'.format(indent)
 177       text = text[len(indent) + 10:]
 178       if len(text) == 0:
 179         text = '\n'
 180       elif text == '\n':
 181         pass
 182       elif text.index('\n') == len(text) - 1:
 183         prefix += ' /*COLUMN32*/ '
 184         text = text.lstrip('\t ')
 185       else:
 186         prefix += ' '
 187         text = '{{\n{0:s}{1:s}}}\n'.format(
 188           text,
 189           indent
 190         )
 191       element.set_text(parent, 2, prefix)
 192     element.set_text(node, 0, text)
 193
 194 xml.etree.ElementTree.ElementTree(root).write(
 195   sys.stdout,
 196   encoding = 'unicode' # strangely does not seem to default to this
 197 )