l_to_python.py

   1 #!/usr/bin/env python3
   2
import os
import subprocess
import sys
import xml.etree.ElementTree

import t_def
import element
import lex_yy
import y_tab
  10
  11 def my_rstrip(text, indent):
  12   i = len(text)
  13   while i > 0 and text[i - 1] == '}':
  14     i -= 1
  15     assert i > 0
  16     while text[i - 1] != '{':
  17       i -= 1
  18       assert i > 0
  19     i -= 1
  20   return text[:i].rstrip('\t ') + indent + text[i:]
  21
  22 def c_to_python(context, text):
  23   lex_yy.yyin = None
  24   lex_yy.yy_buffer_stack = [lex_yy.YYBufferState()]
  25   lex_yy.yytext_len = 0
  26   lex_yy.unput(text)
  27   root = y_tab.yyparse(t_def.AST.TranslationUnit)
  28   context.lines = []
  29   root.translate_translation_unit(context)
  30   return ''.join(context.lines)
  31
  32 root = xml.etree.ElementTree.parse(
  33   sys.stdin,
  34   xml.etree.ElementTree.XMLParser(
  35     target = xml.etree.ElementTree.TreeBuilder(element.Element),
  36     encoding = 'unicode'
  37   )
  38 ).getroot()
  39
  40 context = t_def.Context()
  41
  42 actions = []
  43 with open('a.c', 'w') as fout:
  44   def extract(i, parent, indent):
  45     if i.tag == 'AST_Section1Or2_CodeBlock':
  46       node = i[0]
  47       assert node.tag == 'AST_Text'
  48       if parent.tag == 'AST_Section1':
  49         indent += '  '
  50         initial = True
  51       else:
  52         initial = False
  53     elif i.tag == 'AST_Section2_Rule_Action':
  54       if len(i) == 0: # continued actions
  55         assert parent.tag == 'AST_Section2_Rule'
  56         assert len(parent) == 3
  57         element.set_text(
  58           parent,
  59           2,
  60           element.get_text(parent, 2).rstrip('\t ') + ' /*COLUMN32*/ '
  61         )
  62         return
  63       node = i[0]
  64       assert node.tag == 'AST_Text'
  65       initial = False
  66     elif i.tag == 'AST_Section3':
  67       node = i
  68       initial = True
  69     else:
  70       child_indent = indent
  71       if i.tag == 'AST':
  72         element.set_text(
  73           i,
  74           0,
  75           element.get_text(i, 0).lstrip()
  76         )
  77         for j in range(1, len(i)):
  78           element.set_text(
  79             i,
  80             j,
  81             '{0:s}\n{1:s}'.format(
  82               element.get_text(i, j).strip(),
  83               '\n' if j == 2 else ''
  84             )
  85           )
  86         element.set_text(
  87           i,
  88           len(i),
  89           element.get_text(i, len(i)).lstrip()
  90         )
  91       elif i.tag == 'AST_Section1' or i.tag == 'AST_Section2':
  92         # kludge, concatenate single line codeblocks to see overall meaning,
  93         # hopefully we can make the scanner do this itself in Python version
  94         j = 0
  95         while j < len(i):
  96           if i[j].tag == 'AST_Section1Or2_CodeBlock':
  97             k = j + 1
  98             pre_delimiter = element.get_text(i[j], 0)
  99             post_delimiter = element.get_text(i[j], 1)
 100             while (
 101               k < len(i) and
 102               len(element.get_text(i, k).strip()) == 0 and
 103               i[k].tag == 'AST_Section1Or2_CodeBlock' and
 104               element.get_text(i[k], 0) == pre_delimiter and
 105               element.get_text(i[k], 1) == post_delimiter
 106             ):
 107               k += 1
 108             element.set_text(
 109               i[k - 1][0],
 110               0,
 111               ''.join([element.get_text(i[l][0], 0) for l in range(j, k)])
 112             )
 113             del i[j:k - 1]
 114           j += 1
 115       elif (
 116         i.tag == 'AST_Section2_Rule' or
 117         i.tag == 'AST_Section2_Rule_FLexRule'
 118       ):
 119         element.set_text(i, 0, element.get_text(i, 0).lstrip('\t '))
 120       elif i.tag == 'AST_Section2_CompoundRule':
 121         child_indent += '  '
 122         element.set_text(
 123           i,
 124           0,
 125           indent + element.get_text(i, 0).lstrip('\t ')
 126         )
 127         for j in range(1, len(i)):
 128           element.set_text(
 129             i,
 130             j,
 131             #element.get_text(i, j).rstrip('\t ') + child_indent
 132             my_rstrip(element.get_text(i, j), child_indent)
 133           )
 134         element.set_text(
 135           i,
 136           len(i),
 137           indent + element.get_text(i, len(i)).lstrip('\t ')
 138         )
 139       for j in i:
 140         extract(j, i, child_indent)
 141       return
 142     assert len(node) == 0
 143     text = element.get_text(node, 0)
 144
 145     j = 0
 146     while j < len(text):
 147       k = text.index('\n', j) + 1
 148       line = text[j:k]
 149       j = k
 150
 151       if (
 152         (line[:10] == '#include <' and line[-4:] == '.h>\n') or
 153         (line[:10] == '#include "' and line[-4:] == '.h"\n')
 154       ):
 155         name = line[10:-4].replace('/', '.')
 156         fout.write(
 157           '''@@@ IMPORT({0:s})
 158 {1:s}#undef NULL
 159 #undef bool
 160 #undef false
 161 #undef true
 162 @@@ IMPORT END\n'''.format(
 163             line[10:-4].replace('/', '.'),
 164             line
 165           )
 166         )
 167       else:
 168         fout.write(line)
 169     fout.write('@@@\n')
 170
 171     actions.append((node, parent, indent, initial))
 172   extract(root, None, '')
 173
 174 os.system(
 175   'gcc{0:s} -E a.c >a.i'.format(
 176     ''.join([' "{0:s}"'.format(i) for i in sys.argv[1:]])
 177   )
 178 )
 179 with open('a.i') as fin:
 180   for node, parent, indent, initial in actions:
 181     lines = []
 182     line = fin.readline()
 183     while line != '@@@\n':
 184       assert len(line)
 185       if (
 186         line[:1] == '#' or
 187         (line == '\n' and len(lines) and lines[-1] == '\n')
 188       ):
 189         pass
 190       elif line[:11] == '@@@ IMPORT(' and line[-2:] == ')\n':
 191         # make the importing look like a function call in the C code:
 192         #lines.append('import("{0:s}");\n'.format(line[11:-2]))
 193         line = fin.readline()
 194         while line != '@@@ IMPORT END\n':
 195           assert len(line)
 196           line = fin.readline()
 197       else:
 198         lines.append(line)
 199       line = fin.readline()
 200     text = ''.join(lines)
 201
 202     if initial:
 203       context.indent = indent
 204       text = c_to_python(context, text)
 205     else:
 206       context.indent = indent
 207       text = c_to_python(
 208         context,
 209         'void a(void) {{\n{0:s}}}\n'.format(text)
 210       )
 211       assert text[:len(indent) + 9] == '{0:s}def a():\n'.format(indent)
 212       text = text[len(indent) + 9:]
 213       if len(text) == 0:
 214         text = '\n'
 215       elif text == '\n':
 216         pass
 217       elif text.index('\n') == len(text) - 1:
 218         if parent.tag == 'AST_Section2_Rule':
 219           element.set_text(
 220             parent,
 221             2,
 222             element.get_text(parent, 2).rstrip('\t ') + ' /*COLUMN32*/ '
 223           )
 224         text = text.lstrip('\t ')
 225       else:
 226         if parent.tag == 'AST_Section2_Rule':
 227           element.set_text(
 228             parent,
 229             2,
 230             element.get_text(parent, 2).rstrip('\t ') + ' '
 231           )
 232           text = '{{\n{0:s}{1:s}}}\n'.format(text, indent)
 233     element.set_text(node, 0, text)
 234
 235 xml.etree.ElementTree.ElementTree(root).write(
 236   sys.stdout,
 237   encoding = 'unicode' # strangely does not seem to default to this
 238 )