l_to_python.py

   1 #!/usr/bin/env python3
   2
import os
import subprocess
import sys
import xml.etree.ElementTree

import t_def
import lex_yy
import y_tab
   9
  10 def get_text(root, i):
  11   if i < 0:
  12     i += len(root) + 1
  13   text = root.text if i == 0 else root[i - 1].tail
  14   return '' if text is None else text
  15
  16 def set_text(root, i, text):
  17   if i < 0:
  18     i += len(root) + 1
  19   if len(text) == 0:
  20     text = None
  21   if i == 0:
  22     root.text = text
  23   else:
  24     root[i - 1].tail = text
  25
  26 def my_rstrip(text, indent):
  27   i = len(text)
  28   while i > 0 and text[i - 1] == '}':
  29     i -= 1
  30     assert i > 0
  31     while text[i - 1] != '{':
  32       i -= 1
  33       assert i > 0
  34     i -= 1
  35   return text[:i].rstrip('\t ') + indent + text[i:]
  36
  37 def c_to_python(context, text):
  38   lex_yy.yyin = None
  39   lex_yy.yy_buffer_stack = [lex_yy.YYBufferState()]
  40   lex_yy.yytext_len = 0
  41   lex_yy.unput(text)
  42   root = y_tab.yyparse(t_def.AST.TranslationUnit)
  43   context.lines = []
  44   root.translate_translation_unit(context)
  45   return ''.join(context.lines)
  46
  47 root = xml.etree.ElementTree.parse(sys.stdin).getroot()
  48
  49 context = t_def.Context()
  50
  51 actions = []
  52 with open('a.c', 'w') as fout:
  53   def extract(i, parent, indent):
  54     if i.tag == 'AST_Section1Or2_CodeBlock':
  55       node = i[0]
  56       assert node.tag == 'AST_Text'
  57       if parent.tag == 'AST_Section1':
  58         indent += '  '
  59         initial = True
  60       else:
  61         initial = False
  62     elif i.tag == 'AST_Section2_Rule_Action':
  63       if len(i) == 0: # continued actions
  64         assert parent.tag == 'AST_Section2_Rule'
  65         assert len(parent) == 3
  66         set_text(
  67           parent,
  68           2,
  69           get_text(parent, 2).rstrip('\t ') + ' /*COLUMN32*/ '
  70         )
  71         return
  72       node = i[0]
  73       assert node.tag == 'AST_Text'
  74       initial = False
  75     elif i.tag == 'AST_Section3':
  76       node = i
  77       initial = True
  78     else:
  79       child_indent = indent
  80       if i.tag == 'AST':
  81         set_text(
  82           i,
  83           0,
  84           get_text(i, 0).lstrip()
  85         )
  86         for j in range(1, len(i)):
  87           set_text(
  88             i,
  89             j,
  90             '{0:s}\n{1:s}'.format(
  91               get_text(i, j).strip(),
  92               '\n' if j == 2 else ''
  93             )
  94           )
  95         set_text(
  96           i,
  97           len(i),
  98           get_text(i, len(i)).lstrip()
  99         )
 100       elif i.tag == 'AST_Section1' or i.tag == 'AST_Section2':
 101         # kludge, concatenate single line codeblocks to see overall meaning,
 102         # hopefully we can make the scanner do this itself in Python version
 103         j = 0
 104         while j < len(i):
 105           if i[j].tag == 'AST_Section1Or2_CodeBlock':
 106             k = j + 1
 107             pre_delimiter = get_text(i[j], 0)
 108             post_delimiter = get_text(i[j], 1)
 109             while (
 110               k < len(i) and
 111               len(get_text(i, k).strip()) == 0 and
 112               i[k].tag == 'AST_Section1Or2_CodeBlock' and
 113               get_text(i[k], 0) == pre_delimiter and
 114               get_text(i[k], 1) == post_delimiter
 115             ):
 116               k += 1
 117             set_text(
 118               i[k - 1][0],
 119               0,
 120               ''.join([get_text(i[l][0], 0) for l in range(j, k)])
 121             )
 122             del i[j:k - 1]
 123           j += 1
 124       elif (
 125         i.tag == 'AST_Section2_Rule' or
 126         i.tag == 'AST_Section2_Rule_FLexRule'
 127       ):
 128         set_text(i, 0, get_text(i, 0).lstrip('\t '))
 129       elif i.tag == 'AST_Section2_CompoundRule':
 130         child_indent += '  '
 131         set_text(
 132           i,
 133           0,
 134           indent + get_text(i, 0).lstrip('\t ')
 135         )
 136         for j in range(1, len(i)):
 137           set_text(
 138             i,
 139             j,
 140             #get_text(i, j).rstrip('\t ') + child_indent
 141             my_rstrip(get_text(i, j), child_indent)
 142           )
 143         set_text(
 144           i,
 145           len(i),
 146           indent + get_text(i, len(i)).lstrip('\t ')
 147         )
 148       for j in i:
 149         extract(j, i, child_indent)
 150       return
 151     assert len(node) == 0
 152     text = get_text(node, 0)
 153
 154     j = 0
 155     while j < len(text):
 156       k = text.index('\n', j) + 1
 157       line = text[j:k]
 158       j = k
 159
 160       if (
 161         (line[:10] == '#include <' and line[-4:] == '.h>\n') or
 162         (line[:10] == '#include "' and line[-4:] == '.h"\n')
 163       ):
 164         name = line[10:-4].replace('/', '.')
 165         fout.write(
 166           '''@@@ IMPORT({0:s})
 167 {1:s}#undef NULL
 168 #undef bool
 169 #undef false
 170 #undef true
 171 @@@ IMPORT END\n'''.format(
 172             line[10:-4].replace('/', '.'),
 173             line
 174           )
 175         )
 176       else:
 177         fout.write(line)
 178     fout.write('@@@\n')
 179
 180     actions.append((node, parent, indent, initial))
 181   extract(root, None, '')
 182
 183 os.system(
 184   'gcc{0:s} -E a.c >a.i'.format(
 185     ''.join([' "{0:s}"'.format(i) for i in sys.argv[1:]])
 186   )
 187 )
 188 with open('a.i') as fin:
 189   for node, parent, indent, initial in actions:
 190     lines = []
 191     line = fin.readline()
 192     while line != '@@@\n':
 193       assert len(line)
 194       if (
 195         line[:1] == '#' or
 196         (line == '\n' and len(lines) and lines[-1] == '\n')
 197       ):
 198         pass
 199       elif line[:11] == '@@@ IMPORT(' and line[-2:] == ')\n':
 200         # make the importing look like a function call in the C code:
 201         #lines.append('import("{0:s}");\n'.format(line[11:-2]))
 202         line = fin.readline()
 203         while line != '@@@ IMPORT END\n':
 204           assert len(line)
 205           line = fin.readline()
 206       else:
 207         lines.append(line)
 208       line = fin.readline()
 209     text = ''.join(lines)
 210
 211     if initial:
 212       context.indent = indent
 213       text = c_to_python(context, text)
 214     else:
 215       context.indent = indent
 216       text = c_to_python(
 217         context,
 218         'void a(void) {{\n{0:s}}}\n'.format(text)
 219       )
 220       assert text[:len(indent) + 9] == '{0:s}def a():\n'.format(indent)
 221       text = text[len(indent) + 9:]
 222       if len(text) == 0:
 223         text = '\n'
 224       elif text == '\n':
 225         pass
 226       elif text.index('\n') == len(text) - 1:
 227         if parent.tag == 'AST_Section2_Rule':
 228           set_text(
 229             parent,
 230             2,
 231             get_text(parent, 2).rstrip('\t ') + ' /*COLUMN32*/ '
 232           )
 233         text = text.lstrip('\t ')
 234       else:
 235         if parent.tag == 'AST_Section2_Rule':
 236           set_text(
 237             parent,
 238             2,
 239             get_text(parent, 2).rstrip('\t ') + ' '
 240           )
 241           text = '{{\n{0:s}{1:s}}}\n'.format(text, indent)
 242     set_text(node, 0, text)
 243
 244 xml.etree.ElementTree.ElementTree(root).write(
 245   sys.stdout,
 246   encoding = 'unicode' # strangely does not seem to default to this
 247 )