From 7e163c69bd0c80ddd20fc729ca8b036a851c9719 Mon Sep 17 00:00:00 2001 From: Nick Downing Date: Sun, 13 Jan 2019 00:01:09 +1100 Subject: [PATCH] Use full grammar with %start translation_unit_opt, wrap action code in a dummy function to compensate, improve translation of functions and block statements --- ansi_c.py | 8 +-- ansi_c.y | 4 +- ast.py | 143 ++++++++++++++++++++++++++++++++++++------------- c_to_python.py | 37 ++++++++----- l_to_python.py | 49 +++++++++++------ 5 files changed, 170 insertions(+), 71 deletions(-) mode change 100644 => 100755 c_to_python.py diff --git a/ansi_c.py b/ansi_c.py index b527a8c..bcf8f26 100755 --- a/ansi_c.py +++ b/ansi_c.py @@ -22,7 +22,7 @@ import sys import xml.etree.ElementTree import y_tab -_ast = y_tab.yyparse(ast.AST) -element.serialize(_ast, 'a.xml', 'utf-8') -_ast = element.deserialize('a.xml', ast.factory, 'utf-8') -xml.etree.ElementTree.dump(_ast) +root = y_tab.yyparse(ast.AST.TranslationUnit) +element.serialize(root, 'a.xml', 'utf-8') +root = element.deserialize('a.xml', ast.factory, 'utf-8') +xml.etree.ElementTree.dump(root) diff --git a/ansi_c.y b/ansi_c.y index ecec2ad..ef00cd4 100644 --- a/ansi_c.y +++ b/ansi_c.y @@ -15,7 +15,7 @@ %token ALIGNAS ALIGNOF ATOMIC GENERIC NORETURN STATIC_ASSERT THREAD_LOCAL -%start block_item_list_opt +%start translation_unit_opt %expect 2 @@ -529,7 +529,6 @@ block_item | statement ; -/* changed %start to block_item_list_opt rather than translation_list translation_unit_opt : | translation_unit @@ -558,7 +557,6 @@ declaration_list : declaration | declaration_list declaration ; -*/ identifier_opt : (?E{ast.AST.IdentifierEmpty}) diff --git a/ast.py b/ast.py index f2aceef..3e356bd 100644 --- a/ast.py +++ b/ast.py @@ -6,7 +6,6 @@ class Context: self, indent = '', enclosing_loop = None, - #initial = False, translate_identifier = { 'NULL': 'None', 'false': 'False', @@ -14,9 +13,8 @@ class Context: 'true': 'True' } ): - self.indent = ' ' + self.indent = indent self.enclosing_loop = enclosing_loop - #self.initial = initial # whether to add declared identifiers as 'self.' self.translate_identifier = translate_identifier class AST(element.Element): @@ -256,10 +254,21 @@ class AST(element.Element): return 'ast.AST.BlockItemList({0:s})'.format(', '.join(params)) # GENERATE END def translate(self, context): + result = [] for i in self: - i.translate(context) - for i in range(len(self) + 1): - element.set_text(self, i, '') + if isinstance(i, AST.StatementBlock): + assert isinstance(i[0], AST.BlockItemList) + i[0].translate(context) + result.extend(i[0]) + else: + i.translate(context) + result.append(i) + self[:] = result + if len(self) == 0: + element.set_text(self, 0, '{0:s}pass\n'.format(context.indent)) + else: + for i in range(len(self) + 1): + element.set_text(self, i, '') class CommaEllipsis(Element): # GENERATE ELEMENT() BEGIN @@ -556,11 +565,19 @@ class AST(element.Element): def translate(self, context): assert len(self) == 3 self[0].translate(context) - #initial_save = context.initial - #context.initial = False self[1].translate(context) + assert isinstance(self[1], AST.ParameterDeclarationList) + if len(self[1]) == 1: + assert isinstance(self[1][0], AST.ParameterDeclaration) + assert isinstance(self[1][0][0], AST.DeclarationSpecifierList) + if ( + len(self[1][0][0]) == 1 and + isinstance(self[1][0][0][0], AST.TypeSpecifier) and + self[1][0][0][0].n == 0 and + isinstance(self[1][0][1], AST.DeclaratorAbstract) + ): + del self[1][:] self[2].translate(context) - #context.initial = initial_save element.set_text(self, 0, '') element.set_text(self, 1, '(') element.set_text(self, 2, '') @@ -596,10 +613,7 @@ class AST(element.Element): def translate(self, context): assert len(self) == 2 self[0].translate(context) - #initial_save = context.initial - #context.initial = False self[1].translate(context) - #context.initial = initial_save element.set_text(self, 0, '') element.set_text(self, 1, '(') element.set_text(self, 2, ')') @@ -631,12 +645,6 @@ class AST(element.Element): self.repr_serialize(params) return 'ast.AST.DeclaratorIdentifier({0:s})'.format(', '.join(params)) # GENERATE END - def translate(self, context): - #if context.initial: - # text = element.get_text(self[0], 0) - # assert text not in context.translate_identifier - # context.translate_identifier[text] = 'self.{0:s}'.format(text) - AST.Declarator.translate(self, context) class DeclaratorPointer(Declarator): # GENERATE ELEMENT() BEGIN @@ -2779,6 +2787,18 @@ class AST(element.Element): self.repr_serialize(params) return 'ast.AST.FunctionDefinition({0:s})'.format(', '.join(params)) # GENERATE END + def translate(self, context): + assert len(self) == 4 + del self[0] + self[0].translate(context) + del self[1] + indent_save = context.indent + context.indent += ' ' + self[1].translate(context) + context.indent = indent_save + element.set_text(self, 0, '{0:s}def '.format(context.indent)) + element.set_text(self, 1, ':\n') + element.set_text(self, 2, '') class FunctionSpecifier(Element): # GENERATE ELEMENT(int n) BEGIN @@ -3164,7 +3184,10 @@ class AST(element.Element): # GENERATE END def translate(self, context): assert len(self) == 1 + indent_save = context.indent + context.indent += ' ' self[0].translate(context) + context.indent = indent_save element.set_text(self, 0, '') element.set_text(self, 1, '') @@ -3255,10 +3278,7 @@ class AST(element.Element): assert len(self) == 0 if isinstance(enclosing_loop, StatementDoWhile): self.append(enclosing_loop[1].copy()) - indent_save = context.indent - context.indent += ' ' self[0].translate(context) - context.indent = indent_save element.set_text(self, 0, '{0:s}if '.format(context.indent)) element.set_text( self, @@ -3270,10 +3290,7 @@ class AST(element.Element): ) elif isinstance(enclosing_loop, StatementFor): self.append(enclosing_loop[2].copy()) - indent_save = context.indent - context.indent += ' ' self[0].translate(context) - context.indent = indent_save element.set_text(self, 0, '') element.set_text(self, 1, '{0:s}continue\n'.format(context.indent)) else: @@ -3336,14 +3353,18 @@ class AST(element.Element): # GENERATE END def translate(self, context): assert len(self) == 2 - indent_save = context.indent - context.indent += ' ' enclosing_loop_save = context.enclosing_loop context.enclosing_loop = self + self[0] = AST.StatementBlock( + children = [ + AST.BlockItemList( + children = [self[0]] + ) + ] + ) self[0].translate(context) context.enclosing_loop = enclosing_loop_save self[1].translate(context) - context.indent = indent_save element.set_text(self, 0, '{0:s}while True:\n'.format(context.indent)) element.set_text(self, 1, '{0:s} if not ('.format(context.indent)) element.set_text(self, 2, '):\n{0:s} break\n'.format(context.indent)) @@ -3461,11 +3482,15 @@ class AST(element.Element): # GENERATE END def translate(self, context): assert len(self) == 2 - indent_save = context.indent - context.indent += ' ' self[0].translate(context) + self[1] = AST.StatementBlock( + children = [ + AST.BlockItemList( + children = [self[1]] + ) + ] + ) self[1].translate(context) - context.indent = indent_save element.set_text(self, 0, '{0:s}if '.format(context.indent)) element.set_text(self, 1, ':\n') element.set_text(self, 2, '') @@ -3499,12 +3524,23 @@ class AST(element.Element): # GENERATE END def translate(self, context): assert len(self) == 3 - indent_save = context.indent - context.indent += ' ' self[0].translate(context) + self[1] = AST.StatementBlock( + children = [ + AST.BlockItemList( + children = [self[1]] + ) + ] + ) self[1].translate(context) + self[2] = AST.StatementBlock( + children = [ + AST.BlockItemList( + children = [self[2]] + ) + ] + ) self[2].translate(context) - context.indent = indent_save element.set_text(self, 0, '{0:s}if '.format(context.indent)) element.set_text(self, 1, ':\n') element.set_text(self, 2, '{0:s}else:\n'.format(context.indent)) @@ -3623,14 +3659,18 @@ class AST(element.Element): # GENERATE END def translate(self, context): assert len(self) == 2 - indent_save = context.indent - context.indent += ' ' self[0].translate(context) enclosing_loop_save = context.enclosing_loop context.enclosing_loop = self + self[1] = AST.StatementBlock( + children = [ + AST.BlockItemList( + children = [self[1]] + ) + ] + ) self[1].translate(context) context.enclosing_loop = enclosing_loop_save - context.indent = indent_save element.set_text(self, 0, '{0:s}while '.format(context.indent)) element.set_text(self, 1, ':\n') element.set_text(self, 2, '') @@ -4056,6 +4096,34 @@ class AST(element.Element): return 'ast.AST.UnionSpecifier({0:s})'.format(', '.join(params)) # GENERATE END + class TranslationUnit(Element): + # GENERATE ELEMENT() BEGIN + def __init__( + self, + tag = 'AST_TranslationUnit', + attrib = {}, + text = '', + children = [] + ): + AST.Element.__init__( + self, + tag, + attrib, + text, + children + ) + def copy(self, factory = None): + result = AST.Element.copy( + self, + TranslationUnit if factory is None else factory + ) + return result + def __repr__(self): + params = [] + self.repr_serialize(params) + return 'ast.AST.TranslationUnit({0:s})'.format(', '.join(params)) + # GENERATE END + # GENERATE ELEMENT() BEGIN def __init__( self, @@ -4216,7 +4284,8 @@ tag_to_class = { 'AST_TypeQualifierList': AST.TypeQualifierList, 'AST_TypeQualifierOrStaticList': AST.TypeQualifierOrStaticList, 'AST_TypeSpecifier': AST.TypeSpecifier, - 'AST_UnionSpecifier': AST.UnionSpecifier + 'AST_UnionSpecifier': AST.UnionSpecifier, + 'AST_TranslationUnit': AST.TranslationUnit } def factory(tag, attrib = {}, *args, **kwargs): return tag_to_class.get(tag, element.Element)(tag, attrib, *args, **kwargs) diff --git a/c_to_python.py b/c_to_python.py old mode 100644 new mode 100755 index a60fed5..e092700 --- a/c_to_python.py +++ b/c_to_python.py @@ -1,16 +1,29 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2018 Nick Downing +# SPDX-License-Identifier: GPL-2.0-only +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation; version 2. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., 51 +# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + import ast import element -import lex_yy -#import xml.etree.ElementTree +import sys +import xml.etree.ElementTree import y_tab -def c_to_python(context, text): - lex_yy.yyin = None - lex_yy.yy_buffer_stack = [lex_yy.YYBufferState(None, None)] - lex_yy.yytext_len = 0 - lex_yy.unput(text) - root = y_tab.yyparse(ast.AST.BlockItemList) - root.translate(context) - #print('@@@') - #xml.etree.ElementTree.dump(root) - return element.to_text(root) +root = y_tab.yyparse(ast.AST.TranslationUnit) +element.serialize(root, 'a.xml', 'utf-8') +root = element.deserialize('a.xml', ast.factory, 'utf-8') +root.translate(ast.Context()) +sys.stdout.write(element.to_text(root)) diff --git a/l_to_python.py b/l_to_python.py index 0411558..d954ba2 100755 --- a/l_to_python.py +++ b/l_to_python.py @@ -1,19 +1,12 @@ #!/usr/bin/env python3 import ast -import c_to_python import element +import lex_yy import os import sys import xml.etree.ElementTree - -root = xml.etree.ElementTree.parse( - sys.stdin, - xml.etree.ElementTree.XMLParser( - target = xml.etree.ElementTree.TreeBuilder(element.Element), - encoding = 'unicode' - ) -).getroot() +import y_tab def my_rstrip(text, indent): i = len(text) @@ -26,6 +19,25 @@ def my_rstrip(text, indent): i -= 1 return text[:i].rstrip('\t ') + indent + text[i:] +def c_to_python(context, text): + lex_yy.yyin = None + lex_yy.yy_buffer_stack = [lex_yy.YYBufferState(None, None)] + lex_yy.yytext_len = 0 + lex_yy.unput(text) + root = y_tab.yyparse(ast.AST.TranslationUnit) + root.translate(context) + #print('@@@') + #xml.etree.ElementTree.dump(root) + return element.to_text(root) + +root = xml.etree.ElementTree.parse( + sys.stdin, + xml.etree.ElementTree.XMLParser( + target = xml.etree.ElementTree.TreeBuilder(element.Element), + encoding = 'unicode' + ) +).getroot() + context = ast.Context() #context.translate_identifier['BEGIN'] = 'self.BEGIN' #context.translate_identifier['yylval'] = 'ref_data.yylval' @@ -129,7 +141,7 @@ with open('a.i') as fin: pass elif line[:11] == '@@@ IMPORT(' and line[-2:] == ')\n': # make the importing look like a function call in the C code: - lines.append('import("{0:s}");\n'.format(line[11:-2])) + #lines.append('import("{0:s}");\n'.format(line[11:-2])) line = fin.readline() while line != '@@@ IMPORT END\n': assert len(line) @@ -137,15 +149,22 @@ with open('a.i') as fin: else: lines.append(line) line = fin.readline() - context.indent = indent + ' ' - context.initial = initial - text = c_to_python.c_to_python(context, ''.join(lines)) + text = ''.join(lines) + if initial: - element.set_text(i[0], 0, text) + context.indent = indent + ' ' + text = c_to_python(context, text) else: assert parent.tag == 'AST_Section2_Rule' assert len(parent) == 3 prefix = element.get_text(parent, 2).rstrip('\t ') + context.indent = indent + text = c_to_python( + context, + 'void a(void) {{\n{0:s}}}\n'.format(text) + ) + assert text[:len(indent) + 9] == indent + 'def a():\n' + text = text[len(indent) + 9:] if len(text) == 0: text = '\n' elif text == '\n': @@ -160,7 +179,7 @@ with open('a.i') as fin: indent ) element.set_text(parent, 2, prefix) - element.set_text(i[0], 0, text) + element.set_text(i[0], 0, text) xml.etree.ElementTree.ElementTree(root).write( sys.stdout, -- 2.34.1