From f9fdd965ce9ef49c28b50ca6dcd0d3d8d72faa66 Mon Sep 17 00:00:00 2001 From: Nick Downing Date: Mon, 14 Jan 2019 14:54:58 +1100 Subject: [PATCH] First cut at y_to_python.py, need to normalize whitespace and indent in *.y.new --- ansi_c.l | 4 +- ast.py | 53 ++++++++++++---- l_to_python.py | 7 +-- o.sh | 4 ++ tests/parse.y | 87 +++++++++++++++---------- xml_to_y.py | 9 +++ y_to_python.py | 168 +++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 282 insertions(+), 50 deletions(-) create mode 100755 o.sh create mode 100755 xml_to_y.py create mode 100755 y_to_python.py diff --git a/ansi_c.l b/ansi_c.l index 49900bf..01e37e2 100644 --- a/ansi_c.l +++ b/ansi_c.l @@ -85,7 +85,9 @@ import y_tab return y_tab.TYPEDEF_NAME } -(?E{ast.AST.Identifier}{L}{A}*) { + /* hack for yacc/bison specification */ +(?E{ast.AST.Identifier}{L}{A}*) | +(?E{ast.AST.Identifier}"$$"|"$"{D}+) { return y_tab.IDENTIFIER } diff --git a/ast.py b/ast.py index bcd537e..22bb2e1 100644 --- a/ast.py +++ b/ast.py @@ -403,6 +403,12 @@ class AST(element.Element): # GENERATE END def __str__(self): return 'void' + def translate_size(self, context): + return 0 + def translate_type(self, context): + return 'NoneType' + def translate_zero(self, context): + return 'None' class TypeInt(Type): # GENERATE ELEMENT(bool signed, int bits) BEGIN @@ -626,17 +632,27 @@ class AST(element.Element): def translate_size(self, context): return 4 def translate_type(self, context): - assert ( + return ( + 'str' + if ( isinstance(self.target_type, AST.TypeInt) and self.target_type.bits == 8 + ) else + 'list' ) - return 'str' def translate_zero(self, context): - assert ( - isinstance(self.target_type, AST.TypeInt) and - self.target_type.bits == 8 + return ( + ( + '\'\'' + if ( + isinstance(self.target_type, AST.TypeInt) and + self.target_type.bits == 8 + ) else + '[]' + ) + if context.top_level else + 'None' ) - return '\'\'' if context.top_level else 'None' class TypeArray(Type): # GENERATE ELEMENT(ref element_type, int element_count) BEGIN @@ -1120,9 +1136,14 @@ class AST(element.Element): return self[0].get_type_and_name( AST.TypeArray( element_type = base_type, - element_count = int( - element.get_text(self[2], 0), - 8 if element.get_text(self[2], 0)[:2] in octal_prefix else 0 + element_count = ( + -1 + if isinstance(self[2], AST.ExpressionEmpty) else + # kludgey way, assuming not calculated size + int( + element.get_text(self[2], 0), + 8 if element.get_text(self[2], 0)[:2] in octal_prefix else 0 + ) ) ) ) @@ -4961,6 +4982,7 @@ class AST(element.Element): return 'ast.AST({0:s})'.format(', '.join(params)) # GENERATE END +# void char short int long float double signed unsigned bool complex imaginary type_specifiers_to_type = { (1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0): AST.TypeVoid(), (0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 8), @@ -4969,18 +4991,27 @@ type_specifiers_to_type = { (0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 16), (0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 16), (0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 16), + (0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 16), + (0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 16), + (0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 16), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32), + (0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32), + (0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 32), (0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32), (0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32), (0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 32), (0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32), (0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32), - (0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32), (0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 32), - (0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 32), + (0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32), + (0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32), + (0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 32), (0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 64), (0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 64), (0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 64), + (0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 64), + (0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 64), + (0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 64), (0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0): AST.TypeFloat(complex = 0, bits = 32), (0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0): AST.TypeFloat(complex = 2, bits = 32), (0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1): AST.TypeFloat(complex = 1, bits = 32), diff --git a/l_to_python.py b/l_to_python.py index 1399a68..6c9473c 100755 --- a/l_to_python.py +++ b/l_to_python.py @@ -21,7 +21,7 @@ def my_rstrip(text, indent): def c_to_python(context, text): lex_yy.yyin = None - lex_yy.yy_buffer_stack = [lex_yy.YYBufferState(None, None)] + lex_yy.yy_buffer_stack = [lex_yy.YYBufferState()] lex_yy.yytext_len = 0 lex_yy.unput(text) root = y_tab.yyparse(ast.AST.TranslationUnit) @@ -184,10 +184,7 @@ with open('a.i') as fin: text = text.lstrip('\t ') else: prefix += ' ' - text = '{{\n{0:s}{1:s}}}\n'.format( - text, - indent - ) + text = '{{\n{0:s}{1:s}}}\n'.format(text, indent) element.set_text(parent, 2, prefix) element.set_text(node, 0, text) diff --git a/o.sh b/o.sh new file mode 100755 index 0000000..5a85fe7 --- /dev/null +++ b/o.sh @@ -0,0 +1,4 @@ +#!/bin/sh +../bootstrap_bison.git/src/bison -o /dev/null tests/parse.y 2>tests/parse.y.xml +./y_to_python.py tests/parse.y.new.xml +./xml_to_y.py tests/parse.y.new diff --git a/tests/parse.y b/tests/parse.y index 814842a..8811acc 100644 --- a/tests/parse.y +++ b/tests/parse.y @@ -12,6 +12,9 @@ %left CCL_OP_DIFF CCL_OP_UNION +/* Nick extra rules for action groups */ +%token TOK_ACTION_GROUP TOK_ELEMENT_GROUP + /* *POSIX and AT&T lex place the * precedence of the repeat operator, {}, below that of concatenation. @@ -173,14 +176,14 @@ initlex : sect1 : sect1 startconddecl namelist1 { - insert_after(2, ""); - sprintf(piece_temp, "", xcluflg ? "true" : "false"); + insert_after(2, ""); + sprintf(piece_temp, "", xcluflg ? "true" : "false"); insert_before(1, piece_temp); } | sect1 options { - insert_after(1, ""); - insert_before(1, ""); + insert_after(1, ""); + insert_before(1, ""); } | | error @@ -223,47 +226,47 @@ option : TOK_OUTFILE '=' NAME { outfilename = xstrdup(nmstr); did_outfilename = 1; - insert_after(2, ""); - insert_before(0,""); + insert_after(2, ""); + insert_before(0,""); } | TOK_EXTRA_TYPE '=' NAME { extra_type = xstrdup(nmstr); /*}*/ - insert_after(2, ""); - insert_before(0, ""); + insert_after(2, ""); + insert_before(0, ""); } | TOK_PREFIX '=' NAME { prefix = xstrdup(nmstr); if (strchr(prefix, '[') || strchr(prefix, ']')) flexerror(_("Prefix must not contain [ or ]")); /*}*/ - insert_after(2, ""); - insert_before(0, ""); + insert_after(2, ""); + insert_before(0, ""); } | TOK_YYCLASS '=' NAME { yyclass = xstrdup(nmstr); /*}*/ - insert_after(2, ""); - insert_before(0, ""); + insert_after(2, ""); + insert_before(0, ""); } | TOK_HEADER_FILE '=' NAME { headerfilename = xstrdup(nmstr); /*}*/ - insert_after(2, ""); - insert_before(0, ""); + insert_after(2, ""); + insert_before(0, ""); } | TOK_TABLES_FILE '=' NAME { tablesext = true; tablesfilename = xstrdup(nmstr); /*}*/ - insert_after(2, ""); - insert_before(0, ""); + insert_after(2, ""); + insert_before(0, ""); } ; sect2 : sect2 scon initforrule flexrule '\n' { scon_stk_ptr = $2; /*}*/ - insert_after(4, ""); - insert_before(1, ""); + insert_after(4, ""); + insert_before(1, ""); } | sect2 scon '{' sect2 '}' { scon_stk_ptr = $2; /*}*/ - insert_after(4, ""); - insert_before(1, ""); + insert_after(4, ""); + insert_before(1, ""); } | ; @@ -315,8 +318,8 @@ flexrule : '^' rule pinpoint_message( "'^' operator results in sub-optimal performance" ); } - insert_after(1, ""); - insert_before(0, ""); + insert_after(1, ""); + insert_before(0, ""); } | rule @@ -341,8 +344,8 @@ flexrule : '^' rule mkbranch( scset[i], pat ); } - insert_after(0, ""); - insert_before(0, ""); + insert_after(0, ""); + insert_before(0, ""); } | EOF_OP @@ -366,8 +369,8 @@ flexrule : '^' rule else build_eof_action(); } - insert_after(0, ""); - insert_before(0, ""); + insert_after(0, ""); + insert_before(0, ""); } | error @@ -380,8 +383,8 @@ scon_stk_ptr : scon : '<' scon_stk_ptr namelist2 '>' { $$ = $2; /*}*/ - insert_after(3, ""); - insert_before(0, ""); + insert_after(3, ""); + insert_before(0, ""); } | '<' '*' '>' @@ -399,8 +402,8 @@ scon : '<' scon_stk_ptr namelist2 '>' if ( j > scon_stk_ptr ) scon_stk[++scon_stk_ptr] = i; } - insert_after(2, ""); - insert_before(0, ""); + insert_after(2, ""); + insert_before(0, ""); } | @@ -413,7 +416,7 @@ scon : '<' scon_stk_ptr namelist2 '>' piece[piece2 + 1] = piece[piece2]; /* empty */ piece[piece2] = piece[piece2 - 1]; /* empty */ piece[piece2 - 1] = temp; - insert_before(0, ""); + insert_before(0, ""); } ; @@ -871,11 +874,29 @@ singleton : singleton '*' | '(' re ')' { $$ = $2; /*}*/ -#if 0 /* for now do things in the traditional lex way without subexpressions */ insert_after(2, ""); insert_before(0, ""); -#endif } + /* Nick extra rules for unnumbered groups */ + | '(' ':' re ')' + { $$ = $3; } + /* Nick extra rules for named groups */ + | '(' NAME re ')' + { $$ = $3; /*}*/ + insert_after(3, ""); + insert_before(0, ""); +} + /* Nick extra rules for action groups */ + | '(' TOK_ACTION_GROUP re ')' + { $$ = $3; /*}*/ + insert_after(3, ""); + insert_before(0, ""); +} + | '(' TOK_ELEMENT_GROUP re ')' + { $$ = $3; /*}*/ + insert_after(3, ""); + insert_before(0, ""); +} | CHAR { diff --git a/xml_to_y.py b/xml_to_y.py new file mode 100755 index 0000000..6cb6974 --- /dev/null +++ b/xml_to_y.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python3 + +import element +import sys +import xml.etree.ElementTree + +sys.stdout.write( + element.to_text(xml.etree.ElementTree.parse(sys.stdin).getroot()) +) diff --git a/y_to_python.py b/y_to_python.py new file mode 100755 index 0000000..6333847 --- /dev/null +++ b/y_to_python.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 + +import ast +import element +import lex_yy +import os +import sys +import xml.etree.ElementTree +import y_tab + +def my_rstrip(text, indent): + i = len(text) + while i > 0 and text[i - 1] == '}': + i -= 1 + assert i > 0 + while text[i - 1] != '{': + i -= 1 + assert i > 0 + i -= 1 + return text[:i].rstrip('\t ') + indent + text[i:] + +def c_to_python(context, text): + lex_yy.yyin = None + lex_yy.yy_buffer_stack = [lex_yy.YYBufferState()] + lex_yy.yytext_len = 0 + lex_yy.unput(text) + root = y_tab.yyparse(ast.AST.TranslationUnit) + context.lines = [] + root.translate_translation_unit(context) + return ''.join(context.lines) + +root = xml.etree.ElementTree.parse( + sys.stdin, + xml.etree.ElementTree.XMLParser( + target = xml.etree.ElementTree.TreeBuilder(element.Element), + encoding = 'unicode' + ) +).getroot() + +context = ast.Context() +#context.translate_identifier['BEGIN'] = 'self.BEGIN' +#context.translate_identifier['yylval'] = 'ref_data.yylval' +#context.translate_identifier['yytext'] = 'self.yytext' +#context.translate_identifier['yy_pop_state'] = 'self.yy_pop_state' +#context.translate_identifier['yy_push_state'] = 'self.yy_push_state' + +actions = [] +with open('a.c', 'w') as fout: + def extract(i, indent): + if i.tag == 'AST_Section1_Prologue': + node = i[0] + assert node.tag == 'AST_Text' + indent += ' ' + initial = True + elif i.tag == 'AST_Production_Action': + node = i[0] + assert node.tag == 'AST_Text' + initial = False + elif i.tag == 'AST_Section3': + node = i + initial = True + else: + child_indent = indent + if i.tag == 'AST': + for j in range(1, len(i) + 1): + element.set_text(i, j, element.get_text(i, j).rstrip() + '\n') + #elif ( + # i.tag == 'AST_Section2_Rule' or + # i.tag == 'AST_Section2_Rule_FLexRule' + #): + # element.set_text(i, 0, element.get_text(i, 0).lstrip('\t ')) + #elif i.tag == 'AST_Section2_CompoundRule': + # child_indent += ' ' + # element.set_text( + # i, + # 0, + # indent + element.get_text(i, 0).lstrip('\t ') + # ) + # for j in range(1, len(i)): + # element.set_text( + # i, + # j, + # #element.get_text(i, j).rstrip('\t ') + child_indent + # my_rstrip(element.get_text(i, j), child_indent) + # ) + # element.set_text( + # i, + # len(i), + # indent + element.get_text(i, len(i)).lstrip('\t ') + # ) + for j in i: + extract(j, child_indent) + return + #assert len(node) == 0 + #text = element.get_text(node, 0) + text = element.to_text(node) + + lines = [i.rstrip() for i in text.split('\n')] + while len(lines) and len(lines[-1]) == 0: + del lines[-1] + while len(lines) and len(lines[0]) == 0: + del lines[0] + for line in lines: + if ( + (line[:10] == '#include <' and line[-3:] == '.h>') or + (line[:10] == '#include "' and line[-3:] == '.h"') + ): + fout.write( + '''@@@ IMPORT({0:s}) +{1:s} +#undef NULL +#undef bool +#undef false +#undef true +@@@ IMPORT END\n'''.format( + line[10:-3].replace('/', '.'), + line + ) + ) + else: + fout.write(line + '\n') + fout.write('@@@\n') + + actions.append((node, indent, initial)) + extract(root, '') + +os.system('gcc -I tests/flex_h -E a.c >a.i') +with open('a.i') as fin: + for node, indent, initial in actions: + lines = [] + line = fin.readline() + while line != '@@@\n': + assert len(line) + if ( + line[:1] == '#' or + (line == '\n' and len(lines) and lines[-1] == '\n') + ): + pass + elif line[:11] == '@@@ IMPORT(' and line[-2:] == ')\n': + # make the importing look like a function call in the C code: + #lines.append('import("{0:s}");\n'.format(line[11:-2])) + line = fin.readline() + while line != '@@@ IMPORT END\n': + assert len(line) + line = fin.readline() + else: + lines.append(line) + line = fin.readline() + text = ''.join(lines) + + if initial: + context.indent = indent + text = c_to_python(context, text) + else: + context.indent = indent + text = c_to_python( + context, + 'void a(void) {0:s}'.format(text) # already has braces and \n + ) + assert text[:len(indent) + 10] == '\n{0:s}def a():\n'.format(indent) + text = text[len(indent) + 10:] + text = '{{\n{0:s}{1:s}}}\n'.format(text, indent) + element.set_text(node, 0, text) + +xml.etree.ElementTree.ElementTree(root).write( + sys.stdout, + encoding = 'unicode' # strangely does not seem to default to this +) -- 2.34.1