From f0e381e7e467e390dbd871db3caec2ac8575425e Mon Sep 17 00:00:00 2001 From: Nick Downing Date: Sat, 12 Jan 2019 14:12:16 +1100 Subject: [PATCH] Reinstate the translation based on the new pilex/piyacc parser, tidy up a bit --- ast.py | 57 +++++++++++---- c_to_python.py | 23 ++---- element.py | 10 +++ l_to_python.py | 37 +++++----- tests/scan.l | 186 ++++++++++++++++++++++++++++++++++++------------- 5 files changed, 215 insertions(+), 98 deletions(-) diff --git a/ast.py b/ast.py index cb04951..f2aceef 100644 --- a/ast.py +++ b/ast.py @@ -6,7 +6,7 @@ class Context: self, indent = '', enclosing_loop = None, - initial = False, + #initial = False, translate_identifier = { 'NULL': 'None', 'false': 'False', @@ -16,10 +16,38 @@ class Context: ): self.indent = ' ' self.enclosing_loop = enclosing_loop - self.initial = initial # whether to add declared identifiers as 'self.' + #self.initial = initial # whether to add declared identifiers as 'self.' self.translate_identifier = translate_identifier class AST(element.Element): + class Text(element.Element): + # GENERATE ELEMENT() BEGIN + def __init__( + self, + tag = 'AST_Text', + attrib = {}, + text = '', + children = [] + ): + element.Element.__init__( + self, + tag, + attrib, + text, + children + ) + def copy(self, factory = None): + result = element.Element.copy( + self, + Text if factory is None else factory + ) + return result + def __repr__(self): + params = [] + self.repr_serialize(params) + return 'ast.AST.Text({0:s})'.format(', '.join(params)) + # GENERATE END + class Element(element.Element): # GENERATE ELEMENT() BEGIN def __init__( @@ -105,7 +133,7 @@ class AST(element.Element): return 'ast.AST.DeclarationOrStatement({0:s})'.format(', '.join(params)) # GENERATE END def translate(self, context): - Element.translate(self, context) + AST.Element.translate(self, context) element.set_text( self, 0, @@ -528,11 +556,11 @@ class AST(element.Element): def translate(self, context): assert len(self) == 3 self[0].translate(context) - initial_save = context.initial - context.initial = False + #initial_save = context.initial + #context.initial = False self[1].translate(context) self[2].translate(context) - context.initial = initial_save + #context.initial = initial_save element.set_text(self, 0, '') element.set_text(self, 1, '(') element.set_text(self, 2, '') @@ -568,10 +596,10 @@ class AST(element.Element): def translate(self, context): assert len(self) == 2 self[0].translate(context) - initial_save = context.initial - context.initial = False + #initial_save = context.initial + #context.initial = False self[1].translate(context) - context.initial = initial_save + #context.initial = initial_save element.set_text(self, 0, '') element.set_text(self, 1, '(') element.set_text(self, 2, ')') @@ -604,11 +632,11 @@ class AST(element.Element): return 'ast.AST.DeclaratorIdentifier({0:s})'.format(', '.join(params)) # GENERATE END def translate(self, context): - if context.initial: - text = element.get_text(self[0], 0) - assert text not in context.translate_identifier - context.translate_identifier[text] = 'self.{0:s}'.format(text) - Declarator.translate(self, context) + #if context.initial: + # text = element.get_text(self[0], 0) + # assert text not in context.translate_identifier + # context.translate_identifier[text] = 'self.{0:s}'.format(text) + AST.Declarator.translate(self, context) class DeclaratorPointer(Declarator): # GENERATE ELEMENT() BEGIN @@ -4058,6 +4086,7 @@ class AST(element.Element): # GENERATE FACTORY(element.Element) BEGIN tag_to_class = { 'AST': AST, + 'AST_Text': AST.Text, 'AST_Element': AST.Element, 'AST_DeclarationOrStatement': AST.DeclarationOrStatement, 'AST_AlignAsExpression': AST.AlignAsExpression, diff --git a/c_to_python.py b/c_to_python.py index 549a072..a60fed5 100644 --- a/c_to_python.py +++ b/c_to_python.py @@ -1,24 +1,15 @@ -import ansi_c_yylex -import ansi_c_yyparse import ast import element +import lex_yy #import xml.etree.ElementTree +import y_tab def c_to_python(context, text): - root = ast.BlockItemList(text = text) - ansi_c_yyparse.yyparse( - root, - 0, - 0, - ast.factory, - ansi_c_yylex.yylex( - root, - 0, - 0, - ast.factory, - iter([]) - ) - ) + lex_yy.yyin = None + lex_yy.yy_buffer_stack = [lex_yy.YYBufferState(None, None)] + lex_yy.yytext_len = 0 + lex_yy.unput(text) + root = y_tab.yyparse(ast.AST.BlockItemList) root.translate(context) #print('@@@') #xml.etree.ElementTree.dump(root) diff --git a/element.py b/element.py index 2121e08..2d02217 100644 --- a/element.py +++ b/element.py @@ -160,6 +160,16 @@ def set_text(root, i, text): else: root[i - 1].tail = text +def to_text(root): + return ''.join( + [ + j + for i in range(len(root)) + for j in [get_text(root, i), to_text(root[i])] + ] + + [get_text(root, len(root))] + ) + def concatenate(children, factory = Element, *args, **kwargs): root = factory(*args, **kwargs) for child in children: diff --git a/l_to_python.py b/l_to_python.py index 4f11487..0411558 100755 --- a/l_to_python.py +++ b/l_to_python.py @@ -27,20 +27,20 @@ def my_rstrip(text, indent): return text[:i].rstrip('\t ') + indent + text[i:] context = ast.Context() -context.translate_identifier['BEGIN'] = 'self.BEGIN' -context.translate_identifier['yylval'] = 'ref_data.yylval' -context.translate_identifier['yytext'] = 'self.yytext' -context.translate_identifier['yy_pop_state'] = 'self.yy_pop_state' -context.translate_identifier['yy_push_state'] = 'self.yy_push_state' +#context.translate_identifier['BEGIN'] = 'self.BEGIN' +#context.translate_identifier['yylval'] = 'ref_data.yylval' +#context.translate_identifier['yytext'] = 'self.yytext' +#context.translate_identifier['yy_pop_state'] = 'self.yy_pop_state' +#context.translate_identifier['yy_push_state'] = 'self.yy_push_state' actions = [] with open('a.c', 'w') as fout: def extract(i, parent, indent): - if i.tag == 'PLex_Section1Or2_CodeBlock': + if i.tag == 'AST_Section1Or2_CodeBlock': initial = True - elif i.tag == 'PLex_Section2_Rule_Action': + elif i.tag == 'AST_Section2_Rule_Action': if len(i) == 0: # continued actions - assert parent.tag == 'PLex_Section2_Rule' + assert parent.tag == 'AST_Section2_Rule' assert len(parent) == 3 element.set_text( parent, @@ -51,17 +51,17 @@ with open('a.c', 'w') as fout: initial = False else: child_indent = indent - if i.tag == 'PLex_Section1_StartConditions': + if i.tag == 'AST_Section1_StartConditions': for j in i: - assert j.tag == 'PLex_Name' + assert j.tag == 'AST_Name' text = element.get_text(j, 0) context.translate_identifier[text] = 'DFA.{0:s}'.format(text) elif ( - i.tag == 'PLex_Section2_Rule' or - i.tag == 'PLex_Section2_Rule_FLexRule' + i.tag == 'AST_Section2_Rule' or + i.tag == 'AST_Section2_Rule_FLexRule' ): element.set_text(i, 0, element.get_text(i, 0).lstrip('\t ')) - elif i.tag == 'PLex_Section2_CompoundRule': + elif i.tag == 'AST_Section2_CompoundRule': child_indent += ' ' element.set_text( i, @@ -83,7 +83,7 @@ with open('a.c', 'w') as fout: for j in i: extract(j, i, child_indent) return - assert i[0].tag == 'PLex_Text' and len(i[0]) == 0 + assert i[0].tag == 'AST_Text' and len(i[0]) == 0 text = element.to_text(i[0]) j = 0 @@ -137,13 +137,13 @@ with open('a.i') as fin: else: lines.append(line) line = fin.readline() - context.indent = indent + ' #' + context.indent = indent + ' ' context.initial = initial text = c_to_python.c_to_python(context, ''.join(lines)) if initial: element.set_text(i[0], 0, text) else: - assert parent.tag == 'PLex_Section2_Rule' + assert parent.tag == 'AST_Section2_Rule' assert len(parent) == 3 prefix = element.get_text(parent, 2).rstrip('\t ') if len(text) == 0: @@ -152,11 +152,10 @@ with open('a.i') as fin: pass elif text.index('\n') == len(text) - 1: prefix += ' /*COLUMN32*/ ' - text = 'pass {0:s}'.format(text.lstrip('\t ')) + text = text.lstrip('\t ') else: prefix += ' ' - text = '{{\n{0:s} pass\n{1:s}{2:s}}}\n'.format( - indent, + text = '{{\n{0:s}{1:s}}}\n'.format( text, indent ) diff --git a/tests/scan.l b/tests/scan.l index 8e21e78..73e1b5f 100644 --- a/tests/scan.l +++ b/tests/scan.l @@ -81,9 +81,9 @@ extern const char *escaped_qstart, *escaped_qend; { \ strncpy( nmstr, yytext, sizeof(nmstr) ); \ piece_pack(); \ - piece_append(""); \ + piece_append(""); \ piece_escape(yytext, strlen(yytext)); \ - piece_append(""); \ + piece_append(""); \ piece_pack(); \ return ~NAME; \ /* return NAME;*/ \ @@ -121,14 +121,14 @@ extern const char *escaped_qstart, *escaped_qend; add_action(M4QSTART); \ yy_push_state(CODEBLOCK); \ if ((indented_code = x)) ACTION_ECHO; \ - piece_append(""); \ + piece_append(""); \ } while(0) #define END_CODEBLOCK do { \ yy_pop_state();\ add_action(M4QEND); \ if (!indented_code) line_directive_out(NULL, 0);\ - piece_append(""); \ + piece_append(""); \ } while (0) /* Nick */ @@ -158,6 +158,8 @@ static void markup_option(const char *name, int sense); %x COMMENT_DISCARD CODE_COMMENT %x SECT3_NOESCAPE %x CHARACTER_CONSTANT +/* Nick extra rules for action groups */ +%x ACTION_GROUP ELEMENT_GROUP DOUBLE_QUOTED SINGLE_QUOTED WS [[:blank:]]+ OPTWS [[:blank:]]* @@ -192,12 +194,12 @@ M4QEND "]""]" { - ^{WS} START_CODEBLOCK(true); piece_append(""); + ^{WS} START_CODEBLOCK(true); piece_append(""); ^"/*" add_action("/*[""["); yy_push_state( COMMENT ); ^#{OPTWS}line{WS} yy_push_state( LINEDIR ); ^"%s"{NAME}? return SCDECL; ^"%x"{NAME}? return XSCDECL; - ^"%{".*{NL} START_CODEBLOCK(false); piece_flush(strlen(yytext)); piece_append(""); + ^"%{".*{NL} START_CODEBLOCK(false); piece_flush(strlen(yytext)); piece_append(""); ^"%top"[[:blank:]]*"{"[[:blank:]]*{NL} { brace_start_line = linenum; ++linenum; @@ -217,10 +219,10 @@ M4QEND "]""]" line_directive_out(NULL, 1); BEGIN(SECT2PROLOG); #if 1 - piece_append(""); + piece_append(""); piece_pack(); piece_escape(yytext, strlen(yytext)); - piece_append(""); + piece_append(""); piece_pack(); return ~SECTEND; #else @@ -228,8 +230,8 @@ M4QEND "]""]" #endif } - ^"%pointer".*{NL} yytext_is_array = false; ++linenum; piece_append(""); piece_flush(strlen(yytext) - 1); piece_append(""); - ^"%array".*{NL} yytext_is_array = true; ++linenum; piece_append(""); piece_flush(strlen(yytext) - 1); piece_append(""); + ^"%pointer".*{NL} yytext_is_array = false; ++linenum; piece_append(""); piece_flush(strlen(yytext) - 1); piece_append(""); + ^"%array".*{NL} yytext_is_array = true; ++linenum; piece_append(""); piece_flush(strlen(yytext) - 1); piece_append(""); ^"%option" BEGIN(OPTION); return TOK_OPTION; @@ -304,13 +306,13 @@ M4QEND "]""]" } { - ^"%}".*{NL} ++linenum; piece_append(""); piece_flush(strlen(yytext)); END_CODEBLOCK; + ^"%}".*{NL} ++linenum; piece_append(""); piece_flush(strlen(yytext)); END_CODEBLOCK; [^\n%\[\]]* ACTION_ECHO; . ACTION_ECHO; {NL} { ++linenum; ACTION_ECHO; - if ( indented_code ) { piece_flush(strlen(yytext)); piece_append(""); END_CODEBLOCK; } + if ( indented_code ) { piece_flush(strlen(yytext)); piece_append(""); END_CODEBLOCK; } } } @@ -512,9 +514,9 @@ M4QEND "]""]" nmstr[strlen( nmstr ) - 1] = '\0'; #if 1 piece_pack(); - piece_append("\""); + piece_append("\""); piece_escape(yytext + 1, strlen(yytext + 1) - 1); - piece_append("\""); + piece_append("\""); piece_pack(); return ~NAME; /* actually a misnomer */ #else @@ -536,7 +538,7 @@ M4QEND "]""]" ^"%{".* ++bracelevel; yyless( 2 ); /* eat only %{ */ ^"%}".* --bracelevel; yyless( 2 ); /* eat only %} */ - ^{WS} START_CODEBLOCK(true); piece_append(""); /* indented code in prolog */ + ^{WS} START_CODEBLOCK(true); piece_append(""); /* indented code in prolog */ ^{NOT_WS}.* { /* non-indented code */ @@ -548,7 +550,7 @@ M4QEND "]""]" BEGIN(SECT2); } else { START_CODEBLOCK(true); - piece_append(""); + piece_append(""); } } @@ -560,7 +562,7 @@ M4QEND "]""]" sectnum = 0; #if 1 piece_pack(); - piece_append(""); + piece_append(""); piece_pack(); return ~YY_NULL; #else @@ -578,18 +580,20 @@ M4QEND "]""]" bracelevel = 1; BEGIN(PERCENT_BRACE_ACTION); piece_flush(strlen(yytext) - 2); - piece_append(""); + piece_append(""); piece_flush(2); - piece_append(""); + piece_append(""); } ^{OPTWS}"<" { /* Allow "<" to appear in (?x) patterns. */ if (!sf_skip_ws()) BEGIN(SC); + /* here we know yytext is not used by parser, so OK to destroy it */ + piece_flush(strlen(yytext) - 1); return '<'; } - ^{OPTWS}"^" return '^'; + ^{OPTWS}"^" /* here we know yytext is not used by parser, so OK to destroy it */ piece_flush(strlen(yytext) - 1); return '^'; \" BEGIN(QUOTE); return '"'; "{"/[[:digit:]] { BEGIN(NUM); @@ -604,7 +608,7 @@ M4QEND "]""]" bracelevel = 1; BEGIN(PERCENT_BRACE_ACTION); piece_flush(strlen(yytext) - 2); - piece_append(""); + piece_append(""); if ( in_rule ) { @@ -614,7 +618,7 @@ M4QEND "]""]" piece_pack(); piece_escape(yytext, 2); piece_pack(); - piece_append(""); + piece_append(""); return ~'\n'; #else return '\n'; @@ -624,7 +628,7 @@ M4QEND "]""]" abort(); #else piece_flush(strlen(yytext)); - piece_append(""); + piece_append(""); #endif } {WS}"|".*{NL} { @@ -644,9 +648,9 @@ M4QEND "]""]" ; piece_flush(i); piece_pack(); - piece_append(""); + piece_append(""); piece_escape(yytext, strlen(yytext)); - piece_append(""); + piece_append(""); piece_pack(); return ~'\n'; #else @@ -692,7 +696,7 @@ M4QEND "]""]" piece_pack(); piece_escape(yytext, strlen(yytext)); piece_pack(); - piece_append(""); + piece_append(""); return ~'\n'; #else return '\n'; @@ -724,7 +728,7 @@ M4QEND "]""]" piece_pack(); piece_escape(yytext, strlen(yytext)); piece_pack(); - piece_append(""); + piece_append(""); return ~'\n'; #else return '\n'; @@ -734,7 +738,7 @@ M4QEND "]""]" } ^{OPTWS}"<>" | - "<>" return EOF_OP; + "<>" /* here we know yytext is not used by parser, so OK to destroy it */ piece_flush(strlen(yytext) - 7); return EOF_OP; ^"%%".* { sectnum = 3; @@ -742,10 +746,10 @@ M4QEND "]""]" outn("/* Begin user sect3 */"); #if 1 piece_pack(); - piece_append(""); + piece_append(""); piece_escape(yytext, strlen(yytext)); piece_pack(); - piece_append(""); + piece_append(""); return ~YY_NULL; #else yyterminate(); /* to stop the parser */ @@ -854,6 +858,10 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */ { unput(')'); PUT_BACK_STRING(nmdefptr, 0); + if (!lex_compat && !posix_compat) { + unput(':'); + unput('?'); + } unput('('); } } @@ -905,7 +913,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */ sectnum = 0; #if 1 piece_pack(); - piece_append(""); + piece_append(""); piece_pack(); return ~YY_NULL; #else @@ -943,14 +951,47 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */ } { - ":" BEGIN(SECT2); + /* Nick extra rules for named groups */ + "'"{NAME}"'" | + "<"{NAME}">" { + BEGIN(SECT2); +#if 1 + /* here we know yytext is not used by parser, so OK to destroy it */ + piece_flush(1); + piece_pack(); + piece_append(""); + piece_flush(strlen(yytext) - 1); + piece_append(""); + piece_pack(); + piece_flush(1); + return ~NAME; +#else + return NAME; +#endif + } + /* Nick extra rules for action groups */ + "A{" { + BEGIN(SECT2); + yy_push_state(ACTION_GROUP); + bracelevel = 1; + piece_flush(strlen(yytext)); + piece_append(""); + } + "E{" { + BEGIN(SECT2); + yy_push_state(ELEMENT_GROUP); + bracelevel = 1; + piece_flush(strlen(yytext)); + piece_append(""); + } + ":" BEGIN(SECT2); return ':'; /* Nick added return, unnumbered group */ "-" BEGIN(GROUP_MINUS_PARAMS); i sf_set_case_ins(1); s sf_set_dot_all(1); x sf_set_skip_ws(1); } { - ":" BEGIN(SECT2); + ":" BEGIN(SECT2); return ':'; /* Nick added return, unnumbered group */ i sf_set_case_ins(0); s sf_set_dot_all(0); x sf_set_skip_ws(0); @@ -1038,7 +1079,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */ { - {OPTWS}"%}".* bracelevel = 0; piece_append(""); + {OPTWS}"%}".* bracelevel = 0; piece_append(""); "/*" ACTION_ECHO; yy_push_state( CODE_COMMENT ); @@ -1063,9 +1104,9 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */ piece_flush(strlen(yytext)); if (doing_codeblock) - piece_append(""); + piece_append(""); else - markup_action(""); + markup_action(""); doing_rule_action = doing_codeblock = false; BEGIN(SECT2); } @@ -1090,7 +1131,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */ { add_action( "\tYY_BREAK]""]\n" ); piece_flush(strlen(yytext)); - markup_action(""); + markup_action(""); } doing_rule_action = false; @@ -1111,11 +1152,13 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */ { (\\\n)* ACTION_ECHO; \\(\\\n)*. ACTION_ECHO; - {NL} ++linenum; ACTION_ECHO; if (bracelevel <= 0) { BEGIN(SECT2); piece_flush(strlen(yytext)); if (doing_rule_action) markup_action(""); } else { BEGIN(ACTION); } + {NL} ++linenum; ACTION_ECHO; if (bracelevel <= 0) { BEGIN(SECT2); piece_flush(strlen(yytext)); if (doing_rule_action) markup_action(""); } else { BEGIN(ACTION); } . ACTION_ECHO; } -<> { + /* Nick extra rules for action groups */ + /* Nick added: ACTION_GROUP,ELEMENT_GROUP,DOUBLE_QUOTED,SINGLE_QUOTED */ +<> { synerr( _( "EOF encountered inside an action" ) ); yyterminate(); } @@ -1143,7 +1186,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */ sectnum = 0; #if 1 piece_pack(); - piece_append(""); + piece_append(""); piece_pack(); return ~YY_NULL; #else @@ -1160,7 +1203,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */ sectnum = 0; #if 1 piece_pack(); - piece_append(""); + piece_append(""); piece_pack(); return ~YY_NULL; #else @@ -1168,6 +1211,51 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */ #endif } } + + /* Nick extra rules for action groups */ +{ + "{" ++bracelevel; +} +{ + "}" { + if (--bracelevel == 0) { + yy_pop_state(); + piece_append(""); + return TOK_ACTION_GROUP; + } + } +} +{ + "}" { + if (--bracelevel == 0) { + yy_pop_state(); + piece_append(""); + return TOK_ELEMENT_GROUP; + } + } +} +{ + "'" yy_push_state(SINGLE_QUOTED); + \" yy_push_state(DOUBLE_QUOTED); + "/*" yy_push_state(COMMENT_DISCARD); +} +{ + [^\[\]\'\\\n]+ + \' yy_pop_state(); +} +{ + [^\[\]\"\\\n]+ + \" yy_pop_state(); +} +{ + (\\\n)* + \\(\\\n)*. +} +{ + {NL} ++linenum; + . +} + <*>.|\n format_synerr( _( "bad character: %s" ), yytext ); %% @@ -1292,16 +1380,16 @@ int flexscan(void) { } static void markup_action(const char *text) { - /* append to last token text so it appears inside .. */ + /* append to last token text so it appears inside .. */ /* a problem here is that Rule has already been reduced (marked up), */ /* because we returned a '\n' token when we detected start of action, */ - /* hence we need to move the closing tag over to our right */ + /* hence we need to move the closing tag over to our right */ int i = strlen(piece[--piece0]); - if (i < 21 || strcmp(piece[piece0] + i - 21, "") != 0) + if (i < 20 || strcmp(piece[piece0] + i - 20, "") != 0) abort(); - piece[piece0][i - 21] = 0; + piece[piece0][i - 20] = 0; piece_append(text); - piece_append(""); + piece_append(""); piece_pack(); } @@ -1311,12 +1399,12 @@ static void markup_option(const char *name, int sense) { int i = piece1; while (--i >= piece0 && strcmp(piece[i], "no") == 0) ; - sprintf(piece_temp, "", name, sense ? " value=\"true\"" : ""); + sprintf(piece_temp, "", name, sense ? " value=\"true\"" : ""); piece_insert(i + 1, piece_temp); piece_flush(strlen(yytext)); - sprintf(piece_temp, "", name); + sprintf(piece_temp, "", name); piece_append(piece_temp); - /* append to last token text so it appears inside .. */ + /* append to last token text so it appears inside .. */ --piece0; piece_pack(); } -- 2.34.1