return y_tab.TYPEDEF_NAME
}
-(?E{ast.AST.Identifier}{L}{A}*) {
+ /* hack for yacc/bison specification */
+(?E{ast.AST.Identifier}{L}{A}*) |
+(?E{ast.AST.Identifier}"$$"|"$"{D}+) {
return y_tab.IDENTIFIER
}
# GENERATE END
def __str__(self):
return 'void'
+ def translate_size(self, context):
+ return 0
+ def translate_type(self, context):
+ return 'NoneType'
+ def translate_zero(self, context):
+ return 'None'
class TypeInt(Type):
# GENERATE ELEMENT(bool signed, int bits) BEGIN
def translate_size(self, context):
return 4
def translate_type(self, context):
- assert (
+ return (
+ 'str'
+ if (
isinstance(self.target_type, AST.TypeInt) and
self.target_type.bits == 8
+ ) else
+ 'list'
)
- return 'str'
def translate_zero(self, context):
- assert (
- isinstance(self.target_type, AST.TypeInt) and
- self.target_type.bits == 8
+ return (
+ (
+ '\'\''
+ if (
+ isinstance(self.target_type, AST.TypeInt) and
+ self.target_type.bits == 8
+ ) else
+ '[]'
+ )
+ if context.top_level else
+ 'None'
)
- return '\'\'' if context.top_level else 'None'
class TypeArray(Type):
# GENERATE ELEMENT(ref element_type, int element_count) BEGIN
return self[0].get_type_and_name(
AST.TypeArray(
element_type = base_type,
- element_count = int(
- element.get_text(self[2], 0),
- 8 if element.get_text(self[2], 0)[:2] in octal_prefix else 0
+ element_count = (
+ -1
+ if isinstance(self[2], AST.ExpressionEmpty) else
+ # kludge: assumes the array size is a literal integer constant, not a computed expression
+ int(
+ element.get_text(self[2], 0),
+ 8 if element.get_text(self[2], 0)[:2] in octal_prefix else 0
+ )
)
)
)
return 'ast.AST({0:s})'.format(', '.join(params))
# GENERATE END
+# void char short int long float double signed unsigned bool complex imaginary
type_specifiers_to_type = {
(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0): AST.TypeVoid(),
(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 8),
(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 16),
(0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 16),
(0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 16),
+ (0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 16),
+ (0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 16),
+ (0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 16),
(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32),
+ (0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32),
+ (0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 32),
(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32),
(0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32),
(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 32),
(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32),
(0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32),
- (0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32),
(0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 32),
- (0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 32),
+ (0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32),
+ (0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 32),
+ (0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 32),
(0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 64),
(0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 64),
(0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 64),
+ (0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 64),
+ (0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 0, 0): AST.TypeInt(signed = True, bits = 64),
+ (0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0): AST.TypeInt(signed = False, bits = 64),
(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0): AST.TypeFloat(complex = 0, bits = 32),
(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0): AST.TypeFloat(complex = 2, bits = 32),
(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1): AST.TypeFloat(complex = 1, bits = 32),
def c_to_python(context, text):
lex_yy.yyin = None
- lex_yy.yy_buffer_stack = [lex_yy.YYBufferState(None, None)]
+ lex_yy.yy_buffer_stack = [lex_yy.YYBufferState()]
lex_yy.yytext_len = 0
lex_yy.unput(text)
root = y_tab.yyparse(ast.AST.TranslationUnit)
text = text.lstrip('\t ')
else:
prefix += ' '
- text = '{{\n{0:s}{1:s}}}\n'.format(
- text,
- indent
- )
+ text = '{{\n{0:s}{1:s}}}\n'.format(text, indent)
element.set_text(parent, 2, prefix)
element.set_text(node, 0, text)
--- /dev/null
+#!/bin/sh
+# Round-trip test: run the bootstrap bison over tests/parse.y (the grammar's
+# XML serialization is captured from stderr), rewrite its embedded C actions
+# to Python with y_to_python.py, then flatten the result back to .y form so
+# it can be compared against the original.
+../bootstrap_bison.git/src/bison -o /dev/null tests/parse.y 2>tests/parse.y.xml
+./y_to_python.py <tests/parse.y.xml >tests/parse.y.new.xml
+./xml_to_y.py <tests/parse.y.new.xml >tests/parse.y.new
%left CCL_OP_DIFF CCL_OP_UNION
+/* Nick extra rules for action groups */
+%token TOK_ACTION_GROUP TOK_ELEMENT_GROUP
+
/*
*POSIX and AT&T lex place the
* precedence of the repeat operator, {}, below that of concatenation.
sect1 : sect1 startconddecl namelist1
{
- insert_after(2, "</PLex_Section1_StartConditions>");
- sprintf(piece_temp, "<PLex_Section1_StartConditions exclusive=\"%s\">", xcluflg ? "true" : "false");
+ insert_after(2, "</AST_Section1_StartConditions>");
+ sprintf(piece_temp, "<AST_Section1_StartConditions exclusive=\"%s\">", xcluflg ? "true" : "false");
insert_before(1, piece_temp);
}
| sect1 options
{
- insert_after(1, "</PLex_Section1_Options>");
- insert_before(1, "<PLex_Section1_Options>");
+ insert_after(1, "</AST_Section1_Options>");
+ insert_before(1, "<AST_Section1_Options>");
}
|
| error
{
outfilename = xstrdup(nmstr);
did_outfilename = 1;
- insert_after(2, "</PLex_Section1_Options_OutFile>");
- insert_before(0,"<PLex_Section1_Options_OutFile>");
+ insert_after(2, "</AST_Section1_Options_OutFile>");
+ insert_before(0,"<AST_Section1_Options_OutFile>");
}
| TOK_EXTRA_TYPE '=' NAME
{ extra_type = xstrdup(nmstr); /*}*/
- insert_after(2, "</PLex_Section1_Options_ExtraType>");
- insert_before(0, "<PLex_Section1_Options_ExtraType>");
+ insert_after(2, "</AST_Section1_Options_ExtraType>");
+ insert_before(0, "<AST_Section1_Options_ExtraType>");
}
| TOK_PREFIX '=' NAME
{ prefix = xstrdup(nmstr);
if (strchr(prefix, '[') || strchr(prefix, ']'))
flexerror(_("Prefix must not contain [ or ]")); /*}*/
- insert_after(2, "</PLex_Section1_Options_Prefix>");
- insert_before(0, "<PLex_Section1_Options_Prefix>");
+ insert_after(2, "</AST_Section1_Options_Prefix>");
+ insert_before(0, "<AST_Section1_Options_Prefix>");
}
| TOK_YYCLASS '=' NAME
{ yyclass = xstrdup(nmstr); /*}*/
- insert_after(2, "</PLex_Section1_Options_YYClass>");
- insert_before(0, "<PLex_Section1_Options_YYClass>");
+ insert_after(2, "</AST_Section1_Options_YYClass>");
+ insert_before(0, "<AST_Section1_Options_YYClass>");
}
| TOK_HEADER_FILE '=' NAME
{ headerfilename = xstrdup(nmstr); /*}*/
- insert_after(2, "</PLex_Section1_Options_HeaderFile>");
- insert_before(0, "<PLex_Section1_Options_HeaderFile>");
+ insert_after(2, "</AST_Section1_Options_HeaderFile>");
+ insert_before(0, "<AST_Section1_Options_HeaderFile>");
}
| TOK_TABLES_FILE '=' NAME
{ tablesext = true; tablesfilename = xstrdup(nmstr); /*}*/
- insert_after(2, "</PLex_Section1_Options_TablesFile>");
- insert_before(0, "<PLex_Section1_Options_TablesFile>");
+ insert_after(2, "</AST_Section1_Options_TablesFile>");
+ insert_before(0, "<AST_Section1_Options_TablesFile>");
}
;
sect2 : sect2 scon initforrule flexrule '\n'
{ scon_stk_ptr = $2; /*}*/
- insert_after(4, "</PLex_Section2_Rule>");
- insert_before(1, "<PLex_Section2_Rule>");
+ insert_after(4, "</AST_Section2_Rule>");
+ insert_before(1, "<AST_Section2_Rule>");
}
| sect2 scon '{' sect2 '}'
{ scon_stk_ptr = $2; /*}*/
- insert_after(4, "</PLex_Section2_CompoundRule>");
- insert_before(1, "<PLex_Section2_CompoundRule>");
+ insert_after(4, "</AST_Section2_CompoundRule>");
+ insert_before(1, "<AST_Section2_CompoundRule>");
}
|
;
pinpoint_message(
"'^' operator results in sub-optimal performance" );
}
- insert_after(1, "</PLex_Section2_Rule_FLexRule>");
- insert_before(0, "<PLex_Section2_Rule_FLexRule bol=\"true\">");
+ insert_after(1, "</AST_Section2_Rule_FLexRule>");
+ insert_before(0, "<AST_Section2_Rule_FLexRule bol=\"true\">");
}
| rule
mkbranch( scset[i],
pat );
}
- insert_after(0, "</PLex_Section2_Rule_FLexRule>");
- insert_before(0, "<PLex_Section2_Rule_FLexRule bol=\"false\">");
+ insert_after(0, "</AST_Section2_Rule_FLexRule>");
+ insert_before(0, "<AST_Section2_Rule_FLexRule bol=\"false\">");
}
| EOF_OP
else
build_eof_action();
}
- insert_after(0, "</PLex_Section2_Rule_EOFRule>");
- insert_before(0, "<PLex_Section2_Rule_EOFRule>");
+ insert_after(0, "</AST_Section2_Rule_EOFRule>");
+ insert_before(0, "<AST_Section2_Rule_EOFRule>");
}
| error
scon : '<' scon_stk_ptr namelist2 '>'
{ $$ = $2; /*}*/
- insert_after(3, "</PLex_Section2_StartConditions>");
- insert_before(0, "<PLex_Section2_StartConditions>");
+ insert_after(3, "</AST_Section2_StartConditions>");
+ insert_before(0, "<AST_Section2_StartConditions>");
}
| '<' '*' '>'
if ( j > scon_stk_ptr )
scon_stk[++scon_stk_ptr] = i;
}
- insert_after(2, "</PLex_Section2_StartConditions>");
- insert_before(0, "<PLex_Section2_StartConditions wildcard=\"true\">");
+ insert_after(2, "</AST_Section2_StartConditions>");
+ insert_before(0, "<AST_Section2_StartConditions wildcard=\"true\">");
}
|
piece[piece2 + 1] = piece[piece2]; /* empty */
piece[piece2] = piece[piece2 - 1]; /* empty */
piece[piece2 - 1] = temp;
- insert_before(0, "<PLex_Section2_StartConditions />");
+ insert_before(0, "<AST_Section2_StartConditions />");
}
;
| '(' re ')'
{ $$ = $2; /*}*/
-#if 0 /* for now do things in the traditional lex way without subexpressions */
insert_after(2, "</RegexGroup>");
insert_before(0, "<RegexGroup>");
-#endif
}
+ /* Nick extra rules for unnumbered groups */
+ | '(' ':' re ')'
+ { $$ = $3; }
+ /* Nick extra rules for named groups */
+ | '(' NAME re ')'
+ { $$ = $3; /*}*/
+ insert_after(3, "</RegexGroupName>");
+ insert_before(0, "<RegexGroupName>");
+}
+ /* Nick extra rules for action groups */
+ | '(' TOK_ACTION_GROUP re ')'
+ { $$ = $3; /*}*/
+ insert_after(3, "</RegexGroupAction>");
+ insert_before(0, "<RegexGroupAction>");
+}
+ | '(' TOK_ELEMENT_GROUP re ')'
+ { $$ = $3; /*}*/
+ insert_after(3, "</RegexGroupElement>");
+ insert_before(0, "<RegexGroupElement>");
+}
| CHAR
{
--- /dev/null
+#!/usr/bin/env python3
+
+# Flatten an XML-serialized grammar from stdin back to its textual (.y) form
+# on stdout.  All original text is held verbatim in the element tree, so this
+# is a pure flattening step (the inverse of the y-to-XML direction).
+
+import element
+import sys
+import xml.etree.ElementTree
+
+sys.stdout.write(
+  element.to_text(xml.etree.ElementTree.parse(sys.stdin).getroot())
+)
--- /dev/null
+#!/usr/bin/env python3
+
+import ast
+import element
+import lex_yy
+import os
+import sys
+import xml.etree.ElementTree
+import y_tab
+
+def my_rstrip(text, indent):
+  # Re-indent the trailing close-brace group of an action body: locate the
+  # last '{' preceding the run of '}' characters at the end of text, strip
+  # the tabs/spaces immediately before it, and splice in the caller's
+  # indent so the braces line up with the surrounding rule.
+  i = len(text)
+  # skip over the trailing run of '}' characters
+  while i > 0 and text[i - 1] == '}':
+    i -= 1
+  assert i > 0
+  # scan back to the '{' that opens the group
+  # NOTE(review): assumes a matching '{' always exists before the '}' run;
+  # if not, this scan can walk past index 0 -- TODO confirm inputs
+  while text[i - 1] != '{':
+    i -= 1
+  assert i > 0
+  i -= 1
+  return text[:i].rstrip('\t ') + indent + text[i:]
+
+def c_to_python(context, text):
+  # Translate a fragment of C source text into Python source.  The text is
+  # pushed into the lexer's unput buffer (no file input), parsed as a C
+  # translation unit, and the resulting AST emits Python lines into
+  # context.lines, which are joined and returned as one string.
+  lex_yy.yyin = None  # no underlying stream; all input comes via unput()
+  lex_yy.yy_buffer_stack = [lex_yy.YYBufferState()]  # fresh buffer per call
+  lex_yy.yytext_len = 0
+  lex_yy.unput(text)
+  root = y_tab.yyparse(ast.AST.TranslationUnit)
+  context.lines = []  # translate_translation_unit appends output lines here
+  root.translate_translation_unit(context)
+  return ''.join(context.lines)
+
+# Read the XML document from stdin, building it out of our element.Element
+# subclass so that the per-child text pieces can be read and rewritten later.
+root = xml.etree.ElementTree.parse(
+  sys.stdin,
+  xml.etree.ElementTree.XMLParser(
+    target = xml.etree.ElementTree.TreeBuilder(element.Element),
+    encoding = 'unicode'
+  )
+).getroot()
+
+# Shared translation state for every C-to-Python conversion below.
+context = ast.Context()
+#context.translate_identifier['BEGIN'] = 'self.BEGIN'
+#context.translate_identifier['yylval'] = 'ref_data.yylval'
+#context.translate_identifier['yytext'] = 'self.yytext'
+#context.translate_identifier['yy_pop_state'] = 'self.yy_pop_state'
+#context.translate_identifier['yy_push_state'] = 'self.yy_push_state'
+
+# Pass 1: walk the tree collecting every C-text node (prologue, production
+# action, section 3).  Each node's text is written into a.c between '@@@'
+# markers so that a single gcc -E run (below) preprocesses them all at once.
+actions = []
+with open('a.c', 'w') as fout:
+  def extract(i, indent):
+    # Recursive walk; 'indent' accumulates the Python indentation the
+    # translated code will need at this depth.
+    if i.tag == 'AST_Section1_Prologue':
+      node = i[0]
+      assert node.tag == 'AST_Text'
+      indent += ' '
+      initial = True
+    elif i.tag == 'AST_Production_Action':
+      node = i[0]
+      assert node.tag == 'AST_Text'
+      initial = False
+    elif i.tag == 'AST_Section3':
+      node = i
+      initial = True
+    else:
+      # Not a C-text holder: normalize top-level whitespace and recurse.
+      child_indent = indent
+      if i.tag == 'AST':
+        for j in range(1, len(i) + 1):
+          element.set_text(i, j, element.get_text(i, j).rstrip() + '\n')
+      #elif (
+      #  i.tag == 'AST_Section2_Rule' or
+      #  i.tag == 'AST_Section2_Rule_FLexRule'
+      #):
+      #  element.set_text(i, 0, element.get_text(i, 0).lstrip('\t '))
+      #elif i.tag == 'AST_Section2_CompoundRule':
+      #  child_indent += ' '
+      #  element.set_text(
+      #    i,
+      #    0,
+      #    indent + element.get_text(i, 0).lstrip('\t ')
+      #  )
+      #  for j in range(1, len(i)):
+      #    element.set_text(
+      #      i,
+      #      j,
+      #      #element.get_text(i, j).rstrip('\t ') + child_indent
+      #      my_rstrip(element.get_text(i, j), child_indent)
+      #    )
+      #  element.set_text(
+      #    i,
+      #    len(i),
+      #    indent + element.get_text(i, len(i)).lstrip('\t ')
+      #  )
+      for j in i:
+        extract(j, child_indent)
+      return
+    #assert len(node) == 0
+    #text = element.get_text(node, 0)
+    text = element.to_text(node)
+
+    # Trim trailing whitespace and blank lines at either end of the text.
+    lines = [i.rstrip() for i in text.split('\n')]
+    while len(lines) and len(lines[-1]) == 0:
+      del lines[-1]
+    while len(lines) and len(lines[0]) == 0:
+      del lines[0]
+    for line in lines:
+      if (
+        (line[:10] == '#include <' and line[-3:] == '.h>') or
+        (line[:10] == '#include "' and line[-3:] == '.h"')
+      ):
+        # Wrap #include lines in IMPORT markers so pass 2 can skip the
+        # header's expansion; the #undefs neutralize macros from the header
+        # that would otherwise collide with the following C text.
+        fout.write(
+          '''@@@ IMPORT({0:s})
+{1:s}
+#undef NULL
+#undef bool
+#undef false
+#undef true
+@@@ IMPORT END\n'''.format(
+            line[10:-3].replace('/', '.'),
+            line
+          )
+        )
+      else:
+        fout.write(line + '\n')
+    fout.write('@@@\n')
+
+    # remember where the translated Python must be spliced back in
+    actions.append((node, indent, initial))
+  extract(root, '')
+
+# Pass 2: preprocess all collected C text in one gcc -E run, then read the
+# output back marker by marker, translate each piece to Python, and splice
+# it into the element tree in place of the original C.
+os.system('gcc -I tests/flex_h -E a.c >a.i')
+with open('a.i') as fin:
+  for node, indent, initial in actions:
+    lines = []
+    line = fin.readline()
+    while line != '@@@\n':
+      assert len(line)  # EOF before the closing marker would be a bug
+      if (
+        line[:1] == '#' or
+        (line == '\n' and len(lines) and lines[-1] == '\n')
+      ):
+        # drop preprocessor linemarkers and collapse runs of blank lines
+        pass
+      elif line[:11] == '@@@ IMPORT(' and line[-2:] == ')\n':
+        # make the importing look like a function call in the C code:
+        #lines.append('import("{0:s}");\n'.format(line[11:-2]))
+        # skip the included header's preprocessed expansion entirely
+        line = fin.readline()
+        while line != '@@@ IMPORT END\n':
+          assert len(line)
+          line = fin.readline()
+      else:
+        lines.append(line)
+      line = fin.readline()
+    text = ''.join(lines)
+
+    if initial:
+      # top-level code (prologue / section 3): translate as-is
+      context.indent = indent
+      text = c_to_python(context, text)
+    else:
+      # rule action: wrap in a dummy function so it parses as a C
+      # translation unit, then strip the generated 'def a():' header and
+      # restore the surrounding braces expected by the grammar
+      context.indent = indent
+      text = c_to_python(
+        context,
+        'void a(void) {0:s}'.format(text) # already has braces and \n
+      )
+      assert text[:len(indent) + 10] == '\n{0:s}def a():\n'.format(indent)
+      text = text[len(indent) + 10:]
+      text = '{{\n{0:s}{1:s}}}\n'.format(text, indent)
+    element.set_text(node, 0, text)
+
+# Serialize the rewritten tree to stdout as text (str, not bytes).
+xml.etree.ElementTree.ElementTree(root).write(
+  sys.stdout,
+  encoding = 'unicode' # strangely does not seem to default to this
+)