self,
indent = '',
enclosing_loop = None,
- initial = False,
+ #initial = False,
translate_identifier = {
'NULL': 'None',
'false': 'False',
):
self.indent = ' '
self.enclosing_loop = enclosing_loop
- self.initial = initial # whether to add declared identifiers as 'self.'
+ #self.initial = initial # whether to add declared identifiers as 'self.'
self.translate_identifier = translate_identifier
class AST(element.Element):
+ class Text(element.Element):
+   """Element subclass for nodes carrying the 'AST_Text' tag."""
+   # GENERATE ELEMENT() BEGIN
+   # NOTE(review): attrib/children use shared mutable defaults; this follows
+   # the generated pattern of the sibling classes and is left as-is -- confirm
+   # that element.Element.__init__ does not mutate them in place.
+   def __init__(
+     self,
+     tag = 'AST_Text',
+     attrib = {},
+     text = '',
+     children = []
+   ):
+     element.Element.__init__(
+       self,
+       tag,
+       attrib,
+       text,
+       children
+     )
+   def copy(self, factory = None):
+     """Copy via element.Element.copy; factory defaults to AST.Text."""
+     # Text is nested inside AST, and class scopes are skipped when a method
+     # body resolves names, so the bare name 'Text' would raise NameError
+     # here. Reach the class through its enclosing class instead.
+     # NOTE(review): this sits inside a GENERATE region; if the generator
+     # emits the bare name it presumably needs the same fix -- confirm.
+     result = element.Element.copy(
+       self,
+       AST.Text if factory is None else factory
+     )
+     return result
+   def __repr__(self):
+     params = []
+     self.repr_serialize(params)
+     return 'ast.AST.Text({0:s})'.format(', '.join(params))
+   # GENERATE END
+
class Element(element.Element):
# GENERATE ELEMENT() BEGIN
def __init__(
return 'ast.AST.DeclarationOrStatement({0:s})'.format(', '.join(params))
# GENERATE END
def translate(self, context):
- Element.translate(self, context)
+ AST.Element.translate(self, context)
element.set_text(
self,
0,
def translate(self, context):
assert len(self) == 3
self[0].translate(context)
- initial_save = context.initial
- context.initial = False
+ #initial_save = context.initial
+ #context.initial = False
self[1].translate(context)
self[2].translate(context)
- context.initial = initial_save
+ #context.initial = initial_save
element.set_text(self, 0, '')
element.set_text(self, 1, '(')
element.set_text(self, 2, '')
def translate(self, context):
assert len(self) == 2
self[0].translate(context)
- initial_save = context.initial
- context.initial = False
+ #initial_save = context.initial
+ #context.initial = False
self[1].translate(context)
- context.initial = initial_save
+ #context.initial = initial_save
element.set_text(self, 0, '')
element.set_text(self, 1, '(')
element.set_text(self, 2, ')')
return 'ast.AST.DeclaratorIdentifier({0:s})'.format(', '.join(params))
# GENERATE END
def translate(self, context):
- if context.initial:
- text = element.get_text(self[0], 0)
- assert text not in context.translate_identifier
- context.translate_identifier[text] = 'self.{0:s}'.format(text)
- Declarator.translate(self, context)
+ # Disabled: when context.initial was set, this mapped the declared
+ # identifier (text of self[0]) to 'self.<name>' in
+ # context.translate_identifier. Kept below as commented-out code.
+ #if context.initial:
+ # text = element.get_text(self[0], 0)
+ # assert text not in context.translate_identifier
+ # context.translate_identifier[text] = 'self.{0:s}'.format(text)
+ # The base-class call is qualified through AST; presumably Declarator is
+ # nested in AST and so not reachable by its bare name from a method body
+ # (TODO confirm against the full class layout).
+ AST.Declarator.translate(self, context)
class DeclaratorPointer(Declarator):
# GENERATE ELEMENT() BEGIN
# GENERATE FACTORY(element.Element) BEGIN
tag_to_class = {
'AST': AST,
+ 'AST_Text': AST.Text,
'AST_Element': AST.Element,
'AST_DeclarationOrStatement': AST.DeclarationOrStatement,
'AST_AlignAsExpression': AST.AlignAsExpression,
return text[:i].rstrip('\t ') + indent + text[i:]
context = ast.Context()
-context.translate_identifier['BEGIN'] = 'self.BEGIN'
-context.translate_identifier['yylval'] = 'ref_data.yylval'
-context.translate_identifier['yytext'] = 'self.yytext'
-context.translate_identifier['yy_pop_state'] = 'self.yy_pop_state'
-context.translate_identifier['yy_push_state'] = 'self.yy_push_state'
+#context.translate_identifier['BEGIN'] = 'self.BEGIN'
+#context.translate_identifier['yylval'] = 'ref_data.yylval'
+#context.translate_identifier['yytext'] = 'self.yytext'
+#context.translate_identifier['yy_pop_state'] = 'self.yy_pop_state'
+#context.translate_identifier['yy_push_state'] = 'self.yy_push_state'
actions = []
with open('a.c', 'w') as fout:
def extract(i, parent, indent):
- if i.tag == 'PLex_Section1Or2_CodeBlock':
+ if i.tag == 'AST_Section1Or2_CodeBlock':
initial = True
- elif i.tag == 'PLex_Section2_Rule_Action':
+ elif i.tag == 'AST_Section2_Rule_Action':
if len(i) == 0: # continued actions
- assert parent.tag == 'PLex_Section2_Rule'
+ assert parent.tag == 'AST_Section2_Rule'
assert len(parent) == 3
element.set_text(
parent,
initial = False
else:
child_indent = indent
- if i.tag == 'PLex_Section1_StartConditions':
+ if i.tag == 'AST_Section1_StartConditions':
for j in i:
- assert j.tag == 'PLex_Name'
+ assert j.tag == 'AST_Name'
text = element.get_text(j, 0)
context.translate_identifier[text] = 'DFA.{0:s}'.format(text)
elif (
- i.tag == 'PLex_Section2_Rule' or
- i.tag == 'PLex_Section2_Rule_FLexRule'
+ i.tag == 'AST_Section2_Rule' or
+ i.tag == 'AST_Section2_Rule_FLexRule'
):
element.set_text(i, 0, element.get_text(i, 0).lstrip('\t '))
- elif i.tag == 'PLex_Section2_CompoundRule':
+ elif i.tag == 'AST_Section2_CompoundRule':
child_indent += ' '
element.set_text(
i,
for j in i:
extract(j, i, child_indent)
return
- assert i[0].tag == 'PLex_Text' and len(i[0]) == 0
+ assert i[0].tag == 'AST_Text' and len(i[0]) == 0
text = element.to_text(i[0])
j = 0
else:
lines.append(line)
line = fin.readline()
- context.indent = indent + ' #'
+ context.indent = indent + ' '
context.initial = initial
text = c_to_python.c_to_python(context, ''.join(lines))
if initial:
element.set_text(i[0], 0, text)
else:
- assert parent.tag == 'PLex_Section2_Rule'
+ assert parent.tag == 'AST_Section2_Rule'
assert len(parent) == 3
prefix = element.get_text(parent, 2).rstrip('\t ')
if len(text) == 0:
pass
elif text.index('\n') == len(text) - 1:
prefix += ' /*COLUMN32*/ '
- text = 'pass {0:s}'.format(text.lstrip('\t '))
+ text = text.lstrip('\t ')
else:
prefix += ' '
- text = '{{\n{0:s} pass\n{1:s}{2:s}}}\n'.format(
- indent,
+ text = '{{\n{0:s}{1:s}}}\n'.format(
text,
indent
)
{ \
strncpy( nmstr, yytext, sizeof(nmstr) ); \
piece_pack(); \
- piece_append("<PLex_Name>"); \
+ piece_append("<AST_Name>"); \
piece_escape(yytext, strlen(yytext)); \
- piece_append("</PLex_Name>"); \
+ piece_append("</AST_Name>"); \
piece_pack(); \
return ~NAME; \
/* return NAME;*/ \
add_action(M4QSTART); \
yy_push_state(CODEBLOCK); \
if ((indented_code = x)) ACTION_ECHO; \
- piece_append("<PLex_Section1Or2_CodeBlock>"); \
+ piece_append("<AST_Section1Or2_CodeBlock>"); \
} while(0)
#define END_CODEBLOCK do { \
yy_pop_state();\
add_action(M4QEND); \
if (!indented_code) line_directive_out(NULL, 0);\
- piece_append("</PLex_Section1Or2_CodeBlock>"); \
+ piece_append("</AST_Section1Or2_CodeBlock>"); \
} while (0)
/* Nick */
%x COMMENT_DISCARD CODE_COMMENT
%x SECT3_NOESCAPE
%x CHARACTER_CONSTANT
+/* Nick extra rules for action groups */
+%x ACTION_GROUP ELEMENT_GROUP DOUBLE_QUOTED SINGLE_QUOTED
WS [[:blank:]]+
OPTWS [[:blank:]]*
<INITIAL>{
- ^{WS} START_CODEBLOCK(true); piece_append("<PLex_Text>");
+ ^{WS} START_CODEBLOCK(true); piece_append("<AST_Text>");
^"/*" add_action("/*[""["); yy_push_state( COMMENT );
^#{OPTWS}line{WS} yy_push_state( LINEDIR );
^"%s"{NAME}? return SCDECL;
^"%x"{NAME}? return XSCDECL;
- ^"%{".*{NL} START_CODEBLOCK(false); piece_flush(strlen(yytext)); piece_append("<PLex_Text>");
+ ^"%{".*{NL} START_CODEBLOCK(false); piece_flush(strlen(yytext)); piece_append("<AST_Text>");
^"%top"[[:blank:]]*"{"[[:blank:]]*{NL} {
brace_start_line = linenum;
++linenum;
line_directive_out(NULL, 1);
BEGIN(SECT2PROLOG);
#if 1
- piece_append("</PLex_Section1>");
+ piece_append("</AST_Section1>");
piece_pack();
piece_escape(yytext, strlen(yytext));
- piece_append("<PLex_Section2>");
+ piece_append("<AST_Section2>");
piece_pack();
return ~SECTEND;
#else
#endif
}
- ^"%pointer".*{NL} yytext_is_array = false; ++linenum; piece_append("<PLex_Section1_Options><PLex_Section1_Options_Array>"); piece_flush(strlen(yytext) - 1); piece_append("</PLex_Section1_Options_Array></PLex_Section1_Options>");
- ^"%array".*{NL} yytext_is_array = true; ++linenum; piece_append("<PLex_Section1_Options><PLex_Section1_Options_Array value=\"true\">"); piece_flush(strlen(yytext) - 1); piece_append("</PLex_Section1_Options_Array></PLex_Section1_Options>");
+ ^"%pointer".*{NL} yytext_is_array = false; ++linenum; piece_append("<AST_Section1_Options><AST_Section1_Options_Array>"); piece_flush(strlen(yytext) - 1); piece_append("</AST_Section1_Options_Array></AST_Section1_Options>");
+ ^"%array".*{NL} yytext_is_array = true; ++linenum; piece_append("<AST_Section1_Options><AST_Section1_Options_Array value=\"true\">"); piece_flush(strlen(yytext) - 1); piece_append("</AST_Section1_Options_Array></AST_Section1_Options>");
^"%option" BEGIN(OPTION); return TOK_OPTION;
}
<CODEBLOCK>{
- ^"%}".*{NL} ++linenum; piece_append("</PLex_Text>"); piece_flush(strlen(yytext)); END_CODEBLOCK;
+ ^"%}".*{NL} ++linenum; piece_append("</AST_Text>"); piece_flush(strlen(yytext)); END_CODEBLOCK;
[^\n%\[\]]* ACTION_ECHO;
. ACTION_ECHO;
{NL} {
++linenum;
ACTION_ECHO;
- if ( indented_code ) { piece_flush(strlen(yytext)); piece_append("</PLex_Text>"); END_CODEBLOCK; }
+ if ( indented_code ) { piece_flush(strlen(yytext)); piece_append("</AST_Text>"); END_CODEBLOCK; }
}
}
nmstr[strlen( nmstr ) - 1] = '\0';
#if 1
piece_pack();
- piece_append("<PLex_String>\"<PLex_Text>");
+ piece_append("<AST_String>\"<AST_Text>");
piece_escape(yytext + 1, strlen(yytext + 1) - 1);
- piece_append("</PLex_Text>\"</PLex_String>");
+ piece_append("</AST_Text>\"</AST_String>");
piece_pack();
return ~NAME; /* actually a misnomer */
#else
^"%{".* ++bracelevel; yyless( 2 ); /* eat only %{ */
^"%}".* --bracelevel; yyless( 2 ); /* eat only %} */
- ^{WS} START_CODEBLOCK(true); piece_append("<PLex_Text>"); /* indented code in prolog */
+ ^{WS} START_CODEBLOCK(true); piece_append("<AST_Text>"); /* indented code in prolog */
^{NOT_WS}.* {
/* non-indented code */
BEGIN(SECT2);
} else {
START_CODEBLOCK(true);
- piece_append("<PLex_Text>");
+ piece_append("<AST_Text>");
}
}
sectnum = 0;
#if 1
piece_pack();
- piece_append("</PLex_Section2>");
+ piece_append("</AST_Section2>");
piece_pack();
return ~YY_NULL;
#else
bracelevel = 1;
BEGIN(PERCENT_BRACE_ACTION);
piece_flush(strlen(yytext) - 2);
- piece_append("<PLex_Section1Or2_CodeBlock>");
+ piece_append("<AST_Section1Or2_CodeBlock>");
piece_flush(2);
- piece_append("<PLex_Text>");
+ piece_append("<AST_Text>");
}
^{OPTWS}"<" {
/* Allow "<" to appear in (?x) patterns. */
if (!sf_skip_ws())
BEGIN(SC);
+ /* here we know yytext is not used by parser, so OK to destroy it */
+ piece_flush(strlen(yytext) - 1);
return '<';
}
- ^{OPTWS}"^" return '^';
+ ^{OPTWS}"^" /* here we know yytext is not used by parser, so OK to destroy it */ piece_flush(strlen(yytext) - 1); return '^';
\" BEGIN(QUOTE); return '"';
"{"/[[:digit:]] {
BEGIN(NUM);
bracelevel = 1;
BEGIN(PERCENT_BRACE_ACTION);
piece_flush(strlen(yytext) - 2);
- piece_append("<PLex_Section2_Rule_Action>");
+ piece_append("<AST_Section2_Rule_Action>");
if ( in_rule )
{
piece_pack();
piece_escape(yytext, 2);
piece_pack();
- piece_append("<PLex_Text>");
+ piece_append("<AST_Text>");
return ~'\n';
#else
return '\n';
abort();
#else
piece_flush(strlen(yytext));
- piece_append("<PLex_Text>");
+ piece_append("<AST_Text>");
#endif
}
{WS}"|".*{NL} {
;
piece_flush(i);
piece_pack();
- piece_append("<PLex_Section2_Rule_Action continued=\"true\">");
+ piece_append("<AST_Section2_Rule_Action continued=\"true\">");
piece_escape(yytext, strlen(yytext));
- piece_append("</PLex_Section2_Rule_Action>");
+ piece_append("</AST_Section2_Rule_Action>");
piece_pack();
return ~'\n';
#else
piece_pack();
piece_escape(yytext, strlen(yytext));
piece_pack();
- piece_append("<PLex_Section2_Rule_Action><PLex_Text>");
+ piece_append("<AST_Section2_Rule_Action><AST_Text>");
return ~'\n';
#else
return '\n';
piece_pack();
piece_escape(yytext, strlen(yytext));
piece_pack();
- piece_append("<PLex_Section2_Rule_Action><PLex_Text>");
+ piece_append("<AST_Section2_Rule_Action><AST_Text>");
return ~'\n';
#else
return '\n';
}
^{OPTWS}"<<EOF>>" |
- "<<EOF>>" return EOF_OP;
+ "<<EOF>>" /* here we know yytext is not used by parser, so OK to destroy it */ piece_flush(strlen(yytext) - 7); return EOF_OP;
^"%%".* {
sectnum = 3;
outn("/* Begin user sect3 */");
#if 1
piece_pack();
- piece_append("</PLex_Section2>");
+ piece_append("</AST_Section2>");
piece_escape(yytext, strlen(yytext));
piece_pack();
- piece_append("<PLex_Section3>");
+ piece_append("<AST_Section3>");
return ~YY_NULL;
#else
yyterminate(); /* to stop the parser */
{
unput(')');
PUT_BACK_STRING(nmdefptr, 0);
+ if (!lex_compat && !posix_compat) {
+ unput(':');
+ unput('?');
+ }
unput('(');
}
}
sectnum = 0;
#if 1
piece_pack();
- piece_append("</PLex_Section2>");
+ piece_append("</AST_Section2>");
piece_pack();
return ~YY_NULL;
#else
}
<GROUP_WITH_PARAMS>{
- ":" BEGIN(SECT2);
+ /* Nick extra rules for named groups */
+ "'"{NAME}"'" |
+ "<"{NAME}">" {
+ BEGIN(SECT2);
+#if 1
+ /* here we know yytext is not used by parser, so OK to destroy it */
+ piece_flush(1);
+ piece_pack();
+ piece_append("<RegexGroupName_Text>");
+ piece_flush(strlen(yytext) - 1);
+ piece_append("</RegexGroupName_Text>");
+ piece_pack();
+ piece_flush(1);
+ return ~NAME;
+#else
+ return NAME;
+#endif
+ }
+ /* Nick extra rules for action groups */
+ "A{" {
+ BEGIN(SECT2);
+ yy_push_state(ACTION_GROUP);
+ bracelevel = 1;
+ piece_flush(strlen(yytext));
+ piece_append("<RegexGroupAction_Text>");
+ }
+ "E{" {
+ BEGIN(SECT2);
+ yy_push_state(ELEMENT_GROUP);
+ bracelevel = 1;
+ piece_flush(strlen(yytext));
+ piece_append("<RegexGroupElement_Text>");
+ }
+ ":" BEGIN(SECT2); return ':'; /* Nick added return, unnumbered group */
"-" BEGIN(GROUP_MINUS_PARAMS);
i sf_set_case_ins(1);
s sf_set_dot_all(1);
x sf_set_skip_ws(1);
}
<GROUP_MINUS_PARAMS>{
- ":" BEGIN(SECT2);
+ ":" BEGIN(SECT2); return ':'; /* Nick added return, unnumbered group */
i sf_set_case_ins(0);
s sf_set_dot_all(0);
x sf_set_skip_ws(0);
<PERCENT_BRACE_ACTION>{
- {OPTWS}"%}".* bracelevel = 0; piece_append("</PLex_Text>");
+ {OPTWS}"%}".* bracelevel = 0; piece_append("</AST_Text>");
<ACTION>"/*" ACTION_ECHO; yy_push_state( CODE_COMMENT );
piece_flush(strlen(yytext));
if (doing_codeblock)
- piece_append("</PLex_Section1Or2_CodeBlock>");
+ piece_append("</AST_Section1Or2_CodeBlock>");
else
- markup_action("</PLex_Section2_Rule_Action>");
+ markup_action("</AST_Section2_Rule_Action>");
doing_rule_action = doing_codeblock = false;
BEGIN(SECT2);
}
{
add_action( "\tYY_BREAK]""]\n" );
piece_flush(strlen(yytext));
- markup_action("</PLex_Text></PLex_Section2_Rule_Action>");
+ markup_action("</AST_Text></AST_Section2_Rule_Action>");
}
doing_rule_action = false;
<ACTION_STRING,CHARACTER_CONSTANT>{
(\\\n)* ACTION_ECHO;
\\(\\\n)*. ACTION_ECHO;
- {NL} ++linenum; ACTION_ECHO; if (bracelevel <= 0) { BEGIN(SECT2); piece_flush(strlen(yytext)); if (doing_rule_action) markup_action("</PLex_Text></PLex_Section2_Rule_Action>"); } else { BEGIN(ACTION); }
+ {NL} ++linenum; ACTION_ECHO; if (bracelevel <= 0) { BEGIN(SECT2); piece_flush(strlen(yytext)); if (doing_rule_action) markup_action("</AST_Text></AST_Section2_Rule_Action>"); } else { BEGIN(ACTION); }
. ACTION_ECHO;
}
-<COMMENT,CODE_COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING,CHARACTER_CONSTANT><<EOF>> {
+ /* Nick extra rules for action groups */
+ /* Nick added: ACTION_GROUP,ELEMENT_GROUP,DOUBLE_QUOTED,SINGLE_QUOTED */
+<COMMENT,CODE_COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING,CHARACTER_CONSTANT,ACTION_GROUP,ELEMENT_GROUP,DOUBLE_QUOTED,SINGLE_QUOTED><<EOF>> {
synerr( _( "EOF encountered inside an action" ) );
yyterminate();
}
sectnum = 0;
#if 1
piece_pack();
- piece_append("</PLex_Section3>");
+ piece_append("</AST_Section3>");
piece_pack();
return ~YY_NULL;
#else
sectnum = 0;
#if 1
piece_pack();
- piece_append("</PLex_Section3>");
+ piece_append("</AST_Section3>");
piece_pack();
return ~YY_NULL;
#else
#endif
}
}
+
+ /* Nick extra rules for action groups */
+<ACTION_GROUP,ELEMENT_GROUP>{
+ "{" ++bracelevel;
+}
+<ACTION_GROUP>{
+ "}" {
+ if (--bracelevel == 0) {
+ yy_pop_state();
+ piece_append("</RegexGroupAction_Text>");
+ return TOK_ACTION_GROUP;
+ }
+ }
+}
+<ELEMENT_GROUP>{
+ "}" {
+ if (--bracelevel == 0) {
+ yy_pop_state();
+ piece_append("</RegexGroupElement_Text>");
+ return TOK_ELEMENT_GROUP;
+ }
+ }
+}
+<ACTION_GROUP,ELEMENT_GROUP>{
+ "'" yy_push_state(SINGLE_QUOTED);
+ \" yy_push_state(DOUBLE_QUOTED);
+ "/*" yy_push_state(COMMENT_DISCARD);
+}
+<SINGLE_QUOTED>{
+ [^\[\]\'\\\n]+
+ \' yy_pop_state();
+}
+<DOUBLE_QUOTED>{
+ [^\[\]\"\\\n]+
+ \" yy_pop_state();
+}
+<SINGLE_QUOTED,DOUBLE_QUOTED>{
+ (\\\n)*
+ \\(\\\n)*.
+}
+<ACTION_GROUP,ELEMENT_GROUP,SINGLE_QUOTED,DOUBLE_QUOTED>{
+ {NL} ++linenum;
+ .
+}
+
<*>.|\n format_synerr( _( "bad character: %s" ), yytext );
%%
}
static void markup_action(const char *text) {
- /* append to last token text so it appears inside <PLex_Section2_Rule>..</PLex_Section2_Rule> */
+ /* append to last token text so it appears inside <AST_Section2_Rule>..</AST_Section2_Rule> */
/* a problem here is that Rule has already been reduced (marked up), */
/* because we returned a '\n' token when we detected start of action, */
- /* hence we need to move the closing </PLex_Section2_Rule> tag over to our right */
+ /* hence we need to move the closing </AST_Section2_Rule> tag over to our right */
int i = strlen(piece[--piece0]);
- if (i < 21 || strcmp(piece[piece0] + i - 21, "</PLex_Section2_Rule>") != 0)
+ /* 20 == strlen("</AST_Section2_Rule>"); abort unless the piece we just */
+ /* stepped back to ends with exactly that closing tag */
+ if (i < 20 || strcmp(piece[piece0] + i - 20, "</AST_Section2_Rule>") != 0)
abort();
- piece[piece0][i - 21] = 0;
+ /* cut the closing tag off the piece, then append the caller's text */
+ /* followed by the closing tag again, so text lands inside the Rule */
+ piece[piece0][i - 20] = 0;
piece_append(text);
- piece_append("</PLex_Section2_Rule>");
+ piece_append("</AST_Section2_Rule>");
piece_pack();
}
int i = piece1;
while (--i >= piece0 && strcmp(piece[i], "no") == 0)
;
- sprintf(piece_temp, "<PLex_Section1_Options_%s%s>", name, sense ? " value=\"true\"" : "");
+ sprintf(piece_temp, "<AST_Section1_Options_%s%s>", name, sense ? " value=\"true\"" : "");
piece_insert(i + 1, piece_temp);
piece_flush(strlen(yytext));
- sprintf(piece_temp, "</PLex_Section1_Options_%s>", name);
+ sprintf(piece_temp, "</AST_Section1_Options_%s>", name);
piece_append(piece_temp);
- /* append to last token text so it appears inside <PLex_Section1_Options>..</PLex_Section1_Options> */
+ /* append to last token text so it appears inside <AST_Section1_Options>..</AST_Section1_Options> */
--piece0;
piece_pack();
}