import element
import lex_yy_code
import state
- import xml.etree.ElementTree
YYLTYPE = state.location
}
-%code top
-{
-}
-
-%code
-{
- #current_prec = 0
- #current_lhs_location = 0
- #current_lhs_named_ref = []
- #current_lhs_symbol = []
- #current_class = unknown_sym
- #current_type = None
- gram_piece2 = 0
- gram_piece3 = 0
- #nested_rhs = 0
-}
-
/* Nick %define api.prefix {gram_} */
%define api.pure full
%define locations
%%
input
- : prologue_declarations "%%" grammar epilogue.opt
- {
- insert_after(2, '</AST_Section2>')
- insert_before(2, '<AST_Section2>')
- insert_after(0, '</AST_Section1>')
- insert_before(0, '<AST_Section1>')
- }
+ : %space (?E{ast.AST.Section1}prologue_declarations) "%%" %space (?E{ast.AST.Section2}grammar) epilogue.opt
;
/*------------------------------------.
prologue_declarations
: %empty
{
- global yychar
+ global yychar, yylval, yylloc
if yychar == YYEMPTY:
- yychar = lex_yy.gram_lex()
- temp = lex_yy.gram_piece[gram_piece2 + 1]
- lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
- lex_yy.gram_piece[gram_piece2] = lex_yy.gram_piece[gram_piece2 - 1]
- lex_yy.gram_piece[gram_piece2 - 1] = temp
+ yylval = None
+ yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
+ yychar = lex_yy.yylex()
+ yy_element_stack[-1] = lex_yy.yy_element_space
+ lex_yy.yy_element_space = element.Element()
}
| prologue_declarations prologue_declaration
;
prologue_declaration
: grammar_declaration
| "%{...%}"
+ | %space (?E{
+ (
+ ast.AST.Section1.PureParser
+ if $1 == 'api.pure' else
+ ast.AST.Section1.Locations
+ if $1 == 'locations' else
+ ast.AST.Section1.Debug
+ if $1 == 'parse.trace' else
+ element.Element
+ )
+ }"%<flag>")
+ | %space (?E{ast.AST.Section1.Define}"%define" variable value)
+ | %space (?E{ast.AST.Section1.Defines}"%defines")
+ | %space (?E{ast.AST.Section1.Defines}"%defines" STRING)
+ | %space (?E{ast.AST.Section1.ErrorVerbose}"%error-verbose")
+ | %space (?E{ast.AST.Section1.Expect, value = $2}"%expect" INT)
+ | %space (?E{ast.AST.Section1.ExpectRR, value = $2}"%expect-rr" INT)
+ | %space (?E{ast.AST.Section1.FilePrefix}"%file-prefix" STRING)
+ | %space (?E{ast.AST.Section1.GLRParser}"%glr-parser")
+ | %space (?E{ast.AST.Section1.InitialAction}"%initial-action" "{...}")
{
- #muscle_code_grow('post_prologue' if union_seen else 'pre_prologue', translate_code($1, @1, True), @1)
- lex_yy_code.yyin = None
- lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
- lex_yy_code.yytext = '' # fool unput()
- lex_yy_code.unput($1)
- lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
- lex_yy_code.yylex(ast.AST.Text)
- ref_list = []
- element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
- del lex_yy_code.yy_element_space.attrib['ref']
- lex_yy.gram_piece[gram_piece2] = \
- '<AST_Section1_Prologue>%{{{0:s}%}}</AST_Section1_Prologue>'.format(
- xml.etree.ElementTree.tostring(
- lex_yy_code.yy_element_space,
- encoding = 'unicode'
- )
- )
- }
- | "%<flag>"
- {
- #muscle_percent_define_ensure($1, @1, True)
- if $1 == 'api.pure':
- insert_after(0, '</AST_Section1_PureParser>')
- insert_before(0, '<AST_Section1_PureParser>')
- elif $1 == 'locations':
- insert_after(0, '</AST_Section1_Locations>')
- insert_before(0, '<AST_Section1_Locations>')
- elif $1 == 'parse.trace':
- insert_after(0, '</AST_Section1_Debug>')
- insert_before(0, '<AST_Section1_Debug>')
- else:
- assert False
- }
- | "%define" variable value
- {
- #muscle_percent_define_insert($2, @2, $3.kind, $3.chars, MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
- insert_after(2, '</AST_Section1_Define>')
- insert_before(0, '<AST_Section1_Define>')
- }
- | "%defines"
- {
- #defines_flag = True
- insert_after(0, '</AST_Section1_Defines>')
- insert_before(0, '<AST_Section1_Defines>')
- }
- | "%defines" STRING
- {
- #defines_flag = True
- #spec_defines_file = xstrdup($2)
- insert_after(1, '</AST_Section1_Defines>')
- insert_before(0, '<AST_Section1_Defines>')
- }
- | "%error-verbose"
- {
- #muscle_percent_define_insert('parse.error', @1, muscle_keyword, 'verbose', MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
- insert_after(0, '</AST_Section1_ErrorVerbose>')
- insert_before(0, '<AST_Section1_ErrorVerbose>')
- }
- | "%expect" INT
- {
- #expected_sr_conflicts = $2
- insert_after(1, '</AST_Section1_Expect>')
- insert_before(0, '<AST_Section1_Expect value="{0:d}">'.format($2))
- }
- | "%expect-rr" INT
- {
- #expected_rr_conflicts = $2
- insert_after(1, '</AST_Section1_ExpectRR>')
- insert_before(0, '<AST_Section1_ExpectRR value="{0:d}">'.format($2))
- }
- | "%file-prefix" STRING
- {
- #spec_file_prefix = $2
- insert_after(1, '</AST_Section1_FilePrefix>')
- insert_before(0, '<AST_Section1_FilePrefix>')
- }
- | "%glr-parser"
- {
- #nondeterministic_parser = True
- #glr_parser = True
- insert_after(0, '</AST_Section1_GLRParser>')
- insert_before(0, '<AST_Section1_GLRParser>')
- }
- | "%initial-action" "{...}"
- {
- #muscle_code_grow('initial_action', translate_code($2, @2, False), @2)
lex_yy_code.yyin = None
lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
lex_yy_code.yytext = '' # fool unput()
lex_yy_code.unput($2)
lex_yy_code.sc_context = lex_yy_code.SC_SYMBOL_ACTION # CODE_PROPS_SYMBOL_ACTION
lex_yy_code.yylex(ast.AST.Text)
- ref_list = []
- element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
- del lex_yy_code.yy_element_space.attrib['ref']
- lex_yy.gram_piece[gram_piece2 + 2] = xml.etree.ElementTree.tostring(
- lex_yy_code.yy_element_space,
- encoding = 'unicode'
- )
- insert_after(1, '</AST_Section1_InitialAction>')
- insert_before(0, '<AST_Section1_InitialAction>')
- }
- | "%language" STRING
- {
- #language_argmatch($2, grammar_prio, @1)
- insert_after(1, '</AST_Section1_Language>')
- insert_before(0, '<AST_Section1_Language>')
- }
- | "%name-prefix" STRING
- {
- #spec_name_prefix = $2
- insert_after(1, '</AST_Section1_NamePrefix>')
- insert_before(0, '<AST_Section1_NamePrefix>')
- }
- | "%no-lines"
- {
- #no_lines_flag = True
- insert_after(0, '</AST_Section1_Lines>')
- insert_before(0, '<AST_Section1_Lines value="false">')
- }
- | "%nondeterministic-parser"
- {
- #nondeterministic_parser = True
- insert_after(0, '</AST_Section1_NonDeterministicParser>')
- insert_before(0, '<AST_Section1_NonDeterministicParser>')
- }
- | "%output" STRING
- {
- #spec_outfile = $2
- insert_after(1, '</AST_Section1_Output>')
- insert_before(0, '<AST_Section1_Output>')
- }
- | "%param"
- {
- #current_param = $1
- }
- params
- {
- #current_param = param_none
- insert_after(2, '</AST_Section1_Param>')
- insert_before(0, '<AST_Section1_Param>')
- }
- | "%require" STRING
- {
- #version_check(&@2, $2)
- insert_after(1, '</AST_Section1_Require>')
- insert_before(0, '<AST_Section1_Require>')
- }
- | "%skeleton" STRING
- {
- #skeleton_user = $2
- #if strchr(skeleton_user, ord('/')):
- # dir_length = len(current_file)
- # skeleton_build = None
- # while dir_length and current_file[dir_length - 1] != ord('/'):
- # dir_length -= 1
- # while dir_length and current_file[dir_length - 1] == ord('/'):
- # dir_length -= 1
- # skeleton_build = xmalloc(dir_length + 1 + len(skeleton_user) + 1)
- # if dir_length > 0:
- # memcpy(skeleton_build, current_file, dir_length)
- # skeleton_build[dir_length++] = ord('/')
- # strcpy(skeleton_build + dir_length, skeleton_user)
- # skeleton_user = uniqstr_new(skeleton_build)
- # free(skeleton_build)
- #skeleton_arg(skeleton_user, grammar_prio, @1)
- insert_after(1, '</AST_Section1_Skeleton>')
- insert_before(0, '<AST_Section1_Skeleton>')
- }
- | "%token-table"
- {
- #token_table_flag = True
- insert_after(0, '</AST_Section1_TokenTable>')
- insert_before(0, '<AST_Section1_TokenTable>')
- }
- | "%verbose"
- {
- #report_flag |= report_states
- insert_after(0, '</AST_Section1_Verbose>')
- insert_before(0, '<AST_Section1_Verbose>')
- }
- | "%yacc"
- {
- #yacc_flag = True
- insert_after(0, '</AST_Section1_YACC>')
- insert_before(0, '<AST_Section1_YACC>')
- }
+ assert isinstance(yy_element_stack[-1][0], ast.AST.Section1.InitialAction)
+ assert isinstance(yy_element_stack[-1][0][0], ast.AST.Text)
+ tail = element.get_text(yy_element_stack[-1][0], 1)
+ yy_element_stack[-1][0][0] = lex_yy_code.yy_element_space
+ element.set_text(yy_element_stack[-1][0], 1, tail)
+ }
+ | %space (?E{ast.AST.Section1.Language}"%language" STRING)
+ | %space (?E{ast.AST.Section1.NamePrefix}"%name-prefix" STRING)
+ | %space (?E{ast.AST.Section1.Lines, value = False}"%no-lines")
+ | %space (?E{ast.AST.Section1.NonDeterministicParser}"%nondeterministic-parser")
+ | %space (?E{ast.AST.Section1.Output}"%output" STRING)
+ | %space (?E{ast.AST.Section1.Param}"%param" params)
+ | %space (?E{ast.AST.Section1.Require}"%require" STRING)
+ | %space (?E{ast.AST.Section1.Skeleton}"%skeleton" STRING)
+ | %space (?E{ast.AST.Section1.TokenTable}"%token-table")
+ | %space (?E{ast.AST.Section1.Verbose}"%verbose")
+ | %space (?E{ast.AST.Section1.YACC}"%yacc")
| /*FIXME: Err? What is this horror doing here? */ ";"
;
params
: params "{...}"
- {
- #add_param(current_param, $2, @2)
- }
| "{...}"
- {
- #add_param(current_param, $1, @1)
- }
;
/*----------------------.
grammar_declaration
: precedence_declaration
| symbol_declaration
- | "%start" symbol
- {
- #grammar_start_symbol_set($2, @2)
- insert_after(1, '</AST_Section1Or2_Start>')
- insert_after(1, '</AST_SymbolRef>')
- insert_before(1, '<AST_SymbolRef>')
- insert_before(0, '<AST_Section1Or2_Start>')
- }
- | code_props_type "{...}" generic_symlist
+ /* ) ) should be )) */
+ | %space (?E{ast.AST.Section1Or2.Start}"%start" %space (?E{ast.AST.SymbolRef}symbol) )
+ | %space (?E{ast.AST.Section1Or2.CodeProps, _type = $1}code_props_type "{...}" generic_symlist)
{
- #code = None
- #code_props_symbol_action_init(&code, $2, @2)
- #code_props_translate_code(&code)
lex_yy_code.yyin = None
lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
lex_yy_code.yytext = '' # fool unput()
lex_yy_code.unput($2)
lex_yy_code.sc_context = lex_yy_code.SC_SYMBOL_ACTION # CODE_PROPS_SYMBOL_ACTION
lex_yy_code.yylex(ast.AST.Text)
- ref_list = []
- element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
- del lex_yy_code.yy_element_space.attrib['ref']
- lex_yy.gram_piece[gram_piece2 + 2] = xml.etree.ElementTree.tostring(
- lex_yy_code.yy_element_space,
- encoding = 'unicode'
+ assert isinstance(yy_element_stack[-1][0], ast.AST.Section1Or2.CodeProps)
+ assert isinstance(yy_element_stack[-1][0][0], ast.AST.Text)
+ tail = element.get_text(yy_element_stack[-1][0], 1)
+ yy_element_stack[-1][0][0] = lex_yy_code.yy_element_space
+ element.set_text(yy_element_stack[-1][0], 1, tail)
+ }
+ | %space (?E{ast.AST.Section1Or2.DefaultPrec, value = True}"%default-prec")
+ | %space (?E{ast.AST.Section1Or2.DefaultPrec}"%no-default-prec")
+ /* the BracedCode should be removed altogether, put here for compatibility */
+ | %space (?E{ast.AST.Section1Or2.Code}"%code" (?E{ast.AST.ID}) %space (?E{ast.AST.BracedCode}"{...}") )
+ {
+ # most egregious (leftover from when we used to run code scanner on it):
+ assert isinstance(yy_element_stack[-1][0], ast.AST.Section1Or2.Code)
+ assert isinstance(yy_element_stack[-1][0][1], ast.AST.BracedCode)
+ assert isinstance(yy_element_stack[-1][0][1][0], ast.AST.Text)
+ element.set_text(
+ yy_element_stack[-1][0][1][0],
+ 0,
+ element.get_text(yy_element_stack[-1][0][1][0], 0)[1:-1] # remove { }
+ )
+ element.set_text(
+ yy_element_stack[-1][0][1],
+ 0,
+ element.get_text(yy_element_stack[-1][0][1], 0) + '{'
+ )
+ element.set_text(
+ yy_element_stack[-1][0][1],
+ 1,
+ '}' + element.get_text(yy_element_stack[-1][0][1], 1)
)
- #list = None
- #list = $3
- #while list:
- # symbol_list_code_props_set(list, $1, &code)
- # list = list->next
- #symbol_list_free($3)
- insert_after(2, '</AST_Section1Or2_CodeProps>')
- insert_before(0, '<AST_Section1Or2_CodeProps _type="{0:d}">'.format($1))
- }
- | "%default-prec"
- {
- #default_prec = True
- insert_after(0, '</AST_Section1Or2_DefaultPrec>')
- insert_before(0, '<AST_Section1Or2_DefaultPrec value="true">')
- }
- | "%no-default-prec"
- {
- #default_prec = False
- insert_after(0, '</AST_Section1Or2_DefaultPrec>')
- insert_before(0, '<AST_Section1Or2_DefaultPrec>')
- }
- | "%code" "{...}"
- {
- #muscle_code_grow('percent_code()', translate_code_braceless($2, @2), @2)
- lex_yy_code.yyin = None
- lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
- lex_yy_code.yytext = '' # fool unput()
- lex_yy_code.unput($2[1:-1])
- lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
- lex_yy_code.yylex(ast.AST.Text)
- ref_list = []
- element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
- del lex_yy_code.yy_element_space.attrib['ref']
- lex_yy.gram_piece[gram_piece2 + 2] = \
- '<AST_BracedCode>{{{0:s}}}</AST_BracedCode>'.format(
- xml.etree.ElementTree.tostring(
- lex_yy_code.yy_element_space,
- encoding = 'unicode'
- )
- )
- insert_after(1, '</AST_Section1Or2_Code>')
- insert_before(0, '<AST_Section1Or2_Code><AST_ID />')
}
- | "%code" ID "{...}"
- {
- #muscle_percent_code_grow($2, @2, translate_code_braceless($3, @3), @3)
- lex_yy_code.yyin = None
- lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
- lex_yy_code.yytext = '' # fool unput()
- lex_yy_code.unput($3[1:-1])
- lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
- lex_yy_code.yylex(ast.AST.Text)
- ref_list = []
- element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
- del lex_yy_code.yy_element_space.attrib['ref']
- lex_yy.gram_piece[gram_piece2 + 4] = \
- '<AST_BracedCode>{{{0:s}}}</AST_BracedCode>'.format(
- xml.etree.ElementTree.tostring(
- lex_yy_code.yy_element_space,
- encoding = 'unicode'
- )
- )
- insert_after(2, '</AST_Section1Or2_Code>')
- insert_before(0, '<AST_Section1Or2_Code>')
+ /* the BracedCode should be removed altogether, put here for compatibility */
+ | %space (?E{ast.AST.Section1Or2.Code}"%code" ID %space (?E{ast.AST.BracedCode}"{...}") )
+ {
+ # most egregious (leftover from when we used to run code scanner on it):
+ assert isinstance(yy_element_stack[-1][0], ast.AST.Section1Or2.Code)
+ assert isinstance(yy_element_stack[-1][0][1], ast.AST.BracedCode)
+ assert isinstance(yy_element_stack[-1][0][1][0], ast.AST.Text)
+ element.set_text(
+ yy_element_stack[-1][0][1][0],
+ 0,
+ element.get_text(yy_element_stack[-1][0][1][0], 0)[1:-1] # remove { }
+ )
+ element.set_text(
+ yy_element_stack[-1][0][1],
+ 0,
+ element.get_text(yy_element_stack[-1][0][1], 0) + '{'
+ )
+ element.set_text(
+ yy_element_stack[-1][0][1],
+ 1,
+ '}' + element.get_text(yy_element_stack[-1][0][1], 1)
+ )
}
;
%token PERCENT_UNION "%union";
union_name
- : %empty
- {
- insert_before(0, '<AST_ID />')
- }
+ : (?E{ast.AST.ID}%empty)
| ID
- {
- #muscle_percent_define_insert('api.value.union.name', @1, muscle_keyword, $1, MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
- }
;
grammar_declaration
- : "%union" union_name "{...}"
- {
- #union_seen = True
- #muscle_code_grow('union_members', translate_code_braceless($3, @3), @3)
- lex_yy_code.yyin = None
- lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
- lex_yy_code.yytext = '' # fool unput()
- lex_yy_code.unput($3[1:-1])
- lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
- lex_yy_code.yylex(ast.AST.Text)
- ref_list = []
- element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
- del lex_yy_code.yy_element_space.attrib['ref']
- lex_yy.gram_piece[gram_piece2 + 4] = \
- '<AST_BracedCode>{{{0:s}}}</AST_BracedCode>'.format(
- xml.etree.ElementTree.tostring(
- lex_yy_code.yy_element_space,
- encoding = 'unicode'
- )
- )
- insert_after(2, '</AST_Section1Or2_Union>')
- insert_before(0, '<AST_Section1Or2_Union>')
+ /* the BracedCode should be removed altogether, put here for compatibility */
+ : %space (?E{ast.AST.Section1Or2.Union}"%union" union_name %space (?E{ast.AST.BracedCode}"{...}") )
+ {
+ # most egregious (leftover from when we used to run code scanner on it):
+ assert isinstance(yy_element_stack[-1][0], ast.AST.Section1Or2.Union)
+ assert isinstance(yy_element_stack[-1][0][1], ast.AST.BracedCode)
+ assert isinstance(yy_element_stack[-1][0][1][0], ast.AST.Text)
+ element.set_text(
+ yy_element_stack[-1][0][1][0],
+ 0,
+ element.get_text(yy_element_stack[-1][0][1][0], 0)[1:-1] # remove { }
+ )
+ element.set_text(
+ yy_element_stack[-1][0][1],
+ 0,
+ element.get_text(yy_element_stack[-1][0][1], 0) + '{'
+ )
+ element.set_text(
+ yy_element_stack[-1][0][1],
+ 1,
+ '}' + element.get_text(yy_element_stack[-1][0][1], 1)
+ )
}
;
symbol_declaration
- : "%nterm"
- {
- #current_class = nterm_sym
- }
- symbol_defs.1
- {
- #current_class = unknown_sym
- #current_type = None
- insert_after(2, '</AST_Section1Or2_NTerm>')
- insert_before(0, '<AST_Section1Or2_NTerm>')
- }
- | "%token"
- {
- #current_class = token_sym
- }
- symbol_defs.1
- {
- #current_class = unknown_sym
- #current_type = None
- insert_after(2, '</AST_Section1Or2_Token>')
- insert_before(0, '<AST_Section1Or2_Token>')
- }
- | "%type" TAG symbols.1
- {
- #list = None
- #tag_seen = True
- #list = $3
- #while list:
- # symbol_type_set(list->content.sym, $2, @2)
- # list = list->next
- #symbol_list_free($3)
- insert_after(2, '</AST_Section1Or2_Type>')
- insert_before(0, '<AST_Section1Or2_Type>')
- }
+ : %space (?E{ast.AST.Section1Or2.NTerm}"%nterm" symbol_defs.1)
+ | %space (?E{ast.AST.Section1Or2.Token}"%token" symbol_defs.1)
+ | %space (?E{ast.AST.Section1Or2.Type}"%type" TAG symbols.1)
;
precedence_declaration
- : precedence_declarator tag.opt symbols.prec
- {
- #list = None
- #current_prec += 1
- #list = $3
- #while list:
- # symbol_type_set(list->content.sym, current_type, @2)
- # symbol_precedence_set(list->content.sym, current_prec, $1, @1)
- # list = list->next
- #symbol_list_free($3)
- #current_type = None
- insert_after(2, '</AST_Section1Or2_Precedence>')
- insert_before(0, '<AST_Section1Or2_Precedence _type="{0:d}">'.format(($1 & 3) - 1))
- }
+ : %space (?E{ast.AST.Section1Or2.Precedence, _type = ($1 & 3) - 1}precedence_declarator tag.opt symbols.prec)
;
precedence_declarator
tag.opt
: %empty
- {
- #current_type = None
- }
| TAG
- {
- #current_type = $1
- #tag_seen = True
- }
;
/* Just like symbols.1 but accept INT for the sake of POSIX. */
symbols.prec
: symbol.prec
- {
- #$$ = symbol_list_sym_new($1, @1)
- }
| symbols.prec symbol.prec
- {
- #$$ = symbol_list_append($1, symbol_list_sym_new($2, @2))
- }
;
symbol.prec
- : symbol
- {
- #$$ = $1
- #symbol_class_set($1, token_sym, @1, False)
- insert_after(0, '</AST_SymbolRef>')
- insert_before(0, '<AST_SymbolRef>')
- }
- | symbol INT
- {
- #$$ = $1
- #symbol_user_token_number_set($1, $2, @2)
- #symbol_class_set($1, token_sym, @1, False)
- insert_after(1, '</AST_SymbolRef>')
- insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
- }
+ : %space (?E{ast.AST.SymbolRef}symbol)
+ | %space (?E{ast.AST.SymbolRef, user_token = $2}symbol INT)
;
/* One or more symbols to be %typed. */
symbols.1
- : symbol
- {
- #$$ = symbol_list_sym_new($1, @1)
- insert_after(0, '</AST_SymbolRef>')
- insert_before(0, '<AST_SymbolRef>')
- }
- | symbols.1 symbol
- {
- #$$ = symbol_list_append($1, symbol_list_sym_new($2, @2))
- insert_after(1, '</AST_SymbolRef>')
- insert_before(1, '<AST_SymbolRef>')
- }
+ : %space (?E{ast.AST.SymbolRef}symbol)
+ | symbols.1 %space (?E{ast.AST.SymbolRef}symbol)
;
generic_symlist
: generic_symlist_item
- {
- #$$ = $1
- }
| generic_symlist generic_symlist_item
- {
- #$$ = symbol_list_append($1, $2)
- }
;
generic_symlist_item
- : symbol
- {
- #$$ = symbol_list_sym_new($1, @1)
- insert_after(0, '</AST_SymbolRef>')
- insert_before(0, '<AST_SymbolRef>')
- }
+ : %space (?E{ast.AST.SymbolRef}symbol)
| tag
- {
- #$$ = symbol_list_type_new($1, @1)
- }
;
tag
: TAG
| "<*>"
- {
- #$$ = uniqstr_new('*')
- }
| "<>"
- {
- #$$ = uniqstr_new('')
- }
;
/* One token definition. */
symbol_def
: TAG
- {
- #current_type = $1
- #tag_seen = True
- }
- | id
- {
- #symbol_class_set($1, current_class, @1, True)
- #symbol_type_set($1, current_type, @1)
- insert_after(0, '</AST_SymbolRef>')
- insert_before(0, '<AST_SymbolRef>')
- }
- | id INT
- {
- #symbol_class_set($1, current_class, @1, True)
- #symbol_type_set($1, current_type, @1)
- #symbol_user_token_number_set($1, $2, @2)
- insert_after(1, '</AST_SymbolRef>')
- insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
- }
- | id string_as_id
- {
- #symbol_class_set($1, current_class, @1, True)
- #symbol_type_set($1, current_type, @1)
- #symbol_make_alias($1, $2, @$)
- insert_after(1, '</AST_SymbolRef>')
- insert_before(0, '<AST_SymbolRef>')
- }
- | id INT string_as_id
- {
- #symbol_class_set($1, current_class, @1, True)
- #symbol_type_set($1, current_type, @1)
- #symbol_user_token_number_set($1, $2, @2)
- #symbol_make_alias($1, $3, @$)
- insert_after(2, '</AST_SymbolRef>')
- insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
- }
+ | %space (?E{ast.AST.SymbolRef}id)
+ | %space (?E{ast.AST.SymbolRef, user_token = $2}id INT)
+ | %space (?E{ast.AST.SymbolRef}id string_as_id)
+ | %space (?E{ast.AST.SymbolRef, user_token = $2}id INT string_as_id)
;
/* One or more symbol definitions. */
;
rules
- : id_colon named_ref.opt
- {
- #current_lhs($1, @1, $2)
- }
- rhses.1
- {
- #current_lhs(0, @1, 0)
- insert_after(3, '</AST_Section2_Rules>')
- insert_after(0, '</AST_SymbolRef>')
- insert_before(0, '<AST_SymbolRef>')
- insert_before(0, '<AST_Section2_Rules>')
- }
+ : %space (?E{ast.AST.Section2.Rules}(?E{ast.AST.SymbolRef}id_colon) named_ref.opt rhses.1)
;
rhses.1
- : rhs
- {
- #grammar_current_rule_end(@1)
- insert_after(0, '</AST_Production>')
- insert_before(0, '<AST_Production>')
- }
- | rhses.1 "|" rhs
- {
- #grammar_current_rule_end(@3)
- insert_after(2, '</AST_Production>')
- insert_before(2, '<AST_Production>')
- }
+ : %space (?E{ast.AST.Production}rhs)
+ | rhses.1 "|" %space (?E{ast.AST.Production}rhs)
| rhses.1 ";"
;
rhs
: %empty
{
- global yychar
- #if nested_rhs:
- # nested_rhs -= 1
- #else:
- # grammar_current_rule_begin(current_lhs_symbol, current_lhs_location, current_lhs_named_ref)
+ global yychar, yylval, yylloc
if yychar == YYEMPTY:
- yychar = lex_yy.gram_lex()
- temp = lex_yy.gram_piece[gram_piece2 + 1]
- lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
- lex_yy.gram_piece[gram_piece2] = lex_yy.gram_piece[gram_piece2 - 1]
- lex_yy.gram_piece[gram_piece2 - 1] = temp
- }
- | rhs symbol named_ref.opt
- {
- #grammar_current_rule_symbol_append($2, @2, $3)
- insert_after(2, '</AST_Production_SymbolRef>')
- insert_after(1, '</AST_SymbolRef>')
- insert_before(1, '<AST_Production_SymbolRef><AST_SymbolRef>')
+ yylval = None
+ yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
+ yychar = lex_yy.yylex()
+ yy_element_stack[-1] = lex_yy.yy_element_space
+ lex_yy.yy_element_space = element.Element()
}
- | rhs "{...}" named_ref.opt
+ | rhs %space (?E{ast.AST.Production.SymbolRef}(?E{ast.AST.SymbolRef}symbol) named_ref.opt)
+ | rhs %space (?E{ast.AST.Production.Action}"{...}" named_ref.opt)
{
- #grammar_current_rule_action_append($2, @2, $3, False)
lex_yy_code.yyin = None
lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
lex_yy_code.yytext = '' # fool unput()
lex_yy_code.unput($2)
lex_yy_code.sc_context = lex_yy_code.SC_RULE_ACTION # CODE_PROPS_RULE_ACTION
lex_yy_code.yylex(ast.AST.Text)
- ref_list = []
- element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
- del lex_yy_code.yy_element_space.attrib['ref']
- lex_yy.gram_piece[gram_piece2 + 2] = xml.etree.ElementTree.tostring(
- lex_yy_code.yy_element_space,
- encoding = 'unicode'
- )
- insert_after(2, '</AST_Production_Action>')
- insert_before(1, '<AST_Production_Action>')
+ assert isinstance(yy_element_stack[-1][0], ast.AST.Production.Action)
+ assert isinstance(yy_element_stack[-1][0][0], ast.AST.Text)
+ tail = element.get_text(yy_element_stack[-1][0], 1)
+ yy_element_stack[-1][0][0] = lex_yy_code.yy_element_space
+ element.set_text(yy_element_stack[-1][0], 1, tail)
}
| rhs "%?{...}"
{
#grammar_current_rule_action_append($2, @2, None, True)
}
- | rhs "%empty"
- {
- #grammar_current_rule_empty_set(@2)
- insert_after(1, '</AST_Production_Empty>')
- insert_before(1, '<AST_Production_Empty>')
- }
- | rhs "%prec" symbol
- {
- #grammar_current_rule_prec_set($3, @3)
- insert_after(2, '</AST_Production_Prec>')
- insert_after(2, '</AST_SymbolRef>')
- insert_before(2, '<AST_SymbolRef>')
- insert_before(1, '<AST_Production_Prec>')
- }
- | rhs "%dprec" INT
- {
- #grammar_current_rule_dprec_set($3, @3)
- insert_after(2, '</AST_Production_DPrec>')
- insert_before(1, '<AST_Production_DPrec value="{0:d}">'.format($3))
- }
- | rhs "%merge" TAG
- {
- #grammar_current_rule_merge_set($3, @3)
- insert_after(2, '</AST_Production_Merge>')
- insert_before(1, '<AST_Production_Merge>')
- }
+ | rhs %space (?E{ast.AST.Production.Empty}"%empty")
+ /* ) ) should be )) */
+ | rhs %space (?E{ast.AST.Production.Prec}"%prec" %space (?E{ast.AST.SymbolRef}symbol) )
+ | rhs %space (?E{ast.AST.Production.DPrec, value = $3}"%dprec" INT)
+ | rhs %space (?E{ast.AST.Production.Merge}"%merge" TAG)
/* Nick extra rules for element groups */
- | rhs '('
- /*{
- #nested_rhs += 1
- }*/
- rhs ')'
+ | rhs %space (?E{ast.AST.Production.GroupElement}'(' rhs ')')
{
lex_yy_code.yyin = None
lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
lex_yy_code.unput($2[4:-1])
lex_yy_code.sc_context = lex_yy_code.SC_RULE_ACTION # CODE_PROPS_RULE_ACTION
lex_yy_code.yylex(ast.AST.Text)
- ref_list = []
- element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
- del lex_yy_code.yy_element_space.attrib['ref']
- lex_yy.gram_piece[gram_piece2 + 2] = \
- '(?E{{{0:s}}}'.format(
- xml.etree.ElementTree.tostring(
- lex_yy_code.yy_element_space,
- encoding = 'unicode'
- )
- )
- insert_after(3, '</AST_Production_GroupElement>')
- insert_before(1, '<AST_Production_GroupElement>')
+ assert isinstance(yy_element_stack[-1][0], ast.AST.Production.GroupElement)
+ assert isinstance(yy_element_stack[-1][0][0], ast.AST.Text)
+ tail = element.get_text(yy_element_stack[-1][0], 1)
+ yy_element_stack[-1][0][0] = lex_yy_code.yy_element_space
+ element.set_text(yy_element_stack[-1][0], 1, tail)
}
/* Nick added %space */
- | rhs "%space"
- {
- insert_after(1, '</AST_Production_Space>')
- insert_before(1, '<AST_Production_Space>')
- }
+ | rhs %space (?E{ast.AST.Production.Space}"%space")
;
named_ref.opt
: %empty
- {
- #$$ = 0
- }
| BRACKETED_ID
- {
- #$$ = named_ref_new($1, @1)
- }
;
/*---------------------.
variable
: ID
| STRING
- {
- #$$ = uniqstr_new($1)
- }
;
/* Some content or empty by default. */
value
: %empty
- {
- #$$.kind = muscle_keyword
- #$$.chars = ''
- }
| ID
- {
- #$$.kind = muscle_keyword
- #$$.chars = $1
- }
| STRING
- {
- #$$.kind = muscle_string
- #$$.chars = $1
- }
| "{...}"
- {
- #$$.kind = muscle_code
- #$$.chars = strip_braces($1)
- }
;
/*--------------.
id
: ID
- {
- #$$ = symbol_from_uniqstr($1, @1)
- }
| CHAR
- {
- #$$ = symbol_get(char_name($1), @1)
- #symbol_class_set($$, token_sym, @1, False)
- #symbol_user_token_number_set($$, $1, @1)
- }
;
id_colon
: ID_COLON
- {
- #$$ = symbol_from_uniqstr($1, @1)
- }
;
symbol
/* A string used as an ID: quote it. */
string_as_id
: STRING
- {
- #$$ = symbol_get(quotearg_style(c_quoting_style, $1), @1)
- #symbol_class_set($$, token_sym, @1, False)
- }
;
epilogue.opt
: %empty
- | "%%" EPILOGUE
- {
- #muscle_code_grow('epilogue', translate_code($2, @2, True), @2)
- #code_scanner_last_string_free()
- insert_after(1, '</AST_Section3>')
- insert_after(0, '<AST_Section3>')
- }
+ | "%%" (?E{ast.AST.Section3}EPILOGUE %space)
;
%%
# current_lhs_location = loc
# free(current_lhs_named_ref)
# current_lhs_named_ref = ref
-
-def insert_before(n, str):
- global gram_piece3
- lex_yy.gram_piece_insert(gram_piece2 + n * 2, str)
- lex_yy.gram_piece0 += 1
- gram_piece3 += 1
-
-def insert_after(n, str):
- global gram_piece3
- lex_yy.gram_piece_insert(gram_piece2 + n * 2 + 1, str)
- lex_yy.gram_piece0 += 1
- gram_piece3 += 1
%option debug nodefault noinput noyywrap never-interactive
%{
+ import ast
import state
import y_tab
#def gram_scanner_last_string_free():
# del obstack_for_string[:]
- gram_piece = []
- gram_piece0 = 0
- gram_piece1 = 0
markup_stack = []
- percent_percent_count = 0;
+ percent_percent_count = 0
# these should be yylex()-local, but moved to here, see further down:
nesting = 0
context_state = -1
id_loc = state.location()
- code_start = scanner_cursor.copy()
- token_start = scanner_cursor.copy()
- #first = True
- if True: #first:
- scanner_cursor = y_tab.yylloc.start.copy()
- #first = False
+ code_start = None
+ token_start = None
+ scanner_cursor = state.boundary('<stdin>', 0, 0)
%}
%x SC_YACC_COMMENT
"|" return y_tab.PIPE
";" return y_tab.SEMICOLON
- {id} {
+ (?E{ast.AST.ID}{id}) {
global id_loc, bracketed_id_str
y_tab.yylval = yytext
id_loc = y_tab.yylloc
bracketed_id_str = None
+ markup_push(element.Element)
BEGIN(SC_AFTER_IDENTIFIER)
- gram_piece_pack()
- gram_piece_append('<AST_ID>')
- gram_piece_flush(len(yytext))
- gram_piece_append('</AST_ID>')
- gram_piece_pack()
}
{int} {
global token_start
token_start = y_tab.yylloc.start
BEGIN(SC_ESCAPED_CHARACTER)
- gram_piece_pack()
- markup_stack.append(len(gram_piece)) # <AST_Char>
- gram_piece_flush(len(yytext))
- markup_stack.append(len(gram_piece)) # <AST_Text>
+ markup_push(element.Element)
+ markup_push(ast.AST.Char)
+ markup_flush(len(yytext))
+ markup_push(ast.AST.Text)
}
/* Strings. */
global token_start
token_start = y_tab.yylloc.start
BEGIN(SC_ESCAPED_STRING)
- gram_piece_pack()
- markup_stack.append(len(gram_piece)) # <AST_String>
- gram_piece_flush(len(yytext))
- markup_stack.append(len(gram_piece)) # <AST_Text>
+ markup_push(element.Element)
+ markup_push(ast.AST.String)
+ markup_flush(len(yytext))
+ markup_push(ast.AST.Text)
}
/* Prologue. */
global code_start
code_start = y_tab.yylloc.start
BEGIN(SC_PROLOGUE)
- gram_piece_pack()
- markup_stack.append(len(gram_piece)) # <AST_Section1_Prologue>
- gram_piece_flush(len(yytext))
- markup_stack.append(len(gram_piece)) # <AST_Text>
+ markup_push(element.Element)
+ markup_push(ast.AST.Section1.Prologue)
+ markup_flush(len(yytext))
+ markup_push(ast.AST.Text)
}
/* Code in between braces. */
nesting = 0
code_start = y_tab.yylloc.start
BEGIN(SC_BRACED_CODE)
- gram_piece_pack()
- markup_stack.append(len(gram_piece)) # <AST_BracedCode>
- gram_piece_flush(len(yytext))
- markup_stack.append(len(gram_piece)) # <AST_Text>
+ markup_push(element.Element)
+ # new way, includes braces, wrapped by <AST_Production_Action> later
+ markup_push(ast.AST.Text)
+ markup_flush(len(yytext))
+ # old way
+ #markup_push(ast.AST.BracedCode)
+ #markup_flush(len(yytext))
+ #markup_push(ast.AST.Text)
+ # to here
}
/* Semantic predicate. */
global nesting, code_start
nesting = 0
code_start = y_tab.yylloc.start
- gram_piece_pack()
- markup_stack.append(len(gram_piece)) # <AST_BracedPredicate>
- gram_piece_flush(len(yytext))
- markup_stack.append(len(gram_piece)) # <AST_Text>
BEGIN(SC_PREDICATE)
+ markup_push(element.Element)
+ markup_push(ast.AST.BracedPredicate)
+ markup_flush(len(yytext))
+ markup_push(ast.AST.Text)
}
/* Nick extra rules for element groups */
nesting = 0
code_start = y_tab.yylloc.start
BEGIN(SC_ELEMENT_GROUP)
- gram_piece_pack()
- gram_piece_flush(len(yytext))
- markup_stack.append(len(gram_piece)) # <AST_Text>
+ markup_push(element.Element)
+ markup_flush(len(yytext))
+ markup_push(ast.AST.Text)
}
")" return ord(')')
/* A type. */
- "<*>" {
- gram_piece_pack()
- gram_piece_append('<AST_TagRef><<AST_Text>*</AST_Text>></AST_TagRef>')
- return ~y_tab.TAG_ANY
+ "(?E{ast.AST.TagRef}<(?E{ast.AST.Text}*)>)" {
+ return y_tab.TAG_ANY
}
- "<>" {
- gram_piece_pack()
- gram_piece_append('<AST_TagRef><<AST_Text />></AST_TagRef>')
- return ~y_tab.TAG_NONE
+ "(?E{ast.AST.TagRef}<(?E{ast.AST.Text})>)" {
+ return y_tab.TAG_NONE
}
"<" {
global nesting, token_start
nesting = 0
token_start = y_tab.yylloc.start
BEGIN(SC_TAG)
- gram_piece_pack()
- markup_stack.append(len(gram_piece)) # <AST_TagRef>
- gram_piece_flush(len(yytext))
- markup_stack.append(len(gram_piece)) # <AST_Text>
+ markup_push(element.Element)
+ markup_push(ast.AST.TagRef)
+ markup_flush(len(yytext))
+ markup_push(ast.AST.Text)
}
"%%" {
percent_percent_count += 1
if percent_percent_count == 2:
BEGIN(SC_EPILOGUE)
- gram_piece_pack()
- gram_piece_escape(yytext)
- gram_piece_pack()
- gram_piece_pack()
- return ~y_tab.PERCENT_PERCENT
return y_tab.PERCENT_PERCENT
}
global bracketed_id_start, bracketed_id_context_state
if bracketed_id_str is not None:
scanner_cursor.column -= len(yytext)
- yyless(0)
+ markup_yyless(0)
+ markup_pop_token() # element.Element
BEGIN(SC_RETURN_BRACKETED_ID)
y_tab.yylloc = id_loc
return y_tab.ID
else:
+ markup_pop_token() # element.Element
bracketed_id_start = y_tab.yylloc.start
bracketed_id_context_state = YY_START()
BEGIN(SC_BRACKETED_ID)
}
":" {
+ markup_pop_token() # element.Element
BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
y_tab.yylloc = id_loc
- gram_piece_escape(yytext)
- return ~y_tab.ID_COLON
+ markup_flush(len(yytext))
+ return y_tab.ID_COLON
}
. {
scanner_cursor.column -= len(yytext)
- yyless(0)
+ markup_yyless(0)
+
+ # total kludge: put back all whitespace/comments after the ID, and rescan
+ # (this will mess up the position tracking, need to revisit and fix later)
+ unput(element.get_text(yy_element_space, len(yy_element_space)))
+ element.set_text(yy_element_space, len(yy_element_space), '')
+
+ markup_pop_token() # element.Element
BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
y_tab.yylloc = id_loc
- return ~y_tab.ID
+ return y_tab.ID
}
<<EOF>> {
+ markup_pop_token() # element.Element
BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
y_tab.yylloc = id_loc
- return ~y_tab.ID
+ return y_tab.ID
}
}
. {
global bracketed_id_str
scanner_cursor.column -= len(yytext)
- yyless(0)
+ markup_yyless(0)
y_tab.yylval = bracketed_id_str
bracketed_id_str = None
y_tab.yylloc = bracketed_id_loc
y_tab.yylloc.start = token_start
y_tab.yylval = gram_last_string
BEGIN(INITIAL)
- gram_piece_insert(markup_stack.pop(), '<AST_Text>')
- gram_piece_append('</AST_Text>')
- gram_piece_escape(yytext)
- gram_piece_insert(markup_stack.pop(), '<AST_String>')
- gram_piece_append('</AST_String>')
- gram_piece_pack()
- return ~y_tab.STRING
+ markup_pop() # ast.AST.Text
+ markup_flush(len(yytext))
+ markup_pop() # ast.AST.String
+ markup_pop_token() # element.Element
+ return y_tab.STRING
}
<<EOF>> unexpected_eof(token_start, '"')
"\n" unexpected_newline(token_start, '"')
y_tab.yylval = ord(gram_last_string[0])
#del obstack_for_string[:]
BEGIN(INITIAL)
- gram_piece_insert(markup_stack.pop(), '<AST_Text>')
- gram_piece_append('</AST_Text>')
- gram_piece_escape(yytext)
- gram_piece_insert(markup_stack.pop(), '<AST_Char>')
- gram_piece_append('</AST_Char>')
- gram_piece_pack()
- return ~y_tab.CHAR
+ markup_pop() # ast.AST.Text
+ markup_flush(len(yytext))
+ markup_pop() # ast.AST.Char
+ markup_pop_token() # element.Element
+ return y_tab.CHAR
}
"\n" unexpected_newline(token_start, '\'')
<<EOF>> unexpected_eof(token_start, '\'')
y_tab.yylval = gram_last_string
#del obstack_for_string[:]
BEGIN(INITIAL)
- gram_piece_insert(markup_stack.pop(), '<AST_Text>')
- gram_piece_append('</AST_Text>')
- gram_piece_escape(yytext)
- gram_piece_insert(markup_stack.pop(), '<AST_TagRef>')
- gram_piece_append('</AST_TagRef>')
- gram_piece_pack()
- return ~y_tab.TAG
+ markup_pop() # ast.AST.Text
+ markup_flush(len(yytext))
+ markup_pop() # ast.AST.TagRef
+ markup_pop_token() # element.Element
+ return y_tab.TAG
obstack_for_string.append(yytext)
}
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
- \\[0-7]{1,3} {
- c = strtoul(yytext + 1, None, 8)
+ (?E{ast.AST.Text.Escape, character = int(yy_groups[2], 8)}\\([0-7]{1,3})) {
+ c = int(yytext + 1, 8)
if not c or 0x7f * 2 + 1 < c:
state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
else:
obstack_for_string.append(chr(c))
- rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', int(c))
- gram_piece_append(gram_piece_temp)
- gram_piece_flush(len(yytext))
- gram_piece_append('</AST_Text_Escape>')
}
- \\x[0-9abcdefABCDEF]+ {
- c = strtoul(yytext + 2, None, 16)
+ (?E{ast.AST.Text.Escape, character = int(yy_groups[2], 16)}\\x([0-9abcdefABCDEF]+)) {
+ c = int(yytext + 2, 16)
if not c or 0x7f * 2 + 1 < c:
state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
else:
obstack_for_string.append(chr(c))
- rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', int(c))
- gram_piece_append(gram_piece_temp)
- gram_piece_flush(len(yytext))
- gram_piece_append('</AST_Text_Escape>')
}
- \\a {
+ (?E{ast.AST.Text.Escape, character = 7}\\a) {
obstack_for_string.append('\a')
- gram_piece_append('<AST_Text_Escape character="7">')
- gram_piece_flush(len(yytext))
- gram_piece_append('</AST_Text_Escape>')
}
- \\b {
+ (?E{ast.AST.Text.Escape, character = 8}\\b) {
obstack_for_string.append('\b')
- gram_piece_append('<AST_Text_Escape character="8">')
- gram_piece_flush(len(yytext))
- gram_piece_append('</AST_Text_Escape>')
}
- \\f {
+ (?E{ast.AST.Text.Escape, character = 12}\\f) {
obstack_for_string.append('\f')
- gram_piece_append('<AST_Text_Escape character="12">')
- gram_piece_flush(len(yytext))
- gram_piece_append('</AST_Text_Escape>')
}
- \\n {
+ (?E{ast.AST.Text.Escape, character = 10}\\n) {
obstack_for_string.append('\n')
- gram_piece_append('<AST_Text_Escape character="10">')
- gram_piece_flush(len(yytext))
- gram_piece_append('</AST_Text_Escape>')
}
- \\r {
+ (?E{ast.AST.Text.Escape, character = 13}\\r) {
obstack_for_string.append('\r')
- gram_piece_append('<AST_Text_Escape character="13">')
- gram_piece_flush(len(yytext))
- gram_piece_append('</AST_Text_Escape>')
}
- \\t {
+ (?E{ast.AST.Text.Escape, character = 9}\\t) {
obstack_for_string.append('\t')
- gram_piece_append('<AST_Text_Escape character="9">')
- gram_piece_flush(len(yytext))
- gram_piece_append('</AST_Text_Escape>')
}
- \\v {
+ (?E{ast.AST.Text.Escape, character = 11}\\v) {
obstack_for_string.append('\v')
- gram_piece_append('<AST_Text_Escape character="11">')
- gram_piece_flush(len(yytext))
- gram_piece_append('</AST_Text_Escape>')
}
/* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
- \\("\""|"'"|"?"|"\\") {
+ (?E{ast.AST.Text.Escape, character = ord(yy_groups[2])}\\("\""|"'"|"?"|"\\")) {
obstack_for_string.append(yytext[1])
- rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', yytext[1])
- gram_piece_append(gram_piece_temp)
- gram_piece_flush(len(yytext))
- gram_piece_append('</AST_Text_Escape>')
}
- \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
+ (?E{ast.AST.Text.Escape, character = int(('' if yy_groups[3] is None else yy_groups[3]) + yy_groups[4], 16)}\\(u|U([0-9abcdefABCDEF]{4}))([0-9abcdefABCDEF]{4})) {
c = convert_ucn_to_byte(yytext)
if c <= 0:
state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
else:
obstack_for_string.append(chr(c))
- rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', c)
- gram_piece_append(gram_piece_temp)
- gram_piece_flush(len(yytext))
- gram_piece_append('</AST_Text_Escape>')
}
\\(.|\n) {
p = yytext[1:]
y_tab.yylloc.start = code_start
y_tab.yylval = gram_last_string
BEGIN(INITIAL)
- gram_piece_insert(markup_stack.pop(), '<AST_Text>')
- gram_piece_append('</AST_Text>')
- gram_piece_escape(yytext)
- gram_piece_insert(markup_stack.pop(), '<AST_BracedCode>')
- gram_piece_append('</AST_BracedCode>')
- gram_piece_pack()
- return ~y_tab.BRACED_CODE
+ # new way, includes braces, wrapped by <AST_Production_Action> later
+ markup_flush(len(yytext))
+ markup_pop() # ast.AST.Text
+ # old way
+ #markup_pop() # ast.AST.Text
+ #markup_flush(len(yytext))
+ #markup_pop() # ast.AST.BracedCode
+ # to here
+ markup_pop_token() # element.Element
+ return y_tab.BRACED_CODE
}
}
y_tab.yylloc.start = code_start
y_tab.yylval = gram_last_string
BEGIN(INITIAL)
- gram_piece_insert(markup_stack.pop(), '<AST_Text>')
- gram_piece_append('</AST_Text>')
- gram_piece_escape(yytext)
- gram_piece_insert(markup_stack.pop(), '<AST_BracedPredicate>')
- gram_piece_append('</AST_BracedPredicate>')
- gram_piece_pack()
- return ~y_tab.BRACED_PREDICATE
+ markup_pop() # ast.AST.Text
+ markup_flush(len(yytext))
+ markup_pop() # ast.AST.BracedPredicate
+ markup_pop_token() # element.Element
+ return y_tab.BRACED_PREDICATE
else:
obstack_for_string.append('}')
}
y_tab.yylval = gram_last_string
#del obstack_for_string[:]
BEGIN(INITIAL)
- gram_piece_insert(markup_stack.pop(), '<AST_Text>')
- gram_piece_append('</AST_Text>')
- gram_piece_escape(yytext)
- gram_piece_pack()
- return ~ord('(')
+ markup_pop() # ast.AST.Text
+ markup_flush(len(yytext))
+ markup_pop_token() # element.Element
+ return ord('(')
}
}
y_tab.yylloc.start = code_start
y_tab.yylval = gram_last_string
BEGIN(INITIAL)
- gram_piece_insert(markup_stack.pop(), '<AST_Text>')
- gram_piece_append('</AST_Text>')
- gram_piece_escape(yytext)
- gram_piece_insert(markup_stack.pop(), '<AST_Section1_Prologue>')
- gram_piece_append('</AST_Section1_Prologue>')
- gram_piece_pack()
- return ~y_tab.PROLOGUE
+ markup_pop() # ast.AST.Text
+ markup_flush(len(yytext))
+ markup_pop() # ast.AST.Section1.Prologue
+ markup_pop_token() # element.Element
+ return y_tab.PROLOGUE
}
<<EOF>> unexpected_eof(code_start, '%}')
y_tab.yylloc.start = code_start
y_tab.yylval = gram_last_string
BEGIN(INITIAL)
- gram_piece_pack()
- return ~y_tab.EPILOGUE
+ return y_tab.EPILOGUE
}
}
num = 0x7fffffff
return num
-#def convert_ucn_to_byte(ucn):
-# code = strtoul(ucn + 2, None, 16)
-# if 0x7f * 2 + 1 < code:
-# return -1
-# return code
-#
+# Convert a universal character name escape (the matched text, including the
+# leading backslash and the 'u'/'U' introducer, e.g. '\\u00e9') to a byte
+# value. Returns -1 when the code point does not fit in a single byte.
+def convert_ucn_to_byte(ucn):
+ # ucn[2:] skips the backslash and the 'u'/'U', leaving only hex digits
+ code = int(ucn[2:], 16)
+ # 0x7f * 2 + 1 == 0xff, the largest value representable in one byte
+ if 0x7f * 2 + 1 < code:
+  return -1
+ return code
+
#def handle_syncline(args, loc):
# file = None
# lineno = strtoul(args, &file, 10)
# del obstack_for_string[:]
# yy_delete_buffer(YY_CURRENT_BUFFER)
-def gram_piece_append(str):
- gram_piece.append(str)
-
-def gram_piece_insert(n, str):
- gram_piece[n:n] = [str]
-
-xml_escape = {'<': '<', '>': '>', '&': '&'}
-def gram_piece_escape(str):
- gram_piece.append(''.join([xml_escape.get(i, i) for i in str]))
-
-def gram_piece_flush(n):
- global yytext
- gram_piece_escape(yytext[:n])
- yytext = yytext[n:]
-
-def gram_piece_pack():
- global gram_piece0
- gram_piece[gram_piece0:] = [''.join(gram_piece[gram_piece0:])]
- gram_piece0 += 1
-
-def gram_lex():
- result = yylex()
- if result < 0:
- return ~result
- gram_piece_pack()
- gram_piece_escape(yytext)
- gram_piece_pack()
- return result
+# these exist for the purpose of adding markup to sequences that are
+# recognized by several iterations of yylex(), it would be better to
+# try to use more complex regular expressions to match all in one go:
+
+# Move the first n characters of the pending token element's text into the
+# trailing text slot of yy_element_space, i.e. reclassify a matched prefix
+# as inter-token space rather than token content.
+def markup_flush(n):
+ text = element.get_text(yy_element_token, 0)
+ element.set_text(
+ yy_element_space,
+ len(yy_element_space),
+ element.get_text(yy_element_space, len(yy_element_space)) + text[:n]
+ )
+ # keep only the unflushed remainder as the token's text
+ element.set_text(yy_element_token, 0, text[n:])
+
+# Like yyless(n): push back all but the first n characters of the match,
+# and truncate the token element's text accordingly so the markup stays
+# consistent with what the scanner will actually consume.
+def markup_yyless(n):
+ yyless(n)
+ element.set_text(
+ yy_element_token,
+ 0,
+ element.get_text(yy_element_token, 0)[:n]
+ )
+
+# Open a nested markup element: save the current space element on
+# markup_stack and make a fresh element built by factory(*args, **kwargs)
+# the current yy_element_space. Paired with markup_pop().
+def markup_push(factory, *args, **kwargs):
+ global yy_element_space
+ markup_stack.append(yy_element_space)
+ yy_element_space = factory(*args, **kwargs)
+
+# Close the innermost markup element opened by markup_push(): the current
+# space element becomes a child of the saved (outer) element, which is
+# restored as yy_element_space.
+def markup_pop():
+ global yy_element_space
+ _element = yy_element_space
+ yy_element_space = markup_stack.pop()
+ yy_element_space.append(_element)
+ #element.set_text(yy_element_space, len(yy_element_space), '')
+
+# Close the token-level markup element: merge the token element's text and
+# child elements into the current space element, then exchange the two so
+# the accumulated leading space ends up prepended onto the token, and the
+# outer space element is restored from markup_stack.
+# NOTE(review): assumes markup_stack is non-empty here (a matching push was
+# done when the token started) — confirm against the token-start rules.
+def markup_pop_token():
+ global yy_element_space, yy_element_token
+
+ # append yy_element_token contents onto yy_element_space
+ i = len(yy_element_space)
+ element.set_text(
+ yy_element_space,
+ i,
+ element.get_text(yy_element_space, i) +
+ element.get_text(yy_element_token, 0)
+ )
+ # adopt the token element's children as trailing children of the space
+ yy_element_space[i:] = yy_element_token[:]
+
+ # exchange, so that space is now prepended onto token
+ yy_element_token = yy_element_space
+ yy_element_space = markup_stack.pop()