__pycache__
-bootstrap/*.xml
-bootstrap/lex_yy.py
-bootstrap/lex_yy_code.py
-bootstrap/out
-bootstrap/y_tab.py
-lex-yacc-examples/*.c
-lex-yacc-examples/*.h
-lex-yacc-examples/*.o
-lex-yacc-examples/*.xml
-lex-yacc-examples/example4
-lex-yacc-examples/example7
-skel/skel_bison.c.orig
-skel/skel_bison.h.orig
-tests/*.c
-tests/*.o
-tests/*.xml
-tests/lex_yy.py
-tests/y_tab.py
-tests/cal
-tests/cal2
-tests_ast/*.xml
-tests_ast/lex_yy.py
-tests_ast/y_tab.py
+/*.xml
+/bootstrap/*.xml
+/bootstrap/lex_yy.py
+/bootstrap/lex_yy_code.py
+/bootstrap/out
+/bootstrap/y_tab.py
+/lex-yacc-examples/*.c
+/lex-yacc-examples/*.h
+/lex-yacc-examples/*.o
+/lex-yacc-examples/*.xml
+/lex-yacc-examples/example4
+/lex-yacc-examples/example7
+/lex_yy.py
+/lex_yy_code.py
+/out
+/skel/skel_bison.c.orig
+/skel/skel_bison.h.orig
+/tests/*.c
+/tests/*.o
+/tests/*.xml
+/tests/lex_yy.py
+/tests/y_tab.py
+/tests/cal
+/tests/cal2
+/tests_ast/*.xml
+/tests_ast/lex_yy.py
+/tests_ast/y_tab.py
+/y_tab.py
--- /dev/null
+all: lex_yy.py lex_yy_code.py y_tab.py
+
+lex_yy.py: scan-gram.l bootstrap/skel_lex_yy.py
+ ../bootstrap_flex.git/src/flex -o /dev/null $< 2>$<.xml
+ ../pilex.git/pilex.py --python --skel bootstrap/skel_lex_yy.py $<.xml
+
+lex_yy_code.py: scan-code.l
+ ../bootstrap_flex.git/src/flex -o /dev/null $< 2>$<.xml
+ ../pilex.git/pilex.py --element --python -o $@ $<.xml
+
+y_tab.py: parse-gram.y bootstrap/skel_y_tab.py
+ ../bootstrap_bison.git/src/bison -o /dev/null $< 2>$<.xml
+ ./piyacc.py --python --skel bootstrap/skel_y_tab.py $<.xml
+
+clean:
+ rm -f lex_yy.py lex_yy_code.py y_tab.py *.xml
| "%%" EPILOGUE
{
#muscle_code_grow('epilogue', translate_code($2, @2, True), @2)
- lex_yy_code.yyin = None
- lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
- lex_yy_code.yytext = '' # fool unput()
- lex_yy_code.unput($2)
- lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
- lex_yy_code.yylex()
#code_scanner_last_string_free()
insert_after(1, '</AST_Section3>')
insert_after(0, '<AST_Section3>')
--- /dev/null
+#!/usr/bin/env python3
+
+import lex_yy
+import y_tab
+import sys
+
+lex_yy.gram_piece_append('<root>\n <AST ref=\"0\">')
+y_tab.yyparse()
+lex_yy.gram_piece_append('</AST>\n</root>\n')
+sys.stdout.write(''.join(lex_yy.gram_piece))
--- /dev/null
+#!/bin/sh
+if ! test -d out
+then
+ mkdir out
+ bootstrap/markup.py <tests/cal.y |./reserialize.py >out/cal.y.xml.ok
+ bootstrap/markup.py <tests_ast/cal_py.y |./reserialize.py >out/cal_py.y.xml.ok
+ bootstrap/markup.py <../bootstrap_flex.git/src/parse.y |./reserialize.py >out/parse.y.xml.ok
+ bootstrap/markup.py <../bootstrap_bison.git/src/parse-gram.y |./reserialize.py >out/parse-gram.y.xml.ok
+fi
+./markup.py <tests/cal.y |./reserialize.py >out/cal.y.xml
+diff -q out/cal.y.xml.ok out/cal.y.xml
+./markup.py <tests_ast/cal_py.y |./reserialize.py >out/cal_py.y.xml
+diff -q out/cal_py.y.xml.ok out/cal_py.y.xml
+./markup.py <../bootstrap_flex.git/src/parse.y |./reserialize.py >out/parse.y.xml
+diff -q out/parse.y.xml.ok out/parse.y.xml
+./markup.py <../bootstrap_bison.git/src/parse-gram.y |./reserialize.py >out/parse-gram.y.xml
+diff -q out/parse-gram.y.xml.ok out/parse-gram.y.xml
--- /dev/null
+/* Bison Grammar Parser -*- C -*-
+
+ Copyright (C) 2002-2015, 2018 Free Software Foundation, Inc.
+
+ This file is part of Bison, the GNU Compiler Compiler.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+%code requires
+{
+ import ast
+ import element
+ import lex_yy_code
+ import state
+ import xml.etree.ElementTree
+
+ YYLTYPE = state.location
+}
+
+%code top
+{
+}
+
+%code
+{
+ #current_prec = 0
+ #current_lhs_location = 0
+ #current_lhs_named_ref = []
+ #current_lhs_symbol = []
+ #current_class = unknown_sym
+ #current_type = None
+ gram_piece2 = 0
+ gram_piece3 = 0
+ #nested_rhs = 0
+}
+
+/* Nick %define api.prefix {gram_} */
+%define api.pure full
+%define locations
+%define parse.error verbose
+%define parse.lac full
+%define parse.trace
+/* Nick %defines */
+%expect 0
+/* Nick %verbose */
+
+%initial-action
+{
+ #boundary_set(&@$.start, current_file, 1, 1)
+ #boundary_set(&@$.end, current_file, 1, 1)
+}
+
+/* Define the tokens together with their human representation. */
+%token GRAM_EOF 0 "end of file"
+%token STRING "string"
+
+%token PERCENT_TOKEN "%token"
+%token PERCENT_NTERM "%nterm"
+
+%token PERCENT_TYPE "%type"
+%token PERCENT_DESTRUCTOR "%destructor"
+%token PERCENT_PRINTER "%printer"
+
+%token PERCENT_LEFT "%left"
+%token PERCENT_RIGHT "%right"
+%token PERCENT_NONASSOC "%nonassoc"
+%token PERCENT_PRECEDENCE "%precedence"
+
+%token PERCENT_PREC "%prec"
+%token PERCENT_DPREC "%dprec"
+%token PERCENT_MERGE "%merge"
+
+/*----------------------.
+| Global Declarations. |
+`----------------------*/
+
+%token
+ PERCENT_CODE "%code"
+ PERCENT_DEFAULT_PREC "%default-prec"
+ PERCENT_DEFINE "%define"
+ PERCENT_DEFINES "%defines"
+ PERCENT_ERROR_VERBOSE "%error-verbose"
+ PERCENT_EXPECT "%expect"
+ PERCENT_EXPECT_RR "%expect-rr"
+ PERCENT_FLAG "%<flag>"
+ PERCENT_FILE_PREFIX "%file-prefix"
+ PERCENT_GLR_PARSER "%glr-parser"
+ PERCENT_INITIAL_ACTION "%initial-action"
+ PERCENT_LANGUAGE "%language"
+ PERCENT_NAME_PREFIX "%name-prefix"
+ PERCENT_NO_DEFAULT_PREC "%no-default-prec"
+ PERCENT_NO_LINES "%no-lines"
+ PERCENT_NONDETERMINISTIC_PARSER
+ "%nondeterministic-parser"
+ PERCENT_OUTPUT "%output"
+ PERCENT_REQUIRE "%require"
+ PERCENT_SKELETON "%skeleton"
+ PERCENT_START "%start"
+ PERCENT_TOKEN_TABLE "%token-table"
+ PERCENT_VERBOSE "%verbose"
+ PERCENT_YACC "%yacc"
+;
+
+%token BRACED_CODE "{...}"
+%token BRACED_PREDICATE "%?{...}"
+%token BRACKETED_ID "[identifier]"
+%token CHAR "char"
+%token EPILOGUE "epilogue"
+%token EQUAL "="
+%token ID "identifier"
+%token ID_COLON "identifier:"
+%token PERCENT_PERCENT "%%"
+%token PIPE "|"
+%token PROLOGUE "%{...%}"
+%token SEMICOLON ";"
+%token TAG "<tag>"
+%token TAG_ANY "<*>"
+%token TAG_NONE "<>"
+
+/*%union {
+ character = 0
+}*/
+/*%type <character> CHAR*/
+%printer {
+ fputs_unlocked(char_name($$), yyo)
+} CHAR
+
+/*%union {
+ code = ''
+};*/
+/*%type <code> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING*/
+%printer {
+ fputs_unlocked(quotearg_style(c_quoting_style, $$), yyo)
+} STRING
+/*%printer {
+ rpl_fprintf(yyo, '{\n%s\n}', $$)
+} <code>*/
+
+/*%union {
+ uniqstr = ''
+}*/
+/*%type <uniqstr> BRACKETED_ID ID ID_COLON PERCENT_FLAG TAG tag variable*/
+/*%printer {
+ fputs_unlocked($$, yyo)
+} <uniqstr>*/
+%printer {
+ rpl_fprintf(yyo, '[%s]', $$)
+} BRACKETED_ID
+%printer {
+ rpl_fprintf(yyo, '%s:', $$)
+} ID_COLON
+%printer {
+ rpl_fprintf(yyo, '%%%s', $$)
+} PERCENT_FLAG
+%printer {
+ rpl_fprintf(yyo, '<%s>', $$)
+} TAG tag
+
+/*%union {
+ integer = 0
+};*/
+/*%token <integer> INT "integer"*/
+%token INT "integer"
+/*%printer {
+ rpl_fprintf(yyo, '%d', $$)
+} <integer>*/
+
+/*%union {
+ symbol = []
+}*/
+/*%type <symbol> id id_colon string_as_id symbol symbol.prec*/
+/*%printer {
+ rpl_fprintf(yyo, '%s', $$->tag)
+} <symbol>*/
+%printer {
+ rpl_fprintf(yyo, '%s:', $$->tag)
+} id_colon
+
+/*%union {
+ assoc = 0
+};*/
+/*%type <assoc> precedence_declarator*/
+
+/*%union {
+ list = []
+}*/
+/*%type <list> symbols.1 symbols.prec generic_symlist generic_symlist_item*/
+
+/*%union {
+ named_ref = []
+}*/
+/*%type <named_ref> named_ref.opt*/
+
+/*---------.
+| %param. |
+`---------*/
+%code requires
+{
+ param_none = 0
+ param_lex = 1 << 0
+ param_parse = 1 << 1
+ param_both = param_lex | param_parse
+};
+%code
+{
+ current_param = param_none
+};
+/*%union {
+ param = 0
+}*/
+/*%token <param> PERCENT_PARAM "%param";*/
+%token PERCENT_PARAM "%param";
+/*%printer
+{
+ if $$ == param_lex:
+ fputs_unlocked('%' 'lex-param', yyo)
+ break
+ elif $$ == param_parse:
+ fputs_unlocked('%' 'parse-param', yyo)
+ break
+ elif $$ == param_both:
+ fputs_unlocked('%' 'param', yyo)
+ break
+ elif $$ == param_none:
+ assert(False)
+ break
+} <param>;*/
+
+ /*==========\
+ | Grammar. |
+ \==========*/
+%%
+
+input
+ : prologue_declarations "%%" grammar epilogue.opt
+ {
+ insert_after(2, '</AST_Section2>')
+ insert_before(2, '<AST_Section2>')
+ insert_after(0, '</AST_Section1>')
+ insert_before(0, '<AST_Section1>')
+ }
+ ;
+
+ /*------------------------------------.
+ | Declarations: before the first %%. |
+ `------------------------------------*/
+
+prologue_declarations
+ : %empty
+ {
+ global yychar
+ if yychar == YYEMPTY:
+ yychar = lex_yy.gram_lex()
+ temp = lex_yy.gram_piece[gram_piece2 + 1]
+ lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
+ lex_yy.gram_piece[gram_piece2] = lex_yy.gram_piece[gram_piece2 - 1]
+ lex_yy.gram_piece[gram_piece2 - 1] = temp
+ }
+ | prologue_declarations prologue_declaration
+ ;
+
+prologue_declaration
+ : grammar_declaration
+ | "%{...%}"
+ {
+ #muscle_code_grow('post_prologue' if union_seen else 'pre_prologue', translate_code($1, @1, True), @1)
+ lex_yy_code.yyin = None
+ lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
+ lex_yy_code.yytext = '' # fool unput()
+ lex_yy_code.unput($1)
+ lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
+ lex_yy_code.yylex(ast.AST.Text)
+ ref_list = []
+ element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
+ del lex_yy_code.yy_element_space.attrib['ref']
+ lex_yy.gram_piece[gram_piece2] = \
+ '<AST_Section1_Prologue>%{{{0:s}%}}</AST_Section1_Prologue>'.format(
+ xml.etree.ElementTree.tostring(
+ lex_yy_code.yy_element_space,
+ encoding = 'unicode'
+ )
+ )
+ }
+ | "%<flag>"
+ {
+ #muscle_percent_define_ensure($1, @1, True)
+ if $1 == 'api.pure':
+ insert_after(0, '</AST_Section1_PureParser>')
+ insert_before(0, '<AST_Section1_PureParser>')
+ elif $1 == 'locations':
+ insert_after(0, '</AST_Section1_Locations>')
+ insert_before(0, '<AST_Section1_Locations>')
+ elif $1 == 'parse.trace':
+ insert_after(0, '</AST_Section1_Debug>')
+ insert_before(0, '<AST_Section1_Debug>')
+ else:
+ assert False
+ }
+ | "%define" variable value
+ {
+ #muscle_percent_define_insert($2, @2, $3.kind, $3.chars, MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
+ insert_after(2, '</AST_Section1_Define>')
+ insert_before(0, '<AST_Section1_Define>')
+ }
+ | "%defines"
+ {
+ #defines_flag = True
+ insert_after(0, '</AST_Section1_Defines>')
+ insert_before(0, '<AST_Section1_Defines>')
+ }
+ | "%defines" STRING
+ {
+ #defines_flag = True
+ #spec_defines_file = xstrdup($2)
+ insert_after(1, '</AST_Section1_Defines>')
+ insert_before(0, '<AST_Section1_Defines>')
+ }
+ | "%error-verbose"
+ {
+ #muscle_percent_define_insert('parse.error', @1, muscle_keyword, 'verbose', MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
+ insert_after(0, '</AST_Section1_ErrorVerbose>')
+ insert_before(0, '<AST_Section1_ErrorVerbose>')
+ }
+ | "%expect" INT
+ {
+ #expected_sr_conflicts = $2
+ insert_after(1, '</AST_Section1_Expect>')
+ insert_before(0, '<AST_Section1_Expect value="{0:d}">'.format($2))
+ }
+ | "%expect-rr" INT
+ {
+ #expected_rr_conflicts = $2
+ insert_after(1, '</AST_Section1_ExpectRR>')
+ insert_before(0, '<AST_Section1_ExpectRR value="{0:d}">'.format($2))
+ }
+ | "%file-prefix" STRING
+ {
+ #spec_file_prefix = $2
+ insert_after(1, '</AST_Section1_FilePrefix>')
+ insert_before(0, '<AST_Section1_FilePrefix>')
+ }
+ | "%glr-parser"
+ {
+ #nondeterministic_parser = True
+ #glr_parser = True
+ insert_after(0, '</AST_Section1_GLRParser>')
+ insert_before(0, '<AST_Section1_GLRParser>')
+ }
+ | "%initial-action" "{...}"
+ {
+ #muscle_code_grow('initial_action', translate_code($2, @2, False), @2)
+ lex_yy_code.yyin = None
+ lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
+ lex_yy_code.yytext = '' # fool unput()
+ lex_yy_code.unput($2)
+ lex_yy_code.sc_context = lex_yy_code.SC_SYMBOL_ACTION # CODE_PROPS_SYMBOL_ACTION
+ lex_yy_code.yylex(ast.AST.Text)
+ ref_list = []
+ element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
+ del lex_yy_code.yy_element_space.attrib['ref']
+ lex_yy.gram_piece[gram_piece2 + 2] = xml.etree.ElementTree.tostring(
+ lex_yy_code.yy_element_space,
+ encoding = 'unicode'
+ )
+ insert_after(1, '</AST_Section1_InitialAction>')
+ insert_before(0, '<AST_Section1_InitialAction>')
+ }
+ | "%language" STRING
+ {
+ #language_argmatch($2, grammar_prio, @1)
+ insert_after(1, '</AST_Section1_Language>')
+ insert_before(0, '<AST_Section1_Language>')
+ }
+ | "%name-prefix" STRING
+ {
+ #spec_name_prefix = $2
+ insert_after(1, '</AST_Section1_NamePrefix>')
+ insert_before(0, '<AST_Section1_NamePrefix>')
+ }
+ | "%no-lines"
+ {
+ #no_lines_flag = True
+ insert_after(0, '</AST_Section1_Lines>')
+ insert_before(0, '<AST_Section1_Lines value="false">')
+ }
+ | "%nondeterministic-parser"
+ {
+ #nondeterministic_parser = True
+ insert_after(0, '</AST_Section1_NonDeterministicParser>')
+ insert_before(0, '<AST_Section1_NonDeterministicParser>')
+ }
+ | "%output" STRING
+ {
+ #spec_outfile = $2
+ insert_after(1, '</AST_Section1_Output>')
+ insert_before(0, '<AST_Section1_Output>')
+ }
+ | "%param"
+ {
+ #current_param = $1
+ }
+ params
+ {
+ #current_param = param_none
+ insert_after(2, '</AST_Section1_Param>')
+ insert_before(0, '<AST_Section1_Param>')
+ }
+ | "%require" STRING
+ {
+ #version_check(&@2, $2)
+ insert_after(1, '</AST_Section1_Require>')
+ insert_before(0, '<AST_Section1_Require>')
+ }
+ | "%skeleton" STRING
+ {
+ #skeleton_user = $2
+ #if strchr(skeleton_user, ord('/')):
+ # dir_length = len(current_file)
+ # skeleton_build = None
+ # while dir_length and current_file[dir_length - 1] != ord('/'):
+ # dir_length -= 1
+ # while dir_length and current_file[dir_length - 1] == ord('/'):
+ # dir_length -= 1
+ # skeleton_build = xmalloc(dir_length + 1 + len(skeleton_user) + 1)
+ # if dir_length > 0:
+ # memcpy(skeleton_build, current_file, dir_length)
+ # skeleton_build[dir_length++] = ord('/')
+ # strcpy(skeleton_build + dir_length, skeleton_user)
+ # skeleton_user = uniqstr_new(skeleton_build)
+ # free(skeleton_build)
+ #skeleton_arg(skeleton_user, grammar_prio, @1)
+ insert_after(1, '</AST_Section1_Skeleton>')
+ insert_before(0, '<AST_Section1_Skeleton>')
+ }
+ | "%token-table"
+ {
+ #token_table_flag = True
+ insert_after(0, '</AST_Section1_TokenTable>')
+ insert_before(0, '<AST_Section1_TokenTable>')
+ }
+ | "%verbose"
+ {
+ #report_flag |= report_states
+ insert_after(0, '</AST_Section1_Verbose>')
+ insert_before(0, '<AST_Section1_Verbose>')
+ }
+ | "%yacc"
+ {
+ #yacc_flag = True
+ insert_after(0, '</AST_Section1_YACC>')
+ insert_before(0, '<AST_Section1_YACC>')
+ }
+ | /*FIXME: Err? What is this horror doing here? */ ";"
+ ;
+
+params
+ : params "{...}"
+ {
+ #add_param(current_param, $2, @2)
+ }
+ | "{...}"
+ {
+ #add_param(current_param, $1, @1)
+ }
+ ;
+
+/*----------------------.
+| grammar_declaration. |
+`----------------------*/
+
+grammar_declaration
+ : precedence_declaration
+ | symbol_declaration
+ | "%start" symbol
+ {
+ #grammar_start_symbol_set($2, @2)
+ insert_after(1, '</AST_Section1Or2_Start>')
+ insert_after(1, '</AST_SymbolRef>')
+ insert_before(1, '<AST_SymbolRef>')
+ insert_before(0, '<AST_Section1Or2_Start>')
+ }
+ | code_props_type "{...}" generic_symlist
+ {
+ #code = None
+ #code_props_symbol_action_init(&code, $2, @2)
+ #code_props_translate_code(&code)
+ lex_yy_code.yyin = None
+ lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
+ lex_yy_code.yytext = '' # fool unput()
+ lex_yy_code.unput($2)
+ lex_yy_code.sc_context = lex_yy_code.SC_SYMBOL_ACTION # CODE_PROPS_SYMBOL_ACTION
+ lex_yy_code.yylex(ast.AST.Text)
+ ref_list = []
+ element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
+ del lex_yy_code.yy_element_space.attrib['ref']
+ lex_yy.gram_piece[gram_piece2 + 2] = xml.etree.ElementTree.tostring(
+ lex_yy_code.yy_element_space,
+ encoding = 'unicode'
+ )
+ #list = None
+ #list = $3
+ #while list:
+ # symbol_list_code_props_set(list, $1, &code)
+ # list = list->next
+ #symbol_list_free($3)
+ insert_after(2, '</AST_Section1Or2_CodeProps>')
+ insert_before(0, '<AST_Section1Or2_CodeProps _type="{0:d}">'.format($1))
+ }
+ | "%default-prec"
+ {
+ #default_prec = True
+ insert_after(0, '</AST_Section1Or2_DefaultPrec>')
+ insert_before(0, '<AST_Section1Or2_DefaultPrec value="true">')
+ }
+ | "%no-default-prec"
+ {
+ #default_prec = False
+ insert_after(0, '</AST_Section1Or2_DefaultPrec>')
+ insert_before(0, '<AST_Section1Or2_DefaultPrec>')
+ }
+ | "%code" "{...}"
+ {
+ #muscle_code_grow('percent_code()', translate_code_braceless($2, @2), @2)
+ lex_yy_code.yyin = None
+ lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
+ lex_yy_code.yytext = '' # fool unput()
+ lex_yy_code.unput($2[1:-1])
+ lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
+ lex_yy_code.yylex(ast.AST.Text)
+ ref_list = []
+ element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
+ del lex_yy_code.yy_element_space.attrib['ref']
+ lex_yy.gram_piece[gram_piece2 + 2] = \
+ '<AST_BracedCode>{{{0:s}}}</AST_BracedCode>'.format(
+ xml.etree.ElementTree.tostring(
+ lex_yy_code.yy_element_space,
+ encoding = 'unicode'
+ )
+ )
+ insert_after(1, '</AST_Section1Or2_Code>')
+ insert_before(0, '<AST_Section1Or2_Code><AST_ID />')
+ }
+ | "%code" ID "{...}"
+ {
+ #muscle_percent_code_grow($2, @2, translate_code_braceless($3, @3), @3)
+ lex_yy_code.yyin = None
+ lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
+ lex_yy_code.yytext = '' # fool unput()
+ lex_yy_code.unput($3[1:-1])
+ lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
+ lex_yy_code.yylex(ast.AST.Text)
+ ref_list = []
+ element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
+ del lex_yy_code.yy_element_space.attrib['ref']
+ lex_yy.gram_piece[gram_piece2 + 4] = \
+ '<AST_BracedCode>{{{0:s}}}</AST_BracedCode>'.format(
+ xml.etree.ElementTree.tostring(
+ lex_yy_code.yy_element_space,
+ encoding = 'unicode'
+ )
+ )
+ insert_after(2, '</AST_Section1Or2_Code>')
+ insert_before(0, '<AST_Section1Or2_Code>')
+ }
+ ;
+
+/*%type <code_type> code_props_type;*/
+/*%union {
+ code_type = 0
+};*/
+/*%printer {
+ rpl_fprintf(yyo, '%s', code_props_type_string($$))
+} <code_type>;*/
+
+code_props_type
+ : "%destructor"
+ {
+ $$ = state.destructor
+ }
+ | "%printer"
+ {
+ $$ = state.printer
+ }
+ ;
+
+/*---------.
+| %union. |
+`---------*/
+
+%token PERCENT_UNION "%union";
+
+union_name
+ : %empty
+ {
+ insert_before(0, '<AST_ID />')
+ }
+ | ID
+ {
+ #muscle_percent_define_insert('api.value.union.name', @1, muscle_keyword, $1, MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
+ }
+ ;
+
+grammar_declaration
+ : "%union" union_name "{...}"
+ {
+ #union_seen = True
+ #muscle_code_grow('union_members', translate_code_braceless($3, @3), @3)
+ lex_yy_code.yyin = None
+ lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
+ lex_yy_code.yytext = '' # fool unput()
+ lex_yy_code.unput($3[1:-1])
+ lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
+ lex_yy_code.yylex(ast.AST.Text)
+ ref_list = []
+ element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
+ del lex_yy_code.yy_element_space.attrib['ref']
+ lex_yy.gram_piece[gram_piece2 + 4] = \
+ '<AST_BracedCode>{{{0:s}}}</AST_BracedCode>'.format(
+ xml.etree.ElementTree.tostring(
+ lex_yy_code.yy_element_space,
+ encoding = 'unicode'
+ )
+ )
+ insert_after(2, '</AST_Section1Or2_Union>')
+ insert_before(0, '<AST_Section1Or2_Union>')
+ }
+ ;
+
+symbol_declaration
+ : "%nterm"
+ {
+ #current_class = nterm_sym
+ }
+ symbol_defs.1
+ {
+ #current_class = unknown_sym
+ #current_type = None
+ insert_after(2, '</AST_Section1Or2_NTerm>')
+ insert_before(0, '<AST_Section1Or2_NTerm>')
+ }
+ | "%token"
+ {
+ #current_class = token_sym
+ }
+ symbol_defs.1
+ {
+ #current_class = unknown_sym
+ #current_type = None
+ insert_after(2, '</AST_Section1Or2_Token>')
+ insert_before(0, '<AST_Section1Or2_Token>')
+ }
+ | "%type" TAG symbols.1
+ {
+ #list = None
+ #tag_seen = True
+ #list = $3
+ #while list:
+ # symbol_type_set(list->content.sym, $2, @2)
+ # list = list->next
+ #symbol_list_free($3)
+ insert_after(2, '</AST_Section1Or2_Type>')
+ insert_before(0, '<AST_Section1Or2_Type>')
+ }
+ ;
+
+precedence_declaration
+ : precedence_declarator tag.opt symbols.prec
+ {
+ #list = None
+ #current_prec += 1
+ #list = $3
+ #while list:
+ # symbol_type_set(list->content.sym, current_type, @2)
+ # symbol_precedence_set(list->content.sym, current_prec, $1, @1)
+ # list = list->next
+ #symbol_list_free($3)
+ #current_type = None
+ insert_after(2, '</AST_Section1Or2_Precedence>')
+ insert_before(0, '<AST_Section1Or2_Precedence _type="{0:d}">'.format(($1 & 3) - 1))
+ }
+ ;
+
+precedence_declarator
+ : "%left"
+ {
+ $$ = state.left_assoc
+ }
+ | "%right"
+ {
+ $$ = state.right_assoc
+ }
+ | "%nonassoc"
+ {
+ $$ = state.non_assoc
+ }
+ | "%precedence"
+ {
+ $$ = state.precedence_assoc
+ }
+ ;
+
+tag.opt
+ : %empty
+ {
+ #current_type = None
+ }
+ | TAG
+ {
+ #current_type = $1
+ #tag_seen = True
+ }
+ ;
+
+/* Just like symbols.1 but accept INT for the sake of POSIX. */
+symbols.prec
+ : symbol.prec
+ {
+ #$$ = symbol_list_sym_new($1, @1)
+ }
+ | symbols.prec symbol.prec
+ {
+ #$$ = symbol_list_append($1, symbol_list_sym_new($2, @2))
+ }
+ ;
+
+symbol.prec
+ : symbol
+ {
+ #$$ = $1
+ #symbol_class_set($1, token_sym, @1, False)
+ insert_after(0, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef>')
+ }
+ | symbol INT
+ {
+ #$$ = $1
+ #symbol_user_token_number_set($1, $2, @2)
+ #symbol_class_set($1, token_sym, @1, False)
+ insert_after(1, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
+ }
+ ;
+
+/* One or more symbols to be %typed. */
+symbols.1
+ : symbol
+ {
+ #$$ = symbol_list_sym_new($1, @1)
+ insert_after(0, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef>')
+ }
+ | symbols.1 symbol
+ {
+ #$$ = symbol_list_append($1, symbol_list_sym_new($2, @2))
+ insert_after(1, '</AST_SymbolRef>')
+ insert_before(1, '<AST_SymbolRef>')
+ }
+ ;
+
+generic_symlist
+ : generic_symlist_item
+ {
+ #$$ = $1
+ }
+ | generic_symlist generic_symlist_item
+ {
+ #$$ = symbol_list_append($1, $2)
+ }
+ ;
+
+generic_symlist_item
+ : symbol
+ {
+ #$$ = symbol_list_sym_new($1, @1)
+ insert_after(0, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef>')
+ }
+ | tag
+ {
+ #$$ = symbol_list_type_new($1, @1)
+ }
+ ;
+
+tag
+ : TAG
+ | "<*>"
+ {
+ #$$ = uniqstr_new('*')
+ }
+ | "<>"
+ {
+ #$$ = uniqstr_new('')
+ }
+ ;
+
+/* One token definition. */
+symbol_def
+ : TAG
+ {
+ #current_type = $1
+ #tag_seen = True
+ }
+ | id
+ {
+ #symbol_class_set($1, current_class, @1, True)
+ #symbol_type_set($1, current_type, @1)
+ insert_after(0, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef>')
+ }
+ | id INT
+ {
+ #symbol_class_set($1, current_class, @1, True)
+ #symbol_type_set($1, current_type, @1)
+ #symbol_user_token_number_set($1, $2, @2)
+ insert_after(1, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
+ }
+ | id string_as_id
+ {
+ #symbol_class_set($1, current_class, @1, True)
+ #symbol_type_set($1, current_type, @1)
+ #symbol_make_alias($1, $2, @$)
+ insert_after(1, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef>')
+ }
+ | id INT string_as_id
+ {
+ #symbol_class_set($1, current_class, @1, True)
+ #symbol_type_set($1, current_type, @1)
+ #symbol_user_token_number_set($1, $2, @2)
+ #symbol_make_alias($1, $3, @$)
+ insert_after(2, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
+ }
+ ;
+
+/* One or more symbol definitions. */
+symbol_defs.1
+ : symbol_def
+ | symbol_defs.1 symbol_def
+ ;
+
+ /*------------------------------------------.
+ | The grammar section: between the two %%. |
+ `------------------------------------------*/
+
+grammar
+ : rules_or_grammar_declaration
+ | grammar rules_or_grammar_declaration
+ ;
+
+/* As a Bison extension, one can use the grammar declarations in the
+ body of the grammar. */
+rules_or_grammar_declaration
+ : rules
+ | grammar_declaration ";"
+ | error ";"
+ {
+ #yyerrok
+ }
+ ;
+
+rules
+ : id_colon named_ref.opt
+ {
+ #current_lhs($1, @1, $2)
+ }
+ rhses.1
+ {
+ #current_lhs(0, @1, 0)
+ insert_after(3, '</AST_Section2_Rules>')
+ insert_after(0, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef>')
+ insert_before(0, '<AST_Section2_Rules>')
+ }
+ ;
+
+rhses.1
+ : rhs
+ {
+ #grammar_current_rule_end(@1)
+ insert_after(0, '</AST_Production>')
+ insert_before(0, '<AST_Production>')
+ }
+ | rhses.1 "|" rhs
+ {
+ #grammar_current_rule_end(@3)
+ insert_after(2, '</AST_Production>')
+ insert_before(2, '<AST_Production>')
+ }
+ | rhses.1 ";"
+ ;
+
+%token PERCENT_EMPTY "%empty";
+/* Nick added %space */
+%token PERCENT_SPACE "%space";
+
+rhs
+ : %empty
+ {
+ global yychar
+ #if nested_rhs:
+ # nested_rhs -= 1
+ #else:
+ # grammar_current_rule_begin(current_lhs_symbol, current_lhs_location, current_lhs_named_ref)
+ if yychar == YYEMPTY:
+ yychar = lex_yy.gram_lex()
+ temp = lex_yy.gram_piece[gram_piece2 + 1]
+ lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
+ lex_yy.gram_piece[gram_piece2] = lex_yy.gram_piece[gram_piece2 - 1]
+ lex_yy.gram_piece[gram_piece2 - 1] = temp
+ }
+ | rhs symbol named_ref.opt
+ {
+ #grammar_current_rule_symbol_append($2, @2, $3)
+ insert_after(2, '</AST_Production_SymbolRef>')
+ insert_after(1, '</AST_SymbolRef>')
+ insert_before(1, '<AST_Production_SymbolRef><AST_SymbolRef>')
+ }
+ | rhs "{...}" named_ref.opt
+ {
+ #grammar_current_rule_action_append($2, @2, $3, False)
+ lex_yy_code.yyin = None
+ lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
+ lex_yy_code.yytext = '' # fool unput()
+ lex_yy_code.unput($2)
+ lex_yy_code.sc_context = lex_yy_code.SC_RULE_ACTION # CODE_PROPS_RULE_ACTION
+ lex_yy_code.yylex(ast.AST.Text)
+ ref_list = []
+ element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
+ del lex_yy_code.yy_element_space.attrib['ref']
+ lex_yy.gram_piece[gram_piece2 + 2] = xml.etree.ElementTree.tostring(
+ lex_yy_code.yy_element_space,
+ encoding = 'unicode'
+ )
+ insert_after(2, '</AST_Production_Action>')
+ insert_before(1, '<AST_Production_Action>')
+ }
+ | rhs "%?{...}"
+ {
+ #grammar_current_rule_action_append($2, @2, None, True)
+ }
+ | rhs "%empty"
+ {
+ #grammar_current_rule_empty_set(@2)
+ insert_after(1, '</AST_Production_Empty>')
+ insert_before(1, '<AST_Production_Empty>')
+ }
+ | rhs "%prec" symbol
+ {
+ #grammar_current_rule_prec_set($3, @3)
+ insert_after(2, '</AST_Production_Prec>')
+ insert_after(2, '</AST_SymbolRef>')
+ insert_before(2, '<AST_SymbolRef>')
+ insert_before(1, '<AST_Production_Prec>')
+ }
+ | rhs "%dprec" INT
+ {
+ #grammar_current_rule_dprec_set($3, @3)
+ insert_after(2, '</AST_Production_DPrec>')
+ insert_before(1, '<AST_Production_DPrec value="{0:d}">'.format($3))
+ }
+ | rhs "%merge" TAG
+ {
+ #grammar_current_rule_merge_set($3, @3)
+ insert_after(2, '</AST_Production_Merge>')
+ insert_before(1, '<AST_Production_Merge>')
+ }
+ /* Nick extra rules for element groups */
+ | rhs '('
+ /*{
+ #nested_rhs += 1
+ }*/
+ rhs ')'
+ {
+ lex_yy_code.yyin = None
+ lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
+ lex_yy_code.yytext = '' # fool unput()
+ lex_yy_code.unput($2[4:-1])
+ lex_yy_code.sc_context = lex_yy_code.SC_RULE_ACTION # CODE_PROPS_RULE_ACTION
+ lex_yy_code.yylex(ast.AST.Text)
+ ref_list = []
+ element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
+ del lex_yy_code.yy_element_space.attrib['ref']
+ lex_yy.gram_piece[gram_piece2 + 2] = \
+ '(?E{{{0:s}}}'.format(
+ xml.etree.ElementTree.tostring(
+ lex_yy_code.yy_element_space,
+ encoding = 'unicode'
+ )
+ )
+ insert_after(3, '</AST_Production_GroupElement>')
+ insert_before(1, '<AST_Production_GroupElement>')
+ }
+ /* Nick added %space */
+ | rhs "%space"
+ {
+ insert_after(1, '</AST_Production_Space>')
+ insert_before(1, '<AST_Production_Space>')
+ }
+ ;
+
+named_ref.opt
+ : %empty
+ {
+ #$$ = 0
+ }
+ | BRACKETED_ID
+ {
+ #$$ = named_ref_new($1, @1)
+ }
+ ;
+
+/*---------------------.
+| variable and value. |
+`---------------------*/
+
+/* The STRING form of variable is deprecated and is not M4-friendly.
+ For example, M4 fails for '%define "[" "value"'. */
+variable
+ : ID
+ | STRING
+ {
+ #$$ = uniqstr_new($1)
+ }
+ ;
+
+/* Some content or empty by default. */
+%code requires {
+};
+/*%union
+{
+ value = 0
+};*/
+/*%type <value> value;*/
+/*%printer
+{
+ if $$.kind == muscle_code:
+ rpl_fprintf(yyo, '{%s}', $$.chars)
+ break
+ elif $$.kind == muscle_keyword:
+ rpl_fprintf(yyo, '%s', $$.chars)
+ break
+ elif $$.kind == muscle_string:
+ rpl_fprintf(yyo, '"%s"', $$.chars)
+ break
+} <value>;*/
+
+value
+ : %empty
+ {
+ #$$.kind = muscle_keyword
+ #$$.chars = ''
+ }
+ | ID
+ {
+ #$$.kind = muscle_keyword
+ #$$.chars = $1
+ }
+ | STRING
+ {
+ #$$.kind = muscle_string
+ #$$.chars = $1
+ }
+ | "{...}"
+ {
+ #$$.kind = muscle_code
+ #$$.chars = strip_braces($1)
+ }
+ ;
+
+/*--------------.
+| Identifiers. |
+`--------------*/
+
+/* Identifiers are returned as uniqstr values by the scanner.
+ Depending on their use, we may need to make them genuine symbols. */
+
+id
+ : ID
+ {
+ #$$ = symbol_from_uniqstr($1, @1)
+ }
+ | CHAR
+ {
+ #$$ = symbol_get(char_name($1), @1)
+ #symbol_class_set($$, token_sym, @1, False)
+ #symbol_user_token_number_set($$, $1, @1)
+ }
+ ;
+
+id_colon
+ : ID_COLON
+ {
+ #$$ = symbol_from_uniqstr($1, @1)
+ }
+ ;
+
+symbol
+ : id
+ | string_as_id
+ ;
+
+/* A string used as an ID: quote it. */
+string_as_id
+ : STRING
+ {
+ #$$ = symbol_get(quotearg_style(c_quoting_style, $1), @1)
+ #symbol_class_set($$, token_sym, @1, False)
+ }
+ ;
+
+epilogue.opt
+ : %empty
+ | "%%" EPILOGUE
+ {
+ #muscle_code_grow('epilogue', translate_code($2, @2, True), @2)
+ #code_scanner_last_string_free()
+ insert_after(1, '</AST_Section3>')
+ insert_after(0, '<AST_Section3>')
+ }
+ ;
+
+%%
+
+#def lloc_default(rhs, n):
+# i = None
+# loc = None
+# loc.start = rhs[n].end
+# loc.end = rhs[n].end
+# i = 1
+# while i <= n:
+# if not equal_boundaries(rhs[i].start, rhs[i].end):
+# loc.start = rhs[i].start
+# break
+# i += 1
+# return loc
+#
+#def strip_braces(code):
+# code[len(code) - 1] = 0
+# return code + 1
+#
+#def translate_code(code, loc, plain):
+# plain_code = None
+# if plain:
+# code_props_plain_init(&plain_code, code, loc)
+# else:
+# code_props_symbol_action_init(&plain_code, code, loc)
+# code_props_translate_code(&plain_code)
+# lex_yy.gram_scanner_last_string_free()
+# return plain_code.code
+#
+#def translate_code_braceless(code, loc):
+# return translate_code(strip_braces(code), loc, True)
+#
+#def add_param(type, decl, loc):
+# alphanum = 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' '_' '0123456789'
+# name_start = None
+# p = None
+# p = decl
+# while p[1]:
+# if (p == decl or not memchr(alphanum, p[-1], sizeof alphanum - 1)) and memchr(alphanum, p[0], sizeof alphanum - 10 - 1):
+# name_start = p
+# p += 1
+# p -= 1
+# while c_isspace(int(*p)):
+# p -= 1
+# p[1] = ord('\0')
+# decl += 1
+# while c_isspace(int(*decl)):
+# decl += 1
+# if not name_start:
+# complain(&loc, complaint, gettext('missing identifier in parameter declaration'))
+# else:
+# name = xmemdup0(name_start, strspn(name_start, alphanum))
+# if type & param_lex:
+# muscle_pair_list_grow('lex_param', decl, name)
+# if type & param_parse:
+# muscle_pair_list_grow('parse_param', decl, name)
+# free(name)
+# lex_yy.gram_scanner_last_string_free()
+#
+#def version_check(loc, version):
+# if strverscmp(version, '3.0.5') > 0:
+# complain(loc, complaint, 'require bison %s, but have %s', version, '3.0.5')
+# exit(63)
+#
+#def gram_error(loc, msg):
+# complain(loc, complaint, '%s', msg)
+#
+#def token_name(type):
+# return yytname[YYTRANSLATE(type)]
+#
+#def char_name(c):
+# if c == ord('\''):
+# return '\'\\\'\''
+# else:
+# buf = [None, None, None, None]
+# buf[0] = ord('\'')
+# buf[1] = c
+# buf[2] = ord('\'')
+# buf[3] = ord('\0')
+# return quotearg_style(escape_quoting_style, buf)
+#
+#def current_lhs(sym, loc, ref):
+# current_lhs_symbol = sym
+# current_lhs_location = loc
+# free(current_lhs_named_ref)
+# current_lhs_named_ref = ref
+
+def insert_before(n, str):
+ global gram_piece3
+ lex_yy.gram_piece_insert(gram_piece2 + n * 2, str)
+ lex_yy.gram_piece0 += 1
+ gram_piece3 += 1
+
+def insert_after(n, str):
+ global gram_piece3
+ lex_yy.gram_piece_insert(gram_piece2 + n * 2 + 1, str)
+ lex_yy.gram_piece0 += 1
+ gram_piece3 += 1
--- /dev/null
+#!/usr/bin/env python3
+
+# Round-trip an AST between its serialized form and object form:
+# deserialize the tree on stdin, then serialize it back to stdout.
+# NOTE(review): `ast` here is the project's AST module (it supplies
+# `factory` for node construction), not the stdlib `ast` — confirm.
+
+import ast
+import element
+import sys
+
+element.serialize(element.deserialize(sys.stdin, ast.factory), sys.stdout)
--- /dev/null
+/* Bison Action Scanner -*- C -*-
+
+ Copyright (C) 2006-2015, 2018 Free Software Foundation, Inc.
+
+ This file is part of Bison, the GNU Compiler Compiler.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+%option debug nodefault noinput nounput noyywrap never-interactive
+
+%{
+ import ast
+
+ sc_context = -1
+%}
+
+%x SC_COMMENT SC_LINE_COMMENT
+%x SC_STRING SC_CHARACTER
+%x SC_RULE_ACTION SC_SYMBOL_ACTION
+
+/* POSIX says that a tag must be both an id and a C union member, but
+ historically almost any character is allowed in a tag. We disallow
+ NUL and newline, as this simplifies our implementation. We allow
+ "->" as a means to dereference a pointer. */
+tag (?:[^\0\n>]|->)+
+
+/* Zero or more instances of backslash-newline. Following GCC, allow
+ white space between the backslash and the newline. */
+splice (?:\\[ \f\t\v]*\n)*
+
+/* C style identifier. Must start with letter. Will be used for
+ named symbol references. Shall be kept synchronized with
+ scan-gram.l "letter" and "id". */
+letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
+id {letter}(?:{letter}|[-0-9])*
+
+%%
+
+%{
+ assert sc_context == SC_SYMBOL_ACTION or sc_context == SC_RULE_ACTION or sc_context == INITIAL
+ BEGIN(sc_context)
+%}
+
+<SC_COMMENT>
+{
+ "*"{splice}"/" BEGIN(sc_context)
+}
+
+ /*--------------------------------------------------------------.
+ | Scanning a line comment. The initial '//' is already eaten. |
+ `--------------------------------------------------------------*/
+
+<SC_LINE_COMMENT>
+{
+ "\n" BEGIN(sc_context)
+ {splice}
+}
+
+ /*--------------------------------------------.
+ | Scanning user-code characters and strings. |
+ `--------------------------------------------*/
+
+<SC_CHARACTER,SC_STRING>
+{
+ {splice}|\\{splice}.
+}
+
+<SC_CHARACTER>
+{
+ "'" BEGIN(sc_context)
+}
+
+<SC_STRING>
+{
+ "\"" BEGIN(sc_context)
+}
+
+<SC_RULE_ACTION,SC_SYMBOL_ACTION>
+{
+ "'" BEGIN(SC_CHARACTER)
+ "\"" BEGIN(SC_STRING)
+ "/"{splice}"*" BEGIN(SC_COMMENT)
+ "/"{splice}"/" BEGIN(SC_LINE_COMMENT)
+
+ [$@] {
+ state.complain(yylloc, state.Wother, 'stray \'{0:s}\''.format(yytext))
+ }
+}
+
+<SC_RULE_ACTION>
+{
+ (?E{
+ ast.AST.Text.StackReference,
+ tag_name = '' if yy_groups[2] is None else yy_groups[2][1:-1],
+ index = int(yy_groups[3])
+ }"$"("<"{tag}">")?(-?[0-9]+|{id}|"["{id}"]"))
+ (?E{
+ ast.AST.Text.StackLocation,
+ index = int(yy_groups[2])
+ }"@"(-?[0-9]+|{id}|"["{id}"]"))
+}
+
+<SC_RULE_ACTION,SC_SYMBOL_ACTION>
+{
+ (?E{
+ ast.AST.Text.ValueReference,
+ tag_name = '' if yy_groups[2] is None else yy_groups[2][1:-1]
+ }"$"("<"{tag}">")?"$")
+ (?E{
+ ast.AST.Text.ValueLocation
+ }"@$")
+}
+
+<*>
+{
+ .|\n
+}
--- /dev/null
+/* Bison Grammar Scanner -*- C -*-
+
+ Copyright (C) 2002-2015, 2018 Free Software Foundation, Inc.
+
+ This file is part of Bison, the GNU Compiler Compiler.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+%option debug nodefault noinput noyywrap never-interactive
+
+%{
+  import state
+  import y_tab
+
+  # scanner position and token bookkeeping shared with the parser
+  scanner_cursor = state.boundary()
+  gram_last_string = ''
+  bracketed_id_str = None
+  bracketed_id_loc = 0
+  bracketed_id_start = 0
+  bracketed_id_context_state = -1
+
+  # accumulates the text of the token being scanned (C obstack analogue)
+  obstack_for_string = []
+  #def gram_scanner_last_string_free():
+  #  del obstack_for_string[:]
+
+  # output pieces for the XML-annotated rendition of the grammar
+  gram_piece = []
+  gram_piece0 = 0
+  gram_piece1 = 0
+
+  percent_percent_count = 0
+
+  # these should be yylex()-local, but moved to here, see further down:
+  nesting = 0
+  context_state = -1
+  id_loc = state.location()
+  code_start = scanner_cursor.copy()
+  token_start = scanner_cursor.copy()
+  #first = True
+  if True: #first:
+    scanner_cursor = y_tab.yylloc.start.copy()
+  #first = False
+%}
+
+%x SC_YACC_COMMENT
+%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
+%x SC_AFTER_IDENTIFIER
+
+%x SC_TAG
+
+%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
+%x SC_COMMENT SC_LINE_COMMENT
+%x SC_STRING SC_CHARACTER
+%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
+%x SC_ELEMENT_GROUP
+
+letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
+notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
+id {letter}({letter}|[-0-9])*
+int [0-9]+
+
+/* Zero or more instances of backslash-newline. Following GCC, allow
+ white space between the backslash and the newline. */
+splice (\\[ \f\t\v]*\n)*
+
+/* An equal sign, with optional leading whitespaces. This is used in some
+ deprecated constructs. */
+eqopt ([[:space:]]*=)?
+
+%%
+
+%{
+ # these should be here, but we can't access yylex()-local variables
+ # from an action since the action functions are not nested to yylex():
+ #nesting = 0
+ #context_state = 0
+ #id_loc = state.location()
+ #code_start = scanner_cursor.copy()
+ #token_start = scanner_cursor.copy()
+ #first = True
+ #if first:
+ # scanner_cursor = y_tab.yylloc.start.copy()
+ # first = False
+%}
+
+<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
+{
+ /* Comments and white space. */
+ "," state.complain(state.loc, state.Wother, 'stray \',\' treated as white space')
+ [ \f\n\t\v] |
+ "//".* #continue
+ "/*" {
+ global token_start, context_state
+ token_start = y_tab.yylloc.start
+ context_state = YY_START()
+ BEGIN(SC_YACC_COMMENT)
+ }
+
+ /* #line directives are not documented, and may be withdrawn or
+ modified in future versions of Bison. */
+ ^"#line "{int}(" \"".*"\"")?"\n" #handle_syncline(yytext + sizeof '#line ' - 1, y_tab.yylloc)
+}
+
+
+ /*----------------------------.
+ | Scanning Bison directives. |
+ `----------------------------*/
+
+ /* For directives that are also command line options, the regex must be
+ "%..."
+ after "[-_]"s are removed, and the directive must match the --long
+ option name, with a single string argument. Otherwise, add exceptions
+ to ../build-aux/cross-options.pl. */
+
+<INITIAL>
+{
+ "%binary" return y_tab.PERCENT_NONASSOC
+ "%code" return y_tab.PERCENT_CODE
+ "%debug" {
+ y_tab.yylval = 'parse.trace'
+ return y_tab.PERCENT_FLAG
+ }
+ "%default-prec" return y_tab.PERCENT_DEFAULT_PREC
+ "%define" return y_tab.PERCENT_DEFINE
+ "%defines" return y_tab.PERCENT_DEFINES
+ "%destructor" return y_tab.PERCENT_DESTRUCTOR
+ "%dprec" return y_tab.PERCENT_DPREC
+ "%empty" return y_tab.PERCENT_EMPTY
+ "%error-verbose" return y_tab.PERCENT_ERROR_VERBOSE
+ "%expect" return y_tab.PERCENT_EXPECT
+ "%expect-rr" return y_tab.PERCENT_EXPECT_RR
+ "%file-prefix" return y_tab.PERCENT_FILE_PREFIX
+ "%fixed-output-files" return y_tab.PERCENT_YACC
+ "%initial-action" return y_tab.PERCENT_INITIAL_ACTION
+ "%glr-parser" return y_tab.PERCENT_GLR_PARSER
+ "%language" return y_tab.PERCENT_LANGUAGE
+ "%left" return y_tab.PERCENT_LEFT
+ "%lex-param" {
+ y_tab.yylval = y_tab.param_lex
+ return y_tab.PERCENT_PARAM
+ }
+ "%locations" {
+ y_tab.yylval = 'locations'
+ return y_tab.PERCENT_FLAG
+ }
+ "%merge" return y_tab.PERCENT_MERGE
+ "%name-prefix" return y_tab.PERCENT_NAME_PREFIX
+ "%no-default-prec" return y_tab.PERCENT_NO_DEFAULT_PREC
+ "%no-lines" return y_tab.PERCENT_NO_LINES
+ "%nonassoc" return y_tab.PERCENT_NONASSOC
+ "%nondeterministic-parser" return y_tab.PERCENT_NONDETERMINISTIC_PARSER
+ "%nterm" return y_tab.PERCENT_NTERM
+ "%output" return y_tab.PERCENT_OUTPUT
+ "%param" {
+ y_tab.yylval = y_tab.param_both
+ return y_tab.PERCENT_PARAM
+ }
+ "%parse-param" {
+ y_tab.yylval = y_tab.param_parse
+ return y_tab.PERCENT_PARAM
+ }
+ "%prec" return y_tab.PERCENT_PREC
+ "%precedence" return y_tab.PERCENT_PRECEDENCE
+ "%printer" return y_tab.PERCENT_PRINTER
+ "%pure-parser" {
+ y_tab.yylval = 'api.pure'
+ return y_tab.PERCENT_FLAG
+ }
+ "%require" return y_tab.PERCENT_REQUIRE
+ "%right" return y_tab.PERCENT_RIGHT
+ "%skeleton" return y_tab.PERCENT_SKELETON
+ /* Nick added %space */
+ "%space" return y_tab.PERCENT_SPACE
+ "%start" return y_tab.PERCENT_START
+ "%term" return y_tab.PERCENT_TOKEN
+ "%token" return y_tab.PERCENT_TOKEN
+ "%token-table" return y_tab.PERCENT_TOKEN_TABLE
+ "%type" return y_tab.PERCENT_TYPE
+ "%union" return y_tab.PERCENT_UNION
+ "%verbose" return y_tab.PERCENT_VERBOSE
+ "%yacc" return y_tab.PERCENT_YACC
+
+ /* deprecated */
+ "%default"[-_]"prec" {
+ #deprecated_directive(loc, yytext, '%default-prec')
+ scanner_cursor.column -= len('%default-prec')
+ unput('%default-prec')
+ }
+ "%error"[-_]"verbose" {
+ #deprecated_directive(loc, yytext, '%define parse.error verbose')
+ scanner_cursor.column -= len('%define parse.error verbose')
+ unput('%define parse.error verbose')
+ }
+ "%expect"[-_]"rr" {
+ #deprecated_directive(loc, yytext, '%expect-rr')
+ scanner_cursor.column -= len('%expect-rr')
+ unput('%expect-rr')
+ }
+ "%file-prefix"{eqopt} {
+ #deprecated_directive(loc, yytext, '%file-prefix')
+ scanner_cursor.column -= len('%file-prefix')
+ unput('%file-prefix')
+ }
+ "%fixed"[-_]"output"[-_]"files" {
+ #deprecated_directive(loc, yytext, '%fixed-output-files')
+ scanner_cursor.column -= len('%fixed-output-files')
+ unput('%fixed-output-files')
+ }
+ "%name"[-_]"prefix"{eqopt} {
+ #deprecated_directive(loc, yytext, '%name-prefix')
+ scanner_cursor.column -= len('%name-prefix')
+ unput('%name-prefix')
+ }
+ "%no"[-_]"default"[-_]"prec" {
+ #deprecated_directive(loc, yytext, '%no-default-prec')
+ scanner_cursor.column -= len('%no-default-prec')
+ unput('%no-default-prec')
+ }
+ "%no"[-_]"lines" {
+ #deprecated_directive(loc, yytext, '%no-lines')
+ scanner_cursor.column -= len('%no-lines')
+ unput('%no-lines')
+ }
+ "%output"{eqopt} {
+ #deprecated_directive(loc, yytext, '%output')
+ scanner_cursor.column -= len('%output')
+ unput('%output')
+ }
+ "%pure"[-_]"parser" {
+ #deprecated_directive(loc, yytext, '%pure-parser')
+ scanner_cursor.column -= len('%pure-parser')
+ unput('%pure-parser')
+ }
+ "%token"[-_]"table" {
+ #deprecated_directive(loc, yytext, '%token-table')
+ scanner_cursor.column -= len('%token-table')
+ unput('%token-table')
+ }
+
+ "%"{id} state.complain(y_tab.yylloc, state.complaint, 'invalid directive: {0:s}'.format(state.quote(yytext)))
+
+ "=" return y_tab.EQUAL
+ "|" return y_tab.PIPE
+ ";" return y_tab.SEMICOLON
+
+ {id} {
+ global id_loc, bracketed_id_str
+ y_tab.yylval = yytext
+ id_loc = y_tab.yylloc
+ bracketed_id_str = None
+ BEGIN(SC_AFTER_IDENTIFIER)
+ gram_piece_pack()
+ gram_piece_append('<AST_ID>')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_ID>')
+ gram_piece_pack()
+ }
+
+ {int} {
+ y_tab.yylval = scan_integer(yytext, 10, y_tab.yylloc)
+ return y_tab.INT
+ }
+ 0[xX][0-9abcdefABCDEF]+ {
+ y_tab.yylval = scan_integer(yytext, 16, y_tab.yylloc)
+ return y_tab.INT
+ }
+
+ /* Identifiers may not start with a digit. Yet, don't silently
+ accept "1FOO" as "1 FOO". */
+ {int}{id} state.complain(y_tab.yylloc, state.complaint, 'invalid identifier: {0:s}'.format(state.quote(yytext)))
+
+ /* Characters. */
+ "'" {
+ global token_start
+ token_start = y_tab.yylloc.start
+ BEGIN(SC_ESCAPED_CHARACTER)
+ gram_piece_pack()
+ gram_piece_append('<AST_Char>')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('<AST_Text>')
+ }
+
+ /* Strings. */
+ "\"" {
+ global token_start
+ token_start = y_tab.yylloc.start
+ BEGIN(SC_ESCAPED_STRING)
+ gram_piece_pack()
+ gram_piece_append('<AST_String>')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('<AST_Text>')
+ }
+
+ /* Prologue. */
+ "%{" {
+ global code_start
+ code_start = y_tab.yylloc.start
+ BEGIN(SC_PROLOGUE)
+ gram_piece_pack()
+ gram_piece_append('<AST_Section1_Prologue>')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('<AST_Text>')
+ }
+
+ /* Code in between braces. */
+ "{" {
+ global nesting, code_start
+ obstack_for_string.append(yytext)
+ nesting = 0
+ code_start = y_tab.yylloc.start
+ BEGIN(SC_BRACED_CODE)
+ gram_piece_pack()
+ gram_piece_append('<AST_BracedCode>')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('<AST_Text>')
+ }
+
+ /* Semantic predicate. */
+ "%?"[ \f\n\t\v]*"{" {
+ global nesting, code_start
+ nesting = 0
+ code_start = y_tab.yylloc.start
+ gram_piece_pack()
+ gram_piece_append('<AST_BracedPredicate>')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('<AST_Text>')
+ BEGIN(SC_PREDICATE)
+ }
+
+ /* Nick extra rules for element groups */
+ "(?E{" {
+ global nesting, code_start
+ obstack_for_string.append(yytext)
+ nesting = 0
+ code_start = y_tab.yylloc.start
+ BEGIN(SC_ELEMENT_GROUP)
+ gram_piece_pack()
+ gram_piece_flush(len(yytext))
+ gram_piece_append('<AST_Text>')
+ }
+ ")" return ord(')')
+
+ /* A type. */
+ "<*>" {
+ gram_piece_pack()
+ gram_piece_append('<AST_TagRef><<AST_Text>*</AST_Text>></AST_TagRef>')
+ return ~y_tab.TAG_ANY
+ }
+ "<>" {
+ gram_piece_pack()
+ gram_piece_append('<AST_TagRef><<AST_Text />></AST_TagRef>')
+ return ~y_tab.TAG_NONE
+ }
+ "<" {
+ global nesting, token_start
+ nesting = 0
+ token_start = y_tab.yylloc.start
+ BEGIN(SC_TAG)
+ gram_piece_pack()
+ gram_piece_append('<AST_TagRef>')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('<AST_Text>')
+ }
+
+ "%%" {
+ global percent_percent_count
+ percent_percent_count += 1
+ if percent_percent_count == 2:
+ BEGIN(SC_EPILOGUE)
+ gram_piece_pack()
+ gram_piece_escape(yytext)
+ gram_piece_pack()
+ gram_piece_pack()
+ return ~y_tab.PERCENT_PERCENT
+ return y_tab.PERCENT_PERCENT
+ }
+
+ "[" {
+ global bracketed_id_str, bracketed_id_start, bracketed_id_context_state
+ bracketed_id_str = None
+ bracketed_id_start = y_tab.yylloc.start
+ bracketed_id_context_state = YY_START()
+ BEGIN(SC_BRACKETED_ID)
+ }
+
+ [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. state.complain(y_tab.yylloc, state.complaint, '{0:s}: {1:s}'.format('invalid character' if len(yytext) == 1 else 'invalid characters', state.quote(yytext)))
+
+ <<EOF>> {
+ y_tab.yylloc.start = scanner_cursor.copy()
+ y_tab.yylloc.end = scanner_cursor.copy()
+ yyterminate()
+ }
+}
+
+
+ /*--------------------------------------------------------------.
+ | Supporting \0 complexifies our implementation for no expected |
+ | added value. |
+ `--------------------------------------------------------------*/
+
+<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
+{
+ \0 state.complain(y_tab.yylloc, state.complaint, 'invalid null character')
+}
+
+
+ /*-----------------------------------------------------------------.
+ | Scanning after an identifier, checking whether a colon is next. |
+ `-----------------------------------------------------------------*/
+
+<SC_AFTER_IDENTIFIER>
+{
+ "[" {
+ global bracketed_id_start, bracketed_id_context_state
+ if bracketed_id_str is not None:
+ scanner_cursor.column -= len(yytext)
+ yyless(0)
+ BEGIN(SC_RETURN_BRACKETED_ID)
+ y_tab.yylloc = id_loc
+ return y_tab.ID
+ else:
+ bracketed_id_start = y_tab.yylloc.start
+ bracketed_id_context_state = YY_START()
+ BEGIN(SC_BRACKETED_ID)
+ }
+ ":" {
+ BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
+ y_tab.yylloc = id_loc
+ gram_piece_escape(yytext)
+ return ~y_tab.ID_COLON
+ }
+ . {
+ scanner_cursor.column -= len(yytext)
+ yyless(0)
+ BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
+ y_tab.yylloc = id_loc
+ return ~y_tab.ID
+ }
+ <<EOF>> {
+ BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
+ y_tab.yylloc = id_loc
+ return ~y_tab.ID
+ }
+}
+
+ /*--------------------------------.
+ | Scanning bracketed identifiers. |
+ `--------------------------------*/
+
+<SC_BRACKETED_ID>
+{
+ {id} {
+ global bracketed_id_str, bracketed_id_loc
+ if bracketed_id_str is not None:
+ state.complain(y_tab.yylloc, state.complaint, 'unexpected identifier in bracketed name: {0:s}'.format(state.quote(yytext)))
+ else:
+ bracketed_id_str = yytext
+ bracketed_id_loc = y_tab.yylloc
+ }
+ "]" {
+ global bracketed_id_str
+ BEGIN(bracketed_id_context_state)
+ if bracketed_id_str is not None:
+ if INITIAL == bracketed_id_context_state:
+ y_tab.yylval = bracketed_id_str
+ bracketed_id_str = None
+ y_tab.yylloc = bracketed_id_loc
+ return y_tab.BRACKETED_ID
+ else:
+ state.complain(y_tab.yylloc, state.complaint, 'an identifier expected')
+ }
+
+ [^\].A-Za-z0-9_/ \f\n\t\v]+|. state.complain(y_tab.yylloc, state.complaint, '{0:s}: {1:s}'.format('invalid character in bracketed name' if len(yytext) == 1 else 'invalid characters in bracketed name', state.quote(yytext)))
+
+ <<EOF>> {
+ BEGIN(bracketed_id_context_state)
+ unexpected_eof(bracketed_id_start, ']')
+ }
+}
+
+<SC_RETURN_BRACKETED_ID>
+{
+ . {
+ global bracketed_id_str
+ scanner_cursor.column -= len(yytext)
+ yyless(0)
+ y_tab.yylval = bracketed_id_str
+ bracketed_id_str = None
+ y_tab.yylloc = bracketed_id_loc
+ BEGIN(INITIAL)
+ return y_tab.BRACKETED_ID
+ }
+}
+
+
+ /*---------------------------------------------------------------.
+ | Scanning a Yacc comment. The initial '/ *' is already eaten. |
+ `---------------------------------------------------------------*/
+
+<SC_YACC_COMMENT>
+{
+ "*/" BEGIN(context_state)
+ .|\n #continue
+ <<EOF>> {
+ unexpected_eof(token_start, '*/')
+ BEGIN(context_state)
+ }
+}
+
+
+ /*------------------------------------------------------------.
+ | Scanning a C comment. The initial '/ *' is already eaten. |
+ `------------------------------------------------------------*/
+
+<SC_COMMENT>
+{
+ "*"{splice}"/" {
+ obstack_for_string.append(yytext)
+ BEGIN(context_state)
+ }
+ <<EOF>> {
+ unexpected_eof(token_start, '*/')
+ BEGIN(context_state)
+ }
+}
+
+
+ /*--------------------------------------------------------------.
+ | Scanning a line comment. The initial '//' is already eaten. |
+ `--------------------------------------------------------------*/
+
+<SC_LINE_COMMENT>
+{
+ "\n" {
+ obstack_for_string.append(yytext)
+ BEGIN(context_state)
+ }
+ {splice} obstack_for_string.append(yytext)
+ <<EOF>> BEGIN(context_state)
+}
+
+
+ /*------------------------------------------------.
+ | Scanning a Bison string, including its escapes. |
+ | The initial quote is already eaten. |
+ `------------------------------------------------*/
+
+<SC_ESCAPED_STRING>
+{
+ "\"" {
+ global gram_last_string
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = token_start
+ y_tab.yylval = gram_last_string
+ BEGIN(INITIAL)
+ gram_piece_append('</AST_Text>')
+ gram_piece_escape(yytext)
+ gram_piece_append('</AST_String>')
+ gram_piece_pack()
+ return ~y_tab.STRING
+ }
+ <<EOF>> unexpected_eof(token_start, '"')
+ "\n" unexpected_newline(token_start, '"')
+}
+
+ /*----------------------------------------------------------.
+ | Scanning a Bison character literal, decoding its escapes. |
+ | The initial quote is already eaten. |
+ `----------------------------------------------------------*/
+
+<SC_ESCAPED_CHARACTER>
+{
+ "'" {
+ global gram_last_string
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = token_start
+ if len(gram_last_string) == 0:
+ state.complain(y_tab.yylloc, state.Wother, 'empty character literal')
+ y_tab.yylval = ord('\'')
+ else:
+ if len(gram_last_string) > 1:
+ state.complain(y_tab.yylloc, state.Wother, 'extra characters in character literal')
+ y_tab.yylval = ord(gram_last_string[0])
+ #del obstack_for_string[:]
+ BEGIN(INITIAL)
+ gram_piece_append('</AST_Text>')
+ gram_piece_escape(yytext)
+ gram_piece_append('</AST_Char>')
+ gram_piece_pack()
+ return ~y_tab.CHAR
+ }
+ "\n" unexpected_newline(token_start, '\'')
+ <<EOF>> unexpected_eof(token_start, '\'')
+}
+
+
+
+ /*--------------------------------------------------------------.
+ | Scanning a tag. The initial angle bracket is already eaten. |
+ `--------------------------------------------------------------*/
+
+<SC_TAG>
+{
+ ">" {
+ global nesting, gram_last_string
+ nesting -= 1
+ if nesting < 0:
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = token_start
+ y_tab.yylval = gram_last_string
+ #del obstack_for_string[:]
+ BEGIN(INITIAL)
+ gram_piece_append('</AST_Text>')
+ gram_piece_escape(yytext)
+ gram_piece_append('</AST_TagRef>')
+ gram_piece_pack()
+ return ~y_tab.TAG
+ obstack_for_string.append(yytext)
+ }
+
+ ([^<>]|->)+ obstack_for_string.append(yytext)
+ "<"+ {
+ global nesting
+ obstack_for_string.append(yytext)
+ nesting += len(yytext)
+ }
+
+ <<EOF>> unexpected_eof(token_start, '>')
+}
+
+ /*----------------------------.
+ | Decode escaped characters. |
+ `----------------------------*/
+
+<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
+{
+  \\[0-7]{1,3} {
+    # decode an octal escape; NUL and values above 0xff are rejected
+    # (was: strtoul(yytext + 1, None, 8) -- untranslated C, str + int raises TypeError)
+    c = int(yytext[1:], 8)
+    if not c or 0x7f * 2 + 1 < c:
+      state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
+    else:
+      obstack_for_string.append(chr(c))
+    # was: rpl_sprintf(gram_piece_temp, ...) -- undefined in Python, would raise NameError;
+    # NOTE(review): markup emitted unconditionally, matching the \a..\v rules -- confirm
+    gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(c))
+    gram_piece_flush(len(yytext))
+    gram_piece_append('</AST_Text_Escape>')
+  }
+
+  \\x[0-9abcdefABCDEF]+ {
+    # decode a hex escape; NUL and values above 0xff are rejected
+    # (was: strtoul(yytext + 2, None, 16) -- untranslated C, str + int raises TypeError)
+    c = int(yytext[2:], 16)
+    if not c or 0x7f * 2 + 1 < c:
+      state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
+    else:
+      obstack_for_string.append(chr(c))
+    # was: rpl_sprintf(gram_piece_temp, ...) -- undefined in Python, would raise NameError
+    gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(c))
+    gram_piece_flush(len(yytext))
+    gram_piece_append('</AST_Text_Escape>')
+  }
+
+ \\a {
+ obstack_for_string.append('\a')
+ gram_piece_append('<AST_Text_Escape character="7">')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_Text_Escape>')
+ }
+ \\b {
+ obstack_for_string.append('\b')
+ gram_piece_append('<AST_Text_Escape character="8">')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_Text_Escape>')
+ }
+ \\f {
+ obstack_for_string.append('\f')
+ gram_piece_append('<AST_Text_Escape character="12">')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_Text_Escape>')
+ }
+ \\n {
+ obstack_for_string.append('\n')
+ gram_piece_append('<AST_Text_Escape character="10">')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_Text_Escape>')
+ }
+ \\r {
+ obstack_for_string.append('\r')
+ gram_piece_append('<AST_Text_Escape character="13">')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_Text_Escape>')
+ }
+ \\t {
+ obstack_for_string.append('\t')
+ gram_piece_append('<AST_Text_Escape character="9">')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_Text_Escape>')
+ }
+ \\v {
+ obstack_for_string.append('\v')
+ gram_piece_append('<AST_Text_Escape character="11">')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_Text_Escape>')
+ }
+
+ /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
+  \\("\""|"'"|"?"|"\\") {
+    obstack_for_string.append(yytext[1])
+    # was: rpl_sprintf(gram_piece_temp, '...%d...', yytext[1]) -- undefined in Python;
+    # %d of a C char prints its code, so use ord() on the Python 1-char string
+    gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(ord(yytext[1])))
+    gram_piece_flush(len(yytext))
+    gram_piece_append('</AST_Text_Escape>')
+  }
+
+  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
+    # decode a \u/\U universal character name; <= 0 means invalid/out of range
+    c = convert_ucn_to_byte(yytext)
+    if c <= 0:
+      state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
+    else:
+      obstack_for_string.append(chr(c))
+    # was: rpl_sprintf(gram_piece_temp, ...) -- undefined in Python, would raise NameError
+    gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(c))
+    gram_piece_flush(len(yytext))
+    gram_piece_append('</AST_Text_Escape>')
+  }
+ \\(.|\n) {
+ p = yytext[1:]
+ if True: #c_isspace(int(*p)) and c_isprint(int(*p)):
+ p = state.quote(p)
+ #else:
+ # p = quotearg_style_mem(escape_quoting_style, p, 1)
+ state.complain(y_tab.yylloc, state.complaint, 'invalid character after \\-escape: {0:s}'.format(p))
+ }
+}
+
+ /*--------------------------------------------.
+ | Scanning user-code characters and strings. |
+ `--------------------------------------------*/
+
+<SC_CHARACTER,SC_STRING>
+{
+ {splice}|\\{splice}[^\n\[\]] obstack_for_string.append(yytext)
+}
+
+<SC_CHARACTER>
+{
+ "'" {
+ obstack_for_string.append(yytext)
+ BEGIN(context_state)
+ }
+ \n unexpected_newline(token_start, '\'')
+ <<EOF>> unexpected_eof(token_start, '\'')
+}
+
+<SC_STRING>
+{
+ "\"" {
+ obstack_for_string.append(yytext)
+ BEGIN(context_state)
+ }
+ \n unexpected_newline(token_start, '"')
+ <<EOF>> unexpected_eof(token_start, '"')
+}
+
+
+ /*---------------------------------------------------.
+ | Strings, comments etc. can be found in user code. |
+ `---------------------------------------------------*/
+
+ /* Nick added: SC_ELEMENT_GROUP */
+<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE,SC_ELEMENT_GROUP>
+{
+ "'" {
+ global context_state, token_start
+ obstack_for_string.append(yytext)
+ context_state = YY_START()
+ token_start = y_tab.yylloc.start
+ BEGIN(SC_CHARACTER)
+ }
+ "\"" {
+ global context_state, token_start
+ obstack_for_string.append(yytext)
+ context_state = YY_START()
+ token_start = y_tab.yylloc.start
+ BEGIN(SC_STRING)
+ }
+ "/"{splice}"*" {
+ global context_state, token_start
+ obstack_for_string.append(yytext)
+ context_state = YY_START()
+ token_start = y_tab.yylloc.start
+ BEGIN(SC_COMMENT)
+ }
+ "/"{splice}"/" {
+ global context_state, token_start
+ obstack_for_string.append(yytext)
+ context_state = YY_START()
+ BEGIN(SC_LINE_COMMENT)
+ }
+}
+
+
+
+ /*-----------------------------------------------------------.
+ | Scanning some code in braces (actions, predicates). The |
+ | initial "{" is already eaten. |
+ `-----------------------------------------------------------*/
+
+ /* Nick added: SC_ELEMENT_GROUP */
+<SC_BRACED_CODE,SC_PREDICATE,SC_ELEMENT_GROUP>
+{
+ "{"|"<"{splice}"%" {
+ global nesting
+ obstack_for_string.append(yytext)
+ nesting += 1
+ }
+ "%"{splice}">" {
+ global nesting
+ obstack_for_string.append(yytext)
+ nesting -= 1
+ }
+
+  /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
+     (as '<' '<%'). */
+ "<"{splice}"<" obstack_for_string.append(yytext)
+
+ <<EOF>> unexpected_eof(code_start, '}')
+}
+
+<SC_BRACED_CODE>
+{
+ "}" {
+ global nesting, gram_last_string
+ obstack_for_string.append('}')
+ nesting -= 1
+ if nesting < 0:
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = code_start
+ y_tab.yylval = gram_last_string
+ BEGIN(INITIAL)
+ gram_piece_append('</AST_Text>')
+ gram_piece_escape(yytext)
+ gram_piece_append('</AST_BracedCode>')
+ gram_piece_pack()
+ return ~y_tab.BRACED_CODE
+ }
+}
+
+<SC_PREDICATE>
+{
+ "}" {
+ global nesting, gram_last_string
+ nesting -= 1
+ if nesting < 0:
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = code_start
+ y_tab.yylval = gram_last_string
+ BEGIN(INITIAL)
+ gram_piece_append('</AST_Text>')
+ gram_piece_escape(yytext)
+ gram_piece_append('</AST_BracedPredicate>')
+ gram_piece_pack()
+ return ~y_tab.BRACED_PREDICATE
+ else:
+ obstack_for_string.append('}')
+ }
+}
+
+ /* Nick extra rules for element groups */
+<SC_ELEMENT_GROUP>
+{
+ "}" {
+ global nesting, gram_last_string
+ obstack_for_string.append('}')
+ nesting -= 1
+ if nesting < 0:
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = code_start
+ y_tab.yylval = gram_last_string
+ #del obstack_for_string[:]
+ BEGIN(INITIAL)
+ gram_piece_append('</AST_Text>')
+ gram_piece_escape(yytext)
+ gram_piece_pack()
+ return ~ord('(')
+ }
+}
+
+ /*--------------------------------------------------------------.
+ | Scanning some prologue: from "%{" (already scanned) to "%}". |
+ `--------------------------------------------------------------*/
+
+<SC_PROLOGUE>
+{
+ "%}" {
+ global gram_last_string
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = code_start
+ y_tab.yylval = gram_last_string
+ BEGIN(INITIAL)
+ gram_piece_append('</AST_Text>')
+ gram_piece_escape(yytext)
+ gram_piece_append('</AST_Section1_Prologue>')
+ gram_piece_pack()
+ return ~y_tab.PROLOGUE
+ }
+
+ <<EOF>> unexpected_eof(code_start, '%}')
+}
+
+
+ /*---------------------------------------------------------------.
+ | Scanning the epilogue (everything after the second "%%", which |
+ | has already been eaten). |
+ `---------------------------------------------------------------*/
+
+<SC_EPILOGUE>
+{
+ <<EOF>> {
+ global gram_last_string
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = code_start
+ y_tab.yylval = gram_last_string
+ BEGIN(INITIAL)
+ gram_piece_pack()
+ return ~y_tab.EPILOGUE
+ }
+}
+
+
+ /*-----------------------------------------------------.
+ | By default, grow the string obstack with the input. |
+ `-----------------------------------------------------*/
+
+ /* Nick added: SC_ELEMENT_GROUP */
+<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER,SC_ELEMENT_GROUP>. |
+ /* Nick added: SC_ELEMENT_GROUP */
+<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_ELEMENT_GROUP>\n obstack_for_string.append(yytext)
+
+
+%%
+
+#def no_cr_read(fp, buf, size):
+# bytes_read = fread_unlocked(buf, 1, size, fp)
+# if bytes_read:
+# w = memchr(buf, ord('\r'), bytes_read)
+# if w:
+# r = ++w
+# lim = buf + bytes_read
+# pass
+# while True:
+# w[-1] = ord('\n')
+# if r == lim:
+# ch = getc_unlocked(fp)
+# if ch != ord('\n') and ungetc(ch, fp) != ch:
+# break
+# else:
+# if *r == ord('\n'):
+# r += 1
+# while True:
+# if r == lim:
+# return w - buf
+# if not ((*w++ = *r++) != ord('\r')):
+# break
+# pass
+# return w - buf
+# return bytes_read
+
+def scan_integer(number, base, loc):
+ num = int(number, base)
+ if 0x7fffffff < num:
+ state.complain(y_tab.yylloc, state.complaint, 'integer out of range: {0:s}'.format(state.quote(number)))
+ num = 0x7fffffff
+ return num
+
+#def convert_ucn_to_byte(ucn):
+# code = strtoul(ucn + 2, None, 16)
+# if 0x7f * 2 + 1 < code:
+# return -1
+# return code
+#
+#def handle_syncline(args, loc):
+# file = None
+# lineno = strtoul(args, &file, 10)
+# if 0x7fffffff <= lineno:
+# state.complain(y_tab.yylloc, state.Wother, 'line number overflow')
+# lineno = 0x7fffffff
+# file = strchr(file, ord('"'))
+# if file:
+# *strchr(file + 1, ord('"')) = ord('\0')
+# current_file = uniqstr_new(file + 1)
+# boundary_set(&scanner_cursor, current_file, lineno, 1)
+
+def unexpected_end(start, msg, token_end):
+ loc = state.location(start.copy(), scanner_cursor.copy())
+ scanner_cursor.column -= len(token_end)
+ unput(token_end)
+ token_end = state.quote(token_end)
+ if token_end == '\'\\\'\'':
+ token_end = '"\'"'
+ state.complain(y_tab.yylloc, state.complaint, msg.format(token_end))
+
+def unexpected_eof(start, token_end):
+  # EOF hit while the construct opened at START still needs TOKEN_END;
+  # delegate to unexpected_end() for quoting and the actual complaint
+  unexpected_end(start, 'missing {0:s} at end of file', token_end)
+
+def unexpected_newline(start, token_end):
+  # newline hit while the construct opened at START still needs TOKEN_END;
+  # delegate to unexpected_end() for quoting and the actual complaint
+  unexpected_end(start, 'missing {0:s} at end of line', token_end)
+
+#def gram_scanner_initialize():
+# global obstack_for_string
+# obstack_for_string = []
+#
+#def gram_scanner_free():
+# del obstack_for_string[:]
+# yy_delete_buffer(YY_CURRENT_BUFFER)
+
+def gram_piece_append(str):
+  # append STR to the XML piece list verbatim (unlike gram_piece_escape());
+  # NOTE(review): parameter name shadows the builtin str
+  gram_piece.append(str)
+
+def gram_piece_insert(n, str):
+ gram_piece[n:n] = [str]
+
+xml_escape = {'<': '<', '>': '>', '&': '&'}
+def gram_piece_escape(str):
+ gram_piece.append(''.join([xml_escape.get(i, i) for i in str]))
+
+def gram_piece_flush(n):
+ global yytext
+ gram_piece_escape(yytext[:n])
+ yytext = yytext[n:]
+
+def gram_piece_pack():
+ global gram_piece0
+ gram_piece[gram_piece0:] = [''.join(gram_piece[gram_piece0:])]
+ gram_piece0 += 1
+
+def gram_lex():
+  '''Fetch one token via yylex(), maintaining the XML piece list.
+
+  A scanner action that has already packed its own piece signals this by
+  returning the complemented (negative) token number; undo the complement
+  and pass the token through.  Otherwise pack the pending pieces, append
+  the escaped matched text as its own piece, and pack again.
+  '''
+  result = yylex()
+  if result < 0:
+    return ~result
+  gram_piece_pack()
+  gram_piece_escape(yytext)
+  gram_piece_pack()
+  return result
--- /dev/null
+import sys
+
+# miscellaneous state accessed by scan-gram.l and parse-gram.y
+class boundary:
+ def __init__(self, file = '<stdin>', line = 0, column = 0):
+ self.file = file
+ self.line = line
+ self.column = column
+ def copy(self):
+ return boundary(self.file, self.line, self.column)
+
+class location:
+ def __init__(self, start = None, end = None):
+ self.start = boundary() if start is None else start
+ self.end = boundary() if end is None else end
+ def copy(self):
+ return location(self.start.copy(), self.end.copy())
+
+warning_midrule_values = 0
+warning_yacc = 1
+warning_conflicts_sr = 2
+warning_conflicts_rr = 3
+warning_empty_rule = 3
+warning_deprecated = 4
+warning_precedence = 5
+warning_other = 6
+warnings_size = 7
+
+Wnone = 0
+Wmidrule_values = 1 << warning_midrule_values
+Wyacc = 1 << warning_yacc
+Wconflicts_sr = 1 << warning_conflicts_sr
+Wconflicts_rr = 1 << warning_conflicts_rr
+Wdeprecated = 1 << warning_deprecated
+Wempty_rule = 1 << warning_empty_rule
+Wprecedence = 1 << warning_precedence
+Wother = 1 << warning_other
+Werror = 1 << 10
+complaint = 1 << 11
+fatal = 1 << 12
+silent = 1 << 13
+no_caret = 1 << 14
+Weverything = ~complaint & ~fatal & ~silent
+Wall = Weverything & ~Wyacc
+
+def complain(loc, flags, message):
+  '''Bootstrap stand-in for bison's complain(): LOC and FLAGS are ignored,
+  MESSAGE is printed and the process exits.  Every diagnostic -- even one
+  flagged as a mere warning -- is therefore fatal here.  The commented-out
+  C below is the upstream severity logic this replaces.
+  '''
+  #severity s = warning_severity (flags);
+  #if ((flags & complaint) && complaint_status < status_complaint)
+  #  complaint_status = status_complaint;
+  #
+  #if (severity_warning <= s)
+  #  {
+  #    const char* prefix =
+  #      s == severity_fatal ? _("fatal error")
+  #      : s == severity_error ? _("error")
+  #      : _("warning");
+  #    if (severity_error <= s && ! complaint_status)
+  #      complaint_status = status_warning_as_error;
+  #    error_message (loc, flags, prefix, message, args);
+  #  }
+  #
+  #if (flags & fatal)
+  #  exit (EXIT_FAILURE);
+  print(message)
+  sys.exit(1)
+
+# associativity codes for precedence declarations (%left, %right,
+# %nonassoc, %precedence)
+undef_assoc = 0
+right_assoc = 1
+left_assoc = 2
+non_assoc = 3
+precedence_assoc = 4
+
+# code-property kinds, presumably for %destructor / %printer declarations
+destructor = 0
+printer = 1
+
+def quote(str):
+ return '"{0:s}"'.format(str.replace('\\', '\\\\').replace('"', '\\"'))