__pycache__
+bootstrap/*.xml
+bootstrap/lex_yy.py
+bootstrap/y_tab.py
lex-yacc-examples/*.c
lex-yacc-examples/*.h
lex-yacc-examples/*.o
--- /dev/null
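+# Regeneration sketch (as the rules below indicate): the patched flex and
+# bison parse scan-gram.l / parse-gram.y, dumping what they parsed as XML on
+# stderr (captured into *.xml); pilex.py / piyacc.py then merge each XML dump
+# with its skel_*.py skeleton to produce lex_yy.py and y_tab.py.
+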
+all: lex_yy.py y_tab.py
+
+lex_yy.py: scan-gram.l skel_lex_yy.py
+ ../../bootstrap_flex.git/src/flex -o /dev/null $< 2>$<.xml
+ ../../pilex.git/pilex.py --python --skel skel_lex_yy.py $<.xml
+
+y_tab.py: parse-gram.y skel_y_tab.py
+ ../../bootstrap_bison.git/src/bison -o /dev/null $< 2>$<.xml
+ ../piyacc.py --python --skel skel_y_tab.py $<.xml
+
+clean:
+ rm -f lex_yy.py y_tab.py *.xml
--- /dev/null
+# Copyright (C) 2018 Nick Downing <nick@ndcode.org>
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 51
+# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+
+import xml.etree.ElementTree
+
+class Element(xml.etree.ElementTree._Element_Py):
+ def __init__(self, tag = 'Element', attrib = {}, text = '', children = []):
+ xml.etree.ElementTree._Element_Py.__init__(self, tag, attrib)
+ self.ref = -1
+ self.seen = False
+ set_text(self, 0, text)
+ self[:] = children
+ def serialize(self, ref_list):
+ for i in self:
+      # parented: enforce that a child can be parented at most once
+      # (although there can be any number of numeric refs to it)
+ assert not i.seen
+ i.seen = True
+ if i.ref == -1:
+ i.serialize(ref_list)
+ def deserialize(self, ref_list):
+ for i in self:
+ i.deserialize(ref_list)
+ def copy(self, factory = None):
+ result = (Element if factory is None else factory)(self.tag, self.attrib)
+ result.text = self.text
+ result.tail = self.tail
+ result[:] = [i.copy() for i in self]
+ return result
+ def repr_serialize(self, params):
+ if len(self):
+ params.append(
+ 'children = [{0:s}]'.format(
+ ', '.join([repr(i) for i in self])
+ )
+ )
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'element.Element({0:s})'.format(', '.join(params))
+
+bool_to_str = ['false', 'true']
+def serialize_bool(value):
+ return bool_to_str[int(value)]
+
+str_to_bool = {'false': False, 'true': True}
+def deserialize_bool(text):
+ return str_to_bool[text]
+
+def serialize_int(value):
+ return str(value)
+
+def deserialize_int(text):
+ return int(text)
+
+def serialize_ref(value, ref_list):
+ if value is None:
+ ref = -1
+ else:
+ ref = value.ref
+ if ref == -1:
+ ref = len(ref_list)
+ ref_list.append(value)
+ value.ref = ref
+ value.set('ref', str(ref))
+ # this doesn't set the seen flag, so it will be parented by the
+ # root, unless it is already parented or gets parented later on
+ if not value.seen:
+ value.serialize(ref_list)
+ return str(ref)
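+
+# Illustrative note (not in the original source): serialize_ref() is what
+# hands out the numeric ids.  The first call on a node appends it to
+# ref_list and tags it with a ref="N" attribute; later calls just return
+# the same N:
+#   node = Element('node')
+#   refs = []
+#   serialize_ref(node, refs)  # returns '0', node now carries ref="0"
+#   serialize_ref(node, refs)  # returns '0' again, refs is unchanged
+#   serialize_ref(None, refs)  # returns '-1', the null reference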
+
+def deserialize_ref(text, ref_list):
+ ref = int(text)
+ return None if ref < 0 else ref_list[ref]
+
+def serialize_str(value):
+ return value
+
+def deserialize_str(text):
+ return text
+
+def serialize(value, fout, encoding = 'unicode'):
+ ref_list = []
+ serialize_ref(value, ref_list)
+ parents = [i for i in ref_list if not i.seen]
+ root = Element('root', children = parents)
+ for i in range(len(root)):
+ set_text(root, i, '\n ')
+ set_text(root, len(root), '\n')
+ root.tail = '\n'
+ xml.etree.ElementTree.ElementTree(root).write(fout, encoding)
+ for i in root:
+ i.tail = None
+ for i in ref_list:
+ i.ref = -1
+ del i.attrib['ref']
+ i = 0
+ while i < len(parents):
+ for j in parents[i]:
+ j.seen = False
+ parents.append(j)
+ i += 1
+
+def deserialize(fin, factory = Element, encoding = 'unicode'):
+ root = xml.etree.ElementTree.parse(
+ fin,
+ xml.etree.ElementTree.XMLParser(
+ target = xml.etree.ElementTree.TreeBuilder(factory),
+ encoding = encoding
+ )
+ ).getroot()
+ assert root.tag == 'root'
+ for i in root:
+ i.tail = None
+ i = 0
+ parents = root[:]
+ ref_list = []
+ while i < len(parents):
+ j = parents[i]
+ if 'ref' in j.attrib:
+ ref = int(j.attrib['ref'])
+ del j.attrib['ref']
+ if len(ref_list) < ref + 1:
+ ref_list.extend([None] * (ref + 1 - len(ref_list)))
+ ref_list[ref] = j
+ parents.extend(j[:])
+ i += 1
+ for i in root:
+ i.deserialize(ref_list)
+ return ref_list[0]
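+
+# Minimal usage sketch (illustrative): serialize() wraps the tree in a
+# <root> element and tags ref'd nodes with ref="N", which is exactly the
+# framing the driver script in this patch emits by hand:
+#   import sys
+#   serialize(Element('AST', children = [Element('leaf')]), sys.stdout)
+#   # prints:
+#   # <root>
+#   #  <AST ref="0"><leaf /></AST>
+#   # </root>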
+
+# compatibility scheme to access arbitrary xml.etree.ElementTree.Element-like
+# objects (not just Element defined above) using a more consistent interface:
+def get_text(root, i):
+ if i < 0:
+ i += len(root) + 1
+ text = root.text if i == 0 else root[i - 1].tail
+ return '' if text is None else text
+
+def set_text(root, i, text):
+ if i < 0:
+ i += len(root) + 1
+ if len(text) == 0:
+ text = None
+ if i == 0:
+ root.text = text
+ else:
+ root[i - 1].tail = text
+
+def to_text(root):
+ return ''.join(
+ [
+ j
+ for i in range(len(root))
+ for j in [get_text(root, i), to_text(root[i])]
+ ] +
+ [get_text(root, len(root))]
+ )
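+
+# Slot convention (illustrative): index 0 is root.text and index i > 0 is
+# the tail of child i - 1; negative indices count from the end.  For a node
+# parsed from '<a>x<b />y</a>':
+#   get_text(a, 0) == 'x'   # text before the first child
+#   get_text(a, 1) == 'y'   # tail of child 0, i.e. the text after <b />
+#   get_text(a, -1) == 'y'  # the same slot, addressed from the end
+#   to_text(a) == 'xy'      # every text slot, recursively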
+
+def concatenate(children, factory = Element, *args, **kwargs):
+ root = factory(*args, **kwargs)
+ for child in children:
+ i = len(root)
+ set_text(root, i, get_text(root, i) + get_text(child, 0))
+ root[i:] = child[:]
+ return root
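+
+# Illustrative use of concatenate(): splice the children of several parsed
+# fragments into one fresh node, gluing each fragment's leading text onto
+# the preceding text slot (frag1 and frag2 are hypothetical Elements):
+#   merged = concatenate([frag1, frag2], Element, 'AST_Text')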
--- /dev/null
+#!/usr/bin/env python3
+
+import lex_yy
+import y_tab
+import sys
+
+lex_yy.gram_piece_append('<root>\n <AST ref=\"0\">')
+y_tab.yyparse()
+lex_yy.gram_piece_append('</AST>\n</root>\n')
+sys.stdout.write(''.join(lex_yy.gram_piece))
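+
+# Usage sketch (assumed): the generated scanner reads the grammar from
+# standard input, so an invocation along the lines of
+#   python3 bootstrap.py <parse-gram.y >parse-gram.xml
+# should print the XML-annotated grammar on standard output (the script
+# name here is hypothetical).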
--- /dev/null
+/* Bison Grammar Parser -*- C -*-
+
+ Copyright (C) 2002-2015, 2018 Free Software Foundation, Inc.
+
+ This file is part of Bison, the GNU Compiler Compiler.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+%code requires
+{
+ import state
+
+ YYLTYPE = state.location
+}
+
+%code top
+{
+}
+
+%code
+{
+ #current_prec = 0
+ #current_lhs_location = 0
+ #current_lhs_named_ref = []
+ #current_lhs_symbol = []
+ #current_class = unknown_sym
+ #current_type = None
+ gram_piece2 = 0
+ gram_piece3 = 0
+ nested_rhs = 0
+}
+
+/* Nick %define api.prefix {gram_} */
+%define api.pure full
+%define locations
+%define parse.error verbose
+%define parse.lac full
+%define parse.trace
+/* Nick %defines */
+%expect 0
+/* Nick %verbose */
+
+%initial-action
+{
+ #boundary_set(&@$.start, current_file, 1, 1)
+ #boundary_set(&@$.end, current_file, 1, 1)
+}
+
+/* Define the tokens together with their human representation. */
+%token GRAM_EOF 0 "end of file"
+%token STRING "string"
+
+%token PERCENT_TOKEN "%token"
+%token PERCENT_NTERM "%nterm"
+
+%token PERCENT_TYPE "%type"
+%token PERCENT_DESTRUCTOR "%destructor"
+%token PERCENT_PRINTER "%printer"
+
+%token PERCENT_LEFT "%left"
+%token PERCENT_RIGHT "%right"
+%token PERCENT_NONASSOC "%nonassoc"
+%token PERCENT_PRECEDENCE "%precedence"
+
+%token PERCENT_PREC "%prec"
+%token PERCENT_DPREC "%dprec"
+%token PERCENT_MERGE "%merge"
+
+/*----------------------.
+| Global Declarations. |
+`----------------------*/
+
+%token
+ PERCENT_CODE "%code"
+ PERCENT_DEFAULT_PREC "%default-prec"
+ PERCENT_DEFINE "%define"
+ PERCENT_DEFINES "%defines"
+ PERCENT_ERROR_VERBOSE "%error-verbose"
+ PERCENT_EXPECT "%expect"
+ PERCENT_EXPECT_RR "%expect-rr"
+ PERCENT_FLAG "%<flag>"
+ PERCENT_FILE_PREFIX "%file-prefix"
+ PERCENT_GLR_PARSER "%glr-parser"
+ PERCENT_INITIAL_ACTION "%initial-action"
+ PERCENT_LANGUAGE "%language"
+ PERCENT_NAME_PREFIX "%name-prefix"
+ PERCENT_NO_DEFAULT_PREC "%no-default-prec"
+ PERCENT_NO_LINES "%no-lines"
+ PERCENT_NONDETERMINISTIC_PARSER
+ "%nondeterministic-parser"
+ PERCENT_OUTPUT "%output"
+ PERCENT_REQUIRE "%require"
+ PERCENT_SKELETON "%skeleton"
+ PERCENT_START "%start"
+ PERCENT_TOKEN_TABLE "%token-table"
+ PERCENT_VERBOSE "%verbose"
+ PERCENT_YACC "%yacc"
+;
+
+%token BRACED_CODE "{...}"
+%token BRACED_PREDICATE "%?{...}"
+%token BRACKETED_ID "[identifier]"
+%token CHAR "char"
+%token EPILOGUE "epilogue"
+%token EQUAL "="
+%token ID "identifier"
+%token ID_COLON "identifier:"
+%token PERCENT_PERCENT "%%"
+%token PIPE "|"
+%token PROLOGUE "%{...%}"
+%token SEMICOLON ";"
+%token TAG "<tag>"
+%token TAG_ANY "<*>"
+%token TAG_NONE "<>"
+
+/*%union {
+ character = 0
+}*/
+/*%type <character> CHAR*/
+%printer {
+ fputs_unlocked(char_name($$), yyo)
+} CHAR
+
+/*%union {
+ code = ''
+};*/
+/*%type <code> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING*/
+%printer {
+ fputs_unlocked(quotearg_style(c_quoting_style, $$), yyo)
+} STRING
+/*%printer {
+ rpl_fprintf(yyo, '{\n%s\n}', $$)
+} <code>*/
+
+/*%union {
+ uniqstr = ''
+}*/
+/*%type <uniqstr> BRACKETED_ID ID ID_COLON PERCENT_FLAG TAG tag variable*/
+/*%printer {
+ fputs_unlocked($$, yyo)
+} <uniqstr>*/
+%printer {
+ rpl_fprintf(yyo, '[%s]', $$)
+} BRACKETED_ID
+%printer {
+ rpl_fprintf(yyo, '%s:', $$)
+} ID_COLON
+%printer {
+ rpl_fprintf(yyo, '%%%s', $$)
+} PERCENT_FLAG
+%printer {
+ rpl_fprintf(yyo, '<%s>', $$)
+} TAG tag
+
+/*%union {
+ integer = 0
+};*/
+/*%token <integer> INT "integer"*/
+%token INT "integer"
+/*%printer {
+ rpl_fprintf(yyo, '%d', $$)
+} <integer>*/
+
+/*%union {
+ symbol = []
+}*/
+/*%type <symbol> id id_colon string_as_id symbol symbol.prec*/
+/*%printer {
+ rpl_fprintf(yyo, '%s', $$->tag)
+} <symbol>*/
+%printer {
+ rpl_fprintf(yyo, '%s:', $$->tag)
+} id_colon
+
+/*%union {
+ assoc = 0
+};*/
+/*%type <assoc> precedence_declarator*/
+
+/*%union {
+ list = []
+}*/
+/*%type <list> symbols.1 symbols.prec generic_symlist generic_symlist_item*/
+
+/*%union {
+ named_ref = []
+}*/
+/*%type <named_ref> named_ref.opt*/
+
+/*---------.
+| %param. |
+`---------*/
+%code requires
+{
+ param_none = 0
+ param_lex = 1 << 0
+ param_parse = 1 << 1
+ param_both = param_lex | param_parse
+};
+%code
+{
+ current_param = param_none
+};
+/*%union {
+ param = 0
+}*/
+/*%token <param> PERCENT_PARAM "%param";*/
+%token PERCENT_PARAM "%param";
+/*%printer
+{
+ if $$ == param_lex:
+ fputs_unlocked('%' 'lex-param', yyo)
+ break
+ elif $$ == param_parse:
+ fputs_unlocked('%' 'parse-param', yyo)
+ break
+ elif $$ == param_both:
+ fputs_unlocked('%' 'param', yyo)
+ break
+ elif $$ == param_none:
+ assert(False)
+ break
+} <param>;*/
+
+ /*==========\
+ | Grammar. |
+ \==========*/
+%%
+
+input
+ : prologue_declarations "%%" grammar epilogue.opt
+ {
+ insert_after(2, '</AST_Section2>')
+ insert_before(2, '<AST_Section2>')
+ insert_after(0, '</AST_Section1>')
+ insert_before(0, '<AST_Section1>')
+ }
+ ;
+
+ /*------------------------------------.
+ | Declarations: before the first %%. |
+ `------------------------------------*/
+
+prologue_declarations
+ : %empty
+ {
+ global yychar
+ if yychar == YYEMPTY:
+ yychar = lex_yy.gram_lex()
+ temp = lex_yy.gram_piece[gram_piece2 + 1]
+ lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
+ lex_yy.gram_piece[gram_piece2] = lex_yy.gram_piece[gram_piece2 - 1]
+ lex_yy.gram_piece[gram_piece2 - 1] = temp
+ }
+ | prologue_declarations prologue_declaration
+ ;
+
+prologue_declaration
+ : grammar_declaration
+ | "%{...%}"
+ {
+ #code_piece_append('<AST_Section1_Prologue>%{<AST_Text>')
+ #muscle_code_grow('post_prologue' if union_seen else 'pre_prologue', translate_code($1, @1, True), @1)
+ lex_yy.gram_scanner_last_string_free() # normally done in above
+ #code_scanner_last_string_free()
+ #code_piece_append('</AST_Text>%}</AST_Section1_Prologue>')
+ #code_piece_pack()
+ #lex_yy.gram_piece[gram_piece2] = code_piece[code_piece0 - 1]
+ }
+ | "%<flag>"
+ {
+ #muscle_percent_define_ensure($1, @1, True)
+ if $1 == 'api.pure':
+ insert_after(0, '</AST_Section1_PureParser>')
+ insert_before(0, '<AST_Section1_PureParser>')
+ elif $1 == 'locations':
+ insert_after(0, '</AST_Section1_Locations>')
+ insert_before(0, '<AST_Section1_Locations>')
+ elif $1 == 'parse.trace':
+ insert_after(0, '</AST_Section1_Debug>')
+ insert_before(0, '<AST_Section1_Debug>')
+ else:
+ assert False
+ }
+ | "%define" variable value
+ {
+ #muscle_percent_define_insert($2, @2, $3.kind, $3.chars, MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
+ insert_after(2, '</AST_Section1_Define>')
+ insert_before(0, '<AST_Section1_Define>')
+ }
+ | "%defines"
+ {
+ #defines_flag = True
+ insert_after(0, '</AST_Section1_Defines>')
+ insert_before(0, '<AST_Section1_Defines>')
+ }
+ | "%defines" STRING
+ {
+ #defines_flag = True
+ #spec_defines_file = xstrdup($2)
+ insert_after(1, '</AST_Section1_Defines>')
+ insert_before(0, '<AST_Section1_Defines>')
+ }
+ | "%error-verbose"
+ {
+ #muscle_percent_define_insert('parse.error', @1, muscle_keyword, 'verbose', MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
+ insert_after(0, '</AST_Section1_ErrorVerbose>')
+ insert_before(0, '<AST_Section1_ErrorVerbose>')
+ }
+ | "%expect" INT
+ {
+ #expected_sr_conflicts = $2
+ insert_after(1, '</AST_Section1_Expect>')
+ insert_before(0, '<AST_Section1_Expect value="{0:d}">'.format($2))
+ }
+ | "%expect-rr" INT
+ {
+ #expected_rr_conflicts = $2
+ insert_after(1, '</AST_Section1_ExpectRR>')
+ insert_before(0, '<AST_Section1_ExpectRR value="{0:d}">'.format($2))
+ }
+ | "%file-prefix" STRING
+ {
+ #spec_file_prefix = $2
+ insert_after(1, '</AST_Section1_FilePrefix>')
+ insert_before(0, '<AST_Section1_FilePrefix>')
+ }
+ | "%glr-parser"
+ {
+ #nondeterministic_parser = True
+ #glr_parser = True
+ insert_after(0, '</AST_Section1_GLRParser>')
+ insert_before(0, '<AST_Section1_GLRParser>')
+ }
+ | "%initial-action" "{...}"
+ {
+ #code_piece_append('<AST_Text>')
+ #muscle_code_grow('initial_action', translate_code($2, @2, False), @2)
+ lex_yy.gram_scanner_last_string_free() # normally done in above
+ #code_scanner_last_string_free()
+ #code_piece_append('</AST_Text>')
+ #code_piece_pack()
+ #lex_yy.gram_piece[gram_piece2 + 2] = code_piece[code_piece0 - 1]
+ insert_after(1, '</AST_Section1_InitialAction>')
+ insert_before(0, '<AST_Section1_InitialAction>')
+ }
+ | "%language" STRING
+ {
+ #language_argmatch($2, grammar_prio, @1)
+ insert_after(1, '</AST_Section1_Language>')
+ insert_before(0, '<AST_Section1_Language>')
+ }
+ | "%name-prefix" STRING
+ {
+ #spec_name_prefix = $2
+ insert_after(1, '</AST_Section1_NamePrefix>')
+ insert_before(0, '<AST_Section1_NamePrefix>')
+ }
+ | "%no-lines"
+ {
+ #no_lines_flag = True
+ insert_after(0, '</AST_Section1_Lines>')
+ insert_before(0, '<AST_Section1_Lines value="false">')
+ }
+ | "%nondeterministic-parser"
+ {
+ #nondeterministic_parser = True
+ insert_after(0, '</AST_Section1_NonDeterministicParser>')
+ insert_before(0, '<AST_Section1_NonDeterministicParser>')
+ }
+ | "%output" STRING
+ {
+ #spec_outfile = $2
+ insert_after(1, '</AST_Section1_Output>')
+ insert_before(0, '<AST_Section1_Output>')
+ }
+ | "%param"
+ {
+ #current_param = $1
+ }
+ params
+ {
+ #current_param = param_none
+ insert_after(2, '</AST_Section1_Param>')
+ insert_before(0, '<AST_Section1_Param>')
+ }
+ | "%require" STRING
+ {
+ #version_check(&@2, $2)
+ insert_after(1, '</AST_Section1_Require>')
+ insert_before(0, '<AST_Section1_Require>')
+ }
+ | "%skeleton" STRING
+ {
+ #skeleton_user = $2
+ #if strchr(skeleton_user, ord('/')):
+ # dir_length = len(current_file)
+ # skeleton_build = None
+ # while dir_length and current_file[dir_length - 1] != ord('/'):
+ # dir_length -= 1
+ # while dir_length and current_file[dir_length - 1] == ord('/'):
+ # dir_length -= 1
+ # skeleton_build = xmalloc(dir_length + 1 + len(skeleton_user) + 1)
+ # if dir_length > 0:
+ # memcpy(skeleton_build, current_file, dir_length)
+ # skeleton_build[dir_length++] = ord('/')
+ # strcpy(skeleton_build + dir_length, skeleton_user)
+ # skeleton_user = uniqstr_new(skeleton_build)
+ # free(skeleton_build)
+ #skeleton_arg(skeleton_user, grammar_prio, @1)
+ insert_after(1, '</AST_Section1_Skeleton>')
+ insert_before(0, '<AST_Section1_Skeleton>')
+ }
+ | "%token-table"
+ {
+ #token_table_flag = True
+ insert_after(0, '</AST_Section1_TokenTable>')
+ insert_before(0, '<AST_Section1_TokenTable>')
+ }
+ | "%verbose"
+ {
+ #report_flag |= report_states
+ insert_after(0, '</AST_Section1_Verbose>')
+ insert_before(0, '<AST_Section1_Verbose>')
+ }
+ | "%yacc"
+ {
+ #yacc_flag = True
+ insert_after(0, '</AST_Section1_YACC>')
+ insert_before(0, '<AST_Section1_YACC>')
+ }
+ | /*FIXME: Err? What is this horror doing here? */ ";"
+ ;
+
+params
+ : params "{...}"
+ {
+ #add_param(current_param, $2, @2)
+ lex_yy.gram_scanner_last_string_free() # normally done in above
+ }
+ | "{...}"
+ {
+ #add_param(current_param, $1, @1)
+ lex_yy.gram_scanner_last_string_free() # normally done in above
+ }
+ ;
+
+/*----------------------.
+| grammar_declaration. |
+`----------------------*/
+
+grammar_declaration
+ : precedence_declaration
+ | symbol_declaration
+ | "%start" symbol
+ {
+ #grammar_start_symbol_set($2, @2)
+ insert_after(1, '</AST_Section1Or2_Start>')
+ insert_after(1, '</AST_SymbolRef>')
+ insert_before(1, '<AST_SymbolRef>')
+ insert_before(0, '<AST_Section1Or2_Start>')
+ }
+ | code_props_type "{...}" generic_symlist
+ {
+ #code = None
+ #code_props_symbol_action_init(&code, $2, @2)
+ #code_piece_append('<AST_Text>')
+ #code_props_translate_code(&code)
+ lex_yy.gram_scanner_last_string_free() # normally done in above
+ #code_piece_append('</AST_Text>')
+ #code_piece_pack()
+ #lex_yy.gram_piece[gram_piece2 + 2] = code_piece[code_piece0 - 1]
+ #list = None
+ #list = $3
+ #while list:
+ # symbol_list_code_props_set(list, $1, &code)
+ # list = list->next
+ #symbol_list_free($3)
+ insert_after(2, '</AST_Section1Or2_CodeProps>')
+ insert_before(0, '<AST_Section1Or2_CodeProps _type="{0:d}">'.format($1))
+ }
+ | "%default-prec"
+ {
+ #default_prec = True
+ insert_after(0, '</AST_Section1Or2_DefaultPrec>')
+ insert_before(0, '<AST_Section1Or2_DefaultPrec value="true">')
+ }
+ | "%no-default-prec"
+ {
+ #default_prec = False
+ insert_after(0, '</AST_Section1Or2_DefaultPrec>')
+ insert_before(0, '<AST_Section1Or2_DefaultPrec>')
+ }
+ | "%code" "{...}"
+ {
+ #code_piece_append('<AST_BracedCode>{<AST_Text>')
+ #muscle_code_grow('percent_code()', translate_code_braceless($2, @2), @2)
+ lex_yy.gram_scanner_last_string_free() # normally done in above
+ #code_scanner_last_string_free()
+ #code_piece_append('</AST_Text>}</AST_BracedCode>')
+ #code_piece_pack()
+ #lex_yy.gram_piece[gram_piece2 + 2] = code_piece[code_piece0 - 1]
+ insert_after(1, '</AST_Section1Or2_Code>')
+ insert_before(0, '<AST_Section1Or2_Code><AST_ID />')
+ }
+ | "%code" ID "{...}"
+ {
+ #code_piece_append('<AST_BracedCode>{<AST_Text>')
+ #muscle_percent_code_grow($2, @2, translate_code_braceless($3, @3), @3)
+ lex_yy.gram_scanner_last_string_free() # normally done in above
+ #code_scanner_last_string_free()
+ #code_piece_append('</AST_Text>}</AST_BracedCode>')
+ #code_piece_pack()
+ #lex_yy.gram_piece[gram_piece2 + 4] = code_piece[code_piece0 - 1]
+ insert_after(2, '</AST_Section1Or2_Code>')
+ insert_before(0, '<AST_Section1Or2_Code>')
+ }
+ ;
+
+/*%type <code_type> code_props_type;*/
+/*%union {
+ code_type = 0
+};*/
+/*%printer {
+ rpl_fprintf(yyo, '%s', code_props_type_string($$))
+} <code_type>;*/
+
+code_props_type
+ : "%destructor"
+ {
+ $$ = destructor
+ }
+ | "%printer"
+ {
+ $$ = printer
+ }
+ ;
+
+/*---------.
+| %union. |
+`---------*/
+
+%token PERCENT_UNION "%union";
+
+union_name
+ : %empty
+ {
+ insert_before(0, '<AST_ID />')
+ }
+ | ID
+ {
+ #muscle_percent_define_insert('api.value.union.name', @1, muscle_keyword, $1, MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
+ }
+ ;
+
+grammar_declaration
+ : "%union" union_name "{...}"
+ {
+ #union_seen = True
+ #code_piece_append('<AST_BracedCode>{<AST_Text>')
+ #muscle_code_grow('union_members', translate_code_braceless($3, @3), @3)
+ lex_yy.gram_scanner_last_string_free() # normally done in above
+ #code_scanner_last_string_free()
+ #code_piece_append('</AST_Text>}</AST_BracedCode>')
+ #code_piece_pack()
+ #lex_yy.gram_piece[gram_piece2 + 4] = code_piece[code_piece0 - 1]
+ insert_after(2, '</AST_Section1Or2_Union>')
+ insert_before(0, '<AST_Section1Or2_Union>')
+ }
+ ;
+
+symbol_declaration
+ : "%nterm"
+ {
+ #current_class = nterm_sym
+ }
+ symbol_defs.1
+ {
+ #current_class = unknown_sym
+ #current_type = None
+ insert_after(2, '</AST_Section1Or2_NTerm>')
+ insert_before(0, '<AST_Section1Or2_NTerm>')
+ }
+ | "%token"
+ {
+ #current_class = token_sym
+ }
+ symbol_defs.1
+ {
+ #current_class = unknown_sym
+ #current_type = None
+ insert_after(2, '</AST_Section1Or2_Token>')
+ insert_before(0, '<AST_Section1Or2_Token>')
+ }
+ | "%type" TAG symbols.1
+ {
+ #list = None
+ #tag_seen = True
+ #list = $3
+ #while list:
+ # symbol_type_set(list->content.sym, $2, @2)
+ # list = list->next
+ #symbol_list_free($3)
+ insert_after(2, '</AST_Section1Or2_Type>')
+ insert_before(0, '<AST_Section1Or2_Type>')
+ }
+ ;
+
+precedence_declaration
+ : precedence_declarator tag.opt symbols.prec
+ {
+ #list = None
+ #current_prec += 1
+ #list = $3
+ #while list:
+ # symbol_type_set(list->content.sym, current_type, @2)
+ # symbol_precedence_set(list->content.sym, current_prec, $1, @1)
+ # list = list->next
+ #symbol_list_free($3)
+ #current_type = None
+ insert_after(2, '</AST_Section1Or2_Precedence>')
+ insert_before(0, '<AST_Section1Or2_Precedence _type="{0:d}">'.format(($1 & 3) - 1))
+ }
+ ;
+
+precedence_declarator
+ : "%left"
+ {
+ $$ = state.left_assoc
+ }
+ | "%right"
+ {
+ $$ = state.right_assoc
+ }
+ | "%nonassoc"
+ {
+ $$ = state.non_assoc
+ }
+ | "%precedence"
+ {
+ $$ = state.precedence_assoc
+ }
+ ;
+
+tag.opt
+ : %empty
+ {
+ #current_type = None
+ }
+ | TAG
+ {
+ #current_type = $1
+ #tag_seen = True
+ }
+ ;
+
+/* Just like symbols.1 but accept INT for the sake of POSIX. */
+symbols.prec
+ : symbol.prec
+ {
+ #$$ = symbol_list_sym_new($1, @1)
+ }
+ | symbols.prec symbol.prec
+ {
+ #$$ = symbol_list_append($1, symbol_list_sym_new($2, @2))
+ }
+ ;
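+
+/* Illustrative example (not from the original sources): with the INT form
+   above, "%left '+' 43" both declares the precedence of '+' and assigns it
+   user token number 43, as POSIX yacc allows. */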
+
+symbol.prec
+ : symbol
+ {
+ #$$ = $1
+ #symbol_class_set($1, token_sym, @1, False)
+ insert_after(0, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef>')
+ }
+ | symbol INT
+ {
+ #$$ = $1
+ #symbol_user_token_number_set($1, $2, @2)
+ #symbol_class_set($1, token_sym, @1, False)
+ insert_after(1, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
+ }
+ ;
+
+/* One or more symbols to be %typed. */
+symbols.1
+ : symbol
+ {
+ #$$ = symbol_list_sym_new($1, @1)
+ insert_after(0, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef>')
+ }
+ | symbols.1 symbol
+ {
+ #$$ = symbol_list_append($1, symbol_list_sym_new($2, @2))
+ insert_after(1, '</AST_SymbolRef>')
+ insert_before(1, '<AST_SymbolRef>')
+ }
+ ;
+
+generic_symlist
+ : generic_symlist_item
+ {
+ #$$ = $1
+ }
+ | generic_symlist generic_symlist_item
+ {
+ #$$ = symbol_list_append($1, $2)
+ }
+ ;
+
+generic_symlist_item
+ : symbol
+ {
+ #$$ = symbol_list_sym_new($1, @1)
+ insert_after(0, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef>')
+ }
+ | tag
+ {
+ #$$ = symbol_list_type_new($1, @1)
+ }
+ ;
+
+tag
+ : TAG
+ | "<*>"
+ {
+ #$$ = uniqstr_new('*')
+ }
+ | "<>"
+ {
+ #$$ = uniqstr_new('')
+ }
+ ;
+
+/* One token definition. */
+symbol_def
+ : TAG
+ {
+ #current_type = $1
+ #tag_seen = True
+ }
+ | id
+ {
+ #symbol_class_set($1, current_class, @1, True)
+ #symbol_type_set($1, current_type, @1)
+ insert_after(0, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef>')
+ }
+ | id INT
+ {
+ #symbol_class_set($1, current_class, @1, True)
+ #symbol_type_set($1, current_type, @1)
+ #symbol_user_token_number_set($1, $2, @2)
+ insert_after(1, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
+ }
+ | id string_as_id
+ {
+ #symbol_class_set($1, current_class, @1, True)
+ #symbol_type_set($1, current_type, @1)
+ #symbol_make_alias($1, $2, @$)
+ insert_after(1, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef>')
+ }
+ | id INT string_as_id
+ {
+ #symbol_class_set($1, current_class, @1, True)
+ #symbol_type_set($1, current_type, @1)
+ #symbol_user_token_number_set($1, $2, @2)
+ #symbol_make_alias($1, $3, @$)
+ insert_after(2, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
+ }
+ ;
+
+/* One or more symbol definitions. */
+symbol_defs.1
+ : symbol_def
+ | symbol_defs.1 symbol_def
+ ;
+
+ /*------------------------------------------.
+ | The grammar section: between the two %%. |
+ `------------------------------------------*/
+
+grammar
+ : rules_or_grammar_declaration
+ | grammar rules_or_grammar_declaration
+ ;
+
+/* As a Bison extension, one can use the grammar declarations in the
+ body of the grammar. */
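+/* For instance (illustrative), a declaration terminated by ";" may sit
+   between two rules:
+     exp: "number";
+     %destructor { free ($$); } <*>;
+     stmt: exp ";";
+*/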
+rules_or_grammar_declaration
+ : rules
+ | grammar_declaration ";"
+ | error ";"
+ {
+ #yyerrok
+ }
+ ;
+
+rules
+ : id_colon named_ref.opt
+ {
+ #current_lhs($1, @1, $2)
+ }
+ rhses.1
+ {
+ #current_lhs(0, @1, 0)
+ insert_after(3, '</AST_Section2_Rules>')
+ insert_after(0, '</AST_SymbolRef>')
+ insert_before(0, '<AST_SymbolRef>')
+ insert_before(0, '<AST_Section2_Rules>')
+ }
+ ;
+
+rhses.1
+ : rhs
+ {
+ #grammar_current_rule_end(@1)
+ insert_after(0, '</AST_Production>')
+ insert_before(0, '<AST_Production>')
+ }
+ | rhses.1 "|" rhs
+ {
+ #grammar_current_rule_end(@3)
+ insert_after(2, '</AST_Production>')
+ insert_before(2, '<AST_Production>')
+ }
+ | rhses.1 ";"
+ ;
+
+%token PERCENT_EMPTY "%empty";
+/* Nick added %space */
+%token PERCENT_SPACE "%space";
+
+rhs
+ : %empty
+ {
+ global yychar
+ #if nested_rhs:
+ # nested_rhs -= 1
+ #else:
+ # grammar_current_rule_begin(current_lhs_symbol, current_lhs_location, current_lhs_named_ref)
+ if yychar == YYEMPTY:
+ yychar = lex_yy.gram_lex()
+ temp = lex_yy.gram_piece[gram_piece2 + 1]
+ lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
+ lex_yy.gram_piece[gram_piece2] = lex_yy.gram_piece[gram_piece2 - 1]
+ lex_yy.gram_piece[gram_piece2 - 1] = temp
+ }
+ | rhs symbol named_ref.opt
+ {
+ #grammar_current_rule_symbol_append($2, @2, $3)
+ insert_after(2, '</AST_Production_SymbolRef>')
+ insert_after(1, '</AST_SymbolRef>')
+ insert_before(1, '<AST_Production_SymbolRef><AST_SymbolRef>')
+ }
+ | rhs "{...}" named_ref.opt
+ {
+ #code_piece_append('<AST_Text>')
+ #grammar_current_rule_action_append($2, @2, $3, False)
+ #code_piece_append('</AST_Text>')
+ #code_piece_pack()
+ #lex_yy.gram_piece[gram_piece2 + 2] = code_piece[code_piece0 - 1]
+ insert_after(2, '</AST_Production_Action>')
+ insert_before(1, '<AST_Production_Action>')
+ }
+ | rhs "%?{...}"
+ {
+ #grammar_current_rule_action_append($2, @2, None, True)
+ }
+ | rhs "%empty"
+ {
+ #grammar_current_rule_empty_set(@2)
+ insert_after(1, '</AST_Production_Empty>')
+ insert_before(1, '<AST_Production_Empty>')
+ }
+ | rhs "%prec" symbol
+ {
+ #grammar_current_rule_prec_set($3, @3)
+ insert_after(2, '</AST_Production_Prec>')
+ insert_after(2, '</AST_SymbolRef>')
+ insert_before(2, '<AST_SymbolRef>')
+ insert_before(1, '<AST_Production_Prec>')
+ }
+ | rhs "%dprec" INT
+ {
+ #grammar_current_rule_dprec_set($3, @3)
+ insert_after(2, '</AST_Production_DPrec>')
+ insert_before(1, '<AST_Production_DPrec value="{0:d}">'.format($3))
+ }
+ | rhs "%merge" TAG
+ {
+ #grammar_current_rule_merge_set($3, @3)
+ insert_after(2, '</AST_Production_Merge>')
+ insert_before(1, '<AST_Production_Merge>')
+ }
+ /* Nick extra rules for element groups */
+| rhs '('
+ {
+ #nested_rhs += 1
+ }
+ rhs ')'
+ {
+ insert_after(3, '</AST_Production_GroupElement>')
+ insert_before(1, '<AST_Production_GroupElement>')
+ }
+ /* Nick added %space */
+| rhs "%space"
+ {
+ insert_after(1, '</AST_Production_Space>')
+ insert_before(1, '<AST_Production_Space>')
+ }
+ ;
+
+named_ref.opt
+ : %empty
+ {
+ #$$ = 0
+ }
+ | BRACKETED_ID
+ {
+ #$$ = named_ref_new($1, @1)
+ }
+ ;
+
+/*---------------------.
+| variable and value. |
+`---------------------*/
+
+/* The STRING form of variable is deprecated and is not M4-friendly.
+ For example, M4 fails for '%define "[" "value"'. */
+variable
+ : ID
+ | STRING
+ {
+ #$$ = uniqstr_new($1)
+ }
+ ;
+
+/* Some content or empty by default. */
+%code requires {
+};
+/*%union
+{
+ value = 0
+};*/
+/*%type <value> value;*/
+/*%printer
+{
+ if $$.kind == muscle_code:
+ rpl_fprintf(yyo, '{%s}', $$.chars)
+ break
+ elif $$.kind == muscle_keyword:
+ rpl_fprintf(yyo, '%s', $$.chars)
+ break
+ elif $$.kind == muscle_string:
+ rpl_fprintf(yyo, '"%s"', $$.chars)
+ break
+} <value>;*/
+
+value
+ : %empty
+ {
+ #$$.kind = muscle_keyword
+ #$$.chars = ''
+ }
+ | ID
+ {
+ #$$.kind = muscle_keyword
+ #$$.chars = $1
+ }
+ | STRING
+ {
+ #$$.kind = muscle_string
+ #$$.chars = $1
+ }
+ | "{...}"
+ {
+ #$$.kind = muscle_code
+ #$$.chars = strip_braces($1)
+ }
+ ;
+
+/*--------------.
+| Identifiers. |
+`--------------*/
+
+/* Identifiers are returned as uniqstr values by the scanner.
+ Depending on their use, we may need to make them genuine symbols. */
+
+id
+ : ID
+ {
+ #$$ = symbol_from_uniqstr($1, @1)
+ }
+ | CHAR
+ {
+ #$$ = symbol_get(char_name($1), @1)
+ #symbol_class_set($$, token_sym, @1, False)
+ #symbol_user_token_number_set($$, $1, @1)
+ }
+ ;
+
+id_colon
+ : ID_COLON
+ {
+ #$$ = symbol_from_uniqstr($1, @1)
+ }
+ ;
+
+symbol
+ : id
+ | string_as_id
+ ;
+
+/* A string used as an ID: quote it. */
+string_as_id
+ : STRING
+ {
+ #$$ = symbol_get(quotearg_style(c_quoting_style, $1), @1)
+ #symbol_class_set($$, token_sym, @1, False)
+ }
+ ;
+
+epilogue.opt
+ : %empty
+ | "%%" EPILOGUE
+ {
+ #muscle_code_grow('epilogue', translate_code($2, @2, True), @2)
+ lex_yy.gram_scanner_last_string_free() # normally done in above
+ #code_scanner_last_string_free()
+ insert_after(1, '</AST_Section3>')
+ insert_after(0, '<AST_Section3>')
+ }
+ ;
+
+%%
+
+#def lloc_default(rhs, n):
+# i = None
+# loc = None
+# loc.start = rhs[n].end
+# loc.end = rhs[n].end
+# i = 1
+# while i <= n:
+# if not equal_boundaries(rhs[i].start, rhs[i].end):
+# loc.start = rhs[i].start
+# break
+# i += 1
+# return loc
+#
+#def strip_braces(code):
+# code[len(code) - 1] = 0
+# return code + 1
+#
+#def translate_code(code, loc, plain):
+# plain_code = None
+# if plain:
+# code_props_plain_init(&plain_code, code, loc)
+# else:
+# code_props_symbol_action_init(&plain_code, code, loc)
+# code_props_translate_code(&plain_code)
+# lex_yy.gram_scanner_last_string_free()
+# return plain_code.code
+#
+#def translate_code_braceless(code, loc):
+# return translate_code(strip_braces(code), loc, True)
+#
+#def add_param(type, decl, loc):
+# alphanum = 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' '_' '0123456789'
+# name_start = None
+# p = None
+# p = decl
+# while p[1]:
+# if (p == decl or not memchr(alphanum, p[-1], sizeof alphanum - 1)) and memchr(alphanum, p[0], sizeof alphanum - 10 - 1):
+# name_start = p
+# p += 1
+# p -= 1
+# while c_isspace(int(*p)):
+# p -= 1
+# p[1] = ord('\0')
+# decl += 1
+# while c_isspace(int(*decl)):
+# decl += 1
+# if not name_start:
+# complain(&loc, complaint, gettext('missing identifier in parameter declaration'))
+# else:
+# name = xmemdup0(name_start, strspn(name_start, alphanum))
+# if type & param_lex:
+# muscle_pair_list_grow('lex_param', decl, name)
+# if type & param_parse:
+# muscle_pair_list_grow('parse_param', decl, name)
+# free(name)
+# lex_yy.gram_scanner_last_string_free()
+#
+#def version_check(loc, version):
+# if strverscmp(version, '3.0.5') > 0:
+# complain(loc, complaint, 'require bison %s, but have %s', version, '3.0.5')
+# exit(63)
+#
+#def gram_error(loc, msg):
+# complain(loc, complaint, '%s', msg)
+#
+#def token_name(type):
+# return yytname[YYTRANSLATE(type)]
+#
+#def char_name(c):
+# if c == ord('\''):
+# return '\'\\\'\''
+# else:
+# buf = [None, None, None, None]
+# buf[0] = ord('\'')
+# buf[1] = c
+# buf[2] = ord('\'')
+# buf[3] = ord('\0')
+# return quotearg_style(escape_quoting_style, buf)
+#
+#def current_lhs(sym, loc, ref):
+# current_lhs_symbol = sym
+# current_lhs_location = loc
+# free(current_lhs_named_ref)
+# current_lhs_named_ref = ref
+
+def insert_before(n, str):
+ global gram_piece3
+ lex_yy.gram_piece_insert(gram_piece2 + n * 2, str)
+ lex_yy.gram_piece0 += 1
+ gram_piece3 += 1
+
+def insert_after(n, str):
+ global gram_piece3
+ lex_yy.gram_piece_insert(gram_piece2 + n * 2 + 1, str)
+ lex_yy.gram_piece0 += 1
+ gram_piece3 += 1
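+
+# Layout note (inferred, not stated in the original sources):
+# lex_yy.gram_piece holds the scanned input as alternating text/whitespace
+# pieces, two slots per grammar symbol, and gram_piece2 indexes the first
+# piece of the rule being reduced, so insert_before(n, str) and
+# insert_after(n, str) wrap markup around the rule's n-th symbol.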
--- /dev/null
+/* Bison Grammar Scanner -*- C -*-
+
+ Copyright (C) 2002-2015, 2018 Free Software Foundation, Inc.
+
+ This file is part of Bison, the GNU Compiler Compiler.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+%option debug nodefault noinput noyywrap never-interactive
+%option prefix="gram_" outfile="lex_yy.py"
+
+%{
+ import state
+ import y_tab
+
+ scanner_cursor = state.boundary()
+ gram_last_string = ''
+ bracketed_id_str = None
+ bracketed_id_loc = 0
+ bracketed_id_start = 0
+ bracketed_id_context_state = -1
+
+ obstack_for_string = []
+ def gram_scanner_last_string_free():
+ del obstack_for_string[:]
+
+ gram_piece = []
+ gram_piece0 = 0
+ gram_piece1 = 0
+
+  percent_percent_count = 0
+
+  # these should be yylex()-local, but have been moved here; see further down:
+ nesting = 0
+ context_state = -1
+ id_loc = state.location()
+ code_start = scanner_cursor.copy()
+ token_start = scanner_cursor.copy()
+ #first = True
+ if True: #first:
+ scanner_cursor = y_tab.yylloc.start.copy()
+ #first = False
+%}
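+
+/* Note (inferred): gram_piece_pack(), gram_piece_append(),
+   gram_piece_flush() and gram_piece_escape(), used throughout the actions
+   below, are assumed to be supplied by the skel_lex_yy.py skeleton; they
+   accumulate gram_piece, the XML-annotated copy of the input that the
+   driver script joins and prints. */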
+
+%x SC_YACC_COMMENT
+%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
+%x SC_AFTER_IDENTIFIER
+
+%x SC_TAG
+
+%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
+%x SC_COMMENT SC_LINE_COMMENT
+%x SC_STRING SC_CHARACTER
+%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
+%x SC_ELEMENT_GROUP
+
+letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
+notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
+id {letter}({letter}|[-0-9])*
+int [0-9]+
+
+/* Zero or more instances of backslash-newline. Following GCC, allow
+ white space between the backslash and the newline. */
+splice (\\[ \f\t\v]*\n)*
+
+/* An equal sign, with optional leading whitespaces. This is used in some
+ deprecated constructs. */
+eqopt ([[:space:]]*=)?
+
+%%
+
+%{
+  # these should be here, but we can't access yylex()-local variables
+  # from an action, since the action functions are not nested inside yylex():
+ #nesting = 0
+ #context_state = 0
+ #id_loc = state.location()
+ #code_start = scanner_cursor.copy()
+ #token_start = scanner_cursor.copy()
+ #first = True
+ #if first:
+ # scanner_cursor = y_tab.yylloc.start.copy()
+ # first = False
+%}
+
+<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
+{
+ /* Comments and white space. */
+ "," state.complain(state.loc, state.Wother, 'stray \',\' treated as white space')
+ [ \f\n\t\v] |
+ "//".* #continue
+ "/*" {
+ global token_start, context_state
+ token_start = y_tab.yylloc.start
+ context_state = YY_START()
+ BEGIN(SC_YACC_COMMENT)
+ }
+
+ /* #line directives are not documented, and may be withdrawn or
+ modified in future versions of Bison. */
+ ^"#line "{int}(" \"".*"\"")?"\n" #handle_syncline(yytext + sizeof '#line ' - 1, y_tab.yylloc)
+}
+
+
+ /*----------------------------.
+ | Scanning Bison directives. |
+ `----------------------------*/
+
+ /* For directives that are also command line options, the regex must be
+ "%..."
+ after "[-_]"s are removed, and the directive must match the --long
+ option name, with a single string argument. Otherwise, add exceptions
+ to ../build-aux/cross-options.pl. */
+
+<INITIAL>
+{
+ "%binary" return y_tab.PERCENT_NONASSOC
+ "%code" return y_tab.PERCENT_CODE
+ "%debug" {
+ y_tab.yylval = 'parse.trace'
+ return y_tab.PERCENT_FLAG
+ }
+ "%default-prec" return y_tab.PERCENT_DEFAULT_PREC
+ "%define" return y_tab.PERCENT_DEFINE
+ "%defines" return y_tab.PERCENT_DEFINES
+ "%destructor" return y_tab.PERCENT_DESTRUCTOR
+ "%dprec" return y_tab.PERCENT_DPREC
+ "%empty" return y_tab.PERCENT_EMPTY
+ "%error-verbose" return y_tab.PERCENT_ERROR_VERBOSE
+ "%expect" return y_tab.PERCENT_EXPECT
+ "%expect-rr" return y_tab.PERCENT_EXPECT_RR
+ "%file-prefix" return y_tab.PERCENT_FILE_PREFIX
+ "%fixed-output-files" return y_tab.PERCENT_YACC
+ "%initial-action" return y_tab.PERCENT_INITIAL_ACTION
+ "%glr-parser" return y_tab.PERCENT_GLR_PARSER
+ "%language" return y_tab.PERCENT_LANGUAGE
+ "%left" return y_tab.PERCENT_LEFT
+ "%lex-param" {
+ y_tab.yylval = y_tab.param_lex
+ return y_tab.PERCENT_PARAM
+ }
+ "%locations" {
+ y_tab.yylval = 'locations'
+ return y_tab.PERCENT_FLAG
+ }
+ "%merge" return y_tab.PERCENT_MERGE
+ "%name-prefix" return y_tab.PERCENT_NAME_PREFIX
+ "%no-default-prec" return y_tab.PERCENT_NO_DEFAULT_PREC
+ "%no-lines" return y_tab.PERCENT_NO_LINES
+ "%nonassoc" return y_tab.PERCENT_NONASSOC
+ "%nondeterministic-parser" return y_tab.PERCENT_NONDETERMINISTIC_PARSER
+ "%nterm" return y_tab.PERCENT_NTERM
+ "%output" return y_tab.PERCENT_OUTPUT
+ "%param" {
+ y_tab.yylval = y_tab.param_both
+ return y_tab.PERCENT_PARAM
+ }
+ "%parse-param" {
+ y_tab.yylval = y_tab.param_parse
+ return y_tab.PERCENT_PARAM
+ }
+ "%prec" return y_tab.PERCENT_PREC
+ "%precedence" return y_tab.PERCENT_PRECEDENCE
+ "%printer" return y_tab.PERCENT_PRy_tab.INTER
+ "%pure-parser" {
+ y_tab.yylval = 'api.pure'
+ return y_tab.PERCENT_FLAG
+ }
+ "%require" return y_tab.PERCENT_REQUIRE
+ "%right" return y_tab.PERCENT_RIGHT
+ "%skeleton" return y_tab.PERCENT_SKELETON
+ /* Nick added %space */
+ "%space" return y_tab.PERCENT_SPACE
+ "%start" return y_tab.PERCENT_START
+ "%term" return y_tab.PERCENT_TOKEN
+ "%token" return y_tab.PERCENT_TOKEN
+ "%token-table" return y_tab.PERCENT_TOKEN_TABLE
+ "%type" return y_tab.PERCENT_TYPE
+ "%union" return y_tab.PERCENT_UNION
+ "%verbose" return y_tab.PERCENT_VERBOSE
+ "%yacc" return y_tab.PERCENT_YACC
+
+ /* deprecated */
+ "%default"[-_]"prec" {
+ #deprecated_directive(loc, yytext, '%default-prec')
+ scanner_cursor.column -= len('%default-prec')
+ unput('%default-prec')
+ }
+ "%error"[-_]"verbose" {
+ #deprecated_directive(loc, yytext, '%define parse.error verbose')
+ scanner_cursor.column -= len('%define parse.error verbose')
+ unput('%define parse.error verbose')
+ }
+ "%expect"[-_]"rr" {
+ #deprecated_directive(loc, yytext, '%expect-rr')
+ scanner_cursor.column -= len('%expect-rr')
+ unput('%expect-rr')
+ }
+ "%file-prefix"{eqopt} {
+ #deprecated_directive(loc, yytext, '%file-prefix')
+ scanner_cursor.column -= len('%file-prefix')
+ unput('%file-prefix')
+ }
+ "%fixed"[-_]"output"[-_]"files" {
+ #deprecated_directive(loc, yytext, '%fixed-output-files')
+ scanner_cursor.column -= len('%fixed-output-files')
+ unput('%fixed-output-files')
+ }
+ "%name"[-_]"prefix"{eqopt} {
+ #deprecated_directive(loc, yytext, '%name-prefix')
+ scanner_cursor.column -= len('%name-prefix')
+ unput('%name-prefix')
+ }
+ "%no"[-_]"default"[-_]"prec" {
+ #deprecated_directive(loc, yytext, '%no-default-prec')
+ scanner_cursor.column -= len('%no-default-prec')
+ unput('%no-default-prec')
+ }
+ "%no"[-_]"lines" {
+ #deprecated_directive(loc, yytext, '%no-lines')
+ scanner_cursor.column -= len('%no-lines')
+ unput('%no-lines')
+ }
+ "%output"{eqopt} {
+ #deprecated_directive(loc, yytext, '%output')
+ scanner_cursor.column -= len('%output')
+ unput('%output')
+ }
+ "%pure"[-_]"parser" {
+ #deprecated_directive(loc, yytext, '%pure-parser')
+ scanner_cursor.column -= len('%pure-parser')
+ unput('%pure-parser')
+ }
+ "%token"[-_]"table" {
+ #deprecated_directive(loc, yytext, '%token-table')
+ scanner_cursor.column -= len('%token-table')
+ unput('%token-table')
+ }
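+
+  /* Each deprecated spelling above is rewritten by pushing the canonical
+     directive back into the input with unput() and winding the column
+     counter back by its length, so the replacement is rescanned as if the
+     user had written it. */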
+
+ "%"{id} state.complain(y_tab.yylloc, state.complaint, 'invalid directive: {0:s}'.format(quote(yytext)))
+
+ "=" return y_tab.EQUAL
+ "|" return y_tab.PIPE
+ ";" return y_tab.SEMICOLON
+
+ {id} {
+ global id_loc, bracketed_id_str
+ y_tab.yylval = yytext
+ id_loc = y_tab.yylloc
+ bracketed_id_str = None
+ BEGIN(SC_AFTER_IDENTIFIER)
+ gram_piece_pack()
+ gram_piece_append('<AST_ID>')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_ID>')
+ gram_piece_pack()
+ }
+
+ {int} {
+ y_tab.yylval = scan_integer(yytext, 10, y_tab.yylloc)
+ return y_tab.INT
+ }
+ 0[xX][0-9abcdefABCDEF]+ {
+ y_tab.yylval = scan_integer(yytext, 16, y_tab.yylloc)
+ return y_tab.INT
+ }
+
+ /* Identifiers may not start with a digit. Yet, don't silently
+ accept "1FOO" as "1 FOO". */
+ {int}{id} state.complain(y_tab.yylloc, state.complaint, 'invalid identifier: %s', quote(yytext))
+
+ /* Characters. */
+ "'" {
+ global token_start
+ token_start = y_tab.yylloc.start
+ BEGIN(SC_ESCAPED_CHARACTER)
+ gram_piece_pack()
+ gram_piece_append('<AST_Char>')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('<AST_Text>')
+ }
+
+ /* Strings. */
+ "\"" {
+ global token_start
+ token_start = y_tab.yylloc.start
+ BEGIN(SC_ESCAPED_STRING)
+ gram_piece_pack()
+ gram_piece_append('<AST_String>')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('<AST_Text>')
+ }
+
+ /* Prologue. */
+ "%{" {
+ global code_start
+ code_start = y_tab.yylloc.start
+ BEGIN(SC_PROLOGUE)
+ gram_piece_pack()
+ gram_piece_append('<AST_Section1_Prologue>')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('<AST_Text>')
+ }
+
+ /* Code in between braces. */
+ "{" {
+ global nesting, code_start
+ obstack_for_string.append(yytext)
+ nesting = 0
+ code_start = y_tab.yylloc.start
+ BEGIN(SC_BRACED_CODE)
+ gram_piece_pack()
+ gram_piece_append('<AST_BracedCode>')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('<AST_Text>')
+ }
+
+ /* Semantic predicate. */
+ "%?"[ \f\n\t\v]*"{" {
+ global nesting, code_start
+ nesting = 0
+ code_start = y_tab.yylloc.start
+ gram_piece_pack()
+ gram_piece_append('<AST_BracedPredicate>')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('<AST_Text>')
+ BEGIN(SC_PREDICATE)
+ }
+
+ /* Nick extra rules for element groups */
+ "(?E{" {
+ global nesting, code_start
+ obstack_for_string.append(yytext)
+ nesting = 0
+ code_start = y_tab.yylloc.start
+ BEGIN(SC_ELEMENT_GROUP)
+ gram_piece_pack()
+ gram_piece_flush(len(yytext))
+ gram_piece_append('<AST_Text>')
+ }
+ ")" return ord(')')
+
+ /* A type. */
+ "<*>" {
+ gram_piece_pack()
+ gram_piece_append('<AST_TagRef><<AST_Text>*</AST_Text>></AST_TagRef>')
+ return ~y_tab.TAG_ANY
+ }
+ "<>" {
+ gram_piece_pack()
+ gram_piece_append('<AST_TagRef><<AST_Text />></AST_TagRef>')
+ return ~y_tab.TAG_NONE
+ }
+ "<" {
+ global nesting, token_start
+ nesting = 0
+ token_start = y_tab.yylloc.start
+ BEGIN(SC_TAG)
+ gram_piece_pack()
+ gram_piece_append('<AST_TagRef>')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('<AST_Text>')
+ }
+
+ "%%" {
+ global percent_percent_count
+ percent_percent_count += 1
+ if percent_percent_count == 2:
+ BEGIN(SC_EPILOGUE)
+ gram_piece_pack()
+ gram_piece_escape(yytext)
+ gram_piece_pack()
+ gram_piece_pack()
+ return ~y_tab.PERCENT_PERCENT
+ return y_tab.PERCENT_PERCENT
+ }
+
+ "[" {
+ global bracketed_id_str, bracketed_id_start, bracketed_id_context_state
+ bracketed_id_str = None
+ bracketed_id_start = y_tab.yylloc.start
+ bracketed_id_context_state = YY_START()
+ BEGIN(SC_BRACKETED_ID)
+ }
+
+ [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. state.complain(y_tab.yylloc, state.complaint, '%s: %s', ngettext('invalid character', 'invalid characters', gram_leng), quote_mem(yytext, gram_leng))
+
+ <<EOF>> {
+ y_tab.yylloc.start = scanner_cursor.copy()
+ y_tab.yylloc.end = scanner_cursor.copy()
+ yyterminate()
+ }
+}
+
+
+ /*--------------------------------------------------------------.
+ | Supporting \0 complexifies our implementation for no expected |
+ | added value. |
+ `--------------------------------------------------------------*/
+
+<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
+{
+ \0 state.complain(y_tab.yylloc, state.complaint, 'invalid null character')
+}
+
+
+ /*-----------------------------------------------------------------.
+ | Scanning after an identifier, checking whether a colon is next. |
+ `-----------------------------------------------------------------*/
+
+<SC_AFTER_IDENTIFIER>
+{
+ "[" {
+ global bracketed_id_start, bracketed_id_context_state
+ if bracketed_id_str is not None:
+ scanner_cursor.column -= len(yytext)
+ yyless(0)
+ BEGIN(SC_RETURN_BRACKETED_ID)
+ y_tab.yylloc = id_loc
+ return y_tab.ID
+ else:
+ bracketed_id_start = y_tab.yylloc.start
+ bracketed_id_context_state = YY_START()
+ BEGIN(SC_BRACKETED_ID)
+ }
+ ":" {
+ BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
+ y_tab.yylloc = id_loc
+ gram_piece_escape(yytext)
+ return ~y_tab.ID_COLON
+ }
+ . {
+ scanner_cursor.column -= len(yytext)
+ yyless(0)
+ BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
+ y_tab.yylloc = id_loc
+ return ~y_tab.ID
+ }
+ <<EOF>> {
+ BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
+ y_tab.yylloc = id_loc
+ return ~y_tab.ID
+ }
+}
+
+ /*--------------------------------.
+ | Scanning bracketed identifiers. |
+ `--------------------------------*/
+
+<SC_BRACKETED_ID>
+{
+ {id} {
+ global bracketed_id_str, bracketed_id_loc
+ if bracketed_id_str is not None:
+ state.complain(y_tab.yylloc, state.complaint, 'unexpected identifier in bracketed name: %s', quote(yytext))
+ else:
+ bracketed_id_str = yytext
+ bracketed_id_loc = y_tab.yylloc
+ }
+ "]" {
+ global bracketed_id_str
+ BEGIN(bracketed_id_context_state)
+ if bracketed_id_str is not None:
+ if INITIAL == bracketed_id_context_state:
+ y_tab.yylval = bracketed_id_str
+ bracketed_id_str = None
+ y_tab.yylloc = bracketed_id_loc
+ return y_tab.BRACKETED_ID
+ else:
+ state.complain(y_tab.yylloc, state.complaint, 'an identifier expected')
+ }
+
+ [^\].A-Za-z0-9_/ \f\n\t\v]+|. state.complain(y_tab.yylloc, state.complaint, '{0:s}: {1:s}'.format('invalid character in bracketed name' if gram_leng == 1 else 'invalid characters in bracketed name', quote_mem(yytext, gram_leng)))
+
+ <<EOF>> {
+ BEGIN(bracketed_id_context_state)
+ unexpected_eof(bracketed_id_start, ']')
+ }
+}
+
+<SC_RETURN_BRACKETED_ID>
+{
+ . {
+ global bracketed_id_str
+ scanner_cursor.column -= len(yytext)
+ yyless(0)
+ y_tab.yylval = bracketed_id_str
+ bracketed_id_str = None
+ y_tab.yylloc = bracketed_id_loc
+ BEGIN(INITIAL)
+ return y_tab.BRACKETED_ID
+ }
+}
+
+
+ /*---------------------------------------------------------------.
+ | Scanning a Yacc comment. The initial '/ *' is already eaten. |
+ `---------------------------------------------------------------*/
+
+<SC_YACC_COMMENT>
+{
+ "*/" BEGIN(context_state)
+ .|\n #continue
+ <<EOF>> {
+ unexpected_eof(token_start, '*/')
+ BEGIN(context_state)
+ }
+}
+
+
+ /*------------------------------------------------------------.
+ | Scanning a C comment. The initial '/ *' is already eaten. |
+ `------------------------------------------------------------*/
+
+<SC_COMMENT>
+{
+ "*"{splice}"/" {
+ obstack_for_string.append(yytext)
+ BEGIN(context_state)
+ }
+ <<EOF>> {
+ unexpected_eof(token_start, '*/')
+ BEGIN(context_state)
+ }
+}
+
+
+ /*--------------------------------------------------------------.
+ | Scanning a line comment. The initial '//' is already eaten. |
+ `--------------------------------------------------------------*/
+
+<SC_LINE_COMMENT>
+{
+ "\n" {
+ obstack_for_string.append(yytext)
+ BEGIN(context_state)
+ }
+ {splice} obstack_for_string.append(yytext)
+ <<EOF>> BEGIN(context_state)
+}
+
+
+ /*------------------------------------------------.
+ | Scanning a Bison string, including its escapes. |
+ | The initial quote is already eaten. |
+ `------------------------------------------------*/
+
+<SC_ESCAPED_STRING>
+{
+ "\"" {
+ global gram_last_string
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = token_start
+ y_tab.yylval = gram_last_string
+ BEGIN(INITIAL)
+ gram_piece_append('</AST_Text>')
+ gram_piece_escape(yytext)
+ gram_piece_append('</AST_String>')
+ gram_piece_pack()
+ return ~y_tab.STRING
+ }
+ <<EOF>> unexpected_eof(token_start, '"')
+ "\n" unexpected_newline(token_start, '"')
+}
+
+ /*----------------------------------------------------------.
+ | Scanning a Bison character literal, decoding its escapes. |
+ | The initial quote is already eaten. |
+ `----------------------------------------------------------*/
+
+<SC_ESCAPED_CHARACTER>
+{
+ "'" {
+ global gram_last_string
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = token_start
+ if len(gram_last_string) == 0:
+ state.complain(y_tab.yylloc, state.Wother, 'empty character literal')
+ y_tab.yylval = ord('\'')
+ else:
+ if len(gram_last_string) > 1:
+ state.complain(y_tab.yylloc, state.Wother, 'extra characters in character literal')
+ y_tab.yylval = ord(gram_last_string[0])
+ #del obstack_for_string[:]
+ BEGIN(INITIAL)
+ gram_piece_append('</AST_Text>')
+ gram_piece_escape(yytext)
+ gram_piece_append('</AST_Char>')
+ gram_piece_pack()
+ return ~y_tab.CHAR
+ }
+ "\n" unexpected_newline(token_start, '\'')
+ <<EOF>> unexpected_eof(token_start, '\'')
+}
+
+
+
+ /*--------------------------------------------------------------.
+ | Scanning a tag. The initial angle bracket is already eaten. |
+ `--------------------------------------------------------------*/
+
+<SC_TAG>
+{
+ ">" {
+ global nesting, gram_last_string
+ nesting -= 1
+ if nesting < 0:
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = token_start
+ y_tab.yylval = gram_last_string
+ #del obstack_for_string[:]
+ BEGIN(INITIAL)
+ gram_piece_append('</AST_Text>')
+ gram_piece_escape(yytext)
+ gram_piece_append('</AST_TagRef>')
+ gram_piece_pack()
+ return ~y_tab.TAG
+ obstack_for_string.append(yytext)
+ }
+
+ ([^<>]|->)+ obstack_for_string.append(yytext)
+ "<"+ {
+ global nesting
+ obstack_for_string.append(yytext)
+ nesting += gram_leng
+ }
+
+ <<EOF>> unexpected_eof(token_start, '>')
+}
+
+ /*----------------------------.
+ | Decode escaped characters. |
+ `----------------------------*/
+
+<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
+{
+  \\[0-7]{1,3} {
+    c = int(yytext[1:], 8)
+    if not c or 0x7f * 2 + 1 < c:
+      state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: %s', yytext[1:])
+    else:
+      obstack_for_string.append(chr(c))
+      gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(c))
+      gram_piece_flush(len(yytext))
+      gram_piece_append('</AST_Text_Escape>')
+  }
+
+  \\x[0-9abcdefABCDEF]+ {
+    c = int(yytext[2:], 16)
+    if not c or 0x7f * 2 + 1 < c:
+      state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: %s', yytext[1:])
+    else:
+      obstack_for_string.append(chr(c))
+      gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(c))
+      gram_piece_flush(len(yytext))
+      gram_piece_append('</AST_Text_Escape>')
+  }
+
+ \\a {
+ obstack_for_string.append('\a')
+ gram_piece_append('<AST_Text_Escape character="7">')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_Text_Escape>')
+ }
+ \\b {
+ obstack_for_string.append('\b')
+ gram_piece_append('<AST_Text_Escape character="8">')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_Text_Escape>')
+ }
+ \\f {
+ obstack_for_string.append('\f')
+ gram_piece_append('<AST_Text_Escape character="12">')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_Text_Escape>')
+ }
+ \\n {
+ obstack_for_string.append('\n')
+ gram_piece_append('<AST_Text_Escape character="10">')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_Text_Escape>')
+ }
+ \\r {
+ obstack_for_string.append('\r')
+ gram_piece_append('<AST_Text_Escape character="13">')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_Text_Escape>')
+ }
+ \\t {
+ obstack_for_string.append('\t')
+ gram_piece_append('<AST_Text_Escape character="9">')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_Text_Escape>')
+ }
+ \\v {
+ obstack_for_string.append('\v')
+ gram_piece_append('<AST_Text_Escape character="11">')
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_Text_Escape>')
+ }
+
+ /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
+ \\("\""|"'"|"?"|"\\") {
+ obstack_for_string.append(yytext[1])
+ rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', yytext[1])
+ gram_piece_append(gram_piece_temp)
+ gram_piece_flush(len(yytext))
+ gram_piece_append('</AST_Text_Escape>')
+ }
+
+  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
+    c = convert_ucn_to_byte(yytext)
+    if c <= 0:
+      state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: %s', yytext[1:])
+    else:
+      obstack_for_string.append(chr(c))
+      gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(c))
+      gram_piece_flush(len(yytext))
+      gram_piece_append('</AST_Text_Escape>')
+  }
+  \\(.|\n) {
+    p = yytext[1]
+    if c_isspace(ord(p)) and c_isprint(ord(p)):
+      p = quote(p)
+    else:
+      p = quotearg_style_mem(escape_quoting_style, p, 1)
+    state.complain(y_tab.yylloc, state.complaint, 'invalid character after \\-escape: %s', p)
+  }
+}
+
+ /*--------------------------------------------.
+ | Scanning user-code characters and strings. |
+ `--------------------------------------------*/
+
+<SC_CHARACTER,SC_STRING>
+{
+ {splice}|\\{splice}[^\n\[\]] obstack_for_string.append(yytext)
+}
+
+<SC_CHARACTER>
+{
+ "'" {
+ obstack_for_string.append(yytext)
+ BEGIN(context_state)
+ }
+ \n unexpected_newline(token_start, '\'')
+ <<EOF>> unexpected_eof(token_start, '\'')
+}
+
+<SC_STRING>
+{
+ "\"" {
+ obstack_for_string.append(yytext)
+ BEGIN(context_state)
+ }
+ \n unexpected_newline(token_start, '"')
+ <<EOF>> unexpected_eof(token_start, '"')
+}
+
+
+ /*---------------------------------------------------.
+ | Strings, comments etc. can be found in user code. |
+ `---------------------------------------------------*/
+
+ /* Nick added: SC_ELEMENT_GROUP */
+<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE,SC_ELEMENT_GROUP>
+{
+ "'" {
+ global context_state, token_start
+ obstack_for_string.append(yytext)
+ context_state = YY_START()
+ token_start = y_tab.yylloc.start
+ BEGIN(SC_CHARACTER)
+ }
+ "\"" {
+ global context_state, token_start
+ obstack_for_string.append(yytext)
+ context_state = YY_START()
+ token_start = y_tab.yylloc.start
+ BEGIN(SC_STRING)
+ }
+ "/"{splice}"*" {
+ global context_state, token_start
+ obstack_for_string.append(yytext)
+ context_state = YY_START()
+ token_start = y_tab.yylloc.start
+ BEGIN(SC_COMMENT)
+ }
+ "/"{splice}"/" {
+ global context_state, token_start
+ obstack_for_string.append(yytext)
+ context_state = YY_START()
+ BEGIN(SC_LINE_COMMENT)
+ }
+}
+
+
+
+ /*-----------------------------------------------------------.
+ | Scanning some code in braces (actions, predicates). The |
+ | initial "{" is already eaten. |
+ `-----------------------------------------------------------*/
+
+ /* Nick added: SC_ELEMENT_GROUP */
+<SC_BRACED_CODE,SC_PREDICATE,SC_ELEMENT_GROUP>
+{
+ "{"|"<"{splice}"%" {
+ global nesting
+ obstack_for_string.append(yytext)
+ nesting += 1
+ }
+ "%"{splice}">" {
+ global nesting
+ obstack_for_string.append(yytext)
+ nesting -= 1
+ }
+
+  /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
+ (as '<' '<%'). */
+ "<"{splice}"<" obstack_for_string.append(yytext)
+
+ <<EOF>> unexpected_eof(code_start, '}')
+}
+
+<SC_BRACED_CODE>
+{
+ "}" {
+ global nesting, gram_last_string
+ obstack_for_string.append('}')
+ nesting -= 1
+ if nesting < 0:
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = code_start
+ y_tab.yylval = gram_last_string
+ BEGIN(INITIAL)
+ gram_piece_append('</AST_Text>')
+ gram_piece_escape(yytext)
+ gram_piece_append('</AST_BracedCode>')
+ gram_piece_pack()
+ return ~y_tab.BRACED_CODE
+ }
+}
+
+<SC_PREDICATE>
+{
+ "}" {
+ global nesting, gram_last_string
+ nesting -= 1
+ if nesting < 0:
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = code_start
+ y_tab.yylval = gram_last_string
+ BEGIN(INITIAL)
+ gram_piece_append('</AST_Text>')
+ gram_piece_escape(yytext)
+ gram_piece_append('</AST_BracedPredicate>')
+ gram_piece_pack()
+ return ~y_tab.BRACED_PREDICATE
+ else:
+ obstack_for_string.append('}')
+ }
+}
+
+ /* Nick extra rules for element groups */
+<SC_ELEMENT_GROUP>
+{
+ "}" {
+ global nesting, gram_last_string
+ obstack_for_string.append('}')
+ nesting -= 1
+ if nesting < 0:
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = code_start
+ #del obstack_for_string[:]
+ BEGIN(INITIAL)
+ gram_piece_append('</AST_Text>')
+ gram_piece_escape(yytext)
+ gram_piece_pack()
+ return ~ord('(')
+ }
+}
+
+ /*--------------------------------------------------------------.
+ | Scanning some prologue: from "%{" (already scanned) to "%}". |
+ `--------------------------------------------------------------*/
+
+<SC_PROLOGUE>
+{
+ "%}" {
+ global gram_last_string
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = code_start
+ y_tab.yylval = gram_last_string
+ BEGIN(INITIAL)
+ gram_piece_append('</AST_Text>')
+ gram_piece_escape(yytext)
+ gram_piece_append('</AST_Section1_Prologue>')
+ gram_piece_pack()
+ return ~y_tab.PROLOGUE
+ }
+
+ <<EOF>> unexpected_eof(code_start, '%}')
+}
+
+
+ /*---------------------------------------------------------------.
+ | Scanning the epilogue (everything after the second "%%", which |
+ | has already been eaten). |
+ `---------------------------------------------------------------*/
+
+<SC_EPILOGUE>
+{
+ <<EOF>> {
+ global gram_last_string
+ gram_last_string = ''.join(obstack_for_string)
+ del obstack_for_string[:] # not strictly correct
+ y_tab.yylloc.start = code_start
+ y_tab.yylval = gram_last_string
+ BEGIN(INITIAL)
+ gram_piece_pack()
+ return ~y_tab.EPILOGUE
+ }
+}
+
+
+ /*-----------------------------------------------------.
+ | By default, grow the string obstack with the input. |
+ `-----------------------------------------------------*/
+
+ /* Nick added: SC_ELEMENT_GROUP */
+<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER,SC_ELEMENT_GROUP>. |
+ /* Nick added: SC_ELEMENT_GROUP */
+<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_ELEMENT_GROUP>\n obstack_for_string.append(yytext)
+
+
+%%
+
+#def no_cr_read(fp, buf, size):
+# bytes_read = fread_unlocked(buf, 1, size, fp)
+# if bytes_read:
+# w = memchr(buf, ord('\r'), bytes_read)
+# if w:
+# r = ++w
+# lim = buf + bytes_read
+# pass
+# while True:
+# w[-1] = ord('\n')
+# if r == lim:
+# ch = getc_unlocked(fp)
+# if ch != ord('\n') and ungetc(ch, fp) != ch:
+# break
+# else:
+# if *r == ord('\n'):
+# r += 1
+# while True:
+# if r == lim:
+# return w - buf
+# if not ((*w++ = *r++) != ord('\r')):
+# break
+# pass
+# return w - buf
+# return bytes_read
+#
+#def scan_integer(number, base, loc):
+# num = strtoul(number, None, base)
+# if 0x7fffffff < num:
+# state.complain(y_tab.yylloc, state.complaint, 'integer out of range: %s', quote(number))
+# num = 0x7fffffff
+# return num
+
+def convert_ucn_to_byte(ucn):
+  # needed by the \u/\U rule above: parse the hex code after the leading
+  # '\u' or '\U', rejecting anything that does not fit in a byte
+  # (0x7f * 2 + 1 == 0xff, as in the original C)
+  code = int(ucn[2:], 16)
+  return -1 if 0x7f * 2 + 1 < code else code
+
+#def handle_syncline(args, loc):
+# file = None
+# lineno = strtoul(args, &file, 10)
+# if 0x7fffffff <= lineno:
+# state.complain(y_tab.yylloc, state.Wother, 'line number overflow')
+# lineno = 0x7fffffff
+# file = strchr(file, ord('"'))
+# if file:
+# *strchr(file + 1, ord('"')) = ord('\0')
+# current_file = uniqstr_new(file + 1)
+# boundary_set(&scanner_cursor, current_file, lineno, 1)
+
+def unexpected_end(start, msg, token_end):
+  loc = state.location(start.copy(), scanner_cursor.copy())
+  scanner_cursor.column -= len(token_end)
+  unput(token_end)
+  token_end = quote(token_end)
+  # instead of '\'', display "'"
+  if token_end == '\'\\\'\'':
+    token_end = '"\'"'
+  state.complain(loc, state.complaint, msg.format(token_end))
+
+def unexpected_eof(start, token_end):
+  unexpected_end(start, 'missing {0:s} at end of file', token_end)
+
+def unexpected_newline(start, token_end):
+  unexpected_end(start, 'missing {0:s} at end of line', token_end)
+
+#def gram_scanner_initialize():
+# global obstack_for_string
+# obstack_for_string = []
+#
+#def gram_scanner_free():
+# del obstack_for_string[:]
+# yy_delete_buffer(YY_CURRENT_BUFFER)
+
+def gram_piece_append(str):
+ gram_piece.append(str)
+
+def gram_piece_insert(n, str):
+ gram_piece[n:n] = [str]
+
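+# escape the characters that are significant in XML, so that scanned text
+# can be embedded verbatim in the piece stream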
+xml_escape = {'<': '&lt;', '>': '&gt;', '&': '&amp;'}
+def gram_piece_escape(str):
+ gram_piece.append(''.join([xml_escape.get(i, i) for i in str]))
+
+def gram_piece_flush(n):
+ global yytext
+ gram_piece_escape(yytext[:n])
+ yytext = yytext[n:]
+
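+# collapse everything appended since the last pack into a single piece;
+# gram_piece0 counts the pieces that are already packed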
+def gram_piece_pack():
+ global gram_piece0
+ gram_piece[gram_piece0:] = [''.join(gram_piece[gram_piece0:])]
+ gram_piece0 += 1
+
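+# a rough sketch of the intended flow (for orientation; the driver lives in
+# y_tab.py): yyparse() calls gram_lex() once per token; scanner actions that
+# have already packed their own XML pieces return the complemented token
+# number, which is flipped back here, while plain tokens get their matched
+# text escaped and packed below, so that ''.join(gram_piece) afterwards
+# reconstitutes the input with AST markup interleaved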
+def gram_lex():
+ result = yylex()
+ if result < 0:
+ return ~result
+ gram_piece_pack()
+ gram_piece_escape(yytext)
+ gram_piece_pack()
+ return result
--- /dev/null
+import bisect
+import sys
+
+# GENERATE SECTION1
+
+# GENERATE STARTCONDDECL
+
+class YYReject(Exception):
+ pass
+
+class YYContinue(Exception):
+ pass
+
+class YYTerminate(Exception):
+ pass
+
+class YYBufferList:
+ def __init__(self, next = None):
+ self.next = next
+
+class YYBufferBlock(YYBufferList):
+ def __init__(self, next = None, pos = 0, text = ''):
+ YYBufferList.__init__(self, next)
+ self.pos = pos
+ self.text = text
+
+class YYBufferState(YYBufferList):
+ def __init__(self, next = None, file_in = None, at_bol = True):
+ YYBufferList.__init__(self, next)
+ self.file_in = file_in
+ self.at_bol = at_bol
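+
+# input is a stack of buffers; within each buffer, text pushed back by
+# unput() sits in a chain of YYBufferBlock objects ahead of the
+# YYBufferState that owns the underlying file and beginning-of-line flag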
+
+yyin = sys.stdin
+yyout = sys.stdout
+yy_buffer_stack = [YYBufferState()]
+
+yystart = INITIAL
+yystart_stack = []
+yy_threads0 = [None]
+yy_threads1 = [None]
+yy_prefix_slop = 1
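+
+# yy_threads0/yy_threads1 are the current and next thread lists of the
+# tagged-NFA simulation (one entry per potential match); yy_prefix_slop is
+# reserved space at the front of each list, doubled on demand by the DUP
+# transition in yylex() below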
+
+yy_group_text = None
+yy_group_stack = None
+yy_groups = None
+yy_groups_by_name = None
+yy_action = None
+yytext = ''
+yytext_len = 0
+
+YY_NULL = 0
+
+def REJECT():
+ raise YYReject()
+
+def yyterminate():
+ raise YYTerminate()
+
+def yyless(i):
+ global yytext, yytext_len
+ assert yytext_len >= i
+ yytext = yytext[:i]
+ yytext_len = i
+
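+# push text back into the input: first consume the not-yet-removed current
+# match from the block chain, then prepend a block so the text is read next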
+def unput(text):
+ global yyin, yytext_len
+ gram_piece_flush(len(yytext))
+ while yytext_len:
+ block = yy_buffer_stack[-1].next
+ while block is None or block.pos >= len(block.text):
+ if block is None:
+ yy_buffer_stack.pop()
+ block = yy_buffer_stack[-1].next
+ yyin = yy_buffer_stack[-1].file_in
+ else:
+ block = block.next
+ yy_buffer_stack[-1].next = block
+ i = min(yytext_len, len(block.text) - block.pos)
+ block.pos += i
+ yytext_len -= i
+ yy_buffer_stack[-1].next = YYBufferBlock(yy_buffer_stack[-1].next, 0, text)
+
+def ECHO():
+ yyout.write(yytext)
+
+def yy_rule_start():
+ global yytext, yytext_len
+ yytext = yy_group_text[:yy_group_stack[-1]]
+ yytext_len = yy_group_stack[-1]
+ del yy_group_stack[-2:]
+ # note that this should also be done after yyless() and REJECT(),
+ # and state should be saved in case they result in a null string,
+ # however, it doesn't seem to be in flex, maintain compatibility:
+ if len(yytext):
+ yy_buffer_stack[-1].at_bol = yytext[-1] == '\n'
+
+def yy_group_end():
+ pass
+
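+# flex-style start condition interface: BEGIN()/YY_START() set and query the
+# current condition, yy_push_state()/yy_pop_state() keep a stack of them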
+def BEGIN(start):
+ global yystart
+ yystart = start
+
+def YY_START():
+ return yystart
+
+def yy_push_state(start):
+ global yystart
+ yystart_stack.append(yystart)
+ yystart = start
+
+def yy_pop_state():
+ global yystart
+ yystart = yystart_stack.pop()
+
+def YY_AT_BOL():
+ return yy_buffer_stack[-1].at_bol
+
+def yy_set_bol(at_bol):
+ yy_buffer_stack[-1].at_bol = at_bol
+
+# GENERATE SECTION2
+
+def yylex():
+ global \
+ yyin, \
+ yy_threads0, \
+ yy_threads1, \
+ yy_prefix_slop, \
+ yy_group_text, \
+ yy_group_stack, \
+ yy_action, \
+ yytext, \
+ yytext_len
+
+ # GENERATE SECTION2INITIAL
+
+ while True:
+ while yytext_len:
+ block = yy_buffer_stack[-1].next
+ while block is None or block.pos >= len(block.text):
+ if block is None:
+ yy_buffer_stack.pop()
+ block = yy_buffer_stack[-1].next
+ yyin = yy_buffer_stack[-1].file_in
+ else:
+ block = block.next
+ yy_buffer_stack[-1].next = block
+ i = min(yytext_len, len(block.text) - block.pos)
+ block.pos += i
+ yytext_len -= i
+
+ match = ''
+ match_len = 0
+
+ del yy_threads0[yy_prefix_slop:]
+ yy_threads0.append(None)
+
+ buffer_ptr = len(yy_buffer_stack) - 1
+ block_prev = yy_buffer_stack[buffer_ptr]
+ block = block_prev.next
+ if block is not None:
+ block_pos = block.pos
+
+ action = yy_dfa_start_action[
+ yystart * 2 + int(yy_buffer_stack[-1].at_bol)
+ ]
+ while action != -1:
+ state, transition = yy_dfa_actions[action]
+ #print('i', i, 'action', action, 'state', state, 'transition', transition)
+
+ i = yy_prefix_slop
+ assert len(yy_threads1) == yy_prefix_slop
+ for trans in transition:
+ if trans[0] == 0: #DFA.TRANSITION_POP:
+ i += trans[1]
+ elif trans[0] == 1: #DFA.TRANSITION_DUP:
+ while i < trans[1]:
+ yy_threads0[:0] = [None] * yy_prefix_slop
+ yy_threads1[:0] = [None] * yy_prefix_slop
+ i += yy_prefix_slop
+ yy_prefix_slop *= 2
+ yy_threads0[i - trans[1]:i] = yy_threads0[i:i + trans[1]]
+ i -= trans[1]
+ elif trans[0] == 2: #DFA.TRANSITION_MARK:
+ yy_threads0[i:i + trans[1]] = [
+ (match_len, trans[2], thread)
+ for thread in yy_threads0[i:i + trans[1]]
+ ]
+ elif trans[0] == 3: #DFA.TRANSITION_MOVE:
+ yy_threads1.extend(yy_threads0[i:i + trans[1]])
+ i += trans[1]
+ #elif trans[0] == DFA.TRANSITION_DEL:
+ # del yy_threads1[-trans[1]:]
+ else:
+ assert False
+ assert i == len(yy_threads0)
+ yy_threads0, yy_threads1 = yy_threads1, yy_threads0
+ del yy_threads1[yy_prefix_slop:]
+
+ if state == 0:
+ # there is only one match, which is complete
+ assert len(yy_threads0) == yy_prefix_slop + 1
+ assert yy_dfa_states[state][2] == [0]
+ break
+
+ yy_buffer_stack[-1].file_in = yyin
+ while block is None or block_pos >= len(block.text):
+ if block is None:
+ file_in = yy_buffer_stack[buffer_ptr].file_in
+ text = '' if file_in is None else file_in.readline()
+ if len(text):
+ block = YYBufferBlock(None, 0, text)
+ block_pos = 0
+ block_prev.next = block
+ else:
+ # do not re-attempt read once EOF is reached
+ yy_buffer_stack[buffer_ptr].file_in = None
+ yyin = yy_buffer_stack[-1].file_in
+ buffer_ptr -= 1
+ if buffer_ptr < 0:
+ break # EOF
+ block_prev = yy_buffer_stack[buffer_ptr]
+ block = block_prev.next
+ if block is not None:
+ block_pos = block.pos
+ else:
+ i = match_len - len(match)
+ if i:
+ match += block.text[block_pos - i:]
+ block_prev = block
+ block = block_prev.next
+ if block is not None:
+ block_pos = block.pos
+ else:
+ #print('block_pos', block_pos, 'block.text', block.text)
+ action = yy_dfa_states[state][1][
+ bisect.bisect_right(
+ yy_dfa_states[state][0],
+ ord(block.text[block_pos])
+ )
+ ]
+ block_pos += 1
+ match_len += 1
+ continue
+ # EOF
+ if i == 0:
+ y_tab.yylloc = y_tab.YYLTYPE() # stopgap
+ try:
+ return yy_eof_actions[yystart]()
+ except YYTerminate:
+ return 0
+ break
+
+ i = match_len - len(match)
+ if i:
+ assert block is not None
+ match += block.text[block_pos - i:]
+
+ for i in yy_dfa_states[state][2]:
+ yy_group_text = match
+ yy_group_stack = []
+ yy_groups = None
+ yy_groups_by_name = None
+ yy_action = None
+ yytext = None
+ yytext_len = None
+
+ thread = yy_threads0[yy_prefix_slop + i]
+ #print('thread', thread)
+ while thread is not None:
+ pos, ref_data, thread = thread
+ yy_group_stack.append(pos)
+ ref_data()
+
+ y_tab.yylloc = y_tab.YYLTYPE() # stopgap
+ try:
+ return yy_action()
+ except YYReject:
+ pass
+ except YYContinue:
+ gram_piece_escape(yytext)
+ break
+ except YYTerminate:
+ return 0
+ else:
+ raise Exception('scanner jammed')
+
+# GENERATE SECTION3
--- /dev/null
+# Copyright (C) 2018 Nick Downing <nick@ndcode.org>
+# SPDX-License-Identifier: GPL-2.0-with-bison-exception
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 51
+# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+# As a special exception, you may create a larger work that contains part or
+# all of the Bison or piyacc parser skeleton and distribute that work under
+# terms of your choice, so long as that work isn't itself a parser generator
+# using the skeleton or a modified version thereof as a parser skeleton.
+# Alternatively, if you modify or redistribute the parser skeleton itself, you
+# may (at your option) remove this special exception, which will cause the
+# skeleton and the resulting Bison or piyacc output files to be licensed under
+# the GNU General Public License without this special exception.
+
+import bisect
+import lex_yy
+
+# this can be redefined in SECTION1
+class YYLTYPE:
+ def __init__(
+ self,
+ first_line = 0,
+ first_column = 0,
+ last_line = 0,
+ last_column = 0
+ ):
+ self.first_line = first_line
+ self.first_column = first_column
+ self.last_line = last_line
+ self.last_column = last_column
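+
+# note: yyparse() only constructs and passes these around, so per the comment
+# above a grammar could in principle substitute a richer location class here
+# (for instance one that also records the file name)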
+
+# GENERATE SECTION1
+
+# GENERATE TOKENS
+
+yystack = None
+yychar = None
+YYEMPTY = -1
+
+yyval = None
+yyloc = None
+
+yylval = None
+yylloc = YYLTYPE()
+
+# GENERATE SECTION2
+
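+# yy_lr1dfa_states[state] is (token keys, packed actions, nonterminal keys,
+# gotos, default reduction): a packed action is (new state << 1) for a shift
+# or (production << 1) | 1 for a reduction, found by bisecting the key list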
+def yyparse():
+ global yystack, yychar, yyval, yyloc, yylval, yylloc, gram_piece2, gram_piece3
+
+ # GENERATE INITIALACTION
+
+ state = 0
+ yystack = []
+ yylval = None
+ yychar = -1
+ while True:
+ #print('state', state, 'yystack', yystack)
+ reduce = yy_lr1dfa_states[state][4]
+ if reduce == -1:
+ if yychar == -1:
+ yylval = None
+ yylloc = None
+ yychar = lex_yy.gram_lex()
+ #print('yychar', yychar, 'yylval', yylval, 'yylloc', yylloc, 'lex_yy.yytext', lex_yy.yytext)
+ action = yy_lr1dfa_states[state][1][
+ bisect.bisect_right(yy_lr1dfa_states[state][0], yychar)
+ ]
+ if action == -1:
+ raise Exception('syntax error')
+ if (action & 1) == 0:
+ yystack.append((state, yylval, yylloc))
+ state = action >> 1
+ #print('shift', state)
+ yychar = -1
+ continue
+ reduce = action >> 1
+ #print('reduce', reduce)
+ len_symbols, ref_data = yy_lr1dfa_productions[reduce]
+ base = len(yystack) - len_symbols
+ yystack.append((state, None, None))
+ state, yyval, yyloc = yystack[base]
+ n = base * 2
+ gram_piece2 = n + 1
+ gram_piece3 = n + len_symbols * 2
+ if len_symbols == 0:
+ lex_yy.gram_piece[n:n] = ['', '']
+ gram_piece3 = n + 2
+ lex_yy.gram_piece0 += 2
+ ref_data()
+ lex_yy.gram_piece[gram_piece2:gram_piece3] = [''.join(lex_yy.gram_piece[gram_piece2:gram_piece3])]
+ lex_yy.gram_piece0 += gram_piece2 + 1 - gram_piece3
+ del yystack[base:]
+ if reduce == 0:
+ assert base == 0
+ break
+ yystack.append((state, yyval, yyloc))
+ state = yy_lr1dfa_states[state][3][
+ bisect.bisect_right(yy_lr1dfa_states[state][2], reduce)
+ ]
+ assert state != -1
+
+# GENERATE SECTION3
--- /dev/null
+import sys
+
+# miscellaneous state accessed by scan-gram.l and parse-gram.y
+class boundary:
+ def __init__(self, file = '<stdin>', line = 0, column = 0):
+ self.file = file
+ self.line = line
+ self.column = column
+ def copy(self):
+ return boundary(self.file, self.line, self.column)
+
+class location:
+ def __init__(self, start = None, end = None):
+ self.start = boundary() if start is None else start
+ self.end = boundary() if end is None else end
+ def copy(self):
+ return location(self.start.copy(), self.end.copy())
+
+warning_midrule_values = 0
+warning_yacc = 1
+warning_conflicts_sr = 2
+warning_conflicts_rr = 3
+warning_empty_rule = 4
+warning_deprecated = 5
+warning_precedence = 6
+warning_other = 7
+warnings_size = 8
+
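+# each category above is one bit in the flags word given to complain();
+# the bits below select severity and presentation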
+Wnone = 0
+Wmidrule_values = 1 << warning_midrule_values
+Wyacc = 1 << warning_yacc
+Wconflicts_sr = 1 << warning_conflicts_sr
+Wconflicts_rr = 1 << warning_conflicts_rr
+Wdeprecated = 1 << warning_deprecated
+Wempty_rule = 1 << warning_empty_rule
+Wprecedence = 1 << warning_precedence
+Wother = 1 << warning_other
+Werror = 1 << 10
+complaint = 1 << 11
+fatal = 1 << 12
+silent = 1 << 13
+no_caret = 1 << 14
+Weverything = ~complaint & ~fatal & ~silent
+Wall = Weverything & ~Wyacc
+
+def complain(loc, flags, message, *args):
+ #severity s = warning_severity (flags);
+ #if ((flags & complaint) && complaint_status < status_complaint)
+ # complaint_status = status_complaint;
+ #
+ #if (severity_warning <= s)
+ # {
+ # const char* prefix =
+ # s == severity_fatal ? _("fatal error")
+ # : s == severity_error ? _("error")
+ # : _("warning");
+ # if (severity_error <= s && ! complaint_status)
+ # complaint_status = status_warning_as_error;
+ # error_message (loc, flags, prefix, message, args);
+ # }
+ #
+ #if (flags & fatal)
+ # exit (EXIT_FAILURE);
+  print(message % args if len(args) else message)
+ sys.exit(1)
+
+undef_assoc = 0
+right_assoc = 1
+left_assoc = 2
+non_assoc = 3
+precedence_assoc = 4
'''# GENERATE SECTION1 BEGIN
{0:s}{1:s}# GENERATE END
'''.format(
+ ''.join(
+ [
+ ast_text_to_python(i, '')
+ for i in _ast.requires_text
+ ]
+ ),
''.join(
[
ast_text_to_python(i, '')
'''# GENERATE SECTION2 BEGIN
YYDEBUG = {0:s}
YYERROR_VERBOSE = {1:s}
-{2:s}{3:s}{4:s}
-# GENERATE END
+{2:s}{3:s}{4:s}# GENERATE END
'''.format(
'True' if _ast[0].debug else 'False',
'True' if _ast[0].error_verbose else 'False',
'''.format(
''.join(
[
- plex_text_to_python(i, ' ')
+ ast_text_to_python(i, ' ')
for i in _ast.initial_action_text
]
).replace('(yyval)', '(yylval)').replace('(yyloc)', '(yylloc)') # hack
yylval = None
yylloc = None
yychar = lex_yy.yylex()
- #print('yychar', yychar, 'yylval', yylval, 'yylloc', yylloc)
+ #print('yychar', yychar, 'yylval', yylval, 'yylloc', yylloc, 'lex_yy.yytext', lex_yy.yytext)
#print('lex_yy.yy_element_space')
#xml.etree.ElementTree.dump(lex_yy.yy_element_space)
#print('lex_yy.yy_element_token')