Add Python version of bootstrap_bison.git parser (adds markup to *.y file)
author Nick Downing <nick@ndcode.org>
Thu, 17 Jan 2019 23:12:16 +0000 (10:12 +1100)
committer Nick Downing <nick@ndcode.org>
Thu, 17 Jan 2019 23:12:16 +0000 (10:12 +1100)
.gitignore
bootstrap/Makefile [new file with mode: 0644]
bootstrap/element.py [new file with mode: 0644]
bootstrap/markup.py [new file with mode: 0755]
bootstrap/parse-gram.y [new file with mode: 0644]
bootstrap/scan-gram.l [new file with mode: 0644]
bootstrap/skel_lex_yy.py [new file with mode: 0644]
bootstrap/skel_y_tab.py [new file with mode: 0644]
bootstrap/state.py [new file with mode: 0644]
generate_py.py
skel/skel_py_element.py

diff --git a/.gitignore b/.gitignore
index 92f8b71..19a1d26 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,7 @@
 __pycache__
+bootstrap/*.xml
+bootstrap/lex_yy.py
+bootstrap/y_tab.py
 lex-yacc-examples/*.c
 lex-yacc-examples/*.h
 lex-yacc-examples/*.o
diff --git a/bootstrap/Makefile b/bootstrap/Makefile
new file mode 100644
index 0000000..38fb1d6
--- /dev/null
+++ b/bootstrap/Makefile
@@ -0,0 +1,12 @@
+all: lex_yy.py y_tab.py
+
+lex_yy.py: scan-gram.l skel_lex_yy.py
+       ../../bootstrap_flex.git/src/flex -o /dev/null $< 2>$<.xml
+       ../../pilex.git/pilex.py --python --skel skel_lex_yy.py $<.xml
+
+y_tab.py: parse-gram.y skel_y_tab.py
+       ../../bootstrap_bison.git/src/bison -o /dev/null $< 2>$<.xml
+       ../piyacc.py --python --skel skel_y_tab.py $<.xml
+
+clean:
+       rm -f lex_yy.py y_tab.py *.xml
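
Note: the two rules above drive the whole bootstrap. The patched flex and bison binaries parse the input as usual but write an XML markup trace to stderr (the 2>$<.xml redirections), and pilex.py/piyacc.py then combine that trace with a skeleton to produce the Python modules lex_yy.py and y_tab.py. A minimal sketch of the same bison step driven from Python, assuming the sibling checkout paths used in the Makefile:

    import subprocess
    import xml.etree.ElementTree

    # The patched bison writes its XML markup to stderr (hence 2>$<.xml above).
    with open('parse-gram.y.xml', 'w') as xml_out:
      subprocess.run(
        ['../../bootstrap_bison.git/src/bison', '-o', '/dev/null', 'parse-gram.y'],
        stderr = xml_out,
        check = True
      )

    # Sanity-check that the dump is well-formed XML before handing it to piyacc.py.
    root = xml.etree.ElementTree.parse('parse-gram.y.xml').getroot()
    print(root.tag, len(root))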
diff --git a/bootstrap/element.py b/bootstrap/element.py
new file mode 100644
index 0000000..2d02217
--- /dev/null
+++ b/bootstrap/element.py
@@ -0,0 +1,179 @@
+# Copyright (C) 2018 Nick Downing <nick@ndcode.org>
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 51
+# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+
+import xml.etree.ElementTree
+
+class Element(xml.etree.ElementTree._Element_Py):
+  def __init__(self, tag = 'Element', attrib = {}, text = '', children = []):
+    xml.etree.ElementTree._Element_Py.__init__(self, tag, attrib)
+    self.ref = -1
+    self.seen = False
+    set_text(self, 0, text)
+    self[:] = children
+  def serialize(self, ref_list):
+    for i in self:
+      # parented, enforce that child can only be parented at most once
+      # (although there can be unlimited numbers of numeric refs to it)
+      assert not i.seen
+      i.seen = True
+      if i.ref == -1:
+        i.serialize(ref_list)
+  def deserialize(self, ref_list):
+    for i in self:
+      i.deserialize(ref_list)
+  def copy(self, factory = None):
+    result = (Element if factory is None else factory)(self.tag, self.attrib)
+    result.text = self.text
+    result.tail = self.tail
+    result[:] = [i.copy() for i in self]
+    return result
+  def repr_serialize(self, params):
+    if len(self):
+      params.append(
+        'children = [{0:s}]'.format(
+          ', '.join([repr(i) for i in self])
+        )
+      )
+  def __repr__(self):
+    params = []
+    self.repr_serialize(params)
+    return 'element.Element({0:s})'.format(', '.join(params))
+
+bool_to_str = ['false', 'true']
+def serialize_bool(value):
+  return bool_to_str[int(value)]
+
+str_to_bool = {'false': False, 'true': True}
+def deserialize_bool(text):
+  return str_to_bool[text]
+
+def serialize_int(value):
+  return str(value)
+
+def deserialize_int(text):
+  return int(text)
+
+def serialize_ref(value, ref_list):
+  if value is None:
+    ref = -1
+  else:
+    ref = value.ref
+    if ref == -1:
+      ref = len(ref_list)
+      ref_list.append(value)
+      value.ref = ref
+      value.set('ref', str(ref))
+      # this doesn't set the seen flag, so it will be parented by the
+      # root, unless it is already parented or gets parented later on
+      if not value.seen:
+        value.serialize(ref_list)
+  return str(ref)
+
+def deserialize_ref(text, ref_list):
+  ref = int(text)
+  return None if ref < 0 else ref_list[ref]
+
+def serialize_str(value):
+  return value
+
+def deserialize_str(text):
+  return text
+
+def serialize(value, fout, encoding = 'unicode'):
+  ref_list = []
+  serialize_ref(value, ref_list)
+  parents = [i for i in ref_list if not i.seen]
+  root = Element('root', children = parents)
+  for i in range(len(root)):
+    set_text(root, i, '\n  ')
+  set_text(root, len(root), '\n')
+  root.tail = '\n'
+  xml.etree.ElementTree.ElementTree(root).write(fout, encoding)
+  for i in root:
+    i.tail = None
+  for i in ref_list:
+    i.ref = -1
+    del i.attrib['ref']
+  i = 0
+  while i < len(parents):
+    for j in parents[i]:
+      j.seen = False
+      parents.append(j)
+    i += 1
+
+def deserialize(fin, factory = Element, encoding = 'unicode'):
+  root = xml.etree.ElementTree.parse(
+    fin,
+    xml.etree.ElementTree.XMLParser(
+      target = xml.etree.ElementTree.TreeBuilder(factory),
+      encoding = encoding
+    )
+  ).getroot()
+  assert root.tag == 'root'
+  for i in root:
+    i.tail = None
+  i = 0
+  parents = root[:]
+  ref_list = []
+  while i < len(parents):
+    j = parents[i]
+    if 'ref' in j.attrib:
+      ref = int(j.attrib['ref'])
+      del j.attrib['ref']
+      if len(ref_list) < ref + 1:
+        ref_list.extend([None] * (ref + 1 - len(ref_list)))
+      ref_list[ref] = j
+    parents.extend(j[:])
+    i += 1
+  for i in root:
+    i.deserialize(ref_list)
+  return ref_list[0]
+
+# compatibility scheme to access arbitrary xml.etree.ElementTree.Element-like
+# objects (not just Element defined above) using a more consistent interface:
+def get_text(root, i):
+  if i < 0:
+    i += len(root) + 1
+  text = root.text if i == 0 else root[i - 1].tail
+  return '' if text is None else text
+
+def set_text(root, i, text):
+  if i < 0:
+    i += len(root) + 1
+  if len(text) == 0:
+    text = None
+  if i == 0:
+    root.text = text
+  else:
+    root[i - 1].tail = text
+
+def to_text(root):
+  return ''.join(
+    [
+      j
+      for i in range(len(root))
+      for j in [get_text(root, i), to_text(root[i])]
+    ] +
+    [get_text(root, len(root))]
+  )
+
+def concatenate(children, factory = Element, *args, **kwargs):
+  root = factory(*args, **kwargs)
+  for child in children:
+    i = len(root)
+    set_text(root, i, get_text(root, i) + get_text(child, 0))
+    root[i:] = child[:]
+  return root
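
Note: element.py is the AST carrier shared by the generated modules. Text interleaves with child elements through the get_text/set_text slot convention, serialize wraps everything in a synthetic <root> element and tracks shared nodes through numeric ref attributes, and deserialize is its inverse. A small usage sketch (tag names and strings are illustrative only):

    import io
    import element

    # Text slot 0 precedes the first child; to_text flattens the whole tree.
    tree = element.Element(
      'AST',
      text = 'hello ',
      children = [element.Element('Child', text = 'world')]
    )
    print(element.to_text(tree))   # hello world

    # serialize wraps the tree in a synthetic <root> and emits it as XML.
    buf = io.StringIO()
    element.serialize(tree, buf)
    print(buf.getvalue())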
diff --git a/bootstrap/markup.py b/bootstrap/markup.py
new file mode 100755
index 0000000..1817bee
--- /dev/null
+++ b/bootstrap/markup.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+
+import lex_yy
+import y_tab
+import sys
+
+lex_yy.gram_piece_append('<root>\n  <AST ref=\"0\">')
+y_tab.yyparse()
+lex_yy.gram_piece_append('</AST>\n</root>\n')
+sys.stdout.write(''.join(lex_yy.gram_piece))
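
Note: markup.py is the driver. Once the Makefile has generated lex_yy.py and y_tab.py, it wraps the parse in a <root>/<AST> envelope and prints the accumulated piece list. Presumably the generated scanner reads the grammar on stdin (the flex default), so the intended use is along the lines of:

    ./markup.py < parse-gram.y > parse-gram.y.xml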
diff --git a/bootstrap/parse-gram.y b/bootstrap/parse-gram.y
new file mode 100644
index 0000000..fa57397
--- /dev/null
+++ b/bootstrap/parse-gram.y
@@ -0,0 +1,1151 @@
+/* Bison Grammar Parser                             -*- C -*-
+
+   Copyright (C) 2002-2015, 2018 Free Software Foundation, Inc.
+
+   This file is part of Bison, the GNU Compiler Compiler.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+%code requires
+{
+  import state
+
+  YYLTYPE = state.location
+}
+
+%code top
+{
+}
+
+%code
+{
+  #current_prec = 0
+  #current_lhs_location = 0
+  #current_lhs_named_ref = []
+  #current_lhs_symbol = []
+  #current_class = unknown_sym
+  #current_type = None
+  gram_piece2 = 0
+  gram_piece3 = 0
+  nested_rhs = 0
+}
+
+/* Nick %define api.prefix {gram_} */
+%define api.pure full
+%define locations
+%define parse.error verbose
+%define parse.lac full
+%define parse.trace
+/* Nick %defines */
+%expect 0
+/* Nick %verbose */
+
+%initial-action
+{
+  #boundary_set(&@$.start, current_file, 1, 1)
+  #boundary_set(&@$.end, current_file, 1, 1)
+}
+
+/* Define the tokens together with their human representation.  */
+%token GRAM_EOF 0 "end of file"
+%token STRING     "string"
+
+%token PERCENT_TOKEN       "%token"
+%token PERCENT_NTERM       "%nterm"
+
+%token PERCENT_TYPE        "%type"
+%token PERCENT_DESTRUCTOR  "%destructor"
+%token PERCENT_PRINTER     "%printer"
+
+%token PERCENT_LEFT        "%left"
+%token PERCENT_RIGHT       "%right"
+%token PERCENT_NONASSOC    "%nonassoc"
+%token PERCENT_PRECEDENCE  "%precedence"
+
+%token PERCENT_PREC          "%prec"
+%token PERCENT_DPREC         "%dprec"
+%token PERCENT_MERGE         "%merge"
+
+/*----------------------.
+| Global Declarations.  |
+`----------------------*/
+
+%token
+  PERCENT_CODE            "%code"
+  PERCENT_DEFAULT_PREC    "%default-prec"
+  PERCENT_DEFINE          "%define"
+  PERCENT_DEFINES         "%defines"
+  PERCENT_ERROR_VERBOSE   "%error-verbose"
+  PERCENT_EXPECT          "%expect"
+  PERCENT_EXPECT_RR       "%expect-rr"
+  PERCENT_FLAG            "%<flag>"
+  PERCENT_FILE_PREFIX     "%file-prefix"
+  PERCENT_GLR_PARSER      "%glr-parser"
+  PERCENT_INITIAL_ACTION  "%initial-action"
+  PERCENT_LANGUAGE        "%language"
+  PERCENT_NAME_PREFIX     "%name-prefix"
+  PERCENT_NO_DEFAULT_PREC "%no-default-prec"
+  PERCENT_NO_LINES        "%no-lines"
+  PERCENT_NONDETERMINISTIC_PARSER
+                          "%nondeterministic-parser"
+  PERCENT_OUTPUT          "%output"
+  PERCENT_REQUIRE         "%require"
+  PERCENT_SKELETON        "%skeleton"
+  PERCENT_START           "%start"
+  PERCENT_TOKEN_TABLE     "%token-table"
+  PERCENT_VERBOSE         "%verbose"
+  PERCENT_YACC            "%yacc"
+;
+
+%token BRACED_CODE     "{...}"
+%token BRACED_PREDICATE "%?{...}"
+%token BRACKETED_ID    "[identifier]"
+%token CHAR            "char"
+%token EPILOGUE        "epilogue"
+%token EQUAL           "="
+%token ID              "identifier"
+%token ID_COLON        "identifier:"
+%token PERCENT_PERCENT "%%"
+%token PIPE            "|"
+%token PROLOGUE        "%{...%}"
+%token SEMICOLON       ";"
+%token TAG             "<tag>"
+%token TAG_ANY         "<*>"
+%token TAG_NONE        "<>"
+
+/*%union {
+  character = 0
+}*/
+/*%type <character> CHAR*/
+%printer {
+  fputs_unlocked(char_name($$), yyo)
+} CHAR
+
+/*%union {
+  code = ''
+};*/
+/*%type <code> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING*/
+%printer {
+  fputs_unlocked(quotearg_style(c_quoting_style, $$), yyo)
+} STRING
+/*%printer {
+  rpl_fprintf(yyo, '{\n%s\n}', $$)
+} <code>*/
+
+/*%union {
+  uniqstr = ''
+}*/
+/*%type <uniqstr> BRACKETED_ID ID ID_COLON PERCENT_FLAG TAG tag variable*/
+/*%printer {
+  fputs_unlocked($$, yyo)
+} <uniqstr>*/
+%printer {
+  rpl_fprintf(yyo, '[%s]', $$)
+} BRACKETED_ID
+%printer {
+  rpl_fprintf(yyo, '%s:', $$)
+} ID_COLON
+%printer {
+  rpl_fprintf(yyo, '%%%s', $$)
+} PERCENT_FLAG
+%printer {
+  rpl_fprintf(yyo, '<%s>', $$)
+} TAG tag
+
+/*%union {
+  integer = 0
+};*/
+/*%token <integer> INT "integer"*/
+%token INT "integer"
+/*%printer {
+  rpl_fprintf(yyo, '%d', $$)
+} <integer>*/
+
+/*%union {
+  symbol = []
+}*/
+/*%type <symbol> id id_colon string_as_id symbol symbol.prec*/
+/*%printer {
+  rpl_fprintf(yyo, '%s', $$->tag)
+} <symbol>*/
+%printer {
+  rpl_fprintf(yyo, '%s:', $$->tag)
+} id_colon
+
+/*%union {
+  assoc = 0
+};*/
+/*%type <assoc> precedence_declarator*/
+
+/*%union {
+  list = []
+}*/
+/*%type <list>  symbols.1 symbols.prec generic_symlist generic_symlist_item*/
+
+/*%union {
+  named_ref = []
+}*/
+/*%type <named_ref> named_ref.opt*/
+
+/*---------.
+| %param.  |
+`---------*/
+%code requires
+{
+  param_none = 0
+  param_lex = 1 << 0
+  param_parse = 1 << 1
+  param_both = param_lex | param_parse
+};
+%code
+{
+  current_param = param_none
+};
+/*%union {
+  param = 0
+}*/
+/*%token <param> PERCENT_PARAM "%param";*/
+%token PERCENT_PARAM "%param";
+/*%printer
+{
+  if $$ == param_lex:
+    fputs_unlocked('%' 'lex-param', yyo)
+    break
+  elif $$ == param_parse:
+    fputs_unlocked('%' 'parse-param', yyo)
+    break
+  elif $$ == param_both:
+    fputs_unlocked('%' 'param', yyo)
+    break
+  elif $$ == param_none:
+    assert(False)
+    break
+} <param>;*/
+
+                     /*==========\
+                     | Grammar.  |
+                     \==========*/
+%%
+
+input
+  : prologue_declarations "%%" grammar epilogue.opt
+    {
+      insert_after(2, '</AST_Section2>')
+      insert_before(2, '<AST_Section2>')
+      insert_after(0, '</AST_Section1>')
+      insert_before(0, '<AST_Section1>')
+    }
+  ;
+
+        /*------------------------------------.
+        | Declarations: before the first %%.  |
+        `------------------------------------*/
+
+prologue_declarations
+  : %empty
+    {
+      global yychar
+      if yychar == YYEMPTY:
+        yychar = lex_yy.gram_lex()
+      temp = lex_yy.gram_piece[gram_piece2 + 1]
+      lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
+      lex_yy.gram_piece[gram_piece2] = lex_yy.gram_piece[gram_piece2 - 1]
+      lex_yy.gram_piece[gram_piece2 - 1] = temp
+    }
+  | prologue_declarations prologue_declaration
+  ;
+
+prologue_declaration
+  : grammar_declaration
+  | "%{...%}"
+    {
+      #code_piece_append('<AST_Section1_Prologue>%{<AST_Text>')
+      #muscle_code_grow('post_prologue' if union_seen else 'pre_prologue', translate_code($1, @1, True), @1)
+      lex_yy.gram_scanner_last_string_free() # normally done in above
+      #code_scanner_last_string_free()
+      #code_piece_append('</AST_Text>%}</AST_Section1_Prologue>')
+      #code_piece_pack()
+      #lex_yy.gram_piece[gram_piece2] = code_piece[code_piece0 - 1]
+    }
+  | "%<flag>"
+    {
+      #muscle_percent_define_ensure($1, @1, True)
+      if $1 == 'api.pure':
+        insert_after(0, '</AST_Section1_PureParser>')
+        insert_before(0, '<AST_Section1_PureParser>')
+      elif $1 == 'locations':
+        insert_after(0, '</AST_Section1_Locations>')
+        insert_before(0, '<AST_Section1_Locations>')
+      elif $1 == 'parse.trace':
+        insert_after(0, '</AST_Section1_Debug>')
+        insert_before(0, '<AST_Section1_Debug>')
+      else:
+        assert False
+    }
+  | "%define" variable value
+    {
+      #muscle_percent_define_insert($2, @2, $3.kind, $3.chars, MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
+      insert_after(2, '</AST_Section1_Define>')
+      insert_before(0, '<AST_Section1_Define>')
+    }
+  | "%defines"
+    {
+      #defines_flag = True
+      insert_after(0, '</AST_Section1_Defines>')
+      insert_before(0, '<AST_Section1_Defines>')
+    }
+  | "%defines" STRING
+    {
+      #defines_flag = True
+      #spec_defines_file = xstrdup($2)
+      insert_after(1, '</AST_Section1_Defines>')
+      insert_before(0, '<AST_Section1_Defines>')
+    }
+  | "%error-verbose"
+    {
+      #muscle_percent_define_insert('parse.error', @1, muscle_keyword, 'verbose', MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
+      insert_after(0, '</AST_Section1_ErrorVerbose>')
+      insert_before(0, '<AST_Section1_ErrorVerbose>')
+    }
+  | "%expect" INT
+    {
+      #expected_sr_conflicts = $2
+      insert_after(1, '</AST_Section1_Expect>')
+      insert_before(0, '<AST_Section1_Expect value="{0:d}">'.format($2))
+    }
+  | "%expect-rr" INT
+    {
+      #expected_rr_conflicts = $2
+      insert_after(1, '</AST_Section1_ExpectRR>')
+      insert_before(0, '<AST_Section1_ExpectRR value="{0:d}">'.format($2))
+    }
+  | "%file-prefix" STRING
+    {
+      #spec_file_prefix = $2
+      insert_after(1, '</AST_Section1_FilePrefix>')
+      insert_before(0, '<AST_Section1_FilePrefix>')
+    }
+  | "%glr-parser"
+    {
+      #nondeterministic_parser = True
+      #glr_parser = True
+      insert_after(0, '</AST_Section1_GLRParser>')
+      insert_before(0, '<AST_Section1_GLRParser>')
+    }
+  | "%initial-action" "{...}"
+    {
+      #code_piece_append('<AST_Text>')
+      #muscle_code_grow('initial_action', translate_code($2, @2, False), @2)
+      lex_yy.gram_scanner_last_string_free() # normally done in above
+      #code_scanner_last_string_free()
+      #code_piece_append('</AST_Text>')
+      #code_piece_pack()
+      #lex_yy.gram_piece[gram_piece2 + 2] = code_piece[code_piece0 - 1]
+      insert_after(1, '</AST_Section1_InitialAction>')
+      insert_before(0, '<AST_Section1_InitialAction>')
+    }
+  | "%language" STRING
+    {
+      #language_argmatch($2, grammar_prio, @1)
+      insert_after(1, '</AST_Section1_Language>')
+      insert_before(0, '<AST_Section1_Language>')
+    }
+  | "%name-prefix" STRING
+    {
+      #spec_name_prefix = $2
+      insert_after(1, '</AST_Section1_NamePrefix>')
+      insert_before(0, '<AST_Section1_NamePrefix>')
+    }
+  | "%no-lines"
+    {
+      #no_lines_flag = True
+      insert_after(0, '</AST_Section1_Lines>')
+      insert_before(0, '<AST_Section1_Lines value="false">')
+    }
+  | "%nondeterministic-parser"
+    {
+      #nondeterministic_parser = True
+      insert_after(0, '</AST_Section1_NonDeterministicParser>')
+      insert_before(0, '<AST_Section1_NonDeterministicParser>')
+    }
+  | "%output" STRING
+    {
+      #spec_outfile = $2
+      insert_after(1, '</AST_Section1_Output>')
+      insert_before(0, '<AST_Section1_Output>')
+    }
+  | "%param"
+    {
+      #current_param = $1
+    }
+    params
+    {
+      #current_param = param_none
+      insert_after(2, '</AST_Section1_Param>')
+      insert_before(0, '<AST_Section1_Param>')
+    }
+  | "%require" STRING
+    {
+      #version_check(&@2, $2)
+      insert_after(1, '</AST_Section1_Require>')
+      insert_before(0, '<AST_Section1_Require>')
+    }
+  | "%skeleton" STRING
+    {
+      #skeleton_user = $2
+      #if strchr(skeleton_user, ord('/')):
+      #  dir_length = len(current_file)
+      #  skeleton_build = None
+      #  while dir_length and current_file[dir_length - 1] != ord('/'):
+      #    dir_length -= 1
+      #  while dir_length and current_file[dir_length - 1] == ord('/'):
+      #    dir_length -= 1
+      #  skeleton_build = xmalloc(dir_length + 1 + len(skeleton_user) + 1)
+      #  if dir_length > 0:
+      #    memcpy(skeleton_build, current_file, dir_length)
+      #    skeleton_build[dir_length++] = ord('/')
+      #  strcpy(skeleton_build + dir_length, skeleton_user)
+      #  skeleton_user = uniqstr_new(skeleton_build)
+      #  free(skeleton_build)
+      #skeleton_arg(skeleton_user, grammar_prio, @1)
+      insert_after(1, '</AST_Section1_Skeleton>')
+      insert_before(0, '<AST_Section1_Skeleton>')
+    }
+  | "%token-table"
+    {
+      #token_table_flag = True
+      insert_after(0, '</AST_Section1_TokenTable>')
+      insert_before(0, '<AST_Section1_TokenTable>')
+    }
+  | "%verbose"
+    {
+      #report_flag |= report_states
+      insert_after(0, '</AST_Section1_Verbose>')
+      insert_before(0, '<AST_Section1_Verbose>')
+    }
+  | "%yacc"
+    {
+      #yacc_flag = True
+      insert_after(0, '</AST_Section1_YACC>')
+      insert_before(0, '<AST_Section1_YACC>')
+    }
+  | /*FIXME: Err?  What is this horror doing here? */ ";"
+  ;
+
+params
+  : params "{...}"
+    {
+      #add_param(current_param, $2, @2)
+      lex_yy.gram_scanner_last_string_free() # normally done in above
+    }
+  | "{...}"
+    {
+      #add_param(current_param, $1, @1)
+      lex_yy.gram_scanner_last_string_free() # normally done in above
+    }
+  ;
+
+/*----------------------.
+| grammar_declaration.  |
+`----------------------*/
+
+grammar_declaration
+  : precedence_declaration
+  | symbol_declaration
+  | "%start" symbol
+    {
+      #grammar_start_symbol_set($2, @2)
+      insert_after(1, '</AST_Section1Or2_Start>')
+      insert_after(1, '</AST_SymbolRef>')
+      insert_before(1, '<AST_SymbolRef>')
+      insert_before(0, '<AST_Section1Or2_Start>')
+    }
+  | code_props_type "{...}" generic_symlist
+    {
+      #code = None
+      #code_props_symbol_action_init(&code, $2, @2)
+      #code_piece_append('<AST_Text>')
+      #code_props_translate_code(&code)
+      lex_yy.gram_scanner_last_string_free() # normally done in above
+      #code_piece_append('</AST_Text>')
+      #code_piece_pack()
+      #lex_yy.gram_piece[gram_piece2 + 2] = code_piece[code_piece0 - 1]
+      #list = None
+      #list = $3
+      #while list:
+      #  symbol_list_code_props_set(list, $1, &code)
+      #  list = list->next
+      #symbol_list_free($3)
+      insert_after(2, '</AST_Section1Or2_CodeProps>')
+      insert_before(0, '<AST_Section1Or2_CodeProps _type="{0:d}">'.format($1))
+    }
+  | "%default-prec"
+    {
+      #default_prec = True
+      insert_after(0, '</AST_Section1Or2_DefaultPrec>')
+      insert_before(0, '<AST_Section1Or2_DefaultPrec value="true">')
+    }
+  | "%no-default-prec"
+    {
+      #default_prec = False
+      insert_after(0, '</AST_Section1Or2_DefaultPrec>')
+      insert_before(0, '<AST_Section1Or2_DefaultPrec>')
+    }
+  | "%code" "{...}"
+    {
+      #code_piece_append('<AST_BracedCode>{<AST_Text>')
+      #muscle_code_grow('percent_code()', translate_code_braceless($2, @2), @2)
+      lex_yy.gram_scanner_last_string_free() # normally done in above
+      #code_scanner_last_string_free()
+      #code_piece_append('</AST_Text>}</AST_BracedCode>')
+      #code_piece_pack()
+      #lex_yy.gram_piece[gram_piece2 + 2] = code_piece[code_piece0 - 1]
+      insert_after(1, '</AST_Section1Or2_Code>')
+      insert_before(0, '<AST_Section1Or2_Code><AST_ID />')
+    }
+  | "%code" ID "{...}"
+    {
+      #code_piece_append('<AST_BracedCode>{<AST_Text>')
+      #muscle_percent_code_grow($2, @2, translate_code_braceless($3, @3), @3)
+      lex_yy.gram_scanner_last_string_free() # normally done in above
+      #code_scanner_last_string_free()
+      #code_piece_append('</AST_Text>}</AST_BracedCode>')
+      #code_piece_pack()
+      #lex_yy.gram_piece[gram_piece2 + 4] = code_piece[code_piece0 - 1]
+      insert_after(2, '</AST_Section1Or2_Code>')
+      insert_before(0, '<AST_Section1Or2_Code>')
+    }
+  ;
+
+/*%type <code_type> code_props_type;*/
+/*%union {
+  code_type = 0
+};*/
+/*%printer {
+  rpl_fprintf(yyo, '%s', code_props_type_string($$))
+} <code_type>;*/
+
+code_props_type
+  : "%destructor"
+    {
+      $$ = destructor
+    }
+  | "%printer"
+    {
+      $$ = printer
+    }
+  ;
+
+/*---------.
+| %union.  |
+`---------*/
+
+%token PERCENT_UNION "%union";
+
+union_name
+  : %empty
+    {
+      insert_before(0, '<AST_ID />')
+    }
+  | ID
+    {
+      #muscle_percent_define_insert('api.value.union.name', @1, muscle_keyword, $1, MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
+    }
+  ;
+
+grammar_declaration
+  : "%union" union_name "{...}"
+    {
+      #union_seen = True
+      #code_piece_append('<AST_BracedCode>{<AST_Text>')
+      #muscle_code_grow('union_members', translate_code_braceless($3, @3), @3)
+      lex_yy.gram_scanner_last_string_free() # normally done in above
+      #code_scanner_last_string_free()
+      #code_piece_append('</AST_Text>}</AST_BracedCode>')
+      #code_piece_pack()
+      #lex_yy.gram_piece[gram_piece2 + 4] = code_piece[code_piece0 - 1]
+      insert_after(2, '</AST_Section1Or2_Union>')
+      insert_before(0, '<AST_Section1Or2_Union>')
+    }
+  ;
+
+symbol_declaration
+  : "%nterm"
+    {
+      #current_class = nterm_sym
+    }
+    symbol_defs.1
+    {
+      #current_class = unknown_sym
+      #current_type = None
+      insert_after(2, '</AST_Section1Or2_NTerm>')
+      insert_before(0, '<AST_Section1Or2_NTerm>')
+    }
+  | "%token"
+    {
+      #current_class = token_sym
+    }
+    symbol_defs.1
+    {
+      #current_class = unknown_sym
+      #current_type = None
+      insert_after(2, '</AST_Section1Or2_Token>')
+      insert_before(0, '<AST_Section1Or2_Token>')
+    }
+  | "%type" TAG symbols.1
+    {
+      #list = None
+      #tag_seen = True
+      #list = $3
+      #while list:
+      #  symbol_type_set(list->content.sym, $2, @2)
+      #  list = list->next
+      #symbol_list_free($3)
+      insert_after(2, '</AST_Section1Or2_Type>')
+      insert_before(0, '<AST_Section1Or2_Type>')
+    }
+  ;
+
+precedence_declaration
+  : precedence_declarator tag.opt symbols.prec
+    {
+      #list = None
+      #current_prec += 1
+      #list = $3
+      #while list:
+      #  symbol_type_set(list->content.sym, current_type, @2)
+      #  symbol_precedence_set(list->content.sym, current_prec, $1, @1)
+      #  list = list->next
+      #symbol_list_free($3)
+      #current_type = None
+      insert_after(2, '</AST_Section1Or2_Precedence>')
+      insert_before(0, '<AST_Section1Or2_Precedence _type="{0:d}">'.format(($1 & 3) - 1))
+    }
+  ;
+
+precedence_declarator
+  : "%left"
+    {
+      $$ = state.left_assoc
+    }
+  | "%right"
+    {
+      $$ = state.right_assoc
+    }
+  | "%nonassoc"
+    {
+      $$ = state.non_assoc
+    }
+  | "%precedence"
+    {
+      $$ = state.precedence_assoc
+    }
+  ;
+
+tag.opt
+  : %empty
+    {
+      #current_type = None
+    }
+  | TAG
+    {
+      #current_type = $1
+      #tag_seen = True
+    }
+  ;
+
+/* Just like symbols.1 but accept INT for the sake of POSIX.  */
+symbols.prec
+  : symbol.prec
+    {
+      #$$ = symbol_list_sym_new($1, @1)
+    }
+  | symbols.prec symbol.prec
+    {
+      #$$ = symbol_list_append($1, symbol_list_sym_new($2, @2))
+    }
+  ;
+
+symbol.prec
+  : symbol
+    {
+      #$$ = $1
+      #symbol_class_set($1, token_sym, @1, False)
+      insert_after(0, '</AST_SymbolRef>')
+      insert_before(0, '<AST_SymbolRef>')
+    }
+  | symbol INT
+    {
+      #$$ = $1
+      #symbol_user_token_number_set($1, $2, @2)
+      #symbol_class_set($1, token_sym, @1, False)
+      insert_after(1, '</AST_SymbolRef>')
+      insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
+    }
+  ;
+
+/* One or more symbols to be %typed. */
+symbols.1
+  : symbol
+    {
+      #$$ = symbol_list_sym_new($1, @1)
+      insert_after(0, '</AST_SymbolRef>')
+      insert_before(0, '<AST_SymbolRef>')
+    }
+  | symbols.1 symbol
+    {
+      #$$ = symbol_list_append($1, symbol_list_sym_new($2, @2))
+      insert_after(1, '</AST_SymbolRef>')
+      insert_before(1, '<AST_SymbolRef>')
+    }
+  ;
+
+generic_symlist
+  : generic_symlist_item
+    {
+      #$$ = $1
+    }
+  | generic_symlist generic_symlist_item
+    {
+      #$$ = symbol_list_append($1, $2)
+    }
+  ;
+
+generic_symlist_item
+  : symbol
+    {
+      #$$ = symbol_list_sym_new($1, @1)
+      insert_after(0, '</AST_SymbolRef>')
+      insert_before(0, '<AST_SymbolRef>')
+    }
+  | tag
+    {
+      #$$ = symbol_list_type_new($1, @1)
+    }
+  ;
+
+tag
+  : TAG
+  | "<*>"
+    {
+      #$$ = uniqstr_new('*')
+    }
+  | "<>"
+    {
+      #$$ = uniqstr_new('')
+    }
+  ;
+
+/* One token definition.  */
+symbol_def
+  : TAG
+    {
+      #current_type = $1
+      #tag_seen = True
+    }
+  | id
+    {
+      #symbol_class_set($1, current_class, @1, True)
+      #symbol_type_set($1, current_type, @1)
+      insert_after(0, '</AST_SymbolRef>')
+      insert_before(0, '<AST_SymbolRef>')
+    }
+  | id INT
+    {
+      #symbol_class_set($1, current_class, @1, True)
+      #symbol_type_set($1, current_type, @1)
+      #symbol_user_token_number_set($1, $2, @2)
+      insert_after(1, '</AST_SymbolRef>')
+      insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
+    }
+  | id string_as_id
+    {
+      #symbol_class_set($1, current_class, @1, True)
+      #symbol_type_set($1, current_type, @1)
+      #symbol_make_alias($1, $2, @$)
+      insert_after(1, '</AST_SymbolRef>')
+      insert_before(0, '<AST_SymbolRef>')
+    }
+  | id INT string_as_id
+    {
+      #symbol_class_set($1, current_class, @1, True)
+      #symbol_type_set($1, current_type, @1)
+      #symbol_user_token_number_set($1, $2, @2)
+      #symbol_make_alias($1, $3, @$)
+      insert_after(2, '</AST_SymbolRef>')
+      insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
+    }
+  ;
+
+/* One or more symbol definitions. */
+symbol_defs.1
+  : symbol_def
+  | symbol_defs.1 symbol_def
+  ;
+
+        /*------------------------------------------.
+        | The grammar section: between the two %%.  |
+        `------------------------------------------*/
+
+grammar
+  : rules_or_grammar_declaration
+  | grammar rules_or_grammar_declaration
+  ;
+
+/* As a Bison extension, one can use the grammar declarations in the
+   body of the grammar.  */
+rules_or_grammar_declaration
+  : rules
+  | grammar_declaration ";"
+  | error ";"
+    {
+      #yyerrok
+    }
+  ;
+
+rules
+  : id_colon named_ref.opt
+    {
+      #current_lhs($1, @1, $2)
+    }
+    rhses.1
+    {
+      #current_lhs(0, @1, 0)
+      insert_after(3, '</AST_Section2_Rules>')
+      insert_after(0, '</AST_SymbolRef>')
+      insert_before(0, '<AST_SymbolRef>')
+      insert_before(0, '<AST_Section2_Rules>')
+    }
+  ;
+
+rhses.1
+  : rhs
+    {
+      #grammar_current_rule_end(@1)
+      insert_after(0, '</AST_Production>')
+      insert_before(0, '<AST_Production>')
+    }
+  | rhses.1 "|" rhs
+    {
+      #grammar_current_rule_end(@3)
+      insert_after(2, '</AST_Production>')
+      insert_before(2, '<AST_Production>')
+    }
+  | rhses.1 ";"
+  ;
+
+%token PERCENT_EMPTY "%empty";
+/* Nick added %space */
+%token PERCENT_SPACE "%space";
+
+rhs
+  : %empty
+    {
+      global yychar
+      #if nested_rhs:
+      #  nested_rhs -= 1
+      #else:
+      #  grammar_current_rule_begin(current_lhs_symbol, current_lhs_location, current_lhs_named_ref)
+      if yychar == YYEMPTY:
+        yychar = lex_yy.gram_lex()
+      temp = lex_yy.gram_piece[gram_piece2 + 1]
+      lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
+      lex_yy.gram_piece[gram_piece2] = lex_yy.gram_piece[gram_piece2 - 1]
+      lex_yy.gram_piece[gram_piece2 - 1] = temp
+    }
+  | rhs symbol named_ref.opt
+    {
+      #grammar_current_rule_symbol_append($2, @2, $3)
+      insert_after(2, '</AST_Production_SymbolRef>')
+      insert_after(1, '</AST_SymbolRef>')
+      insert_before(1, '<AST_Production_SymbolRef><AST_SymbolRef>')
+    }
+  | rhs "{...}" named_ref.opt
+    {
+      #code_piece_append('<AST_Text>')
+      #grammar_current_rule_action_append($2, @2, $3, False)
+      #code_piece_append('</AST_Text>')
+      #code_piece_pack()
+      #lex_yy.gram_piece[gram_piece2 + 2] = code_piece[code_piece0 - 1]
+      insert_after(2, '</AST_Production_Action>')
+      insert_before(1, '<AST_Production_Action>')
+    }
+  | rhs "%?{...}"
+    {
+      #grammar_current_rule_action_append($2, @2, None, True)
+    }
+  | rhs "%empty"
+    {
+      #grammar_current_rule_empty_set(@2)
+      insert_after(1, '</AST_Production_Empty>')
+      insert_before(1, '<AST_Production_Empty>')
+    }
+  | rhs "%prec" symbol
+    {
+      #grammar_current_rule_prec_set($3, @3)
+      insert_after(2, '</AST_Production_Prec>')
+      insert_after(2, '</AST_SymbolRef>')
+      insert_before(2, '<AST_SymbolRef>')
+      insert_before(1, '<AST_Production_Prec>')
+    }
+  | rhs "%dprec" INT
+    {
+      #grammar_current_rule_dprec_set($3, @3)
+      insert_after(2, '</AST_Production_DPrec>')
+      insert_before(1, '<AST_Production_DPrec value="{0:d}">'.format($3))
+    }
+  | rhs "%merge" TAG
+    {
+      #grammar_current_rule_merge_set($3, @3)
+      insert_after(2, '</AST_Production_Merge>')
+      insert_before(1, '<AST_Production_Merge>')
+    }
+  /* Nick extra rules for element groups */
+  | rhs '('
+    {
+      #nested_rhs += 1
+    }
+    rhs ')'
+    {
+      insert_after(3, '</AST_Production_GroupElement>')
+      insert_before(1, '<AST_Production_GroupElement>')
+    }
+  /* Nick added %space */
+| rhs "%space"
+    {
+      insert_after(1, '</AST_Production_Space>')
+      insert_before(1, '<AST_Production_Space>')
+    }
+  ;
+
+named_ref.opt
+  : %empty
+    {
+      #$$ = 0
+    }
+  | BRACKETED_ID
+    {
+      #$$ = named_ref_new($1, @1)
+    }
+  ;
+
+/*---------------------.
+| variable and value.  |
+`---------------------*/
+
+/* The STRING form of variable is deprecated and is not M4-friendly.
+   For example, M4 fails for '%define "[" "value"'.  */
+variable
+  : ID
+  | STRING
+    {
+      #$$ = uniqstr_new($1)
+    }
+  ;
+
+/* Some content or empty by default. */
+%code requires {
+};
+/*%union
+{
+  value = 0
+};*/
+/*%type <value> value;*/
+/*%printer
+{
+  if $$.kind == muscle_code:
+    rpl_fprintf(yyo, '{%s}', $$.chars)
+    break
+  elif $$.kind == muscle_keyword:
+    rpl_fprintf(yyo, '%s', $$.chars)
+    break
+  elif $$.kind == muscle_string:
+    rpl_fprintf(yyo, '"%s"', $$.chars)
+    break
+} <value>;*/
+
+value
+  : %empty
+    {
+      #$$.kind = muscle_keyword
+      #$$.chars = ''
+    }
+  | ID
+    {
+      #$$.kind = muscle_keyword
+      #$$.chars = $1
+    }
+  | STRING
+    {
+      #$$.kind = muscle_string
+      #$$.chars = $1
+    }
+  | "{...}"
+    {
+      #$$.kind = muscle_code
+      #$$.chars = strip_braces($1)
+    }
+  ;
+
+/*--------------.
+| Identifiers.  |
+`--------------*/
+
+/* Identifiers are returned as uniqstr values by the scanner.
+   Depending on their use, we may need to make them genuine symbols.  */
+
+id
+  : ID
+    {
+      #$$ = symbol_from_uniqstr($1, @1)
+    }
+  | CHAR
+    {
+      #$$ = symbol_get(char_name($1), @1)
+      #symbol_class_set($$, token_sym, @1, False)
+      #symbol_user_token_number_set($$, $1, @1)
+    }
+  ;
+
+id_colon
+  : ID_COLON
+    {
+      #$$ = symbol_from_uniqstr($1, @1)
+    }
+  ;
+
+symbol
+  : id
+  | string_as_id
+  ;
+
+/* A string used as an ID: quote it.  */
+string_as_id
+  : STRING
+    {
+      #$$ = symbol_get(quotearg_style(c_quoting_style, $1), @1)
+      #symbol_class_set($$, token_sym, @1, False)
+    }
+  ;
+
+epilogue.opt
+  : %empty
+  | "%%" EPILOGUE
+    {
+      #muscle_code_grow('epilogue', translate_code($2, @2, True), @2)
+      lex_yy.gram_scanner_last_string_free() # normally done in above
+      #code_scanner_last_string_free()
+      insert_after(1, '</AST_Section3>')
+      insert_after(0, '<AST_Section3>')
+    }
+  ;
+
+%%
+
+#def lloc_default(rhs, n):
+#  i = None
+#  loc = None
+#  loc.start = rhs[n].end
+#  loc.end = rhs[n].end
+#  i = 1
+#  while i <= n:
+#    if not equal_boundaries(rhs[i].start, rhs[i].end):
+#      loc.start = rhs[i].start
+#      break
+#    i += 1
+#  return loc
+#
+#def strip_braces(code):
+#  code[len(code) - 1] = 0
+#  return code + 1
+#
+#def translate_code(code, loc, plain):
+#  plain_code = None
+#  if plain:
+#    code_props_plain_init(&plain_code, code, loc)
+#  else:
+#    code_props_symbol_action_init(&plain_code, code, loc)
+#  code_props_translate_code(&plain_code)
+#  lex_yy.gram_scanner_last_string_free()
+#  return plain_code.code
+#
+#def translate_code_braceless(code, loc):
+#  return translate_code(strip_braces(code), loc, True)
+#
+#def add_param(type, decl, loc):
+#  alphanum = 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' '_' '0123456789'
+#  name_start = None
+#  p = None
+#  p = decl
+#  while p[1]:
+#    if (p == decl or not memchr(alphanum, p[-1], sizeof alphanum - 1)) and memchr(alphanum, p[0], sizeof alphanum - 10 - 1):
+#      name_start = p
+#    p += 1
+#  p -= 1
+#  while c_isspace(int(*p)):
+#    p -= 1
+#  p[1] = ord('\0')
+#  decl += 1
+#  while c_isspace(int(*decl)):
+#    decl += 1
+#  if not name_start:
+#    complain(&loc, complaint, gettext('missing identifier in parameter declaration'))
+#  else:
+#    name = xmemdup0(name_start, strspn(name_start, alphanum))
+#    if type & param_lex:
+#      muscle_pair_list_grow('lex_param', decl, name)
+#    if type & param_parse:
+#      muscle_pair_list_grow('parse_param', decl, name)
+#    free(name)
+#  lex_yy.gram_scanner_last_string_free()
+#
+#def version_check(loc, version):
+#  if strverscmp(version, '3.0.5') > 0:
+#    complain(loc, complaint, 'require bison %s, but have %s', version, '3.0.5')
+#    exit(63)
+#
+#def gram_error(loc, msg):
+#  complain(loc, complaint, '%s', msg)
+#
+#def token_name(type):
+#  return yytname[YYTRANSLATE(type)]
+#
+#def char_name(c):
+#  if c == ord('\''):
+#    return '\'\\\'\''
+#  else:
+#    buf = [None, None, None, None]
+#    buf[0] = ord('\'')
+#    buf[1] = c
+#    buf[2] = ord('\'')
+#    buf[3] = ord('\0')
+#    return quotearg_style(escape_quoting_style, buf)
+#
+#def current_lhs(sym, loc, ref):
+#  current_lhs_symbol = sym
+#  current_lhs_location = loc
+#  free(current_lhs_named_ref)
+#  current_lhs_named_ref = ref
+
+def insert_before(n, str):
+  global gram_piece3
+  lex_yy.gram_piece_insert(gram_piece2 + n * 2, str)
+  lex_yy.gram_piece0 += 1
+  gram_piece3 += 1
+
+def insert_after(n, str):
+  global gram_piece3
+  lex_yy.gram_piece_insert(gram_piece2 + n * 2 + 1, str)
+  lex_yy.gram_piece0 += 1
+  gram_piece3 += 1
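
Note: these two helpers are the whole markup mechanism of the parser. lex_yy.gram_piece is a flat list of text pieces in which each symbol of the current right-hand side owns a pair of slots starting at gram_piece2, and every semantic action above wraps symbols in XML tags at computed indices. The actions always insert the closing tag (insert_after, higher index) before the opening tag (insert_before, lower index), so the index used by the second insert is not shifted by the first. A toy model of the indexing (list contents and tag name are illustrative, not the real gram_piece layout):

    # Two slots per symbol, so symbol n starts at slot n * 2.
    pieces = ['tok0', '', 'tok1', '', 'tok2', '']

    def insert_before(n, s):
      pieces.insert(n * 2, s)

    def insert_after(n, s):
      pieces.insert(n * 2 + 1, s)

    # Close tag at the higher index first, then the open tag: the second
    # insert happens at a lower index, which the first insert did not shift.
    insert_after(2, '</AST_Production>')
    insert_before(0, '<AST_Production>')
    print(''.join(pieces))   # <AST_Production>tok0tok1tok2</AST_Production>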
diff --git a/bootstrap/scan-gram.l b/bootstrap/scan-gram.l
new file mode 100644
index 0000000..da36550
--- /dev/null
+++ b/bootstrap/scan-gram.l
@@ -0,0 +1,1060 @@
+/* Bison Grammar Scanner                             -*- C -*-
+
+   Copyright (C) 2002-2015, 2018 Free Software Foundation, Inc.
+
+   This file is part of Bison, the GNU Compiler Compiler.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+%option debug nodefault noinput noyywrap never-interactive
+%option prefix="gram_" outfile="lex_yy.py"
+
+%{
+  import state
+  import y_tab
+
+  scanner_cursor = state.boundary()
+  gram_last_string = ''
+  bracketed_id_str = None
+  bracketed_id_loc = 0
+  bracketed_id_start = 0
+  bracketed_id_context_state = -1
+
+  obstack_for_string = []
+  def gram_scanner_last_string_free():
+    del obstack_for_string[:]
+
+  gram_piece = []
+  gram_piece0 = 0
+  gram_piece1 = 0
+
+  percent_percent_count = 0
+
+  # these should be yylex()-local, but moved to here, see further down:
+  nesting = 0
+  context_state = -1
+  id_loc = state.location()
+  code_start = scanner_cursor.copy()
+  token_start = scanner_cursor.copy()
+  #first = True
+  if True: #first:
+    scanner_cursor = y_tab.yylloc.start.copy()
+    #first = False
+%}
+
+%x SC_YACC_COMMENT
+%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
+%x SC_AFTER_IDENTIFIER
+
+%x SC_TAG
+
+%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
+%x SC_COMMENT SC_LINE_COMMENT
+%x SC_STRING SC_CHARACTER
+%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
+%x SC_ELEMENT_GROUP
+
+letter    [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
+notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
+id        {letter}({letter}|[-0-9])*
+int       [0-9]+
+
+/* Zero or more instances of backslash-newline.  Following GCC, allow
+   white space between the backslash and the newline.  */
+splice   (\\[ \f\t\v]*\n)*
+
+/* An equal sign, with optional leading whitespaces. This is used in some
+   deprecated constructs. */
+eqopt    ([[:space:]]*=)?
+
+%%
+
+%{
+  # these should be here, but we can't access yylex()-local variables
+  # from an action since the action functions are not nested to yylex():
+  #nesting = 0
+  #context_state = 0
+  #id_loc = state.location()
+  #code_start = scanner_cursor.copy()
+  #token_start = scanner_cursor.copy()
+  #first = True
+  #if first:
+  #  scanner_cursor = y_tab.yylloc.start.copy()
+  #  first = False
+%}
+
+<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
+{
+  /* Comments and white space.  */
+  ","                          state.complain(state.loc, state.Wother, 'stray \',\' treated as white space')
+  [ \f\n\t\v]                  |
+  "//".*                       #continue
+  "/*" {
+    global token_start, context_state
+    token_start = y_tab.yylloc.start
+    context_state = YY_START()
+    BEGIN(SC_YACC_COMMENT)
+  }
+
+  /* #line directives are not documented, and may be withdrawn or
+     modified in future versions of Bison.  */
+  ^"#line "{int}(" \"".*"\"")?"\n" #handle_syncline(yytext + sizeof '#line ' - 1, y_tab.yylloc)
+}
+
+
+  /*----------------------------.
+  | Scanning Bison directives.  |
+  `----------------------------*/
+
+  /* For directives that are also command line options, the regex must be
+        "%..."
+     after "[-_]"s are removed, and the directive must match the --long
+     option name, with a single string argument.  Otherwise, add exceptions
+     to ../build-aux/cross-options.pl.  */
+
+<INITIAL>
+{
+  "%binary"                    return y_tab.PERCENT_NONASSOC
+  "%code"                      return y_tab.PERCENT_CODE
+  "%debug" {
+    y_tab.yylval = 'parse.trace'
+    return y_tab.PERCENT_FLAG
+  }
+  "%default-prec"              return y_tab.PERCENT_DEFAULT_PREC
+  "%define"                    return y_tab.PERCENT_DEFINE
+  "%defines"                   return y_tab.PERCENT_DEFINES
+  "%destructor"                        return y_tab.PERCENT_DESTRUCTOR
+  "%dprec"                     return y_tab.PERCENT_DPREC
+  "%empty"                     return y_tab.PERCENT_EMPTY
+  "%error-verbose"             return y_tab.PERCENT_ERROR_VERBOSE
+  "%expect"                    return y_tab.PERCENT_EXPECT
+  "%expect-rr"                 return y_tab.PERCENT_EXPECT_RR
+  "%file-prefix"               return y_tab.PERCENT_FILE_PREFIX
+  "%fixed-output-files"                return y_tab.PERCENT_YACC
+  "%initial-action"            return y_tab.PERCENT_INITIAL_ACTION
+  "%glr-parser"                        return y_tab.PERCENT_GLR_PARSER
+  "%language"                  return y_tab.PERCENT_LANGUAGE
+  "%left"                      return y_tab.PERCENT_LEFT
+  "%lex-param" {
+    y_tab.yylval = y_tab.param_lex
+    return y_tab.PERCENT_PARAM
+  }
+  "%locations" {
+    y_tab.yylval = 'locations'
+    return y_tab.PERCENT_FLAG
+  }
+  "%merge"                     return y_tab.PERCENT_MERGE
+  "%name-prefix"               return y_tab.PERCENT_NAME_PREFIX
+  "%no-default-prec"           return y_tab.PERCENT_NO_DEFAULT_PREC
+  "%no-lines"                  return y_tab.PERCENT_NO_LINES
+  "%nonassoc"                  return y_tab.PERCENT_NONASSOC
+  "%nondeterministic-parser"   return y_tab.PERCENT_NONDETERMINISTIC_PARSER
+  "%nterm"                     return y_tab.PERCENT_NTERM
+  "%output"                    return y_tab.PERCENT_OUTPUT
+  "%param" {
+    y_tab.yylval = y_tab.param_both
+    return y_tab.PERCENT_PARAM
+  }
+  "%parse-param" {
+    y_tab.yylval = y_tab.param_parse
+    return y_tab.PERCENT_PARAM
+  }
+  "%prec"                      return y_tab.PERCENT_PREC
+  "%precedence"                        return y_tab.PERCENT_PRECEDENCE
+  "%printer"                   return y_tab.PERCENT_PRy_tab.INTER
+  "%pure-parser" {
+    y_tab.yylval = 'api.pure'
+    return y_tab.PERCENT_FLAG
+  }
+  "%require"                   return y_tab.PERCENT_REQUIRE
+  "%right"                     return y_tab.PERCENT_RIGHT
+  "%skeleton"                  return y_tab.PERCENT_SKELETON
+  /* Nick added %space */
+  "%space"                     return y_tab.PERCENT_SPACE
+  "%start"                     return y_tab.PERCENT_START
+  "%term"                      return y_tab.PERCENT_TOKEN
+  "%token"                     return y_tab.PERCENT_TOKEN
+  "%token-table"               return y_tab.PERCENT_TOKEN_TABLE
+  "%type"                      return y_tab.PERCENT_TYPE
+  "%union"                     return y_tab.PERCENT_UNION
+  "%verbose"                   return y_tab.PERCENT_VERBOSE
+  "%yacc"                      return y_tab.PERCENT_YACC
+
+  /* deprecated */
+  "%default"[-_]"prec" {
+    #deprecated_directive(loc, yytext, '%default-prec')
+    scanner_cursor.column -= len('%default-prec')
+    unput('%default-prec')
+  }
+  "%error"[-_]"verbose" {
+    #deprecated_directive(loc, yytext, '%define parse.error verbose')
+    scanner_cursor.column -= len('%define parse.error verbose')
+    unput('%define parse.error verbose')
+  }
+  "%expect"[-_]"rr" {
+    #deprecated_directive(loc, yytext, '%expect-rr')
+    scanner_cursor.column -= len('%expect-rr')
+    unput('%expect-rr')
+  }
+  "%file-prefix"{eqopt} {
+    #deprecated_directive(loc, yytext, '%file-prefix')
+    scanner_cursor.column -= len('%file-prefix')
+    unput('%file-prefix')
+  }
+  "%fixed"[-_]"output"[-_]"files" {
+    #deprecated_directive(loc, yytext, '%fixed-output-files')
+    scanner_cursor.column -= len('%fixed-output-files')
+    unput('%fixed-output-files')
+  }
+  "%name"[-_]"prefix"{eqopt} {
+    #deprecated_directive(loc, yytext, '%name-prefix')
+    scanner_cursor.column -= len('%name-prefix')
+    unput('%name-prefix')
+  }
+  "%no"[-_]"default"[-_]"prec" {
+    #deprecated_directive(loc, yytext, '%no-default-prec')
+    scanner_cursor.column -= len('%no-default-prec')
+    unput('%no-default-prec')
+  }
+  "%no"[-_]"lines" {
+    #deprecated_directive(loc, yytext, '%no-lines')
+    scanner_cursor.column -= len('%no-lines')
+    unput('%no-lines')
+  }
+  "%output"{eqopt} {
+    #deprecated_directive(loc, yytext, '%output')
+    scanner_cursor.column -= len('%output')
+    unput('%output')
+  }
+  "%pure"[-_]"parser" {
+    #deprecated_directive(loc, yytext, '%pure-parser')
+    scanner_cursor.column -= len('%pure-parser')
+    unput('%pure-parser')
+  }
+  "%token"[-_]"table" {
+    #deprecated_directive(loc, yytext, '%token-table')
+    scanner_cursor.column -= len('%token-table')
+    unput('%token-table')
+  }
+
+  "%"{id}                      state.complain(y_tab.yylloc, state.complaint, 'invalid directive: {0:s}'.format(quote(yytext)))
+
+  "="                          return y_tab.EQUAL
+  "|"                          return y_tab.PIPE
+  ";"                          return y_tab.SEMICOLON
+
+  {id} {
+    global id_loc, bracketed_id_str
+    y_tab.yylval = yytext
+    id_loc = y_tab.yylloc
+    bracketed_id_str = None
+    BEGIN(SC_AFTER_IDENTIFIER)
+    gram_piece_pack()
+    gram_piece_append('<AST_ID>')
+    gram_piece_flush(len(yytext))
+    gram_piece_append('</AST_ID>')
+    gram_piece_pack()
+  }
+
+  {int} {
+    y_tab.yylval = scan_integer(yytext, 10, y_tab.yylloc)
+    return y_tab.INT
+  }
+  0[xX][0-9abcdefABCDEF]+ {
+    y_tab.yylval = scan_integer(yytext, 16, y_tab.yylloc)
+    return y_tab.INT
+  }
+
+  /* Identifiers may not start with a digit.  Yet, don't silently
+     accept "1FOO" as "1 FOO".  */
+  {int}{id}                    state.complain(y_tab.yylloc, state.complaint, 'invalid identifier: %s', quote(yytext))
+
+  /* Characters.  */
+  "'" {
+    global token_start
+    token_start = y_tab.yylloc.start
+    BEGIN(SC_ESCAPED_CHARACTER)
+    gram_piece_pack()
+    gram_piece_append('<AST_Char>')
+    gram_piece_flush(len(yytext))
+    gram_piece_append('<AST_Text>')
+  }
+
+  /* Strings. */
+  "\"" {
+    global token_start
+    token_start = y_tab.yylloc.start
+    BEGIN(SC_ESCAPED_STRING)
+    gram_piece_pack()
+    gram_piece_append('<AST_String>')
+    gram_piece_flush(len(yytext))
+    gram_piece_append('<AST_Text>')
+  }
+
+  /* Prologue. */
+  "%{" {
+    global code_start
+    code_start = y_tab.yylloc.start
+    BEGIN(SC_PROLOGUE)
+    gram_piece_pack()
+    gram_piece_append('<AST_Section1_Prologue>')
+    gram_piece_flush(len(yytext))
+    gram_piece_append('<AST_Text>')
+  }
+
+  /* Code in between braces.  */
+  "{" {
+    global nesting, code_start
+    obstack_for_string.append(yytext)
+    nesting = 0
+    code_start = y_tab.yylloc.start
+    BEGIN(SC_BRACED_CODE)
+    gram_piece_pack()
+    gram_piece_append('<AST_BracedCode>')
+    gram_piece_flush(len(yytext))
+    gram_piece_append('<AST_Text>')
+  }
+
+  /* Semantic predicate. */
+  "%?"[ \f\n\t\v]*"{" {
+    global nesting, code_start
+    nesting = 0
+    code_start = y_tab.yylloc.start
+    gram_piece_pack()
+    gram_piece_append('<AST_BracedPredicate>')
+    gram_piece_flush(len(yytext))
+    gram_piece_append('<AST_Text>')
+    BEGIN(SC_PREDICATE)
+  }
+
+  /* Nick extra rules for element groups */
+  "(?E{" {
+    global nesting, code_start
+    obstack_for_string.append(yytext)
+    nesting = 0
+    code_start = y_tab.yylloc.start
+    BEGIN(SC_ELEMENT_GROUP)
+    gram_piece_pack()
+    gram_piece_flush(len(yytext))
+    gram_piece_append('<AST_Text>')
+  }
+  ")"                          return ord(')')
+
+  /* A type. */
+  "<*>" {
+    gram_piece_pack()
+    gram_piece_append('<AST_TagRef>&lt;<AST_Text>*</AST_Text>&gt;</AST_TagRef>')
+    return ~y_tab.TAG_ANY
+  }
+  "<>" {
+    gram_piece_pack()
+    gram_piece_append('<AST_TagRef>&lt;<AST_Text />&gt;</AST_TagRef>')
+    return ~y_tab.TAG_NONE
+  }
+  "<" {
+    global nesting, token_start
+    nesting = 0
+    token_start = y_tab.yylloc.start
+    BEGIN(SC_TAG)
+    gram_piece_pack()
+    gram_piece_append('<AST_TagRef>')
+    gram_piece_flush(len(yytext))
+    gram_piece_append('<AST_Text>')
+  }
+
+  "%%" {
+    global percent_percent_count
+    percent_percent_count += 1
+    if percent_percent_count == 2:
+      BEGIN(SC_EPILOGUE)
+      gram_piece_pack()
+      gram_piece_escape(yytext)
+      gram_piece_pack()
+      gram_piece_pack()
+      return ~y_tab.PERCENT_PERCENT
+    return y_tab.PERCENT_PERCENT
+  }
+
+  "[" {
+    global bracketed_id_str, bracketed_id_start, bracketed_id_context_state
+    bracketed_id_str = None
+    bracketed_id_start = y_tab.yylloc.start
+    bracketed_id_context_state = YY_START()
+    BEGIN(SC_BRACKETED_ID)
+  }
+
+  [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. state.complain(y_tab.yylloc, state.complaint, '%s: %s', ngettext('invalid character', 'invalid characters', gram_leng), quote_mem(yytext, gram_leng))
+
+  <<EOF>> {
+    y_tab.yylloc.start = scanner_cursor.copy()
+    y_tab.yylloc.end = scanner_cursor.copy()
+    yyterminate()
+  }
+}
+
+
+  /*--------------------------------------------------------------.
+  | Supporting \0 complexifies our implementation for no expected |
+  | added value.                                                  |
+  `--------------------------------------------------------------*/
+
+<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
+{
+  \0                           state.complain(y_tab.yylloc, state.complaint, 'invalid null character')
+}
+
+
+  /*-----------------------------------------------------------------.
+  | Scanning after an identifier, checking whether a colon is next.  |
+  `-----------------------------------------------------------------*/
+
+<SC_AFTER_IDENTIFIER>
+{
+  "[" {
+    global bracketed_id_start, bracketed_id_context_state
+    if bracketed_id_str is not None:
+      scanner_cursor.column -= len(yytext)
+      yyless(0)
+      BEGIN(SC_RETURN_BRACKETED_ID)
+      y_tab.yylloc = id_loc
+      return y_tab.ID
+    else:
+      bracketed_id_start = y_tab.yylloc.start
+      bracketed_id_context_state = YY_START()
+      BEGIN(SC_BRACKETED_ID)
+  }
+  ":" {
+    BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
+    y_tab.yylloc = id_loc
+    gram_piece_escape(yytext)
+    return ~y_tab.ID_COLON
+  }
+  . {
+    scanner_cursor.column -= len(yytext)
+    yyless(0)
+    BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
+    y_tab.yylloc = id_loc
+    return ~y_tab.ID
+  }
+  <<EOF>> {
+    BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
+    y_tab.yylloc = id_loc
+    return ~y_tab.ID
+  }
+}
+
+  /*--------------------------------.
+  | Scanning bracketed identifiers. |
+  `--------------------------------*/
+
+<SC_BRACKETED_ID>
+{
+  {id} {
+    global bracketed_id_str, bracketed_id_loc
+    if bracketed_id_str is not None:
+      state.complain(y_tab.yylloc, state.complaint, 'unexpected identifier in bracketed name: %s', quote(yytext))
+    else:
+      bracketed_id_str = yytext
+      bracketed_id_loc = y_tab.yylloc
+  }
+  "]" {
+    global bracketed_id_str
+    BEGIN(bracketed_id_context_state)
+    if bracketed_id_str is not None:
+      if INITIAL == bracketed_id_context_state:
+        y_tab.yylval = bracketed_id_str
+        bracketed_id_str = None
+        y_tab.yylloc = bracketed_id_loc
+        return y_tab.BRACKETED_ID
+    else:
+      state.complain(y_tab.yylloc, state.complaint, 'an identifier expected')
+  }
+
+  [^\].A-Za-z0-9_/ \f\n\t\v]+|.        state.complain(y_tab.yylloc, state.complaint, '{0:s}: {1:s}'.format('invalid character in bracketed name' if gram_leng == 1 else 'invalid characters in bracketed name', quote_mem(yytext, gram_leng)))
+
+  <<EOF>> {
+    BEGIN(bracketed_id_context_state)
+    unexpected_eof(bracketed_id_start, ']')
+  }
+}
+
+<SC_RETURN_BRACKETED_ID>
+{
+  . {
+    global bracketed_id_str
+    scanner_cursor.column -= len(yytext)
+    yyless(0)
+    y_tab.yylval = bracketed_id_str
+    bracketed_id_str = None
+    y_tab.yylloc = bracketed_id_loc
+    BEGIN(INITIAL)
+    return y_tab.BRACKETED_ID
+  }
+}
+
+
+  /*---------------------------------------------------------------.
+  | Scanning a Yacc comment.  The initial '/ *' is already eaten.  |
+  `---------------------------------------------------------------*/
+
+<SC_YACC_COMMENT>
+{
+  "*/"                         BEGIN(context_state)
+  .|\n                         #continue
+  <<EOF>> {
+    unexpected_eof(token_start, '*/')
+    BEGIN(context_state)
+  }
+}
+
+
+  /*------------------------------------------------------------.
+  | Scanning a C comment.  The initial '/ *' is already eaten.  |
+  `------------------------------------------------------------*/
+
+<SC_COMMENT>
+{
+  "*"{splice}"/" {
+    obstack_for_string.append(yytext)
+    BEGIN(context_state)
+  }
+  <<EOF>> {
+    unexpected_eof(token_start, '*/')
+    BEGIN(context_state)
+  }
+}
+
+
+  /*--------------------------------------------------------------.
+  | Scanning a line comment.  The initial '//' is already eaten.  |
+  `--------------------------------------------------------------*/
+
+<SC_LINE_COMMENT>
+{
+  "\n" {
+    obstack_for_string.append(yytext)
+    BEGIN(context_state)
+  }
+  {splice}                     obstack_for_string.append(yytext)
+  <<EOF>>                      BEGIN(context_state)
+}
+
+
+  /*------------------------------------------------.
+  | Scanning a Bison string, including its escapes. |
+  | The initial quote is already eaten.             |
+  `------------------------------------------------*/
+
+<SC_ESCAPED_STRING>
+{
+  "\"" {
+    global gram_last_string
+    gram_last_string = ''.join(obstack_for_string)
+    del obstack_for_string[:] # not strictly correct
+    y_tab.yylloc.start = token_start
+    y_tab.yylval = gram_last_string
+    BEGIN(INITIAL)
+    gram_piece_append('</AST_Text>')
+    gram_piece_escape(yytext)
+    gram_piece_append('</AST_String>')
+    gram_piece_pack()
+    return ~y_tab.STRING
+  }
+  <<EOF>>                      unexpected_eof(token_start, '"')
+  "\n"                         unexpected_newline(token_start, '"')
+}
+
+  /*----------------------------------------------------------.
+  | Scanning a Bison character literal, decoding its escapes. |
+  | The initial quote is already eaten.                       |
+  `----------------------------------------------------------*/
+
+<SC_ESCAPED_CHARACTER>
+{
+  "'" {
+    global gram_last_string
+    gram_last_string = ''.join(obstack_for_string)
+    del obstack_for_string[:] # not strictly correct
+    y_tab.yylloc.start = token_start
+    if len(gram_last_string) == 0:
+      state.complain(y_tab.yylloc, state.Wother, 'empty character literal')
+      y_tab.yylval = ord('\'')
+    else:
+      if len(gram_last_string) > 1:
+        state.complain(y_tab.yylloc, state.Wother, 'extra characters in character literal')
+      y_tab.yylval = ord(gram_last_string[0])
+    #del obstack_for_string[:]
+    BEGIN(INITIAL)
+    gram_piece_append('</AST_Text>')
+    gram_piece_escape(yytext)
+    gram_piece_append('</AST_Char>')
+    gram_piece_pack()
+    return ~y_tab.CHAR
+  }
+  "\n"                         unexpected_newline(token_start, '\'')
+  <<EOF>>                      unexpected_eof(token_start, '\'')
+}
+
+
+
+  /*--------------------------------------------------------------.
+  | Scanning a tag.  The initial angle bracket is already eaten.  |
+  `--------------------------------------------------------------*/
+
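+ /* a sketch of the nesting convention inferred from the rules below:
+    nesting counts unmatched '<', and a '>' that takes it below zero ends
+    the tag, so balanced angle brackets scan as a single TAG token */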
+<SC_TAG>
+{
+  ">" {
+    global nesting, gram_last_string
+    nesting -= 1
+    if nesting < 0:
+      gram_last_string = ''.join(obstack_for_string)
+      del obstack_for_string[:] # not strictly correct
+      y_tab.yylloc.start = token_start
+      y_tab.yylval = gram_last_string
+      #del obstack_for_string[:]
+      BEGIN(INITIAL)
+      gram_piece_append('</AST_Text>')
+      gram_piece_escape(yytext)
+      gram_piece_append('</AST_TagRef>')
+      gram_piece_pack()
+      return ~y_tab.TAG
+    obstack_for_string.append(yytext)
+  }
+
+  ([^<>]|->)+                  obstack_for_string.append(yytext)
+  "<"+ {
+    global nesting
+    obstack_for_string.append(yytext)
+    nesting += gram_leng
+  }
+
+  <<EOF>>                      unexpected_eof(token_start, '>')
+}
+
+  /*----------------------------.
+  | Decode escaped characters.  |
+  `----------------------------*/
+
+<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
+{
+  \\[0-7]{1,3} {
+    c = int(yytext[1:], 8)
+    if not c or 0x7f * 2 + 1 < c:
+      state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
+    else:
+      obstack_for_string.append(chr(c))
+      gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(c))
+      gram_piece_flush(len(yytext))
+      gram_piece_append('</AST_Text_Escape>')
+  }
+
+  \\x[0-9abcdefABCDEF]+ {
+    c = int(yytext[2:], 16)
+    if not c or 0x7f * 2 + 1 < c:
+      state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
+    else:
+      obstack_for_string.append(chr(c))
+      gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(c))
+      gram_piece_flush(len(yytext))
+      gram_piece_append('</AST_Text_Escape>')
+  }
+
+  \\a {
+    obstack_for_string.append('\a')
+    gram_piece_append('<AST_Text_Escape character="7">')
+    gram_piece_flush(len(yytext))
+    gram_piece_append('</AST_Text_Escape>')
+  }
+  \\b {
+    obstack_for_string.append('\b')
+    gram_piece_append('<AST_Text_Escape character="8">')
+    gram_piece_flush(len(yytext))
+    gram_piece_append('</AST_Text_Escape>')
+  }
+  \\f {
+    obstack_for_string.append('\f')
+    gram_piece_append('<AST_Text_Escape character="12">')
+    gram_piece_flush(len(yytext))
+    gram_piece_append('</AST_Text_Escape>')
+  }
+  \\n {
+    obstack_for_string.append('\n')
+    gram_piece_append('<AST_Text_Escape character="10">')
+    gram_piece_flush(len(yytext))
+    gram_piece_append('</AST_Text_Escape>')
+  }
+  \\r {
+    obstack_for_string.append('\r')
+    gram_piece_append('<AST_Text_Escape character="13">')
+    gram_piece_flush(len(yytext))
+    gram_piece_append('</AST_Text_Escape>')
+  }
+  \\t {
+    obstack_for_string.append('\t')
+    gram_piece_append('<AST_Text_Escape character="9">')
+    gram_piece_flush(len(yytext))
+    gram_piece_append('</AST_Text_Escape>')
+  }
+  \\v {
+    obstack_for_string.append('\v')
+    gram_piece_append('<AST_Text_Escape character="11">')
+    gram_piece_flush(len(yytext))
+    gram_piece_append('</AST_Text_Escape>')
+  }
+
+  /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
+  \\("\""|"'"|"?"|"\\") {
+    obstack_for_string.append(yytext[1])
+    rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', yytext[1])
+    gram_piece_append(gram_piece_temp)
+    gram_piece_flush(len(yytext))
+    gram_piece_append('</AST_Text_Escape>')
+  }
+  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
+    c = convert_ucn_to_byte(yytext)
+    if c <= 0:
+      state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
+    else:
+      obstack_for_string.append(chr(c))
+      gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(c))
+      gram_piece_flush(len(yytext))
+      gram_piece_append('</AST_Text_Escape>')
+  }
+  \\(.|\n) {
+    p = yytext[1]
+    if p.isspace() and p.isprintable():
+      p = quote(p)
+    else:
+      p = quotearg_style_mem(escape_quoting_style, p, 1)
+    state.complain(y_tab.yylloc, state.complaint, 'invalid character after \\-escape: {0:s}'.format(p))
+  }
+}
+
+  /*--------------------------------------------.
+  | Scanning user-code characters and strings.  |
+  `--------------------------------------------*/
+
+<SC_CHARACTER,SC_STRING>
+{
+  {splice}|\\{splice}[^\n\[\]] obstack_for_string.append(yytext)
+}
+
+<SC_CHARACTER>
+{
+  "'" {
+    obstack_for_string.append(yytext)
+    BEGIN(context_state)
+  }
+  \n                           unexpected_newline(token_start, '\'')
+  <<EOF>>                      unexpected_eof(token_start, '\'')
+}
+
+<SC_STRING>
+{
+  "\"" {
+    obstack_for_string.append(yytext)
+    BEGIN(context_state)
+  }
+  \n                           unexpected_newline(token_start, '"')
+  <<EOF>>                      unexpected_eof(token_start, '"')
+}
+
+
+  /*---------------------------------------------------.
+  | Strings, comments etc. can be found in user code.  |
+  `---------------------------------------------------*/
+
+ /* Nick added: SC_ELEMENT_GROUP */
+<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE,SC_ELEMENT_GROUP>
+{
+  "'" {
+    global context_state, token_start
+    obstack_for_string.append(yytext)
+    context_state = YY_START()
+    token_start = y_tab.yylloc.start
+    BEGIN(SC_CHARACTER)
+  }
+  "\"" {
+    global context_state, token_start
+    obstack_for_string.append(yytext)
+    context_state = YY_START()
+    token_start = y_tab.yylloc.start
+    BEGIN(SC_STRING)
+  }
+  "/"{splice}"*" {
+    global context_state, token_start
+    obstack_for_string.append(yytext)
+    context_state = YY_START()
+    token_start = y_tab.yylloc.start
+    BEGIN(SC_COMMENT)
+  }
+  "/"{splice}"/" {
+    global context_state, token_start
+    obstack_for_string.append(yytext)
+    context_state = YY_START()
+    BEGIN(SC_LINE_COMMENT)
+  }
+}
+
+
+
+  /*-----------------------------------------------------------.
+  | Scanning some code in braces (actions, predicates). The    |
+  | initial "{" is already eaten.                              |
+  `-----------------------------------------------------------*/
+
+ /* Nick added: SC_ELEMENT_GROUP */
+<SC_BRACED_CODE,SC_PREDICATE,SC_ELEMENT_GROUP>
+{
+  "{"|"<"{splice}"%" {
+    global nesting
+    obstack_for_string.append(yytext)
+    nesting += 1
+  }
+  "%"{splice}">" {
+    global nesting
+    obstack_for_string.append(yytext)
+    nesting -= 1
+  }
+
+  /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
+     (as '<' '<%').  */
+  "<"{splice}"<"               obstack_for_string.append(yytext)
+
+  <<EOF>>                      unexpected_eof(code_start, '}')
+}
+
+<SC_BRACED_CODE>
+{
+  "}" {
+    global nesting, gram_last_string
+    obstack_for_string.append('}')
+    nesting -= 1
+    if nesting < 0:
+      gram_last_string = ''.join(obstack_for_string)
+      del obstack_for_string[:] # not strictly correct
+      y_tab.yylloc.start = code_start
+      y_tab.yylval = gram_last_string
+      BEGIN(INITIAL)
+      gram_piece_append('</AST_Text>')
+      gram_piece_escape(yytext)
+      gram_piece_append('</AST_BracedCode>')
+      gram_piece_pack()
+      return ~y_tab.BRACED_CODE
+  }
+}
+
+<SC_PREDICATE>
+{
+  "}" {
+    global nesting, gram_last_string
+    nesting -= 1
+    if nesting < 0:
+      gram_last_string = ''.join(obstack_for_string)
+      del obstack_for_string[:] # not strictly correct
+      y_tab.yylloc.start = code_start
+      y_tab.yylval = gram_last_string
+      BEGIN(INITIAL)
+      gram_piece_append('</AST_Text>')
+      gram_piece_escape(yytext)
+      gram_piece_append('</AST_BracedPredicate>')
+      gram_piece_pack()
+      return ~y_tab.BRACED_PREDICATE
+    else:
+      obstack_for_string.append('}')
+  }
+}
+
+ /* Nick extra rules for element groups */
+<SC_ELEMENT_GROUP>
+{
+  "}" {
+    global nesting, gram_last_string
+    obstack_for_string.append('}')
+    nesting -= 1
+    if nesting < 0:
+      gram_last_string = ''.join(obstack_for_string)
+      del obstack_for_string[:] # not strictly correct
+      y_tab.yylloc.start = code_start
+      #del obstack_for_string[:]
+      BEGIN(INITIAL)
+      gram_piece_append('</AST_Text>')
+      gram_piece_escape(yytext)
+      gram_piece_pack()
+      return ~ord('(')
+  }
+}
+
+  /*--------------------------------------------------------------.
+  | Scanning some prologue: from "%{" (already scanned) to "%}".  |
+  `--------------------------------------------------------------*/
+
+<SC_PROLOGUE>
+{
+  "%}" {
+    global gram_last_string
+    gram_last_string = ''.join(obstack_for_string)
+    del obstack_for_string[:] # not strictly correct
+    y_tab.yylloc.start = code_start
+    y_tab.yylval = gram_last_string
+    BEGIN(INITIAL)
+    gram_piece_append('</AST_Text>')
+    gram_piece_escape(yytext)
+    gram_piece_append('</AST_Section1_Prologue>')
+    gram_piece_pack()
+    return ~y_tab.PROLOGUE
+  }
+
+  <<EOF>>                      unexpected_eof(code_start, '%}')
+}
+
+
+  /*---------------------------------------------------------------.
+  | Scanning the epilogue (everything after the second "%%", which |
+  | has already been eaten).                                       |
+  `---------------------------------------------------------------*/
+
+<SC_EPILOGUE>
+{
+  <<EOF>> {
+    global gram_last_string
+    gram_last_string = ''.join(obstack_for_string)
+    del obstack_for_string[:] # not strictly correct
+    y_tab.yylloc.start = code_start
+    y_tab.yylval = gram_last_string
+    BEGIN(INITIAL)
+    gram_piece_pack()
+    return ~y_tab.EPILOGUE
+  }
+}
+
+
+  /*-----------------------------------------------------.
+  | By default, grow the string obstack with the input.  |
+  `-----------------------------------------------------*/
+
+ /* Nick added: SC_ELEMENT_GROUP */
+<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER,SC_ELEMENT_GROUP>. |
+ /* Nick added: SC_ELEMENT_GROUP */
+<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_ELEMENT_GROUP>\n obstack_for_string.append(yytext)
+
+
+%%
+
+#def no_cr_read(fp, buf, size):
+#  bytes_read = fread_unlocked(buf, 1, size, fp)
+#  if bytes_read:
+#    w = memchr(buf, ord('\r'), bytes_read)
+#    if w:
+#      r = ++w
+#      lim = buf + bytes_read
+#      pass
+#      while True:
+#        w[-1] = ord('\n')
+#        if r == lim:
+#          ch = getc_unlocked(fp)
+#          if ch != ord('\n') and ungetc(ch, fp) != ch:
+#            break
+#        else:
+#          if *r == ord('\n'):
+#            r += 1
+#        while True:
+#          if r == lim:
+#            return w - buf
+#          if not ((*w++ = *r++) != ord('\r')):
+#            break
+#        pass
+#      return w - buf
+#  return bytes_read
+#
+#def scan_integer(number, base, loc):
+#  num = strtoul(number, None, base)
+#  if 0x7fffffff < num:
+#    state.complain(y_tab.yylloc, state.complaint, 'integer out of range: %s', quote(number))
+#    num = 0x7fffffff
+#  return num
+#
+#def convert_ucn_to_byte(ucn):
+#  code = strtoul(ucn + 2, None, 16)
+#  if 0x7f * 2 + 1 < code:
+#    return -1
+#  return code
+#
+#def handle_syncline(args, loc):
+#  file = None
+#  lineno = strtoul(args, &file, 10)
+#  if 0x7fffffff <= lineno:
+#    state.complain(y_tab.yylloc, state.Wother, 'line number overflow')
+#    lineno = 0x7fffffff
+#  file = strchr(file, ord('"'))
+#  if file:
+#    *strchr(file + 1, ord('"')) = ord('\0')
+#    current_file = uniqstr_new(file + 1)
+#  boundary_set(&scanner_cursor, current_file, lineno, 1)
+
+def unexpected_end(start, msg, token_end):
+  loc = state.location(start.copy(), scanner_cursor.copy())
+  scanner_cursor.column -= len(token_end)
+  unput(token_end)
+  token_end = quote(token_end)
+  # instead of '\'', display "'"
+  if token_end == '\'\\\'\'':
+    token_end = '"\'"'
+  state.complain(loc, state.complaint, msg.format(token_end))
+
+def unexpected_eof(start, token_end):
+  unexpected_end(start, 'missing {0:s} at end of file', token_end)
+
+def unexpected_newline(start, token_end):
+  unexpected_end(start, 'missing {0:s} at end of line', token_end)
+
+#def gram_scanner_initialize():
+#  global obstack_for_string
+#  obstack_for_string = []
+#
+#def gram_scanner_free():
+#  del obstack_for_string[:]
+#  yy_delete_buffer(YY_CURRENT_BUFFER)
+
+def gram_piece_append(str):
+  gram_piece.append(str)
+
+def gram_piece_insert(n, str):
+  gram_piece[n:n] = [str]
+
+xml_escape = {'<': '&lt;', '>': '&gt;', '&': '&amp;'}
+def gram_piece_escape(str):
+  gram_piece.append(''.join([xml_escape.get(i, i) for i in str]))
+
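+# move the first n characters of yytext into the markup output (escaped) and
+# drop them from yytext, so a tag can be inserted part-way through a token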
+def gram_piece_flush(n):
+  global yytext
+  gram_piece_escape(yytext[:n])
+  yytext = yytext[n:]
+
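+# collapse everything appended since the last pack into a single string, so
+# one entry in gram_piece corresponds to one markup unit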
+def gram_piece_pack():
+  global gram_piece0
+  gram_piece[gram_piece0:] = [''.join(gram_piece[gram_piece0:])]
+  gram_piece0 += 1
+
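+# scanner rules that emit their own markup return ~token; gram_lex() undoes
+# the complement, and for ordinary tokens wraps the default markup (the
+# escaped token text) in its own piece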
+def gram_lex():
+  result = yylex()
+  if result < 0:
+    return ~result
+  gram_piece_pack()
+  gram_piece_escape(yytext)
+  gram_piece_pack()
+  return result
diff --git a/bootstrap/skel_lex_yy.py b/bootstrap/skel_lex_yy.py
new file mode 100644 (file)
index 0000000..e2acca1
--- /dev/null
@@ -0,0 +1,289 @@
+import bisect
+import sys
+
+# GENERATE SECTION1
+
+# GENERATE STARTCONDDECL
+
+class YYReject(Exception):
+  pass
+
+class YYContinue(Exception):
+  pass
+
+class YYTerminate(Exception):
+  pass
+
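+# the input is modelled as a stack of YYBufferState objects, each optionally
+# followed by a linked list of YYBufferBlock text blocks created by unput()
+# or by reading yyin line by line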
+class YYBufferList:
+  def __init__(self, next = None):
+    self.next = next
+
+class YYBufferBlock(YYBufferList):
+  def __init__(self, next = None, pos = 0, text = ''):
+    YYBufferList.__init__(self, next)
+    self.pos = pos
+    self.text = text
+
+class YYBufferState(YYBufferList):
+  def __init__(self, next = None, file_in = None, at_bol = True):
+    YYBufferList.__init__(self, next)
+    self.file_in = file_in
+    self.at_bol = at_bol
+
+yyin = sys.stdin
+yyout = sys.stdout
+yy_buffer_stack = [YYBufferState()]
+
+yystart = INITIAL
+yystart_stack = []
+yy_threads0 = [None]
+yy_threads1 = [None]
+yy_prefix_slop = 1
+
+yy_group_text = None
+yy_group_stack = None
+yy_groups = None
+yy_groups_by_name = None
+yy_action = None
+yytext = ''
+yytext_len = 0
+
+YY_NULL = 0
+
+def REJECT():
+  raise YYReject()
+
+def yyterminate():
+  raise YYTerminate()
+
+def yyless(i):
+  global yytext, yytext_len
+  assert yytext_len >= i
+  yytext = yytext[:i]
+  yytext_len = i
+
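+# push text back into the input: the already-consumed yytext is flushed to
+# the markup pieces and skipped in the block list, then the pushed-back text
+# is prepended as a new block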
+def unput(text):
+  global yyin, yytext_len
+  gram_piece_flush(len(yytext))
+  while yytext_len:
+    block = yy_buffer_stack[-1].next
+    while block is None or block.pos >= len(block.text):
+      if block is None:
+        yy_buffer_stack.pop()
+        block = yy_buffer_stack[-1].next
+        yyin = yy_buffer_stack[-1].file_in
+      else:
+        block = block.next
+        yy_buffer_stack[-1].next = block
+    i = min(yytext_len, len(block.text) - block.pos)
+    block.pos += i
+    yytext_len -= i
+  yy_buffer_stack[-1].next = YYBufferBlock(yy_buffer_stack[-1].next, 0, text)
+
+def ECHO():
+  yyout.write(yytext)
+
+def yy_rule_start():
+  global yytext, yytext_len
+  yytext = yy_group_text[:yy_group_stack[-1]]
+  yytext_len = yy_group_stack[-1]
+  del yy_group_stack[-2:]
+  # note that this should also be done after yyless() and REJECT(),
+  # and state should be saved in case they result in a null string,
+  # however, it doesn't seem to be in flex, maintain compatibility:
+  if len(yytext):
+    yy_buffer_stack[-1].at_bol = yytext[-1] == '\n'
+
+def yy_group_end():
+  pass
+
+def BEGIN(start):
+  global yystart
+  yystart = start
+
+def YY_START():
+  return yystart
+
+def yy_push_state(start):
+  global yystart
+  yystart_stack.append(yystart)
+  yystart = start
+
+def yy_pop_state():
+  global yystart
+  yystart = yystart_stack.pop()
+
+def YY_AT_BOL():
+  return yy_buffer_stack[-1].at_bol
+
+def yy_set_bol(at_bol):
+  yy_buffer_stack[-1].at_bol = at_bol
+
+# GENERATE SECTION2
+
+def yylex():
+  global \
+    yyin, \
+    yy_threads0, \
+    yy_threads1, \
+    yy_prefix_slop, \
+    yy_group_text, \
+    yy_group_stack, \
+    yy_action, \
+    yytext, \
+    yytext_len
+
+  # GENERATE SECTION2INITIAL
+
+  while True:
+    while yytext_len:
+      block = yy_buffer_stack[-1].next
+      while block is None or block.pos >= len(block.text):
+        if block is None:
+          yy_buffer_stack.pop()
+          block = yy_buffer_stack[-1].next
+          yyin = yy_buffer_stack[-1].file_in
+        else:
+          block = block.next
+          yy_buffer_stack[-1].next = block
+      i = min(yytext_len, len(block.text) - block.pos)
+      block.pos += i
+      yytext_len -= i
+
+    match = ''
+    match_len = 0
+
+    del yy_threads0[yy_prefix_slop:]
+    yy_threads0.append(None)
+
+    buffer_ptr = len(yy_buffer_stack) - 1
+    block_prev = yy_buffer_stack[buffer_ptr]
+    block = block_prev.next
+    if block is not None:
+      block_pos = block.pos
+
+    action = yy_dfa_start_action[
+      yystart * 2 + int(yy_buffer_stack[-1].at_bol)
+    ]
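+    # run the DFA: each transition list maintains parallel "threads" that
+    # record where capture groups start and end, replayed via ref_data()
+    # once the winning match is known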
+    while action != -1:
+      state, transition = yy_dfa_actions[action]
+      #print('i', i, 'action', action, 'state', state, 'transition', transition)
+
+      i = yy_prefix_slop
+      assert len(yy_threads1) == yy_prefix_slop
+      for trans in transition:
+        if trans[0] == 0: #DFA.TRANSITION_POP:
+          i += trans[1]
+        elif trans[0] == 1: #DFA.TRANSITION_DUP:
+          while i < trans[1]:
+            yy_threads0[:0] = [None] * yy_prefix_slop
+            yy_threads1[:0] = [None] * yy_prefix_slop
+            i += yy_prefix_slop
+            yy_prefix_slop *= 2
+          yy_threads0[i - trans[1]:i] = yy_threads0[i:i + trans[1]]
+          i -= trans[1]
+        elif trans[0] == 2: #DFA.TRANSITION_MARK:
+          yy_threads0[i:i + trans[1]] = [
+            (match_len, trans[2], thread)
+            for thread in yy_threads0[i:i + trans[1]]
+          ]
+        elif trans[0] == 3: #DFA.TRANSITION_MOVE:
+          yy_threads1.extend(yy_threads0[i:i + trans[1]])
+          i += trans[1]
+        #elif trans[0] == DFA.TRANSITION_DEL:
+        #  del yy_threads1[-trans[1]:]
+        else:
+          assert False
+      assert i == len(yy_threads0)
+      yy_threads0, yy_threads1 = yy_threads1, yy_threads0
+      del yy_threads1[yy_prefix_slop:]
+
+      if state == 0:
+        # there is only one match, which is complete
+        assert len(yy_threads0) == yy_prefix_slop + 1
+        assert yy_dfa_states[state][2] == [0]
+        break
+
+      yy_buffer_stack[-1].file_in = yyin
+      while block is None or block_pos >= len(block.text):
+        if block is None:
+          file_in = yy_buffer_stack[buffer_ptr].file_in
+          text = '' if file_in is None else file_in.readline()
+          if len(text):
+            block = YYBufferBlock(None, 0, text)
+            block_pos = 0
+            block_prev.next = block
+          else:
+            # do not re-attempt read once EOF is reached
+            yy_buffer_stack[buffer_ptr].file_in = None
+            yyin = yy_buffer_stack[-1].file_in
+            buffer_ptr -= 1
+            if buffer_ptr < 0:
+              break # EOF
+            block_prev = yy_buffer_stack[buffer_ptr]
+            block = block_prev.next
+            if block is not None:
+              block_pos = block.pos
+        else:
+          i = match_len - len(match)
+          if i:
+            match += block.text[block_pos - i:]
+          block_prev = block
+          block = block_prev.next
+          if block is not None:
+            block_pos = block.pos
+      else:
+        #print('block_pos', block_pos, 'block.text', block.text)
+        action = yy_dfa_states[state][1][
+          bisect.bisect_right(
+            yy_dfa_states[state][0],
+            ord(block.text[block_pos])
+          )
+        ]
+        block_pos += 1
+        match_len += 1
+        continue
+      # EOF
+      if i == 0:
+        y_tab.yylloc = y_tab.YYLTYPE() # stopgap
+        try:
+          return yy_eof_actions[yystart]()
+        except YYTerminate:
+          return 0
+      break
+
+    i = match_len - len(match)
+    if i:
+      assert block is not None
+      match += block.text[block_pos - i:]
+
+    for i in yy_dfa_states[state][2]:
+      yy_group_text = match
+      yy_group_stack = []
+      yy_groups = None
+      yy_groups_by_name = None
+      yy_action = None
+      yytext = None
+      yytext_len = None
+
+      thread = yy_threads0[yy_prefix_slop + i]
+      #print('thread', thread)
+      while thread is not None:
+        pos, ref_data, thread = thread
+        yy_group_stack.append(pos)
+        ref_data()
+
+      y_tab.yylloc = y_tab.YYLTYPE() # stopgap
+      try:
+        return yy_action()
+      except YYReject:
+        pass
+      except YYContinue:
+        gram_piece_escape(yytext)
+        break
+      except YYTerminate:
+        return 0
+    else:
+      raise Exception('scanner jammed')
+
+# GENERATE SECTION3
diff --git a/bootstrap/skel_y_tab.py b/bootstrap/skel_y_tab.py
new file mode 100644 (file)
index 0000000..3b179a0
--- /dev/null
@@ -0,0 +1,114 @@
+# Copyright (C) 2018 Nick Downing <nick@ndcode.org>
+# SPDX-License-Identifier: GPL-2.0-with-bison-exception
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 51
+# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+# As a special exception, you may create a larger work that contains part or
+# all of the Bison or piyacc parser skeleton and distribute that work under
+# terms of your choice, so long as that work isn't itself a parser generator
+# using the skeleton or a modified version thereof as a parser skeleton.
+# Alternatively, if you modify or redistribute the parser skeleton itself, you
+# may (at your option) remove this special exception, which will cause the
+# skeleton and the resulting Bison or piyacc output files to be licensed under
+# the GNU General Public License without this special exception.
+
+import bisect
+import lex_yy
+
+# this can be redefined in SECTION1
+class YYLTYPE:
+  def __init__(
+    self,
+    first_line = 0,
+    first_column = 0,
+    last_line = 0,
+    last_column = 0
+  ):
+    self.first_line = first_line
+    self.first_column = first_column
+    self.last_line = last_line
+    self.last_column = last_column
+
+# GENERATE SECTION1
+
+# GENERATE TOKENS
+
+yystack = None
+yychar = None
+YYEMPTY = -1
+
+yyval = None
+yyloc = None
+
+yylval = None
+yylloc = YYLTYPE()
+
+# GENERATE SECTION2
+
+def yyparse():
+  global yystack, yychar, yyval, yyloc, yylval, yylloc, gram_piece2, gram_piece3
+
+  # GENERATE INITIALACTION
+
+  state = 0
+  yystack = []
+  yylval = None
+  yychar = -1
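+  # LR(1) driver: per-state tuples hold sorted lookahead boundaries and
+  # shift/reduce actions ([0], [1]), goto boundaries and targets ([2], [3]),
+  # and a default reduction ([4]); the low bit of an action selects reduce,
+  # the remaining bits hold the target state or production number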
+  while True:
+    #print('state', state, 'yystack', yystack)
+    reduce = yy_lr1dfa_states[state][4]
+    if reduce == -1:
+      if yychar == -1:
+        yylval = None
+        yylloc = None
+        yychar = lex_yy.gram_lex()
+        #print('yychar', yychar, 'yylval', yylval, 'yylloc', yylloc, 'lex_yy.yytext', lex_yy.yytext)
+      action = yy_lr1dfa_states[state][1][
+        bisect.bisect_right(yy_lr1dfa_states[state][0], yychar)
+      ]
+      if action == -1:
+        raise Exception('syntax error')
+      if (action & 1) == 0:
+        yystack.append((state, yylval, yylloc))
+        state = action >> 1
+        #print('shift', state)
+        yychar = -1
+        continue
+      reduce = action >> 1
+    #print('reduce', reduce)
+    len_symbols, ref_data = yy_lr1dfa_productions[reduce]
+    base = len(yystack) - len_symbols
+    yystack.append((state, None, None))
+    state, yyval, yyloc = yystack[base]
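+    # each token contributes two entries to lex_yy.gram_piece (preceding
+    # material, then the token itself); collapse the pieces spanned by this
+    # production's right-hand side into one, so the markup nests like the
+    # parse tree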
+    n = base * 2
+    gram_piece2 = n + 1
+    gram_piece3 = n + len_symbols * 2
+    if len_symbols == 0:
+      lex_yy.gram_piece[n:n] = ['', '']
+      gram_piece3 = n + 2
+      lex_yy.gram_piece0 += 2
+    ref_data()
+    lex_yy.gram_piece[gram_piece2:gram_piece3] = [''.join(lex_yy.gram_piece[gram_piece2:gram_piece3])]
+    lex_yy.gram_piece0 += gram_piece2 + 1 - gram_piece3
+    del yystack[base:]
+    if reduce == 0:
+      assert base == 0
+      break
+    yystack.append((state, yyval, yyloc))
+    state = yy_lr1dfa_states[state][3][
+      bisect.bisect_right(yy_lr1dfa_states[state][2], reduce)
+    ]
+    assert state != -1
+
+# GENERATE SECTION3
diff --git a/bootstrap/state.py b/bootstrap/state.py
new file mode 100644 (file)
index 0000000..03591b7
--- /dev/null
@@ -0,0 +1,71 @@
+import sys
+
+# miscellaneous state accessed by scan-gram.l and parse-gram.y
+class boundary:
+  def __init__(self, file = '<stdin>', line = 0, column = 0):
+    self.file = file
+    self.line = line
+    self.column = column
+  def copy(self):
+    return boundary(self.file, self.line, self.column)
+
+class location:
+  def __init__(self, start = None, end = None):
+    self.start = boundary() if start is None else start
+    self.end = boundary() if end is None else end
+  def copy(self):
+    return location(self.start.copy(), self.end.copy())
+
+warning_midrule_values = 0
+warning_yacc = 1
+warning_conflicts_sr = 2
+warning_conflicts_rr = 3
+warning_empty_rule = 4
+warning_deprecated = 5
+warning_precedence = 6
+warning_other = 7
+warnings_size = 8
+Wnone = 0
+Wmidrule_values = 1 << warning_midrule_values
+Wyacc = 1 << warning_yacc
+Wconflicts_sr = 1 << warning_conflicts_sr
+Wconflicts_rr = 1 << warning_conflicts_rr
+Wdeprecated = 1 << warning_deprecated
+Wempty_rule = 1 << warning_empty_rule
+Wprecedence = 1 << warning_precedence
+Wother = 1 << warning_other
+Werror = 1 << 10
+complaint = 1 << 11
+fatal = 1 << 12
+silent = 1 << 13
+no_caret = 1 << 14
+Weverything = ~complaint & ~fatal & ~silent
+Wall = Weverything & ~Wyacc
+
+def complain(loc, flags, message):
+  #severity s = warning_severity (flags);
+  #if ((flags & complaint) && complaint_status < status_complaint)
+  #  complaint_status = status_complaint;
+  #
+  #if (severity_warning <= s)
+  #  {
+  #    const char* prefix =
+  #      s == severity_fatal ? _("fatal error")
+  #      : s == severity_error ? _("error")
+  #      : _("warning");
+  #    if (severity_error <= s && ! complaint_status)
+  #      complaint_status = status_warning_as_error;
+  #    error_message (loc, flags, prefix, message, args);
+  #  }
+  #
+  #if (flags & fatal)
+  #  exit (EXIT_FAILURE);
+  # stopgap: print the message, but only treat complaints and fatal errors
+  # as fatal, since warnings must let the scan continue
+  print(message)
+  if flags & (complaint | fatal):
+    sys.exit(1)
+
+undef_assoc = 0
+right_assoc = 1
+left_assoc = 2
+non_assoc = 3
+precedence_assoc = 4
index 9ab835d..b900f30 100644 (file)
@@ -80,6 +80,12 @@ def generate_py(
             '''# GENERATE SECTION1 BEGIN
 {0:s}{1:s}# GENERATE END
 '''.format(
+              ''.join(
+                [
+                  ast_text_to_python(i, '')
+                  for i in _ast.requires_text
+                ]
+              ),
               ''.join(
                 [
                   ast_text_to_python(i, '')
@@ -116,8 +122,7 @@ def generate_py(
             '''# GENERATE SECTION2 BEGIN
 YYDEBUG = {0:s}
 YYERROR_VERBOSE = {1:s}
-{2:s}{3:s}{4:s}
-# GENERATE END
+{2:s}{3:s}{4:s}# GENERATE END
 '''.format(
               'True' if _ast[0].debug else 'False',
               'True' if _ast[0].error_verbose else 'False',
@@ -217,7 +222,7 @@ YYERROR_VERBOSE = {1:s}
 '''.format(
               ''.join(
                 [
-                  plex_text_to_python(i, '  ')
+                  ast_text_to_python(i, '  ')
                   for i in _ast.initial_action_text
                 ]
               ).replace('(yyval)', '(yylval').replace('(yyloc)', '(yylloc)') # hack
index 56b57e2..e9c9780 100644 (file)
@@ -65,7 +65,7 @@ def yyparse(factory, *args, **kwargs):
         yylval = None
         yylloc = None
         yychar = lex_yy.yylex()
-        #print('yychar', yychar, 'yylval', yylval, 'yylloc', yylloc)
+        #print('yychar', yychar, 'yylval', yylval, 'yylloc', yylloc, 'lex_yy.yytext', lex_yy.yytext)
         #print('lex_yy.yy_element_space')
         #xml.etree.ElementTree.dump(lex_yy.yy_element_space)
         #print('lex_yy.yy_element_token')