Get direct AST generation working properly, does not exactly match bootstrap_bison...
author Nick Downing <nick@ndcode.org>
Mon, 21 Jan 2019 06:19:37 +0000 (17:19 +1100)
committer Nick Downing <nick@ndcode.org>
Mon, 21 Jan 2019 06:19:37 +0000 (17:19 +1100)
Makefile
bootstrap/parse-gram.y
bootstrap/scan-gram.l
element.py
generate_bison.py
generate_py.py
markup.py
parse-gram.y
scan-gram.l
skel/skel_py.py
skel/skel_py_element.py

index 52a6513..d3f7d3a 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,16 +1,16 @@
 all: lex_yy.py lex_yy_code.py y_tab.py
 
-lex_yy.py: scan-gram.l bootstrap/skel_lex_yy.py
+lex_yy.py: scan-gram.l
        ../bootstrap_flex.git/src/flex -o /dev/null $< 2>$<.xml
-       ../pilex.git/pilex.py --python --skel bootstrap/skel_lex_yy.py $<.xml
+       ../pilex.git/pilex.py --element --python $<.xml
 
 lex_yy_code.py: scan-code.l
        ../bootstrap_flex.git/src/flex -o /dev/null $< 2>$<.xml
        ../pilex.git/pilex.py --element --python -o $@ $<.xml
 
-y_tab.py: parse-gram.y bootstrap/skel_y_tab.py
+y_tab.py: parse-gram.y
        ../bootstrap_bison.git/src/bison -o /dev/null $< 2>$<.xml
-       ./piyacc.py --python --skel bootstrap/skel_y_tab.py $<.xml
+       ./piyacc.py --element --python $<.xml
 
 clean:
        rm -f lex_yy.py lex_yy_code.py y_tab.py *.xml
index 8d3cbc8..3280411 100644 (file)
--- a/bootstrap/parse-gram.y
+++ b/bootstrap/parse-gram.y
@@ -256,8 +256,10 @@ input
 prologue_declarations
   : %empty
     {
-      global yychar
+      global yychar, yylval, yylloc
       if yychar == YYEMPTY:
+        yylval = None
+        yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
         yychar = lex_yy.gram_lex()
       temp = lex_yy.gram_piece[gram_piece2 + 1]
       lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
@@ -881,12 +883,14 @@ rhses.1
 rhs
   : %empty
     {
-      global yychar
+      global yychar, yylval, yylloc
       #if nested_rhs:
       #  nested_rhs -= 1
       #else:
       #  grammar_current_rule_begin(current_lhs_symbol, current_lhs_location, current_lhs_named_ref)
       if yychar == YYEMPTY:
+        yylval = None
+        yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
         yychar = lex_yy.gram_lex()
       temp = lex_yy.gram_piece[gram_piece2 + 1]
       lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
index 009267d..8f64284 100644 (file)
--- a/bootstrap/scan-gram.l
+++ b/bootstrap/scan-gram.l
   gram_piece1 = 0
   markup_stack = []
 
-  percent_percent_count = 0;
+  percent_percent_count = 0
 
   # these should be yylex()-local, but moved to here, see further down:
   nesting = 0
   context_state = -1
   id_loc = state.location()
-  code_start = scanner_cursor.copy()
-  token_start = scanner_cursor.copy()
-  #first = True
-  if True: #first:
-    scanner_cursor = y_tab.yylloc.start.copy()
-    #first = False
+  code_start = None
+  token_start = None
+  scanner_cursor = state.boundary('<stdin>', 0, 0)
 %}
 
 %x SC_YACC_COMMENT
@@ -322,9 +319,14 @@ eqopt    ([[:space:]]*=)?
     code_start = y_tab.yylloc.start
     BEGIN(SC_BRACED_CODE)
     gram_piece_pack()
-    markup_stack.append(len(gram_piece)) # <AST_BracedCode>
-    gram_piece_flush(len(yytext))
+    # new way, includes braces, wrapped by <AST_Production_Action> later
     markup_stack.append(len(gram_piece)) # <AST_Text>
+    gram_piece_flush(len(yytext))
+    # old way
+    #markup_stack.append(len(gram_piece)) # <AST_BracedCode>
+    #gram_piece_flush(len(yytext))
+    #markup_stack.append(len(gram_piece)) # <AST_Text>
+    # to here
   }
 
   /* Semantic predicate. */
@@ -429,16 +431,19 @@ eqopt    ([[:space:]]*=)?
       yyless(0)
       BEGIN(SC_RETURN_BRACKETED_ID)
       y_tab.yylloc = id_loc
-      return y_tab.ID
+      return ~y_tab.ID
     else:
       bracketed_id_start = y_tab.yylloc.start
       bracketed_id_context_state = YY_START()
       BEGIN(SC_BRACKETED_ID)
   }
   ":" {
+    global gram_piece0
     BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
     y_tab.yylloc = id_loc
     gram_piece_escape(yytext)
+    gram_piece0 -= 1
+    gram_piece_pack()
     return ~y_tab.ID_COLON
   }
   . {
@@ -656,25 +661,23 @@ eqopt    ([[:space:]]*=)?
 <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
 {
   \\[0-7]{1,3} {
-    c = strtoul(yytext + 1, None, 8)
+    c = int(yytext[1:], 8)
     if not c or 0x7f * 2 + 1 < c:
       state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
     else:
       obstack_for_string.append(chr(c))
-      rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', int(c))
-      gram_piece_append(gram_piece_temp)
+      gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(c))
       gram_piece_flush(len(yytext))
       gram_piece_append('</AST_Text_Escape>')
   }
 
   \\x[0-9abcdefABCDEF]+ {
-    c = strtoul(yytext + 2, None, 16)
+    c = int(yytext[2:], 16)
     if not c or 0x7f * 2 + 1 < c:
       state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
     else:
       obstack_for_string.append(chr(c))
-      rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', int(c))
-      gram_piece_append(gram_piece_temp)
+      gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(c))
       gram_piece_flush(len(yytext))
       gram_piece_append('</AST_Text_Escape>')
   }
@@ -725,8 +728,7 @@ eqopt    ([[:space:]]*=)?
   /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
   \\("\""|"'"|"?"|"\\") {
     obstack_for_string.append(yytext[1])
-    rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', yytext[1])
-    gram_piece_append(gram_piece_temp)
+    gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(ord(yytext[1])))
     gram_piece_flush(len(yytext))
     gram_piece_append('</AST_Text_Escape>')
   }
@@ -737,8 +739,7 @@ eqopt    ([[:space:]]*=)?
       state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
     else:
       obstack_for_string.append(chr(c))
-      rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', c)
-      gram_piece_append(gram_piece_temp)
+      gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(c))
       gram_piece_flush(len(yytext))
       gram_piece_append('</AST_Text_Escape>')
   }
@@ -858,11 +859,17 @@ eqopt    ([[:space:]]*=)?
       y_tab.yylloc.start = code_start
       y_tab.yylval = gram_last_string
       BEGIN(INITIAL)
+      # new way, includes braces, wrapped by <AST_Production_Action> later
+      gram_piece_escape(yytext)
       gram_piece_insert(markup_stack.pop(), '<AST_Text>')
       gram_piece_append('</AST_Text>')
-      gram_piece_escape(yytext)
-      gram_piece_insert(markup_stack.pop(), '<AST_BracedCode>')
-      gram_piece_append('</AST_BracedCode>')
+      # old way
+      #gram_piece_insert(markup_stack.pop(), '<AST_Text>')
+      #gram_piece_append('</AST_Text>')
+      #gram_piece_escape(yytext)
+      #gram_piece_insert(markup_stack.pop(), '<AST_BracedCode>')
+      #gram_piece_append('</AST_BracedCode>')
+      # to here
       gram_piece_pack()
       return ~y_tab.BRACED_CODE
   }
@@ -1004,12 +1011,12 @@ def scan_integer(number, base, loc):
     num = 0x7fffffff
   return num
 
-#def convert_ucn_to_byte(ucn):
-#  code = strtoul(ucn + 2, None, 16)
-#  if 0x7f * 2 + 1 < code:
-#    return -1
-#  return code
-#
+def convert_ucn_to_byte(ucn):
+  code = int(ucn[2:], 16)
+  if 0x7f * 2 + 1 < code:
+    return -1
+  return code
+
 #def handle_syncline(args, loc):
 #  file = None
 #  lineno = strtoul(args, &file, 10)
index 2121e08..2d02217 100644 (file)
--- a/element.py
+++ b/element.py
@@ -160,6 +160,16 @@ def set_text(root, i, text):
   else:
     root[i - 1].tail = text
 
+def to_text(root):
+  return ''.join(
+    [
+      j
+      for i in range(len(root))
+      for j in [get_text(root, i), to_text(root[i])]
+    ] +
+    [get_text(root, len(root))]
+  )
+
 def concatenate(children, factory = Element, *args, **kwargs):
   root = factory(*args, **kwargs)
   for child in children:
index 9f665c2..b3bd033 100644 (file)
--- a/generate_bison.py
+++ b/generate_bison.py
@@ -718,10 +718,10 @@ static const yytype_int16 yyr2[] =
     break;
 '''.format(
                       i,
-                      _lr1dfa.productions[i][1].get_text()
+                      _lr1dfa.productions[i][1][4].get_text()
                     )
                     for i in range(len(_lr1dfa.productions))
-                    if _lr1dfa.productions[i][1] is not None
+                    if _lr1dfa.productions[i][1][4] is not None
                   ]
                 )
               )
index e4f37e8..ed093ff 100644 (file)
--- a/generate_py.py
+++ b/generate_py.py
@@ -140,10 +140,10 @@ YYERROR_VERBOSE = {1:s}
                     (
                       '''  if yychar == -1:
     yylval = None
-    yylloc = None
+    yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
     yychar = lex_yy.yylex()
   yy_element_stack.append(lex_yy.yy_element_space)
-  lex_yy.yy_element_space = element.Element('root')
+  lex_yy.yy_element_space = element.Element() # should use factory here
 '''
                     if actions[i][0] else # take_space_right
                       ''
@@ -152,7 +152,6 @@ YYERROR_VERBOSE = {1:s}
                       [
                         '''  yy_element_stack[{0:s}:{1:s}] = [
     element.Element(
-      'root',
       children = [
         element.concatenate(
           yy_element_stack[{2:s}:{3:s}],
@@ -172,7 +171,7 @@ YYERROR_VERBOSE = {1:s}
                     ),
                     (
                       '''  yy_element_stack[{0:s}:{1:s}] = [
-    element.Element('root')
+    element.Element()
   ]
 '''.format(
                         (
index 1817bee..caf5a41 100755 (executable)
--- a/markup.py
+++ b/markup.py
@@ -1,10 +1,8 @@
 #!/usr/bin/env python3
 
-import lex_yy
+import ast
+import element
 import y_tab
 import sys
 
-lex_yy.gram_piece_append('<root>\n  <AST ref=\"0\">')
-y_tab.yyparse()
-lex_yy.gram_piece_append('</AST>\n</root>\n')
-sys.stdout.write(''.join(lex_yy.gram_piece))
+element.serialize(y_tab.yyparse(ast.AST), sys.stdout)
index 7796dae..1c151b5 100644 (file)
--- a/parse-gram.y
+++ b/parse-gram.y
   import element
   import lex_yy_code
   import state
-  import xml.etree.ElementTree
 
   YYLTYPE = state.location
 }
 
-%code top
-{
-}
-
-%code
-{
-  #current_prec = 0
-  #current_lhs_location = 0
-  #current_lhs_named_ref = []
-  #current_lhs_symbol = []
-  #current_class = unknown_sym
-  #current_type = None
-  gram_piece2 = 0
-  gram_piece3 = 0
-  #nested_rhs = 0
-}
-
 /* Nick %define api.prefix {gram_} */
 %define api.pure full
 %define locations
 %%
 
 input
-  : prologue_declarations "%%" grammar epilogue.opt
-    {
-      insert_after(2, '</AST_Section2>')
-      insert_before(2, '<AST_Section2>')
-      insert_after(0, '</AST_Section1>')
-      insert_before(0, '<AST_Section1>')
-    }
+  : %space (?E{ast.AST.Section1}prologue_declarations) "%%" %space (?E{ast.AST.Section2}grammar) epilogue.opt
   ;
 
         /*------------------------------------.
@@ -259,13 +235,13 @@ input
 prologue_declarations
   : %empty
     {
-      global yychar
+      global yychar, yylval, yylloc
       if yychar == YYEMPTY:
-        yychar = lex_yy.gram_lex()
-      temp = lex_yy.gram_piece[gram_piece2 + 1]
-      lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
-      lex_yy.gram_piece[gram_piece2] = lex_yy.gram_piece[gram_piece2 - 1]
-      lex_yy.gram_piece[gram_piece2 - 1] = temp
+        yylval = None
+        yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
+        yychar = lex_yy.yylex()
+      yy_element_stack[-1] = lex_yy.yy_element_space
+      lex_yy.yy_element_space = element.Element()
     }
   | prologue_declarations prologue_declaration
   ;
@@ -273,206 +249,56 @@ prologue_declarations
 prologue_declaration
   : grammar_declaration
   | "%{...%}"
+  | %space (?E{
+      (
+        ast.AST.Section1.PureParser
+      if $1 == 'api.pure' else
+        ast.AST.Section1.Locations
+      if $1 == 'locations' else
+        ast.AST.Section1.Debug
+      if $1 == 'parse.trace' else
+        element.Element
+      )
+    }"%<flag>")
+  | %space (?E{ast.AST.Section1.Define}"%define" variable value)
+  | %space (?E{ast.AST.Section1.Defines}"%defines")
+  | %space (?E{ast.AST.Section1.Defines}"%defines" STRING)
+  | %space (?E{ast.AST.Section1.ErrorVerbose}"%error-verbose")
+  | %space (?E{ast.AST.Section1.Expect, value = $2}"%expect" INT)
+  | %space (?E{ast.AST.Section1.ExpectRR, value = $2}"%expect-rr" INT)
+  | %space (?E{ast.AST.Section1.FilePrefix}"%file-prefix" STRING)
+  | %space (?E{ast.AST.Section1.GLRParser}"%glr-parser")
+  | %space (?E{ast.AST.Section1.InitialAction}"%initial-action" "{...}")
     {
-      #muscle_code_grow('post_prologue' if union_seen else 'pre_prologue', translate_code($1, @1, True), @1)
-      lex_yy_code.yyin = None
-      lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
-      lex_yy_code.yytext = '' # fool unput()
-      lex_yy_code.unput($1)
-      lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
-      lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2] = \
-        '<AST_Section1_Prologue>%{{{0:s}%}}</AST_Section1_Prologue>'.format(
-          xml.etree.ElementTree.tostring(
-            lex_yy_code.yy_element_space,
-            encoding = 'unicode'
-          )
-        )
-    }
-  | "%<flag>"
-    {
-      #muscle_percent_define_ensure($1, @1, True)
-      if $1 == 'api.pure':
-        insert_after(0, '</AST_Section1_PureParser>')
-        insert_before(0, '<AST_Section1_PureParser>')
-      elif $1 == 'locations':
-        insert_after(0, '</AST_Section1_Locations>')
-        insert_before(0, '<AST_Section1_Locations>')
-      elif $1 == 'parse.trace':
-        insert_after(0, '</AST_Section1_Debug>')
-        insert_before(0, '<AST_Section1_Debug>')
-      else:
-        assert False
-    }
-  | "%define" variable value
-    {
-      #muscle_percent_define_insert($2, @2, $3.kind, $3.chars, MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
-      insert_after(2, '</AST_Section1_Define>')
-      insert_before(0, '<AST_Section1_Define>')
-    }
-  | "%defines"
-    {
-      #defines_flag = True
-      insert_after(0, '</AST_Section1_Defines>')
-      insert_before(0, '<AST_Section1_Defines>')
-    }
-  | "%defines" STRING
-    {
-      #defines_flag = True
-      #spec_defines_file = xstrdup($2)
-      insert_after(1, '</AST_Section1_Defines>')
-      insert_before(0, '<AST_Section1_Defines>')
-    }
-  | "%error-verbose"
-    {
-      #muscle_percent_define_insert('parse.error', @1, muscle_keyword, 'verbose', MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
-      insert_after(0, '</AST_Section1_ErrorVerbose>')
-      insert_before(0, '<AST_Section1_ErrorVerbose>')
-    }
-  | "%expect" INT
-    {
-      #expected_sr_conflicts = $2
-      insert_after(1, '</AST_Section1_Expect>')
-      insert_before(0, '<AST_Section1_Expect value="{0:d}">'.format($2))
-    }
-  | "%expect-rr" INT
-    {
-      #expected_rr_conflicts = $2
-      insert_after(1, '</AST_Section1_ExpectRR>')
-      insert_before(0, '<AST_Section1_ExpectRR value="{0:d}">'.format($2))
-    }
-  | "%file-prefix" STRING
-    {
-      #spec_file_prefix = $2
-      insert_after(1, '</AST_Section1_FilePrefix>')
-      insert_before(0, '<AST_Section1_FilePrefix>')
-    }
-  | "%glr-parser"
-    {
-      #nondeterministic_parser = True
-      #glr_parser = True
-      insert_after(0, '</AST_Section1_GLRParser>')
-      insert_before(0, '<AST_Section1_GLRParser>')
-    }
-  | "%initial-action" "{...}"
-    {
-      #muscle_code_grow('initial_action', translate_code($2, @2, False), @2)
       lex_yy_code.yyin = None
       lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
       lex_yy_code.yytext = '' # fool unput()
       lex_yy_code.unput($2)
       lex_yy_code.sc_context = lex_yy_code.SC_SYMBOL_ACTION # CODE_PROPS_SYMBOL_ACTION
       lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2 + 2] = xml.etree.ElementTree.tostring(
-        lex_yy_code.yy_element_space,
-        encoding = 'unicode'
-      )
-      insert_after(1, '</AST_Section1_InitialAction>')
-      insert_before(0, '<AST_Section1_InitialAction>')
-    }
-  | "%language" STRING
-    {
-      #language_argmatch($2, grammar_prio, @1)
-      insert_after(1, '</AST_Section1_Language>')
-      insert_before(0, '<AST_Section1_Language>')
-    }
-  | "%name-prefix" STRING
-    {
-      #spec_name_prefix = $2
-      insert_after(1, '</AST_Section1_NamePrefix>')
-      insert_before(0, '<AST_Section1_NamePrefix>')
-    }
-  | "%no-lines"
-    {
-      #no_lines_flag = True
-      insert_after(0, '</AST_Section1_Lines>')
-      insert_before(0, '<AST_Section1_Lines value="false">')
-    }
-  | "%nondeterministic-parser"
-    {
-      #nondeterministic_parser = True
-      insert_after(0, '</AST_Section1_NonDeterministicParser>')
-      insert_before(0, '<AST_Section1_NonDeterministicParser>')
-    }
-  | "%output" STRING
-    {
-      #spec_outfile = $2
-      insert_after(1, '</AST_Section1_Output>')
-      insert_before(0, '<AST_Section1_Output>')
-    }
-  | "%param"
-    {
-      #current_param = $1
-    }
-    params
-    {
-      #current_param = param_none
-      insert_after(2, '</AST_Section1_Param>')
-      insert_before(0, '<AST_Section1_Param>')
-    }
-  | "%require" STRING
-    {
-      #version_check(&@2, $2)
-      insert_after(1, '</AST_Section1_Require>')
-      insert_before(0, '<AST_Section1_Require>')
-    }
-  | "%skeleton" STRING
-    {
-      #skeleton_user = $2
-      #if strchr(skeleton_user, ord('/')):
-      #  dir_length = len(current_file)
-      #  skeleton_build = None
-      #  while dir_length and current_file[dir_length - 1] != ord('/'):
-      #    dir_length -= 1
-      #  while dir_length and current_file[dir_length - 1] == ord('/'):
-      #    dir_length -= 1
-      #  skeleton_build = xmalloc(dir_length + 1 + len(skeleton_user) + 1)
-      #  if dir_length > 0:
-      #    memcpy(skeleton_build, current_file, dir_length)
-      #    skeleton_build[dir_length++] = ord('/')
-      #  strcpy(skeleton_build + dir_length, skeleton_user)
-      #  skeleton_user = uniqstr_new(skeleton_build)
-      #  free(skeleton_build)
-      #skeleton_arg(skeleton_user, grammar_prio, @1)
-      insert_after(1, '</AST_Section1_Skeleton>')
-      insert_before(0, '<AST_Section1_Skeleton>')
-    }
-  | "%token-table"
-    {
-      #token_table_flag = True
-      insert_after(0, '</AST_Section1_TokenTable>')
-      insert_before(0, '<AST_Section1_TokenTable>')
-    }
-  | "%verbose"
-    {
-      #report_flag |= report_states
-      insert_after(0, '</AST_Section1_Verbose>')
-      insert_before(0, '<AST_Section1_Verbose>')
-    }
-  | "%yacc"
-    {
-      #yacc_flag = True
-      insert_after(0, '</AST_Section1_YACC>')
-      insert_before(0, '<AST_Section1_YACC>')
-    }
+      assert isinstance(yy_element_stack[-1][0], ast.AST.Section1.InitialAction)
+      assert isinstance(yy_element_stack[-1][0][0], ast.AST.Text)
+      tail = element.get_text(yy_element_stack[-1][0], 1)
+      yy_element_stack[-1][0][0] = lex_yy_code.yy_element_space
+      element.set_text(yy_element_stack[-1][0], 1, tail)
+    }
+  | %space (?E{ast.AST.Section1.Language}"%language" STRING)
+  | %space (?E{ast.AST.Section1.NamePrefix}"%name-prefix" STRING)
+  | %space (?E{ast.AST.Section1.Lines, value = False}"%no-lines")
+  | %space (?E{ast.AST.Section1.NonDeterministicParser}"%nondeterministic-parser")
+  | %space (?E{ast.AST.Section1.Output}"%output" STRING)
+  | %space (?E{ast.AST.Section1.Param}"%param" params)
+  | %space (?E{ast.AST.Section1.Require}"%require" STRING)
+  | %space (?E{ast.AST.Section1.Skeleton}"%skeleton" STRING)
+  | %space (?E{ast.AST.Section1.TokenTable}"%token-table")
+  | %space (?E{ast.AST.Section1.Verbose}"%verbose")
+  | %space (?E{ast.AST.Section1.YACC}"%yacc")
   | /*FIXME: Err?  What is this horror doing here? */ ";"
   ;
 
 params
   : params "{...}"
-    {
-      #add_param(current_param, $2, @2)
-    }
   | "{...}"
-    {
-      #add_param(current_param, $1, @1)
-    }
   ;
 
 /*----------------------.
@@ -482,96 +308,69 @@ params
 grammar_declaration
   : precedence_declaration
   | symbol_declaration
-  | "%start" symbol
-    {
-      #grammar_start_symbol_set($2, @2)
-      insert_after(1, '</AST_Section1Or2_Start>')
-      insert_after(1, '</AST_SymbolRef>')
-      insert_before(1, '<AST_SymbolRef>')
-      insert_before(0, '<AST_Section1Or2_Start>')
-    }
-  | code_props_type "{...}" generic_symlist
+  /* ) ) should be )) */
+  | %space (?E{ast.AST.Section1Or2.Start}"%start" %space (?E{ast.AST.SymbolRef}symbol) )
+  | %space (?E{ast.AST.Section1Or2.CodeProps, _type = $1}code_props_type "{...}" generic_symlist)
     {
-      #code = None
-      #code_props_symbol_action_init(&code, $2, @2)
-      #code_props_translate_code(&code)
       lex_yy_code.yyin = None
       lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
       lex_yy_code.yytext = '' # fool unput()
       lex_yy_code.unput($2)
       lex_yy_code.sc_context = lex_yy_code.SC_SYMBOL_ACTION # CODE_PROPS_SYMBOL_ACTION
       lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2 + 2] = xml.etree.ElementTree.tostring(
-        lex_yy_code.yy_element_space,
-        encoding = 'unicode'
+      assert isinstance(yy_element_stack[-1][0], ast.AST.Section1Or2.CodeProps)
+      assert isinstance(yy_element_stack[-1][0][0], ast.AST.Text)
+      tail = element.get_text(yy_element_stack[-1][0], 1)
+      yy_element_stack[-1][0][0] = lex_yy_code.yy_element_space
+      element.set_text(yy_element_stack[-1][0], 1, tail)
+    }
+  | %space (?E{ast.AST.Section1Or2.DefaultPrec, value = True}"%default-prec")
+  | %space (?E{ast.AST.Section1Or2.DefaultPrec}"%no-default-prec")
+  /* the BracedCode should be removed altogether, put here for compatibility */
+  | %space (?E{ast.AST.Section1Or2.Code}"%code" (?E{ast.AST.ID}) %space (?E{ast.AST.BracedCode}"{...}") )
+    {
+      # most egregious (leftover from when we used to run code scanner on it):
+      assert isinstance(yy_element_stack[-1][0], ast.AST.Section1Or2.Code)
+      assert isinstance(yy_element_stack[-1][0][1], ast.AST.BracedCode)
+      assert isinstance(yy_element_stack[-1][0][1][0], ast.AST.Text)
+      element.set_text(
+        yy_element_stack[-1][0][1][0],
+        0,
+        element.get_text(yy_element_stack[-1][0][1][0], 0)[1:-1] # remove { }
+      )
+      element.set_text(
+        yy_element_stack[-1][0][1],
+        0,
+        element.get_text(yy_element_stack[-1][0][1], 0) + '{'
+      )
+      element.set_text(
+        yy_element_stack[-1][0][1],
+        1,
+        '}' + element.get_text(yy_element_stack[-1][0][1], 1)
       )
-      #list = None
-      #list = $3
-      #while list:
-      #  symbol_list_code_props_set(list, $1, &code)
-      #  list = list->next
-      #symbol_list_free($3)
-      insert_after(2, '</AST_Section1Or2_CodeProps>')
-      insert_before(0, '<AST_Section1Or2_CodeProps _type="{0:d}">'.format($1))
-    }
-  | "%default-prec"
-    {
-      #default_prec = True
-      insert_after(0, '</AST_Section1Or2_DefaultPrec>')
-      insert_before(0, '<AST_Section1Or2_DefaultPrec value="true">')
-    }
-  | "%no-default-prec"
-    {
-      #default_prec = False
-      insert_after(0, '</AST_Section1Or2_DefaultPrec>')
-      insert_before(0, '<AST_Section1Or2_DefaultPrec>')
-    }
-  | "%code" "{...}"
-    {
-      #muscle_code_grow('percent_code()', translate_code_braceless($2, @2), @2)
-      lex_yy_code.yyin = None
-      lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
-      lex_yy_code.yytext = '' # fool unput()
-      lex_yy_code.unput($2[1:-1])
-      lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
-      lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2 + 2] = \
-        '<AST_BracedCode>{{{0:s}}}</AST_BracedCode>'.format(
-          xml.etree.ElementTree.tostring(
-            lex_yy_code.yy_element_space,
-            encoding = 'unicode'
-          )
-        )
-      insert_after(1, '</AST_Section1Or2_Code>')
-      insert_before(0, '<AST_Section1Or2_Code><AST_ID />')
     }
-  | "%code" ID "{...}"
-    {
-      #muscle_percent_code_grow($2, @2, translate_code_braceless($3, @3), @3)
-      lex_yy_code.yyin = None
-      lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
-      lex_yy_code.yytext = '' # fool unput()
-      lex_yy_code.unput($3[1:-1])
-      lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
-      lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2 + 4] = \
-        '<AST_BracedCode>{{{0:s}}}</AST_BracedCode>'.format(
-          xml.etree.ElementTree.tostring(
-            lex_yy_code.yy_element_space,
-            encoding = 'unicode'
-          )
-        )
-      insert_after(2, '</AST_Section1Or2_Code>')
-      insert_before(0, '<AST_Section1Or2_Code>')
+  /* the BracedCode should be removed altogether, put here for compatibility */
+  | %space (?E{ast.AST.Section1Or2.Code}"%code" ID %space (?E{ast.AST.BracedCode}"{...}") )
+    {
+      # most egregious (leftover from when we used to run code scanner on it):
+      assert isinstance(yy_element_stack[-1][0], ast.AST.Section1Or2.Code)
+      assert isinstance(yy_element_stack[-1][0][1], ast.AST.BracedCode)
+      assert isinstance(yy_element_stack[-1][0][1][0], ast.AST.Text)
+      element.set_text(
+        yy_element_stack[-1][0][1][0],
+        0,
+        element.get_text(yy_element_stack[-1][0][1][0], 0)[1:-1] # remove { }
+      )
+      element.set_text(
+        yy_element_stack[-1][0][1],
+        0,
+        element.get_text(yy_element_stack[-1][0][1], 0) + '{'
+      )
+      element.set_text(
+        yy_element_stack[-1][0][1],
+        1,
+        '}' + element.get_text(yy_element_stack[-1][0][1], 1)
+      )
     }
   ;
 
@@ -601,94 +400,44 @@ code_props_type
 %token PERCENT_UNION "%union";
 
 union_name
-  : %empty
-    {
-      insert_before(0, '<AST_ID />')
-    }
+  : (?E{ast.AST.ID}%empty)
   | ID
-    {
-      #muscle_percent_define_insert('api.value.union.name', @1, muscle_keyword, $1, MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
-    }
   ;
 
 grammar_declaration
-  : "%union" union_name "{...}"
-    {
-      #union_seen = True
-      #muscle_code_grow('union_members', translate_code_braceless($3, @3), @3)
-      lex_yy_code.yyin = None
-      lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
-      lex_yy_code.yytext = '' # fool unput()
-      lex_yy_code.unput($3[1:-1])
-      lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
-      lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2 + 4] = \
-        '<AST_BracedCode>{{{0:s}}}</AST_BracedCode>'.format(
-          xml.etree.ElementTree.tostring(
-            lex_yy_code.yy_element_space,
-            encoding = 'unicode'
-          )
-        )
-      insert_after(2, '</AST_Section1Or2_Union>')
-      insert_before(0, '<AST_Section1Or2_Union>')
+  /* the BracedCode should be removed altogether, put here for compatibility */
+  : %space (?E{ast.AST.Section1Or2.Union}"%union" union_name %space (?E{ast.AST.BracedCode}"{...}") )
+    {
+      # most egregious (leftover from when we used to run code scanner on it):
+      assert isinstance(yy_element_stack[-1][0], ast.AST.Section1Or2.Union)
+      assert isinstance(yy_element_stack[-1][0][1], ast.AST.BracedCode)
+      assert isinstance(yy_element_stack[-1][0][1][0], ast.AST.Text)
+      element.set_text(
+        yy_element_stack[-1][0][1][0],
+        0,
+        element.get_text(yy_element_stack[-1][0][1][0], 0)[1:-1] # remove { }
+      )
+      element.set_text(
+        yy_element_stack[-1][0][1],
+        0,
+        element.get_text(yy_element_stack[-1][0][1], 0) + '{'
+      )
+      element.set_text(
+        yy_element_stack[-1][0][1],
+        1,
+        '}' + element.get_text(yy_element_stack[-1][0][1], 1)
+      )
     }
   ;
 
 symbol_declaration
-  : "%nterm"
-    {
-      #current_class = nterm_sym
-    }
-    symbol_defs.1
-    {
-      #current_class = unknown_sym
-      #current_type = None
-      insert_after(2, '</AST_Section1Or2_NTerm>')
-      insert_before(0, '<AST_Section1Or2_NTerm>')
-    }
-  | "%token"
-    {
-      #current_class = token_sym
-    }
-    symbol_defs.1
-    {
-      #current_class = unknown_sym
-      #current_type = None
-      insert_after(2, '</AST_Section1Or2_Token>')
-      insert_before(0, '<AST_Section1Or2_Token>')
-    }
-  | "%type" TAG symbols.1
-    {
-      #list = None
-      #tag_seen = True
-      #list = $3
-      #while list:
-      #  symbol_type_set(list->content.sym, $2, @2)
-      #  list = list->next
-      #symbol_list_free($3)
-      insert_after(2, '</AST_Section1Or2_Type>')
-      insert_before(0, '<AST_Section1Or2_Type>')
-    }
+  : %space (?E{ast.AST.Section1Or2.NTerm}"%nterm" symbol_defs.1)
+  | %space (?E{ast.AST.Section1Or2.Token}"%token" symbol_defs.1)
+  | %space (?E{ast.AST.Section1Or2.Type}"%type" TAG symbols.1)
   ;
 
 precedence_declaration
-  : precedence_declarator tag.opt symbols.prec
-    {
-      #list = None
-      #current_prec += 1
-      #list = $3
-      #while list:
-      #  symbol_type_set(list->content.sym, current_type, @2)
-      #  symbol_precedence_set(list->content.sym, current_prec, $1, @1)
-      #  list = list->next
-      #symbol_list_free($3)
-      #current_type = None
-      insert_after(2, '</AST_Section1Or2_Precedence>')
-      insert_before(0, '<AST_Section1Or2_Precedence _type="{0:d}">'.format(($1 & 3) - 1))
-    }
+  : %space (?E{ast.AST.Section1Or2.Precedence, _type = ($1 & 3) - 1}precedence_declarator tag.opt symbols.prec)
   ;
 
 precedence_declarator
@@ -712,137 +461,49 @@ precedence_declarator
 
 tag.opt
   : %empty
-    {
-      #current_type = None
-    }
   | TAG
-    {
-      #current_type = $1
-      #tag_seen = True
-    }
   ;
 
 /* Just like symbols.1 but accept INT for the sake of POSIX.  */
 symbols.prec
   : symbol.prec
-    {
-      #$$ = symbol_list_sym_new($1, @1)
-    }
   | symbols.prec symbol.prec
-    {
-      #$$ = symbol_list_append($1, symbol_list_sym_new($2, @2))
-    }
   ;
 
 symbol.prec
-  : symbol
-    {
-      #$$ = $1
-      #symbol_class_set($1, token_sym, @1, False)
-      insert_after(0, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef>')
-    }
-  | symbol INT
-    {
-      #$$ = $1
-      #symbol_user_token_number_set($1, $2, @2)
-      #symbol_class_set($1, token_sym, @1, False)
-      insert_after(1, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
-    }
+  : %space (?E{ast.AST.SymbolRef}symbol)
+  | %space (?E{ast.AST.SymbolRef, user_token = $2}symbol INT)
   ;
 
 /* One or more symbols to be %typed. */
 symbols.1
-  : symbol
-    {
-      #$$ = symbol_list_sym_new($1, @1)
-      insert_after(0, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef>')
-    }
-  | symbols.1 symbol
-    {
-      #$$ = symbol_list_append($1, symbol_list_sym_new($2, @2))
-      insert_after(1, '</AST_SymbolRef>')
-      insert_before(1, '<AST_SymbolRef>')
-    }
+  : %space (?E{ast.AST.SymbolRef}symbol)
+  | symbols.1 %space (?E{ast.AST.SymbolRef}symbol)
   ;
 
 generic_symlist
   : generic_symlist_item
-    {
-      #$$ = $1
-    }
   | generic_symlist generic_symlist_item
-    {
-      #$$ = symbol_list_append($1, $2)
-    }
   ;
 
 generic_symlist_item
-  : symbol
-    {
-      #$$ = symbol_list_sym_new($1, @1)
-      insert_after(0, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef>')
-    }
+  : %space (?E{ast.AST.SymbolRef}symbol)
   | tag
-    {
-      #$$ = symbol_list_type_new($1, @1)
-    }
   ;
 
 tag
   : TAG
   | "<*>"
-    {
-      #$$ = uniqstr_new('*')
-    }
   | "<>"
-    {
-      #$$ = uniqstr_new('')
-    }
   ;
 
 /* One token definition.  */
 symbol_def
   : TAG
-    {
-      #current_type = $1
-      #tag_seen = True
-    }
-  | id
-    {
-      #symbol_class_set($1, current_class, @1, True)
-      #symbol_type_set($1, current_type, @1)
-      insert_after(0, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef>')
-    }
-  | id INT
-    {
-      #symbol_class_set($1, current_class, @1, True)
-      #symbol_type_set($1, current_type, @1)
-      #symbol_user_token_number_set($1, $2, @2)
-      insert_after(1, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
-    }
-  | id string_as_id
-    {
-      #symbol_class_set($1, current_class, @1, True)
-      #symbol_type_set($1, current_type, @1)
-      #symbol_make_alias($1, $2, @$)
-      insert_after(1, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef>')
-    }
-  | id INT string_as_id
-    {
-      #symbol_class_set($1, current_class, @1, True)
-      #symbol_type_set($1, current_type, @1)
-      #symbol_user_token_number_set($1, $2, @2)
-      #symbol_make_alias($1, $3, @$)
-      insert_after(2, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
-    }
+  | %space (?E{ast.AST.SymbolRef}id)
+  | %space (?E{ast.AST.SymbolRef, user_token = $2}id INT)
+  | %space (?E{ast.AST.SymbolRef}id string_as_id)
+  | %space (?E{ast.AST.SymbolRef, user_token = $2}id INT string_as_id)
   ;
 
 /* One or more symbol definitions. */
@@ -872,33 +533,12 @@ rules_or_grammar_declaration
   ;
 
 rules
-  : id_colon named_ref.opt
-    {
-      #current_lhs($1, @1, $2)
-    }
-    rhses.1
-    {
-      #current_lhs(0, @1, 0)
-      insert_after(3, '</AST_Section2_Rules>')
-      insert_after(0, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef>')
-      insert_before(0, '<AST_Section2_Rules>')
-    }
+  : %space (?E{ast.AST.Section2.Rules}(?E{ast.AST.SymbolRef}id_colon) named_ref.opt rhses.1)
   ;
 
 rhses.1
-  : rhs
-    {
-      #grammar_current_rule_end(@1)
-      insert_after(0, '</AST_Production>')
-      insert_before(0, '<AST_Production>')
-    }
-  | rhses.1 "|" rhs
-    {
-      #grammar_current_rule_end(@3)
-      insert_after(2, '</AST_Production>')
-      insert_before(2, '<AST_Production>')
-    }
+  : %space (?E{ast.AST.Production}rhs)
+  | rhses.1 "|" %space (?E{ast.AST.Production}rhs)
   | rhses.1 ";"
   ;
 
@@ -909,80 +549,40 @@ rhses.1
 rhs
   : %empty
     {
-      global yychar
-      #if nested_rhs:
-      #  nested_rhs -= 1
-      #else:
-      #  grammar_current_rule_begin(current_lhs_symbol, current_lhs_location, current_lhs_named_ref)
+      global yychar, yylval, yylloc
       if yychar == YYEMPTY:
-        yychar = lex_yy.gram_lex()
-      temp = lex_yy.gram_piece[gram_piece2 + 1]
-      lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
-      lex_yy.gram_piece[gram_piece2] = lex_yy.gram_piece[gram_piece2 - 1]
-      lex_yy.gram_piece[gram_piece2 - 1] = temp
-    }
-  | rhs symbol named_ref.opt
-    {
-      #grammar_current_rule_symbol_append($2, @2, $3)
-      insert_after(2, '</AST_Production_SymbolRef>')
-      insert_after(1, '</AST_SymbolRef>')
-      insert_before(1, '<AST_Production_SymbolRef><AST_SymbolRef>')
+        yylval = None
+        yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
+        yychar = lex_yy.yylex()
+      yy_element_stack[-1] = lex_yy.yy_element_space
+      lex_yy.yy_element_space = element.Element()
     }
-  | rhs "{...}" named_ref.opt
+  | rhs %space (?E{ast.AST.Production.SymbolRef}(?E{ast.AST.SymbolRef}symbol) named_ref.opt)
+  | rhs %space (?E{ast.AST.Production.Action}"{...}" named_ref.opt)
     {
-      #grammar_current_rule_action_append($2, @2, $3, False)
       lex_yy_code.yyin = None
       lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
       lex_yy_code.yytext = '' # fool unput()
       lex_yy_code.unput($2)
       lex_yy_code.sc_context = lex_yy_code.SC_RULE_ACTION # CODE_PROPS_RULE_ACTION
       lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2 + 2] = xml.etree.ElementTree.tostring(
-        lex_yy_code.yy_element_space,
-        encoding = 'unicode'
-      )
-      insert_after(2, '</AST_Production_Action>')
-      insert_before(1, '<AST_Production_Action>')
+      assert isinstance(yy_element_stack[-1][0], ast.AST.Production.Action)
+      assert isinstance(yy_element_stack[-1][0][0], ast.AST.Text)
+      tail = element.get_text(yy_element_stack[-1][0], 1)
+      yy_element_stack[-1][0][0] = lex_yy_code.yy_element_space
+      element.set_text(yy_element_stack[-1][0], 1, tail)
     }
   | rhs "%?{...}"
     {
       #grammar_current_rule_action_append($2, @2, None, True)
     }
-  | rhs "%empty"
-    {
-      #grammar_current_rule_empty_set(@2)
-      insert_after(1, '</AST_Production_Empty>')
-      insert_before(1, '<AST_Production_Empty>')
-    }
-  | rhs "%prec" symbol
-    {
-      #grammar_current_rule_prec_set($3, @3)
-      insert_after(2, '</AST_Production_Prec>')
-      insert_after(2, '</AST_SymbolRef>')
-      insert_before(2, '<AST_SymbolRef>')
-      insert_before(1, '<AST_Production_Prec>')
-    }
-  | rhs "%dprec" INT
-    {
-      #grammar_current_rule_dprec_set($3, @3)
-      insert_after(2, '</AST_Production_DPrec>')
-      insert_before(1, '<AST_Production_DPrec value="{0:d}">'.format($3))
-    }
-  | rhs "%merge" TAG
-    {
-      #grammar_current_rule_merge_set($3, @3)
-      insert_after(2, '</AST_Production_Merge>')
-      insert_before(1, '<AST_Production_Merge>')
-    }
+  | rhs %space (?E{ast.AST.Production.Empty}"%empty")
+  /* ) ) should be )) */
+  | rhs %space (?E{ast.AST.Production.Prec}"%prec" %space (?E{ast.AST.SymbolRef}symbol) )
+  | rhs %space (?E{ast.AST.Production.DPrec, value = $3}"%dprec" INT)
+  | rhs %space (?E{ast.AST.Production.Merge}"%merge" TAG)
   /* Nick extra rules for element groups */
-  | rhs '('
-    /*{
-      #nested_rhs += 1
-    }*/
-    rhs ')'
+  | rhs %space (?E{ast.AST.Production.GroupElement}'(' rhs ')')
     {
       lex_yy_code.yyin = None
       lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
@@ -990,36 +590,19 @@ rhs
       lex_yy_code.unput($2[4:-1])
       lex_yy_code.sc_context = lex_yy_code.SC_RULE_ACTION # CODE_PROPS_RULE_ACTION
       lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2 + 2] = \
-        '(?E{{{0:s}}}'.format(
-          xml.etree.ElementTree.tostring(
-            lex_yy_code.yy_element_space,
-            encoding = 'unicode'
-          )
-        )
-      insert_after(3, '</AST_Production_GroupElement>')
-      insert_before(1, '<AST_Production_GroupElement>')
+      assert isinstance(yy_element_stack[-1][0], ast.AST.Production.GroupElement)
+      assert isinstance(yy_element_stack[-1][0][0], ast.AST.Text)
+      tail = element.get_text(yy_element_stack[-1][0], 1)
+      yy_element_stack[-1][0][0] = lex_yy_code.yy_element_space
+      element.set_text(yy_element_stack[-1][0], 1, tail)
     }
   /* Nick added %space */
-  | rhs "%space"
-    {
-      insert_after(1, '</AST_Production_Space>')
-      insert_before(1, '<AST_Production_Space>')
-    }
+  | rhs %space (?E{ast.AST.Production.Space}"%space")
   ;
 
 named_ref.opt
   : %empty
-    {
-      #$$ = 0
-    }
   | BRACKETED_ID
-    {
-      #$$ = named_ref_new($1, @1)
-    }
   ;
 
 /*---------------------.
@@ -1031,9 +614,6 @@ named_ref.opt
 variable
   : ID
   | STRING
-    {
-      #$$ = uniqstr_new($1)
-    }
   ;
 
 /* Some content or empty by default. */
@@ -1059,25 +639,9 @@ variable
 
 value
   : %empty
-    {
-      #$$.kind = muscle_keyword
-      #$$.chars = ''
-    }
   | ID
-    {
-      #$$.kind = muscle_keyword
-      #$$.chars = $1
-    }
   | STRING
-    {
-      #$$.kind = muscle_string
-      #$$.chars = $1
-    }
   | "{...}"
-    {
-      #$$.kind = muscle_code
-      #$$.chars = strip_braces($1)
-    }
   ;
 
 /*--------------.
@@ -1089,22 +653,11 @@ value
 
 id
   : ID
-    {
-      #$$ = symbol_from_uniqstr($1, @1)
-    }
   | CHAR
-    {
-      #$$ = symbol_get(char_name($1), @1)
-      #symbol_class_set($$, token_sym, @1, False)
-      #symbol_user_token_number_set($$, $1, @1)
-    }
   ;
 
 id_colon
   : ID_COLON
-    {
-      #$$ = symbol_from_uniqstr($1, @1)
-    }
   ;
 
 symbol
@@ -1115,21 +668,11 @@ symbol
 /* A string used as an ID: quote it.  */
 string_as_id
   : STRING
-    {
-      #$$ = symbol_get(quotearg_style(c_quoting_style, $1), @1)
-      #symbol_class_set($$, token_sym, @1, False)
-    }
   ;
 
 epilogue.opt
   : %empty
-  | "%%" EPILOGUE
-    {
-      #muscle_code_grow('epilogue', translate_code($2, @2, True), @2)
-      #code_scanner_last_string_free()
-      insert_after(1, '</AST_Section3>')
-      insert_after(0, '<AST_Section3>')
-    }
+  | "%%" (?E{ast.AST.Section3}EPILOGUE %space)
   ;
 
 %%
@@ -1218,15 +761,3 @@ epilogue.opt
 #  current_lhs_location = loc
 #  free(current_lhs_named_ref)
 #  current_lhs_named_ref = ref
-
-def insert_before(n, str):
-  global gram_piece3
-  lex_yy.gram_piece_insert(gram_piece2 + n * 2, str)
-  lex_yy.gram_piece0 += 1
-  gram_piece3 += 1
-
-def insert_after(n, str):
-  global gram_piece3
-  lex_yy.gram_piece_insert(gram_piece2 + n * 2 + 1, str)
-  lex_yy.gram_piece0 += 1
-  gram_piece3 += 1
index 4b3a89b..e6863a3 100644 (file)
@@ -20,6 +20,7 @@
 %option debug nodefault noinput noyywrap never-interactive
 
 %{
+  import ast
   import state
   import y_tab
 
   #def gram_scanner_last_string_free():
   #  del obstack_for_string[:]
 
-  gram_piece = []
-  gram_piece0 = 0
-  gram_piece1 = 0
   markup_stack = []
 
-  percent_percent_count = 0;
+  percent_percent_count = 0
 
   # these should be yylex()-local, but moved to here, see further down:
   nesting = 0
   context_state = -1
   id_loc = state.location()
-  code_start = scanner_cursor.copy()
-  token_start = scanner_cursor.copy()
-  #first = True
-  if True: #first:
-    scanner_cursor = y_tab.yylloc.start.copy()
-    #first = False
+  code_start = None
+  token_start = None
+  scanner_cursor = state.boundary('<stdin>', 0, 0)
 %}
 
 %x SC_YACC_COMMENT
@@ -254,17 +249,13 @@ eqopt    ([[:space:]]*=)?
   "|"                          return y_tab.PIPE
   ";"                          return y_tab.SEMICOLON
 
-  {id} {
+  (?E{ast.AST.ID}{id}) {
     global id_loc, bracketed_id_str
     y_tab.yylval = yytext
     id_loc = y_tab.yylloc
     bracketed_id_str = None
+    markup_push(element.Element)
     BEGIN(SC_AFTER_IDENTIFIER)
-    gram_piece_pack()
-    gram_piece_append('<AST_ID>')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_ID>')
-    gram_piece_pack()
   }
 
   {int} {
@@ -285,10 +276,10 @@ eqopt    ([[:space:]]*=)?
     global token_start
     token_start = y_tab.yylloc.start
     BEGIN(SC_ESCAPED_CHARACTER)
-    gram_piece_pack()
-    markup_stack.append(len(gram_piece)) # <AST_Char>
-    gram_piece_flush(len(yytext))
-    markup_stack.append(len(gram_piece)) # <AST_Text>
+    markup_push(element.Element)
+    markup_push(ast.AST.Char)
+    markup_flush(len(yytext))
+    markup_push(ast.AST.Text)
   }
 
   /* Strings. */
@@ -296,10 +287,10 @@ eqopt    ([[:space:]]*=)?
     global token_start
     token_start = y_tab.yylloc.start
     BEGIN(SC_ESCAPED_STRING)
-    gram_piece_pack()
-    markup_stack.append(len(gram_piece)) # <AST_String>
-    gram_piece_flush(len(yytext))
-    markup_stack.append(len(gram_piece)) # <AST_Text>
+    markup_push(element.Element)
+    markup_push(ast.AST.String)
+    markup_flush(len(yytext))
+    markup_push(ast.AST.Text)
   }
 
   /* Prologue. */
@@ -307,10 +298,10 @@ eqopt    ([[:space:]]*=)?
     global code_start
     code_start = y_tab.yylloc.start
     BEGIN(SC_PROLOGUE)
-    gram_piece_pack()
-    markup_stack.append(len(gram_piece)) # <AST_Section1_Prologue>
-    gram_piece_flush(len(yytext))
-    markup_stack.append(len(gram_piece)) # <AST_Text>
+    markup_push(element.Element)
+    markup_push(ast.AST.Section1.Prologue)
+    markup_flush(len(yytext))
+    markup_push(ast.AST.Text)
   }
 
   /* Code in between braces.  */
@@ -320,10 +311,15 @@ eqopt    ([[:space:]]*=)?
     nesting = 0
     code_start = y_tab.yylloc.start
     BEGIN(SC_BRACED_CODE)
-    gram_piece_pack()
-    markup_stack.append(len(gram_piece)) # <AST_BracedCode>
-    gram_piece_flush(len(yytext))
-    markup_stack.append(len(gram_piece)) # <AST_Text>
+    markup_push(element.Element)
+    # new way, includes braces, wrapped by <AST_Production_Action> later
+    markup_push(ast.AST.Text)
+    markup_flush(len(yytext))
+    # old way 
+    #markup_push(ast.AST.BracedCode)
+    #markup_flush(len(yytext))
+    #markup_push(ast.AST.Text)
+    # to here
   }
 
   /* Semantic predicate. */
@@ -331,11 +327,11 @@ eqopt    ([[:space:]]*=)?
     global nesting, code_start
     nesting = 0
     code_start = y_tab.yylloc.start
-    gram_piece_pack()
-    markup_stack.append(len(gram_piece)) # <AST_BracedPredicate>
-    gram_piece_flush(len(yytext))
-    markup_stack.append(len(gram_piece)) # <AST_Text>
     BEGIN(SC_PREDICATE)
+    markup_push(element.Element)
+    markup_push(ast.AST.BracedPredicate)
+    markup_flush(len(yytext))
+    markup_push(ast.AST.Text)
   }
 
   /* Nick extra rules for element groups */
@@ -345,32 +341,28 @@ eqopt    ([[:space:]]*=)?
     nesting = 0
     code_start = y_tab.yylloc.start
     BEGIN(SC_ELEMENT_GROUP)
-    gram_piece_pack()
-    gram_piece_flush(len(yytext))
-    markup_stack.append(len(gram_piece)) # <AST_Text>
+    markup_push(element.Element)
+    markup_flush(len(yytext))
+    markup_push(ast.AST.Text)
   }
   ")"                          return ord(')')
 
   /* A type. */
-  "<*>" {
-    gram_piece_pack()
-    gram_piece_append('<AST_TagRef>&lt;<AST_Text>*</AST_Text>&gt;</AST_TagRef>')
-    return ~y_tab.TAG_ANY
+  "(?E{ast.AST.TagRef}<(?E{ast.AST.Text}*)>)" {
+    return y_tab.TAG_ANY
   }
-  "<>" {
-    gram_piece_pack()
-    gram_piece_append('<AST_TagRef>&lt;<AST_Text />&gt;</AST_TagRef>')
-    return ~y_tab.TAG_NONE
+  "(?E{ast.AST.TagRef}<(?E{ast.AST.Text})>)" {
+    return y_tab.TAG_NONE
   }
   "<" {
     global nesting, token_start
     nesting = 0
     token_start = y_tab.yylloc.start
     BEGIN(SC_TAG)
-    gram_piece_pack()
-    markup_stack.append(len(gram_piece)) # <AST_TagRef>
-    gram_piece_flush(len(yytext))
-    markup_stack.append(len(gram_piece)) # <AST_Text>
+    markup_push(element.Element)
+    markup_push(ast.AST.TagRef)
+    markup_flush(len(yytext))
+    markup_push(ast.AST.Text)
   }
 
   "%%" {
@@ -378,11 +370,6 @@ eqopt    ([[:space:]]*=)?
     percent_percent_count += 1
     if percent_percent_count == 2:
       BEGIN(SC_EPILOGUE)
-      gram_piece_pack()
-      gram_piece_escape(yytext)
-      gram_piece_pack()
-      gram_piece_pack()
-      return ~y_tab.PERCENT_PERCENT
     return y_tab.PERCENT_PERCENT
   }
 
@@ -425,32 +412,43 @@ eqopt    ([[:space:]]*=)?
     global bracketed_id_start, bracketed_id_context_state
     if bracketed_id_str is not None:
       scanner_cursor.column -= len(yytext)
-      yyless(0)
+      markup_yyless(0)
+      markup_pop_token() # element.Element
       BEGIN(SC_RETURN_BRACKETED_ID)
       y_tab.yylloc = id_loc
       return y_tab.ID
     else:
+      markup_pop_token() # element.Element
       bracketed_id_start = y_tab.yylloc.start
       bracketed_id_context_state = YY_START()
       BEGIN(SC_BRACKETED_ID)
   }
   ":" {
+    markup_pop_token() # element.Element
     BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
     y_tab.yylloc = id_loc
-    gram_piece_escape(yytext)
-    return ~y_tab.ID_COLON
+    markup_flush(len(yytext))
+    return y_tab.ID_COLON
   }
   . {
     scanner_cursor.column -= len(yytext)
-    yyless(0)
+    markup_yyless(0)
+
+    # total kludge: put back all whitespace/comments after the ID, and rescan
+    # (this will mess up the position tracking, need to revisit and fix later)
+    unput(element.get_text(yy_element_space, len(yy_element_space)))
+    element.set_text(yy_element_space, len(yy_element_space), '')
+
+    markup_pop_token() # element.Element
     BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
     y_tab.yylloc = id_loc
-    return ~y_tab.ID
+    return y_tab.ID
   }
   <<EOF>> {
+    markup_pop_token() # element.Element
     BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
     y_tab.yylloc = id_loc
-    return ~y_tab.ID
+    return y_tab.ID
   }
 }
 
@@ -494,7 +492,7 @@ eqopt    ([[:space:]]*=)?
   . {
     global bracketed_id_str
     scanner_cursor.column -= len(yytext)
-    yyless(0)
+    markup_yyless(0)
     y_tab.yylval = bracketed_id_str
     bracketed_id_str = None
     y_tab.yylloc = bracketed_id_loc
@@ -565,13 +563,11 @@ eqopt    ([[:space:]]*=)?
     y_tab.yylloc.start = token_start
     y_tab.yylval = gram_last_string
     BEGIN(INITIAL)
-    gram_piece_insert(markup_stack.pop(), '<AST_Text>')
-    gram_piece_append('</AST_Text>')
-    gram_piece_escape(yytext)
-    gram_piece_insert(markup_stack.pop(), '<AST_String>')
-    gram_piece_append('</AST_String>')
-    gram_piece_pack()
-    return ~y_tab.STRING
+    markup_pop() # ast.AST.Text
+    markup_flush(len(yytext))
+    markup_pop() # ast.AST.String
+    markup_pop_token() # element.Element
+    return y_tab.STRING
   }
   <<EOF>>                      unexpected_eof(token_start, '"')
   "\n"                         unexpected_newline(token_start, '"')
@@ -598,13 +594,11 @@ eqopt    ([[:space:]]*=)?
       y_tab.yylval = ord(gram_last_string[0])
     #del obstack_for_string[:]
     BEGIN(INITIAL)
-    gram_piece_insert(markup_stack.pop(), '<AST_Text>')
-    gram_piece_append('</AST_Text>')
-    gram_piece_escape(yytext)
-    gram_piece_insert(markup_stack.pop(), '<AST_Char>')
-    gram_piece_append('</AST_Char>')
-    gram_piece_pack()
-    return ~y_tab.CHAR
+    markup_pop() # ast.AST.Text
+    markup_flush(len(yytext))
+    markup_pop() # ast.AST.Char
+    markup_pop_token() # element.Element
+    return y_tab.CHAR
   }
   "\n"                         unexpected_newline(token_start, '\'')
   <<EOF>>                      unexpected_eof(token_start, '\'')
@@ -628,13 +622,11 @@ eqopt    ([[:space:]]*=)?
       y_tab.yylval = gram_last_string
       #del obstack_for_string[:]
       BEGIN(INITIAL)
-      gram_piece_insert(markup_stack.pop(), '<AST_Text>')
-      gram_piece_append('</AST_Text>')
-      gram_piece_escape(yytext)
-      gram_piece_insert(markup_stack.pop(), '<AST_TagRef>')
-      gram_piece_append('</AST_TagRef>')
-      gram_piece_pack()
-      return ~y_tab.TAG
+      markup_pop() # ast.AST.Text
+      markup_flush(len(yytext))
+      markup_pop() # ast.AST.TagRef
+      markup_pop_token() # element.Element
+      return y_tab.TAG
     obstack_for_string.append(yytext)
   }
 
@@ -654,92 +646,55 @@ eqopt    ([[:space:]]*=)?
 
 <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
 {
-  \\[0-7]{1,3} {
-    c = strtoul(yytext + 1, None, 8)
+  (?E{ast.AST.Text.Escape, character = int(yy_groups[2], 8)}\\([0-7]{1,3})) {
+    c = int(yytext[1:], 8) # yytext is a str: slice past the backslash (C's "yytext + 1" would raise TypeError)
     if not c or 0x7f * 2 + 1 < c:
       state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
     else:
       obstack_for_string.append(chr(c))
-      rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', int(c))
-      gram_piece_append(gram_piece_temp)
-      gram_piece_flush(len(yytext))
-      gram_piece_append('</AST_Text_Escape>')
   }
 
-  \\x[0-9abcdefABCDEF]+ {
-    c = strtoul(yytext + 2, None, 16)
+  (?E{ast.AST.Text.Escape, character = int(yy_groups[2], 16)}\\x([0-9abcdefABCDEF]+)) {
+    c = int(yytext[2:], 16) # yytext is a str: slice past the "\x" prefix (C's "yytext + 2" would raise TypeError)
     if not c or 0x7f * 2 + 1 < c:
       state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
     else:
       obstack_for_string.append(chr(c))
-      rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', int(c))
-      gram_piece_append(gram_piece_temp)
-      gram_piece_flush(len(yytext))
-      gram_piece_append('</AST_Text_Escape>')
   }
 
-  \\a {
+  (?E{ast.AST.Text.Escape, character = 7}\\a) {
     obstack_for_string.append('\a')
-    gram_piece_append('<AST_Text_Escape character="7">')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
   }
-  \\b {
+  (?E{ast.AST.Text.Escape, character = 8}\\b) {
     obstack_for_string.append('\b')
-    gram_piece_append('<AST_Text_Escape character="8">')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
   }
-  \\f {
+  (?E{ast.AST.Text.Escape, character = 12}\\f) {
     obstack_for_string.append('\f')
-    gram_piece_append('<AST_Text_Escape character="12">')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
   }
-  \\n {
+  (?E{ast.AST.Text.Escape, character = 10}\\n) {
     obstack_for_string.append('\n')
-    gram_piece_append('<AST_Text_Escape character="10">')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
   }
-  \\r {
+  (?E{ast.AST.Text.Escape, character = 13}\\r) {
     obstack_for_string.append('\r')
-    gram_piece_append('<AST_Text_Escape character="13">')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
   }
-  \\t {
+  (?E{ast.AST.Text.Escape, character = 9}\\t) {
     obstack_for_string.append('\t')
-    gram_piece_append('<AST_Text_Escape character="9">')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
   }
-  \\v {
+  (?E{ast.AST.Text.Escape, character = 11}\\v) {
     obstack_for_string.append('\v')
-    gram_piece_append('<AST_Text_Escape character="11">')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
   }
 
   /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
-  \\("\""|"'"|"?"|"\\") {
+  (?E{ast.AST.Text.Escape, character = ord(yy_groups[2])}\\("\""|"'"|"?"|"\\")) {
     obstack_for_string.append(yytext[1])
-    rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', yytext[1])
-    gram_piece_append(gram_piece_temp)
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
   }
  
-  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
+  (?E{ast.AST.Text.Escape, character = int(('' if yy_groups[3] is None else yy_groups[3]) + yy_groups[4], 16)}\\(u|U([0-9abcdefABCDEF]{4}))([0-9abcdefABCDEF]{4})) {
     c = convert_ucn_to_byte(yytext)
     if c <= 0:
       state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
     else:
       obstack_for_string.append(chr(c))
-      rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', c)
-      gram_piece_append(gram_piece_temp)
-      gram_piece_flush(len(yytext))
-      gram_piece_append('</AST_Text_Escape>')
   }
   \\(.|\n) {
     p = yytext[1:]
@@ -857,13 +812,16 @@ eqopt    ([[:space:]]*=)?
       y_tab.yylloc.start = code_start
       y_tab.yylval = gram_last_string
       BEGIN(INITIAL)
-      gram_piece_insert(markup_stack.pop(), '<AST_Text>')
-      gram_piece_append('</AST_Text>')
-      gram_piece_escape(yytext)
-      gram_piece_insert(markup_stack.pop(), '<AST_BracedCode>')
-      gram_piece_append('</AST_BracedCode>')
-      gram_piece_pack()
-      return ~y_tab.BRACED_CODE
+      # new way, includes braces, wrapped by <AST_Production_Action> later
+      markup_flush(len(yytext))
+      markup_pop() # ast.AST.Text
+      # old way
+      #markup_pop() # ast.AST.Text
+      #markup_flush(len(yytext))
+      #markup_pop() # ast.AST.BracedCode
+      # to here
+      markup_pop_token() # element.Element
+      return y_tab.BRACED_CODE
   }
 }
 
@@ -878,13 +836,11 @@ eqopt    ([[:space:]]*=)?
       y_tab.yylloc.start = code_start
       y_tab.yylval = gram_last_string
       BEGIN(INITIAL)
-      gram_piece_insert(markup_stack.pop(), '<AST_Text>')
-      gram_piece_append('</AST_Text>')
-      gram_piece_escape(yytext)
-      gram_piece_insert(markup_stack.pop(), '<AST_BracedPredicate>')
-      gram_piece_append('</AST_BracedPredicate>')
-      gram_piece_pack()
-      return ~y_tab.BRACED_PREDICATE
+      markup_pop() # ast.AST.Text
+      markup_flush(len(yytext))
+      markup_pop() # ast.AST.BracedPredicate
+      markup_pop_token() # element.Element
+      return y_tab.BRACED_PREDICATE
     else:
       obstack_for_string.append('}')
   }
@@ -904,11 +860,10 @@ eqopt    ([[:space:]]*=)?
       y_tab.yylval = gram_last_string
       #del obstack_for_string[:]
       BEGIN(INITIAL)
-      gram_piece_insert(markup_stack.pop(), '<AST_Text>')
-      gram_piece_append('</AST_Text>')
-      gram_piece_escape(yytext)
-      gram_piece_pack()
-      return ~ord('(')
+      markup_pop() # ast.AST.Text
+      markup_flush(len(yytext))
+      markup_pop_token() # element.Element
+      return ord('(')
   }
 }
 
@@ -925,13 +880,11 @@ eqopt    ([[:space:]]*=)?
     y_tab.yylloc.start = code_start
     y_tab.yylval = gram_last_string
     BEGIN(INITIAL)
-    gram_piece_insert(markup_stack.pop(), '<AST_Text>')
-    gram_piece_append('</AST_Text>')
-    gram_piece_escape(yytext)
-    gram_piece_insert(markup_stack.pop(), '<AST_Section1_Prologue>')
-    gram_piece_append('</AST_Section1_Prologue>')
-    gram_piece_pack()
-    return ~y_tab.PROLOGUE
+    markup_pop() # ast.AST.Text
+    markup_flush(len(yytext))
+    markup_pop() # ast.AST.Section1.Prologue
+    markup_pop_token() # element.Element
+    return y_tab.PROLOGUE
   }
 
   <<EOF>>                      unexpected_eof(code_start, '%}')
@@ -952,8 +905,7 @@ eqopt    ([[:space:]]*=)?
     y_tab.yylloc.start = code_start
     y_tab.yylval = gram_last_string
     BEGIN(INITIAL)
-    gram_piece_pack()
-    return ~y_tab.EPILOGUE
+    return y_tab.EPILOGUE
   }
 }
 
@@ -1003,12 +955,12 @@ def scan_integer(number, base, loc):
     num = 0x7fffffff
   return num
 
-#def convert_ucn_to_byte(ucn):
-#  code = strtoul(ucn + 2, None, 16)
-#  if 0x7f * 2 + 1 < code:
-#    return -1
-#  return code
-#
+def convert_ucn_to_byte(ucn):
+  code = int(ucn[2:], 16)
+  if 0x7f * 2 + 1 < code:
+    return -1
+  return code
+
 #def handle_syncline(args, loc):
 #  file = None
 #  lineno = strtoul(args, &file, 10)
@@ -1044,31 +996,52 @@ def unexpected_newline(start, token_end):
 #  del obstack_for_string[:]
 #  yy_delete_buffer(YY_CURRENT_BUFFER)
 
-def gram_piece_append(str):
-  gram_piece.append(str)
-
-def gram_piece_insert(n, str):
-  gram_piece[n:n] = [str]
-
-xml_escape = {'<': '&lt;', '>': '&gt;', '&': '&amp;'}
-def gram_piece_escape(str):
-  gram_piece.append(''.join([xml_escape.get(i, i) for i in str]))
-
-def gram_piece_flush(n):
-  global yytext
-  gram_piece_escape(yytext[:n])
-  yytext = yytext[n:]
-
-def gram_piece_pack():
-  global gram_piece0
-  gram_piece[gram_piece0:] = [''.join(gram_piece[gram_piece0:])]
-  gram_piece0 += 1
-
-def gram_lex():
-  result = yylex()
-  if result < 0:
-    return ~result
-  gram_piece_pack()
-  gram_piece_escape(yytext)
-  gram_piece_pack()
-  return result
+# These helpers add markup to sequences that are recognized over several
+# iterations of yylex(). It would be better to use more complex regular
+# expressions, so that each such sequence is matched in one go:
+
+def markup_flush(n):
+  text = element.get_text(yy_element_token, 0)
+  element.set_text(
+    yy_element_space,
+    len(yy_element_space),
+    element.get_text(yy_element_space, len(yy_element_space)) + text[:n]
+  )
+  element.set_text(yy_element_token, 0, text[n:])
+
+def markup_yyless(n):
+  yyless(n)
+  element.set_text(
+    yy_element_token,
+    0,
+    element.get_text(yy_element_token, 0)[:n]
+  )
+
+def markup_push(factory, *args, **kwargs):
+  global yy_element_space
+  markup_stack.append(yy_element_space)
+  yy_element_space = factory(*args, **kwargs)
+
+def markup_pop():
+  global yy_element_space
+  _element = yy_element_space
+  yy_element_space = markup_stack.pop()
+  yy_element_space.append(_element)
+  #element.set_text(yy_element_space, len(yy_element_space), '')
+
+def markup_pop_token():
+  global yy_element_space, yy_element_token
+
+  # append yy_element_token contents onto yy_element_space
+  i = len(yy_element_space)
+  element.set_text(
+    yy_element_space,
+    i,
+    element.get_text(yy_element_space, i) +
+    element.get_text(yy_element_token, 0)
+  )
+  yy_element_space[i:] = yy_element_token[:]
+
+  # exchange, so that space is now prepended onto token
+  yy_element_token = yy_element_space
+  yy_element_space = markup_stack.pop()
index cb6d892..ed700a0 100644 (file)
 # the GNU General Public License without this special exception.
 
 import bisect
+import lex_yy
+
+# this can be redefined in SECTION1
+class YYLTYPE:
+  def __init__(
+    self,
+    first_line = 0,
+    first_column = 0,
+    last_line = 0,
+    last_column = 0
+  ):
+    self.first_line = first_line
+    self.first_column = first_column
+    self.last_line = last_line
+    self.last_column = last_column
 
 # GENERATE SECTION1
 
@@ -37,7 +52,7 @@ yyval = None
 yyloc = None
 
 yylval = None
-yylloc = None
+yylloc = YYLTYPE()
 
 # GENERATE SECTION2
 
@@ -56,9 +71,9 @@ def yyparse():
     if reduce == -1:
       if yychar == -1:
         yylval = None
-        yylloc = None
-        yychar = yylex()
-        print('yychar', yychar, 'yylval', yylval, 'yylloc', yylloc)
+        yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
+        yychar = lex_yy.yylex()
+        #print('yychar', yychar, 'yylval', yylval, 'yylloc', yylloc, 'lex_yy.yytext', lex_yy.yytext)
       action = yy_lr1dfa_states[state][1][
         bisect.bisect_right(yy_lr1dfa_states[state][0], yychar)
       ]
index e9c9780..843d913 100644 (file)
@@ -28,6 +28,20 @@ import element
 import lex_yy
 #import xml.etree.ElementTree
 
+# this can be redefined in SECTION1
+class YYLTYPE:
+  def __init__(
+    self,
+    first_line = 0,
+    first_column = 0,
+    last_line = 0,
+    last_column = 0
+  ):
+    self.first_line = first_line
+    self.first_column = first_column
+    self.last_line = last_line
+    self.last_column = last_column
 # GENERATE SECTION1
 
 # GENERATE TOKENS
@@ -40,7 +54,7 @@ yyval = None
 yyloc = None
 
 yylval = None
-yylloc = None
+yylloc = YYLTYPE()
 
 yy_element_stack = None
 
@@ -63,7 +77,7 @@ def yyparse(factory, *args, **kwargs):
     if reduce == -1:
       if yychar == -1:
         yylval = None
-        yylloc = None
+        yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
         yychar = lex_yy.yylex()
         #print('yychar', yychar, 'yylval', yylval, 'yylloc', yylloc, 'lex_yy.yytext', lex_yy.yytext)
         #print('lex_yy.yy_element_space')
@@ -108,8 +122,7 @@ def yyparse(factory, *args, **kwargs):
     yy_element_stack[base * 2 + 1:] = [
       element.concatenate(
         yy_element_stack[base * 2 + 1:],
-        element.Element,
-        'root'
+        element.Element
       )
     ]