Get direct AST generation working properly, does not exactly match bootstrap_bison...

author Nick Downing <nick@ndcode.org>

Mon, 21 Jan 2019 06:19:37 +0000 (17:19 +1100)

committer Nick Downing <nick@ndcode.org>

Mon, 21 Jan 2019 06:19:37 +0000 (17:19 +1100)
author Nick Downing <nick@ndcode.org>
Mon, 21 Jan 2019 06:19:37 +0000 (17:19 +1100)
committer Nick Downing <nick@ndcode.org>
Mon, 21 Jan 2019 06:19:37 +0000 (17:19 +1100)
diff --git a/Makefile b/Makefile

index 52a6513..d3f7d3a 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,16 +1,16 @@
  all: lex_yy.py lex_yy_code.py y_tab.py
  
-lex_yy.py: scan-gram.l bootstrap/skel_lex_yy.py
+lex_yy.py: scan-gram.l
         ../bootstrap_flex.git/src/flex -o /dev/null $< 2>$<.xml
-       ../pilex.git/pilex.py --python --skel bootstrap/skel_lex_yy.py $<.xml
+       ../pilex.git/pilex.py --element --python $<.xml
  
  lex_yy_code.py: scan-code.l
         ../bootstrap_flex.git/src/flex -o /dev/null $< 2>$<.xml
         ../pilex.git/pilex.py --element --python -o $@ $<.xml
  
-y_tab.py: parse-gram.y bootstrap/skel_y_tab.py
+y_tab.py: parse-gram.y
         ../bootstrap_bison.git/src/bison -o /dev/null $< 2>$<.xml
-       ./piyacc.py --python --skel bootstrap/skel_y_tab.py $<.xml
+       ./piyacc.py --element --python $<.xml
  
  clean:
         rm -f lex_yy.py lex_yy_code.py y_tab.py *.xml
diff --git a/bootstrap/parse-gram.y b/bootstrap/parse-gram.y

index 8d3cbc8..3280411 100644 (file)
--- a/bootstrap/parse-gram.y
+++ b/bootstrap/parse-gram.y
@@ -256,8 +256,10 @@ input
  prologue_declarations
    : %empty
      {
-      global yychar
+      global yychar, yylval, yylloc
        if yychar == YYEMPTY:
+        yylval = None
+        yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
          yychar = lex_yy.gram_lex()
        temp = lex_yy.gram_piece[gram_piece2 + 1]
        lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
@@ -881,12 +883,14 @@ rhses.1
  rhs
    : %empty
      {
-      global yychar
+      global yychar, yylval, yylloc
        #if nested_rhs:
        #  nested_rhs -= 1
        #else:
        #  grammar_current_rule_begin(current_lhs_symbol, current_lhs_location, current_lhs_named_ref)
        if yychar == YYEMPTY:
+        yylval = None
+        yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
          yychar = lex_yy.gram_lex()
        temp = lex_yy.gram_piece[gram_piece2 + 1]
        lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
diff --git a/bootstrap/scan-gram.l b/bootstrap/scan-gram.l

index 009267d..8f64284 100644 (file)
--- a/bootstrap/scan-gram.l
+++ b/bootstrap/scan-gram.l
@@ -40,18 +40,15 @@
    gram_piece1 = 0
    markup_stack = []
  
-  percent_percent_count = 0;
+  percent_percent_count = 0
  
    # these should be yylex()-local, but moved to here, see further down:
    nesting = 0
    context_state = -1
    id_loc = state.location()
-  code_start = scanner_cursor.copy()
-  token_start = scanner_cursor.copy()
-  #first = True
-  if True: #first:
-    scanner_cursor = y_tab.yylloc.start.copy()
-    #first = False
+  code_start = None
+  token_start = None
+  scanner_cursor = state.boundary('<stdin>', 0, 0)
  %}
  
  %x SC_YACC_COMMENT
@@ -322,9 +319,14 @@ eqopt    ([[:space:]]*=)?
      code_start = y_tab.yylloc.start
      BEGIN(SC_BRACED_CODE)
      gram_piece_pack()
-    markup_stack.append(len(gram_piece)) # <AST_BracedCode>
-    gram_piece_flush(len(yytext))
+    # new way, includes braces, wrapped by <AST_Production_Action> later
      markup_stack.append(len(gram_piece)) # <AST_Text>
+    gram_piece_flush(len(yytext))
+    # old way
+    #markup_stack.append(len(gram_piece)) # <AST_BracedCode>
+    #gram_piece_flush(len(yytext))
+    #markup_stack.append(len(gram_piece)) # <AST_Text>
+    # to here
    }
  
    /* Semantic predicate. */
@@ -429,16 +431,19 @@ eqopt    ([[:space:]]*=)?
        yyless(0)
        BEGIN(SC_RETURN_BRACKETED_ID)
        y_tab.yylloc = id_loc
-      return y_tab.ID
+      return ~y_tab.ID
      else:
        bracketed_id_start = y_tab.yylloc.start
        bracketed_id_context_state = YY_START()
        BEGIN(SC_BRACKETED_ID)
    }
    ":" {
+    global gram_piece0
      BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
      y_tab.yylloc = id_loc
      gram_piece_escape(yytext)
+    gram_piece0 -= 1
+    gram_piece_pack()
      return ~y_tab.ID_COLON
    }
    . {
@@ -656,25 +661,23 @@ eqopt    ([[:space:]]*=)?
  <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
  {
    \\[0-7]{1,3} {
-    c = strtoul(yytext + 1, None, 8)
+    c = int(yytext[1:], 8)
      if not c or 0x7f * 2 + 1 < c:
        state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
      else:
        obstack_for_string.append(chr(c))
-      rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', int(c))
-      gram_piece_append(gram_piece_temp)
+      gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(c))
        gram_piece_flush(len(yytext))
        gram_piece_append('</AST_Text_Escape>')
    }
  
    \\x[0-9abcdefABCDEF]+ {
-    c = strtoul(yytext + 2, None, 16)
+    c = int(yytext[2:], 16)
      if not c or 0x7f * 2 + 1 < c:
        state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
      else:
        obstack_for_string.append(chr(c))
-      rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', int(c))
-      gram_piece_append(gram_piece_temp)
+      gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(c))
        gram_piece_flush(len(yytext))
        gram_piece_append('</AST_Text_Escape>')
    }
@@ -725,8 +728,7 @@ eqopt    ([[:space:]]*=)?
    /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
    \\("\""|"'"|"?"|"\\") {
      obstack_for_string.append(yytext[1])
-    rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', yytext[1])
-    gram_piece_append(gram_piece_temp)
+    gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(ord(yytext[1])))
      gram_piece_flush(len(yytext))
      gram_piece_append('</AST_Text_Escape>')
    }
@@ -737,8 +739,7 @@ eqopt    ([[:space:]]*=)?
        state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
      else:
        obstack_for_string.append(chr(c))
-      rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', c)
-      gram_piece_append(gram_piece_temp)
+      gram_piece_append('<AST_Text_Escape character="{0:d}">'.format(c))
        gram_piece_flush(len(yytext))
        gram_piece_append('</AST_Text_Escape>')
    }
@@ -858,11 +859,17 @@ eqopt    ([[:space:]]*=)?
        y_tab.yylloc.start = code_start
        y_tab.yylval = gram_last_string
        BEGIN(INITIAL)
+      # new way, includes braces, wrapped by <AST_Production_Action> later
+      gram_piece_escape(yytext)
        gram_piece_insert(markup_stack.pop(), '<AST_Text>')
        gram_piece_append('</AST_Text>')
-      gram_piece_escape(yytext)
-      gram_piece_insert(markup_stack.pop(), '<AST_BracedCode>')
-      gram_piece_append('</AST_BracedCode>')
+      # old way
+      #gram_piece_insert(markup_stack.pop(), '<AST_Text>')
+      #gram_piece_append('</AST_Text>')
+      #gram_piece_escape(yytext)
+      #gram_piece_insert(markup_stack.pop(), '<AST_BracedCode>')
+      #gram_piece_append('</AST_BracedCode>')
+      # to here
        gram_piece_pack()
        return ~y_tab.BRACED_CODE
    }
@@ -1004,12 +1011,12 @@ def scan_integer(number, base, loc):
      num = 0x7fffffff
    return num
  
-#def convert_ucn_to_byte(ucn):
-#  code = strtoul(ucn + 2, None, 16)
-#  if 0x7f * 2 + 1 < code:
-#    return -1
-#  return code
-#
+def convert_ucn_to_byte(ucn): # parse ucn as hexadecimal; returns the value, or -1 if it exceeds 255
+  code = int(ucn[2:], 16) # skips the first two characters (presumably a \u or \U prefix -- confirm with caller)
+  if 0x7f * 2 + 1 < code: # 0x7f * 2 + 1 == 255, i.e. the largest value a single byte can hold
+    return -1
+  return code
+
  #def handle_syncline(args, loc):
  #  file = None
  #  lineno = strtoul(args, &file, 10)
diff --git a/element.py b/element.py

index 2121e08..2d02217 100644 (file)
--- a/element.py
+++ b/element.py
@@ -160,6 +160,16 @@ def set_text(root, i, text):
    else:
      root[i - 1].tail = text
  
+def to_text(root): # recursively joins the text of root and of all its descendants into one string
+  return ''.join(
+    [
+      j
+      for i in range(len(root)) # one pass per child of root
+      for j in [get_text(root, i), to_text(root[i])] # text slot i (presumably the text before child i, mirroring set_text -- confirm), then child i flattened
+    ] +
+    [get_text(root, len(root))] # last text slot, index len(root); assumes get_text returns str, not None -- confirm
+  )
+
  def concatenate(children, factory = Element, *args, **kwargs):
    root = factory(*args, **kwargs)
    for child in children:
diff --git a/generate_bison.py b/generate_bison.py

index 9f665c2..b3bd033 100644 (file)
--- a/generate_bison.py
+++ b/generate_bison.py
@@ -718,10 +718,10 @@ static const yytype_int16 yyr2[] =
      break;
  '''.format(
                        i,
-                      _lr1dfa.productions[i][1].get_text()
+                      _lr1dfa.productions[i][1][4].get_text()
                      )
                      for i in range(len(_lr1dfa.productions))
-                    if _lr1dfa.productions[i][1] is not None
+                    if _lr1dfa.productions[i][1][4] is not None
                    ]
                  )
                )
diff --git a/generate_py.py b/generate_py.py

index e4f37e8..ed093ff 100644 (file)
--- a/generate_py.py
+++ b/generate_py.py
@@ -140,10 +140,10 @@ YYERROR_VERBOSE = {1:s}
                      (
                        '''  if yychar == -1:
      yylval = None
-    yylloc = None
+    yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
      yychar = lex_yy.yylex()
    yy_element_stack.append(lex_yy.yy_element_space)
-  lex_yy.yy_element_space = element.Element('root')
+  lex_yy.yy_element_space = element.Element() # should use factory here
  '''
                      if actions[i][0] else # take_space_right
                        ''
@@ -152,7 +152,6 @@ YYERROR_VERBOSE = {1:s}
                        [
                          '''  yy_element_stack[{0:s}:{1:s}] = [
      element.Element(
-      'root',
        children = [
          element.concatenate(
            yy_element_stack[{2:s}:{3:s}],
@@ -172,7 +171,7 @@ YYERROR_VERBOSE = {1:s}
                      ),
                      (
                        '''  yy_element_stack[{0:s}:{1:s}] = [
-    element.Element('root')
+    element.Element()
    ]
  '''.format(
                          (
diff --git a/markup.py b/markup.py

index 1817bee..caf5a41 100755 (executable)
--- a/markup.py
+++ b/markup.py
@@ -1,10 +1,8 @@
  #!/usr/bin/env python3
  
-import lex_yy
+import ast
+import element
  import y_tab
  import sys
  
-lex_yy.gram_piece_append('<root>\n  <AST ref=\"0\">')
-y_tab.yyparse()
-lex_yy.gram_piece_append('</AST>\n</root>\n')
-sys.stdout.write(''.join(lex_yy.gram_piece))
+element.serialize(y_tab.yyparse(ast.AST), sys.stdout)
diff --git a/parse-gram.y b/parse-gram.y

index 7796dae..1c151b5 100644 (file)
--- a/parse-gram.y
+++ b/parse-gram.y
@@ -23,28 +23,10 @@
    import element
    import lex_yy_code
    import state
-  import xml.etree.ElementTree
  
    YYLTYPE = state.location
  }
  
-%code top
-{
-}
-
-%code
-{
-  #current_prec = 0
-  #current_lhs_location = 0
-  #current_lhs_named_ref = []
-  #current_lhs_symbol = []
-  #current_class = unknown_sym
-  #current_type = None
-  gram_piece2 = 0
-  gram_piece3 = 0
-  #nested_rhs = 0
-}
-
  /* Nick %define api.prefix {gram_} */
  %define api.pure full
  %define locations
@@ -243,13 +225,7 @@
  %%
  
  input
-  : prologue_declarations "%%" grammar epilogue.opt
-    {
-      insert_after(2, '</AST_Section2>')
-      insert_before(2, '<AST_Section2>')
-      insert_after(0, '</AST_Section1>')
-      insert_before(0, '<AST_Section1>')
-    }
+  : %space (?E{ast.AST.Section1}prologue_declarations) "%%" %space (?E{ast.AST.Section2}grammar) epilogue.opt
    ;
  
          /*------------------------------------.
@@ -259,13 +235,13 @@ input
  prologue_declarations
    : %empty
      {
-      global yychar
+      global yychar, yylval, yylloc
        if yychar == YYEMPTY:
-        yychar = lex_yy.gram_lex()
-      temp = lex_yy.gram_piece[gram_piece2 + 1]
-      lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
-      lex_yy.gram_piece[gram_piece2] = lex_yy.gram_piece[gram_piece2 - 1]
-      lex_yy.gram_piece[gram_piece2 - 1] = temp
+        yylval = None
+        yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
+        yychar = lex_yy.yylex()
+      yy_element_stack[-1] = lex_yy.yy_element_space
+      lex_yy.yy_element_space = element.Element()
      }
    | prologue_declarations prologue_declaration
    ;
@@ -273,206 +249,56 @@ prologue_declarations
  prologue_declaration
    : grammar_declaration
    | "%{...%}"
+  | %space (?E{
+      (
+        ast.AST.Section1.PureParser
+      if $1 == 'api.pure' else
+        ast.AST.Section1.Locations
+      if $1 == 'locations' else
+        ast.AST.Section1.Debug
+      if $1 == 'parse.trace' else
+        element.Element
+      )
+    }"%<flag>")
+  | %space (?E{ast.AST.Section1.Define}"%define" variable value)
+  | %space (?E{ast.AST.Section1.Defines}"%defines")
+  | %space (?E{ast.AST.Section1.Defines}"%defines" STRING)
+  | %space (?E{ast.AST.Section1.ErrorVerbose}"%error-verbose")
+  | %space (?E{ast.AST.Section1.Expect, value = $2}"%expect" INT)
+  | %space (?E{ast.AST.Section1.ExpectRR, value = $2}"%expect-rr" INT)
+  | %space (?E{ast.AST.Section1.FilePrefix}"%file-prefix" STRING)
+  | %space (?E{ast.AST.Section1.GLRParser}"%glr-parser")
+  | %space (?E{ast.AST.Section1.InitialAction}"%initial-action" "{...}")
      {
-      #muscle_code_grow('post_prologue' if union_seen else 'pre_prologue', translate_code($1, @1, True), @1)
-      lex_yy_code.yyin = None
-      lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
-      lex_yy_code.yytext = '' # fool unput()
-      lex_yy_code.unput($1)
-      lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
-      lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2] = \
-        '<AST_Section1_Prologue>%{{{0:s}%}}</AST_Section1_Prologue>'.format(
-          xml.etree.ElementTree.tostring(
-            lex_yy_code.yy_element_space,
-            encoding = 'unicode'
-          )
-        )
-    }
-  | "%<flag>"
-    {
-      #muscle_percent_define_ensure($1, @1, True)
-      if $1 == 'api.pure':
-        insert_after(0, '</AST_Section1_PureParser>')
-        insert_before(0, '<AST_Section1_PureParser>')
-      elif $1 == 'locations':
-        insert_after(0, '</AST_Section1_Locations>')
-        insert_before(0, '<AST_Section1_Locations>')
-      elif $1 == 'parse.trace':
-        insert_after(0, '</AST_Section1_Debug>')
-        insert_before(0, '<AST_Section1_Debug>')
-      else:
-        assert False
-    }
-  | "%define" variable value
-    {
-      #muscle_percent_define_insert($2, @2, $3.kind, $3.chars, MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
-      insert_after(2, '</AST_Section1_Define>')
-      insert_before(0, '<AST_Section1_Define>')
-    }
-  | "%defines"
-    {
-      #defines_flag = True
-      insert_after(0, '</AST_Section1_Defines>')
-      insert_before(0, '<AST_Section1_Defines>')
-    }
-  | "%defines" STRING
-    {
-      #defines_flag = True
-      #spec_defines_file = xstrdup($2)
-      insert_after(1, '</AST_Section1_Defines>')
-      insert_before(0, '<AST_Section1_Defines>')
-    }
-  | "%error-verbose"
-    {
-      #muscle_percent_define_insert('parse.error', @1, muscle_keyword, 'verbose', MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
-      insert_after(0, '</AST_Section1_ErrorVerbose>')
-      insert_before(0, '<AST_Section1_ErrorVerbose>')
-    }
-  | "%expect" INT
-    {
-      #expected_sr_conflicts = $2
-      insert_after(1, '</AST_Section1_Expect>')
-      insert_before(0, '<AST_Section1_Expect value="{0:d}">'.format($2))
-    }
-  | "%expect-rr" INT
-    {
-      #expected_rr_conflicts = $2
-      insert_after(1, '</AST_Section1_ExpectRR>')
-      insert_before(0, '<AST_Section1_ExpectRR value="{0:d}">'.format($2))
-    }
-  | "%file-prefix" STRING
-    {
-      #spec_file_prefix = $2
-      insert_after(1, '</AST_Section1_FilePrefix>')
-      insert_before(0, '<AST_Section1_FilePrefix>')
-    }
-  | "%glr-parser"
-    {
-      #nondeterministic_parser = True
-      #glr_parser = True
-      insert_after(0, '</AST_Section1_GLRParser>')
-      insert_before(0, '<AST_Section1_GLRParser>')
-    }
-  | "%initial-action" "{...}"
-    {
-      #muscle_code_grow('initial_action', translate_code($2, @2, False), @2)
        lex_yy_code.yyin = None
        lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
        lex_yy_code.yytext = '' # fool unput()
        lex_yy_code.unput($2)
        lex_yy_code.sc_context = lex_yy_code.SC_SYMBOL_ACTION # CODE_PROPS_SYMBOL_ACTION
        lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2 + 2] = xml.etree.ElementTree.tostring(
-        lex_yy_code.yy_element_space,
-        encoding = 'unicode'
-      )
-      insert_after(1, '</AST_Section1_InitialAction>')
-      insert_before(0, '<AST_Section1_InitialAction>')
-    }
-  | "%language" STRING
-    {
-      #language_argmatch($2, grammar_prio, @1)
-      insert_after(1, '</AST_Section1_Language>')
-      insert_before(0, '<AST_Section1_Language>')
-    }
-  | "%name-prefix" STRING
-    {
-      #spec_name_prefix = $2
-      insert_after(1, '</AST_Section1_NamePrefix>')
-      insert_before(0, '<AST_Section1_NamePrefix>')
-    }
-  | "%no-lines"
-    {
-      #no_lines_flag = True
-      insert_after(0, '</AST_Section1_Lines>')
-      insert_before(0, '<AST_Section1_Lines value="false">')
-    }
-  | "%nondeterministic-parser"
-    {
-      #nondeterministic_parser = True
-      insert_after(0, '</AST_Section1_NonDeterministicParser>')
-      insert_before(0, '<AST_Section1_NonDeterministicParser>')
-    }
-  | "%output" STRING
-    {
-      #spec_outfile = $2
-      insert_after(1, '</AST_Section1_Output>')
-      insert_before(0, '<AST_Section1_Output>')
-    }
-  | "%param"
-    {
-      #current_param = $1
-    }
-    params
-    {
-      #current_param = param_none
-      insert_after(2, '</AST_Section1_Param>')
-      insert_before(0, '<AST_Section1_Param>')
-    }
-  | "%require" STRING
-    {
-      #version_check(&@2, $2)
-      insert_after(1, '</AST_Section1_Require>')
-      insert_before(0, '<AST_Section1_Require>')
-    }
-  | "%skeleton" STRING
-    {
-      #skeleton_user = $2
-      #if strchr(skeleton_user, ord('/')):
-      #  dir_length = len(current_file)
-      #  skeleton_build = None
-      #  while dir_length and current_file[dir_length - 1] != ord('/'):
-      #    dir_length -= 1
-      #  while dir_length and current_file[dir_length - 1] == ord('/'):
-      #    dir_length -= 1
-      #  skeleton_build = xmalloc(dir_length + 1 + len(skeleton_user) + 1)
-      #  if dir_length > 0:
-      #    memcpy(skeleton_build, current_file, dir_length)
-      #    skeleton_build[dir_length++] = ord('/')
-      #  strcpy(skeleton_build + dir_length, skeleton_user)
-      #  skeleton_user = uniqstr_new(skeleton_build)
-      #  free(skeleton_build)
-      #skeleton_arg(skeleton_user, grammar_prio, @1)
-      insert_after(1, '</AST_Section1_Skeleton>')
-      insert_before(0, '<AST_Section1_Skeleton>')
-    }
-  | "%token-table"
-    {
-      #token_table_flag = True
-      insert_after(0, '</AST_Section1_TokenTable>')
-      insert_before(0, '<AST_Section1_TokenTable>')
-    }
-  | "%verbose"
-    {
-      #report_flag |= report_states
-      insert_after(0, '</AST_Section1_Verbose>')
-      insert_before(0, '<AST_Section1_Verbose>')
-    }
-  | "%yacc"
-    {
-      #yacc_flag = True
-      insert_after(0, '</AST_Section1_YACC>')
-      insert_before(0, '<AST_Section1_YACC>')
-    }
+      assert isinstance(yy_element_stack[-1][0], ast.AST.Section1.InitialAction)
+      assert isinstance(yy_element_stack[-1][0][0], ast.AST.Text)
+      tail = element.get_text(yy_element_stack[-1][0], 1)
+      yy_element_stack[-1][0][0] = lex_yy_code.yy_element_space
+      element.set_text(yy_element_stack[-1][0], 1, tail)
+    }
+  | %space (?E{ast.AST.Section1.Language}"%language" STRING)
+  | %space (?E{ast.AST.Section1.NamePrefix}"%name-prefix" STRING)
+  | %space (?E{ast.AST.Section1.Lines, value = False}"%no-lines")
+  | %space (?E{ast.AST.Section1.NonDeterministicParser}"%nondeterministic-parser")
+  | %space (?E{ast.AST.Section1.Output}"%output" STRING)
+  | %space (?E{ast.AST.Section1.Param}"%param" params)
+  | %space (?E{ast.AST.Section1.Require}"%require" STRING)
+  | %space (?E{ast.AST.Section1.Skeleton}"%skeleton" STRING)
+  | %space (?E{ast.AST.Section1.TokenTable}"%token-table")
+  | %space (?E{ast.AST.Section1.Verbose}"%verbose")
+  | %space (?E{ast.AST.Section1.YACC}"%yacc")
    | /*FIXME: Err?  What is this horror doing here? */ ";"
    ;
  
  params
    : params "{...}"
-    {
-      #add_param(current_param, $2, @2)
-    }
    | "{...}"
-    {
-      #add_param(current_param, $1, @1)
-    }
    ;
  
  /*----------------------.
@@ -482,96 +308,69 @@ params
  grammar_declaration
    : precedence_declaration
    | symbol_declaration
-  | "%start" symbol
-    {
-      #grammar_start_symbol_set($2, @2)
-      insert_after(1, '</AST_Section1Or2_Start>')
-      insert_after(1, '</AST_SymbolRef>')
-      insert_before(1, '<AST_SymbolRef>')
-      insert_before(0, '<AST_Section1Or2_Start>')
-    }
-  | code_props_type "{...}" generic_symlist
+  /* ) ) should be )) */
+  | %space (?E{ast.AST.Section1Or2.Start}"%start" %space (?E{ast.AST.SymbolRef}symbol) )
+  | %space (?E{ast.AST.Section1Or2.CodeProps, _type = $1}code_props_type "{...}" generic_symlist)
      {
-      #code = None
-      #code_props_symbol_action_init(&code, $2, @2)
-      #code_props_translate_code(&code)
        lex_yy_code.yyin = None
        lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
        lex_yy_code.yytext = '' # fool unput()
        lex_yy_code.unput($2)
        lex_yy_code.sc_context = lex_yy_code.SC_SYMBOL_ACTION # CODE_PROPS_SYMBOL_ACTION
        lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2 + 2] = xml.etree.ElementTree.tostring(
-        lex_yy_code.yy_element_space,
-        encoding = 'unicode'
+      assert isinstance(yy_element_stack[-1][0], ast.AST.Section1Or2.CodeProps)
+      assert isinstance(yy_element_stack[-1][0][0], ast.AST.Text)
+      tail = element.get_text(yy_element_stack[-1][0], 1)
+      yy_element_stack[-1][0][0] = lex_yy_code.yy_element_space
+      element.set_text(yy_element_stack[-1][0], 1, tail)
+    }
+  | %space (?E{ast.AST.Section1Or2.DefaultPrec, value = True}"%default-prec")
+  | %space (?E{ast.AST.Section1Or2.DefaultPrec}"%no-default-prec")
+  /* the BracedCode should be removed altogether, put here for compatibility */
+  | %space (?E{ast.AST.Section1Or2.Code}"%code" (?E{ast.AST.ID}) %space (?E{ast.AST.BracedCode}"{...}") )
+    {
+      # most egregious (leftover from when we used to run code scanner on it):
+      assert isinstance(yy_element_stack[-1][0], ast.AST.Section1Or2.Code)
+      assert isinstance(yy_element_stack[-1][0][1], ast.AST.BracedCode)
+      assert isinstance(yy_element_stack[-1][0][1][0], ast.AST.Text)
+      element.set_text(
+        yy_element_stack[-1][0][1][0],
+        0,
+        element.get_text(yy_element_stack[-1][0][1][0], 0)[1:-1] # remove { }
+      )
+      element.set_text(
+        yy_element_stack[-1][0][1],
+        0,
+        element.get_text(yy_element_stack[-1][0][1], 0) + '{'
+      )
+      element.set_text(
+        yy_element_stack[-1][0][1],
+        1,
+        '}' + element.get_text(yy_element_stack[-1][0][1], 1)
        )
-      #list = None
-      #list = $3
-      #while list:
-      #  symbol_list_code_props_set(list, $1, &code)
-      #  list = list->next
-      #symbol_list_free($3)
-      insert_after(2, '</AST_Section1Or2_CodeProps>')
-      insert_before(0, '<AST_Section1Or2_CodeProps _type="{0:d}">'.format($1))
-    }
-  | "%default-prec"
-    {
-      #default_prec = True
-      insert_after(0, '</AST_Section1Or2_DefaultPrec>')
-      insert_before(0, '<AST_Section1Or2_DefaultPrec value="true">')
-    }
-  | "%no-default-prec"
-    {
-      #default_prec = False
-      insert_after(0, '</AST_Section1Or2_DefaultPrec>')
-      insert_before(0, '<AST_Section1Or2_DefaultPrec>')
-    }
-  | "%code" "{...}"
-    {
-      #muscle_code_grow('percent_code()', translate_code_braceless($2, @2), @2)
-      lex_yy_code.yyin = None
-      lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
-      lex_yy_code.yytext = '' # fool unput()
-      lex_yy_code.unput($2[1:-1])
-      lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
-      lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2 + 2] = \
-        '<AST_BracedCode>{{{0:s}}}</AST_BracedCode>'.format(
-          xml.etree.ElementTree.tostring(
-            lex_yy_code.yy_element_space,
-            encoding = 'unicode'
-          )
-        )
-      insert_after(1, '</AST_Section1Or2_Code>')
-      insert_before(0, '<AST_Section1Or2_Code><AST_ID />')
      }
-  | "%code" ID "{...}"
-    {
-      #muscle_percent_code_grow($2, @2, translate_code_braceless($3, @3), @3)
-      lex_yy_code.yyin = None
-      lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
-      lex_yy_code.yytext = '' # fool unput()
-      lex_yy_code.unput($3[1:-1])
-      lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
-      lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2 + 4] = \
-        '<AST_BracedCode>{{{0:s}}}</AST_BracedCode>'.format(
-          xml.etree.ElementTree.tostring(
-            lex_yy_code.yy_element_space,
-            encoding = 'unicode'
-          )
-        )
-      insert_after(2, '</AST_Section1Or2_Code>')
-      insert_before(0, '<AST_Section1Or2_Code>')
+  /* the BracedCode should be removed altogether, put here for compatibility */
+  | %space (?E{ast.AST.Section1Or2.Code}"%code" ID %space (?E{ast.AST.BracedCode}"{...}") )
+    {
+      # most egregious (leftover from when we used to run code scanner on it):
+      assert isinstance(yy_element_stack[-1][0], ast.AST.Section1Or2.Code)
+      assert isinstance(yy_element_stack[-1][0][1], ast.AST.BracedCode)
+      assert isinstance(yy_element_stack[-1][0][1][0], ast.AST.Text)
+      element.set_text(
+        yy_element_stack[-1][0][1][0],
+        0,
+        element.get_text(yy_element_stack[-1][0][1][0], 0)[1:-1] # remove { }
+      )
+      element.set_text(
+        yy_element_stack[-1][0][1],
+        0,
+        element.get_text(yy_element_stack[-1][0][1], 0) + '{'
+      )
+      element.set_text(
+        yy_element_stack[-1][0][1],
+        1,
+        '}' + element.get_text(yy_element_stack[-1][0][1], 1)
+      )
      }
    ;
  
@@ -601,94 +400,44 @@ code_props_type
  %token PERCENT_UNION "%union";
  
  union_name
-  : %empty
-    {
-      insert_before(0, '<AST_ID />')
-    }
+  : (?E{ast.AST.ID}%empty)
    | ID
-    {
-      #muscle_percent_define_insert('api.value.union.name', @1, muscle_keyword, $1, MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE)
-    }
    ;
  
  grammar_declaration
-  : "%union" union_name "{...}"
-    {
-      #union_seen = True
-      #muscle_code_grow('union_members', translate_code_braceless($3, @3), @3)
-      lex_yy_code.yyin = None
-      lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
-      lex_yy_code.yytext = '' # fool unput()
-      lex_yy_code.unput($3[1:-1])
-      lex_yy_code.sc_context = lex_yy_code.INITIAL # CODE_PROPS_PLAIN
-      lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2 + 4] = \
-        '<AST_BracedCode>{{{0:s}}}</AST_BracedCode>'.format(
-          xml.etree.ElementTree.tostring(
-            lex_yy_code.yy_element_space,
-            encoding = 'unicode'
-          )
-        )
-      insert_after(2, '</AST_Section1Or2_Union>')
-      insert_before(0, '<AST_Section1Or2_Union>')
+  /* the BracedCode should be removed altogether, put here for compatibility */
+  : %space (?E{ast.AST.Section1Or2.Union}"%union" union_name %space (?E{ast.AST.BracedCode}"{...}") )
+    {
+      # most egregious (leftover from when we used to run code scanner on it):
+      assert isinstance(yy_element_stack[-1][0], ast.AST.Section1Or2.Union)
+      assert isinstance(yy_element_stack[-1][0][1], ast.AST.BracedCode)
+      assert isinstance(yy_element_stack[-1][0][1][0], ast.AST.Text)
+      element.set_text(
+        yy_element_stack[-1][0][1][0],
+        0,
+        element.get_text(yy_element_stack[-1][0][1][0], 0)[1:-1] # remove { }
+      )
+      element.set_text(
+        yy_element_stack[-1][0][1],
+        0,
+        element.get_text(yy_element_stack[-1][0][1], 0) + '{'
+      )
+      element.set_text(
+        yy_element_stack[-1][0][1],
+        1,
+        '}' + element.get_text(yy_element_stack[-1][0][1], 1)
+      )
      }
    ;
  
  symbol_declaration
-  : "%nterm"
-    {
-      #current_class = nterm_sym
-    }
-    symbol_defs.1
-    {
-      #current_class = unknown_sym
-      #current_type = None
-      insert_after(2, '</AST_Section1Or2_NTerm>')
-      insert_before(0, '<AST_Section1Or2_NTerm>')
-    }
-  | "%token"
-    {
-      #current_class = token_sym
-    }
-    symbol_defs.1
-    {
-      #current_class = unknown_sym
-      #current_type = None
-      insert_after(2, '</AST_Section1Or2_Token>')
-      insert_before(0, '<AST_Section1Or2_Token>')
-    }
-  | "%type" TAG symbols.1
-    {
-      #list = None
-      #tag_seen = True
-      #list = $3
-      #while list:
-      #  symbol_type_set(list->content.sym, $2, @2)
-      #  list = list->next
-      #symbol_list_free($3)
-      insert_after(2, '</AST_Section1Or2_Type>')
-      insert_before(0, '<AST_Section1Or2_Type>')
-    }
+  : %space (?E{ast.AST.Section1Or2.NTerm}"%nterm" symbol_defs.1)
+  | %space (?E{ast.AST.Section1Or2.Token}"%token" symbol_defs.1)
+  | %space (?E{ast.AST.Section1Or2.Type}"%type" TAG symbols.1)
    ;
  
  precedence_declaration
-  : precedence_declarator tag.opt symbols.prec
-    {
-      #list = None
-      #current_prec += 1
-      #list = $3
-      #while list:
-      #  symbol_type_set(list->content.sym, current_type, @2)
-      #  symbol_precedence_set(list->content.sym, current_prec, $1, @1)
-      #  list = list->next
-      #symbol_list_free($3)
-      #current_type = None
-      insert_after(2, '</AST_Section1Or2_Precedence>')
-      insert_before(0, '<AST_Section1Or2_Precedence _type="{0:d}">'.format(($1 & 3) - 1))
-    }
+  : %space (?E{ast.AST.Section1Or2.Precedence, _type = ($1 & 3) - 1}precedence_declarator tag.opt symbols.prec)
    ;
  
  precedence_declarator
@@ -712,137 +461,49 @@ precedence_declarator
  
  tag.opt
    : %empty
-    {
-      #current_type = None
-    }
    | TAG
-    {
-      #current_type = $1
-      #tag_seen = True
-    }
    ;
  
  /* Just like symbols.1 but accept INT for the sake of POSIX.  */
  symbols.prec
    : symbol.prec
-    {
-      #$$ = symbol_list_sym_new($1, @1)
-    }
    | symbols.prec symbol.prec
-    {
-      #$$ = symbol_list_append($1, symbol_list_sym_new($2, @2))
-    }
    ;
  
  symbol.prec
-  : symbol
-    {
-      #$$ = $1
-      #symbol_class_set($1, token_sym, @1, False)
-      insert_after(0, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef>')
-    }
-  | symbol INT
-    {
-      #$$ = $1
-      #symbol_user_token_number_set($1, $2, @2)
-      #symbol_class_set($1, token_sym, @1, False)
-      insert_after(1, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
-    }
+  : %space (?E{ast.AST.SymbolRef}symbol)
+  | %space (?E{ast.AST.SymbolRef, user_token = $2}symbol INT)
    ;
  
  /* One or more symbols to be %typed. */
  symbols.1
-  : symbol
-    {
-      #$$ = symbol_list_sym_new($1, @1)
-      insert_after(0, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef>')
-    }
-  | symbols.1 symbol
-    {
-      #$$ = symbol_list_append($1, symbol_list_sym_new($2, @2))
-      insert_after(1, '</AST_SymbolRef>')
-      insert_before(1, '<AST_SymbolRef>')
-    }
+  : %space (?E{ast.AST.SymbolRef}symbol)
+  | symbols.1 %space (?E{ast.AST.SymbolRef}symbol)
    ;
  
  generic_symlist
    : generic_symlist_item
-    {
-      #$$ = $1
-    }
    | generic_symlist generic_symlist_item
-    {
-      #$$ = symbol_list_append($1, $2)
-    }
    ;
  
  generic_symlist_item
-  : symbol
-    {
-      #$$ = symbol_list_sym_new($1, @1)
-      insert_after(0, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef>')
-    }
+  : %space (?E{ast.AST.SymbolRef}symbol)
    | tag
-    {
-      #$$ = symbol_list_type_new($1, @1)
-    }
    ;
  
  tag
    : TAG
    | "<*>"
-    {
-      #$$ = uniqstr_new('*')
-    }
    | "<>"
-    {
-      #$$ = uniqstr_new('')
-    }
    ;
  
  /* One token definition.  */
  symbol_def
    : TAG
-    {
-      #current_type = $1
-      #tag_seen = True
-    }
-  | id
-    {
-      #symbol_class_set($1, current_class, @1, True)
-      #symbol_type_set($1, current_type, @1)
-      insert_after(0, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef>')
-    }
-  | id INT
-    {
-      #symbol_class_set($1, current_class, @1, True)
-      #symbol_type_set($1, current_type, @1)
-      #symbol_user_token_number_set($1, $2, @2)
-      insert_after(1, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
-    }
-  | id string_as_id
-    {
-      #symbol_class_set($1, current_class, @1, True)
-      #symbol_type_set($1, current_type, @1)
-      #symbol_make_alias($1, $2, @$)
-      insert_after(1, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef>')
-    }
-  | id INT string_as_id
-    {
-      #symbol_class_set($1, current_class, @1, True)
-      #symbol_type_set($1, current_type, @1)
-      #symbol_user_token_number_set($1, $2, @2)
-      #symbol_make_alias($1, $3, @$)
-      insert_after(2, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef user_token="{0:d}">'.format($2))
-    }
+  | %space (?E{ast.AST.SymbolRef}id)
+  | %space (?E{ast.AST.SymbolRef, user_token = $2}id INT)
+  | %space (?E{ast.AST.SymbolRef}id string_as_id)
+  | %space (?E{ast.AST.SymbolRef, user_token = $2}id INT string_as_id)
    ;
  
  /* One or more symbol definitions. */
@@ -872,33 +533,12 @@ rules_or_grammar_declaration
    ;
  
  rules
-  : id_colon named_ref.opt
-    {
-      #current_lhs($1, @1, $2)
-    }
-    rhses.1
-    {
-      #current_lhs(0, @1, 0)
-      insert_after(3, '</AST_Section2_Rules>')
-      insert_after(0, '</AST_SymbolRef>')
-      insert_before(0, '<AST_SymbolRef>')
-      insert_before(0, '<AST_Section2_Rules>')
-    }
+  : %space (?E{ast.AST.Section2.Rules}(?E{ast.AST.SymbolRef}id_colon) named_ref.opt rhses.1)
    ;
  
  rhses.1
-  : rhs
-    {
-      #grammar_current_rule_end(@1)
-      insert_after(0, '</AST_Production>')
-      insert_before(0, '<AST_Production>')
-    }
-  | rhses.1 "|" rhs
-    {
-      #grammar_current_rule_end(@3)
-      insert_after(2, '</AST_Production>')
-      insert_before(2, '<AST_Production>')
-    }
+  : %space (?E{ast.AST.Production}rhs)
+  | rhses.1 "|" %space (?E{ast.AST.Production}rhs)
    | rhses.1 ";"
    ;
  
@@ -909,80 +549,40 @@ rhses.1
  rhs
    : %empty
      {
-      global yychar
-      #if nested_rhs:
-      #  nested_rhs -= 1
-      #else:
-      #  grammar_current_rule_begin(current_lhs_symbol, current_lhs_location, current_lhs_named_ref)
+      global yychar, yylval, yylloc
        if yychar == YYEMPTY:
-        yychar = lex_yy.gram_lex()
-      temp = lex_yy.gram_piece[gram_piece2 + 1]
-      lex_yy.gram_piece[gram_piece2 + 1] = lex_yy.gram_piece[gram_piece2]
-      lex_yy.gram_piece[gram_piece2] = lex_yy.gram_piece[gram_piece2 - 1]
-      lex_yy.gram_piece[gram_piece2 - 1] = temp
-    }
-  | rhs symbol named_ref.opt
-    {
-      #grammar_current_rule_symbol_append($2, @2, $3)
-      insert_after(2, '</AST_Production_SymbolRef>')
-      insert_after(1, '</AST_SymbolRef>')
-      insert_before(1, '<AST_Production_SymbolRef><AST_SymbolRef>')
+        yylval = None
+        yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
+        yychar = lex_yy.yylex()
+      yy_element_stack[-1] = lex_yy.yy_element_space
+      lex_yy.yy_element_space = element.Element()
      }
-  | rhs "{...}" named_ref.opt
+  | rhs %space (?E{ast.AST.Production.SymbolRef}(?E{ast.AST.SymbolRef}symbol) named_ref.opt)
+  | rhs %space (?E{ast.AST.Production.Action}"{...}" named_ref.opt)
      {
-      #grammar_current_rule_action_append($2, @2, $3, False)
        lex_yy_code.yyin = None
        lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
        lex_yy_code.yytext = '' # fool unput()
        lex_yy_code.unput($2)
        lex_yy_code.sc_context = lex_yy_code.SC_RULE_ACTION # CODE_PROPS_RULE_ACTION
        lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2 + 2] = xml.etree.ElementTree.tostring(
-        lex_yy_code.yy_element_space,
-        encoding = 'unicode'
-      )
-      insert_after(2, '</AST_Production_Action>')
-      insert_before(1, '<AST_Production_Action>')
+      assert isinstance(yy_element_stack[-1][0], ast.AST.Production.Action)
+      assert isinstance(yy_element_stack[-1][0][0], ast.AST.Text)
+      tail = element.get_text(yy_element_stack[-1][0], 1)
+      yy_element_stack[-1][0][0] = lex_yy_code.yy_element_space
+      element.set_text(yy_element_stack[-1][0], 1, tail)
      }
    | rhs "%?{...}"
      {
        #grammar_current_rule_action_append($2, @2, None, True)
      }
-  | rhs "%empty"
-    {
-      #grammar_current_rule_empty_set(@2)
-      insert_after(1, '</AST_Production_Empty>')
-      insert_before(1, '<AST_Production_Empty>')
-    }
-  | rhs "%prec" symbol
-    {
-      #grammar_current_rule_prec_set($3, @3)
-      insert_after(2, '</AST_Production_Prec>')
-      insert_after(2, '</AST_SymbolRef>')
-      insert_before(2, '<AST_SymbolRef>')
-      insert_before(1, '<AST_Production_Prec>')
-    }
-  | rhs "%dprec" INT
-    {
-      #grammar_current_rule_dprec_set($3, @3)
-      insert_after(2, '</AST_Production_DPrec>')
-      insert_before(1, '<AST_Production_DPrec value="{0:d}">'.format($3))
-    }
-  | rhs "%merge" TAG
-    {
-      #grammar_current_rule_merge_set($3, @3)
-      insert_after(2, '</AST_Production_Merge>')
-      insert_before(1, '<AST_Production_Merge>')
-    }
+  | rhs %space (?E{ast.AST.Production.Empty}"%empty")
+  /* the ") )" closing the next rule should be "))" */
+  | rhs %space (?E{ast.AST.Production.Prec}"%prec" %space (?E{ast.AST.SymbolRef}symbol) )
+  | rhs %space (?E{ast.AST.Production.DPrec, value = $3}"%dprec" INT)
+  | rhs %space (?E{ast.AST.Production.Merge}"%merge" TAG)
    /* Nick extra rules for element groups */
-  | rhs '('
-    /*{
-      #nested_rhs += 1
-    }*/
-    rhs ')'
+  | rhs %space (?E{ast.AST.Production.GroupElement}'(' rhs ')')
      {
        lex_yy_code.yyin = None
        lex_yy_code.yy_buffer_stack = [lex_yy_code.YYBufferState()]
@@ -990,36 +590,19 @@ rhs
        lex_yy_code.unput($2[4:-1])
        lex_yy_code.sc_context = lex_yy_code.SC_RULE_ACTION # CODE_PROPS_RULE_ACTION
        lex_yy_code.yylex(ast.AST.Text)
-      ref_list = []
-      element.serialize_ref(lex_yy_code.yy_element_space, ref_list)
-      del lex_yy_code.yy_element_space.attrib['ref']
-      lex_yy.gram_piece[gram_piece2 + 2] = \
-        '(?E{{{0:s}}}'.format(
-          xml.etree.ElementTree.tostring(
-            lex_yy_code.yy_element_space,
-            encoding = 'unicode'
-          )
-        )
-      insert_after(3, '</AST_Production_GroupElement>')
-      insert_before(1, '<AST_Production_GroupElement>')
+      assert isinstance(yy_element_stack[-1][0], ast.AST.Production.GroupElement)
+      assert isinstance(yy_element_stack[-1][0][0], ast.AST.Text)
+      tail = element.get_text(yy_element_stack[-1][0], 1)
+      yy_element_stack[-1][0][0] = lex_yy_code.yy_element_space
+      element.set_text(yy_element_stack[-1][0], 1, tail)
      }
    /* Nick added %space */
-  | rhs "%space"
-    {
-      insert_after(1, '</AST_Production_Space>')
-      insert_before(1, '<AST_Production_Space>')
-    }
+  | rhs %space (?E{ast.AST.Production.Space}"%space")
    ;
  
  named_ref.opt
    : %empty
-    {
-      #$$ = 0
-    }
    | BRACKETED_ID
-    {
-      #$$ = named_ref_new($1, @1)
-    }
    ;
  
  /*---------------------.
@@ -1031,9 +614,6 @@ named_ref.opt
  variable
    : ID
    | STRING
-    {
-      #$$ = uniqstr_new($1)
-    }
    ;
  
  /* Some content or empty by default. */
@@ -1059,25 +639,9 @@ variable
  
  value
    : %empty
-    {
-      #$$.kind = muscle_keyword
-      #$$.chars = ''
-    }
    | ID
-    {
-      #$$.kind = muscle_keyword
-      #$$.chars = $1
-    }
    | STRING
-    {
-      #$$.kind = muscle_string
-      #$$.chars = $1
-    }
    | "{...}"
-    {
-      #$$.kind = muscle_code
-      #$$.chars = strip_braces($1)
-    }
    ;
  
  /*--------------.
@@ -1089,22 +653,11 @@ value
  
  id
    : ID
-    {
-      #$$ = symbol_from_uniqstr($1, @1)
-    }
    | CHAR
-    {
-      #$$ = symbol_get(char_name($1), @1)
-      #symbol_class_set($$, token_sym, @1, False)
-      #symbol_user_token_number_set($$, $1, @1)
-    }
    ;
  
  id_colon
    : ID_COLON
-    {
-      #$$ = symbol_from_uniqstr($1, @1)
-    }
    ;
  
  symbol
@@ -1115,21 +668,11 @@ symbol
  /* A string used as an ID: quote it.  */
  string_as_id
    : STRING
-    {
-      #$$ = symbol_get(quotearg_style(c_quoting_style, $1), @1)
-      #symbol_class_set($$, token_sym, @1, False)
-    }
    ;
  
  epilogue.opt
    : %empty
-  | "%%" EPILOGUE
-    {
-      #muscle_code_grow('epilogue', translate_code($2, @2, True), @2)
-      #code_scanner_last_string_free()
-      insert_after(1, '</AST_Section3>')
-      insert_after(0, '<AST_Section3>')
-    }
+  | "%%" (?E{ast.AST.Section3}EPILOGUE %space)
    ;
  
  %%
@@ -1218,15 +761,3 @@ epilogue.opt
  #  current_lhs_location = loc
  #  free(current_lhs_named_ref)
  #  current_lhs_named_ref = ref
-
-def insert_before(n, str):
-  global gram_piece3
-  lex_yy.gram_piece_insert(gram_piece2 + n * 2, str)
-  lex_yy.gram_piece0 += 1
-  gram_piece3 += 1
-
-def insert_after(n, str):
-  global gram_piece3
-  lex_yy.gram_piece_insert(gram_piece2 + n * 2 + 1, str)
-  lex_yy.gram_piece0 += 1
-  gram_piece3 += 1
diff --git a/scan-gram.l b/scan-gram.l

index 4b3a89b..e6863a3 100644 (file)
--- a/scan-gram.l
+++ b/scan-gram.l
@@ -20,6 +20,7 @@
  %option debug nodefault noinput noyywrap never-interactive
  
  %{
+  import ast
    import state
    import y_tab
  
@@ -34,23 +35,17 @@
    #def gram_scanner_last_string_free():
    #  del obstack_for_string[:]
  
-  gram_piece = []
-  gram_piece0 = 0
-  gram_piece1 = 0
    markup_stack = []
  
-  percent_percent_count = 0;
+  percent_percent_count = 0
  
    # these should be yylex()-local, but moved to here, see further down:
    nesting = 0
    context_state = -1
    id_loc = state.location()
-  code_start = scanner_cursor.copy()
-  token_start = scanner_cursor.copy()
-  #first = True
-  if True: #first:
-    scanner_cursor = y_tab.yylloc.start.copy()
-    #first = False
+  code_start = None
+  token_start = None
+  scanner_cursor = state.boundary('<stdin>', 0, 0)
  %}
  
  %x SC_YACC_COMMENT
@@ -254,17 +249,13 @@ eqopt    ([[:space:]]*=)?
    "|"                          return y_tab.PIPE
    ";"                          return y_tab.SEMICOLON
  
-  {id} {
+  (?E{ast.AST.ID}{id}) {
      global id_loc, bracketed_id_str
      y_tab.yylval = yytext
      id_loc = y_tab.yylloc
      bracketed_id_str = None
+    markup_push(element.Element)
      BEGIN(SC_AFTER_IDENTIFIER)
-    gram_piece_pack()
-    gram_piece_append('<AST_ID>')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_ID>')
-    gram_piece_pack()
    }
  
    {int} {
@@ -285,10 +276,10 @@ eqopt    ([[:space:]]*=)?
      global token_start
      token_start = y_tab.yylloc.start
      BEGIN(SC_ESCAPED_CHARACTER)
-    gram_piece_pack()
-    markup_stack.append(len(gram_piece)) # <AST_Char>
-    gram_piece_flush(len(yytext))
-    markup_stack.append(len(gram_piece)) # <AST_Text>
+    markup_push(element.Element)
+    markup_push(ast.AST.Char)
+    markup_flush(len(yytext))
+    markup_push(ast.AST.Text)
    }
  
    /* Strings. */
@@ -296,10 +287,10 @@ eqopt    ([[:space:]]*=)?
      global token_start
      token_start = y_tab.yylloc.start
      BEGIN(SC_ESCAPED_STRING)
-    gram_piece_pack()
-    markup_stack.append(len(gram_piece)) # <AST_String>
-    gram_piece_flush(len(yytext))
-    markup_stack.append(len(gram_piece)) # <AST_Text>
+    markup_push(element.Element)
+    markup_push(ast.AST.String)
+    markup_flush(len(yytext))
+    markup_push(ast.AST.Text)
    }
  
    /* Prologue. */
@@ -307,10 +298,10 @@ eqopt    ([[:space:]]*=)?
      global code_start
      code_start = y_tab.yylloc.start
      BEGIN(SC_PROLOGUE)
-    gram_piece_pack()
-    markup_stack.append(len(gram_piece)) # <AST_Section1_Prologue>
-    gram_piece_flush(len(yytext))
-    markup_stack.append(len(gram_piece)) # <AST_Text>
+    markup_push(element.Element)
+    markup_push(ast.AST.Section1.Prologue)
+    markup_flush(len(yytext))
+    markup_push(ast.AST.Text)
    }
  
    /* Code in between braces.  */
@@ -320,10 +311,15 @@ eqopt    ([[:space:]]*=)?
      nesting = 0
      code_start = y_tab.yylloc.start
      BEGIN(SC_BRACED_CODE)
-    gram_piece_pack()
-    markup_stack.append(len(gram_piece)) # <AST_BracedCode>
-    gram_piece_flush(len(yytext))
-    markup_stack.append(len(gram_piece)) # <AST_Text>
+    markup_push(element.Element)
+    # new way, includes braces, wrapped by <AST_Production_Action> later
+    markup_push(ast.AST.Text)
+    markup_flush(len(yytext))
+    # old way 
+    #markup_push(ast.AST.BracedCode)
+    #markup_flush(len(yytext))
+    #markup_push(ast.AST.Text)
+    # to here
    }
  
    /* Semantic predicate. */
@@ -331,11 +327,11 @@ eqopt    ([[:space:]]*=)?
      global nesting, code_start
      nesting = 0
      code_start = y_tab.yylloc.start
-    gram_piece_pack()
-    markup_stack.append(len(gram_piece)) # <AST_BracedPredicate>
-    gram_piece_flush(len(yytext))
-    markup_stack.append(len(gram_piece)) # <AST_Text>
      BEGIN(SC_PREDICATE)
+    markup_push(element.Element)
+    markup_push(ast.AST.BracedPredicate)
+    markup_flush(len(yytext))
+    markup_push(ast.AST.Text)
    }
  
    /* Nick extra rules for element groups */
@@ -345,32 +341,28 @@ eqopt    ([[:space:]]*=)?
      nesting = 0
      code_start = y_tab.yylloc.start
      BEGIN(SC_ELEMENT_GROUP)
-    gram_piece_pack()
-    gram_piece_flush(len(yytext))
-    markup_stack.append(len(gram_piece)) # <AST_Text>
+    markup_push(element.Element)
+    markup_flush(len(yytext))
+    markup_push(ast.AST.Text)
    }
    ")"                          return ord(')')
  
    /* A type. */
-  "<*>" {
-    gram_piece_pack()
-    gram_piece_append('<AST_TagRef>&lt;<AST_Text>*</AST_Text>&gt;</AST_TagRef>')
-    return ~y_tab.TAG_ANY
+  "(?E{ast.AST.TagRef}<(?E{ast.AST.Text}*)>)" {
+    return y_tab.TAG_ANY
    }
-  "<>" {
-    gram_piece_pack()
-    gram_piece_append('<AST_TagRef>&lt;<AST_Text />&gt;</AST_TagRef>')
-    return ~y_tab.TAG_NONE
+  "(?E{ast.AST.TagRef}<(?E{ast.AST.Text})>)" {
+    return y_tab.TAG_NONE
    }
    "<" {
      global nesting, token_start
      nesting = 0
      token_start = y_tab.yylloc.start
      BEGIN(SC_TAG)
-    gram_piece_pack()
-    markup_stack.append(len(gram_piece)) # <AST_TagRef>
-    gram_piece_flush(len(yytext))
-    markup_stack.append(len(gram_piece)) # <AST_Text>
+    markup_push(element.Element)
+    markup_push(ast.AST.TagRef)
+    markup_flush(len(yytext))
+    markup_push(ast.AST.Text)
    }
  
    "%%" {
@@ -378,11 +370,6 @@ eqopt    ([[:space:]]*=)?
      percent_percent_count += 1
      if percent_percent_count == 2:
        BEGIN(SC_EPILOGUE)
-      gram_piece_pack()
-      gram_piece_escape(yytext)
-      gram_piece_pack()
-      gram_piece_pack()
-      return ~y_tab.PERCENT_PERCENT
      return y_tab.PERCENT_PERCENT
    }
  
@@ -425,32 +412,43 @@ eqopt    ([[:space:]]*=)?
      global bracketed_id_start, bracketed_id_context_state
      if bracketed_id_str is not None:
        scanner_cursor.column -= len(yytext)
-      yyless(0)
+      markup_yyless(0)
+      markup_pop_token() # element.Element
        BEGIN(SC_RETURN_BRACKETED_ID)
        y_tab.yylloc = id_loc
        return y_tab.ID
      else:
+      markup_pop_token() # element.Element
        bracketed_id_start = y_tab.yylloc.start
        bracketed_id_context_state = YY_START()
        BEGIN(SC_BRACKETED_ID)
    }
    ":" {
+    markup_pop_token() # element.Element
      BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
      y_tab.yylloc = id_loc
-    gram_piece_escape(yytext)
-    return ~y_tab.ID_COLON
+    markup_flush(len(yytext))
+    return y_tab.ID_COLON
    }
    . {
      scanner_cursor.column -= len(yytext)
-    yyless(0)
+    markup_yyless(0)
+
+    # total kludge: put back all whitespace/comments after the ID, and rescan
+    # (this will mess up the position tracking, need to revisit and fix later)
+    unput(element.get_text(yy_element_space, len(yy_element_space)))
+    element.set_text(yy_element_space, len(yy_element_space), '')
+
+    markup_pop_token() # element.Element
      BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
      y_tab.yylloc = id_loc
-    return ~y_tab.ID
+    return y_tab.ID
    }
    <<EOF>> {
+    markup_pop_token() # element.Element
      BEGIN(SC_RETURN_BRACKETED_ID if bracketed_id_str else INITIAL)
      y_tab.yylloc = id_loc
-    return ~y_tab.ID
+    return y_tab.ID
    }
  }
  
@@ -494,7 +492,7 @@ eqopt    ([[:space:]]*=)?
    . {
      global bracketed_id_str
      scanner_cursor.column -= len(yytext)
-    yyless(0)
+    markup_yyless(0)
      y_tab.yylval = bracketed_id_str
      bracketed_id_str = None
      y_tab.yylloc = bracketed_id_loc
@@ -565,13 +563,11 @@ eqopt    ([[:space:]]*=)?
      y_tab.yylloc.start = token_start
      y_tab.yylval = gram_last_string
      BEGIN(INITIAL)
-    gram_piece_insert(markup_stack.pop(), '<AST_Text>')
-    gram_piece_append('</AST_Text>')
-    gram_piece_escape(yytext)
-    gram_piece_insert(markup_stack.pop(), '<AST_String>')
-    gram_piece_append('</AST_String>')
-    gram_piece_pack()
-    return ~y_tab.STRING
+    markup_pop() # ast.AST.Text
+    markup_flush(len(yytext))
+    markup_pop() # ast.AST.String
+    markup_pop_token() # element.Element
+    return y_tab.STRING
    }
    <<EOF>>                      unexpected_eof(token_start, '"')
    "\n"                         unexpected_newline(token_start, '"')
@@ -598,13 +594,11 @@ eqopt    ([[:space:]]*=)?
        y_tab.yylval = ord(gram_last_string[0])
      #del obstack_for_string[:]
      BEGIN(INITIAL)
-    gram_piece_insert(markup_stack.pop(), '<AST_Text>')
-    gram_piece_append('</AST_Text>')
-    gram_piece_escape(yytext)
-    gram_piece_insert(markup_stack.pop(), '<AST_Char>')
-    gram_piece_append('</AST_Char>')
-    gram_piece_pack()
-    return ~y_tab.CHAR
+    markup_pop() # ast.AST.Text
+    markup_flush(len(yytext))
+    markup_pop() # ast.AST.Char
+    markup_pop_token() # element.Element
+    return y_tab.CHAR
    }
    "\n"                         unexpected_newline(token_start, '\'')
    <<EOF>>                      unexpected_eof(token_start, '\'')
@@ -628,13 +622,11 @@ eqopt    ([[:space:]]*=)?
        y_tab.yylval = gram_last_string
        #del obstack_for_string[:]
        BEGIN(INITIAL)
-      gram_piece_insert(markup_stack.pop(), '<AST_Text>')
-      gram_piece_append('</AST_Text>')
-      gram_piece_escape(yytext)
-      gram_piece_insert(markup_stack.pop(), '<AST_TagRef>')
-      gram_piece_append('</AST_TagRef>')
-      gram_piece_pack()
-      return ~y_tab.TAG
+      markup_pop() # ast.AST.Text
+      markup_flush(len(yytext))
+      markup_pop() # ast.AST.TagRef
+      markup_pop_token() # element.Element
+      return y_tab.TAG
      obstack_for_string.append(yytext)
    }
  
@@ -654,92 +646,55 @@ eqopt    ([[:space:]]*=)?
  
  <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
  {
-  \\[0-7]{1,3} {
-    c = strtoul(yytext + 1, None, 8)
+  (?E{ast.AST.Text.Escape, character = int(yy_groups[2], 8)}\\([0-7]{1,3})) {
+    c = int(yytext[1:], 8) # skip the leading backslash (str + int would raise TypeError)
     if not c or 0x7f * 2 + 1 < c:
       state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
     else:
       obstack_for_string.append(chr(c))
-      rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', int(c))
-      gram_piece_append(gram_piece_temp)
-      gram_piece_flush(len(yytext))
-      gram_piece_append('</AST_Text_Escape>')
   }
  
-  \\x[0-9abcdefABCDEF]+ {
-    c = strtoul(yytext + 2, None, 16)
+  (?E{ast.AST.Text.Escape, character = int(yy_groups[2], 16)}\\x([0-9abcdefABCDEF]+)) {
+    c = int(yytext[2:], 16) # skip the leading backslash and 'x' (str + int would raise TypeError)
     if not c or 0x7f * 2 + 1 < c:
       state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
     else:
       obstack_for_string.append(chr(c))
-      rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', int(c))
-      gram_piece_append(gram_piece_temp)
-      gram_piece_flush(len(yytext))
-      gram_piece_append('</AST_Text_Escape>')
   }
  
-  \\a {
+  (?E{ast.AST.Text.Escape, character = 7}\\a) {
      obstack_for_string.append('\a')
-    gram_piece_append('<AST_Text_Escape character="7">')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
    }
-  \\b {
+  (?E{ast.AST.Text.Escape, character = 8}\\b) {
      obstack_for_string.append('\b')
-    gram_piece_append('<AST_Text_Escape character="8">')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
    }
-  \\f {
+  (?E{ast.AST.Text.Escape, character = 12}\\f) {
      obstack_for_string.append('\f')
-    gram_piece_append('<AST_Text_Escape character="12">')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
    }
-  \\n {
+  (?E{ast.AST.Text.Escape, character = 10}\\n) {
      obstack_for_string.append('\n')
-    gram_piece_append('<AST_Text_Escape character="10">')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
    }
-  \\r {
+  (?E{ast.AST.Text.Escape, character = 13}\\r) {
      obstack_for_string.append('\r')
-    gram_piece_append('<AST_Text_Escape character="13">')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
    }
-  \\t {
+  (?E{ast.AST.Text.Escape, character = 9}\\t) {
      obstack_for_string.append('\t')
-    gram_piece_append('<AST_Text_Escape character="9">')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
    }
-  \\v {
+  (?E{ast.AST.Text.Escape, character = 11}\\v) {
      obstack_for_string.append('\v')
-    gram_piece_append('<AST_Text_Escape character="11">')
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
    }
  
    /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
-  \\("\""|"'"|"?"|"\\") {
+  (?E{ast.AST.Text.Escape, character = ord(yy_groups[2])}\\("\""|"'"|"?"|"\\")) {
      obstack_for_string.append(yytext[1])
-    rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', yytext[1])
-    gram_piece_append(gram_piece_temp)
-    gram_piece_flush(len(yytext))
-    gram_piece_append('</AST_Text_Escape>')
    }
   
-  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
+  (?E{ast.AST.Text.Escape, character = int(('' if yy_groups[3] is None else yy_groups[3]) + yy_groups[4], 16)}\\(u|U([0-9abcdefABCDEF]{4}))([0-9abcdefABCDEF]{4})) {
      c = convert_ucn_to_byte(yytext)
      if c <= 0:
        state.complain(y_tab.yylloc, state.complaint, 'invalid number after \\-escape: {0:s}'.format(yytext[1:]))
      else:
        obstack_for_string.append(chr(c))
-      rpl_sprintf(gram_piece_temp, '<AST_Text_Escape character="%d">', c)
-      gram_piece_append(gram_piece_temp)
-      gram_piece_flush(len(yytext))
-      gram_piece_append('</AST_Text_Escape>')
    }
    \\(.|\n) {
      p = yytext[1:]
@@ -857,13 +812,16 @@ eqopt    ([[:space:]]*=)?
        y_tab.yylloc.start = code_start
        y_tab.yylval = gram_last_string
        BEGIN(INITIAL)
-      gram_piece_insert(markup_stack.pop(), '<AST_Text>')
-      gram_piece_append('</AST_Text>')
-      gram_piece_escape(yytext)
-      gram_piece_insert(markup_stack.pop(), '<AST_BracedCode>')
-      gram_piece_append('</AST_BracedCode>')
-      gram_piece_pack()
-      return ~y_tab.BRACED_CODE
+      # new way, includes braces, wrapped by <AST_Production_Action> later
+      markup_flush(len(yytext))
+      markup_pop() # ast.AST.Text
+      # old way
+      #markup_pop() # ast.AST.Text
+      #markup_flush(len(yytext))
+      #markup_pop() # ast.AST.BracedCode
+      # to here
+      markup_pop_token() # element.Element
+      return y_tab.BRACED_CODE
    }
  }
  
@@ -878,13 +836,11 @@ eqopt    ([[:space:]]*=)?
        y_tab.yylloc.start = code_start
        y_tab.yylval = gram_last_string
        BEGIN(INITIAL)
-      gram_piece_insert(markup_stack.pop(), '<AST_Text>')
-      gram_piece_append('</AST_Text>')
-      gram_piece_escape(yytext)
-      gram_piece_insert(markup_stack.pop(), '<AST_BracedPredicate>')
-      gram_piece_append('</AST_BracedPredicate>')
-      gram_piece_pack()
-      return ~y_tab.BRACED_PREDICATE
+      markup_pop() # ast.AST.Text
+      markup_flush(len(yytext))
+      markup_pop() # ast.AST.BracedPredicate
+      markup_pop_token() # element.Element
+      return y_tab.BRACED_PREDICATE
      else:
        obstack_for_string.append('}')
    }
@@ -904,11 +860,10 @@ eqopt    ([[:space:]]*=)?
        y_tab.yylval = gram_last_string
        #del obstack_for_string[:]
        BEGIN(INITIAL)
-      gram_piece_insert(markup_stack.pop(), '<AST_Text>')
-      gram_piece_append('</AST_Text>')
-      gram_piece_escape(yytext)
-      gram_piece_pack()
-      return ~ord('(')
+      markup_pop() # ast.AST.Text
+      markup_flush(len(yytext))
+      markup_pop_token() # element.Element
+      return ord('(')
    }
  }
  
@@ -925,13 +880,11 @@ eqopt    ([[:space:]]*=)?
      y_tab.yylloc.start = code_start
      y_tab.yylval = gram_last_string
      BEGIN(INITIAL)
-    gram_piece_insert(markup_stack.pop(), '<AST_Text>')
-    gram_piece_append('</AST_Text>')
-    gram_piece_escape(yytext)
-    gram_piece_insert(markup_stack.pop(), '<AST_Section1_Prologue>')
-    gram_piece_append('</AST_Section1_Prologue>')
-    gram_piece_pack()
-    return ~y_tab.PROLOGUE
+    markup_pop() # ast.AST.Text
+    markup_flush(len(yytext))
+    markup_pop() # ast.AST.Section1.Prologue
+    markup_pop_token() # element.Element
+    return y_tab.PROLOGUE
    }
  
    <<EOF>>                      unexpected_eof(code_start, '%}')
@@ -952,8 +905,7 @@ eqopt    ([[:space:]]*=)?
      y_tab.yylloc.start = code_start
      y_tab.yylval = gram_last_string
      BEGIN(INITIAL)
-    gram_piece_pack()
-    return ~y_tab.EPILOGUE
+    return y_tab.EPILOGUE
    }
  }
  
@@ -1003,12 +955,12 @@ def scan_integer(number, base, loc):
      num = 0x7fffffff
    return num
  
-#def convert_ucn_to_byte(ucn):
-#  code = strtoul(ucn + 2, None, 16)
-#  if 0x7f * 2 + 1 < code:
-#    return -1
-#  return code
-#
+def convert_ucn_to_byte(ucn):
+  code = int(ucn[2:], 16)
+  if 0x7f * 2 + 1 < code:
+    return -1
+  return code
+
  #def handle_syncline(args, loc):
  #  file = None
  #  lineno = strtoul(args, &file, 10)
@@ -1044,31 +996,52 @@ def unexpected_newline(start, token_end):
  #  del obstack_for_string[:]
  #  yy_delete_buffer(YY_CURRENT_BUFFER)
  
-def gram_piece_append(str):
-  gram_piece.append(str)
-
-def gram_piece_insert(n, str):
-  gram_piece[n:n] = [str]
-
-xml_escape = {'<': '&lt;', '>': '&gt;', '&': '&amp;'}
-def gram_piece_escape(str):
-  gram_piece.append(''.join([xml_escape.get(i, i) for i in str]))
-
-def gram_piece_flush(n):
-  global yytext
-  gram_piece_escape(yytext[:n])
-  yytext = yytext[n:]
-
-def gram_piece_pack():
-  global gram_piece0
-  gram_piece[gram_piece0:] = [''.join(gram_piece[gram_piece0:])]
-  gram_piece0 += 1
-
-def gram_lex():
-  result = yylex()
-  if result < 0:
-    return ~result
-  gram_piece_pack()
-  gram_piece_escape(yytext)
-  gram_piece_pack()
-  return result
+# These helpers add markup to sequences that are recognized over several
+# iterations of yylex(). It would be better to use more complex regular
+# expressions that match each such sequence in one go.
+
+def markup_flush(n):
+  text = element.get_text(yy_element_token, 0)
+  element.set_text(
+    yy_element_space,
+    len(yy_element_space),
+    element.get_text(yy_element_space, len(yy_element_space)) + text[:n]
+  )
+  element.set_text(yy_element_token, 0, text[n:])
+
+def markup_yyless(n):
+  yyless(n)
+  element.set_text(
+    yy_element_token,
+    0,
+    element.get_text(yy_element_token, 0)[:n]
+  )
+
+def markup_push(factory, *args, **kwargs):
+  global yy_element_space
+  markup_stack.append(yy_element_space)
+  yy_element_space = factory(*args, **kwargs)
+
+def markup_pop():
+  global yy_element_space
+  _element = yy_element_space
+  yy_element_space = markup_stack.pop()
+  yy_element_space.append(_element)
+  #element.set_text(yy_element_space, len(yy_element_space), '')
+
+def markup_pop_token():
+  global yy_element_space, yy_element_token
+
+  # append yy_element_token contents onto yy_element_space
+  i = len(yy_element_space)
+  element.set_text(
+    yy_element_space,
+    i,
+    element.get_text(yy_element_space, i) +
+    element.get_text(yy_element_token, 0)
+  )
+  yy_element_space[i:] = yy_element_token[:]
+
+  # exchange, so that space is now prepended onto token
+  yy_element_token = yy_element_space
+  yy_element_space = markup_stack.pop()
diff --git a/skel/skel_py.py b/skel/skel_py.py

index cb6d892..ed700a0 100644 (file)
--- a/skel/skel_py.py
+++ b/skel/skel_py.py
@@ -24,6 +24,21 @@
  # the GNU General Public License without this special exception.
  
  import bisect
+import lex_yy
+
+# this can be redefined in SECTION1
+class YYLTYPE:
+  def __init__(
+    self,
+    first_line = 0,
+    first_column = 0,
+    last_line = 0,
+    last_column = 0
+  ):
+    self.first_line = first_line
+    self.first_column = first_column
+    self.last_line = last_line
+    self.last_column = last_column
  
  # GENERATE SECTION1
  
@@ -37,7 +52,7 @@ yyval = None
  yyloc = None
  
  yylval = None
-yylloc = None
+yylloc = YYLTYPE()
  
  # GENERATE SECTION2
  
@@ -56,9 +71,9 @@ def yyparse():
      if reduce == -1:
        if yychar == -1:
          yylval = None
-        yylloc = None
-        yychar = yylex()
-        print('yychar', yychar, 'yylval', yylval, 'yylloc', yylloc)
+        yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
+        yychar = lex_yy.yylex()
+        #print('yychar', yychar, 'yylval', yylval, 'yylloc', yylloc, 'lex_yy.yytext', lex_yy.yytext)
        action = yy_lr1dfa_states[state][1][
          bisect.bisect_right(yy_lr1dfa_states[state][0], yychar)
        ]
diff --git a/skel/skel_py_element.py b/skel/skel_py_element.py

index e9c9780..843d913 100644 (file)
--- a/skel/skel_py_element.py
+++ b/skel/skel_py_element.py
@@ -28,6 +28,20 @@ import element
  import lex_yy
  #import xml.etree.ElementTree
  
+# this can be redefined in SECTION1
+class YYLTYPE: # default location record: first/last line and column of a span
+  def __init__(
+    self,
+    first_line = 0,
+    first_column = 0,
+    last_line = 0,
+    last_column = 0
+  ): # every field defaults to 0, so YYLTYPE() can be called with no arguments
+    self.first_line = first_line
+    self.first_column = first_column
+    self.last_line = last_line
+    self.last_column = last_column
+ 
  # GENERATE SECTION1
  
  # GENERATE TOKENS
@@ -40,7 +54,7 @@ yyval = None
  yyloc = None
  
  yylval = None
-yylloc = None
+yylloc = YYLTYPE()
  
  yy_element_stack = None
  
@@ -63,7 +77,7 @@ def yyparse(factory, *args, **kwargs):
      if reduce == -1:
        if yychar == -1:
          yylval = None
-        yylloc = None
+        yylloc = YYLTYPE() # temporary until lex_yy updated, should be None
          yychar = lex_yy.yylex()
          #print('yychar', yychar, 'yylval', yylval, 'yylloc', yylloc, 'lex_yy.yytext', lex_yy.yytext)
          #print('lex_yy.yy_element_space')
@@ -108,8 +122,7 @@ def yyparse(factory, *args, **kwargs):
      yy_element_stack[base * 2 + 1:] = [
        element.concatenate(
          yy_element_stack[base * 2 + 1:],
-        element.Element,
-        'root'
+        element.Element
        )
      ]
author	Nick Downing <nick@ndcode.org>
	Mon, 21 Jan 2019 06:19:37 +0000 (17:19 +1100)
committer	Nick Downing <nick@ndcode.org>
	Mon, 21 Jan 2019 06:19:37 +0000 (17:19 +1100)
Makefile		patch \| blob \| history
bootstrap/parse-gram.y		patch \| blob \| history
bootstrap/scan-gram.l		patch \| blob \| history
element.py		patch \| blob \| history
generate_bison.py		patch \| blob \| history
generate_py.py		patch \| blob \| history
markup.py		patch \| blob \| history
parse-gram.y		patch \| blob \| history
scan-gram.l		patch \| blob \| history
skel/skel_py.py		patch \| blob \| history
skel/skel_py_element.py		patch \| blob \| history