Add element-based Python scanner/parser in root of repository which builds AST direct...
authorNick Downing <nick@ndcode.org>
Sun, 20 Jan 2019 09:30:15 +0000 (20:30 +1100)
committerNick Downing <nick@ndcode.org>
Sun, 20 Jan 2019 09:30:15 +0000 (20:30 +1100)
.gitignore
Makefile [new file with mode: 0644]
markup.py [new file with mode: 0755]
n.sh [new file with mode: 0755]
parse.y [new file with mode: 0644]
reserialize.py [new file with mode: 0755]
scan.l [new file with mode: 0644]
state.py [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
index 1f8d7f0..499e059 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -1,21 +1,25 @@
 __pycache__
-bootstrap/*.xml
-bootstrap/lex_yy.py
-bootstrap/out
-bootstrap/y_tab.py
-lex-yacc-examples/*.c
-lex-yacc-examples/*.h
-lex-yacc-examples/*.o
-lex-yacc-examples/*.xml
-lex-yacc-examples/example4
-lex-yacc-examples/example7
-skel/skel_flex.c.orig
-tests/*.c
-tests/*.o
-tests/*.xml
-tests/cal
-tests/flex0
-tests/flex1
-tests/lex_yy.py
-tests_ast/*.xml
-tests_ast/lex_yy.py
+/*.xml
+/bootstrap/*.xml
+/bootstrap/lex_yy.py
+/bootstrap/out
+/bootstrap/y_tab.py
+/lex-yacc-examples/*.c
+/lex-yacc-examples/*.h
+/lex-yacc-examples/*.o
+/lex-yacc-examples/*.xml
+/lex-yacc-examples/example4
+/lex-yacc-examples/example7
+/lex_yy.py
+/out
+/skel/skel_flex.c.orig
+/tests/*.c
+/tests/*.o
+/tests/*.xml
+/tests/cal
+/tests/flex0
+/tests/flex1
+/tests/lex_yy.py
+/tests_ast/*.xml
+/tests_ast/lex_yy.py
+/y_tab.py
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..b5abfcd
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,12 @@
+all: lex_yy.py y_tab.py # default target: both generated Python modules
+
+lex_yy.py: scan.l # run the bootstrap flex with stderr captured to scan.l.xml, then feed that file to pilex.py
+       ../bootstrap_flex.git/src/flex -o /dev/null $< 2>$<.xml
+       ./pilex.py --element --python $<.xml
+
+y_tab.py: parse.y # run the bootstrap bison with stderr captured to parse.y.xml, then feed that file to piyacc.py
+       ../bootstrap_bison.git/src/bison -o /dev/null $< 2>$<.xml
+       ../piyacc.git/piyacc.py --element --python $<.xml
+
+clean: # remove the generated modules and every *.xml in this directory
+       rm -f lex_yy.py y_tab.py *.xml
diff --git a/markup.py b/markup.py
new file mode 100755 (executable)
index 0000000..caf5a41
--- /dev/null
+++ b/markup.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python3
+
+import ast
+import element
+import y_tab
+import sys
+
+element.serialize(y_tab.yyparse(ast.AST), sys.stdout) # serialize the value returned by y_tab.yyparse(ast.AST) to stdout; NOTE(review): input presumably comes from stdin (n.sh redirects the .l file there) -- confirm
diff --git a/n.sh b/n.sh
new file mode 100755 (executable)
index 0000000..b72211f
--- /dev/null
+++ b/n.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+if ! test -d out # first run only: build the reference *.xml.ok files with bootstrap/markup.py piped through ./reserialize.py
+then
+  mkdir out
+  bootstrap/markup.py <tests/cal.l |./reserialize.py >out/cal.l.xml.ok
+  bootstrap/markup.py <../bootstrap_flex.git/src/scan.l |./reserialize.py >out/scan.l.xml.ok
+  bootstrap/markup.py <../bootstrap_bison.git/src/scan-code.l |./reserialize.py >out/scan-code.l.xml.ok
+  bootstrap/markup.py <../bootstrap_bison.git/src/scan-gram.l |./reserialize.py >out/scan-gram.l.xml.ok
+  bootstrap/markup.py <../bootstrap_bison.git/src/scan-skel.l |./reserialize.py >out/scan-skel.l.xml.ok
+fi
+./markup.py <tests/cal.l >out/cal.l.xml # every run: regenerate each output with ./markup.py and compare it with its reference
+diff -q out/cal.l.xml.ok out/cal.l.xml
+./markup.py <../bootstrap_flex.git/src/scan.l >out/scan.l.xml
+diff -q out/scan.l.xml.ok out/scan.l.xml
+./markup.py <../bootstrap_bison.git/src/scan-code.l >out/scan-code.l.xml
+diff -q out/scan-code.l.xml.ok out/scan-code.l.xml
+./markup.py <../bootstrap_bison.git/src/scan-gram.l >out/scan-gram.l.xml
+diff -q out/scan-gram.l.xml.ok out/scan-gram.l.xml
+./markup.py <../bootstrap_bison.git/src/scan-skel.l >out/scan-skel.l.xml
+diff -q out/scan-skel.l.xml.ok out/scan-skel.l.xml
diff --git a/parse.y b/parse.y
new file mode 100644 (file)
index 0000000..9de59de
--- /dev/null
+++ b/parse.y
@@ -0,0 +1,993 @@
+/* parse.y - parser for flex input */
+
+%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
+%token TOK_OPTION TOK_OUTFILE TOK_PREFIX TOK_YYCLASS TOK_HEADER_FILE TOK_EXTRA_TYPE
+%token TOK_TABLES_FILE
+
+%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
+%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
+
+%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
+%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
+
+%left CCL_OP_DIFF CCL_OP_UNION
+
+/* Nick extra rules for action groups */
+%token TOK_ACTION_GROUP TOK_ELEMENT_GROUP
+
+/* Nick temporary, until options processing moved fully into parser */
+%token TOK_OPTION_OTHER
+
+/*
+ * POSIX and AT&T lex place the
+ * precedence of the repeat operator, {}, below that of concatenation.
+ * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
+ * Regular Expression (ERE) precedence that has the repeat operator
+ * higher than concatenation.  This causes ab{3} to yield abbb.
+ *
+ * In order to support the POSIX and AT&T precedence and the flex
+ * precedence we define two token sets for the begin and end tokens of
+ * the repeat operator, '{' and '}'.  The lexical scanner chooses
+ * which tokens to return based on whether posix_compat or lex_compat
+ * are specified. Specifying either posix_compat or lex_compat will
+ * cause flex to parse scanner files as per the AT&T and
+ * POSIX-mandated behavior.
+ */
+
+%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
+
+
+%{
+  import ast
+  import regex
+  import state
+
+  #pat = 0
+  #scnum = 0
+  #eps = 0
+  #headcnt = 0
+  #trailcnt = 0
+  #lastchar = 0
+  #i = 0
+  #rulelen = 0
+  trlcontxt = False  # True once the current rule has used '/' or '$'; reset by initforrule
+  xcluflg = False  # set by startconddecl: True for XSCDECL, False for SCDECL
+  #currccl = 0
+  #cclsorted = 0
+  #varlength = 0
+  #variable_trail_rule = 0
+  #scon_stk = []
+  #scon_stk_ptr = 0
+  #madeany = False
+  #ccldot = 0
+  #cclany = 0
+  #previous_continued_action = 0
+%}
+
+%%
+
+goal
+  : %space (?E{ast.AST.Section1}initlex sect1 %space) sect1end (?E{ast.AST.Section2}sect2 %space) sect2end initforrule
+    {
+      #def_rule = None
+      #pat = cclinit()
+      #cclnegate(pat)
+      #def_rule = mkstate(-pat)
+      #default_rule = num_rules
+      #finish_rule(def_rule, False, 0, 0, 0)
+      #i = 1
+      #while i <= lastsc:
+      #  scset[i] = mkbranch(scset[i], def_rule)
+      #  i += 1
+      #if spprdflt:
+      #  add_action('YY_FATAL_ERROR( "flex scanner jammed" )')
+      #else:
+      #  add_action('ECHO')
+      #add_action(';\n\tYY_BREAK]]\n')
+    }
+  ;
+
+initlex
+  :
+    {
+      #scinstal('INITIAL', False)
+    }
+  ;
+
+sect1
+  : sect1 %space (?E{ast.AST.Section1.StartConditions, exclusive = xcluflg} startconddecl namelist1)
+  | sect1 %space (?E{ast.AST.Section1.Options}options)
+  |
+  | error
+    {
+      state.synerr('unknown error processing section 1')
+    }
+  ;
+
+sect1end
+  : SECTEND
+    {
+      #check_options()
+      #scon_stk = allocate_array(lastsc + 1, 4)
+      #scon_stk_ptr = 0
+    }
+  ;
+
+/* Nick */
+sect2end
+  : SECTEND (?E{ast.AST.Section3}%space)
+  |
+  ;
+
+startconddecl
+  : SCDECL
+    {
+      global xcluflg
+      xcluflg = False
+    }
+  | XSCDECL
+    {
+      global xcluflg
+      xcluflg = True
+    }
+  ;
+
+namelist1
+  : namelist1 NAME
+    {
+      #scinstal(nmstr, xcluflg)
+    }
+  | NAME
+    {
+      #scinstal(nmstr, xcluflg)
+    }
+  | error
+    {
+      state.synerr('bad start condition list')
+    }
+  ;
+
+options
+  : TOK_OPTION optionlist
+  ;
+
+optionlist
+  : optionlist option
+  |
+  ;
+
+option
+  : %space (?E{ast.AST.Section1.Options.OutFile}TOK_OUTFILE '=' NAME)
+    {
+      #outfilename = xstrdup(nmstr)
+      #did_outfilename = 1
+    }
+  | %space (?E{ast.AST.Section1.Options.ExtraType}TOK_EXTRA_TYPE '=' NAME)
+    {
+      #extra_type = xstrdup(nmstr)
+    }
+  | %space (?E{ast.AST.Section1.Options.Prefix}TOK_PREFIX '=' NAME)
+    {
+      #prefix = xstrdup(nmstr)
+      #if strchr(prefix, ord('[')) or strchr(prefix, ord(']')):
+      #  flexerror('Prefix must not contain [ or ]')
+    }
+  | %space (?E{ast.AST.Section1.Options.YYClass}TOK_YYCLASS '=' NAME)
+    {
+      #yyclass = xstrdup(nmstr)
+    }
+  | %space (?E{ast.AST.Section1.Options.HeaderFile}TOK_HEADER_FILE '=' NAME)
+    {
+      #headerfilename = xstrdup(nmstr)
+    }
+  | %space (?E{ast.AST.Section1.Options.TablesFile}TOK_TABLES_FILE '=' NAME)
+    {
+      #tablesext = True
+      #tablesfilename = xstrdup(nmstr)
+    }
+  /* Nick */
+  | TOK_OPTION_OTHER
+  ;
+
+sect2
+  : sect2 %space (?E{ast.AST.Section2.Rule}scon initforrule flexrule '\n')
+    {
+      #scon_stk_ptr = $2
+    }
+  | sect2 %space (?E{ast.AST.Section2.CompoundRule}scon '{' sect2 '}')
+    {
+      #scon_stk_ptr = $2
+    }
+  |
+  ;
+
+initforrule
+  :
+    {
+      global trlcontxt
+      trlcontxt = False #variable_trail_rule = varlength = False
+      #trailcnt = headcnt = rulelen = 0
+      #current_state_type = 0x1
+      #previous_continued_action = continued_action
+      state.in_rule = True
+      #new_rule()
+    }
+  ;
+
+flexrule
+  : %space (?E{ast.AST.Section2.Rule.FLexRule, bol = True}'^' rule)
+    {
+      #pat = $2
+      #finish_rule(pat, variable_trail_rule, headcnt, trailcnt, previous_continued_action)
+      #if scon_stk_ptr > 0:
+      #  i = 1
+      #  while i <= scon_stk_ptr:
+      #    scbol[scon_stk[i]] = mkbranch(scbol[scon_stk[i]], pat)
+      #    i += 1
+      #else:
+      #  i = 1
+      #  while i <= lastsc:
+      #    if not scxclu[i]:
+      #      scbol[i] = mkbranch(scbol[i], pat)
+      #    i += 1
+      #if not bol_needed:
+      #  bol_needed = True
+      #  if performance_report > 1:
+      #    pinpoint_message('\'^\' operator results in sub-optimal performance')
+    }
+  | %space (?E{ast.AST.Section2.Rule.FLexRule}rule)
+    {
+      #pat = $1
+      #finish_rule(pat, variable_trail_rule, headcnt, trailcnt, previous_continued_action)
+      #if scon_stk_ptr > 0:
+      #  i = 1
+      #  while i <= scon_stk_ptr:
+      #    scset[scon_stk[i]] = mkbranch(scset[scon_stk[i]], pat)
+      #    i += 1
+      #else:
+      #  i = 1
+      #  while i <= lastsc:
+      #    if not scxclu[i]:
+      #      scset[i] = mkbranch(scset[i], pat)
+      #    i += 1
+    }
+  | %space (?E{ast.AST.Section2.Rule.EOFRule}EOF_OP)
+    {
+      #if scon_stk_ptr > 0:
+      #  build_eof_action()
+      #else:
+      #  i = 1
+      #  while i <= lastsc:
+      #    if not sceof[i]:
+      #      scon_stk[++scon_stk_ptr] = i
+      #    i += 1
+      #  if scon_stk_ptr == 0:
+      #    lwarn('all start conditions already have <<EOF>> rules')
+      #  else:
+      #    build_eof_action()
+    }
+  | error
+    {
+      state.synerr('unrecognized rule')
+    }
+  ;
+
+scon_stk_ptr
+  :
+    {
+      #$$ = scon_stk_ptr
+    }
+  ;
+
+scon
+  : %space (?E{ast.AST.Section2.StartConditions}'<' scon_stk_ptr namelist2 '>')
+    {
+      #$$ = $2
+    }
+  | %space (?E{ast.AST.Section2.StartConditions, wildcard = True}'<' '*' '>')
+    {
+      #$$ = scon_stk_ptr
+      #i = 1
+      #while i <= lastsc:
+      #  j = None
+      #  j = 1
+      #  while j <= scon_stk_ptr:
+      #    if scon_stk[j] == i:
+      #      break
+      #    j += 1
+      #  if j > scon_stk_ptr:
+      #    scon_stk[++scon_stk_ptr] = i
+      #  i += 1
+    }
+  | %space (?E{ast.AST.Section2.StartConditions})
+    {
+      #$$ = scon_stk_ptr
+    }
+  ;
+
+namelist2
+  : namelist2 ',' sconname
+  | sconname
+  | error
+    {
+      state.synerr('bad start condition list')
+    }
+  ;
+
+sconname
+  : NAME
+    {
+      #if (scnum = sclookup(nmstr)) == 0:
+      #  format_pinpoint_message('undeclared start condition %s', nmstr)
+      #else:
+      #  i = 1
+      #  while i <= scon_stk_ptr:
+      #    if scon_stk[i] == scnum:
+      #      format_warn('<%s> specified twice', scname[scnum])
+      #      break
+      #    i += 1
+      #  if i > scon_stk_ptr:
+      #    scon_stk[++scon_stk_ptr] = scnum
+    }
+  ;
+
+/* this rule handles trailing context, it must produce two separate regexes,
+ * where the first is the expression to be matched, and the second is the
+ * trailing context, RegexEmpty (matches empty string) if no trailing context
+ */
+rule
+  : re2 re
+    {
+      #if transchar[lastst[$2]] != 256 + 1:
+      #  $2 = link_machines($2, mkstate(256 + 1))
+      #mark_beginning_as_normal($2)
+      #current_state_type = 0x1
+      #if previous_continued_action:
+      #  if not varlength or headcnt != 0:
+      #    lwarn('trailing context made variable due to preceding \'|\' action')
+      #  varlength = True
+      #  headcnt = 0
+      #if lex_compat or varlength and headcnt == 0:
+      #  add_accept($1, num_rules | 0x4000)
+      #  variable_trail_rule = True
+      #else:
+      #  trailcnt = rulelen
+      #$$ = link_machines($1, $2)
+    }
+  | re2 re '$'
+    {
+      state.synerr('trailing context used twice')
+    }
+  | re %space (?E{regex.RegexCharacterLiteral, character_set = [10, 11]}'$')
+    {
+      global trlcontxt
+      #headcnt = 0
+      #trailcnt = 1
+      #rulelen = 1
+      #varlength = False
+      #current_state_type = 0x2
+      #if trlcontxt:
+      #  state.synerr('trailing context used twice')
+      #  $$ = mkstate(256 + 1)
+      #else:
+      #  if previous_continued_action:
+      #    lwarn('trailing context made variable due to preceding \'|\' action')
+      #    varlength = True
+      #if lex_compat or varlength:
+      #  add_accept($1, num_rules | 0x4000)
+      #  variable_trail_rule = True
+      trlcontxt = True
+      #eps = mkstate(256 + 1)
+      #$$ = link_machines($1, link_machines(eps, mkstate(ord('\n'))))
+    }
+  | re (?E{regex.RegexEmpty})
+    {
+      #$$ = $1
+      #if trlcontxt:
+      #  if lex_compat or varlength and headcnt == 0:
+      #    variable_trail_rule = True
+      #  else:
+      #    trailcnt = rulelen
+    }
+  ;
+
+re
+  : %space (?E{regex.RegexOr}re '|' series)
+    {
+      #varlength = True
+      #$$ = mkor($1, $3)
+    }
+  | series
+    {
+      #$$ = $1
+    }
+  ;
+
+re2
+  : re '/'
+    {
+      global trlcontxt
+      if trlcontxt:
+        state.synerr('trailing context used twice')
+      else:
+        trlcontxt = True
+      #if varlength:
+      #  varlength = False
+      #else:
+      #  headcnt = rulelen
+      #rulelen = 0
+      #current_state_type = 0x2
+      #$$ = $1
+    }
+  ;
+
+series
+  : %space (?E{regex.RegexSequence}series singleton)
+    {
+      #$$ = link_machines($1, $2)
+    }
+  | singleton
+    {
+      #$$ = $1
+    }
+  | %space (?E{regex.RegexRepeat, count0 = $3, count1 = $5}series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX)
+    {
+      #varlength = True
+      #if $3 > $5 or $3 < 0:
+      #  state.synerr('bad iteration values')
+      #  $$ = $1
+      #else:
+      #  if $3 == 0:
+      #    if $5 <= 0:
+      #      state.synerr('bad iteration values')
+      #      $$ = $1
+      #    else:
+      #      $$ = mkopt(mkrep($1, 1, $5))
+      #  else:
+      #    $$ = mkrep($1, $3, $5)
+    }
+  | %space (?E{regex.RegexRepeat, count0 = $3}series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX)
+    {
+      #varlength = True
+      #if $3 <= 0:
+      #  state.synerr('iteration value must be positive')
+      #  $$ = $1
+      #else:
+      #  $$ = mkrep($1, $3, -1)
+    }
+  | %space (?E{regex.RegexRepeat, count0 = $3, count1 = $3}series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX)
+    {
+      #varlength = True
+      #if $3 <= 0:
+      #  state.synerr('iteration value must be positive')
+      #  $$ = $1
+      #else:
+      #  $$ = link_machines($1, copysingl($1, $3 - 1))
+    }
+  ;
+
+singleton
+  : %space (?E{regex.RegexRepeat, count0 = 0}singleton '*')
+    {
+      #varlength = True
+      #$$ = mkclos($1)
+    }
+  | %space (?E{regex.RegexRepeat, count0 = 1}singleton '+')
+    {
+      #varlength = True
+      #$$ = mkposcl($1)
+    }
+  | %space (?E{regex.RegexRepeat, count0 = 0, count1 = 1}singleton '?')
+    {
+      #varlength = True
+      #$$ = mkopt($1)
+    }
+  | %space (?E{regex.RegexRepeat, count0 = $3, count1 = $5}singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX)
+    {
+      #varlength = True
+      #if $3 > $5 or $3 < 0:
+      #  state.synerr('bad iteration values')
+      #  $$ = $1
+      #else:
+      #  if $3 == 0:
+      #    if $5 <= 0:
+      #      state.synerr('bad iteration values')
+      #      $$ = $1
+      #    else:
+      #      $$ = mkopt(mkrep($1, 1, $5))
+      #  else:
+      #    $$ = mkrep($1, $3, $5)
+    }
+  | %space (?E{regex.RegexRepeat, count0 = $3}singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX)
+    {
+      #varlength = True
+      #if $3 <= 0:
+      #  state.synerr('iteration value must be positive')
+      #  $$ = $1
+      #else:
+      #  $$ = mkrep($1, $3, -1)
+    }
+  | %space (?E{regex.RegexRepeat, count0 = $3, count1 = $3}singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX)
+    {
+      #varlength = True
+      #if $3 <= 0:
+      #  state.synerr('iteration value must be positive')
+      #  $$ = $1
+      #else:
+      #  $$ = link_machines($1, copysingl($1, $3 - 1))
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [0, 256] if state._sf_stk[-1] & 2 else [0, 10, 11, 256]}'.')
+    {
+      #if not madeany:
+      #  ccldot = cclinit()
+      #  ccladd(ccldot, ord('\n'))
+      #  cclnegate(ccldot)
+      #  if useecs:
+      #    mkeccl(ccltbl + cclmap[ccldot], ccllen[ccldot], nextecm, ecgroup, csize, csize)
+      #  cclany = cclinit()
+      #  cclnegate(cclany)
+      #  if useecs:
+      #    mkeccl(ccltbl + cclmap[cclany], ccllen[cclany], nextecm, ecgroup, csize, csize)
+      #  madeany = True
+      #rulelen += 1
+      #if state._sf_stk[-1] & 2:
+      #  $$ = mkstate(-cclany)
+      #else:
+      #  $$ = mkstate(-ccldot)
+    }
+  | fullccl
+    {
+      #qsort(ccltbl + cclmap[$1], int(ccllen[$1]), sizeof *ccltbl, cclcmp)
+      #if useecs:
+      #  mkeccl(ccltbl + cclmap[$1], ccllen[$1], nextecm, ecgroup, csize, csize)
+      #rulelen += 1
+      #if ccl_has_nl[$1]:
+      #  rule_has_nl[num_rules] = True
+      #$$ = mkstate(-$1)
+    }
+  | PREVCCL
+    {
+      #rulelen += 1
+      #if ccl_has_nl[$1]:
+      #  rule_has_nl[num_rules] = True
+      #$$ = mkstate(-$1)
+    }
+  | '"' string '"'
+    {
+      #$$ = $2
+    }
+  | %space (?E{regex.RegexGroup}'(' re ')')
+    {
+      #$$ = $2
+    }
+  /* Nick extra rules for unnumbered groups */
+  | '(' ':' re ')'
+    {
+      #$$ = $3
+    }
+  /* Nick extra rules for named groups */
+  | %space (?E{regex.RegexGroupName}'(' NAME re ')')
+    {
+      #$$ = $3
+    }
+  /* Nick extra rules for action groups */
+  | %space (?E{regex.RegexGroupAction}'(' TOK_ACTION_GROUP re ')')
+    {
+      #$$ = $3
+    }
+  | %space (?E{regex.RegexGroupElement}'(' TOK_ELEMENT_GROUP re ')')
+    {
+      #$$ = $3
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [$1, $1 + 1]}CHAR)
+    {
+      #rulelen += 1
+      #if $1 == nlch:
+      #  rule_has_nl[num_rules] = True
+      #if state._sf_stk[-1] & 1 and has_case($1):
+      #  $$ = mkor(mkstate($1), mkstate(reverse_case($1)))
+      #else:
+      #  $$ = mkstate($1)
+    }
+  ;
+
+fullccl
+  /* note: ) ) should be )), work around bootstrap_bison.git issue for now */
+  : %space (?E{regex.RegexCharacterAnd}fullccl CCL_OP_DIFF %space (?E{regex.RegexCharacterNot}braceccl) )
+    {
+      #$$ = ccl_set_diff($1, $3)
+    }
+  | %space (?E{regex.RegexCharacterOr}fullccl CCL_OP_UNION braceccl)
+    {
+      #$$ = ccl_set_union($1, $3)
+    }
+  | braceccl
+  ;
+
+braceccl
+  : '[' ccl ']'
+    {
+      #$$ = $2
+    }
+  | '[' %space (?E{regex.RegexCharacterNot}'^' ccl) ']'
+    {
+      #cclnegate($3)
+      #$$ = $3
+    }
+  ;
+
+ccl
+  /* note: ) ) should be )), work around bootstrap_bison.git issue for now */
+  : %space (?E{regex.RegexCharacterOr}ccl %space (?E{regex.RegexCharacterLiteral, character_set = [$2, $4 + 1]}CHAR '-' CHAR) )
+    {
+      #if state._sf_stk[-1] & 1:
+      #  if has_case($2) != has_case($4) or has_case($2) and (True if (*__ctype_b_loc())[int($2)] & int(_ISlower) else False) != (True if (*__ctype_b_loc())[int($4)] & int(_ISlower) else False) or has_case($2) and (True if (*__ctype_b_loc())[int($2)] & int(_ISupper) else False) != (True if (*__ctype_b_loc())[int($4)] & int(_ISupper) else False):
+      #    fw3_msg = []
+      #    snprintf(fw3_msg, 2048, 'the character range [%c-%c] is ambiguous in a case-insensitive scanner', $2, $4)
+      #    lwarn(fw3_msg)
+      #  else:
+      #    if not has_case($2) and not has_case($4) and not range_covers_case($2, $4):
+      #      fw3_msg = []
+      #      snprintf(fw3_msg, 2048, 'the character range [%c-%c] is ambiguous in a case-insensitive scanner', $2, $4)
+      #      lwarn(fw3_msg)
+      #if $2 > $4:
+      #  state.synerr('negative range in character class')
+      #else:
+      #  i = $2
+      #  while i <= $4:
+      #    ccladd($1, i)
+      #    i += 1
+      #  cclsorted = cclsorted and $2 > lastchar
+      #  lastchar = $4
+      #  if state._sf_stk[-1] & 1 and has_case($2) and has_case($4):
+      #    $2 = reverse_case($2)
+      #    $4 = reverse_case($4)
+      #    i = $2
+      #    while i <= $4:
+      #      ccladd($1, i)
+      #      i += 1
+      #    cclsorted = cclsorted and $2 > lastchar
+      #    lastchar = $4
+      #$$ = $1
+    }
+  /* note: ) ) should be )), work around bootstrap_bison.git issue for now */
+  | %space (?E{regex.RegexCharacterOr}ccl %space (?E{regex.RegexCharacterLiteral, character_set = [$2, $2 + 1]}CHAR) )
+    {
+      #ccladd($1, $2)
+      #cclsorted = cclsorted and $2 > lastchar
+      #lastchar = $2
+      #if state._sf_stk[-1] & 1 and has_case($2):
+      #  $2 = reverse_case($2)
+      #  ccladd($1, $2)
+      #  cclsorted = cclsorted and $2 > lastchar
+      #  lastchar = $2
+      #$$ = $1
+    }
+  | %space (?E{regex.RegexCharacterOr}ccl ccl_expr)
+    {
+      #cclsorted = False
+      #$$ = $1
+    }
+  | %space (?E{regex.RegexCharacterLiteral})
+    {
+      #cclsorted = True
+      #lastchar = 0
+      #currccl = $$ = cclinit()
+    }
+  ;
+
+ccl_expr
+  : %space (?E{regex.RegexCharacterLiteral, character_set = [48, 58, 65, 91, 97, 123]}CCE_ALNUM)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISalnum):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [65, 91, 97, 123]}CCE_ALPHA)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISalpha):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [9, 10, 32, 33]}CCE_BLANK)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (c == ord(' ') or c == ord('\t')):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [0, 32, 127, 128]}CCE_CNTRL)
+    {
+      #c = None  (note: iscntrl() holds for 0..31 and DEL (127), hence the half-open ranges [0, 32) and [127, 128) above)
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_IScntrl):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [48, 58]}CCE_DIGIT)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISdigit):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [33, 127]}CCE_GRAPH)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISgraph):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [65, 91, 97, 123] if state._sf_stk[-1] & 1 else [97, 123]}CCE_LOWER)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISlower):
+      #    ccladd(currccl, c)
+      #  c += 1
+      #if state._sf_stk[-1] & 1:
+      #  c = None
+      #  c = 0
+      #  while c < csize:
+      #    if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISupper):
+      #      ccladd(currccl, c)
+      #    c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [32, 127]}CCE_PRINT)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISprint):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [33, 48, 58, 65, 91, 97, 123, 127]}CCE_PUNCT)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISpunct):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [9, 14, 32, 33]}CCE_SPACE)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISspace):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [48, 58, 65, 71, 97, 103]}CCE_XDIGIT)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISxdigit):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [65, 91, 97, 123] if state._sf_stk[-1] & 1 else [65, 91]}CCE_UPPER)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISupper):
+      #    ccladd(currccl, c)
+      #  c += 1
+      #if state._sf_stk[-1] & 1:
+      #  c = None
+      #  c = 0
+      #  while c < csize:
+      #    if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISlower):
+      #      ccladd(currccl, c)
+      #    c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [0, 48, 58, 65, 91, 97, 123, 256]}CCE_NEG_ALNUM)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISalnum)):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [0, 65, 91, 97, 123, 256]}CCE_NEG_ALPHA)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISalpha)):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [0, 9, 10, 32, 33, 256]}CCE_NEG_BLANK)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not (c == ord(' ') or c == ord('\t')):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [32, 127, 128, 256]}CCE_NEG_CNTRL)
+    {
+      #c = None  (note: everything in 0..255 for which iscntrl() is false, i.e. 32..126 and 128..255; DEL (127) is a control character)
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_IScntrl)):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [0, 48, 58, 256]}CCE_NEG_DIGIT)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISdigit)):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [0, 33, 127, 256]}CCE_NEG_GRAPH)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISgraph)):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [0, 32, 127, 256]}CCE_NEG_PRINT)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISprint)):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [0, 33, 48, 58, 65, 91, 97, 123, 127, 256]}CCE_NEG_PUNCT)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISpunct)):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [0, 9, 14, 32, 33, 256]}CCE_NEG_SPACE)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISspace)):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [0, 48, 58, 65, 71, 97, 103, 256]}CCE_NEG_XDIGIT)
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISxdigit)):
+      #    ccladd(currccl, c)
+      #  c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [0, 97, 123, 256]}CCE_NEG_LOWER)
+    {
+      #if state._sf_stk[-1] & 1:
+      #  lwarn('[:^lower:] is ambiguous in case insensitive scanner')
+      #else:
+      #  c = None
+      #  c = 0
+      #  while c < csize:
+      #    if not ((*__ctype_b_loc())[int(c)] & int(_ISlower)):
+      #      ccladd(currccl, c)
+      #    c += 1
+    }
+  | %space (?E{regex.RegexCharacterLiteral, character_set = [0, 65, 91, 256]}CCE_NEG_UPPER)
+    {
+      #if state._sf_stk[-1] & 1:
+      #  lwarn('[:^upper:] ambiguous in case insensitive scanner')
+      #else:
+      #  c = None
+      #  c = 0
+      #  while c < csize:
+      #    if not ((*__ctype_b_loc())[int(c)] & int(_ISupper)):
+      #      ccladd(currccl, c)
+      #    c += 1
+    }
+  ;
+
+string
+  /* note: ) ) should be )), work around bootstrap_bison.git issue for now */
+  : %space (?E{regex.RegexSequence}string (?E{regex.RegexCharacterLiteral, character_set = [$2, $2 + 1]}CHAR) )
+    {
+      #if $2 == nlch:
+      #  rule_has_nl[num_rules] = True
+      #rulelen += 1
+      #if state._sf_stk[-1] & 1 and has_case($2):
+      #  $$ = mkor(mkstate($2), mkstate(reverse_case($2)))
+      #else:
+      #  $$ = mkstate($2)
+      #$$ = link_machines($1, $$)
+    }
+  | %space (?E{regex.RegexEmpty})
+    {
+      #$$ = mkstate(256 + 1)
+    }
+  ;
+
+%%
+
+#def build_eof_action():
+#  i = None
+#  action_text = []
+#  i = 1
+#  while i <= scon_stk_ptr:
+#    if sceof[scon_stk[i]]:
+#      format_pinpoint_message('multiple <<EOF>> rules for start condition %s', scname[scon_stk[i]])
+#    else:
+#      sceof[scon_stk[i]] = True
+#      if previous_continued_action:
+#        add_action('YY_RULE_SETUP\n')
+#      snprintf(action_text, sizeof action_text, 'case YY_STATE_EOF(%s):\n', scname[scon_stk[i]])
+#      add_action(action_text)
+#    i += 1
+#  line_directive_out(None, 1)
+#  add_action('[[')
+#  num_rules -= 1
+#  num_eof_rules += 1
+#
+#def format_synerr(msg, arg):
+#  errmsg = []
+#  NoneType(snprintf(errmsg, sizeof errmsg, msg, arg))
+#  state.synerr(errmsg)
+#
+#def synerr(str):
+#  syntaxerror = True
+#  pinpoint_message(str)
+#
+#def format_warn(msg, arg):
+#  warn_msg = []
+#  snprintf(warn_msg, sizeof warn_msg, msg, arg)
+#  lwarn(warn_msg)
+#
+#def lwarn(str):
+#  line_warning(str, linenum)
+#
+#def format_pinpoint_message(msg, arg):
+#  errmsg = []
+#  snprintf(errmsg, sizeof errmsg, msg, arg)
+#  pinpoint_message(errmsg)
+#
+#def pinpoint_message(str):
+#  line_pinpoint(str, linenum)
+#
+#def line_warning(str, line):
+#  warning = []
+#  if not nowarn:
+#    snprintf(warning, sizeof warning, 'warning, %s', str)
+#    line_pinpoint(warning, line)
+#
+#def line_pinpoint(str, line):
+#  fprintf(stderr, '%s:%d: %s\n', infilename, line, str)
+
+def yyerror(msg):  # parser error callback; msg is ignored
+  pass  # no-op; NOTE(review): the grammar's `error` alternatives report through state.synerr(), presumably why this stays silent -- confirm
diff --git a/reserialize.py b/reserialize.py
new file mode 100755 (executable)
index 0000000..b8346d3
--- /dev/null
+++ b/reserialize.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+
+import ast
+import element
+import sys
+
+element.serialize(element.deserialize(sys.stdin, ast.factory), sys.stdout) # round trip: deserialize stdin via ast.factory, then serialize the result to stdout
diff --git a/scan.l b/scan.l
new file mode 100644 (file)
index 0000000..f4392d7
--- /dev/null
+++ b/scan.l
@@ -0,0 +1,1634 @@
+/* scan.l - scanner for flex input -*-C-*- */
+
+%{
+  import ast
+  import regex
+  import state
+  import y_tab
+
+  # presumably the working stack behind markup_push() / markup_pop(); those
+  # helpers are defined outside this part of the file -- TODO confirm
+  markup_stack = []
+
+  # these should be yylex()-local, but moved to here, see further down:
+  # brace nesting level: reset by the section separator rule, adjusted and
+  # tested by the section 2 prolog rules
+  bracelevel = 0
+  # set once PICKUPDEF has installed a definition for the current name
+  didadef = False
+  # True while the current CODEBLOCK was entered from an indented line, in
+  # which case the block ends at the next newline
+  indented_code = False
+  # set when section 2 starts scanning a rule action
+  doing_rule_action = False
+  #option_sense = False
+
+  # set when section 2 opens a code block at the start of a line
+  doing_codeblock = False
+  # nesting depth of braces inside a %top block
+  brace_depth = 0
+  # line on which the current %top block started (used for the error report)
+  brace_start_line = 0
+%}
+
+%option caseless nodefault noreject stack noyy_top_state
+%option nostdinit
+
+%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
+%x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
+%x OPTION LINEDIR CODEBLOCK_MATCH_BRACE
+%x GROUP_WITH_PARAMS
+%x GROUP_MINUS_PARAMS
+%x EXTENDED_COMMENT
+%x COMMENT_DISCARD CODE_COMMENT
+%x SECT3_NOESCAPE
+%x CHARACTER_CONSTANT
+/* Nick extra rules for action groups */
+%x ACTION_GROUP ELEMENT_GROUP DOUBLE_QUOTED SINGLE_QUOTED
+
+WS             [[:blank:]]+
+OPTWS          [[:blank:]]*
+NOT_WS         [^[:blank:]\r\n]
+
+NL             \r?\n
+
+NAME           ([[:alpha:]_][[:alnum:]_-]*)
+NOT_NAME       [^[:alpha:]_*\n]+
+
+SCNAME         {NAME}
+
+ESCSEQ         (\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))
+
+FIRST_CCL_CHAR ([^\\\n]|{ESCSEQ})
+CCL_CHAR       ([^\\\n\]]|{ESCSEQ})
+CCL_EXPR       ("[:"^?[[:alpha:]]+":]")
+
+LEXOPT         [aceknopr]
+
+M4QSTART    "[""["
+M4QEND      "]""]"
+
+%%
+
+  # these should be here, but we can't access yylex()-local variables
+  # from an action since the action functions are not nested to yylex():
+  #bracelevel = 0
+  #didadef = False
+  #indented_code = False
+  #doing_rule_action = False
+  #option_sense = False
+
+  #doing_codeblock = False
+  #brace_depth = 0
+  #brace_start_line = 0
+
+<INITIAL>{
+  ^{WS} {
+    # Leading whitespace in section 1 starts an indented code block.  Two
+    # markup levels are opened here (CodeBlock, then its Text); the CODEBLOCK
+    # newline rule pops both again while indented_code is set.
+    global indented_code
+    if not indented_code:
+      state.linenum += 1
+      #line_directive_out(None, 1)
+    #add_action('[' '[')
+    yy_push_state(CODEBLOCK)
+    indented_code = True
+    #add_action(yytext)
+    markup_push() # ast.AST.Section1Or2.CodeBlock
+    markup_push() # ast.AST.Text
+  }
+  ^"/*" {
+    # comment opener at the start of a line: the body is scanned in COMMENT,
+    # which pops back to this state at the closing marker
+    #add_action('/*[' '[')
+    yy_push_state(COMMENT)
+  }
+  ^#{OPTWS}line{WS} yy_push_state(LINEDIR)
+  ^"%s"{NAME}? return y_tab.SCDECL
+  ^"%x"{NAME}? return y_tab.XSCDECL
+  ^"%{".*{NL} {
+    # Opening line of a delimited code block.  The CodeBlock level is pushed
+    # first, the whole matched opener line is flushed, and only then is the
+    # Text level pushed -- presumably so the opener line stays outside Text.
+    global indented_code
+    if not indented_code:
+      state.linenum += 1
+      #line_directive_out(None, 1)
+    #add_action('[' '[')
+    yy_push_state(CODEBLOCK)
+    indented_code = False
+    markup_push() # ast.AST.Section1Or2.CodeBlock
+    markup_flush(len(yytext))
+    markup_push() # ast.AST.Text
+  }
+  ^"%top"[[:blank:]]*"{"[[:blank:]]*{NL} {
+    # Start of a top block.  The starting line is remembered so that the end
+    # of file rule in CODEBLOCK_MATCH_BRACE can report the unmatched brace
+    # against it; the nesting depth starts at one for the brace just matched.
+    global brace_start_line, brace_depth
+    brace_start_line = state.linenum
+    state.linenum += 1
+    #buf_linedir(&top_buf, infilename if infilename else '<stdin>', state.linenum)
+    brace_depth = 1
+    yy_push_state(CODEBLOCK_MATCH_BRACE)
+  }
+
+  ^"%top".*                    state.synerr('malformed \'%top\' directive')
+
+  {WS}
+
+  ^"%%".* {
+    # Section separator: reset the brace level, continue in SECT2PROLOG and
+    # hand SECTEND to the parser.
+    # NOTE(review): sectnum is assigned without a global declaration, so it
+    # only binds a local name in this action -- confirm whether that is
+    # intended or whether it should live in the state module.
+    global bracelevel
+    sectnum = 2
+    bracelevel = 0
+    #mark_defs1()
+    #line_directive_out(None, 1)
+    BEGIN(SECT2PROLOG)
+    return y_tab.SECTEND
+  }
+
+  ^(?E{ast.AST.Section1.Options}(?E{ast.AST.Section1.Options.Array}"%pointer".*)){NL} {
+    #yytext_is_array = False
+    state.linenum += 1
+  }
+  ^(?E{ast.AST.Section1.Options}(?E{ast.AST.Section1.Options.Array, value = True}"%array".*)){NL} {
+    #yytext_is_array = True
+    state.linenum += 1
+  }
+
+  ^"%option" {
+    BEGIN(OPTION)
+    return y_tab.TOK_OPTION
+  }
+
+  ^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL} state.linenum += 1
+  ^"%"{LEXOPT}{WS}.*{NL}       state.linenum += 1
+
+       /* xgettext: no-c-format */
+  ^"%"[^sxaceknopr{}].*                state.synerr('unrecognized \'%\' directive')
+
+  ^{NAME} {
+    # A name at the start of a line begins a definition: remember the name,
+    # clear didadef and let PICKUPDEF collect the definition text.
+    global didadef
+    state.nmstr = yytext
+    didadef = False
+    BEGIN(PICKUPDEF)
+  }
+
+  (?E{ast.AST.Name}{SCNAME}) {
+    # a name that is not at the start of a line: store its text in
+    # state.nmstr and return the NAME token
+    state.nmstr = yytext
+    return y_tab.NAME
+  }
+  ^{OPTWS}{NL}                 state.linenum += 1
+  {OPTWS}{NL} {
+    #add_action(yytext)
+    state.linenum += 1
+  }
+}
+
+
+<COMMENT,CODE_COMMENT>{ /* */
+  [^\[\]\*\n]*                 #add_action(yytext)
+  .                            #add_action(yytext)
+
+  {NL} {
+    state.linenum += 1
+    #add_action(yytext)
+  }
+}
+<COMMENT>{
+  "*/" {
+    #add_action('*/]' ']')
+    yy_pop_state()
+  }
+}
+<CODE_COMMENT>{
+  "*/" {
+    #add_action(yytext)
+    yy_pop_state()
+  }
+}
+
+<COMMENT_DISCARD>{
+        /* This is the same as COMMENT, but is discarded rather than output. */
+  "*/"                         yy_pop_state()
+  "*"
+  [^*\n]
+  {NL}                         state.linenum += 1
+}
+
+<EXTENDED_COMMENT>{
+  ")"                          yy_pop_state()
+  [^\n\)]+
+  {NL}                         state.linenum += 1
+}
+
+<LINEDIR>{
+        /* Rest of a #line directive; the state is popped at the newline. */
+  \n                           yy_pop_state()
+  [[:digit:]]+ {
+    # the number becomes the current line number
+    state.linenum = int(yytext)
+  }
+
+  \"[^"\n]*\" {
+    # the quoted text, without its quotes, becomes the current file name
+    state.infilename = yytext[1:-1]
+  }
+  .
+}
+<ACTION,CODEBLOCK,ACTION_STRING,PERCENT_BRACE_ACTION,CHARACTER_CONSTANT,COMMENT,CODE_COMMENT>{
+  {M4QSTART}                   #add_action('[' ']' ']' '[' '[' '[' ']' ']' '[' '[')
+  {M4QEND}                     #add_action(']' ']' ']' '[' '[' ']' ']' ']' '[' '[')
+}
+
+<CODEBLOCK>{
+  ^"%}".*{NL} {
+    # Closing line of a delimited code block.  Text is popped before the
+    # closer line is flushed and CodeBlock after it, mirroring the order
+    # used by the opener rule.
+    state.linenum += 1
+    yy_pop_state()
+    #add_action(']' ']')
+    #if not indented_code:
+    #  line_directive_out(None, 0)
+    markup_pop(ast.AST.Text)
+    markup_flush(len(yytext))
+    markup_pop(ast.AST.Section1Or2.CodeBlock)
+  }
+  [^\n%\[\]]*                  #add_action(yytext)
+  .                            #add_action(yytext)
+  {NL} {
+    # Every newline is counted.  An indented code block is a single line, so
+    # when indented_code is set the newline also ends the block: here the
+    # newline is flushed first, then both markup levels are popped.
+    state.linenum += 1
+    #add_action(yytext)
+    if indented_code:
+      yy_pop_state()
+      #add_action(']' ']')
+      #if not indented_code:
+      #  line_directive_out(None, 0)
+      markup_flush(len(yytext))
+      markup_pop(ast.AST.Text)
+      markup_pop(ast.AST.Section1Or2.CodeBlock)
+  }
+}
+
+<CODEBLOCK_MATCH_BRACE>{
+  "}" {
+    # a closing brace lowers the depth; at depth zero the block is complete
+    # and the state that pushed this one is resumed
+    global brace_depth
+    brace_depth -= 1
+    if brace_depth == 0:
+      yy_pop_state()
+    #else:
+    #  buf_strnappend(&top_buf, yytext, len(yytext))
+  }
+
+  "{" {
+    # a nested opening brace raises the depth
+    global brace_depth
+    brace_depth += 1
+    #buf_strnappend(&top_buf, yytext, len(yytext))
+  }
+
+  {NL} {
+    # newlines inside the block are only counted
+    state.linenum += 1
+    #buf_strnappend(&top_buf, yytext, len(yytext))
+  }
+
+  {M4QSTART}                   #buf_strnappend(&top_buf, escaped_qstart, int(len(escaped_qstart)))
+  {M4QEND}                     #buf_strnappend(&top_buf, escaped_qend, int(len(escaped_qend)))
+  ([^{}\r\n\[\]]+)|[^{}\r\n]   #buf_strnappend(&top_buf, yytext, len(yytext))
+
+  <<EOF>> {
+    # input ended inside the block: rewind the line number to where the
+    # block started so the error is reported against that line
+    state.linenum = brace_start_line
+    state.synerr('Unmatched \'{\'')
+    yyterminate()
+  }
+}
+
+
+<PICKUPDEF>{
+  {WS}
+
+  {NOT_WS}[^\r\n]* {
+    # the rest of the line, with trailing blanks and tabs removed, is
+    # installed as the definition of the name saved in state.nmstr
+    global didadef
+    state.ndinstal(state.nmstr, yytext.rstrip('\t '))
+    didadef = True
+  }
+
+  {NL} {
+    # end of the definition line; a name with no definition text is an error
+    if not didadef:
+      state.synerr('incomplete name definition')
+    BEGIN(INITIAL)
+    state.linenum += 1
+  }
+}
+
+ /* Nick added ("no"*) prefix to all, instead of it being a separate rule */
+<OPTION>{
+  {NL} {
+    state.linenum += 1
+    BEGIN(INITIAL)
+  }
+  {WS} {
+    #global option_sense
+    #option_sense = True
+  }
+
+  "="                  return ord('=')
+
+  /*no {
+    global option_sense
+    option_sense = not option_sense
+  }*/
+
+  (?E{ast.AST.Section1.Options.SevenBit}("no"*)7bit) {
+    # yy_groups[2] is taken to be the run of no prefixes.  Each prefix is two
+    # characters long, so bit 1 of the run length is set exactly when the
+    # number of prefixes is odd, and an odd number negates the option.
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #csize = 128 if option_sense else 256
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.SevenBit}("no"*)8bit) {
+    # 8bit is recorded on the SevenBit element, so the stored value is the
+    # inverse of the sense computed from the no prefixes
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #csize = 256 if option_sense else 128
+    yy_element_token[0].value = not option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+
+  (?E{ast.AST.Section1.Options.Align}("no"*)align) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #long_align = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.AlwaysInteractive}("no"*)always-interactive) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_ALWAYS_INTERACTIVE', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_ALWAYS_INTERACTIVE')
+    #interactive = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Array}("no"*)array) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #yytext_is_array = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Backup}("no"*)backup) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #backing_up_report = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Interactive}("no"*)batch) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #interactive = not option_sense
+    yy_element_token[0].value = not option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.BisonBridge}("no"*)bison-bridge) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #bison_bridge_lval = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.BisonLocations}("no"*)bison-locations) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if bison_bridge_lloc = option_sense:
+    #  bison_bridge_lval = True
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.CPlusPlus}("no"*)"c++") {
+    # The sense is derived from group 2, the run of no prefixes, exactly as
+    # in the sibling option rules.  Group 1 presumably spans the whole
+    # element including the option name, so its length says nothing about
+    # negation and must not be used here.
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #C_plus_plus = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Caseless}("no"*)(caseful|case-sensitive)) {
+    # The two spellings are grouped so that the no prefix can precede either
+    # one; alternation binds looser than concatenation, so an ungrouped
+    # alternative would leave the second spelling without any prefix.
+    # The extra group comes after the prefix group, so group 2 is unchanged.
+    # This option is the inverse of Caseless: bit 0 of the current flag word
+    # is set when the option is negated and cleared otherwise, and the
+    # stored element value is inverted to match.
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    if not option_sense:
+      state._sf_stk[-1] |= 1
+    else:
+      state._sf_stk[-1] &= ~1
+    yy_element_token[0].value = not option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Caseless}("no"*)(caseless|case-insensitive)) {
+    # The two spellings are grouped so that the no prefix can precede either
+    # one; alternation binds looser than concatenation, so an ungrouped
+    # alternative would leave the second spelling without any prefix.
+    # The extra group comes after the prefix group, so group 2 is unchanged.
+    # Bit 0 of the current flag word follows the option sense.
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    if option_sense:
+      state._sf_stk[-1] |= 1
+    else:
+      state._sf_stk[-1] &= ~1
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Debug}("no"*)debug) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #ddebug = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Default}("no"*)default) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #spprdflt = not option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.ECS}("no"*)ecs) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #useecs = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Fast}("no"*)fast) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #useecs = usemecs = False
+    #use_read = fullspd = True
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Full}("no"*)full) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #useecs = usemecs = False
+    #use_read = fulltbl = True
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Input}("no"*)input) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  action_define('YY_NO_INPUT', 1)
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Interactive}("no"*)interactive) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #interactive = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.LexCompat}("no"*)lex-compat) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    state.lex_compat = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.PosixCompat}("no"*)posix-compat) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    state.posix_compat = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Line}("no"*)line) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #gen_line_dirs = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Main}("no"*)main) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_MAIN', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_MAIN')
+    #if option_sense:
+    #  do_yywrap = False
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.MetaECS}("no"*)meta-ecs) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #usemecs = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.NeverInteractive}("no"*)never-interactive) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NEVER_INTERACTIVE', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NEVER_INTERACTIVE')
+    #interactive = not option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.PerfReport}("no"*)perf-report) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #performance_report += 1 if option_sense else -1
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Array}("no"*)pointer) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #yytext_is_array = not option_sense
+    yy_element_token[0].value = not option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Read}("no"*)read) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #use_read = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Reentrant}("no"*)reentrant) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #reentrant = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Reject}("no"*)reject) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #reject_really_used = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Stack}("no"*)stack) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_STACK_USED', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_STACK_USED')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.StdInit}("no"*)stdinit) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #do_stdinit = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.StdOut}("no"*)stdout) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #use_stdout = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.UniStd}("no"*)unistd) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  action_define('YY_NO_UNISTD_H', 1)
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Unput}("no"*)unput) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_UNPUT', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_UNPUT')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Verbose}("no"*)verbose) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #printstats = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.Warn}("no"*)warn) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #nowarn = not option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYLineNo}("no"*)yylineno) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #do_yylineno = option_sense
+    #if option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_USE_LINENO', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_USE_LINENO')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYMore}("no"*)yymore) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #yymore_really_used = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYWrap}("no"*)yywrap) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #do_yywrap = option_sense
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+
+  (?E{ast.AST.Section1.Options.YYPushState}("no"*)yy_push_state) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_PUSH_STATE', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_PUSH_STATE')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYPopState}("no"*)yy_pop_state) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_POP_STATE', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_POP_STATE')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYTopState}("no"*)yy_top_state) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_TOP_STATE', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_TOP_STATE')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+
+  (?E{ast.AST.Section1.Options.YYScanBuffer}("no"*)yy_scan_buffer) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SCAN_BUFFER', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SCAN_BUFFER')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYScanBytes}("no"*)yy_scan_bytes) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SCAN_BYTES', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SCAN_BYTES')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYScanString}("no"*)yy_scan_string) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SCAN_STRING', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SCAN_STRING')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+
+  (?E{ast.AST.Section1.Options.YYAlloc}("no"*)yyalloc) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_FLEX_ALLOC', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_FLEX_ALLOC')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYRealloc}("no"*)yyrealloc) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_FLEX_REALLOC', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_FLEX_REALLOC')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYFree}("no"*)yyfree) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_FLEX_FREE', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_FLEX_FREE')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+
+  (?E{ast.AST.Section1.Options.YYGetDebug}("no"*)yyget_debug) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_DEBUG', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_DEBUG')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYSetDebug}("no"*)yyset_debug) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SET_DEBUG', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SET_DEBUG')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYGetExtra}("no"*)yyget_extra) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_EXTRA', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_EXTRA')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYSetExtra}("no"*)yyset_extra) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SET_EXTRA', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SET_EXTRA')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYGetLeng}("no"*)yyget_leng) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_LENG', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_LENG')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYGetText}("no"*)yyget_text) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_TEXT', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_TEXT')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYGetLineNo}("no"*)yyget_lineno) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_LINENO', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_LINENO')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYSetLineNo}("no"*)yyset_lineno) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SET_LINENO', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SET_LINENO')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYGetIn}("no"*)yyget_in) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_IN', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_IN')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYSetIn}("no"*)yyset_in) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SET_IN', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SET_IN')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYGetOut}("no"*)yyget_out) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_OUT', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_OUT')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYSetOut}("no"*)yyset_out) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SET_OUT', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SET_OUT')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYGetLVal}("no"*)yyget_lval) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_LVAL', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_LVAL')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYSetLVal}("no"*)yyset_lval) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SET_LVAL', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SET_LVAL')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYGetLLoc}("no"*)yyget_lloc) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_LLOC', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_LLOC')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+  (?E{ast.AST.Section1.Options.YYSetLLoc}("no"*)yyset_lloc) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SET_LLOC', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SET_LLOC')
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+
+  extra-type           return y_tab.TOK_EXTRA_TYPE
+  outfile              return y_tab.TOK_OUTFILE
+  prefix               return y_tab.TOK_PREFIX
+  yyclass              return y_tab.TOK_YYCLASS
+  header(-file)?       return y_tab.TOK_HEADER_FILE
+  tables-file          return y_tab.TOK_TABLES_FILE
+  (?E{ast.AST.Section1.Options.TablesVerify}("no"*)tables-verify) {
+    option_sense = (len(yy_groups[2]) & 2) == 0
+    #tablesverify = option_sense
+    #if not tablesext and option_sense:
+    #  tablesext = True
+    yy_element_token[0].value = option_sense
+    return y_tab.TOK_OPTION_OTHER # Nick
+  }
+
+  (?E{ast.AST.String}\"(?E{ast.AST.Text}[^"\n]*)\") {
+    # quoted option argument: the text between the quotes is stored in
+    # state.nmstr and returned as a NAME token
+    state.nmstr = yytext[1:-1]
+    return y_tab.NAME
+  }
+
+  (([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|. {
+    # anything no earlier option rule matched: report it and skip the rest
+    # of the line in RECOVER
+    state.synerr('unrecognized %option: {0:s}'.format(yytext))
+    BEGIN(RECOVER)
+  }
+}
+
+<RECOVER>.*{NL} {
+  # discard the remainder of the offending line, count it, and resume
+  # normal section 1 scanning
+  state.linenum += 1
+  BEGIN(INITIAL)
+}
+
+
+<SECT2PROLOG>{
+  ^"%{".* {
+    # opener marker in the section 2 prolog: raise the brace level, then
+    # give back everything after the two marker characters
+    global bracelevel
+    bracelevel += 1
+    markup_yyless(2)
+  }
+  ^"%}".* {
+    # closer marker in the section 2 prolog: lower the brace level, then
+    # give back everything after the two marker characters
+    global bracelevel
+    bracelevel -= 1
+    markup_yyless(2)
+  }
+
+  ^{WS} {
+    # Leading whitespace in the section 2 prolog starts an indented code
+    # block, with the same steps as the section 1 rule: push CODEBLOCK, mark
+    # the block as indented and open the CodeBlock and Text markup levels.
+    global indented_code
+    if not indented_code:
+      state.linenum += 1
+      #line_directive_out(None, 1)
+    #add_action('[' '[')
+    yy_push_state(CODEBLOCK)
+    indented_code = True
+    #add_action(yytext)
+    markup_push() # ast.AST.Section1Or2.CodeBlock
+    markup_push() # ast.AST.Text
+  }
+
+  ^{NOT_WS}.* {
+    # A line starting in column one.  Outside any brace-delimited prolog
+    # block (bracelevel not positive) this is the first rule: the whole match
+    # is given back, beginning-of-line is restored and scanning restarts in
+    # SECT2.  Inside such a block the line is handled as code instead.
+    global indented_code
+    if bracelevel <= 0:
+      markup_yyless(0)
+      yy_set_bol(True)
+      #mark_prolog()
+      BEGIN(SECT2)
+    else:
+      if not indented_code:
+        state.linenum += 1
+        #line_directive_out(None, 1)
+      #add_action('[' '[')
+      yy_push_state(CODEBLOCK)
+      indented_code = True
+      #add_action(yytext)
+      markup_push() # ast.AST.Section1Or2.CodeBlock
+      markup_push() # ast.AST.Text
+  }
+
+  .                            #add_action(yytext)
+  {NL} {
+    state.linenum += 1
+    #add_action(yytext)
+  }
+
+  <<EOF>> {
+    # end of input while still in the prolog: return YY_NULL to the caller
+    # NOTE(review): sectnum is assigned without a global declaration, so it
+    # only binds a local name here -- confirm whether that is intended.
+    #mark_prolog()
+    sectnum = 0
+    return YY_NULL
+  }
+}
+
+<SECT2>{
+  ^{OPTWS}{NL}                 state.linenum += 1
+
+  ^{OPTWS}"%{" {
+    # Code block at the start of a line in section 2.  Leading whitespace is
+    # flushed, the CodeBlock level is pushed, the two-character opener is
+    # flushed and the Text level is pushed; the body is then scanned in
+    # PERCENT_BRACE_ACTION.
+    global indented_code, doing_codeblock, bracelevel
+    indented_code = False
+    doing_codeblock = True
+    bracelevel = 1
+    BEGIN(PERCENT_BRACE_ACTION)
+    markup_flush(len(yytext) - 2)
+    markup_push() # ast.AST.Section1Or2.CodeBlock
+    markup_flush(2)
+    markup_push() # ast.AST.Text
+  }
+
+  ^{OPTWS}"<" {
+    # Opening angle bracket at the start of a rule.  SC is entered only when
+    # bit 2 (value 4) of the current flag word is clear -- presumably the
+    # skip-whitespace flag of extended patterns; TODO confirm.  Leading
+    # whitespace is flushed so only the bracket itself is returned.
+    if not (state._sf_stk[-1] & 4):
+      BEGIN(SC)
+    markup_flush(len(yytext) - 1)
+    return ord('<')
+  }
+  ^{OPTWS}"^" {
+    markup_flush(len(yytext) - 1)
+    return ord('^')
+  }
+  \" {
+    BEGIN(QUOTE)
+    return ord('"')
+  }
+  "{"/[[:digit:]] {
+    # a brace followed by a digit opens a repeat count; the token returned
+    # depends on whether either compatibility mode is active
+    BEGIN(NUM)
+    compat_mode = state.lex_compat or state.posix_compat
+    return y_tab.BEGIN_REPEAT_POSIX if compat_mode else y_tab.BEGIN_REPEAT_FLEX
+  }
+  "$"/([[:blank:]]|{NL}) return ord('$')
+
+  {WS}"%{" {
+    # Delimited action after a pattern.  It is an error when no rule is in
+    # progress.  The whitespace is flushed, the Action level is pushed, the
+    # two-character opener is flushed and the Text level is pushed; the body
+    # is then scanned in PERCENT_BRACE_ACTION.
+    global bracelevel, doing_rule_action
+    if not state.in_rule:
+      state.synerr('action outside rule')
+    bracelevel = 1
+    BEGIN(PERCENT_BRACE_ACTION)
+    markup_flush(len(yytext) - 2)
+    markup_push() # ast.AST.Section2.Rule.Action
+    doing_rule_action = True
+    state.in_rule = False
+    markup_flush(2)
+    markup_push() # ast.AST.Text
+  }
+  {WS}(?E{ast.AST.Section2.Rule.Action, continued = True}"|".*{NL}) {
+    # A bar in action position.  When flag bit 2 (value 4) is set, everything
+    # from the bar onwards is given back to be rescanned; otherwise the line
+    # is counted and a newline token ends the rule.
+    # NOTE(review): continued_action is assigned without a global
+    # declaration, so it only binds a local name here -- confirm.
+    if state._sf_stk[-1] & 4:
+      markup_yyless(yytext.index('|'))
+    else:
+      #add_action(']' ']')
+      continued_action = True
+      state.linenum += 1
+      return ord('\n')
+  }
+
+  ^{WS}"/*" {
+    # Indented comment opener.  When flag bit 2 (value 4) is set the comment
+    # is skipped in COMMENT_DISCARD; otherwise the two opener characters are
+    # given back and scanning continues in ACTION.
+    # NOTE(review): continued_action only binds a local name here -- confirm.
+    global bracelevel
+    if state._sf_stk[-1] & 4:
+      yy_push_state(COMMENT_DISCARD)
+    else:
+      markup_yyless(len(yytext) - 2)
+      bracelevel = 0
+      continued_action = False
+      BEGIN(ACTION)
+  }
+
+  ^{WS}
+
+  {WS} {
+    # Whitespace after a pattern.  It is ignored while flag bit 2 (value 4)
+    # is set; otherwise the action starts.  When a rule is in progress the
+    # whitespace is flushed and the Action and Text markup levels are opened.
+    # NOTE(review): continued_action only binds a local name here -- confirm.
+    global bracelevel, doing_rule_action
+    if not (state._sf_stk[-1] & 4):
+      bracelevel = 0
+      continued_action = False
+      BEGIN(ACTION)
+      if state.in_rule:
+        doing_rule_action = True
+        state.in_rule = False
+        markup_flush(len(yytext))
+        markup_push() # ast.AST.Section2.Rule.Action
+        markup_push() # ast.AST.Text
+  }
+
+  {OPTWS}{NL} {
+    # End of a pattern line.  While flag bit 2 (value 4) is set the newline
+    # is only counted.  Otherwise ACTION is entered and the final character
+    # of the match is given back so that ACTION sees the newline itself.
+    # NOTE(review): markup_flush uses len(yytext) after markup_yyless;
+    # whether yytext is already shortened at that point depends on
+    # markup_yyless -- confirm.  continued_action only binds a local name.
+    global bracelevel, doing_rule_action
+    if state._sf_stk[-1] & 4:
+      state.linenum += 1
+    else:
+      bracelevel = 0
+      continued_action = False
+      BEGIN(ACTION)
+      markup_yyless(len(yytext) - 1)
+      if state.in_rule:
+        doing_rule_action = True
+        state.in_rule = False
+        markup_flush(len(yytext))
+        markup_push() # ast.AST.Section2.Rule.Action
+        markup_push() # ast.AST.Text
+  }
+
+    /* The end-of-file pattern, with or without leading whitespace. */
+  ^{OPTWS}"<<EOF>>" |
+  "<<EOF>>" {
+    # 7 is the length of the end-of-file marker, anything before it is flushed
+    markup_flush(len(yytext) - 7)
+    return y_tab.EOF_OP
+  }
+
+    /* A double percent at the start of a line ends section 2. */
+  ^"%%".* {
+    # NOTE(review): assigned without a global statement -- confirm intent
+    sectnum = 3
+    BEGIN(SECT3_NOESCAPE if state.no_section3_escape else SECT3)
+    #outn('/* Begin user sect3 */')
+    return y_tab.SECTEND
+  }
+
+    /* The start of a character class.  The whole class is matched so that
+     * its text can be recorded, but only the first character is consumed.
+     */
+  "["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})* {
+    #cclval = None
+    state.nmstr = yytext
+    #if 0 and (cclval = ccllookup(state.nmstr)) != 0:
+    #  if input() != ord(']'):
+    #    state.synerr('bad character class')
+    #  y_tab.yylval = cclval
+    #  cclreuse += 1
+    #  return y_tab.PREVCCL
+    if True: #else:
+      #cclinstal(state.nmstr, lastccl + 1)
+      # keep the first character only, then continue in FIRSTCCL
+      markup_yyless(1)
+      BEGIN(FIRSTCCL)
+      return ord('[')
+  }
+    /* Character class difference and union operators. */
+  "{-}"                                return y_tab.CCL_OP_DIFF
+  "{+}"                                return y_tab.CCL_OP_UNION
+
+
+    /* One trailing whitespace character after the closing brace is
+     * matched as well, so that an expansion at the end of a rule is not
+     * wrapped in '(' ')' -- which would break trailing context.
+     */
+  "{"{NAME}"}"[[:space:]]? {
+    trailing_ws = yytext[-1] != '}'
+    # drop the braces, and the trailing whitespace character when present
+    state.nmstr = yytext[1:-2] if trailing_ws else yytext[1:-1]
+    expansion = state.ndlookup(state.nmstr)
+    if expansion is None:
+      state.synerr('undefined definition {{{0:s}}}'.format(state.nmstr))
+    else:
+      if trailing_ws:
+        # the whitespace character is not part of the reference
+        markup_yyless(len(yytext) - 1)
+      anchored = len(expansion) and (expansion[0] == '^' or expansion[-1] == '$')
+      if state.lex_compat or anchored or trailing_ws and y_tab.trlcontxt and not (state._sf_stk[-1] & 4):
+        # pushed back bare, without grouping parentheses
+        unput(expansion)
+        if len(expansion) and expansion[0] == '^':
+          BEGIN(CARETISBOL)
+      else:
+        # pushed back inside a group, non-capturing unless in a compatibility mode
+        group_prefix = '' if state.lex_compat or state.posix_compat else '?:'
+        unput('({0:s}{1:s})'.format(group_prefix, expansion))
+  }
+
+    /* A comment opener inside a pattern. */
+  "/*" {
+    if state._sf_stk[-1] & 4:
+      yy_push_state(COMMENT_DISCARD)
+    else:
+      # without the x flag only the slash is consumed, and returned as itself
+      markup_yyless(1)
+      return ord('/')
+  }
+
+    /* The opener of an extended comment group. */
+  "(?#" {
+    if state.lex_compat or state.posix_compat:
+      # in a compatibility mode only the parenthesis is consumed
+      markup_yyless(1)
+      state.sf_push()
+      return ord('(')
+    else:
+      yy_push_state(EXTENDED_COMMENT)
+  }
+    /* The opener of a group with parameters. */
+  "(?" {
+    # every opening parenthesis duplicates the current flag word
+    state.sf_push()
+    if state.lex_compat or state.posix_compat:
+      # in a compatibility mode only the parenthesis is consumed
+      markup_yyless(1)
+    else:
+      BEGIN(GROUP_WITH_PARAMS)
+    return ord('(')
+  }
+  "(" {
+    state.sf_push()
+    return ord('(')
+  }
+  ")" {
+    # the bottom entry of the flag stack is never popped
+    if len(state._sf_stk) > 1:
+      state.sf_pop()
+      return ord(')')
+    else:
+      state.synerr('unbalanced parenthesis')
+  }
+
+    /* Operator characters are returned as themselves. */
+  [/|*+?.(){}]                 return ord(yytext[0])
+    /* Any other character is a CHAR token carrying its character code. */
+  . {
+    y_tab.yylval = ord(yytext[0])
+    return y_tab.CHAR
+  }
+}
+
+
+ /* Rules for the SC start condition, entered from SECT2. */
+<SC>{
+  {OPTWS}{NL}{OPTWS}           state.linenum += 1
+  [,*]                         return ord(yytext[0])
+    /* The closing bracket returns to SECT2, or goes to CARETISBOL when a
+     * caret follows it.
+     */
+  ">" {
+    BEGIN(SECT2)
+    return ord('>')
+  }
+  ">"/^ {
+    BEGIN(CARETISBOL)
+    return ord('>')
+  }
+    /* A start condition name; its text is kept in state.nmstr. */
+  (?E{ast.AST.Name}{SCNAME}) {
+    state.nmstr = yytext
+    return y_tab.NAME
+  }
+  .                            state.synerr('bad <start condition>: {0:s}'.format(yytext))
+}
+
+ /* In CARETISBOL a caret is returned as itself and scanning resumes in SECT2. */
+<CARETISBOL>"^" {
+  BEGIN(SECT2)
+  return ord('^')
+}
+
+
+ /* Inside a double-quoted string every character is a CHAR token. */
+<QUOTE>{
+  [^"\n] {
+    y_tab.yylval = ord(yytext[0])
+    return y_tab.CHAR
+  }
+  \" {
+    BEGIN(SECT2)
+    return ord('"')
+  }
+
+    /* A newline before the closing quote is an error. */
+  {NL} {
+    state.synerr('missing quote')
+    BEGIN(SECT2)
+    state.linenum += 1
+    # a closing quote token is returned anyway, after the error report
+    return ord('"')
+  }
+}
+
+ /* What may follow the opener of a group with parameters. */
+<GROUP_WITH_PARAMS>{
+    /* Nick extra rules for named groups */
+  "'"(?E{regex.RegexGroupName.Text}{NAME})"'" |
+  "<"(?E{regex.RegexGroupName.Text}{NAME})">" {
+    BEGIN(SECT2)
+    return y_tab.NAME
+  }
+    /* Nick extra rules for action groups */
+  "A{" {
+    global bracelevel
+    # SECT2 is set first, presumably so that yy_pop_state later returns there
+    BEGIN(SECT2)
+    yy_push_state(ACTION_GROUP)
+    bracelevel = 1
+    markup_flush(len(yytext))
+    markup_push() # regex.RegexGroupAction.Text
+  }
+  "E{" {
+    global bracelevel
+    BEGIN(SECT2)
+    yy_push_state(ELEMENT_GROUP)
+    bracelevel = 1
+    # NOTE(review): no markup_flush here, unlike the action group rule -- confirm
+    markup_push() # RegexGroupElement.Text
+  }
+    /* A colon ends the option list. */
+  ":" {
+    BEGIN(SECT2)
+    return ord(':')
+  }
+    /* After a minus sign the option letters clear flags instead. */
+  "-"                          BEGIN(GROUP_MINUS_PARAMS)
+    /* Option letters set bits 1, 2 and 4 in the top flag word. */
+  i                            state._sf_stk[-1] |= 1
+  s                            state._sf_stk[-1] |= 2
+  x                            state._sf_stk[-1] |= 4
+}
+ /* Option letters after a minus sign clear bits 1, 2 and 4 in the top flag word. */
+<GROUP_MINUS_PARAMS>{
+  ":" {
+    BEGIN(SECT2)
+    return ord(':')
+  }
+  i                            state._sf_stk[-1] &= ~1
+  s                            state._sf_stk[-1] &= ~2
+  x                            state._sf_stk[-1] &= ~4
+}
+
+ /* The first character of a character class. */
+<FIRSTCCL>{
+    /* A caret moves on to CCL unless a minus sign or closing bracket
+     * follows, in which case the scanner stays in FIRSTCCL.
+     */
+  "^"/[^-\]\n] {
+    BEGIN(CCL)
+    return ord('^')
+  }
+  "^"/("-"|"]")                        return ord('^')
+    /* Any other first character is a CHAR token. */
+  . {
+    BEGIN(CCL)
+    y_tab.yylval = ord(yytext[0])
+    return y_tab.CHAR
+  }
+}
+
+ /* The remaining characters of a character class. */
+<CCL>{
+    /* A minus sign is returned as itself unless the class ends right after it. */
+  -/[^\]\n]                    return ord('-')
+  [^\]\n] {
+    y_tab.yylval = ord(yytext[0])
+    return y_tab.CHAR
+  }
+  "]" {
+    BEGIN(SECT2)
+    return ord(']')
+  }
+    /* Anything else is an error; a closing bracket token is returned anyway. */
+  .|{NL} {
+    state.synerr('bad character class')
+    BEGIN(SECT2)
+    return ord(']')
+  }
+}
+
+ /* Bracket expressions inside a character class. */
+<FIRSTCCL,CCL>{
+    /* Each rule moves on to CCL and returns the matching CCE_ token. */
+  "[:alnum:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_ALNUM
+  }
+  "[:alpha:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_ALPHA
+  }
+  "[:blank:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_BLANK
+  }
+  "[:cntrl:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_CNTRL
+  }
+  "[:digit:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_DIGIT
+  }
+  "[:graph:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_GRAPH
+  }
+  "[:lower:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_LOWER
+  }
+  "[:print:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_PRINT
+  }
+  "[:punct:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_PUNCT
+  }
+  "[:space:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_SPACE
+  }
+  "[:upper:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_UPPER
+  }
+  "[:xdigit:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_XDIGIT
+  }
+
+    /* The negated forms return the CCE_NEG_ tokens. */
+  "[:^alnum:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_ALNUM
+  }
+  "[:^alpha:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_ALPHA
+  }
+  "[:^blank:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_BLANK
+  }
+  "[:^cntrl:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_CNTRL
+  }
+  "[:^digit:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_DIGIT
+  }
+  "[:^graph:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_GRAPH
+  }
+  "[:^lower:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_LOWER
+  }
+  "[:^print:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_PRINT
+  }
+  "[:^punct:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_PUNCT
+  }
+  "[:^space:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_SPACE
+  }
+  "[:^upper:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_UPPER
+  }
+  "[:^xdigit:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_XDIGIT
+  }
+    /* Any other bracket expression is reported, then CCE_ALNUM is returned. */
+  {CCL_EXPR} {
+    state.synerr('bad character class expression: {0:s}'.format(yytext))
+    BEGIN(CCL)
+    return y_tab.CCE_ALNUM
+  }
+}
+
+ /* The inside of a repeat count, entered from SECT2. */
+<NUM>{
+    /* A run of digits is a NUMBER token carrying its integer value. */
+  [[:digit:]]+ {
+    y_tab.yylval = int(yytext)
+    return y_tab.NUMBER
+  }
+
+  ","                          return ord(',')
+    /* The closing brace; the token kind depends on the compatibility mode. */
+  "}" {
+    BEGIN(SECT2)
+    compat_mode = state.lex_compat or state.posix_compat
+    return y_tab.END_REPEAT_POSIX if compat_mode else y_tab.END_REPEAT_FLEX
+  }
+
+    /* Anything else is an error; a closing brace is returned anyway. */
+  . {
+    BEGIN(SECT2)
+    state.synerr('bad character inside {}\'s')
+    return ord('}')
+  }
+
+  {NL} {
+    BEGIN(SECT2)
+    # reported before the line number is advanced
+    state.synerr('missing }')
+    state.linenum += 1
+    return ord('}')
+  }
+}
+
+
+ /* The body of a percent-brace action; some rules are shared with
+  * ACTION and CODEBLOCK.
+  */
+<PERCENT_BRACE_ACTION>{
+    /* The closing marker resets the brace level and pops the text element. */
+  {OPTWS}"%}".* {
+    global bracelevel
+    bracelevel = 0
+    markup_pop(ast.AST.Text)
+  }
+
+  <ACTION>"/*" {
+    #add_action(yytext)
+    yy_push_state(CODE_COMMENT)
+  }
+
+    <CODEBLOCK,ACTION>{
+    "reject" {
+      #add_action(yytext)
+      if all_upper(yytext):
+        # NOTE(review): assigned without a global statement -- confirm intent
+        reject = True
+    }
+    "yymore" {
+      #add_action(yytext)
+      if all_lower(yytext):
+        # NOTE(review): assigned without a global statement -- confirm intent
+        yymore_used = True
+    }
+  }
+
+  .                            #add_action(yytext)
+    /* A newline ends the block once the brace level is back to zero, or
+     * when an indented code block is being scanned.
+     */
+  {NL} {
+    global doing_rule_action, doing_codeblock
+    state.linenum += 1
+    #add_action(yytext)
+    if bracelevel <= 0 or doing_codeblock and indented_code:
+      #if doing_rule_action:
+      #  add_action('\tYY_BREAK]' ']\n')
+      doing_rule_action = False
+      BEGIN(SECT2)
+      markup_flush(len(yytext))
+      if doing_codeblock:
+        markup_pop(ast.AST.Section1Or2.CodeBlock)
+        doing_codeblock = False
+      else:
+        # only the end of a rule action returns a newline token
+        markup_pop(ast.AST.Section2.Rule.Action)
+        return ord('\n')
+  }
+}
+
+
+       /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
+<ACTION>{
+    /* Braces adjust the nesting level that decides where the action ends. */
+  "{" {
+    global bracelevel
+    #add_action(yytext)
+    bracelevel += 1
+  }
+  "}" {
+    global bracelevel
+    #add_action(yytext)
+    bracelevel -= 1
+  }
+  [^[:alpha:]_{}\"'/\n\[\]]+   #add_action(yytext)
+  {NAME}                       #add_action(yytext)
+    /* A complete character constant is skipped in one go; a lone quote
+     * character switches to CHARACTER_CONSTANT or ACTION_STRING.
+     */
+  "'"([^\'\\\n]|\\.)"'"                #add_action(yytext)
+  "'" {
+    #add_action(yytext)
+    BEGIN(CHARACTER_CONSTANT)
+  }
+  \" {
+    #add_action(yytext)
+    BEGIN(ACTION_STRING)
+  }
+    /* A newline at brace level zero or below ends the action. */
+  {NL} {
+    global doing_rule_action
+    state.linenum += 1
+    #add_action(yytext)
+    if bracelevel <= 0:
+      BEGIN(SECT2)
+      if doing_rule_action:
+        doing_rule_action = False
+        #add_action('\tYY_BREAK]' ']\n')
+        # flush the newline, then pop the text element and the action element
+        markup_flush(len(yytext))
+        markup_pop(ast.AST.Text)
+        markup_pop(ast.AST.Section2.Rule.Action)
+        return ord('\n')
+  }
+  .                            #add_action(yytext)
+}
+
+ /* Inside a double-quoted string in an action; the closing quote returns to ACTION. */
+<ACTION_STRING>{
+  [^\[\]\"\\\n]+               #add_action(yytext)
+  \" {
+    #add_action(yytext)
+    BEGIN(ACTION)
+  }
+}
+ /* Inside a single-quoted constant in an action; the closing quote returns to ACTION. */
+<CHARACTER_CONSTANT>{
+  [^\[\]\'\\\n]+               #add_action(yytext)
+  \' {
+    #add_action(yytext)
+    BEGIN(ACTION)
+  }
+}
+ /* Rules shared by quoted strings and character constants in an action. */
+<ACTION_STRING,CHARACTER_CONSTANT>{
+    /* Backslash-newline continuations and escaped characters are skipped. */
+  (\\\n)*                      #add_action(yytext)
+  \\(\\\n)*.                   #add_action(yytext)
+    /* An unescaped newline ends the quoted text.  At brace level zero or
+     * below it also ends the action, as the newline rule of ACTION does.
+     */
+  {NL} {
+    # doing_rule_action is assigned below, so without this declaration the
+    # name would be local and the test that reads it would fail
+    global doing_rule_action
+    state.linenum += 1
+    #add_action(yytext)
+    if bracelevel <= 0:
+      BEGIN(SECT2)
+      markup_flush(len(yytext))
+      if doing_rule_action:
+        doing_rule_action = False # Nick added, error in the original?
+        markup_pop(ast.AST.Text)
+        markup_pop(ast.AST.Section2.Rule.Action)
+        # the token is the integer character code, as in the ACTION newline rule
+        return ord('\n')
+    else:
+      BEGIN(ACTION)
+  }
+  .                            #add_action(yytext)
+}
+
+ /* Nick extra rules for action groups */
+ /* Nick added: ACTION_GROUP,ELEMENT_GROUP,DOUBLE_QUOTED,SINGLE_QUOTED */
+ /* End of input in any of these start conditions is reported and scanning terminates. */
+<COMMENT,CODE_COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING,CHARACTER_CONSTANT,ACTION_GROUP,ELEMENT_GROUP,DOUBLE_QUOTED,SINGLE_QUOTED><<EOF>> {
+  state.synerr('EOF encountered inside an action')
+  yyterminate()
+}
+
+ /* The same for end of input inside a pattern construct. */
+<EXTENDED_COMMENT,GROUP_WITH_PARAMS,GROUP_MINUS_PARAMS><<EOF>> {
+  state.synerr('EOF encountered inside pattern')
+  yyterminate()
+}
+
+ /* An escape sequence is a CHAR token whose value comes from state.myesc. */
+<SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ} {
+  y_tab.yylval = state.myesc(yytext)
+  # as the first character of a class it also moves the scanner on to CCL
+  if YY_START() == FIRSTCCL:
+    BEGIN(CCL)
+  return y_tab.CHAR
+}
+
+ /* Section 3: every rule has a commented-out action, so matched text
+  * produces no token; only end of input returns.
+  */
+<SECT3>{
+  {M4QSTART}                   #yyout.write(escaped_qstart)
+  {M4QEND}                     #yyout.write(escaped_qend)
+  [^\[\]]*                     #ECHO()
+  [][]                         #ECHO()
+  <<EOF>> {
+    # NOTE(review): assigned without a global statement -- confirm intent
+    sectnum = 0
+    return YY_NULL
+  }
+}
+ /* The variant of section 3 selected by state.no_section3_escape. */
+<SECT3_NOESCAPE>{
+  {M4QSTART}                   #yyout.write('[' '[{0:s}]' ']'.format(escaped_qstart))
+  {M4QEND}                     #yyout.write('[' '[{0:s}]' ']'.format(escaped_qend))
+  [^][]*                       #ECHO()
+  [][]                         #ECHO()
+  <<EOF>> {
+    # NOTE(review): assigned without a global statement -- confirm intent
+    sectnum = 0
+    return YY_NULL
+  }
+}
+
+ /* Nick extra rules for action groups */
+ /* Braces are counted; the closing brace that brings the level back to
+  * zero ends the group, pops its text element and returns the group token.
+  */
+<ACTION_GROUP,ELEMENT_GROUP>{
+  "{" {
+    global bracelevel
+    bracelevel += 1
+  }
+}
+<ACTION_GROUP>{
+  "}" {
+    global bracelevel
+    bracelevel -= 1
+    if bracelevel == 0:
+      yy_pop_state()
+      markup_pop(regex.RegexGroupAction.Text)
+      return y_tab.TOK_ACTION_GROUP
+  }
+}
+<ELEMENT_GROUP>{
+  "}" {
+    global bracelevel
+    bracelevel -= 1
+    if bracelevel == 0:
+      yy_pop_state()
+      markup_pop(regex.RegexGroupElement.Text)
+      return y_tab.TOK_ELEMENT_GROUP
+  }
+}
+ /* Quotes and comment openers inside a group push a nested start
+  * condition, so that braces inside them are not counted.
+  */
+<ACTION_GROUP,ELEMENT_GROUP>{
+  "'"                          yy_push_state(SINGLE_QUOTED)
+  \"                           yy_push_state(DOUBLE_QUOTED)
+  "/*"                         yy_push_state(COMMENT_DISCARD)
+}
+ /* Quoted text is skipped up to the matching closing quote. */
+<SINGLE_QUOTED>{
+  [^\[\]\'\\\n]+
+  \'                           yy_pop_state()
+}
+<DOUBLE_QUOTED>{
+  [^\[\]\"\\\n]+
+  \"                           yy_pop_state()
+}
+ /* Backslash-newline continuations and escaped characters are skipped. */
+<SINGLE_QUOTED,DOUBLE_QUOTED>{
+  (\\\n)*
+  \\(\\\n)*.
+}
+ /* Newlines are counted; any other character is skipped. */
+<ACTION_GROUP,ELEMENT_GROUP,SINGLE_QUOTED,DOUBLE_QUOTED>{
+  {NL}                         state.linenum += 1
+  .
+}
+
+ /* Last resort in every start condition: report the character and carry on. */
+<*>.|\n                                state.synerr('bad character: {0:s}'.format(yytext))
+
+%%
+
+#def yywrap():
+#  if --num_input_files > 0:
+#    set_input_file(*++input_files)
+#    return 0
+#  else:
+#    return 1
+#
+#def set_input_file(file):
+#  if file and strcmp(file, '-'):
+#    state.infilename = xstrdup(file)
+#    yyin = fopen(infilename, 'r')
+#    if yyin == None:
+#      lerr('can\'t open %s', file)
+#  else:
+#    yyin = stdin
+#    state.infilename = xstrdup('<stdin>')
+#  state.linenum = 1
+
+# these exist for the purpose of adding markup to sequences that are
+# recognized by several iterations of yylex(), it would be better to
+# try to use more complex regular expressions to match all in one go:
+
+def markup_flush(n):
+  # Takes the first n characters off the text of yy_element_token and
+  # appends them to the text held at index len(yy_element_space) of
+  # yy_element_space; the remainder stays as the token text.
+  token_text = element.get_text(yy_element_token, 0)
+  moved, kept = token_text[:n], token_text[n:]
+  tail_index = len(yy_element_space)
+  tail_text = element.get_text(yy_element_space, tail_index)
+  element.set_text(yy_element_space, tail_index, tail_text + moved)
+  element.set_text(yy_element_token, 0, kept)
+
+def markup_yyless(n):
+  # Calls yyless(n), then cuts the text of yy_element_token down to its
+  # first n characters so that it agrees with the shortened match.
+  yyless(n)
+  kept = element.get_text(yy_element_token, 0)[:n]
+  element.set_text(yy_element_token, 0, kept)
+
+def markup_push():
+  # Saves the current yy_element_space on markup_stack and starts
+  # collecting into a fresh, empty 'root' element instead.
+  global yy_element_space
+  markup_stack.append(yy_element_space)
+  fresh_space = element.Element('root')
+  yy_element_space = fresh_space
+
+def markup_pop(factory, *args, **kwargs):
+  # Wraps what was collected since the matching markup_push() in a new
+  # element made by factory, appends that element to the space saved by
+  # the push, and makes the saved space current again.  Extra positional
+  # and keyword arguments are passed through to factory.
+  global yy_element_space
+  parent = markup_stack.pop()
+  wrapped = factory(
+    *args,
+    text = element.get_text(yy_element_space, 0),
+    children = yy_element_space[:],
+    **kwargs
+  )
+  parent.append(wrapped)
+  yy_element_space = parent
diff --git a/state.py b/state.py
new file mode 100644 (file)
index 0000000..5608fca
--- /dev/null
+++ b/state.py
@@ -0,0 +1,59 @@
+import sys
+
+# miscellaneous state accessed by scan.l and parse.y
+# in_rule: scan.l clears this when the action of a rule begins
+in_rule = False
+# lex_compat, posix_compat: when either is set scan.l returns the POSIX
+# repeat tokens and does not treat "(?" as a group with parameters
+lex_compat = False
+# nmstr: text of the name or character class most recently scanned
+nmstr = ''
+# no_section3_escape: makes scan.l use SECT3_NOESCAPE instead of SECT3
+no_section3_escape = False
+posix_compat = False
+
+# stack of scanner flag words, one per open parenthesis plus a bottom
+# entry; scan.l sets and clears bits 1, 2 and 4 in the top word
+_sf_stk = [0]
+def sf_push():
+  # Duplicates the top flag word, so a new group starts with the flags
+  # of the group that encloses it.
+  top = _sf_stk[-1]
+  _sf_stk.append(top)
+def sf_pop():
+  # Discards the top flag word.
+  del _sf_stk[-1]
+
+# name definitions, keyed by name
+name_defs = {}
+def ndinstal(key, value):
+  # Records value under key; a key that is already present is reported
+  # through synerr and its existing value is kept.
+  if key not in name_defs:
+    name_defs[key] = value
+  else:
+    synerr('name defined twice')
+def ndlookup(key):
+  # Returns the value recorded under key, or None when there is none.
+  return name_defs.get(key, None)
+
+# position used as the prefix of error messages
+infilename = '<stdin>'
+linenum = 1
+def synerr(str):
+  # Writes the message str to standard error as one line, prefixed with
+  # the current input file name and line number.
+  message = '{0:s}:{1:d}: {2:s}\n'.format(infilename, linenum, str)
+  sys.stderr.write(message)
+
+# single-letter escapes and the character codes they stand for
+esc = {
+  'b': ord('\b'),
+  'f': ord('\f'),
+  'n': ord('\n'),
+  'r': ord('\r'),
+  't': ord('\t'),
+  'a': ord('\a'),
+  'v': ord('\v')
+}
+def myesc(str):
+  # Returns the character code denoted by the escape sequence str, which
+  # must start with a backslash followed by at least one character.
+  #
+  # Recognized forms are the single-letter escapes listed in esc, an
+  # octal escape, and a hexadecimal escape of 'x' followed by up to two
+  # hex digits.  Any other escaped character stands for itself.
+  assert str[0] == '\\'
+  result = esc.get(str[1])
+  if result is None:
+    if str[1] in '01234567':
+      # any octal digit starts an octal escape, not only '0'; a leading
+      # '0' may still be followed by up to three further digits, a
+      # non-zero first digit by up to two
+      i = 2
+      j = min(5 if str[1] == '0' else 4, len(str))
+      while i < j and str[i] in '01234567':
+        i += 1
+      result = int(str[1:i], 8)
+    elif str[1] == 'x':
+      i = 2
+      j = min(4, len(str))
+      while i < j and str[i] in '0123456789ABCDEFabcdef':
+        i += 1
+      # with no hex digits the value is 0, as C strtoul gives for an
+      # empty digit string, instead of int() raising ValueError on ''
+      result = int(str[2:i], 16) if i > 2 else 0
+    else:
+      result = ord(str[1])
+  return result