Add Python version of bootstrap_flex.git scanner (adds markup to *.l file)
author Nick Downing <nick@ndcode.org>
Mon, 14 Jan 2019 02:47:36 +0000 (13:47 +1100)
committer Nick Downing <nick@ndcode.org>
Mon, 14 Jan 2019 02:47:36 +0000 (13:47 +1100)
.gitignore
bootstrap/Makefile [new file with mode: 0644]
bootstrap/element.py [new file with mode: 0644]
bootstrap/markup.py [new file with mode: 0755]
bootstrap/scan.l [new file with mode: 0644]
bootstrap/skel_py.py [new file with mode: 0644]
bootstrap/y_tab.py [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
index 148e42c..0c800cc 100644 (file)
@@ -1,4 +1,6 @@
 __pycache__
+bootstrap/lex_yy.py
+bootstrap/scan.l.xml
 lex-yacc-examples/*.c
 lex-yacc-examples/*.h
 lex-yacc-examples/*.o
diff --git a/bootstrap/Makefile b/bootstrap/Makefile
new file mode 100644 (file)
index 0000000..28f58f0
--- /dev/null
@@ -0,0 +1,6 @@
+# lex_yy.py is generated from scan.l in two steps: the bootstrap flex is run
+# with its normal output discarded and its stderr captured as scan.l.xml,
+# then pilex.py is run on that XML with the skel_py.py skeleton (presumably
+# writing lex_yy.py -- the output name is not given on the command line).
+# NOTE: recipe lines must begin with a TAB character, not spaces.
+lex_yy.py: scan.l skel_py.py
+	../../bootstrap_flex.git/src/flex -o /dev/null $< 2>$<.xml
+	../../pilex.git/pilex.py --python --skel skel_py.py $<.xml
+
+# clean is a command name, not a file to be built
+.PHONY: clean
+clean:
+	rm -f lex_yy.py *.xml
diff --git a/bootstrap/element.py b/bootstrap/element.py
new file mode 100644 (file)
index 0000000..2d02217
--- /dev/null
@@ -0,0 +1,179 @@
+# Copyright (C) 2018 Nick Downing <nick@ndcode.org>
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 51
+# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+
+import xml.etree.ElementTree
+
+# XML element (pure-Python ElementTree implementation) carrying the extra
+# bookkeeping used by the serialize()/deserialize() functions of this module:
+#   ref  -- reference number while a serialize() is in progress, else -1
+#   seen -- True once a parent's serialize() has visited this element
+class Element(xml.etree.ElementTree._Element_Py):
+  # tag, attrib: as for xml.etree.ElementTree.Element (attrib None means {})
+  # text: text before the first child ('' is stored as None by set_text())
+  # children: initial child elements (None means no children)
+  # None is used for the "empty" defaults so that no mutable object is
+  # shared between calls as a default argument value
+  def __init__(self, tag = 'Element', attrib = None, text = '', children = None):
+    xml.etree.ElementTree._Element_Py.__init__(
+      self,
+      tag,
+      {} if attrib is None else attrib
+    )
+    self.ref = -1
+    self.seen = False
+    set_text(self, 0, text)
+    self[:] = [] if children is None else children
+  # mark every direct child as parented and recurse into those children
+  # that have not been given a reference number; ref_list is passed through
+  # unchanged to the recursive calls
+  def serialize(self, ref_list):
+    for i in self:
+      # parented, enforce that child can only be parented at most once
+      # (although there can be unlimited numbers of numeric refs to it)
+      assert not i.seen
+      i.seen = True
+      if i.ref == -1:
+        i.serialize(ref_list)
+  # base implementation only forwards ref_list to each child's deserialize()
+  def deserialize(self, ref_list):
+    for i in self:
+      i.deserialize(ref_list)
+  # return a copy built by factory (default Element) from this element's tag
+  # and attrib, with the same text and tail; each child is copied by calling
+  # its own copy() with no arguments, so factory is not forwarded to children
+  def copy(self, factory = None):
+    result = (Element if factory is None else factory)(self.tag, self.attrib)
+    result.text = self.text
+    result.tail = self.tail
+    result[:] = [i.copy() for i in self]
+    return result
+  # append the constructor arguments for __repr__() to the list params;
+  # only a non-empty child list is reported here
+  def repr_serialize(self, params):
+    if len(self):
+      params.append(
+        'children = [{0:s}]'.format(
+          ', '.join([repr(i) for i in self])
+        )
+      )
+  # constructor-like text built from whatever repr_serialize() collected
+  def __repr__(self):
+    params = []
+    self.repr_serialize(params)
+    return 'element.Element({0:s})'.format(', '.join(params))
+
+# text forms of False and True, indexed by int(value)
+bool_to_str = ['false', 'true']
+# return 'false' or 'true' for a boolean by indexing bool_to_str
+def serialize_bool(value):
+  return bool_to_str[int(value)]
+
+# booleans keyed by their text form; only 'false' and 'true' are accepted
+str_to_bool = {'false': False, 'true': True}
+# return the boolean named by text; any other string raises KeyError
+def deserialize_bool(text):
+  return str_to_bool[text]
+
+# return the text form of value, as produced by str()
+def serialize_int(value):
+  return str(value)
+
+# parse text with int(); text that int() rejects raises ValueError
+def deserialize_int(text):
+  return int(text)
+
+# return the reference number of element value as a string ('-1' for None);
+# the first time an element is referenced it is appended to ref_list, its
+# index is stored both in value.ref and in its 'ref' XML attribute, and if
+# no parent has visited it yet its own serialize() is called
+def serialize_ref(value, ref_list):
+  if value is None:
+    ref = -1
+  else:
+    ref = value.ref
+    if ref == -1:
+      ref = len(ref_list)
+      ref_list.append(value)
+      value.ref = ref
+      value.set('ref', str(ref))
+      # this doesn't set the seen flag, so it will be parented by the
+      # root, unless it is already parented or gets parented later on
+      if not value.seen:
+        value.serialize(ref_list)
+  return str(ref)
+
+# return the element stored at index int(text) of ref_list, or None when the
+# number is negative
+def deserialize_ref(text, ref_list):
+  ref = int(text)
+  return None if ref < 0 else ref_list[ref]
+
+# strings are stored as-is; value is returned unchanged
+def serialize_str(value):
+  return value
+
+# strings are stored as-is; text is returned unchanged
+def deserialize_str(text):
+  return text
+
+# write element value, and everything it references, to fout as XML under a
+# temporary 'root' element, then undo all bookkeeping so that the elements
+# are left as they were (ref == -1, no 'ref' attribute, seen == False)
+def serialize(value, fout, encoding = 'unicode'):
+  ref_list = []
+  serialize_ref(value, ref_list)
+  # referenced elements that no other element parented go directly under root
+  parents = [i for i in ref_list if not i.seen]
+  root = Element('root', children = parents)
+  # put each top-level element on its own line, indented by two spaces
+  for i in range(len(root)):
+    set_text(root, i, '\n  ')
+  set_text(root, len(root), '\n')
+  root.tail = '\n'
+  xml.etree.ElementTree.ElementTree(root).write(fout, encoding)
+  # remove the tails that were set above for indentation
+  for i in root:
+    i.tail = None
+  for i in ref_list:
+    i.ref = -1
+    del i.attrib['ref']
+  # breadth-first walk from the top-level elements, clearing the seen flag
+  # of every descendant (parents grows while it is being scanned)
+  i = 0
+  while i < len(parents):
+    for j in parents[i]:
+      j.seen = False
+      parents.append(j)
+    i += 1
+
+# parse XML from fin, building elements with factory, and return the element
+# whose 'ref' attribute is 0; the document element must have tag 'root'
+# (checked with assert); raises IndexError if no element carries a 'ref'
+def deserialize(fin, factory = Element, encoding = 'unicode'):
+  root = xml.etree.ElementTree.parse(
+    fin,
+    xml.etree.ElementTree.XMLParser(
+      target = xml.etree.ElementTree.TreeBuilder(factory),
+      encoding = encoding
+    )
+  ).getroot()
+  assert root.tag == 'root'
+  # discard the text that follows each top-level element
+  for i in root:
+    i.tail = None
+  # breadth-first walk over the whole tree (parents grows while scanned):
+  # every element with a 'ref' attribute is stored at that index of ref_list
+  # and the attribute is removed; unused indices are padded with None
+  i = 0
+  parents = root[:]
+  ref_list = []
+  while i < len(parents):
+    j = parents[i]
+    if 'ref' in j.attrib:
+      ref = int(j.attrib['ref'])
+      del j.attrib['ref']
+      if len(ref_list) < ref + 1:
+        ref_list.extend([None] * (ref + 1 - len(ref_list)))
+      ref_list[ref] = j
+    parents.extend(j[:])
+    i += 1
+  # give every top-level element the completed table of references
+  for i in root:
+    i.deserialize(ref_list)
+  return ref_list[0]
+
+# compatibility scheme to access arbitrary xml.etree.ElementTree.Element-like
+# objects (not just Element defined above) using a more consistent interface:
+# return text slot i of root as a string ('' when it is stored as None):
+# slot 0 is root.text and slot i > 0 is the tail of child i - 1, so there
+# are len(root) + 1 slots; a negative i counts back from the last slot
+def get_text(root, i):
+  if i < 0:
+    i += len(root) + 1
+  text = root.text if i == 0 else root[i - 1].tail
+  return '' if text is None else text
+
+# store text in slot i of root: slot 0 is root.text and slot i > 0 is the
+# tail of child i - 1; a negative i counts back from the last slot, and an
+# empty string is stored as None
+def set_text(root, i, text):
+  if i < 0:
+    i += len(root) + 1
+  if len(text) == 0:
+    text = None
+  if i == 0:
+    root.text = text
+  else:
+    root[i - 1].tail = text
+
+# return all text inside root, in document order: each text slot followed
+# by the recursive text of the child after it, then the final slot; the
+# tail of root itself is not included
+def to_text(root):
+  return ''.join(
+    [
+      j
+      for i in range(len(root))
+      for j in [get_text(root, i), to_text(root[i])]
+    ] +
+    [get_text(root, len(root))]
+  )
+
+# build a new element with factory(*args, **kwargs) and append the content
+# of each element in children to it: the leading text of the source element
+# is joined onto the current last text slot, then its child elements are
+# added after it (the same element objects, not copies)
+def concatenate(children, factory = Element, *args, **kwargs):
+  root = factory(*args, **kwargs)
+  for child in children:
+    i = len(root)
+    set_text(root, i, get_text(root, i) + get_text(child, 0))
+    root[i:] = child[:]
+  return root
diff --git a/bootstrap/markup.py b/bootstrap/markup.py
new file mode 100755 (executable)
index 0000000..587ac17
--- /dev/null
@@ -0,0 +1,8 @@
+#!/usr/bin/env python3
+
+import lex_yy
+import sys
+
+# keep calling the scanner until flexscan() returns a false value, ignoring
+# the individual return values
+while lex_yy.flexscan():
+  pass
+# write the text fragments accumulated in lex_yy.piece as one string
+sys.stdout.write(''.join(lex_yy.piece))
diff --git a/bootstrap/scan.l b/bootstrap/scan.l
new file mode 100644 (file)
index 0000000..7672ac6
--- /dev/null
@@ -0,0 +1,1659 @@
+/* scan.l - scanner for flex input -*-C-*- */
+
+%{
+  # module-level state for the rule actions; actions that assign to one of
+  # these names declare it with "global"
+  import y_tab
+
+  #tablesverify = 0
+  #tablesext = 0
+  trlcontxt = False
+  escaped_qstart = ''
+  escaped_qend = ''
+  # list of output text fragments; markup.py writes ''.join(lex_yy.piece)
+  # (assuming lex_yy is the module generated from this file -- TODO confirm)
+  piece = []
+  # NOTE(review): presumably an index into piece; its use is not visible here
+  piece0 = 0
+
+  # these should be yylex()-local, but moved to here, see further down:
+  bracelevel = 0
+  didadef = False
+  indented_code = False
+  doing_rule_action = False
+  option_sense = False
+
+  doing_codeblock = False
+  brace_depth = 0
+  brace_start_line = 0
+  nmdef = ''
+%}
+
+%option caseless nodefault noreject stack noyy_top_state
+%option nostdinit
+
+%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
+%x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
+%x OPTION LINEDIR CODEBLOCK_MATCH_BRACE
+%x GROUP_WITH_PARAMS
+%x GROUP_MINUS_PARAMS
+%x EXTENDED_COMMENT
+%x COMMENT_DISCARD CODE_COMMENT
+%x SECT3_NOESCAPE
+%x CHARACTER_CONSTANT
+/* Nick extra rules for action groups */
+%x ACTION_GROUP ELEMENT_GROUP DOUBLE_QUOTED SINGLE_QUOTED
+
+WS             [[:blank:]]+
+OPTWS          [[:blank:]]*
+NOT_WS         [^[:blank:]\r\n]
+
+NL             \r?\n
+
+NAME           ([[:alpha:]_][[:alnum:]_-]*)
+NOT_NAME       [^[:alpha:]_*\n]+
+
+SCNAME         {NAME}
+
+ESCSEQ         (\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))
+
+FIRST_CCL_CHAR ([^\\\n]|{ESCSEQ})
+CCL_CHAR       ([^\\\n\]]|{ESCSEQ})
+CCL_EXPR       ("[:"^?[[:alpha:]]+":]")
+
+LEXOPT         [aceknopr]
+
+M4QSTART    "[""["
+M4QEND      "]""]"
+
+%%
+
+  # these should be here, but we can't access yylex()-local variables
+  # from an action since the action functions are not nested to yylex():
+  #bracelevel = 0
+  #didadef = False
+  #indented_code = False
+  #doing_rule_action = False
+  #option_sense = False
+
+  #doing_codeblock = False
+  #brace_depth = 0
+  #brace_start_line = 0
+  #nmdef = ''
+
+<INITIAL>{
+  ^{WS} {
+    global indented_code, linenum
+    if not indented_code:
+      linenum += 1
+      #line_directive_out(None, 1)
+    #add_action('[' '[')
+    yy_push_state(CODEBLOCK)
+    indented_code = True
+    #add_action(yytext)
+    piece_append('<AST_Section1Or2_CodeBlock>')
+    piece_append('<AST_Text>')
+  }
+  ^"/*" {
+    #add_action('/*[' '[')
+    yy_push_state(COMMENT)
+  }
+  ^#{OPTWS}line{WS} yy_push_state(LINEDIR)
+  ^"%s"{NAME}? return y_tab.SCDECL
+  ^"%x"{NAME}? return y_tab.XSCDECL
+  ^"%{".*{NL} {
+    global indented_code, linenum
+    if not indented_code:
+      linenum += 1
+      #line_directive_out(None, 1)
+    #add_action('[' '[')
+    yy_push_state(CODEBLOCK)
+    indented_code = False
+    piece_append('<AST_Section1Or2_CodeBlock>')
+    piece_flush(len(yytext))
+    piece_append('<AST_Text>')
+  }
+  ^"%top"[[:blank:]]*"{"[[:blank:]]*{NL} {
+    global brace_start_line, linenum, brace_depth
+    brace_start_line = linenum
+    linenum += 1
+    #buf_linedir(&top_buf, infilename if infilename else '<stdin>', linenum)
+    brace_depth = 1
+    yy_push_state(CODEBLOCK_MATCH_BRACE)
+  }
+
+  ^"%top".*                    synerr('malformed \'%top\' directive')
+
+  {WS}
+
+  ^"%%".* {
+    # a "%%" line in the INITIAL state: close the section 1 markup, emit the
+    # line itself escaped, open the section 2 markup and switch to SECT2PROLOG
+    # sectnum must be declared global: a plain assignment inside an action
+    # would only bind a local name, lost when the action returns
+    global bracelevel, sectnum
+    sectnum = 2
+    bracelevel = 0
+    #mark_defs1()
+    #line_directive_out(None, 1)
+    BEGIN(SECT2PROLOG)
+    piece_append('</AST_Section1>')
+    piece_pack()
+    piece_escape(yytext)
+    piece_append('<AST_Section2>')
+    piece_pack()
+    return ~y_tab.SECTEND
+  }
+
+  ^"%pointer".*{NL} {
+    global linenum
+    #yytext_is_array = False
+    linenum += 1
+    piece_append('<AST_Section1_Options><AST_Section1_Options_Array>')
+    piece_flush(len(yytext) - 1)
+    piece_append('</AST_Section1_Options_Array></AST_Section1_Options>')
+  }
+  ^"%array".*{NL} {
+    global linenum
+    #yytext_is_array = True
+    linenum += 1
+    piece_append('<AST_Section1_Options><AST_Section1_Options_Array value="true">')
+    piece_flush(len(yytext) - 1)
+    piece_append('</AST_Section1_Options_Array></AST_Section1_Options>')
+  }
+
+  ^"%option" {
+    BEGIN(OPTION)
+    return y_tab.TOK_OPTION
+  }
+
+  ^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL} {
+    global linenum
+    linenum += 1
+  }
+  ^"%"{LEXOPT}{WS}.*{NL} {
+    global linenum
+    linenum += 1
+  }
+
+       /* xgettext: no-c-format */
+  ^"%"[^sxaceknopr{}].*                synerr('unrecognized \'%\' directive')
+
+  ^{NAME} {
+    global nmstr, didadef
+    nmstr = yytext
+    didadef = False
+    BEGIN(PICKUPDEF)
+  }
+
+  {SCNAME} {
+    global nmstr
+    nmstr = yytext
+    piece_pack()
+    piece_append('<AST_Name>')
+    piece_escape(yytext)
+    piece_append('</AST_Name>')
+    piece_pack()
+    return ~y_tab.NAME
+  }
+  ^{OPTWS}{NL} {
+    global linenum
+    linenum += 1
+  }
+  {OPTWS}{NL} {
+    global linenum
+    #add_action(yytext)
+    linenum += 1
+  }
+}
+
+
+<COMMENT,CODE_COMMENT>{ /* */
+  [^\[\]\*\n]*                 #add_action(yytext)
+  .                            #add_action(yytext)
+
+  {NL} {
+    global linenum
+    linenum += 1
+    #add_action(yytext)
+  }
+}
+<COMMENT>{
+  "*/" {
+    #add_action('*/]' ']')
+    yy_pop_state()
+  }
+}
+<CODE_COMMENT>{
+  "*/" {
+    #add_action(yytext)
+    yy_pop_state()
+  }
+}
+
+<COMMENT_DISCARD>{
+        /* This is the same as COMMENT, but is discarded rather than output. */
+  "*/"                         yy_pop_state()
+  "*"
+  [^*\n]
+  {NL} {
+    global linenum
+    linenum += 1
+  }
+}
+
+<EXTENDED_COMMENT>{
+  ")"                          yy_pop_state()
+  [^\n\)]+
+  {NL} {
+    global linenum
+    linenum += 1
+  }
+}
+
+<LINEDIR>{
+        /* Line number and file name following "#line"; ends at the newline. */
+  \n                           yy_pop_state()
+  [[:digit:]]+ {
+    # NOTE(review): myctoi() is not defined in the visible part of this
+    # file -- confirm it exists, otherwise int(yytext) is the equivalent
+    global linenum
+    linenum = myctoi(yytext)
+  }
+
+  \"[^"\n]*\" {
+    # keep the file name without its surrounding double quotes; the C
+    # original's free()/xstrdup()/NUL store has no meaning for a Python
+    # string (and "yytext + 1" would raise TypeError)
+    global infilename
+    infilename = yytext[1:-1]
+  }
+  .
+}
+<ACTION,CODEBLOCK,ACTION_STRING,PERCENT_BRACE_ACTION,CHARACTER_CONSTANT,COMMENT,CODE_COMMENT>{
+  {M4QSTART}                   #add_action('[' ']' ']' '[' '[' '[' ']' ']' '[' '[')
+  {M4QEND}                     #add_action(']' ']' ']' '[' '[' ']' ']' ']' '[' '[')
+}
+
+<CODEBLOCK>{
+  ^"%}".*{NL} {
+    global linenum
+    linenum += 1
+    piece_append('</AST_Text>')
+    piece_flush(len(yytext))
+    yy_pop_state()
+    #add_action(']' ']')
+    #if not indented_code:
+    #  line_directive_out(None, 0)
+    piece_append('</AST_Section1Or2_CodeBlock>')
+  }
+  [^\n%\[\]]*                  #add_action(yytext)
+  .                            #add_action(yytext)
+  {NL} {
+    global linenum
+    linenum += 1
+    #add_action(yytext)
+    if indented_code:
+      piece_flush(len(yytext))
+      piece_append('</AST_Text>')
+      yy_pop_state()
+      #add_action(']' ']')
+      #if not indented_code:
+      #  line_directive_out(None, 0)
+      piece_append('</AST_Section1Or2_CodeBlock>')
+  }
+}
+
+<CODEBLOCK_MATCH_BRACE>{
+  "}" {
+    global brace_depth
+    brace_depth -= 1
+    if brace_depth == 0:
+      yy_pop_state()
+    #else:
+    #  buf_strnappend(&top_buf, yytext, len(yytext))
+  }
+
+  "{" {
+    global brace_depth
+    brace_depth += 1
+    #buf_strnappend(&top_buf, yytext, len(yytext))
+  }
+
+  {NL} {
+    global linenum
+    linenum += 1
+    #buf_strnappend(&top_buf, yytext, len(yytext))
+  }
+
+  {M4QSTART}                   #buf_strnappend(&top_buf, escaped_qstart, int(len(escaped_qstart)))
+  {M4QEND}                     #buf_strnappend(&top_buf, escaped_qend, int(len(escaped_qend)))
+  ([^{}\r\n\[\]]+)|[^{}\r\n]   #buf_strnappend(&top_buf, yytext, len(yytext))
+
+  <<EOF>> {
+    global linenum
+    linenum = brace_start_line
+    synerr('Unmatched \'{\'')
+    yyterminate()
+  }
+}
+
+
+<PICKUPDEF>{
+  {WS}
+
+  {NOT_WS}[^\r\n]* {
+    global nmdef, didadef
+    nmdef = yytext.rstrip()
+    #ndinstal(nmstr, nmdef)
+    didadef = True
+  }
+
+  {NL} {
+    global linenum
+    if not didadef:
+      synerr('incomplete name definition')
+    BEGIN(INITIAL)
+    linenum += 1
+  }
+}
+
+
+<OPTION>{
+  {NL} {
+    global linenum
+    linenum += 1
+    BEGIN(INITIAL)
+  }
+  {WS} {
+    global option_sense
+    option_sense = True
+  }
+
+  "="                          return ord('=')
+
+  no {
+    global option_sense
+    option_sense = not option_sense
+  }
+
+  7bit {
+    #csize = 128 if option_sense else 256
+    markup_option('SevenBit', option_sense)
+  }
+  8bit {
+    #csize = 256 if option_sense else 128
+    markup_option('SevenBit', not option_sense)
+  }
+
+  align {
+    #long_align = option_sense
+    markup_option('Align', option_sense)
+  }
+  always-interactive {
+    #if option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_ALWAYS_INTERACTIVE', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_ALWAYS_INTERACTIVE')
+    #interactive = option_sense
+    markup_option('AlwaysInteractive', option_sense)
+  }
+  array {
+    #yytext_is_array = option_sense
+    markup_option('Array', option_sense)
+  }
+  backup {
+    #backing_up_report = option_sense
+    markup_option('Backup', option_sense)
+  }
+  batch {
+    #interactive = not option_sense
+    markup_option('Interactive', not option_sense)
+  }
+  bison-bridge {
+    #bison_bridge_lval = option_sense
+    markup_option('BisonBridge', option_sense)
+  }
+  bison-locations {
+    #if bison_bridge_lloc = option_sense:
+    #  bison_bridge_lval = True
+    markup_option('BisonLocations', option_sense)
+  }
+  "c++" {
+    #C_plus_plus = option_sense
+    markup_option('CPlusPlus', option_sense)
+  }
+  caseful|case-sensitive {
+    #(_sf_stk[_sf_top_ix] |= int(0x0001)) if not option_sense else (_sf_stk[_sf_top_ix] &= ~int(0x0001))
+    markup_option('Caseless', not option_sense)
+  }
+  caseless|case-insensitive {
+    #(_sf_stk[_sf_top_ix] |= int(0x0001)) if option_sense else (_sf_stk[_sf_top_ix] &= ~int(0x0001))
+    markup_option('Caseless', option_sense)
+  }
+  debug {
+    #ddebug = option_sense
+    markup_option('Debug', option_sense)
+  }
+  default {
+    #spprdflt = not option_sense
+    markup_option('Default', option_sense)
+  }
+  ecs {
+    #useecs = option_sense
+    markup_option('ECS', option_sense)
+  }
+  fast {
+    #useecs = usemecs = False
+    #use_read = fullspd = True
+    markup_option('Fast', option_sense)
+  }
+  full {
+    #useecs = usemecs = False
+    #use_read = fulltbl = True
+    markup_option('Full', option_sense)
+  }
+  input {
+    #if not option_sense:
+    #  action_define('YY_NO_INPUT', 1)
+    markup_option('Input', option_sense)
+  }
+  interactive {
+    #interactive = option_sense
+    markup_option('Interactive', option_sense)
+  }
+  lex-compat {
+    #lex_compat = option_sense
+    markup_option('LexCompat', option_sense)
+  }
+  posix-compat {
+    #posix_compat = option_sense
+    markup_option('PosixCompat', option_sense)
+  }
+  line {
+    #gen_line_dirs = option_sense
+    markup_option('Line', option_sense)
+  }
+  main {
+    #if option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_MAIN', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_MAIN')
+    #if option_sense:
+    #  do_yywrap = False
+    markup_option('Main', option_sense)
+  }
+  meta-ecs {
+    #usemecs = option_sense
+    markup_option('MetaECS', option_sense)
+  }
+  never-interactive {
+    #if option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NEVER_INTERACTIVE', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NEVER_INTERACTIVE')
+    #interactive = not option_sense
+    markup_option('NeverInteractive', option_sense)
+  }
+  perf-report {
+    #performance_report += 1 if option_sense else -1
+    markup_option('PerfReport', option_sense)
+  }
+  pointer {
+    #yytext_is_array = not option_sense
+    markup_option('Array', not option_sense)
+  }
+  read {
+    #use_read = option_sense
+    markup_option('Read', option_sense)
+  }
+  reentrant {
+    #reentrant = option_sense
+    markup_option('Reentrant', option_sense)
+  }
+  reject {
+    #reject_really_used = option_sense
+    markup_option('Reject', option_sense)
+  }
+  stack {
+    #if option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_STACK_USED', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_STACK_USED')
+    markup_option('Stack', option_sense)
+  }
+  stdinit {
+    #do_stdinit = option_sense
+    markup_option('StdInit', option_sense)
+  }
+  stdout {
+    #use_stdout = option_sense
+    markup_option('StdOut', option_sense)
+  }
+  unistd {
+    #if not option_sense:
+    #  action_define('YY_NO_UNISTD_H', 1)
+    markup_option('UniStd', option_sense)
+  }
+  unput {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_UNPUT', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_UNPUT')
+    markup_option('Unput', option_sense)
+  }
+  verbose {
+    #printstats = option_sense
+    markup_option('Verbose', option_sense)
+  }
+  warn {
+    #nowarn = not option_sense
+    markup_option('Warn', option_sense)
+  }
+  yylineno {
+    #do_yylineno = option_sense
+    #if option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_USE_LINENO', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_USE_LINENO')
+    markup_option('YYLineNo', option_sense)
+  }
+  yymore {
+    #yymore_really_used = option_sense
+    markup_option('YYMore', option_sense)
+  }
+  yywrap {
+    #do_yywrap = option_sense
+    markup_option('YYWrap', option_sense)
+  }
+
+  yy_push_state {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_PUSH_STATE', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_PUSH_STATE')
+    markup_option('YYPushState', option_sense)
+  }
+  yy_pop_state {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_POP_STATE', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_POP_STATE')
+    markup_option('YYPopState', option_sense)
+  }
+  yy_top_state {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_TOP_STATE', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_TOP_STATE')
+    markup_option('YYTopState', option_sense)
+  }
+
+  yy_scan_buffer {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SCAN_BUFFER', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SCAN_BUFFER')
+    markup_option('YYScanBuffer', option_sense)
+  }
+  yy_scan_bytes {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SCAN_BYTES', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SCAN_BYTES')
+    markup_option('YYScanBytes', option_sense)
+  }
+  yy_scan_string {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SCAN_STRING', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SCAN_STRING')
+    markup_option('YYScanString', option_sense)
+  }
+
+  yyalloc {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_FLEX_ALLOC', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_FLEX_ALLOC')
+    markup_option('YYAlloc', option_sense)
+  }
+  yyrealloc {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_FLEX_REALLOC', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_FLEX_REALLOC')
+    markup_option('YYRealloc', option_sense)
+  }
+  yyfree {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_FLEX_FREE', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_FLEX_FREE')
+    markup_option('YYFree', option_sense)
+  }
+
+  yyget_debug {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_DEBUG', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_DEBUG')
+    markup_option('YYGetDebug', option_sense)
+  }
+  yyset_debug {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SET_DEBUG', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SET_DEBUG')
+    markup_option('YYSetDebug', option_sense)
+  }
+  yyget_extra {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_EXTRA', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_EXTRA')
+    markup_option('YYGetExtra', option_sense)
+  }
+  yyset_extra {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SET_EXTRA', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SET_EXTRA')
+    markup_option('YYSetExtra', option_sense)
+  }
+  yyget_leng {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_LENG', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_LENG')
+    markup_option('YYGetLeng', option_sense)
+  }
+  yyget_text {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_TEXT', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_TEXT')
+    markup_option('YYGetText', option_sense)
+  }
+  yyget_lineno {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_LINENO', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_LINENO')
+    markup_option('YYGetLineNo', option_sense)
+  }
+  yyset_lineno {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SET_LINENO', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SET_LINENO')
+    markup_option('YYSetLineNo', option_sense)
+  }
+  yyget_in {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_IN', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_IN')
+    markup_option('YYGetIn', option_sense)
+  }
+  yyset_in {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SET_IN', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SET_IN')
+    markup_option('YYSetIn', option_sense)
+  }
+  yyget_out {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_OUT', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_OUT')
+    markup_option('YYGetOut', option_sense)
+  }
+  yyset_out {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SET_OUT', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SET_OUT')
+    markup_option('YYSetOut', option_sense)
+  }
+  yyget_lval {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_LVAL', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_LVAL')
+    markup_option('YYGetLVal', option_sense)
+  }
+  yyset_lval {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SET_LVAL', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SET_LVAL')
+    markup_option('YYSetLVal', option_sense)
+  }
+  yyget_lloc {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_GET_LLOC', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_GET_LLOC')
+    markup_option('YYGetLLoc', option_sense)
+  }
+  yyset_lloc {
+    #if not option_sense:
+    #  buf_m4_define(&m4defs_buf, 'M4' '_YY_NO_SET_LLOC', None)
+    #else:
+    #  buf_m4_undefine(&m4defs_buf, 'M4' '_YY_NO_SET_LLOC')
+    markup_option('YYSetLLoc', option_sense)
+  }
+
+  extra-type                   return y_tab.TOK_EXTRA_TYPE
+  outfile                      return y_tab.TOK_OUTFILE
+  prefix                       return y_tab.TOK_PREFIX
+  yyclass                      return y_tab.TOK_YYCLASS
+  header(-file)?               return y_tab.TOK_HEADER_FILE
+  tables-file                  return y_tab.TOK_TABLES_FILE
+  tables-verify {
+    #tablesverify = option_sense
+    #if not tablesext and option_sense:
+    #  tablesext = True
+    markup_option('TablesVerify', option_sense)
+  }
+
+
+  \"[^"\n]*\" {
+    global nmstr
+    nmstr = yytext[1:-1]
+    piece_pack()
+    piece_append('<AST_String>"<AST_Text>')
+    piece_escape(yytext[1:-1])
+    piece_append('</AST_Text>"</AST_String>')
+    piece_pack()
+    return ~y_tab.NAME
+  }
+
+  (([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|. {
+    synerr('unrecognized %option: {0:s}'.format(yytext))
+    BEGIN(RECOVER)
+  }
+}
+
+<RECOVER>.*{NL} {
+  global linenum
+  linenum += 1
+  BEGIN(INITIAL)
+}
+
+
+<SECT2PROLOG>{
+  ^"%{".* {
+    global bracelevel
+    bracelevel += 1
+    yyless(2)
+  }
+  ^"%}".* {
+    global bracelevel
+    bracelevel -= 1
+    yyless(2)
+  }
+
+  ^{WS} {
+    global indented_code, linenum
+    if not indented_code:
+      linenum += 1
+      #line_directive_out(None, 1)
+    #add_action('[' '[')
+    yy_push_state(CODEBLOCK)
+    indented_code = True
+    #add_action(yytext)
+    piece_append('<AST_Section1Or2_CodeBlock>')
+    piece_append('<AST_Text>')
+  }
+
+  ^{NOT_WS}.* {
+    global indented_code, linenum
+    if bracelevel <= 0:
+      yyless(0)
+      yy_set_bol(True)
+      #mark_prolog()
+      BEGIN(SECT2)
+    else:
+      if not indented_code:
+        linenum += 1
+        #line_directive_out(None, 1)
+      #add_action('[' '[')
+      yy_push_state(CODEBLOCK)
+      indented_code = True
+      #add_action(yytext)
+      piece_append('<AST_Section1Or2_CodeBlock>')
+      piece_append('<AST_Text>')
+  }
+
+  .                            #add_action(yytext)
+  {NL} {
+    global linenum
+    linenum += 1
+    #add_action(yytext)
+  }
+
+  <<EOF>> {
+    # end of input while still in the section 2 prolog: close the section 2
+    # markup
+    # sectnum must be declared global: a plain assignment inside an action
+    # would only bind a local name, lost when the action returns
+    global sectnum
+    #mark_prolog()
+    sectnum = 0
+    piece_pack()
+    piece_append('</AST_Section2>')
+    piece_pack()
+    return ~YY_NULL
+  }
+}
+
+<SECT2>{
+  ^{OPTWS}{NL} {
+    global linenum
+    linenum += 1
+  }
+
+  ^{OPTWS}"%{" {
+    global indented_code, doing_codeblock, bracelevel
+    indented_code = False
+    doing_codeblock = True
+    bracelevel = 1
+    BEGIN(PERCENT_BRACE_ACTION)
+    piece_flush(len(yytext) - 2)
+    piece_append('<AST_Section1Or2_CodeBlock>')
+    piece_flush(2)
+    piece_append('<AST_Text>')
+  }
+
+  ^{OPTWS}"<" {
+    if True: #not (_sf_stk[_sf_top_ix] & int(0x0004)):
+      BEGIN(SC)
+    piece_flush(len(yytext) - 1)
+    return ord('<')
+  }
+  ^{OPTWS}"^" {
+    piece_flush(len(yytext) - 1)
+    return ord('^')
+  }
+  \" {
+    BEGIN(QUOTE)
+    return ord('"')
+  }
+  "{"/[[:digit:]] {
+    BEGIN(NUM)
+    if lex_compat or posix_compat:
+      return y_tab.BEGIN_REPEAT_POSIX
+    else:
+      return y_tab.BEGIN_REPEAT_FLEX
+  }
+  "$"/([[:blank:]]|{NL}) return ord('$')
+
+  {WS}"%{" {
+    global bracelevel, in_rule, doing_rule_action
+    bracelevel = 1
+    BEGIN(PERCENT_BRACE_ACTION)
+    piece_flush(len(yytext) - 2)
+    piece_append('<AST_Section2_Rule_Action>')
+    if in_rule:
+      doing_rule_action = True
+      in_rule = False
+      piece_pack()
+      piece_escape(yytext[:2])
+      piece_pack()
+      piece_append('<AST_Text>')
+      return ~ord('\n')
+    abort()
+  }
+  {WS}"|".*{NL} {
+    # a "|" action: the rule shares the action of the rule that follows it
+    # continued_action must be declared global: a plain assignment inside an
+    # action would only bind a local name, lost when the action returns
+    global linenum, continued_action
+    if False: #_sf_stk[_sf_top_ix] & int(0x0004):
+      amt = int(strchr(yytext, ord('|')) - yytext)
+      yyless(amt)
+    else:
+      #add_action(']' ']')
+      continued_action = True
+      linenum += 1
+      # count the leading blanks and tabs and flush them before the markup
+      i = 0
+      while i < len(yytext) and (yytext[i] == '\t' or yytext[i] == ' '):
+        i += 1
+      piece_flush(i)
+      piece_pack()
+      piece_append('<AST_Section2_Rule_Action continued="true">')
+      piece_escape(yytext)
+      piece_append('</AST_Section2_Rule_Action>')
+      piece_pack()
+      return ~ord('\n')
+  }
+
+  ^{WS}"/*" {
+    # indented comment opener: give back the "/*" and switch to ACTION
+    # continued_action must be declared global: a plain assignment inside an
+    # action would only bind a local name, lost when the action returns
+    global bracelevel, continued_action
+    if False: #_sf_stk[_sf_top_ix] & int(0x0004):
+      yy_push_state(COMMENT_DISCARD)
+    else:
+      yyless(len(yytext) - 2)
+      bracelevel = 0
+      continued_action = False
+      BEGIN(ACTION)
+  }
+
+  ^{WS}
+
+  {WS} {
+    # whitespace after a pattern: switch to ACTION and, when a rule is in
+    # progress, open the rule-action markup
+    # continued_action must be declared global: a plain assignment inside an
+    # action would only bind a local name, lost when the action returns
+    global bracelevel, in_rule, doing_rule_action, continued_action
+    if False: #_sf_stk[_sf_top_ix] & int(0x0004):
+      pass
+    else:
+      bracelevel = 0
+      continued_action = False
+      BEGIN(ACTION)
+      if in_rule:
+        doing_rule_action = True
+        in_rule = False
+        piece_pack()
+        piece_escape(yytext)
+        piece_pack()
+        piece_append('<AST_Section2_Rule_Action><AST_Text>')
+        return ~ord('\n')
+  }
+
+  {OPTWS}{NL} {
+    # end of line after a pattern: switch to ACTION, giving back the final
+    # character of the match, and open the rule-action markup when a rule is
+    # in progress
+    # continued_action must be declared global: a plain assignment inside an
+    # action would only bind a local name, lost when the action returns
+    global linenum, bracelevel, in_rule, doing_rule_action, continued_action
+    if False: #_sf_stk[_sf_top_ix] & int(0x0004):
+      linenum += 1
+    else:
+      bracelevel = 0
+      continued_action = False
+      BEGIN(ACTION)
+      yyless(len(yytext) - 1)
+      if in_rule:
+        doing_rule_action = True
+        in_rule = False
+        piece_pack()
+        piece_escape(yytext)
+        piece_pack()
+        piece_append('<AST_Section2_Rule_Action><AST_Text>')
+        return ~ord('\n')
+  }
+
+  ^{OPTWS}"<<EOF>>" |
+  "<<EOF>>" {
+    piece_flush(len(yytext) - 7)
+    return y_tab.EOF_OP
+  }
+
+  ^"%%".* {
+    # a "%%" line in section 2: close the section 2 markup, emit the line
+    # itself escaped, open the section 3 markup and switch to a SECT3 state
+    # sectnum must be declared global: a plain assignment inside an action
+    # would only bind a local name, lost when the action returns
+    global sectnum
+    sectnum = 3
+    BEGIN(SECT3_NOESCAPE if no_section3_escape else SECT3)
+    #outn('/* Begin user sect3 */')
+    piece_pack()
+    piece_append('</AST_Section2>')
+    piece_escape(yytext)
+    piece_pack()
+    piece_append('<AST_Section3>')
+    # for some reason flex requires an extra EOF after section 2:
+    #return ~YY_NULL
+  }
+
+  "["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})* {
+    # Start of a character class.  The class lookup and reuse logic is
+    # commented out, so the class is always rescanned: keep only the opening
+    # character in yytext and scan the rest in the FIRSTCCL start condition.
+    global nmstr
+    #cclval = None
+    nmstr = yytext
+    #if 0 and (cclval = ccllookup(nmstr)) != 0:
+    #  if input() != ord(']'):
+    #    synerr('bad character class')
+    #  #yylval = cclval
+    #  #cclreuse += 1
+    #  return y_tab.PREVCCL
+    if True: #else:
+      #cclinstal(nmstr, lastccl + 1)
+      yyless(1)
+      BEGIN(FIRSTCCL)
+      return ord('[')
+  }
+    /* Character class difference and union operators. */
+  "{-}"                                return y_tab.CCL_OP_DIFF
+  "{+}"                                return y_tab.CCL_OP_UNION
+
+
+    /* Check for :space: at the end of the rule so we don't
+     * wrap the expanded regex in '(' ')' -- breaking trailing
+     * context.
+     */
+  "{"{NAME}"}"[[:space:]]? {
+    # The whole expansion logic below is commented out, so a reference to a
+    # named definition is matched here but not expanded.
+    # fix this later
+    #global nmstr
+    #nmdefptr = None
+    #end_is_ws = None
+    #end_ch = None
+    #end_ch = yytext[len(yytext) - 1]
+    #end_is_ws = 1 if end_ch != ord('}') else 0
+    #if len(yytext) - 1 < 2048:
+    #  strncpy(nmstr, yytext + 1, sizeof nmstr)
+    #else:
+    #  synerr('Input line too long\n')
+    #  longjmp(flex_main_jmp_buf, 1 + 1)
+    #nmstr[len(yytext) - 2 - end_is_ws] = ord('\0')
+    #if (nmdefptr = ndlookup(nmstr)) == 0:
+    #  synerr('undefined definition {{{0:s}}}'.format(nmstr))
+    #else:
+    #  len = len(nmdefptr)
+    #  if end_is_ws:
+    #    yyless(len(yytext) - 1)
+    #  if lex_compat or nmdefptr[0] == ord('^') or len > 0 and nmdefptr[len - 1] == ord('$') or end_is_ws and trlcontxt and not (_sf_stk[_sf_top_ix] & int(0x0004)):
+    #    i = len(nmdefptr)
+    #    while i > 0:
+    #      unput(nmdefptr[--i])
+    #    if nmdefptr[0] == ord('^'):
+    #      BEGIN(CARETISBOL)
+    #  else:
+    #    unput(ord(')'))
+    #    i = len(nmdefptr)
+    #    while i > 0:
+    #      unput(nmdefptr[--i])
+    #    if not lex_compat and not posix_compat:
+    #      unput(ord(':'))
+    #      unput(ord('?'))
+    #    unput(ord('('))
+  }
+
+  "/*" {
+    # The whitespace skipping test is disabled, so this is never treated as
+    # a comment: only the slash is kept and returned as an operator, and the
+    # star is rescanned.
+    if False: #_sf_stk[_sf_top_ix] & int(0x0004):
+      yy_push_state(COMMENT_DISCARD)
+    else:
+      yyless(1)
+      return ord('/')
+  }
+
+  "(?#" {
+    # In lex or posix compatibility mode this is an ordinary group opener
+    # and the rest is rescanned; otherwise it starts an extended comment.
+    if lex_compat or posix_compat:
+      yyless(1)
+      sf_push()
+      return ord('(')
+    else:
+      yy_push_state(EXTENDED_COMMENT)
+  }
+  "(?" {
+    # Group with parameters, unless a compatibility mode is active, in which
+    # case only the parenthesis is kept and the rest is rescanned.
+    sf_push()
+    if lex_compat or posix_compat:
+      yyless(1)
+    else:
+      BEGIN(GROUP_WITH_PARAMS)
+    return ord('(')
+  }
+  "(" {
+    sf_push()
+    return ord('(')
+  }
+  ")" {
+    # Only close a group when one is open; otherwise report the error and
+    # return no token.
+    if _sf_top_ix > 0:
+      sf_pop()
+      return ord(')')
+    else:
+      synerr('unbalanced parenthesis')
+  }
+
+    /* Operator characters are returned as themselves; any other single
+     * character is returned as a CHAR token.
+     */
+  [/|*+?.(){}]                 return ord(yytext[0])
+  . {
+    #yylval = ord(yytext[0])
+    return y_tab.CHAR
+  }
+
+       /* Nick added this rule for consistency with rest of scanner */
+  <<EOF>> {
+    # End of input inside section 2: close the section 2 markup and return
+    # the complemented YY_NULL so that flexscan adds no further markup.
+    # sectnum is declared global so the update is not lost in a
+    # function-local name.
+    global sectnum
+    sectnum = 0
+    piece_pack()
+    piece_append('</AST_Section2>')
+    piece_pack()
+    return ~YY_NULL
+  }
+}
+
+
+<SC>{
+    /* Rules for the start condition list in front of a pattern. */
+  {OPTWS}{NL}{OPTWS} {
+    global linenum
+    linenum += 1
+  }
+  [,*]                         return ord(yytext[0])
+  ">" {
+    BEGIN(SECT2)
+    return ord('>')
+  }
+  ">"/^ {
+    BEGIN(CARETISBOL)
+    return ord('>')
+  }
+  {SCNAME} {
+    # Wrap the start condition name in its own element.  The markup is
+    # emitted here, so the token is returned complemented and flexscan
+    # does not escape yytext a second time.
+    global nmstr
+    nmstr = yytext
+    piece_pack()
+    piece_append('<AST_Name>')
+    piece_escape(yytext)
+    piece_append('</AST_Name>')
+    piece_pack()
+    return ~y_tab.NAME
+  }
+  .                            synerr('bad <start condition>: {0:s}'.format(yytext))
+}
+
+<CARETISBOL>"^" {
+  # Entered after a start condition list that is directly followed by a
+  # caret: return the caret as a token and resume section 2 scanning.
+  BEGIN(SECT2)
+  return ord('^')
+}
+
+
+<QUOTE>{
+    /* Inside a double-quoted string in a pattern. */
+  [^"\n] {
+    #yylval = ord(yytext[0])
+    return y_tab.CHAR
+  }
+  \" {
+    BEGIN(SECT2)
+    return ord('"')
+  }
+
+  {NL} {
+    # Unterminated string: report it and return a closing quote token
+    # anyway so that scanning continues in section 2.
+    global linenum
+    synerr('missing quote')
+    BEGIN(SECT2)
+    linenum += 1
+    return ord('"')
+  }
+}
+
+<GROUP_WITH_PARAMS>{
+    /* Nick extra rules for named groups */
+  "'"{NAME}"'" |
+  "<"{NAME}">" {
+    # Emit the opening delimiter, then the name wrapped in its own element,
+    # then the closing delimiter.  All of yytext is flushed here, so the
+    # token is returned complemented.
+    BEGIN(SECT2)
+    piece_flush(1)
+    piece_pack()
+    piece_append('<RegexGroupName_Text>')
+    piece_flush(len(yytext) - 1)
+    piece_append('</RegexGroupName_Text>')
+    piece_pack()
+    piece_flush(1)
+    return ~y_tab.NAME
+  }
+    /* Nick extra rules for action groups */
+  "A{" {
+    # Start of an action group: the opener is flushed and the text element
+    # is opened; the ACTION_GROUP rules close it at the matching brace.
+    global bracelevel
+    BEGIN(SECT2)
+    yy_push_state(ACTION_GROUP)
+    bracelevel = 1
+    piece_flush(len(yytext))
+    piece_append('<RegexGroupAction_Text>')
+  }
+  "E{" {
+    # Start of an element group, handled like the action group above.
+    global bracelevel
+    BEGIN(SECT2)
+    yy_push_state(ELEMENT_GROUP)
+    bracelevel = 1
+    piece_flush(len(yytext))
+    piece_append('<RegexGroupElement_Text>')
+  }
+  ":" {
+    BEGIN(SECT2)
+    return ord(':')
+  }
+  "-"                          BEGIN(GROUP_MINUS_PARAMS)
+    /* The option letters below are matched, but their flag updates are
+     * commented out.
+     */
+  i                            #(_sf_stk[_sf_top_ix] |= int(0x0001)) if 1 else (_sf_stk[_sf_top_ix] &= ~int(0x0001))
+  s                            #(_sf_stk[_sf_top_ix] |= int(0x0002)) if 1 else (_sf_stk[_sf_top_ix] &= ~int(0x0002))
+  x                            #(_sf_stk[_sf_top_ix] |= int(0x0004)) if 1 else (_sf_stk[_sf_top_ix] &= ~int(0x0004))
+}
+<GROUP_MINUS_PARAMS>{
+    /* Option letters after the minus sign in a group with parameters; the
+     * flag updates are commented out here as well.
+     */
+  ":" {
+    BEGIN(SECT2)
+    return ord(':')
+  }
+  i                            #(_sf_stk[_sf_top_ix] |= int(0x0001)) if 0 else (_sf_stk[_sf_top_ix] &= ~int(0x0001))
+  s                            #(_sf_stk[_sf_top_ix] |= int(0x0002)) if 0 else (_sf_stk[_sf_top_ix] &= ~int(0x0002))
+  x                            #(_sf_stk[_sf_top_ix] |= int(0x0004)) if 0 else (_sf_stk[_sf_top_ix] &= ~int(0x0004))
+}
+
+<FIRSTCCL>{
+    /* First position inside a character class.  A caret followed by a
+     * minus sign or a closing bracket stays in this start condition; every
+     * other match moves on to CCL.
+     */
+  "^"/[^-\]\n] {
+    BEGIN(CCL)
+    return ord('^')
+  }
+  "^"/("-"|"]")                        return ord('^')
+  . {
+    BEGIN(CCL)
+    #yylval = ord(yytext[0])
+    return y_tab.CHAR
+  }
+}
+
+<CCL>{
+    /* Remaining positions inside a character class. */
+  -/[^\]\n]                    return ord('-')
+  [^\]\n] {
+    #yylval = ord(yytext[0])
+    return y_tab.CHAR
+  }
+  "]" {
+    BEGIN(SECT2)
+    return ord(']')
+  }
+  .|{NL} {
+    # Anything else is an error: report it and return a closing bracket
+    # token so that scanning continues in section 2.
+    synerr('bad character class')
+    BEGIN(SECT2)
+    return ord(']')
+  }
+}
+
+<FIRSTCCL,CCL>{
+    /* Named character class expressions and their negated forms.  Each
+     * rule continues in CCL and returns the matching token from y_tab.
+     */
+  "[:alnum:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_ALNUM
+  }
+  "[:alpha:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_ALPHA
+  }
+  "[:blank:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_BLANK
+  }
+  "[:cntrl:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_CNTRL
+  }
+  "[:digit:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_DIGIT
+  }
+  "[:graph:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_GRAPH
+  }
+  "[:lower:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_LOWER
+  }
+  "[:print:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_PRINT
+  }
+  "[:punct:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_PUNCT
+  }
+  "[:space:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_SPACE
+  }
+  "[:upper:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_UPPER
+  }
+  "[:xdigit:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_XDIGIT
+  }
+
+  "[:^alnum:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_ALNUM
+  }
+  "[:^alpha:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_ALPHA
+  }
+  "[:^blank:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_BLANK
+  }
+  "[:^cntrl:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_CNTRL
+  }
+  "[:^digit:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_DIGIT
+  }
+  "[:^graph:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_GRAPH
+  }
+  "[:^lower:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_LOWER
+  }
+  "[:^print:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_PRINT
+  }
+  "[:^punct:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_PUNCT
+  }
+  "[:^space:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_SPACE
+  }
+  "[:^upper:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_UPPER
+  }
+  "[:^xdigit:]" {
+    BEGIN(CCL)
+    return y_tab.CCE_NEG_XDIGIT
+  }
+  {CCL_EXPR} {
+    # Unknown class expression: report it and return CCE_ALNUM as a
+    # stand-in token so that scanning can continue.
+    synerr('bad character class expression: {0:s}'.format(yytext))
+    BEGIN(CCL)
+    return y_tab.CCE_ALNUM
+  }
+}
+
+<NUM>{
+    /* Inside a repeat count. */
+  [[:digit:]]+ {
+    #yylval = myctoi(yytext)
+    return y_tab.NUMBER
+  }
+
+  ","                          return ord(',')
+  "}" {
+    # The end-of-repeat token depends on the compatibility mode.
+    BEGIN(SECT2)
+    if lex_compat or posix_compat:
+      return y_tab.END_REPEAT_POSIX
+    else:
+      return y_tab.END_REPEAT_FLEX
+  }
+
+  . {
+    # Error recovery: report, then return a closing brace token.
+    synerr('bad character inside {}\'s')
+    BEGIN(SECT2)
+    return ord('}')
+  }
+
+  {NL} {
+    # Error recovery: report, then return a closing brace token.
+    global linenum
+    synerr('missing }')
+    BEGIN(SECT2)
+    linenum += 1
+    return ord('}')
+  }
+}
+
+
+<PERCENT_BRACE_ACTION>{
+  {OPTWS}"%}".* {
+    # End marker of the block: close the text element; the end of line
+    # rule below then finishes the block because bracelevel is zero.
+    global bracelevel
+    bracelevel = 0
+    piece_append('</AST_Text>')
+  }
+
+  <ACTION>"/*" {
+    #add_action(yytext)
+    yy_push_state(CODE_COMMENT)
+  }
+
+    <CODEBLOCK,ACTION>{
+    "reject" {
+      # The flag is declared global so the assignment is not lost in a
+      # function-local name.
+      global reject
+      #add_action(yytext)
+      if all_upper(yytext):
+        reject = True
+    }
+    "yymore" {
+      # The flag is declared global so the assignment is not lost in a
+      # function-local name.
+      global yymore_used
+      #add_action(yytext)
+      if all_lower(yytext):
+        yymore_used = True
+    }
+  }
+
+  .                            #add_action(yytext)
+  {NL} {
+    # End of line: when no brace is open, or an indented code block is being
+    # scanned, flush the line, close the enclosing element and return to
+    # section 2.
+    global linenum, doing_rule_action, doing_codeblock
+    linenum += 1
+    #add_action(yytext)
+    if bracelevel <= 0 or doing_codeblock and indented_code:
+      #if doing_rule_action:
+      #  add_action('\tYY_BREAK]' ']\n')
+      piece_flush(len(yytext))
+      if doing_codeblock:
+        piece_append('</AST_Section1Or2_CodeBlock>')
+      else:
+        markup_action('</AST_Section2_Rule_Action>')
+      doing_rule_action = doing_codeblock = False
+      BEGIN(SECT2)
+  }
+}
+
+
+       /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
+<ACTION>{
+  "{" {
+    global bracelevel
+    #add_action(yytext)
+    bracelevel += 1
+  }
+  "}" {
+    global bracelevel
+    #add_action(yytext)
+    bracelevel -= 1
+  }
+  [^[:alpha:]_{}\"'/\n\[\]]+   #add_action(yytext)
+  {NAME}                       #add_action(yytext)
+  "'"([^\'\\\n]|\\.)"'"                #add_action(yytext)
+  "'" {
+    # CHARACTER_CONSTANT is a start condition of this scanner, not a token
+    # of the y_tab module, so it is named directly.
+    #add_action(yytext)
+    BEGIN(CHARACTER_CONSTANT)
+  }
+  \" {
+    #add_action(yytext)
+    BEGIN(ACTION_STRING)
+  }
+  {NL} {
+    # End of line: when no brace is open the action is complete.  For a rule
+    # action the line is flushed and the action markup is closed before
+    # returning to section 2.
+    global linenum, doing_rule_action
+    linenum += 1
+    #add_action(yytext)
+    if bracelevel <= 0:
+      if doing_rule_action:
+        #add_action('\tYY_BREAK]' ']\n')
+        piece_flush(len(yytext))
+        markup_action('</AST_Text></AST_Section2_Rule_Action>')
+      doing_rule_action = False
+      BEGIN(SECT2)
+  }
+  .                            #add_action(yytext)
+}
+
+<ACTION_STRING>{
+    /* Inside a double-quoted string in an action; the closing quote
+     * returns to ACTION.
+     */
+  [^\[\]\"\\\n]+               #add_action(yytext)
+  \" {
+    #add_action(yytext)
+    BEGIN(ACTION)
+  }
+}
+<CHARACTER_CONSTANT>{
+    /* Inside a single-quoted constant in an action; the closing quote
+     * returns to ACTION.
+     */
+  [^\[\]\'\\\n]+               #add_action(yytext)
+  \' {
+    #add_action(yytext)
+    BEGIN(ACTION)
+  }
+}
+<ACTION_STRING,CHARACTER_CONSTANT>{
+    /* Rules shared by both quoted contexts: backslash sequences, and an
+     * end of line inside the quotes.
+     */
+  (\\\n)*                      #add_action(yytext)
+  \\(\\\n)*.                   #add_action(yytext)
+  {NL} {
+    # An end of line ends the quoted context.  With no brace open the action
+    # is finished as well, otherwise scanning continues in ACTION.
+    global linenum
+    linenum += 1
+    #add_action(yytext)
+    if bracelevel <= 0:
+      BEGIN(SECT2)
+      piece_flush(len(yytext))
+      if doing_rule_action:
+        markup_action('</AST_Text></AST_Section2_Rule_Action>')
+    else:
+      BEGIN(ACTION)
+  }
+  .                            #add_action(yytext)
+}
+
+ /* Nick extra rules for action groups */
+ /* Nick added: ACTION_GROUP,ELEMENT_GROUP,DOUBLE_QUOTED,SINGLE_QUOTED */
+<COMMENT,CODE_COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING,CHARACTER_CONSTANT,ACTION_GROUP,ELEMENT_GROUP,DOUBLE_QUOTED,SINGLE_QUOTED><<EOF>> {
+  # End of input in any of these start conditions is an error; report it
+  # and stop the scanner.
+  synerr('EOF encountered inside an action')
+  yyterminate()
+}
+
+<EXTENDED_COMMENT,GROUP_WITH_PARAMS,GROUP_MINUS_PARAMS><<EOF>> {
+  # End of input in the middle of a pattern is an error; report it and stop
+  # the scanner.
+  synerr('EOF encountered inside pattern')
+  yyterminate()
+}
+
+<SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ} {
+  # An escape sequence counts as one CHAR token.  At the first position of
+  # a character class it also moves the scanner on to CCL.
+  #yylval = myesc(str(yytext))
+  if YY_START() == FIRSTCCL:
+    BEGIN(CCL)
+  return y_tab.CHAR
+}
+
+<SECT3>{
+    /* Section 3 text is matched in pieces; the original output actions
+     * are commented out.
+     */
+  {M4QSTART}                   #yyout.write(escaped_qstart)
+  {M4QEND}                     #yyout.write(escaped_qend)
+  [^\[\]]*                     #ECHO()
+  [][]                         #ECHO()
+  <<EOF>> {
+    # End of input: close the section 3 markup and return the complemented
+    # YY_NULL so that flexscan adds no further markup.
+    # sectnum is declared global so the update is not lost in a
+    # function-local name.
+    global sectnum
+    sectnum = 0
+    piece_pack()
+    piece_append('</AST_Section3>')
+    piece_pack()
+    return ~YY_NULL
+  }
+}
+<SECT3_NOESCAPE>{
+    /* Same as SECT3, selected when no_section3_escape is set; the original
+     * output actions are commented out.
+     */
+  {M4QSTART}                   #yyout.write('[' '[{0:s}]' ']'.format(escaped_qstart))
+  {M4QEND}                     #yyout.write('[' '[{0:s}]' ']'.format(escaped_qend))
+  [^][]*                       #ECHO()
+  [][]                         #ECHO()
+  <<EOF>> {
+    # End of input: close the section 3 markup and return the complemented
+    # YY_NULL so that flexscan adds no further markup.
+    # sectnum is declared global so the update is not lost in a
+    # function-local name.
+    global sectnum
+    sectnum = 0
+    piece_pack()
+    piece_append('</AST_Section3>')
+    piece_pack()
+    return ~YY_NULL
+  }
+}
+
+ /* Nick extra rules for action groups */
+<ACTION_GROUP,ELEMENT_GROUP>{
+  "{" {
+    # Track nesting so that the matching closing brace can be recognised.
+    global bracelevel
+    bracelevel += 1
+  }
+}
+<ACTION_GROUP>{
+  "}" {
+    # The brace that brings the nesting level back to zero ends the group:
+    # restore the previous start condition, close the text element and
+    # return the token, qualified with y_tab like every other token.
+    global bracelevel
+    bracelevel -= 1
+    if bracelevel == 0:
+      yy_pop_state()
+      piece_append('</RegexGroupAction_Text>')
+      return y_tab.TOK_ACTION_GROUP
+  }
+}
+<ELEMENT_GROUP>{
+  "}" {
+    # The brace that brings the nesting level back to zero ends the group:
+    # restore the previous start condition, close the text element and
+    # return the token, qualified with y_tab like every other token.
+    global bracelevel
+    bracelevel -= 1
+    if bracelevel == 0:
+      yy_pop_state()
+      piece_append('</RegexGroupElement_Text>')
+      return y_tab.TOK_ELEMENT_GROUP
+  }
+}
+<ACTION_GROUP,ELEMENT_GROUP>{
+    /* Quotes and comments inside a group get their own start condition,
+     * pushed here and popped again by the rules of that condition.
+     */
+  "'"                          yy_push_state(SINGLE_QUOTED)
+  \"                           yy_push_state(DOUBLE_QUOTED)
+  "/*"                         yy_push_state(COMMENT_DISCARD)
+}
+<SINGLE_QUOTED>{
+    /* Quoted text has no action; the closing quote pops the state. */
+  [^\[\]\'\\\n]+
+  \'                           yy_pop_state()
+}
+<DOUBLE_QUOTED>{
+    /* Quoted text has no action; the closing quote pops the state. */
+  [^\[\]\"\\\n]+
+  \"                           yy_pop_state()
+}
+<SINGLE_QUOTED,DOUBLE_QUOTED>{
+    /* Backslash sequences inside quotes are matched with no action. */
+  (\\\n)*
+  \\(\\\n)*.
+}
+<ACTION_GROUP,ELEMENT_GROUP,SINGLE_QUOTED,DOUBLE_QUOTED>{
+    /* Line counting, plus a catch-all for any other single character. */
+  {NL} {
+    global linenum
+    linenum += 1
+  }
+  .
+}
+
+ /* Last resort in every start condition: report the character. */
+<*>.|\n                                synerr('bad character: {0:s}'.format(yytext))
+
+%%
+
+#def yywrap():
+#  if --num_input_files > 0:
+#    set_input_file(*++input_files)
+#    return 0
+#  else:
+#    return 1
+#
+#def set_input_file(file):
+#  global linenum
+#  if file and strcmp(file, '-'):
+#    infilename = xstrdup(file)
+#    yyin = fopen(infilename, 'r')
+#    if yyin == None:
+#      lerr('can\'t open %s', file)
+#  else:
+#    yyin = stdin
+#    infilename = xstrdup('<stdin>')
+#  linenum = 1
+
+def piece_append(str):
+  piece.append(str)
+
+def piece_insert(n, str):
+  piece[n:n] = [str]
+
+xml_escape = {'<': '&lt;', '>': '&gt;', '&': '&amp;'}
+def piece_escape(str):
+  piece.append(''.join([xml_escape.get(i, i) for i in str]))
+
+def piece_flush(n):
+  global yytext
+  piece_escape(yytext[:n])
+  yytext = yytext[n:]
+
+def piece_pack():
+  global piece0
+  piece[piece0:] = [''.join(piece[piece0:])]
+  piece0 += 1
+
+def flexscan():
+  result = yylex()
+  if result < 0:
+    return ~result
+  piece_pack()
+  piece_escape(yytext)
+  piece_pack()
+  return result
+
+def markup_action(text):
+  global piece0
+  piece0 -= 1
+  assert piece[piece0][-20:] == '</AST_Section2_Rule>'
+  piece[piece0] = piece[piece0][:-20]
+  piece_append(text)
+  piece_append('</AST_Section2_Rule>')
+  piece_pack()
+
+def markup_option(name, sense):
+  global piece0
+  i = len(piece) - 1
+  while i >= piece0 and piece[i] == 'no':
+    i -= 1
+  piece_insert(
+    i + 1,
+    '<AST_Section1_Options_{0:s}{1:s}>'.format(
+      name,
+      ' value="true"' if sense else ''
+    )
+  )
+  piece_flush(len(yytext))
+  piece_append('</AST_Section1_Options_{0:s}>'.format(name))
+  piece0 -= 1
+  piece_pack()
+
+# supposed to be somewhere else:
+in_rule = False
+linenum = 1
+nmstr = ''
+no_section3_escape = False
+def synerr(str):
+  sys.stderr.write('{0:d}: {1:s}\n'.format(linenum, str))
diff --git a/bootstrap/skel_py.py b/bootstrap/skel_py.py
new file mode 100644 (file)
index 0000000..36c9d57
--- /dev/null
@@ -0,0 +1,287 @@
+import bisect
+import sys
+
+# GENERATE SECTION1
+
+# GENERATE STARTCONDDECL
+
+class YYReject(Exception):
+  """Raised by REJECT(); yylex() catches it and tries the next candidate."""
+  pass
+
+class YYContinue(Exception):
+  """Caught by yylex(), which then goes on to scan the next token."""
+  pass
+
+class YYTerminate(Exception):
+  """Raised by yyterminate(); yylex() catches it and returns 0."""
+  pass
+
+class YYBufferList:
+  """Node of a singly linked list; next is the following node or None."""
+  def __init__(self, next = None):
+    self.next = next
+
+class YYBufferBlock(YYBufferList):
+  """A chunk of input text plus the position of the next unread character."""
+  def __init__(self, next = None, pos = 0, text = ''):
+    YYBufferList.__init__(self, next)
+    self.pos = pos # index into text of the first unconsumed character
+    self.text = text
+
+class YYBufferState(YYBufferList):
+  """Head of a list of blocks for one input; next is its first block."""
+  def __init__(self, next = None, file_in = None, at_bol = True):
+    YYBufferList.__init__(self, next)
+    self.file_in = file_in # file to read more lines from, None when done
+    self.at_bol = at_bol # whether the next match starts at beginning of line
+
+# current input and output files
+yyin = sys.stdin
+yyout = sys.stdout
+# stack of input buffers, yylex() reads from the top entry
+yy_buffer_stack = [YYBufferState()]
+
+# current start condition and the stack used by yy_push_state/yy_pop_state
+yystart = INITIAL
+yystart_stack = []
+# working thread lists of the matcher in yylex(); the first yy_prefix_slop
+# entries of each are spare slots
+yy_threads0 = [None]
+yy_threads1 = [None]
+yy_prefix_slop = 1
+
+# per-match state, reset by yylex() for every candidate match
+yy_group_text = None
+yy_group_stack = None
+yy_groups = None
+yy_groups_by_name = None
+yy_action = None
+# matched text, and how many characters yylex() consumes before the next scan
+yytext = ''
+yytext_len = 0
+
+YY_NULL = 0
+
+def REJECT():
+  """Abandon the current match by raising YYReject."""
+  raise YYReject()
+
+def yyterminate():
+  """Stop scanning by raising YYTerminate."""
+  raise YYTerminate()
+
+def yyless(i):
+  global yytext, yytext_len
+  assert yytext_len >= i
+  yytext = yytext[:i]
+  yytext_len = i
+
+def unput(text):
+  """Push text back onto the input so that it is scanned next.
+
+  The current yytext is flushed to the piece list first, then the
+  yytext_len characters of the current match are consumed from the buffer,
+  and finally a new block holding text is linked in at the front.
+  """
+  global yyin, yytext_len
+  piece_flush(len(yytext))
+  # consume the current match, same loop as at the top of yylex()
+  while yytext_len:
+    block = yy_buffer_stack[-1].next
+    while block is None or block.pos >= len(block.text):
+      if block is None:
+        # this buffer has no blocks left, fall back to the one below
+        yy_buffer_stack.pop()
+        block = yy_buffer_stack[-1].next
+        yyin = yy_buffer_stack[-1].file_in
+      else:
+        # drop the exhausted block from the front of the list
+        block = block.next
+        yy_buffer_stack[-1].next = block
+    i = min(yytext_len, len(block.text) - block.pos)
+    block.pos += i
+    yytext_len -= i
+  yy_buffer_stack[-1].next = YYBufferBlock(yy_buffer_stack[-1].next, 0, text)
+
+def ECHO():
+  """Write the current match to yyout."""
+  yyout.write(yytext)
+
+def yy_rule_start():
+  global yytext, yytext_len
+  yytext = yy_group_text[:yy_group_stack[-1]]
+  yytext_len = yy_group_stack[-1]
+  del yy_group_stack[-2:]
+  # note that this should also be done after yyless() and REJECT(),
+  # and state should be saved in case they result in a null string,
+  # however, it doesn't seem to be in flex, maintain compatibility:
+  if len(yytext):
+    yy_buffer_stack[-1].at_bol = yytext[-1] == '\n'
+
+def yy_group_end():
+  """Do nothing; no work is needed in this skeleton."""
+  pass
+
+def BEGIN(start):
+  """Make start the current start condition."""
+  global yystart
+  yystart = start
+
+def YY_START():
+  """Return the current start condition."""
+  return yystart
+
+def yy_push_state(start):
+  """Save the current start condition on the stack, then switch to start."""
+  global yystart
+  yystart_stack.append(yystart)
+  yystart = start
+
+def yy_pop_state():
+  """Return to the start condition most recently saved by yy_push_state().
+
+  Raises IndexError when the stack is empty.
+  """
+  global yystart
+  yystart = yystart_stack.pop()
+
+def YY_AT_BOL():
+  """Return the beginning-of-line flag of the current input buffer."""
+  return yy_buffer_stack[-1].at_bol
+
+def yy_set_bol(at_bol):
+  """Set the beginning-of-line flag of the current input buffer."""
+  yy_buffer_stack[-1].at_bol = at_bol
+
+# GENERATE SECTION2
+
+def yylex():
+  """Scan the input and return the value returned by the matching action.
+
+  Each pass of the outer loop consumes the previous match, runs the DFA
+  over the buffered input to find the next match, and calls the action of
+  the matching rule.  An action that raises YYContinue makes the loop scan
+  again; YYReject makes it try the next candidate for the same text;
+  YYTerminate makes it return 0.  At end of input with nothing matched the
+  EOF action of the current start condition is called instead.
+
+  Raises Exception('scanner jammed') when every candidate was rejected.
+  """
+  global \
+    yyin, \
+    yy_threads0, \
+    yy_threads1, \
+    yy_prefix_slop, \
+    yy_group_text, \
+    yy_group_stack, \
+    yy_action, \
+    yytext, \
+    yytext_len
+
+  # GENERATE SECTION2INITIAL
+
+  while True:
+    # consume the yytext_len characters of the previous match
+    while yytext_len:
+      block = yy_buffer_stack[-1].next
+      while block is None or block.pos >= len(block.text):
+        if block is None:
+          # this buffer has no blocks left, fall back to the one below
+          yy_buffer_stack.pop()
+          block = yy_buffer_stack[-1].next
+          yyin = yy_buffer_stack[-1].file_in
+        else:
+          # drop the exhausted block from the front of the list
+          block = block.next
+          yy_buffer_stack[-1].next = block
+      i = min(yytext_len, len(block.text) - block.pos)
+      block.pos += i
+      yytext_len -= i
+
+    # match_len counts the characters scanned so far; match holds their text
+    # but is only brought up to date when a block is left or at the end
+    match = ''
+    match_len = 0
+
+    # start with a single empty thread after the spare slots
+    del yy_threads0[yy_prefix_slop:]
+    yy_threads0.append(None)
+
+    # read position: (buffer_ptr, block, block_pos), block_prev is the node
+    # a newly read block gets linked after
+    buffer_ptr = len(yy_buffer_stack) - 1
+    block_prev = yy_buffer_stack[buffer_ptr]
+    block = block_prev.next
+    if block is not None:
+      block_pos = block.pos
+
+    # the initial action depends on start condition and beginning-of-line flag
+    action = yy_dfa_start_action[
+      yystart * 2 + int(yy_buffer_stack[-1].at_bol)
+    ]
+    while action != -1:
+      state, transition = yy_dfa_actions[action]
+      #print('i', i, 'action', action, 'state', state, 'transition', transition)
+
+      # apply the thread list operations of this action, reading from
+      # yy_threads0 at index i and building the new list in yy_threads1
+      i = yy_prefix_slop
+      assert len(yy_threads1) == yy_prefix_slop
+      for trans in transition:
+        if trans[0] == 0: #DFA.TRANSITION_POP:
+          i += trans[1]
+        elif trans[0] == 1: #DFA.TRANSITION_DUP:
+          # grow the spare slots at the front until there is room to copy
+          while i < trans[1]:
+            yy_threads0[:0] = [None] * yy_prefix_slop
+            yy_threads1[:0] = [None] * yy_prefix_slop
+            i += yy_prefix_slop
+            yy_prefix_slop *= 2
+          yy_threads0[i - trans[1]:i] = yy_threads0[i:i + trans[1]]
+          i -= trans[1]
+        elif trans[0] == 2: #DFA.TRANSITION_MARK:
+          # prepend (position, callback) to each of the affected threads
+          yy_threads0[i:i + trans[1]] = [
+            (match_len, trans[2], thread)
+            for thread in yy_threads0[i:i + trans[1]]
+          ]
+        elif trans[0] == 3: #DFA.TRANSITION_MOVE:
+          yy_threads1.extend(yy_threads0[i:i + trans[1]])
+          i += trans[1]
+        #elif trans[0] == DFA.TRANSITION_DEL:
+        #  del yy_threads1[-trans[1]:]
+        else:
+          assert False
+      assert i == len(yy_threads0)
+      yy_threads0, yy_threads1 = yy_threads1, yy_threads0
+      del yy_threads1[yy_prefix_slop:]
+
+      if state == 0:
+        # there is only one match, which is complete
+        assert len(yy_threads0) == yy_prefix_slop + 1
+        assert yy_dfa_states[state][2] == [0]
+        break
+
+      # make sure a character is available at (block, block_pos), reading
+      # further lines from the input file(s) as needed
+      yy_buffer_stack[-1].file_in = yyin
+      while block is None or block_pos >= len(block.text):
+        if block is None:
+          file_in = yy_buffer_stack[buffer_ptr].file_in
+          text = '' if file_in is None else file_in.readline()
+          if len(text):
+            block = YYBufferBlock(None, 0, text)
+            block_pos = 0
+            block_prev.next = block
+          else:
+            # do not re-attempt read once EOF is reached
+            yy_buffer_stack[buffer_ptr].file_in = None
+            yyin = yy_buffer_stack[-1].file_in
+            # continue reading from the buffer below, if there is one
+            buffer_ptr -= 1
+            if buffer_ptr < 0:
+              break # EOF
+            block_prev = yy_buffer_stack[buffer_ptr]
+            block = block_prev.next
+            if block is not None:
+              block_pos = block.pos
+        else:
+          # leaving an exhausted block: save the part of the match that lies
+          # in it before moving on to the next block
+          i = match_len - len(match)
+          if i:
+            match += block.text[block_pos - i:]
+          block_prev = block
+          block = block_prev.next
+          if block is not None:
+            block_pos = block.pos
+      else: 
+        # a character is available: look up the next action by binary search
+        # in the breakpoints of this state, then step over the character
+        #print('block_pos', block_pos, 'block.text', block.text)
+        action = yy_dfa_states[state][1][
+          bisect.bisect_right(
+            yy_dfa_states[state][0],
+            ord(block.text[block_pos])
+          )
+        ]
+        block_pos += 1
+        match_len += 1
+        continue
+      # EOF
+      # NOTE(review): i holds whatever the loops above last assigned to it
+      # (thread list index or unsaved match length) -- confirm that this is
+      # the intended test for "nothing matched before EOF"
+      if i == 0:
+        try:
+          return yy_eof_actions[yystart]()
+        except YYTerminate:
+          return 0
+      break
+
+    # bring match up to date with the characters of the current block
+    i = match_len - len(match)
+    if i:
+      assert block is not None
+      match += block.text[block_pos - i:]
+
+    # try the candidates of the final state in order
+    for i in yy_dfa_states[state][2]:
+      yy_group_text = match
+      yy_group_stack = []
+      yy_groups = None
+      yy_groups_by_name = None
+      yy_action = None
+      yytext = None
+      yytext_len = None
+
+      # walk the linked list of marks of this thread, pushing each position
+      # and calling its callback (presumably generated code that sets
+      # yy_action, yytext and yytext_len -- not visible in this file)
+      thread = yy_threads0[yy_prefix_slop + i]
+      #print('thread', thread)
+      while thread is not None:
+        pos, ref_data, thread = thread
+        yy_group_stack.append(pos)
+        ref_data()
+
+      try:
+        return yy_action()
+      except YYReject:
+        # fall through to the next candidate
+        pass
+      except YYContinue:
+        # no token returned: emit the matched text and scan again
+        piece_escape(yytext)
+        break
+      except YYTerminate:
+        return 0
+    else:
+      raise Exception('scanner jammed')
+
+# GENERATE SECTION3
diff --git a/bootstrap/y_tab.py b/bootstrap/y_tab.py
new file mode 100644 (file)
index 0000000..3bb913f
--- /dev/null
@@ -0,0 +1,47 @@
+# Token numbers returned by the scanner for everything that is not a single
+# literal character (those are returned as their character code).
+# NOTE(review): presumably these mirror the parser's generated token table;
+# confirm before renumbering.
+CHAR = 258
+NUMBER = 259
+SECTEND = 260
+SCDECL = 261
+XSCDECL = 262
+NAME = 263
+PREVCCL = 264
+EOF_OP = 265
+TOK_OPTION = 266
+TOK_OUTFILE = 267
+TOK_PREFIX = 268
+TOK_YYCLASS = 269
+TOK_HEADER_FILE = 270
+TOK_EXTRA_TYPE = 271
+TOK_TABLES_FILE = 272
+CCE_ALNUM = 273
+CCE_ALPHA = 274
+CCE_BLANK = 275
+CCE_CNTRL = 276
+CCE_DIGIT = 277
+CCE_GRAPH = 278
+CCE_LOWER = 279
+CCE_PRINT = 280
+CCE_PUNCT = 281
+CCE_SPACE = 282
+CCE_UPPER = 283
+CCE_XDIGIT = 284
+CCE_NEG_ALNUM = 285
+CCE_NEG_ALPHA = 286
+CCE_NEG_BLANK = 287
+CCE_NEG_CNTRL = 288
+CCE_NEG_DIGIT = 289
+CCE_NEG_GRAPH = 290
+CCE_NEG_LOWER = 291
+CCE_NEG_PRINT = 292
+CCE_NEG_PUNCT = 293
+CCE_NEG_SPACE = 294
+CCE_NEG_UPPER = 295
+CCE_NEG_XDIGIT = 296
+CCL_OP_DIFF = 297
+CCL_OP_UNION = 298
+TOK_ACTION_GROUP = 299
+TOK_ELEMENT_GROUP = 300
+BEGIN_REPEAT_POSIX = 301
+END_REPEAT_POSIX = 302
+BEGIN_REPEAT_FLEX = 303
+END_REPEAT_FLEX = 304