Add Python version of bootstrap_flex.git parser (adds markup to *.l file), doesn...
author Nick Downing <nick@ndcode.org>
Mon, 14 Jan 2019 14:47:39 +0000 (01:47 +1100)
committer Nick Downing <nick@ndcode.org>
Mon, 14 Jan 2019 14:48:31 +0000 (01:48 +1100)
.gitignore
bootstrap/Makefile
bootstrap/markup.py
bootstrap/parse.y [new file with mode: 0644]
bootstrap/scan.l
bootstrap/skel_lex_yy.py [moved from bootstrap/skel_py.py with 100% similarity]
bootstrap/skel_y_tab.py [new file with mode: 0644]
bootstrap/state.py [new file with mode: 0644]
bootstrap/y_tab.py [deleted file]

index 0c800cc..fa388bb 100644 (file)
@@ -1,6 +1,7 @@
 __pycache__
+bootstrap/*.xml
 bootstrap/lex_yy.py
-bootstrap/scan.l.xml
+bootstrap/y_tab.py
 lex-yacc-examples/*.c
 lex-yacc-examples/*.h
 lex-yacc-examples/*.o
index 28f58f0..16395c1 100644 (file)
@@ -1,6 +1,12 @@
-lex_yy.py: scan.l skel_py.py
+all: lex_yy.py y_tab.py
+
+lex_yy.py: scan.l skel_lex_yy.py
        ../../bootstrap_flex.git/src/flex -o /dev/null $< 2>$<.xml
-       ../../pilex.git/pilex.py --python --skel skel_py.py $<.xml
+       ../../pilex.git/pilex.py --python --skel skel_lex_yy.py $<.xml
+
+y_tab.py: parse.y skel_y_tab.py
+       ../../bootstrap_bison.git/src/bison -o /dev/null $< 2>$<.xml
+       ../../piyacc.git/piyacc.py --python --skel skel_y_tab.py $<.xml
 
 clean:
-       rm -f lex_yy.py *.xml
+       rm -f lex_yy.py y_tab.py *.xml
index 587ac17..d55c28b 100755 (executable)
@@ -1,8 +1,10 @@
 #!/usr/bin/env python3
 
 import lex_yy
+import y_tab
 import sys
 
+y_tab.yyparse()
 while lex_yy.flexscan():
   pass
 sys.stdout.write(''.join(lex_yy.piece))
diff --git a/bootstrap/parse.y b/bootstrap/parse.y
new file mode 100644 (file)
index 0000000..de0be93
--- /dev/null
@@ -0,0 +1,1145 @@
+/* parse.y - parser for flex input */
+
+%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
+%token TOK_OPTION TOK_OUTFILE TOK_PREFIX TOK_YYCLASS TOK_HEADER_FILE TOK_EXTRA_TYPE
+%token TOK_TABLES_FILE
+
+%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
+%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
+
+%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
+%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
+
+%left CCL_OP_DIFF CCL_OP_UNION
+
+/* Nick extra rules for action groups */
+%token TOK_ACTION_GROUP TOK_ELEMENT_GROUP
+
+/*
+ * POSIX and AT&T lex place the
+ * precedence of the repeat operator, {}, below that of concatenation.
+ * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
+ * Regular Expression (ERE) precedence that has the repeat operator
+ * higher than concatenation.  This causes ab{3} to yield abbb.
+ *
+ * In order to support the POSIX and AT&T precedence and the flex
+ * precedence we define two token sets for the begin and end tokens of
+ * the repeat operator, '{' and '}'.  The lexical scanner chooses
+ * which tokens to return based on whether posix_compat or lex_compat
+ * are specified. Specifying either posix_compat or lex_compat will
+ * cause flex to parse scanner files as per the AT&T and
+ * POSIX-mandated behavior.
+ */
+
+%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
+
+
+%{
+  import state
+
+  #pat = 0
+  #scnum = 0
+  #eps = 0
+  #headcnt = 0
+  #trailcnt = 0
+  #lastchar = 0
+  #i = 0
+  #rulelen = 0
+  trlcontxt = 0
+  xcluflg = 0
+  #currccl = 0
+  #cclsorted = 0
+  #varlength = 0
+  #variable_trail_rule = 0
+  #scon_stk = []
+  #scon_stk_ptr = 0
+  #madeany = False
+  #ccldot = 0
+  #cclany = 0
+  #previous_continued_action = 0
+  piece2 = 0
+  piece3 = 0
+%}
+
+%%
+
+goal
+  : initlex sect1 sect1end sect2 initforrule
+    {
+      #def_rule = None
+      #pat = cclinit()
+      #cclnegate(pat)
+      #def_rule = mkstate(-pat)
+      #default_rule = num_rules
+      #finish_rule(def_rule, False, 0, 0, 0)
+      #i = 1
+      #while i <= lastsc:
+      #  scset[i] = mkbranch(scset[i], def_rule)
+      #  i += 1
+      #if spprdflt:
+      #  add_action('YY_FATAL_ERROR( "flex scanner jammed" )')
+      #else:
+      #  add_action('ECHO')
+      #add_action(';\n\tYY_BREAK]]\n')
+    }
+  ;
+
+initlex
+  :
+    {
+      #scinstal('INITIAL', False)
+    }
+  ;
+
+sect1
+  : sect1 startconddecl namelist1
+    {
+      insert_after(2, '</AST_Section1_StartConditions>')
+      insert_before(1, '<AST_Section1_StartConditions exclusive="{0:s}">'.format('true' if xcluflg else 'false'))
+    }
+  | sect1 options
+    {
+      insert_after(1, '</AST_Section1_Options>')
+      insert_before(1, '<AST_Section1_Options>')
+    }
+  |
+  | error
+    {
+      state.synerr('unknown error processing section 1')
+    }
+  ;
+
+sect1end
+  : SECTEND
+    {
+      #check_options()
+      #scon_stk = allocate_array(lastsc + 1, 4)
+      #scon_stk_ptr = 0
+    }
+  ;
+
+startconddecl
+  : SCDECL
+    {
+      # Inclusive start condition declaration.  xcluflg is the module-level
+      # flag from the prologue and is read later by the sect1 action, so it
+      # has to be declared global for this assignment to be visible there.
+      global xcluflg
+      xcluflg = False
+    }
+  | XSCDECL
+    {
+      # Exclusive start condition declaration (same flag, opposite value).
+      global xcluflg
+      xcluflg = True
+    }
+  ;
+
+namelist1
+  : namelist1 NAME
+    {
+      #scinstal(nmstr, xcluflg)
+    }
+  | NAME
+    {
+      #scinstal(nmstr, xcluflg)
+    }
+  | error
+    {
+      state.synerr('bad start condition list')
+    }
+  ;
+
+options
+  : TOK_OPTION optionlist
+  ;
+
+optionlist
+  : optionlist option
+  |
+  ;
+
+option
+  : TOK_OUTFILE '=' NAME
+    {
+      #outfilename = xstrdup(nmstr)
+      #did_outfilename = 1
+      insert_after(2, '</AST_Section1_Options_OutFile>')
+      insert_before(0, '<AST_Section1_Options_OutFile>')
+    }
+  | TOK_EXTRA_TYPE '=' NAME
+    {
+      #extra_type = xstrdup(nmstr)
+      insert_after(2, '</AST_Section1_Options_ExtraType>')
+      insert_before(0, '<AST_Section1_Options_ExtraType>')
+    }
+  | TOK_PREFIX '=' NAME
+    {
+      #prefix = xstrdup(nmstr)
+      #if strchr(prefix, ord('[')) or strchr(prefix, ord(']')):
+      #  flexerror('Prefix must not contain [ or ]')
+      insert_after(2, '</AST_Section1_Options_Prefix>')
+      insert_before(0, '<AST_Section1_Options_Prefix>')
+    }
+  | TOK_YYCLASS '=' NAME
+    {
+      #yyclass = xstrdup(nmstr)
+      insert_after(2, '</AST_Section1_Options_YYClass>')
+      insert_before(0, '<AST_Section1_Options_YYClass>')
+    }
+  | TOK_HEADER_FILE '=' NAME
+    {
+      #headerfilename = xstrdup(nmstr)
+      insert_after(2, '</AST_Section1_Options_HeaderFile>')
+      insert_before(0, '<AST_Section1_Options_HeaderFile>')
+    }
+  | TOK_TABLES_FILE '=' NAME
+    {
+      #tablesext = True
+      #tablesfilename = xstrdup(nmstr)
+      insert_after(2, '</AST_Section1_Options_TablesFile>')
+      insert_before(0, '<AST_Section1_Options_TablesFile>')
+    }
+  ;
+
+sect2
+  : sect2 scon initforrule flexrule '\n'
+    {
+      #scon_stk_ptr = $2
+      insert_after(4, '</AST_Section2_Rule>')
+      insert_before(1, '<AST_Section2_Rule>')
+    }
+  | sect2 scon '{' sect2 '}'
+    {
+      #scon_stk_ptr = $2
+      insert_after(4, '</AST_Section2_CompoundRule>')
+      insert_before(1, '<AST_Section2_CompoundRule>')
+    }
+  |
+  ;
+
+initforrule
+  :
+    {
+      # Reset the per-rule state before each rule is parsed.  trlcontxt is
+      # the module-level flag from the prologue; without the global
+      # declaration the assignment would only create a throwaway local.
+      global trlcontxt
+      trlcontxt = False #variable_trail_rule = varlength = False
+      #trailcnt = headcnt = rulelen = 0
+      #current_state_type = 0x1
+      #previous_continued_action = continued_action
+      state.in_rule = True
+      #new_rule()
+    }
+  ;
+
+flexrule
+  : '^' rule
+    {
+      #pat = $2
+      #finish_rule(pat, variable_trail_rule, headcnt, trailcnt, previous_continued_action)
+      #if scon_stk_ptr > 0:
+      #  i = 1
+      #  while i <= scon_stk_ptr:
+      #    scbol[scon_stk[i]] = mkbranch(scbol[scon_stk[i]], pat)
+      #    i += 1
+      #else:
+      #  i = 1
+      #  while i <= lastsc:
+      #    if not scxclu[i]:
+      #      scbol[i] = mkbranch(scbol[i], pat)
+      #    i += 1
+      #if not bol_needed:
+      #  bol_needed = True
+      #  if performance_report > 1:
+      #    pinpoint_message('\'^\' operator results in sub-optimal performance')
+      insert_after(1, '</AST_Section2_Rule_FLexRule>')
+      insert_before(0, '<AST_Section2_Rule_FLexRule bol="true">')
+    }
+  | rule
+    {
+      #pat = $1
+      #finish_rule(pat, variable_trail_rule, headcnt, trailcnt, previous_continued_action)
+      #if scon_stk_ptr > 0:
+      #  i = 1
+      #  while i <= scon_stk_ptr:
+      #    scset[scon_stk[i]] = mkbranch(scset[scon_stk[i]], pat)
+      #    i += 1
+      #else:
+      #  i = 1
+      #  while i <= lastsc:
+      #    if not scxclu[i]:
+      #      scset[i] = mkbranch(scset[i], pat)
+      #    i += 1
+      insert_after(0, '</AST_Section2_Rule_FLexRule>')
+      insert_before(0, '<AST_Section2_Rule_FLexRule bol="false">')
+    }
+  | EOF_OP
+    {
+      #if scon_stk_ptr > 0:
+      #  build_eof_action()
+      #else:
+      #  i = 1
+      #  while i <= lastsc:
+      #    if not sceof[i]:
+      #      scon_stk[++scon_stk_ptr] = i
+      #    i += 1
+      #  if scon_stk_ptr == 0:
+      #    lwarn('all start conditions already have <<EOF>> rules')
+      #  else:
+      #    build_eof_action()
+      insert_after(0, '</AST_Section2_Rule_EOFRule>')
+      insert_before(0, '<AST_Section2_Rule_EOFRule>')
+    }
+  | error
+    {
+      state.synerr('unrecognized rule')
+    }
+  ;
+
+scon_stk_ptr
+  :
+    {
+      #$$ = scon_stk_ptr
+    }
+  ;
+
+scon
+  : '<' scon_stk_ptr namelist2 '>'
+    {
+      #$$ = $2
+      insert_after(3, '</AST_Section2_StartConditions>')
+      insert_before(0, '<AST_Section2_StartConditions>')
+    }
+  | '<' '*' '>'
+    {
+      #$$ = scon_stk_ptr
+      #i = 1
+      #while i <= lastsc:
+      #  j = None
+      #  j = 1
+      #  while j <= scon_stk_ptr:
+      #    if scon_stk[j] == i:
+      #      break
+      #    j += 1
+      #  if j > scon_stk_ptr:
+      #    scon_stk[++scon_stk_ptr] = i
+      #  i += 1
+      insert_after(2, '</AST_Section2_StartConditions>')
+      insert_before(0, '<AST_Section2_StartConditions wildcard="true">')
+    }
+  |
+    {
+      global yychar
+      #$$ = scon_stk_ptr
+      # Read the lookahead token now if the parser has not done so yet.
+      # NOTE(review): presumably this is so the lookahead text is already
+      # present in lex_yy.piece before the pieces are rotated -- confirm.
+      if yychar == YYEMPTY:
+        yychar = lex_yy.flexscan()
+      # Rotate three adjacent pieces: the entry at piece2 + 1 moves down to
+      # piece2 - 1, and the other two entries each move up by one slot.
+      temp = lex_yy.piece[piece2 + 1]
+      lex_yy.piece[piece2 + 1] = lex_yy.piece[piece2]
+      lex_yy.piece[piece2] = lex_yy.piece[piece2 - 1]
+      lex_yy.piece[piece2 - 1] = temp
+      insert_before(0, '<AST_Section2_StartConditions />')
+    }
+  ;
+
+namelist2
+  : namelist2 ',' sconname
+  | sconname
+  | error
+    {
+      state.synerr('bad start condition list')
+    }
+  ;
+
+sconname
+  : NAME
+    {
+      #if (scnum = sclookup(nmstr)) == 0:
+      #  format_pinpoint_message('undeclared start condition %s', nmstr)
+      #else:
+      #  i = 1
+      #  while i <= scon_stk_ptr:
+      #    if scon_stk[i] == scnum:
+      #      format_warn('<%s> specified twice', scname[scnum])
+      #      break
+      #    i += 1
+      #  if i > scon_stk_ptr:
+      #    scon_stk[++scon_stk_ptr] = scnum
+    }
+  ;
+
+/* this rule handles trailing context, it must produce two separate regexes,
+ * where the first is the expression to be matched, and the second is the
+ * trailing context, RegexEmpty (matches empty string) if no trailing context
+ */
+rule
+  : re2 re
+    {
+      #if transchar[lastst[$2]] != 256 + 1:
+      #  $2 = link_machines($2, mkstate(256 + 1))
+      #mark_beginning_as_normal($2)
+      #current_state_type = 0x1
+      #if previous_continued_action:
+      #  if not varlength or headcnt != 0:
+      #    lwarn('trailing context made variable due to preceding \'|\' action')
+      #  varlength = True
+      #  headcnt = 0
+      #if lex_compat or varlength and headcnt == 0:
+      #  add_accept($1, num_rules | 0x4000)
+      #  variable_trail_rule = True
+      #else:
+      #  trailcnt = rulelen
+      #$$ = link_machines($1, $2)
+    }
+  | re2 re '$'
+    {
+      state.synerr('trailing context used twice')
+    }
+  | re '$'
+    {
+      #headcnt = 0
+      #trailcnt = 1
+      #rulelen = 1
+      #varlength = False
+      #current_state_type = 0x2
+      #if trlcontxt:
+      #  state.synerr('trailing context used twice')
+      #  $$ = mkstate(256 + 1)
+      #else:
+      #  if previous_continued_action:
+      #    lwarn('trailing context made variable due to preceding \'|\' action')
+      #    varlength = True
+      #if lex_compat or varlength:
+      #  add_accept($1, num_rules | 0x4000)
+      #  variable_trail_rule = True
+      #trlcontxt = True
+      #eps = mkstate(256 + 1)
+      #$$ = link_machines($1, link_machines(eps, mkstate(ord('\n'))))
+      insert_after(1, '</RegexCharacterLiteral>')
+      insert_before(1, '<RegexCharacterLiteral character_set="10 11">')
+    }
+  | re
+    {
+      #$$ = $1
+      #if trlcontxt:
+      #  if lex_compat or varlength and headcnt == 0:
+      #    variable_trail_rule = True
+      #  else:
+      #    trailcnt = rulelen
+      insert_after(0, '<RegexEmpty />')
+    }
+  ;
+
+re
+  : re '|' series
+    {
+      #varlength = True
+      #$$ = mkor($1, $3)
+      insert_after(2, '</RegexOr>')
+      insert_before(0, '<RegexOr>')
+    }
+  | series
+    {
+      #$$ = $1
+    }
+  ;
+
+re2
+  : re '/'
+    {
+      #if trlcontxt:
+      #  state.synerr('trailing context used twice')
+      #else:
+      #  trlcontxt = True
+      #if varlength:
+      #  varlength = False
+      #else:
+      #  headcnt = rulelen
+      #rulelen = 0
+      #current_state_type = 0x2
+      $$ = $1
+    }
+  ;
+
+series
+  : series singleton
+    {
+      #$$ = link_machines($1, $2)
+      insert_after(1, '</RegexSequence>')
+      insert_before(0, '<RegexSequence>')
+    }
+  | singleton
+    {
+      #$$ = $1
+    }
+  | series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
+    {
+      #varlength = True
+      #if $3 > $5 or $3 < 0:
+      #  state.synerr('bad iteration values')
+      #  $$ = $1
+      #else:
+      #  if $3 == 0:
+      #    if $5 <= 0:
+      #      state.synerr('bad iteration values')
+      #      $$ = $1
+      #    else:
+      #      $$ = mkopt(mkrep($1, 1, $5))
+      #  else:
+      #    $$ = mkrep($1, $3, $5)
+      insert_after(5, '</RegexRepeat>')
+      insert_before(0, '<RegexRepeat count0="{0:d}" count1="{1:d}">'.format($3, $5))
+    }
+  | series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
+    {
+      #varlength = True
+      #if $3 <= 0:
+      #  state.synerr('iteration value must be positive')
+      #  $$ = $1
+      #else:
+      #  $$ = mkrep($1, $3, -1)
+      insert_after(4, '</RegexRepeat>')
+      insert_before(0, '<RegexRepeat count0="{0:d}">'.format($3))
+    }
+  | series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
+    {
+      #varlength = True
+      #if $3 <= 0:
+      #  state.synerr('iteration value must be positive')
+      #  $$ = $1
+      #else:
+      #  $$ = link_machines($1, copysingl($1, $3 - 1))
+      insert_after(3, '</RegexRepeat>')
+      insert_before(0, '<RegexRepeat count0="{0:d}" count1="{1:d}">'.format($3, $3))
+    }
+  ;
+
+singleton
+  : singleton '*'
+    {
+      #varlength = True
+      #$$ = mkclos($1)
+      insert_after(1, '</RegexRepeat>')
+      insert_before(0, '<RegexRepeat count0="0">')
+    }
+  | singleton '+'
+    {
+      #varlength = True
+      #$$ = mkposcl($1)
+      insert_after(1, '</RegexRepeat>')
+      insert_before(0, '<RegexRepeat count0="1">')
+    }
+  | singleton '?'
+    {
+      #varlength = True
+      #$$ = mkopt($1)
+      insert_after(1, '</RegexRepeat>')
+      insert_before(0, '<RegexRepeat count0="0" count1="1">')
+    }
+  | singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
+    {
+      #varlength = True
+      #if $3 > $5 or $3 < 0:
+      #  state.synerr('bad iteration values')
+      #  $$ = $1
+      #else:
+      #  if $3 == 0:
+      #    if $5 <= 0:
+      #      state.synerr('bad iteration values')
+      #      $$ = $1
+      #    else:
+      #      $$ = mkopt(mkrep($1, 1, $5))
+      #  else:
+      #    $$ = mkrep($1, $3, $5)
+      insert_after(5, '</RegexRepeat>')
+      insert_before(0, '<RegexRepeat count0="{0:d}" count1="{1:d}">'.format($3, $5))
+    }
+  | singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
+    {
+      #varlength = True
+      #if $3 <= 0:
+      #  state.synerr('iteration value must be positive')
+      #  $$ = $1
+      #else:
+      #  $$ = mkrep($1, $3, -1)
+      insert_after(4, '</RegexRepeat>')
+      insert_before(0, '<RegexRepeat count0="{0:d}">'.format($3))
+    }
+  | singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
+    {
+      #varlength = True
+      #if $3 <= 0:
+      #  state.synerr('iteration value must be positive')
+      #  $$ = $1
+      #else:
+      #  $$ = link_machines($1, copysingl($1, $3 - 1))
+      insert_after(3, '</RegexRepeat>')
+      insert_before(0, '<RegexRepeat count0="{0:d}" count1="{1:d}">'.format($3, $3))
+    }
+  | '.'
+    {
+      #if not madeany:
+      #  ccldot = cclinit()
+      #  ccladd(ccldot, ord('\n'))
+      #  cclnegate(ccldot)
+      #  if useecs:
+      #    mkeccl(ccltbl + cclmap[ccldot], ccllen[ccldot], nextecm, ecgroup, csize, csize)
+      #  cclany = cclinit()
+      #  cclnegate(cclany)
+      #  if useecs:
+      #    mkeccl(ccltbl + cclmap[cclany], ccllen[cclany], nextecm, ecgroup, csize, csize)
+      #  madeany = True
+      #rulelen += 1
+      if False: #_sf_stk[_sf_top_ix] & int(0x0002):
+        #$$ = mkstate(-cclany)
+        insert_after(0, '</RegexCharacterLiteral>')
+        insert_before(0, '<RegexCharacterLiteral character_set="0 256">')
+      else:
+        #$$ = mkstate(-ccldot)
+        insert_after(0, '</RegexCharacterLiteral>')
+        insert_before(0, '<RegexCharacterLiteral character_set="0 10 11 256">')
+    }
+  | fullccl
+    {
+      #qsort(ccltbl + cclmap[$1], int(ccllen[$1]), sizeof *ccltbl, cclcmp)
+      #if useecs:
+      #  mkeccl(ccltbl + cclmap[$1], ccllen[$1], nextecm, ecgroup, csize, csize)
+      #rulelen += 1
+      #if ccl_has_nl[$1]:
+      #  rule_has_nl[num_rules] = True
+      #$$ = mkstate(-$1)
+    }
+  | PREVCCL
+    {
+      #rulelen += 1
+      #if ccl_has_nl[$1]:
+      #  rule_has_nl[num_rules] = True
+      #$$ = mkstate(-$1)
+    }
+  | '"' string '"'
+    {
+      #$$ = $2
+    }
+  | '(' re ')'
+    {
+      #$$ = $2
+      insert_after(2, '</RegexGroup>')
+      insert_before(0, '<RegexGroup>')
+    }
+  /* Nick extra rules for unnumbered groups */
+  | '(' ':' re ')'
+    {
+      #$$ = $3
+    }
+  /* Nick extra rules for named groups */
+  | '(' NAME re ')'
+    {
+      #$$ = $3
+      insert_after(3, '</RegexGroupName>')
+      insert_before(0, '<RegexGroupName>')
+    }
+  /* Nick extra rules for action groups */
+  | '(' TOK_ACTION_GROUP re ')'
+    {
+      #$$ = $3
+      insert_after(3, '</RegexGroupAction>')
+      insert_before(0, '<RegexGroupAction>')
+    }
+  | '(' TOK_ELEMENT_GROUP re ')'
+    {
+      #$$ = $3
+      insert_after(3, '</RegexGroupElement>')
+      insert_before(0, '<RegexGroupElement>')
+    }
+  | CHAR
+    {
+      #rulelen += 1
+      #if $1 == nlch:
+      #  rule_has_nl[num_rules] = True
+      #if _sf_stk[_sf_top_ix] & int(0x0001) and has_case($1):
+      #  $$ = mkor(mkstate($1), mkstate(reverse_case($1)))
+      #else:
+      #  $$ = mkstate($1)
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="{0:d} {1:d}">'.format($1, $1 + 1))
+    }
+  ;
+
+fullccl
+  : fullccl CCL_OP_DIFF braceccl
+    {
+      #$$ = ccl_set_diff($1, $3)
+      insert_after(2, '</RegexCharacterAnd>')
+      insert_after(2, '</RegexCharacterNot>')
+      insert_before(2, '<RegexCharacterNot>')
+      insert_before(0, '<RegexCharacterAnd>')
+    }
+  | fullccl CCL_OP_UNION braceccl
+    {
+      #$$ = ccl_set_union($1, $3)
+      insert_after(2, '</RegexCharacterOr>')
+      insert_before(0, '<RegexCharacterOr>')
+    }
+  | braceccl
+  ;
+
+braceccl
+  : '[' ccl ']'
+    {
+      #$$ = $2
+    }
+  | '[' '^' ccl ']'
+    {
+      #cclnegate($3)
+      #$$ = $3
+      insert_after(2, '</RegexCharacterNot>')
+      insert_before(1, '<RegexCharacterNot>')
+    }
+  ;
+
+ccl
+  : ccl CHAR '-' CHAR
+    {
+      #if _sf_stk[_sf_top_ix] & int(0x0001):
+      #  if has_case($2) != has_case($4) or has_case($2) and (True if (*__ctype_b_loc())[int($2)] & int(_ISlower) else False) != (True if (*__ctype_b_loc())[int($4)] & int(_ISlower) else False) or has_case($2) and (True if (*__ctype_b_loc())[int($2)] & int(_ISupper) else False) != (True if (*__ctype_b_loc())[int($4)] & int(_ISupper) else False):
+      #    fw3_msg = []
+      #    snprintf(fw3_msg, 2048, 'the character range [%c-%c] is ambiguous in a case-insensitive scanner', $2, $4)
+      #    lwarn(fw3_msg)
+      #  else:
+      #    if not has_case($2) and not has_case($4) and not range_covers_case($2, $4):
+      #      fw3_msg = []
+      #      snprintf(fw3_msg, 2048, 'the character range [%c-%c] is ambiguous in a case-insensitive scanner', $2, $4)
+      #      lwarn(fw3_msg)
+      #if $2 > $4:
+      #  state.synerr('negative range in character class')
+      #else:
+      #  i = $2
+      #  while i <= $4:
+      #    ccladd($1, i)
+      #    i += 1
+      #  cclsorted = cclsorted and $2 > lastchar
+      #  lastchar = $4
+      #  if _sf_stk[_sf_top_ix] & int(0x0001) and has_case($2) and has_case($4):
+      #    $2 = reverse_case($2)
+      #    $4 = reverse_case($4)
+      #    i = $2
+      #    while i <= $4:
+      #      ccladd($1, i)
+      #      i += 1
+      #    cclsorted = cclsorted and $2 > lastchar
+      #    lastchar = $4
+      #$$ = $1
+      insert_after(3, '</RegexCharacterOr>')
+      insert_after(3, '</RegexCharacterLiteral>')
+      insert_before(1, '<RegexCharacterLiteral character_set="{0:d} {1:d}">'.format($2, $4 + 1))
+      insert_before(0, '<RegexCharacterOr>')
+    }
+  | ccl CHAR
+    {
+      #ccladd($1, $2)
+      #cclsorted = cclsorted and $2 > lastchar
+      #lastchar = $2
+      #if _sf_stk[_sf_top_ix] & int(0x0001) and has_case($2):
+      #  $2 = reverse_case($2)
+      #  ccladd($1, $2)
+      #  cclsorted = cclsorted and $2 > lastchar
+      #  lastchar = $2
+      #$$ = $1
+      insert_after(1, '</RegexCharacterOr>')
+      insert_after(1, '</RegexCharacterLiteral>')
+      insert_before(1, '<RegexCharacterLiteral character_set="{0:d} {1:d}">'.format($2, $2 + 1))
+      insert_before(0, '<RegexCharacterOr>')
+    }
+  | ccl ccl_expr
+    {
+      #cclsorted = False
+      #$$ = $1
+      insert_after(1, '</RegexCharacterOr>')
+      insert_before(0, '<RegexCharacterOr>')
+    }
+  |
+    {
+      #cclsorted = True
+      #lastchar = 0
+      #currccl = $$ = cclinit()
+      insert_before(0, '<RegexCharacterLiteral character_set="" />')
+    }
+  ;
+
+ccl_expr
+  : CCE_ALNUM
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISalnum):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="48 58 65 91 97 123">')
+    }
+  | CCE_ALPHA
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISalpha):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="65 91 97 123">')
+    }
+  | CCE_BLANK
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (c == ord(' ') or c == ord('\t')):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="9 10 32 33">')
+    }
+  | CCE_CNTRL
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_IScntrl):
+      #    ccladd(currccl, c)
+      #  c += 1
+      # ASCII control characters are 0..31 plus DEL (127).  The set is
+      # written as start and end-plus-one pairs, as in the CHAR rule.
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="0 32 127 128">')
+    }
+  | CCE_DIGIT
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISdigit):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="48 58">')
+    }
+  | CCE_GRAPH
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISgraph):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="33 127">')
+    }
+  | CCE_LOWER
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISlower):
+      #    ccladd(currccl, c)
+      #  c += 1
+      if False: #_sf_stk[_sf_top_ix] & int(0x0001):
+        #c = None
+        #c = 0
+        #while c < csize:
+        #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISupper):
+        #    ccladd(currccl, c)
+        #  c += 1
+        insert_after(0, '</RegexCharacterLiteral>')
+        insert_before(0, '<RegexCharacterLiteral character_set="65 91 97 123">')
+      else:
+        insert_after(0, '</RegexCharacterLiteral>')
+        insert_before(0, '<RegexCharacterLiteral character_set="97 123">')
+    }
+  | CCE_PRINT
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISprint):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="32 127">')
+    }
+  | CCE_PUNCT
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISpunct):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="33 48 58 65 91 97 123 127">')
+    }
+  | CCE_SPACE
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISspace):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="9 14 32 33">')
+    }
+  | CCE_XDIGIT
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISxdigit):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="48 58 65 71 97 103">')
+    }
+  | CCE_UPPER
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISupper):
+      #    ccladd(currccl, c)
+      #  c += 1
+      if False: #_sf_stk[_sf_top_ix] & int(0x0001):
+        #c = None
+        #c = 0
+        #while c < csize:
+        #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISlower):
+        #    ccladd(currccl, c)
+        #  c += 1
+        insert_after(0, '</RegexCharacterLiteral>')
+        insert_before(0, '<RegexCharacterLiteral character_set="65 91 97 123">')
+      else:
+        insert_after(0, '</RegexCharacterLiteral>')
+        insert_before(0, '<RegexCharacterLiteral character_set="65 91">')
+    }
+  | CCE_NEG_ALNUM
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISalnum)):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="0 48 58 65 91 97 123 256">')
+    }
+  | CCE_NEG_ALPHA
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISalpha)):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="0 65 91 97 123 256">')
+    }
+  | CCE_NEG_BLANK
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not (c == ord(' ') or c == ord('\t')):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="0 9 10 32 33 256">')
+    }
+  | CCE_NEG_CNTRL
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_IScntrl)):
+      #    ccladd(currccl, c)
+      #  c += 1
+      # Everything that is not a control character: 32..126 and 128..255.
+      # DEL (127) is a control character, so it is excluded from the set.
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="32 127 128 256">')
+    }
+  | CCE_NEG_DIGIT
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISdigit)):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="0 48 58 256">')
+    }
+  | CCE_NEG_GRAPH
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISgraph)):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="0 33 127 256">')
+    }
+  | CCE_NEG_PRINT
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISprint)):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="0 32 127 256">')
+    }
+  | CCE_NEG_PUNCT
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISpunct)):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="0 33 48 58 65 91 97 123 127 256">')
+    }
+  | CCE_NEG_SPACE
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISspace)):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="0 9 14 32 33 256">')
+    }
+  | CCE_NEG_XDIGIT
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      #  if not ((*__ctype_b_loc())[int(c)] & int(_ISxdigit)):
+      #    ccladd(currccl, c)
+      #  c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="0 48 58 65 71 97 103 256">')
+    }
+  | CCE_NEG_LOWER
+    {
+      #if _sf_stk[_sf_top_ix] & int(0x0001):
+      #  lwarn('[:^lower:] is ambiguous in case insensitive scanner')
+      #else:
+      #  c = None
+      #  c = 0
+      #  while c < csize:
+      #    if not ((*__ctype_b_loc())[int(c)] & int(_ISlower)):
+      #      ccladd(currccl, c)
+      #    c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="0 97 123 256">')
+    }
+  | CCE_NEG_UPPER
+    {
+      #if _sf_stk[_sf_top_ix] & int(0x0001):
+      #  lwarn('[:^upper:] ambiguous in case insensitive scanner')
+      #else:
+      #  c = None
+      #  c = 0
+      #  while c < csize:
+      #    if not ((*__ctype_b_loc())[int(c)] & int(_ISupper)):
+      #      ccladd(currccl, c)
+      #    c += 1
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="0 65 91 256">')
+    }
+  ;
+
+string
+  : string CHAR
+    {
+      #if $2 == nlch:
+      #  rule_has_nl[num_rules] = True
+      #rulelen += 1
+      #if _sf_stk[_sf_top_ix] & int(0x0001) and has_case($2):
+      #  $$ = mkor(mkstate($2), mkstate(reverse_case($2)))
+      #else:
+      #  $$ = mkstate($2)
+      #$$ = link_machines($1, $$)
+      insert_after(1, '</RegexSequence>')
+      insert_after(1, '</RegexCharacterLiteral>')
+      insert_before(1, '<RegexCharacterLiteral character_set="{0:d} {1:d}">'.format($2, $2 + 1))
+      insert_before(0, '<RegexSequence>')
+    }
+  |
+    {
+      #$$ = mkstate(256 + 1)
+      insert_before(0, '<RegexEmpty />')
+    }
+  ;
+
+%%
+
+#def build_eof_action():
+#  i = None
+#  action_text = []
+#  i = 1
+#  while i <= scon_stk_ptr:
+#    if sceof[scon_stk[i]]:
+#      format_pinpoint_message('multiple <<EOF>> rules for start condition %s', scname[scon_stk[i]])
+#    else:
+#      sceof[scon_stk[i]] = True
+#      if previous_continued_action:
+#        add_action('YY_RULE_SETUP\n')
+#      snprintf(action_text, sizeof action_text, 'case YY_STATE_EOF(%s):\n', scname[scon_stk[i]])
+#      add_action(action_text)
+#    i += 1
+#  line_directive_out(None, 1)
+#  add_action('[[')
+#  num_rules -= 1
+#  num_eof_rules += 1
+#
+#def format_state.synerr(msg, arg):
+#  errmsg = []
+#  NoneType(snprintf(errmsg, sizeof errmsg, msg, arg))
+#  state.synerr(errmsg)
+#
+#def synerr(str):
+#  syntaxerror = True
+#  pinpoint_message(str)
+#
+#def format_warn(msg, arg):
+#  warn_msg = []
+#  snprintf(warn_msg, sizeof warn_msg, msg, arg)
+#  lwarn(warn_msg)
+#
+#def lwarn(str):
+#  line_warning(str, linenum)
+#
+#def format_pinpoint_message(msg, arg):
+#  errmsg = []
+#  snprintf(errmsg, sizeof errmsg, msg, arg)
+#  pinpoint_message(errmsg)
+#
+#def pinpoint_message(str):
+#  line_pinpoint(str, linenum)
+#
+#def line_warning(str, line):
+#  warning = []
+#  if not nowarn:
+#    snprintf(warning, sizeof warning, 'warning, %s', str)
+#    line_pinpoint(warning, line)
+#
+#def line_pinpoint(str, line):
+#  fprintf(stderr, '%s:%d: %s\n', infilename, line, str)
+
+def yyerror(msg):
+  # Parser error hook; it ignores msg and does nothing.
+  # NOTE(review): presumably intentional, since the grammar error
+  # productions report problems through state.synerr -- confirm that
+  # silently dropping msg is what is wanted.
+  pass
+
+def insert_before(n, str):
+  global piece3
+  lex_yy.piece_insert(piece2 + n * 2, str)
+  lex_yy.piece0 += 1
+  piece3 += 1
+
+def insert_after(n, str):
+  global piece3
+  lex_yy.piece_insert(piece2 + n * 2 + 1, str)
+  lex_yy.piece0 += 1
+  piece3 += 1
index 7672ac6..483c815 100644 (file)
@@ -1,13 +1,9 @@
 /* scan.l - scanner for flex input -*-C-*- */
 
 %{
+  import state
   import y_tab
 
-  #tablesverify = 0
-  #tablesext = 0
-  trlcontxt = False
-  escaped_qstart = ''
-  escaped_qend = ''
   piece = []
   piece0 = 0
 
@@ -21,7 +17,6 @@
   doing_codeblock = False
   brace_depth = 0
   brace_start_line = 0
-  nmdef = ''
 %}
 
 %option caseless nodefault noreject stack noyy_top_state
@@ -74,13 +69,12 @@ M4QEND      "]""]"
   #doing_codeblock = False
   #brace_depth = 0
   #brace_start_line = 0
-  #nmdef = ''
 
 <INITIAL>{
   ^{WS} {
-    global indented_code, linenum
+    global indented_code
     if not indented_code:
-      linenum += 1
+      state.linenum += 1
       #line_directive_out(None, 1)
     #add_action('[' '[')
     yy_push_state(CODEBLOCK)
@@ -97,9 +91,9 @@ M4QEND      "]""]"
   ^"%s"{NAME}? return y_tab.SCDECL
   ^"%x"{NAME}? return y_tab.XSCDECL
   ^"%{".*{NL} {
-    global indented_code, linenum
+    global indented_code
     if not indented_code:
-      linenum += 1
+      state.linenum += 1
       #line_directive_out(None, 1)
     #add_action('[' '[')
     yy_push_state(CODEBLOCK)
@@ -109,15 +103,15 @@ M4QEND      "]""]"
     piece_append('<AST_Text>')
   }
   ^"%top"[[:blank:]]*"{"[[:blank:]]*{NL} {
-    global brace_start_line, linenum, brace_depth
-    brace_start_line = linenum
-    linenum += 1
-    #buf_linedir(&top_buf, infilename if infilename else '<stdin>', linenum)
+    global brace_start_line, brace_depth
+    brace_start_line = state.linenum
+    state.linenum += 1
+    #buf_linedir(&top_buf, infilename if infilename else '<stdin>', state.linenum)
     brace_depth = 1
     yy_push_state(CODEBLOCK_MATCH_BRACE)
   }
 
-  ^"%top".*                    synerr('malformed \'%top\' directive')
+  ^"%top".*                    state.synerr('malformed \'%top\' directive')
 
   {WS}
 
@@ -137,17 +131,15 @@ M4QEND      "]""]"
   }
 
   ^"%pointer".*{NL} {
-    global linenum
     #yytext_is_array = False
-    linenum += 1
+    state.linenum += 1
     piece_append('<AST_Section1_Options><AST_Section1_Options_Array>')
     piece_flush(len(yytext) - 1)
     piece_append('</AST_Section1_Options_Array></AST_Section1_Options>')
   }
   ^"%array".*{NL} {
-    global linenum
     #yytext_is_array = True
-    linenum += 1
+    state.linenum += 1
     piece_append('<AST_Section1_Options><AST_Section1_Options_Array value="true">')
     piece_flush(len(yytext) - 1)
     piece_append('</AST_Section1_Options_Array></AST_Section1_Options>')
@@ -158,28 +150,21 @@ M4QEND      "]""]"
     return y_tab.TOK_OPTION
   }
 
-  ^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL} {
-    global linenum
-    linenum += 1
-  }
-  ^"%"{LEXOPT}{WS}.*{NL} {
-    global linenum
-    linenum += 1
-  }
+  ^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL} state.linenum += 1
+  ^"%"{LEXOPT}{WS}.*{NL}       state.linenum += 1
 
        /* xgettext: no-c-format */
-  ^"%"[^sxaceknopr{}].*                synerr('unrecognized \'%\' directive')
+  ^"%"[^sxaceknopr{}].*                state.synerr('unrecognized \'%\' directive')
 
   ^{NAME} {
-    global nmstr, didadef
-    nmstr = yytext
+    global didadef
+    state.nmstr = yytext
     didadef = False
     BEGIN(PICKUPDEF)
   }
 
   {SCNAME} {
-    global nmstr
-    nmstr = yytext
+    state.nmstr = yytext
     piece_pack()
     piece_append('<AST_Name>')
     piece_escape(yytext)
@@ -187,14 +172,10 @@ M4QEND      "]""]"
     piece_pack()
     return ~y_tab.NAME
   }
-  ^{OPTWS}{NL} {
-    global linenum
-    linenum += 1
-  }
+  ^{OPTWS}{NL}                 state.linenum += 1
   {OPTWS}{NL} {
-    global linenum
     #add_action(yytext)
-    linenum += 1
+    state.linenum += 1
   }
 }
 
@@ -204,8 +185,7 @@ M4QEND      "]""]"
   .                            #add_action(yytext)
 
   {NL} {
-    global linenum
-    linenum += 1
+    state.linenum += 1
     #add_action(yytext)
   }
 }
@@ -227,32 +207,23 @@ M4QEND      "]""]"
   "*/"                         yy_pop_state()
   "*"
   [^*\n]
-  {NL} {
-    global linenum
-    linenum += 1
-  }
+  {NL}                         state.linenum += 1
 }
 
 <EXTENDED_COMMENT>{
   ")"                          yy_pop_state()
   [^\n\)]+
-  {NL} {
-    global linenum
-    linenum += 1
-  }
+  {NL}                         state.linenum += 1
 }
 
 <LINEDIR>{
   \n                           yy_pop_state()
   [[:digit:]]+ {
-    global linenum
-    linenum = myctoi(yytext)
+    state.linenum = int(yytext)
   }
 
   \"[^"\n]*\" {
-    free(infilename)
-    infilename = xstrdup(yytext + 1)
-    infilename[len(infilename) - 1] = ord('\0')
+    state.infilename = yytext[1:-1]
   }
   .
 }
@@ -263,8 +234,7 @@ M4QEND      "]""]"
 
 <CODEBLOCK>{
   ^"%}".*{NL} {
-    global linenum
-    linenum += 1
+    state.linenum += 1
     piece_append('</AST_Text>')
     piece_flush(len(yytext))
     yy_pop_state()
@@ -276,8 +246,7 @@ M4QEND      "]""]"
   [^\n%\[\]]*                  #add_action(yytext)
   .                            #add_action(yytext)
   {NL} {
-    global linenum
-    linenum += 1
+    state.linenum += 1
     #add_action(yytext)
     if indented_code:
       piece_flush(len(yytext))
@@ -307,8 +276,7 @@ M4QEND      "]""]"
   }
 
   {NL} {
-    global linenum
-    linenum += 1
+    state.linenum += 1
     #buf_strnappend(&top_buf, yytext, len(yytext))
   }
 
@@ -317,9 +285,8 @@ M4QEND      "]""]"
   ([^{}\r\n\[\]]+)|[^{}\r\n]   #buf_strnappend(&top_buf, yytext, len(yytext))
 
   <<EOF>> {
-    global linenum
-    linenum = brace_start_line
-    synerr('Unmatched \'{\'')
+    state.linenum = brace_start_line
+    state.synerr('Unmatched \'{\'')
     yyterminate()
   }
 }
@@ -329,26 +296,23 @@ M4QEND      "]""]"
   {WS}
 
   {NOT_WS}[^\r\n]* {
-    global nmdef, didadef
-    nmdef = yytext.rstrip()
-    #ndinstal(nmstr, nmdef)
+    global didadef
+    state.ndinstal(state.nmstr, yytext.rstrip('\t '))
     didadef = True
   }
 
   {NL} {
-    global linenum
     if not didadef:
-      synerr('incomplete name definition')
+      state.synerr('incomplete name definition')
     BEGIN(INITIAL)
-    linenum += 1
+    state.linenum += 1
   }
 }
 
 
 <OPTION>{
   {NL} {
-    global linenum
-    linenum += 1
+    state.linenum += 1
     BEGIN(INITIAL)
   }
   {WS} {
@@ -449,11 +413,11 @@ M4QEND      "]""]"
     markup_option('Interactive', option_sense)
   }
   lex-compat {
-    #lex_compat = option_sense
+    state.lex_compat = option_sense
     markup_option('LexCompat', option_sense)
   }
   posix-compat {
-    #posix_compat = option_sense
+    state.posix_compat = option_sense
     markup_option('PosixCompat', option_sense)
   }
   line {
@@ -747,8 +711,7 @@ M4QEND      "]""]"
 
 
   \"[^"\n]*\" {
-    global nmstr
-    nmstr = yytext[1:-1]
+    state.nmstr = yytext[1:-1]
     piece_pack()
     piece_append('<AST_String>"<AST_Text>')
     piece_escape(yytext[1:-1])
@@ -758,14 +721,13 @@ M4QEND      "]""]"
   }
 
   (([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|. {
-    synerr('unrecognized %option: {0:s}'.format(yytext))
+    state.synerr('unrecognized %option: {0:s}'.format(yytext))
     BEGIN(RECOVER)
   }
 }
 
 <RECOVER>.*{NL} {
-  global linenum
-  linenum += 1
+  state.linenum += 1
   BEGIN(INITIAL)
 }
 
@@ -783,9 +745,9 @@ M4QEND      "]""]"
   }
 
   ^{WS} {
-    global indented_code, linenum
+    global indented_code
     if not indented_code:
-      linenum += 1
+      state.linenum += 1
       #line_directive_out(None, 1)
     #add_action('[' '[')
     yy_push_state(CODEBLOCK)
@@ -796,7 +758,7 @@ M4QEND      "]""]"
   }
 
   ^{NOT_WS}.* {
-    global indented_code, linenum
+    global indented_code
     if bracelevel <= 0:
       yyless(0)
       yy_set_bol(True)
@@ -804,7 +766,7 @@ M4QEND      "]""]"
       BEGIN(SECT2)
     else:
       if not indented_code:
-        linenum += 1
+        state.linenum += 1
         #line_directive_out(None, 1)
       #add_action('[' '[')
       yy_push_state(CODEBLOCK)
@@ -816,8 +778,7 @@ M4QEND      "]""]"
 
   .                            #add_action(yytext)
   {NL} {
-    global linenum
-    linenum += 1
+    state.linenum += 1
     #add_action(yytext)
   }
 
@@ -832,10 +793,7 @@ M4QEND      "]""]"
 }
 
 <SECT2>{
-  ^{OPTWS}{NL} {
-    global linenum
-    linenum += 1
-  }
+  ^{OPTWS}{NL}                 state.linenum += 1
 
   ^{OPTWS}"%{" {
     global indented_code, doing_codeblock, bracelevel
@@ -865,7 +823,7 @@ M4QEND      "]""]"
   }
   "{"/[[:digit:]] {
     BEGIN(NUM)
-    if lex_compat or posix_compat:
+    if state.lex_compat or state.posix_compat:
       return y_tab.BEGIN_REPEAT_POSIX
     else:
       return y_tab.BEGIN_REPEAT_FLEX
@@ -873,14 +831,14 @@ M4QEND      "]""]"
   "$"/([[:blank:]]|{NL}) return ord('$')
 
   {WS}"%{" {
-    global bracelevel, in_rule, doing_rule_action
+    global bracelevel, doing_rule_action
     bracelevel = 1
     BEGIN(PERCENT_BRACE_ACTION)
     piece_flush(len(yytext) - 2)
     piece_append('<AST_Section2_Rule_Action>')
-    if in_rule:
+    if state.in_rule:
       doing_rule_action = True
-      in_rule = False
+      state.in_rule = False
       piece_pack()
       piece_escape(yytext[:2])
       piece_pack()
@@ -889,16 +847,15 @@ M4QEND      "]""]"
     abort()
   }
   {WS}"|".*{NL} {
-    global linenum
     if False: #_sf_stk[_sf_top_ix] & int(0x0004):
       amt = int(strchr(yytext, ord('|')) - yytext)
       yyless(amt)
     else:
       #add_action(']' ']')
       continued_action = True
-      linenum += 1
+      state.linenum += 1
       i = 0
-      while i < len(yytext) and (yytext[i] == '\t' or yytext[i] == ' '):
+      while i < len(yytext) and yytext[i] in '\t ':
         i += 1
       piece_flush(i)
       piece_pack()
@@ -923,16 +880,16 @@ M4QEND      "]""]"
   ^{WS}
 
   {WS} {
-    global bracelevel, in_rule, doing_rule_action
+    global bracelevel, doing_rule_action
     if False: #_sf_stk[_sf_top_ix] & int(0x0004):
       pass
     else:
       bracelevel = 0
       continued_action = False
       BEGIN(ACTION)
-      if in_rule:
+      if state.in_rule:
         doing_rule_action = True
-        in_rule = False
+        state.in_rule = False
         piece_pack()
         piece_escape(yytext)
         piece_pack()
@@ -941,17 +898,17 @@ M4QEND      "]""]"
   }
 
   {OPTWS}{NL} {
-    global linenum, bracelevel, in_rule, doing_rule_action
+    global bracelevel, doing_rule_action
     if False: #_sf_stk[_sf_top_ix] & int(0x0004):
-      linenum += 1
+      state.linenum += 1
     else:
       bracelevel = 0
       continued_action = False
       BEGIN(ACTION)
       yyless(len(yytext) - 1)
-      if in_rule:
+      if state.in_rule:
         doing_rule_action = True
-        in_rule = False
+        state.in_rule = False
         piece_pack()
         piece_escape(yytext)
         piece_pack()
@@ -967,29 +924,28 @@ M4QEND      "]""]"
 
   ^"%%".* {
     sectnum = 3
-    BEGIN(SECT3_NOESCAPE if no_section3_escape else SECT3)
+    BEGIN(SECT3_NOESCAPE if state.no_section3_escape else SECT3)
     #outn('/* Begin user sect3 */')
     piece_pack()
     piece_append('</AST_Section2>')
     piece_escape(yytext)
     piece_pack()
     piece_append('<AST_Section3>')
-    # for some reason flex requires an extra EOF after section 2:
-    #return ~YY_NULL
+    # first EOF, we will call scanner until it's returned EOF twice
+    return ~YY_NULL
   }
 
   "["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})* {
-    global nmstr
     #cclval = None
-    nmstr = yytext
-    #if 0 and (cclval = ccllookup(nmstr)) != 0:
+    state.nmstr = yytext
+    #if 0 and (cclval = ccllookup(state.nmstr)) != 0:
     #  if input() != ord(']'):
-    #    synerr('bad character class')
-    #  #yylval = cclval
-    #  #cclreuse += 1
+    #    state.synerr('bad character class')
+    #  y_tab.yylval = cclval
+    #  cclreuse += 1
     #  return y_tab.PREVCCL
     if True: #else:
-      #cclinstal(nmstr, lastccl + 1)
+      #cclinstal(state.nmstr, lastccl + 1)
       yyless(1)
       BEGIN(FIRSTCCL)
       return ord('[')
@@ -1003,40 +959,26 @@ M4QEND      "]""]"
      * context.
      */
   "{"{NAME}"}"[[:space:]]? {
-    # fix this later
-    #global nmstr
-    #nmdefptr = None
-    #end_is_ws = None
-    #end_ch = None
-    #end_ch = yytext[len(yytext) - 1]
-    #end_is_ws = 1 if end_ch != ord('}') else 0
-    #if len(yytext) - 1 < 2048:
-    #  strncpy(nmstr, yytext + 1, sizeof nmstr)
-    #else:
-    #  synerr('Input line too long\n')
-    #  longjmp(flex_main_jmp_buf, 1 + 1)
-    #nmstr[len(yytext) - 2 - end_is_ws] = ord('\0')
-    #if (nmdefptr = ndlookup(nmstr)) == 0:
-    #  synerr('undefined definition {{{0:s}}}'.format(nmstr))
-    #else:
-    #  len = len(nmdefptr)
-    #  if end_is_ws:
-    #    yyless(len(yytext) - 1)
-    #  if lex_compat or nmdefptr[0] == ord('^') or len > 0 and nmdefptr[len - 1] == ord('$') or end_is_ws and trlcontxt and not (_sf_stk[_sf_top_ix] & int(0x0004)):
-    #    i = len(nmdefptr)
-    #    while i > 0:
-    #      unput(nmdefptr[--i])
-    #    if nmdefptr[0] == ord('^'):
-    #      BEGIN(CARETISBOL)
-    #  else:
-    #    unput(ord(')'))
-    #    i = len(nmdefptr)
-    #    while i > 0:
-    #      unput(nmdefptr[--i])
-    #    if not lex_compat and not posix_compat:
-    #      unput(ord(':'))
-    #      unput(ord('?'))
-    #    unput(ord('('))
+    # the pattern optionally matches one whitespace character after the '}';
+    # yytext is a str, so compare against the character itself (comparing
+    # with ord('}') is always unequal and would treat every match as ending
+    # in whitespace, chopping the last character off the name)
+    end_ch = yytext[-1]
+    end_is_ws = end_ch != '}'
+    # strip the braces, and the trailing whitespace character if present
+    state.nmstr = yytext[1:-1 - int(end_is_ws)]
+    nmdef = state.ndlookup(state.nmstr)
+    if nmdef is None:
+      state.synerr('undefined definition {{{0:s}}}'.format(state.nmstr))
+    else:
+      if end_is_ws:
+        # give the whitespace character back to the scanner
+        yyless(len(yytext) - 1)
+      # definitions anchored with ^ or $ (and lex-compat / trailing-context
+      # cases) are pushed back verbatim; everything else is wrapped in a group
+      if state.lex_compat or len(nmdef) and (nmdef[0] == '^' or nmdef[-1] == '$') or end_is_ws and y_tab.trlcontxt and True: #not (_sf_stk[_sf_top_ix] & int(0x0004)):
+        unput(nmdef)
+        if len(nmdef) and nmdef[0] == '^':
+          BEGIN(CARETISBOL)
+      else:
+        unput(
+          '({0:s}{1:s})'.format(
+            '' if state.lex_compat or state.posix_compat else '?:',
+            nmdef
+          )
+        )
   }
 
   "/*" {
@@ -1048,36 +990,36 @@ M4QEND      "]""]"
   }
 
   "(?#" {
-    if lex_compat or posix_compat:
+    if state.lex_compat or state.posix_compat:
       yyless(1)
-      sf_push()
+      #sf_push()
       return ord('(')
     else:
       yy_push_state(EXTENDED_COMMENT)
   }
   "(?" {
-    sf_push()
-    if lex_compat or posix_compat:
+    #sf_push()
+    if state.lex_compat or state.posix_compat:
       yyless(1)
     else:
       BEGIN(GROUP_WITH_PARAMS)
     return ord('(')
   }
   "(" {
-    sf_push()
+    #sf_push()
     return ord('(')
   }
   ")" {
-    if _sf_top_ix > 0:
-      sf_pop()
+    if True: #_sf_top_ix > 0:
+      #sf_pop()
       return ord(')')
     else:
-      synerr('unbalanced parenthesis')
+      state.synerr('unbalanced parenthesis')
   }
 
   [/|*+?.(){}]                 return ord(yytext[0])
   . {
-    #yylval = ord(yytext[0])
+    y_tab.yylval = ord(yytext[0])
     return y_tab.CHAR
   }
 
@@ -1093,10 +1035,7 @@ M4QEND      "]""]"
 
 
 <SC>{
-  {OPTWS}{NL}{OPTWS} {
-    global linenum
-    linenum += 1
-  }
+  {OPTWS}{NL}{OPTWS}           state.linenum += 1
   [,*]                         return ord(yytext[0])
   ">" {
     BEGIN(SECT2)
@@ -1107,8 +1046,7 @@ M4QEND      "]""]"
     return ord('>')
   }
   {SCNAME} {
-    global nmstr
-    nmstr = yytext
+    state.nmstr = yytext
     piece_pack()
     piece_append('<AST_Name>')
     piece_escape(yytext)
@@ -1116,7 +1054,7 @@ M4QEND      "]""]"
     piece_pack()
     return ~y_tab.NAME
   }
-  .                            synerr('bad <start condition>: {0:s}'.format(yytext))
+  .                            state.synerr('bad <start condition>: {0:s}'.format(yytext))
 }
 
 <CARETISBOL>"^" {
@@ -1127,7 +1065,7 @@ M4QEND      "]""]"
 
 <QUOTE>{
   [^"\n] {
-    #yylval = ord(yytext[0])
+    y_tab.yylval = ord(yytext[0])
     return y_tab.CHAR
   }
   \" {
@@ -1136,10 +1074,9 @@ M4QEND      "]""]"
   }
 
   {NL} {
-    global linenum
-    synerr('missing quote')
+    state.synerr('missing quote')
     BEGIN(SECT2)
-    linenum += 1
+    state.linenum += 1
     return ord('"')
   }
 }
@@ -1202,7 +1139,7 @@ M4QEND      "]""]"
   "^"/("-"|"]")                        return ord('^')
   . {
     BEGIN(CCL)
-    #yylval = ord(yytext[0])
+    y_tab.yylval = ord(yytext[0])
     return y_tab.CHAR
   }
 }
@@ -1210,7 +1147,7 @@ M4QEND      "]""]"
 <CCL>{
   -/[^\]\n]                    return ord('-')
   [^\]\n] {
-    #yylval = ord(yytext[0])
+    y_tab.yylval = ord(yytext[0])
     return y_tab.CHAR
   }
   "]" {
@@ -1218,7 +1155,7 @@ M4QEND      "]""]"
     return ord(']')
   }
   .|{NL} {
-    synerr('bad character class')
+    state.synerr('bad character class')
     BEGIN(SECT2)
     return ord(']')
   }
@@ -1323,7 +1260,7 @@ M4QEND      "]""]"
     return y_tab.CCE_NEG_XDIGIT
   }
   {CCL_EXPR} {
-    synerr('bad character class expression: {0:s}'.format(yytext))
+    state.synerr('bad character class expression: {0:s}'.format(yytext))
     BEGIN(CCL)
     return y_tab.CCE_ALNUM
   }
@@ -1331,30 +1268,29 @@ M4QEND      "]""]"
 
 <NUM>{
   [[:digit:]]+ {
-    #yylval = myctoi(yytext)
+    y_tab.yylval = int(yytext)
     return y_tab.NUMBER
   }
 
   ","                          return ord(',')
   "}" {
     BEGIN(SECT2)
-    if lex_compat or posix_compat:
+    if state.lex_compat or state.posix_compat:
       return y_tab.END_REPEAT_POSIX
     else:
       return y_tab.END_REPEAT_FLEX
   }
 
   . {
-    synerr('bad character inside {}\'s')
+    state.synerr('bad character inside {}\'s')
     BEGIN(SECT2)
     return ord('}')
   }
 
   {NL} {
-    global linenum
-    synerr('missing }')
+    state.synerr('missing }')
     BEGIN(SECT2)
-    linenum += 1
+    state.linenum += 1
     return ord('}')
   }
 }
@@ -1387,8 +1323,8 @@ M4QEND      "]""]"
 
   .                            #add_action(yytext)
   {NL} {
-    global linenum, doing_rule_action, doing_codeblock
-    linenum += 1
+    global doing_rule_action, doing_codeblock
+    state.linenum += 1
     #add_action(yytext)
     if bracelevel <= 0 or doing_codeblock and indented_code:
       #if doing_rule_action:
@@ -1428,8 +1364,8 @@ M4QEND      "]""]"
     BEGIN(ACTION_STRING)
   }
   {NL} {
-    global linenum, doing_rule_action
-    linenum += 1
+    global doing_rule_action
+    state.linenum += 1
     #add_action(yytext)
     if bracelevel <= 0:
       if doing_rule_action:
@@ -1460,8 +1396,7 @@ M4QEND      "]""]"
   (\\\n)*                      #add_action(yytext)
   \\(\\\n)*.                   #add_action(yytext)
   {NL} {
-    global linenum
-    linenum += 1
+    state.linenum += 1
     #add_action(yytext)
     if bracelevel <= 0:
       BEGIN(SECT2)
@@ -1477,17 +1412,17 @@ M4QEND      "]""]"
  /* Nick extra rules for action groups */
  /* Nick added: ACTION_GROUP,ELEMENT_GROUP,DOUBLE_QUOTED,SINGLE_QUOTED */
 <COMMENT,CODE_COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING,CHARACTER_CONSTANT,ACTION_GROUP,ELEMENT_GROUP,DOUBLE_QUOTED,SINGLE_QUOTED><<EOF>> {
-  synerr('EOF encountered inside an action')
+  state.synerr('EOF encountered inside an action')
   yyterminate()
 }
 
 <EXTENDED_COMMENT,GROUP_WITH_PARAMS,GROUP_MINUS_PARAMS><<EOF>> {
-  synerr('EOF encountered inside pattern')
+  state.synerr('EOF encountered inside pattern')
   yyterminate()
 }
 
 <SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ} {
-  #yylval = myesc(str(yytext))
+  y_tab.yylval = state.myesc(yytext)
   if YY_START() == FIRSTCCL:
     BEGIN(CCL)
   return y_tab.CHAR
@@ -1565,14 +1500,11 @@ M4QEND      "]""]"
   \\(\\\n)*.
 }
 <ACTION_GROUP,ELEMENT_GROUP,SINGLE_QUOTED,DOUBLE_QUOTED>{
-  {NL} {
-    global linenum
-    linenum += 1
-  }
+  {NL}                         state.linenum += 1
   .
 }
 
-<*>.|\n                                synerr('bad character: {0:s}'.format(yytext))
+<*>.|\n                                state.synerr('bad character: {0:s}'.format(yytext))
 
 %%
 
@@ -1584,16 +1516,15 @@ M4QEND      "]""]"
 #    return 1
 #
 #def set_input_file(file):
-#  global linenum
 #  if file and strcmp(file, '-'):
-#    infilename = xstrdup(file)
+#    state.infilename = xstrdup(file)
 #    yyin = fopen(infilename, 'r')
 #    if yyin == None:
 #      lerr('can\'t open %s', file)
 #  else:
 #    yyin = stdin
-#    infilename = xstrdup('<stdin>')
-#  linenum = 1
+#    state.infilename = xstrdup('<stdin>')
+#  state.linenum = 1
 
 def piece_append(str):
   piece.append(str)
@@ -1650,10 +1581,3 @@ def markup_option(name, sense):
   piece0 -= 1
   piece_pack()
 
-# supposed to be somewhere else:
-in_rule = False
-linenum = 1
-nmstr = ''
-no_section3_escape = False
-def synerr(str):
-  sys.stderr.write('{0:d}: {1:s}\n'.format(linenum, str))
diff --git a/bootstrap/skel_y_tab.py b/bootstrap/skel_y_tab.py
new file mode 100644 (file)
index 0000000..746c00f
--- /dev/null
@@ -0,0 +1,100 @@
+# Copyright (C) 2018 Nick Downing <nick@ndcode.org>
+# SPDX-License-Identifier: GPL-2.0-with-bison-exception
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 51
+# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+# As a special exception, you may create a larger work that contains part or
+# all of the Bison or piyacc parser skeleton and distribute that work under
+# terms of your choice, so long as that work isn't itself a parser generator
+# using the skeleton or a modified version thereof as a parser skeleton.
+# Alternatively, if you modify or redistribute the parser skeleton itself, you
+# may (at your option) remove this special exception, which will cause the
+# skeleton and the resulting Bison or piyacc output files to be licensed under
+# the GNU General Public License without this special exception.
+
+import bisect
+import lex_yy
+
+# GENERATE SECTION1
+
+# GENERATE TOKENS
+
+# parser stack; yyparse() fills it with (state, value, location) tuples
+yystack = None
+# lookahead token; yyparse() uses -1 to mean that no token has been read yet
+yychar = None
+YYEMPTY = -1
+
+# value / location taken from the stack by yyparse() just before a
+# production's action is run, and pushed back afterwards
+yyval = None
+yyloc = None
+
+# value / location of the lookahead token; yyparse() resets both to None
+# before each call to the scanner
+yylval = None
+yylloc = None
+
+# GENERATE SECTION2
+
+def yyparse():
+  """Run the table-driven parser over the tokens returned by lex_yy.flexscan().
+
+  Uses the generated tables yy_lr1dfa_states and yy_lr1dfa_productions.  Each
+  reduction runs the production's action and then joins the corresponding
+  entries of lex_yy.piece into a single string.  Returns None once
+  production 0 has been reduced.
+
+  Raises Exception('syntax error') when the tables hold no action for the
+  current lookahead token.
+  """
+  # piece2 / piece3 have to be module globals: helper code called from the
+  # production actions (insert_before() / insert_after() in parse.y) reads
+  # piece2 and updates piece3 at module level while an action is running
+  global yystack, yychar, yyval, yyloc, yylval, yylloc, piece2, piece3
+
+  # GENERATE INITIALACTION
+
+  state = 0
+  yystack = []
+  yylval = None
+  yychar = -1
+  while True:
+    #print('state', state, 'yystack', yystack)
+    # entry 4 of a state is its default reduction, or -1 when the lookahead
+    # token has to be consulted
+    reduce = yy_lr1dfa_states[state][4]
+    if reduce == -1:
+      if yychar == -1:
+        # no lookahead yet: clear the token value / location, then scan
+        yylval = None
+        yylloc = None
+        yychar = lex_yy.flexscan()
+        print('yychar', yychar, 'yylval', yylval, 'yylloc', yylloc, 'lex_yy.yytext', lex_yy.yytext)
+      # entries 0 and 1 are parallel: sorted token breakpoints and the action
+      # that applies to each interval
+      action = yy_lr1dfa_states[state][1][
+        bisect.bisect_right(yy_lr1dfa_states[state][0], yychar)
+      ]
+      if action == -1:
+        raise Exception('syntax error')
+      # low bit clear: shift to state (action >> 1) and consume the token
+      if (action & 1) == 0:
+        yystack.append((state, yylval, yylloc))
+        state = action >> 1
+        #print('shift', state)
+        yychar = -1
+        continue
+      # low bit set: reduce by production (action >> 1)
+      reduce = action >> 1
+    #print('reduce', reduce)
+    len_symbols, ref_data = yy_lr1dfa_productions[reduce]
+    base = len(yystack) - len_symbols
+    yystack.append((state, None, None))
+    state, yyval, yyloc = yystack[base]
+    # NOTE(review): the index arithmetic implies two lex_yy.piece entries per
+    # stack slot -- confirm against the piece handling in scan.l
+    n = base * 2
+    piece2 = n + 1
+    piece3 = n + len_symbols * 2
+    if len_symbols == 0:
+      # empty production: make room for it in the piece list
+      lex_yy.piece[base:base] = ['', '']
+      piece3 = n + 2
+      lex_yy.piece0 += 2
+    # run the production's action; it may insert pieces and move piece3
+    ref_data()
+    # collapse the production's pieces into one string and adjust piece0 by
+    # the resulting change in list length
+    lex_yy.piece[piece2:piece3] = [''.join(lex_yy.piece[piece2:piece3])]
+    lex_yy.piece0 += piece2 + 1 - piece3
+    del yystack[base:]
+    if reduce == 0:
+      # production 0 reduced: parsing is complete
+      assert base == 0
+      break
+    yystack.append((state, yyval, yyloc))
+    # entries 2 and 3 are parallel: sorted production breakpoints and the
+    # state to go to after the reduction
+    state = yy_lr1dfa_states[state][3][
+      bisect.bisect_right(yy_lr1dfa_states[state][2], reduce)
+    ]
+    assert state != -1
+
+# GENERATE SECTION3
diff --git a/bootstrap/state.py b/bootstrap/state.py
new file mode 100644 (file)
index 0000000..88f26ec
--- /dev/null
@@ -0,0 +1,52 @@
+# miscellaneous state accessed by scan.l and parse.y
+# checked and cleared by scan.l when it starts scanning a rule's action
+in_rule = False
+# file name prefixed to synerr() messages; scan.l's LINEDIR rules overwrite it
+infilename = '<stdin>'
+# set by scan.l from the lex-compat option
+lex_compat = False
+# current input line number; scan.l increments it at each newline it matches
+linenum = 1
+# most recent name or quoted string text stored by scan.l
+nmstr = ''
+# when true, scan.l enters SECT3_NOESCAPE instead of SECT3 after section 2
+no_section3_escape = False
+# set by scan.l from the posix-compat option
+posix_compat = False
+
+# name definitions recorded so far, keyed by name
+name_defs = {}
+def ndinstal(key, value):
+  """Record the definition value under key.
+
+  If key already has a definition it is left untouched and
+  'name defined twice' is reported through synerr().
+  """
+  if key not in name_defs:
+    name_defs[key] = value
+    return
+  synerr('name defined twice')
+def ndlookup(key):
+  """Return the definition recorded for key, or None if there is none."""
+  try:
+    return name_defs[key]
+  except KeyError:
+    return None
+
+def synerr(str):
+  """Write a syntax error to stderr as '<infilename>:<linenum>: <message>'.
+
+  str -- the message text (the name shadows the builtin but is kept so that
+  the signature stays unchanged for callers)
+  """
+  # imported locally because this module has no top-level "import sys"
+  import sys
+  # the message is format argument 2; using {1:s} would apply a string
+  # format code to the integer linenum and raise ValueError
+  sys.stderr.write(
+    '{0:s}:{1:d}: {2:s}\n'.format(infilename, linenum, str)
+  )
+
+# single-letter escapes that stand for a control character
+esc = {
+  'b': ord('\b'),
+  'f': ord('\f'),
+  'n': ord('\n'),
+  'r': ord('\r'),
+  't': ord('\t'),
+  'a': ord('\a'),
+  'v': ord('\v')
+}
+def myesc(str):
+  """Return the character code denoted by the escape sequence str.
+
+  str must start with a backslash.  Handled forms:
+    \\b \\f \\n \\r \\t \\a \\v  -- the matching control character
+    \\<octal digits>            -- the octal value of the digits
+    \\x<hex digits>             -- the hex value of up to two digits
+    \\<any other character>     -- that character's own code
+
+  The name str shadows the builtin but is kept so that the signature stays
+  unchanged for callers.
+  """
+  # debug trace kept from the original implementation (goes to stdout)
+  print('myesc @@@{0:s}@@@'.format(str))
+  assert str[0] == '\\'
+  result = esc.get(str[1])
+  if result is None:
+    if str[1] in '01234567':
+      # octal escape: the first digit is part of the number, so a bare \0
+      # parses as 0 instead of reaching int('', 8); a leading 0 may be
+      # followed by up to three more digits, otherwise up to three digits
+      # in total are taken
+      i = 2
+      j = min(5 if str[1] == '0' else 4, len(str))
+      while i < j and str[i] in '01234567':
+        i += 1
+      result = int(str[1:i], 8)
+    elif str[1] == 'x':
+      i = 2
+      j = min(4, len(str))
+      while i < j and str[i] in '0123456789ABCDEFabcdef':
+        i += 1
+      # \x without hex digits: int('', 16) would raise ValueError, so yield 0
+      # NOTE(review): 0 is meant to mirror strtoul() in flex's myesc -- confirm
+      result = int(str[2:i], 16) if i > 2 else 0
+    else:
+      result = ord(str[1])
+  return result
diff --git a/bootstrap/y_tab.py b/bootstrap/y_tab.py
deleted file mode 100644 (file)
index 3bb913f..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-CHAR = 258
-NUMBER = 259
-SECTEND = 260
-SCDECL = 261
-XSCDECL = 262
-NAME = 263
-PREVCCL = 264
-EOF_OP = 265
-TOK_OPTION = 266
-TOK_OUTFILE = 267
-TOK_PREFIX = 268
-TOK_YYCLASS = 269
-TOK_HEADER_FILE = 270
-TOK_EXTRA_TYPE = 271
-TOK_TABLES_FILE = 272
-CCE_ALNUM = 273
-CCE_ALPHA = 274
-CCE_BLANK = 275
-CCE_CNTRL = 276
-CCE_DIGIT = 277
-CCE_GRAPH = 278
-CCE_LOWER = 279
-CCE_PRINT = 280
-CCE_PUNCT = 281
-CCE_SPACE = 282
-CCE_UPPER = 283
-CCE_XDIGIT = 284
-CCE_NEG_ALNUM = 285
-CCE_NEG_ALPHA = 286
-CCE_NEG_BLANK = 287
-CCE_NEG_CNTRL = 288
-CCE_NEG_DIGIT = 289
-CCE_NEG_GRAPH = 290
-CCE_NEG_LOWER = 291
-CCE_NEG_PRINT = 292
-CCE_NEG_PUNCT = 293
-CCE_NEG_SPACE = 294
-CCE_NEG_UPPER = 295
-CCE_NEG_XDIGIT = 296
-CCL_OP_DIFF = 297
-CCL_OP_UNION = 298
-TOK_ACTION_GROUP = 299
-TOK_ELEMENT_GROUP = 300
-BEGIN_REPEAT_POSIX = 301
-END_REPEAT_POSIX = 302
-BEGIN_REPEAT_FLEX = 303
-END_REPEAT_FLEX = 304