Implement state flags and fix minor trailing context/bracketing macros issue
author Nick Downing <nick@ndcode.org>
Tue, 15 Jan 2019 02:00:26 +0000 (13:00 +1100)
committer Nick Downing <nick@ndcode.org>
Tue, 15 Jan 2019 02:00:26 +0000 (13:00 +1100)
bootstrap/parse.y
bootstrap/scan.l
bootstrap/state.py

diff --git a/bootstrap/parse.y b/bootstrap/parse.y
index 6807a2a..cc071f3 100644 (file)
@@ -45,7 +45,7 @@
   #lastchar = 0
   #i = 0
   #rulelen = 0
-  trlcontxt = 0
+  trlcontxt = False
   xcluflg = 0
   #currccl = 0
   #cclsorted = 0
@@ -121,10 +121,12 @@ sect1end
 startconddecl
   : SCDECL
     {
+      global xcluflg
       xcluflg = False
     }
   | XSCDECL
     {
+      global xcluflg
       xcluflg = True
     }
   ;
@@ -215,6 +217,7 @@ sect2
 initforrule
   :
     {
+      global trlcontxt
       trlcontxt = False #variable_trail_rule = varlength = False
       #trailcnt = headcnt = rulelen = 0
       #current_state_type = 0x1
@@ -389,6 +392,7 @@ rule
     }
   | re '$'
     {
+      global trlcontxt
       #headcnt = 0
       #trailcnt = 1
       #rulelen = 1
@@ -404,7 +408,7 @@ rule
       #if lex_compat or varlength:
       #  add_accept($1, num_rules | 0x4000)
       #  variable_trail_rule = True
-      #trlcontxt = True
+      trlcontxt = True
       #eps = mkstate(256 + 1)
       #$$ = link_machines($1, link_machines(eps, mkstate(ord('\n'))))
       insert_after(1, '</RegexCharacterLiteral>')
@@ -439,17 +443,18 @@ re
 re2
   : re '/'
     {
-      #if trlcontxt:
-      #  state.synerr('trailing context used twice')
-      #else:
-      #  trlcontxt = True
+      global trlcontxt
+      if trlcontxt:
+        state.synerr('trailing context used twice')
+      else:
+        trlcontxt = True
       #if varlength:
       #  varlength = False
       #else:
       #  headcnt = rulelen
       #rulelen = 0
       #current_state_type = 0x2
-      $$ = $1
+      #$$ = $1
     }
   ;
 
@@ -582,7 +587,7 @@ singleton
       #    mkeccl(ccltbl + cclmap[cclany], ccllen[cclany], nextecm, ecgroup, csize, csize)
       #  madeany = True
       #rulelen += 1
-      if False: #_sf_stk[_sf_top_ix] & int(0x0002):
+      if state._sf_stk[-1] & 2:
         #$$ = mkstate(-cclany)
         insert_after(0, '</RegexCharacterLiteral>')
         insert_before(0, '<RegexCharacterLiteral character_set="0 256">')
@@ -648,7 +653,7 @@ singleton
       #rulelen += 1
       #if $1 == nlch:
       #  rule_has_nl[num_rules] = True
-      #if _sf_stk[_sf_top_ix] & int(0x0001) and has_case($1):
+      #if state._sf_stk[-1] & 1 and has_case($1):
       #  $$ = mkor(mkstate($1), mkstate(reverse_case($1)))
       #else:
       #  $$ = mkstate($1)
@@ -692,7 +697,7 @@ braceccl
 ccl
   : ccl CHAR '-' CHAR
     {
-      #if _sf_stk[_sf_top_ix] & int(0x0001):
+      #if state._sf_stk[-1] & 1:
       #  if has_case($2) != has_case($4) or has_case($2) and (True if (*__ctype_b_loc())[int($2)] & int(_ISlower) else False) != (True if (*__ctype_b_loc())[int($4)] & int(_ISlower) else False) or has_case($2) and (True if (*__ctype_b_loc())[int($2)] & int(_ISupper) else False) != (True if (*__ctype_b_loc())[int($4)] & int(_ISupper) else False):
       #    fw3_msg = []
       #    snprintf(fw3_msg, 2048, 'the character range [%c-%c] is ambiguous in a case-insensitive scanner', $2, $4)
@@ -711,7 +716,7 @@ ccl
       #    i += 1
       #  cclsorted = cclsorted and $2 > lastchar
       #  lastchar = $4
-      #  if _sf_stk[_sf_top_ix] & int(0x0001) and has_case($2) and has_case($4):
+      #  if state._sf_stk[-1] & 1 and has_case($2) and has_case($4):
       #    $2 = reverse_case($2)
       #    $4 = reverse_case($4)
       #    i = $2
@@ -731,7 +736,7 @@ ccl
       #ccladd($1, $2)
       #cclsorted = cclsorted and $2 > lastchar
       #lastchar = $2
-      #if _sf_stk[_sf_top_ix] & int(0x0001) and has_case($2):
+      #if state._sf_stk[-1] & 1 and has_case($2):
       #  $2 = reverse_case($2)
       #  ccladd($1, $2)
       #  cclsorted = cclsorted and $2 > lastchar
@@ -833,7 +838,7 @@ ccl_expr
       #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISlower):
       #    ccladd(currccl, c)
       #  c += 1
-      if False: #_sf_stk[_sf_top_ix] & int(0x0001):
+      if state._sf_stk[-1] & 1:
         #c = None
         #c = 0
         #while c < csize:
@@ -898,7 +903,7 @@ ccl_expr
       #  if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISupper):
       #    ccladd(currccl, c)
       #  c += 1
-      if False: #_sf_stk[_sf_top_ix] & int(0x0001):
+      if state._sf_stk[-1] & 1:
         #c = None
         #c = 0
         #while c < csize:
@@ -1023,7 +1028,7 @@ ccl_expr
     }
   | CCE_NEG_LOWER
     {
-      #if _sf_stk[_sf_top_ix] & int(0x0001):
+      #if state._sf_stk[-1] & 1:
       #  lwarn('[:^lower:] is ambiguous in case insensitive scanner')
       #else:
       #  c = None
@@ -1037,7 +1042,7 @@ ccl_expr
     }
   | CCE_NEG_UPPER
     {
-      #if _sf_stk[_sf_top_ix] & int(0x0001):
+      #if state._sf_stk[-1] & 1:
       #  lwarn('[:^upper:] ambiguous in case insensitive scanner')
       #else:
       #  c = None
@@ -1057,7 +1062,7 @@ string
       #if $2 == nlch:
       #  rule_has_nl[num_rules] = True
       #rulelen += 1
-      #if _sf_stk[_sf_top_ix] & int(0x0001) and has_case($2):
+      #if state._sf_stk[-1] & 1 and has_case($2):
       #  $$ = mkor(mkstate($2), mkstate(reverse_case($2)))
       #else:
       #  $$ = mkstate($2)
diff --git a/bootstrap/scan.l b/bootstrap/scan.l
index f937e92..7095c1b 100644 (file)
@@ -374,11 +374,17 @@ M4QEND      "]""]"
     markup_option('CPlusPlus', option_sense)
   }
   caseful|case-sensitive {
-    #(_sf_stk[_sf_top_ix] |= int(0x0001)) if not option_sense else (_sf_stk[_sf_top_ix] &= ~int(0x0001))
+    if not option_sense:
+      state._sf_stk[-1] |= 1
+    else:
+      state._sf_stk[-1] &= ~1
     markup_option('Caseless', not option_sense)
   }
   caseless|case-insensitive {
-    #(_sf_stk[_sf_top_ix] |= int(0x0001)) if option_sense else (_sf_stk[_sf_top_ix] &= ~int(0x0001))
+    if option_sense:
+      state._sf_stk[-1] |= 1
+    else:
+      state._sf_stk[-1] &= ~1
     markup_option('Caseless', option_sense)
   }
   debug {
@@ -808,7 +814,7 @@ M4QEND      "]""]"
   }
 
   ^{OPTWS}"<" {
-    if True: #not (_sf_stk[_sf_top_ix] & int(0x0004)):
+    if not (state._sf_stk[-1] & 4):
       BEGIN(SC)
     piece_flush(len(yytext) - 1)
     return ord('<')
@@ -847,7 +853,7 @@ M4QEND      "]""]"
     abort()
   }
   {WS}"|".*{NL} {
-    if False: #_sf_stk[_sf_top_ix] & int(0x0004):
+    if state._sf_stk[-1] & 4:
       amt = int(strchr(yytext, ord('|')) - yytext)
       yyless(amt)
     else:
@@ -868,7 +874,7 @@ M4QEND      "]""]"
 
   ^{WS}"/*" {
     global bracelevel
-    if False: #_sf_stk[_sf_top_ix] & int(0x0004):
+    if state._sf_stk[-1] & 4:
       yy_push_state(COMMENT_DISCARD)
     else:
       yyless(len(yytext) - 2)
@@ -881,9 +887,7 @@ M4QEND      "]""]"
 
   {WS} {
     global bracelevel, doing_rule_action
-    if False: #_sf_stk[_sf_top_ix] & int(0x0004):
-      pass
-    else:
+    if not (state._sf_stk[-1] & 4):
       bracelevel = 0
       continued_action = False
       BEGIN(ACTION)
@@ -899,7 +903,7 @@ M4QEND      "]""]"
 
   {OPTWS}{NL} {
     global bracelevel, doing_rule_action
-    if False: #_sf_stk[_sf_top_ix] & int(0x0004):
+    if state._sf_stk[-1] & 4:
       state.linenum += 1
     else:
       bracelevel = 0
@@ -968,7 +972,7 @@ M4QEND      "]""]"
     else:
       if end_is_ws:
         yyless(len(yytext) - 1)
-      if state.lex_compat or len(nmdef) and (nmdef[0] == '^' or nmdef[-1] == '$') or end_is_ws and y_tab.trlcontxt and True: #not (_sf_stk[_sf_top_ix] & int(0x0004)):
+      if state.lex_compat or len(nmdef) and (nmdef[0] == '^' or nmdef[-1] == '$') or end_is_ws and y_tab.trlcontxt and not (state._sf_stk[-1] & 4):
         unput(nmdef)
         if len(nmdef) and nmdef[0] == '^':
           BEGIN(CARETISBOL)
@@ -982,7 +986,7 @@ M4QEND      "]""]"
   }
 
   "/*" {
-    if False: #_sf_stk[_sf_top_ix] & int(0x0004):
+    if state._sf_stk[-1] & 4:
       yy_push_state(COMMENT_DISCARD)
     else:
       yyless(1)
@@ -992,13 +996,13 @@ M4QEND      "]""]"
   "(?#" {
     if state.lex_compat or state.posix_compat:
       yyless(1)
-      #sf_push()
+      state.sf_push()
       return ord('(')
     else:
       yy_push_state(EXTENDED_COMMENT)
   }
   "(?" {
-    #sf_push()
+    state.sf_push()
     if state.lex_compat or state.posix_compat:
       yyless(1)
     else:
@@ -1006,12 +1010,12 @@ M4QEND      "]""]"
     return ord('(')
   }
   "(" {
-    #sf_push()
+    state.sf_push()
     return ord('(')
   }
   ")" {
-    if True: #_sf_top_ix > 0:
-      #sf_pop()
+    if len(state._sf_stk) > 1:
+      state.sf_pop()
       return ord(')')
     else:
       state.synerr('unbalanced parenthesis')
@@ -1117,18 +1121,18 @@ M4QEND      "]""]"
     return ord(':')
   }
   "-"                          BEGIN(GROUP_MINUS_PARAMS)
-  i                            #(_sf_stk[_sf_top_ix] |= int(0x0001)) if 1 else (_sf_stk[_sf_top_ix] &= ~int(0x0001))
-  s                            #(_sf_stk[_sf_top_ix] |= int(0x0002)) if 1 else (_sf_stk[_sf_top_ix] &= ~int(0x0002))
-  x                            #(_sf_stk[_sf_top_ix] |= int(0x0004)) if 1 else (_sf_stk[_sf_top_ix] &= ~int(0x0004))
+  i                            state._sf_stk[-1] |= 1
+  s                            state._sf_stk[-1] |= 2
+  x                            state._sf_stk[-1] |= 4
 }
 <GROUP_MINUS_PARAMS>{
   ":" {
     BEGIN(SECT2)
     return ord(':')
   }
-  i                            #(_sf_stk[_sf_top_ix] |= int(0x0001)) if 0 else (_sf_stk[_sf_top_ix] &= ~int(0x0001))
-  s                            #(_sf_stk[_sf_top_ix] |= int(0x0002)) if 0 else (_sf_stk[_sf_top_ix] &= ~int(0x0002))
-  x                            #(_sf_stk[_sf_top_ix] |= int(0x0004)) if 0 else (_sf_stk[_sf_top_ix] &= ~int(0x0004))
+  i                            state._sf_stk[-1] &= ~1
+  s                            state._sf_stk[-1] &= ~2
+  x                            state._sf_stk[-1] &= ~4
 }
 
 <FIRSTCCL>{
diff --git a/bootstrap/state.py b/bootstrap/state.py
index eb987a2..a7f05d6 100644 (file)
@@ -2,13 +2,17 @@ import sys
 
 # miscellaneous state accessed by scan.l and parse.y
 in_rule = False
-infilename = '<stdin>'
 lex_compat = False
-linenum = 1
 nmstr = ''
 no_section3_escape = False
 posix_compat = False
 
+_sf_stk = [0]
+def sf_push():
+  _sf_stk.append(_sf_stk[-1])
+def sf_pop():
+  _sf_stk.pop()
+
 name_defs = {}
 def ndinstal(key, value):
   if key in name_defs:
@@ -18,6 +22,8 @@ def ndinstal(key, value):
 def ndlookup(key):
   return name_defs.get(key)
 
+infilename = '<stdin>'
+linenum = 1
 def synerr(str):
   sys.stderr.write(
     '{0:s}:{1:d}: {2:s}\n'.format(infilename, linenum, str)