--- /dev/null
+/* parse.y - parser for flex input */
+
+%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
+%token TOK_OPTION TOK_OUTFILE TOK_PREFIX TOK_YYCLASS TOK_HEADER_FILE TOK_EXTRA_TYPE
+%token TOK_TABLES_FILE
+
+%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
+%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
+
+%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
+%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
+
+%left CCL_OP_DIFF CCL_OP_UNION
+
+/* Nick extra rules for action groups */
+%token TOK_ACTION_GROUP TOK_ELEMENT_GROUP
+
+/*
+ * POSIX and AT&T lex place the precedence of the repeat operator, {},
+ * below that of concatenation. Thus, ab{3} is ababab. Most other POSIX
+ * utilities use an Extended Regular Expression (ERE) precedence that
+ * places the repeat operator higher than concatenation, which causes
+ * ab{3} to yield abbb.
+ *
+ * To support both the POSIX/AT&T precedence and the flex precedence, we
+ * define two token sets for the begin and end tokens of the repeat
+ * operator, '{' and '}'. The lexical scanner chooses which tokens to
+ * return based on whether posix_compat or lex_compat is specified.
+ * Specifying either posix_compat or lex_compat causes flex to parse
+ * scanner files as per the AT&T and POSIX-mandated behavior.
+ */
+
+%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
+
+
+%{
+ # Python prologue: module-level import and state shared by the semantic actions below.
+ # Commented-out names are kept from the original parser and are currently unused.
+ import state
+
+ #pat = 0
+ #scnum = 0
+ #eps = 0
+ #headcnt = 0
+ #trailcnt = 0
+ #lastchar = 0
+ #i = 0
+ #rulelen = 0
+ # assigned by the initforrule action
+ trlcontxt = 0
+ # read by the sect1 action to format the exclusive="..." attribute;
+ # the startconddecl actions assign True/False to this name
+ xcluflg = 0
+ #currccl = 0
+ #cclsorted = 0
+ #varlength = 0
+ #variable_trail_rule = 0
+ #scon_stk = []
+ #scon_stk_ptr = 0
+ #madeany = False
+ #ccldot = 0
+ #cclany = 0
+ #previous_continued_action = 0
+ # base index into lex_yy.piece used by insert_before()/insert_after() and the empty scon action
+ # NOTE(review): no assignment other than this initializer is visible in this grammar;
+ # presumably it is maintained by the parser skeleton -- confirm
+ piece2 = 0
+ # incremented once per insert_before()/insert_after() call
+ piece3 = 0
+%}
+
+%%
+
+goal
+ : initlex sect1 sect1end sect2 initforrule
+ {
+ #def_rule = None
+ #pat = cclinit()
+ #cclnegate(pat)
+ #def_rule = mkstate(-pat)
+ #default_rule = num_rules
+ #finish_rule(def_rule, False, 0, 0, 0)
+ #i = 1
+ #while i <= lastsc:
+ # scset[i] = mkbranch(scset[i], def_rule)
+ # i += 1
+ #if spprdflt:
+ # add_action('YY_FATAL_ERROR( "flex scanner jammed" )')
+ #else:
+ # add_action('ECHO')
+ #add_action(';\n\tYY_BREAK]]\n')
+ }
+ ;
+
+initlex
+ :
+ {
+ #scinstal('INITIAL', False)
+ }
+ ;
+
+sect1
+ : sect1 startconddecl namelist1
+ {
+ # Wrap the start condition declaration in an AST_Section1_StartConditions element.
+ # exclusive is "true" when xcluflg is truthy, otherwise "false".
+ # NOTE(review): the first argument appears to be the 0-based index of the right-hand-side
+ # symbol; the closing tag is inserted first, presumably so that the opening-tag insertion
+ # does not shift the position used for the closing tag -- confirm against lex_yy.piece_insert
+ insert_after(2, '</AST_Section1_StartConditions>')
+ insert_before(1, '<AST_Section1_StartConditions exclusive="{0:s}">'.format('true' if xcluflg else 'false'))
+ }
+ | sect1 options
+ {
+ # Wrap the options symbol (index 1) in an AST_Section1_Options element.
+ insert_after(1, '</AST_Section1_Options>')
+ insert_before(1, '<AST_Section1_Options>')
+ }
+ |
+ | error
+ {
+ # Report the syntax error through the shared state module.
+ state.synerr('unknown error processing section 1')
+ }
+ ;
+
+sect1end
+ : SECTEND
+ {
+ #check_options()
+ #scon_stk = allocate_array(lastsc + 1, 4)
+ #scon_stk_ptr = 0
+ }
+ ;
+
+startconddecl
+  : SCDECL
+    {
+      # %s: inclusive start conditions.  xcluflg is module-level state that the
+      # sect1 action reads when it formats the exclusive="..." attribute, so it
+      # must be declared global; a plain assignment would only bind a local name
+      # inside this action and the module-level value would never change.
+      global xcluflg
+      xcluflg = False
+    }
+  | XSCDECL
+    {
+      # %x: exclusive start conditions (see the note on the SCDECL alternative).
+      global xcluflg
+      xcluflg = True
+    }
+  ;
+
+namelist1
+ : namelist1 NAME
+ {
+ #scinstal(nmstr, xcluflg)
+ }
+ | NAME
+ {
+ #scinstal(nmstr, xcluflg)
+ }
+ | error
+ {
+ state.synerr('bad start condition list')
+ }
+ ;
+
+options
+ : TOK_OPTION optionlist
+ ;
+
+optionlist
+ : optionlist option
+ |
+ ;
+
+option
+ : TOK_OUTFILE '=' NAME
+ {
+ #outfilename = xstrdup(nmstr)
+ #did_outfilename = 1
+ insert_after(2, '</AST_Section1_Options_OutFile>')
+ insert_before(0, '<AST_Section1_Options_OutFile>')
+ }
+ | TOK_EXTRA_TYPE '=' NAME
+ {
+ #extra_type = xstrdup(nmstr)
+ insert_after(2, '</AST_Section1_Options_ExtraType>')
+ insert_before(0, '<AST_Section1_Options_ExtraType>')
+ }
+ | TOK_PREFIX '=' NAME
+ {
+ #prefix = xstrdup(nmstr)
+ #if strchr(prefix, ord('[')) or strchr(prefix, ord(']')):
+ # flexerror('Prefix must not contain [ or ]')
+ insert_after(2, '</AST_Section1_Options_Prefix>')
+ insert_before(0, '<AST_Section1_Options_Prefix>')
+ }
+ | TOK_YYCLASS '=' NAME
+ {
+ #yyclass = xstrdup(nmstr)
+ insert_after(2, '</AST_Section1_Options_YYClass>')
+ insert_before(0, '<AST_Section1_Options_YYClass>')
+ }
+ | TOK_HEADER_FILE '=' NAME
+ {
+ #headerfilename = xstrdup(nmstr)
+ insert_after(2, '</AST_Section1_Options_HeaderFile>')
+ insert_before(0, '<AST_Section1_Options_HeaderFile>')
+ }
+ | TOK_TABLES_FILE '=' NAME
+ {
+ #tablesext = True
+ #tablesfilename = xstrdup(nmstr)
+ insert_after(2, '</AST_Section1_Options_TablesFile>')
+ insert_before(0, '<AST_Section1_Options_TablesFile>')
+ }
+ ;
+
+sect2
+ : sect2 scon initforrule flexrule '\n'
+ {
+ # A single rule line: wrap everything from scon (index 1) through the newline (index 4)
+ # in an AST_Section2_Rule element.  The closing tag is inserted before the opening tag.
+ #scon_stk_ptr = $2
+ insert_after(4, '</AST_Section2_Rule>')
+ insert_before(1, '<AST_Section2_Rule>')
+ }
+ | sect2 scon '{' sect2 '}'
+ {
+ # A braced group of rules sharing one start condition list: wrap scon (index 1)
+ # through the closing brace (index 4) in an AST_Section2_CompoundRule element.
+ #scon_stk_ptr = $2
+ insert_after(4, '</AST_Section2_CompoundRule>')
+ insert_before(1, '<AST_Section2_CompoundRule>')
+ }
+ |
+ ;
+
+initforrule
+  :
+    {
+      # Empty marker rule reduced before each rule is parsed: resets per-rule
+      # state.  trlcontxt is declared at module level in the prologue, so it is
+      # declared global here; without the declaration the assignment would only
+      # create a local that is discarded when the action returns.
+      global trlcontxt
+      trlcontxt = False #variable_trail_rule = varlength = False
+      #trailcnt = headcnt = rulelen = 0
+      #current_state_type = 0x1
+      #previous_continued_action = continued_action
+      # tell the shared state module that a rule is now being parsed
+      state.in_rule = True
+      #new_rule()
+    }
+  ;
+
+flexrule
+ : '^' rule
+ {
+ #pat = $2
+ #finish_rule(pat, variable_trail_rule, headcnt, trailcnt, previous_continued_action)
+ #if scon_stk_ptr > 0:
+ # i = 1
+ # while i <= scon_stk_ptr:
+ # scbol[scon_stk[i]] = mkbranch(scbol[scon_stk[i]], pat)
+ # i += 1
+ #else:
+ # i = 1
+ # while i <= lastsc:
+ # if not scxclu[i]:
+ # scbol[i] = mkbranch(scbol[i], pat)
+ # i += 1
+ #if not bol_needed:
+ # bol_needed = True
+ # if performance_report > 1:
+ # pinpoint_message('\'^\' operator results in sub-optimal performance')
+ insert_after(1, '</AST_Section2_Rule_FLexRule>')
+ insert_before(0, '<AST_Section2_Rule_FLexRule bol="true">')
+ }
+ | rule
+ {
+ #pat = $1
+ #finish_rule(pat, variable_trail_rule, headcnt, trailcnt, previous_continued_action)
+ #if scon_stk_ptr > 0:
+ # i = 1
+ # while i <= scon_stk_ptr:
+ # scset[scon_stk[i]] = mkbranch(scset[scon_stk[i]], pat)
+ # i += 1
+ #else:
+ # i = 1
+ # while i <= lastsc:
+ # if not scxclu[i]:
+ # scset[i] = mkbranch(scset[i], pat)
+ # i += 1
+ insert_after(0, '</AST_Section2_Rule_FLexRule>')
+ insert_before(0, '<AST_Section2_Rule_FLexRule bol="false">')
+ }
+ | EOF_OP
+ {
+ #if scon_stk_ptr > 0:
+ # build_eof_action()
+ #else:
+ # i = 1
+ # while i <= lastsc:
+ # if not sceof[i]:
+ # scon_stk[++scon_stk_ptr] = i
+ # i += 1
+ # if scon_stk_ptr == 0:
+ # lwarn('all start conditions already have <<EOF>> rules')
+ # else:
+ # build_eof_action()
+ insert_after(0, '</AST_Section2_Rule_EOFRule>')
+ insert_before(0, '<AST_Section2_Rule_EOFRule>')
+ }
+ | error
+ {
+ state.synerr('unrecognized rule')
+ }
+ ;
+
+scon_stk_ptr
+ :
+ {
+ #$$ = scon_stk_ptr
+ }
+ ;
+
+scon
+  : '<' scon_stk_ptr namelist2 '>'
+    {
+      # Explicit start condition list: wrap '<' (index 0) through '>' (index 3).
+      #$$ = $2
+      insert_after(3, '</AST_Section2_StartConditions>')
+      insert_before(0, '<AST_Section2_StartConditions>')
+    }
+  | '<' '*' '>'
+    {
+      # Wildcard start condition: wrap '<' (index 0) through '>' (index 2).
+      #$$ = scon_stk_ptr
+      #i = 1
+      #while i <= lastsc:
+      # j = None
+      # j = 1
+      # while j <= scon_stk_ptr:
+      # if scon_stk[j] == i:
+      # break
+      # j += 1
+      # if j > scon_stk_ptr:
+      # scon_stk[++scon_stk_ptr] = i
+      # i += 1
+      insert_after(2, '</AST_Section2_StartConditions>')
+      insert_before(0, '<AST_Section2_StartConditions wildcard="true">')
+    }
+  |
+    {
+      # No start condition list: emit an empty AST_Section2_StartConditions element.
+      global yychar
+      #$$ = scon_stk_ptr
+      # Force the lookahead token to be read if the parser has not fetched it yet.
+      if yychar == YYEMPTY:
+        yychar = lex_yy.flexscan()
+      # Debug trace.  The scanner text lives in the lex_yy module, as the label
+      # in the message says, so it is referenced as lex_yy.yytext; a bare yytext
+      # is not defined anywhere in this grammar.
+      print('xxx yychar', yychar, 'yylval', yylval, 'yylloc', yylloc, 'lex_yy.yytext', lex_yy.yytext)
+      # Rotate the three pieces around piece2: the piece at piece2 + 1 moves to
+      # piece2 - 1 and the other two each move up by one slot.
+      temp = lex_yy.piece[piece2 + 1]
+      lex_yy.piece[piece2 + 1] = lex_yy.piece[piece2]
+      lex_yy.piece[piece2] = lex_yy.piece[piece2 - 1]
+      lex_yy.piece[piece2 - 1] = temp
+      insert_before(0, '<AST_Section2_StartConditions />')
+    }
+  ;
+
+namelist2
+ : namelist2 ',' sconname
+ | sconname
+ | error
+ {
+ state.synerr('bad start condition list')
+ }
+ ;
+
+sconname
+ : NAME
+ {
+ #if (scnum = sclookup(nmstr)) == 0:
+ # format_pinpoint_message('undeclared start condition %s', nmstr)
+ #else:
+ # i = 1
+ # while i <= scon_stk_ptr:
+ # if scon_stk[i] == scnum:
+ # format_warn('<%s> specified twice', scname[scnum])
+ # break
+ # i += 1
+ # if i > scon_stk_ptr:
+ # scon_stk[++scon_stk_ptr] = scnum
+ }
+ ;
+
+/* this rule handles trailing context, it must produce two separate regexes,
+ * where the first is the expression to be matched, and the second is the
+ * trailing context, RegexEmpty (matches empty string) if no trailing context
+ */
+rule
+ : re2 re
+ {
+ #if transchar[lastst[$2]] != 256 + 1:
+ # $2 = link_machines($2, mkstate(256 + 1))
+ #mark_beginning_as_normal($2)
+ #current_state_type = 0x1
+ #if previous_continued_action:
+ # if not varlength or headcnt != 0:
+ # lwarn('trailing context made variable due to preceding \'|\' action')
+ # varlength = True
+ # headcnt = 0
+ #if lex_compat or varlength and headcnt == 0:
+ # add_accept($1, num_rules | 0x4000)
+ # variable_trail_rule = True
+ #else:
+ # trailcnt = rulelen
+ #$$ = link_machines($1, $2)
+ }
+ | re2 re '$'
+ {
+ state.synerr('trailing context used twice')
+ }
+ | re '$'
+ {
+ #headcnt = 0
+ #trailcnt = 1
+ #rulelen = 1
+ #varlength = False
+ #current_state_type = 0x2
+ #if trlcontxt:
+ # state.synerr('trailing context used twice')
+ # $$ = mkstate(256 + 1)
+ #else:
+ # if previous_continued_action:
+ # lwarn('trailing context made variable due to preceding \'|\' action')
+ # varlength = True
+ #if lex_compat or varlength:
+ # add_accept($1, num_rules | 0x4000)
+ # variable_trail_rule = True
+ #trlcontxt = True
+ #eps = mkstate(256 + 1)
+ #$$ = link_machines($1, link_machines(eps, mkstate(ord('\n'))))
+ insert_after(1, '</RegexCharacterLiteral>')
+ insert_before(1, '<RegexCharacterLiteral character_set="10 11">')
+ }
+ | re
+ {
+ #$$ = $1
+ #if trlcontxt:
+ # if lex_compat or varlength and headcnt == 0:
+ # variable_trail_rule = True
+ # else:
+ # trailcnt = rulelen
+ insert_after(0, '<RegexEmpty />')
+ }
+ ;
+
+re
+ : re '|' series
+ {
+ #varlength = True
+ #$$ = mkor($1, $3)
+ insert_after(2, '</RegexOr>')
+ insert_before(0, '<RegexOr>')
+ }
+ | series
+ {
+ #$$ = $1
+ }
+ ;
+
+re2
+ : re '/'
+ {
+ #if trlcontxt:
+ # state.synerr('trailing context used twice')
+ #else:
+ # trlcontxt = True
+ #if varlength:
+ # varlength = False
+ #else:
+ # headcnt = rulelen
+ #rulelen = 0
+ #current_state_type = 0x2
+ $$ = $1
+ }
+ ;
+
+series
+ : series singleton
+ {
+ #$$ = link_machines($1, $2)
+ insert_after(1, '</RegexSequence>')
+ insert_before(0, '<RegexSequence>')
+ }
+ | singleton
+ {
+ #$$ = $1
+ }
+ | series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
+ {
+ #varlength = True
+ #if $3 > $5 or $3 < 0:
+ # state.synerr('bad iteration values')
+ # $$ = $1
+ #else:
+ # if $3 == 0:
+ # if $5 <= 0:
+ # state.synerr('bad iteration values')
+ # $$ = $1
+ # else:
+ # $$ = mkopt(mkrep($1, 1, $5))
+ # else:
+ # $$ = mkrep($1, $3, $5)
+ insert_after(5, '</RegexRepeat>')
+ insert_before(0, '<RegexRepeat count0="{0:d}" count1="{1:d}">'.format($3, $5))
+ }
+ | series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
+ {
+ #varlength = True
+ #if $3 <= 0:
+ # state.synerr('iteration value must be positive')
+ # $$ = $1
+ #else:
+ # $$ = mkrep($1, $3, -1)
+ insert_after(4, '</RegexRepeat>')
+ insert_before(0, '<RegexRepeat count0="{0:d}">'.format($3))
+ }
+ | series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
+ {
+ #varlength = True
+ #if $3 <= 0:
+ # state.synerr('iteration value must be positive')
+ # $$ = $1
+ #else:
+ # $$ = link_machines($1, copysingl($1, $3 - 1))
+ insert_after(3, '</RegexRepeat>')
+ insert_before(0, '<RegexRepeat count0="{0:d}" count1="{1:d}">'.format($3, $3))
+ }
+ ;
+
+singleton
+ : singleton '*'
+ {
+ #varlength = True
+ #$$ = mkclos($1)
+ insert_after(1, '</RegexRepeat>')
+ insert_before(0, '<RegexRepeat count0="0">')
+ }
+ | singleton '+'
+ {
+ #varlength = True
+ #$$ = mkposcl($1)
+ insert_after(1, '</RegexRepeat>')
+ insert_before(0, '<RegexRepeat count0="1">')
+ }
+ | singleton '?'
+ {
+ #varlength = True
+ #$$ = mkopt($1)
+ insert_after(1, '</RegexRepeat>')
+ insert_before(0, '<RegexRepeat count0="0" count1="1">')
+ }
+ | singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
+ {
+ #varlength = True
+ #if $3 > $5 or $3 < 0:
+ # state.synerr('bad iteration values')
+ # $$ = $1
+ #else:
+ # if $3 == 0:
+ # if $5 <= 0:
+ # state.synerr('bad iteration values')
+ # $$ = $1
+ # else:
+ # $$ = mkopt(mkrep($1, 1, $5))
+ # else:
+ # $$ = mkrep($1, $3, $5)
+ insert_after(5, '</RegexRepeat>')
+ insert_before(0, '<RegexRepeat count0="{0:d}" count1="{1:d}">'.format($3, $5))
+ }
+ | singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
+ {
+ #varlength = True
+ #if $3 <= 0:
+ # state.synerr('iteration value must be positive')
+ # $$ = $1
+ #else:
+ # $$ = mkrep($1, $3, -1)
+ insert_after(4, '</RegexRepeat>')
+ insert_before(0, '<RegexRepeat count0="{0:d}">'.format($3))
+ }
+ | singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
+ {
+ #varlength = True
+ #if $3 <= 0:
+ # state.synerr('iteration value must be positive')
+ # $$ = $1
+ #else:
+ # $$ = link_machines($1, copysingl($1, $3 - 1))
+ insert_after(3, '</RegexRepeat>')
+ insert_before(0, '<RegexRepeat count0="{0:d}" count1="{1:d}">'.format($3, $3))
+ }
+ | '.'
+ {
+ #if not madeany:
+ # ccldot = cclinit()
+ # ccladd(ccldot, ord('\n'))
+ # cclnegate(ccldot)
+ # if useecs:
+ # mkeccl(ccltbl + cclmap[ccldot], ccllen[ccldot], nextecm, ecgroup, csize, csize)
+ # cclany = cclinit()
+ # cclnegate(cclany)
+ # if useecs:
+ # mkeccl(ccltbl + cclmap[cclany], ccllen[cclany], nextecm, ecgroup, csize, csize)
+ # madeany = True
+ #rulelen += 1
+ if False: #_sf_stk[_sf_top_ix] & int(0x0002):
+ #$$ = mkstate(-cclany)
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="0 256">')
+ else:
+ #$$ = mkstate(-ccldot)
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="0 10 11 256">')
+ }
+ | fullccl
+ {
+ #qsort(ccltbl + cclmap[$1], int(ccllen[$1]), sizeof *ccltbl, cclcmp)
+ #if useecs:
+ # mkeccl(ccltbl + cclmap[$1], ccllen[$1], nextecm, ecgroup, csize, csize)
+ #rulelen += 1
+ #if ccl_has_nl[$1]:
+ # rule_has_nl[num_rules] = True
+ #$$ = mkstate(-$1)
+ }
+ | PREVCCL
+ {
+ #rulelen += 1
+ #if ccl_has_nl[$1]:
+ # rule_has_nl[num_rules] = True
+ #$$ = mkstate(-$1)
+ }
+ | '"' string '"'
+ {
+ #$$ = $2
+ }
+ | '(' re ')'
+ {
+ #$$ = $2
+ insert_after(2, '</RegexGroup>')
+ insert_before(0, '<RegexGroup>')
+ }
+ /* Nick extra rules for unnumbered groups */
+ | '(' ':' re ')'
+ {
+ #$$ = $3
+ }
+ /* Nick extra rules for named groups */
+ | '(' NAME re ')'
+ {
+ #$$ = $3
+ insert_after(3, '</RegexGroupName>')
+ insert_before(0, '<RegexGroupName>')
+ }
+ /* Nick extra rules for action groups */
+ | '(' TOK_ACTION_GROUP re ')'
+ {
+ #$$ = $3
+ insert_after(3, '</RegexGroupAction>')
+ insert_before(0, '<RegexGroupAction>')
+ }
+ | '(' TOK_ELEMENT_GROUP re ')'
+ {
+ #$$ = $3
+ insert_after(3, '</RegexGroupElement>')
+ insert_before(0, '<RegexGroupElement>')
+ }
+ | CHAR
+ {
+ #rulelen += 1
+ #if $1 == nlch:
+ # rule_has_nl[num_rules] = True
+ #if _sf_stk[_sf_top_ix] & int(0x0001) and has_case($1):
+ # $$ = mkor(mkstate($1), mkstate(reverse_case($1)))
+ #else:
+ # $$ = mkstate($1)
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="{0:d} {1:d}">'.format($1, $1 + 1))
+ }
+ ;
+
+fullccl
+ : fullccl CCL_OP_DIFF braceccl
+ {
+ # Set difference is emitted as And(left, Not(right)): the right operand (index 2) is
+ # wrapped in RegexCharacterNot and the whole expression (index 0..2) in RegexCharacterAnd.
+ # Both closing tags are inserted after index 2, And first and then Not; the opening tags follow.
+ #$$ = ccl_set_diff($1, $3)
+ insert_after(2, '</RegexCharacterAnd>')
+ insert_after(2, '</RegexCharacterNot>')
+ insert_before(2, '<RegexCharacterNot>')
+ insert_before(0, '<RegexCharacterAnd>')
+ }
+ | fullccl CCL_OP_UNION braceccl
+ {
+ # Set union: wrap both operands (index 0..2) in RegexCharacterOr.
+ #$$ = ccl_set_union($1, $3)
+ insert_after(2, '</RegexCharacterOr>')
+ insert_before(0, '<RegexCharacterOr>')
+ }
+ | braceccl
+ ;
+
+braceccl
+ : '[' ccl ']'
+ {
+ # Plain bracketed class: no extra markup is added here.
+ #$$ = $2
+ }
+ | '[' '^' ccl ']'
+ {
+ # Negated class: RegexCharacterNot opens before '^' (index 1) and closes after ccl (index 2).
+ #cclnegate($3)
+ #$$ = $3
+ insert_after(2, '</RegexCharacterNot>')
+ insert_before(1, '<RegexCharacterNot>')
+ }
+ ;
+
+ccl
+ : ccl CHAR '-' CHAR
+ {
+ #if _sf_stk[_sf_top_ix] & int(0x0001):
+ # if has_case($2) != has_case($4) or has_case($2) and (True if (*__ctype_b_loc())[int($2)] & int(_ISlower) else False) != (True if (*__ctype_b_loc())[int($4)] & int(_ISlower) else False) or has_case($2) and (True if (*__ctype_b_loc())[int($2)] & int(_ISupper) else False) != (True if (*__ctype_b_loc())[int($4)] & int(_ISupper) else False):
+ # fw3_msg = []
+ # snprintf(fw3_msg, 2048, 'the character range [%c-%c] is ambiguous in a case-insensitive scanner', $2, $4)
+ # lwarn(fw3_msg)
+ # else:
+ # if not has_case($2) and not has_case($4) and not range_covers_case($2, $4):
+ # fw3_msg = []
+ # snprintf(fw3_msg, 2048, 'the character range [%c-%c] is ambiguous in a case-insensitive scanner', $2, $4)
+ # lwarn(fw3_msg)
+ #if $2 > $4:
+ # state.synerr('negative range in character class')
+ #else:
+ # i = $2
+ # while i <= $4:
+ # ccladd($1, i)
+ # i += 1
+ # cclsorted = cclsorted and $2 > lastchar
+ # lastchar = $4
+ # if _sf_stk[_sf_top_ix] & int(0x0001) and has_case($2) and has_case($4):
+ # $2 = reverse_case($2)
+ # $4 = reverse_case($4)
+ # i = $2
+ # while i <= $4:
+ # ccladd($1, i)
+ # i += 1
+ # cclsorted = cclsorted and $2 > lastchar
+ # lastchar = $4
+ #$$ = $1
+ insert_after(3, '</RegexCharacterOr>')
+ insert_after(3, '</RegexCharacterLiteral>')
+ insert_before(1, '<RegexCharacterLiteral character_set="{0:d} {1:d}">'.format($2, $4 + 1))
+ insert_before(0, '<RegexCharacterOr>')
+ }
+ | ccl CHAR
+ {
+ #ccladd($1, $2)
+ #cclsorted = cclsorted and $2 > lastchar
+ #lastchar = $2
+ #if _sf_stk[_sf_top_ix] & int(0x0001) and has_case($2):
+ # $2 = reverse_case($2)
+ # ccladd($1, $2)
+ # cclsorted = cclsorted and $2 > lastchar
+ # lastchar = $2
+ #$$ = $1
+ insert_after(1, '</RegexCharacterOr>')
+ insert_after(1, '</RegexCharacterLiteral>')
+ insert_before(1, '<RegexCharacterLiteral character_set="{0:d} {1:d}">'.format($2, $2 + 1))
+ insert_before(0, '<RegexCharacterOr>')
+ }
+ | ccl ccl_expr
+ {
+ #cclsorted = False
+ #$$ = $1
+ insert_after(1, '</RegexCharacterOr>')
+ insert_before(0, '<RegexCharacterOr>')
+ }
+ |
+ {
+ #cclsorted = True
+ #lastchar = 0
+ #currccl = $$ = cclinit()
+ insert_before(0, '<RegexCharacterLiteral character_set="" />')
+ }
+ ;
+
+ccl_expr
+ : CCE_ALNUM
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISalnum):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="48 58 65 91 97 123">')
+ }
+ | CCE_ALPHA
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISalpha):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="65 91 97 123">')
+ }
+ | CCE_BLANK
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if (c & ~0x7f) == 0 and (c == ord(' ') or c == ord('\t')):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="9 10 32 33">')
+ }
+  | CCE_CNTRL
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      # if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_IScntrl):
+      # ccladd(currccl, c)
+      # c += 1
+      # character_set lists half-open [start, end) pairs (compare the CHAR action,
+      # which emits "c c+1").  The 7-bit control characters are 0..31 and
+      # DEL (127), so DEL needs its own pair in addition to "0 32".
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="0 32 127 128">')
+    }
+ | CCE_DIGIT
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISdigit):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="48 58">')
+ }
+ | CCE_GRAPH
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISgraph):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="33 127">')
+ }
+ | CCE_LOWER
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISlower):
+ # ccladd(currccl, c)
+ # c += 1
+ if False: #_sf_stk[_sf_top_ix] & int(0x0001):
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISupper):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="65 91 97 123">')
+ else:
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="97 123">')
+ }
+ | CCE_PRINT
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISprint):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="32 127">')
+ }
+ | CCE_PUNCT
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISpunct):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="33 48 58 65 91 97 123 127">')
+ }
+ | CCE_SPACE
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISspace):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="9 14 32 33">')
+ }
+ | CCE_XDIGIT
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISxdigit):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="48 58 65 71 97 103">')
+ }
+ | CCE_UPPER
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISupper):
+ # ccladd(currccl, c)
+ # c += 1
+ if False: #_sf_stk[_sf_top_ix] & int(0x0001):
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if (c & ~0x7f) == 0 and (*__ctype_b_loc())[int(c)] & int(_ISlower):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="65 91 97 123">')
+ else:
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="65 91">')
+ }
+ | CCE_NEG_ALNUM
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if not ((*__ctype_b_loc())[int(c)] & int(_ISalnum)):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="0 48 58 65 91 97 123 256">')
+ }
+ | CCE_NEG_ALPHA
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if not ((*__ctype_b_loc())[int(c)] & int(_ISalpha)):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="0 65 91 97 123 256">')
+ }
+ | CCE_NEG_BLANK
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if not (c == ord(' ') or c == ord('\t')):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="0 9 10 32 33 256">')
+ }
+  | CCE_NEG_CNTRL
+    {
+      #c = None
+      #c = 0
+      #while c < csize:
+      # if not ((*__ctype_b_loc())[int(c)] & int(_IScntrl)):
+      # ccladd(currccl, c)
+      # c += 1
+      # character_set lists half-open [start, end) pairs.  Everything that is not
+      # a control character is 32..126 and 128..255; DEL (127) is a control
+      # character and must be excluded, so the range is split around it.
+      insert_after(0, '</RegexCharacterLiteral>')
+      insert_before(0, '<RegexCharacterLiteral character_set="32 127 128 256">')
+    }
+ | CCE_NEG_DIGIT
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if not ((*__ctype_b_loc())[int(c)] & int(_ISdigit)):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="0 48 58 256">')
+ }
+ | CCE_NEG_GRAPH
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if not ((*__ctype_b_loc())[int(c)] & int(_ISgraph)):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="0 33 127 256">')
+ }
+ | CCE_NEG_PRINT
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if not ((*__ctype_b_loc())[int(c)] & int(_ISprint)):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="0 32 127 256">')
+ }
+ | CCE_NEG_PUNCT
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if not ((*__ctype_b_loc())[int(c)] & int(_ISpunct)):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="0 33 48 58 65 91 97 123 127 256">')
+ }
+ | CCE_NEG_SPACE
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if not ((*__ctype_b_loc())[int(c)] & int(_ISspace)):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="0 9 14 32 33 256">')
+ }
+ | CCE_NEG_XDIGIT
+ {
+ #c = None
+ #c = 0
+ #while c < csize:
+ # if not ((*__ctype_b_loc())[int(c)] & int(_ISxdigit)):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="0 48 58 65 71 97 103 256">')
+ }
+ | CCE_NEG_LOWER
+ {
+ #if _sf_stk[_sf_top_ix] & int(0x0001):
+ # lwarn('[:^lower:] is ambiguous in case insensitive scanner')
+ #else:
+ # c = None
+ # c = 0
+ # while c < csize:
+ # if not ((*__ctype_b_loc())[int(c)] & int(_ISlower)):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="0 97 123 256">')
+ }
+ | CCE_NEG_UPPER
+ {
+ #if _sf_stk[_sf_top_ix] & int(0x0001):
+ # lwarn('[:^upper:] ambiguous in case insensitive scanner')
+ #else:
+ # c = None
+ # c = 0
+ # while c < csize:
+ # if not ((*__ctype_b_loc())[int(c)] & int(_ISupper)):
+ # ccladd(currccl, c)
+ # c += 1
+ insert_after(0, '</RegexCharacterLiteral>')
+ insert_before(0, '<RegexCharacterLiteral character_set="0 65 91 256">')
+ }
+ ;
+
+string
+ : string CHAR
+ {
+ # Append one character to a quoted string: CHAR (index 1) becomes a RegexCharacterLiteral
+ # covering the single code $2 (emitted as the pair "$2 $2+1"), and the preceding string
+ # plus this literal (index 0..1) are wrapped in RegexSequence.
+ #if $2 == nlch:
+ # rule_has_nl[num_rules] = True
+ #rulelen += 1
+ #if _sf_stk[_sf_top_ix] & int(0x0001) and has_case($2):
+ # $$ = mkor(mkstate($2), mkstate(reverse_case($2)))
+ #else:
+ # $$ = mkstate($2)
+ #$$ = link_machines($1, $$)
+ insert_after(1, '</RegexSequence>')
+ insert_after(1, '</RegexCharacterLiteral>')
+ insert_before(1, '<RegexCharacterLiteral character_set="{0:d} {1:d}">'.format($2, $2 + 1))
+ insert_before(0, '<RegexSequence>')
+ }
+ |
+ {
+ # Empty string so far: emit a RegexEmpty element.
+ #$$ = mkstate(256 + 1)
+ insert_before(0, '<RegexEmpty />')
+ }
+ ;
+
+%%
+
+#def build_eof_action():
+# i = None
+# action_text = []
+# i = 1
+# while i <= scon_stk_ptr:
+# if sceof[scon_stk[i]]:
+# format_pinpoint_message('multiple <<EOF>> rules for start condition %s', scname[scon_stk[i]])
+# else:
+# sceof[scon_stk[i]] = True
+# if previous_continued_action:
+# add_action('YY_RULE_SETUP\n')
+# snprintf(action_text, sizeof action_text, 'case YY_STATE_EOF(%s):\n', scname[scon_stk[i]])
+# add_action(action_text)
+# i += 1
+# line_directive_out(None, 1)
+# add_action('[[')
+# num_rules -= 1
+# num_eof_rules += 1
+#
+#def format_state.synerr(msg, arg):
+# errmsg = []
+# NoneType(snprintf(errmsg, sizeof errmsg, msg, arg))
+# state.synerr(errmsg)
+#
+#def synerr(str):
+# syntaxerror = True
+# pinpoint_message(str)
+#
+#def format_warn(msg, arg):
+# warn_msg = []
+# snprintf(warn_msg, sizeof warn_msg, msg, arg)
+# lwarn(warn_msg)
+#
+#def lwarn(str):
+# line_warning(str, linenum)
+#
+#def format_pinpoint_message(msg, arg):
+# errmsg = []
+# snprintf(errmsg, sizeof errmsg, msg, arg)
+# pinpoint_message(errmsg)
+#
+#def pinpoint_message(str):
+# line_pinpoint(str, linenum)
+#
+#def line_warning(str, line):
+# warning = []
+# if not nowarn:
+# snprintf(warning, sizeof warning, 'warning, %s', str)
+# line_pinpoint(warning, line)
+#
+#def line_pinpoint(str, line):
+# fprintf(stderr, '%s:%d: %s\n', infilename, line, str)
+
+def yyerror(msg):
+  """Parser error hook: ignores msg and does nothing.
+
+  The grammar's own 'error' productions report problems via state.synerr().
+  """
+  return None
+
+def insert_before(n, str):
+  """Insert the markup text str at piece index piece2 + 2 * n.
+
+  Also bumps lex_yy.piece0 and the module-level piece3 counter by one.
+  """
+  global piece3
+  slot = piece2 + 2 * n
+  lex_yy.piece_insert(slot, str)
+  lex_yy.piece0 = lex_yy.piece0 + 1
+  piece3 = piece3 + 1
+
+def insert_after(n, str):
+  """Insert the markup text str at piece index piece2 + 2 * n + 1.
+
+  Also bumps lex_yy.piece0 and the module-level piece3 counter by one.
+  """
+  global piece3
+  slot = piece2 + 2 * n + 1
+  lex_yy.piece_insert(slot, str)
+  lex_yy.piece0 = lex_yy.piece0 + 1
+  piece3 = piece3 + 1
/* scan.l - scanner for flex input -*-C-*- */
%{
+ import state
import y_tab
- #tablesverify = 0
- #tablesext = 0
- trlcontxt = False
- escaped_qstart = ''
- escaped_qend = ''
piece = []
piece0 = 0
doing_codeblock = False
brace_depth = 0
brace_start_line = 0
- nmdef = ''
%}
%option caseless nodefault noreject stack noyy_top_state
#doing_codeblock = False
#brace_depth = 0
#brace_start_line = 0
- #nmdef = ''
<INITIAL>{
^{WS} {
- global indented_code, linenum
+ global indented_code
if not indented_code:
- linenum += 1
+ state.linenum += 1
#line_directive_out(None, 1)
#add_action('[' '[')
yy_push_state(CODEBLOCK)
^"%s"{NAME}? return y_tab.SCDECL
^"%x"{NAME}? return y_tab.XSCDECL
^"%{".*{NL} {
- global indented_code, linenum
+ global indented_code
if not indented_code:
- linenum += 1
+ state.linenum += 1
#line_directive_out(None, 1)
#add_action('[' '[')
yy_push_state(CODEBLOCK)
piece_append('<AST_Text>')
}
^"%top"[[:blank:]]*"{"[[:blank:]]*{NL} {
- global brace_start_line, linenum, brace_depth
- brace_start_line = linenum
- linenum += 1
- #buf_linedir(&top_buf, infilename if infilename else '<stdin>', linenum)
+ global brace_start_line, brace_depth
+ brace_start_line = state.linenum
+ state.linenum += 1
+ #buf_linedir(&top_buf, infilename if infilename else '<stdin>', state.linenum)
brace_depth = 1
yy_push_state(CODEBLOCK_MATCH_BRACE)
}
- ^"%top".* synerr('malformed \'%top\' directive')
+ ^"%top".* state.synerr('malformed \'%top\' directive')
{WS}
}
^"%pointer".*{NL} {
- global linenum
#yytext_is_array = False
- linenum += 1
+ state.linenum += 1
piece_append('<AST_Section1_Options><AST_Section1_Options_Array>')
piece_flush(len(yytext) - 1)
piece_append('</AST_Section1_Options_Array></AST_Section1_Options>')
}
^"%array".*{NL} {
- global linenum
#yytext_is_array = True
- linenum += 1
+ state.linenum += 1
piece_append('<AST_Section1_Options><AST_Section1_Options_Array value="true">')
piece_flush(len(yytext) - 1)
piece_append('</AST_Section1_Options_Array></AST_Section1_Options>')
return y_tab.TOK_OPTION
}
- ^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL} {
- global linenum
- linenum += 1
- }
- ^"%"{LEXOPT}{WS}.*{NL} {
- global linenum
- linenum += 1
- }
+ ^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL} state.linenum += 1
+ ^"%"{LEXOPT}{WS}.*{NL} state.linenum += 1
/* xgettext: no-c-format */
- ^"%"[^sxaceknopr{}].* synerr('unrecognized \'%\' directive')
+ ^"%"[^sxaceknopr{}].* state.synerr('unrecognized \'%\' directive')
^{NAME} {
- global nmstr, didadef
- nmstr = yytext
+ global didadef
+ state.nmstr = yytext
didadef = False
BEGIN(PICKUPDEF)
}
{SCNAME} {
- global nmstr
- nmstr = yytext
+ state.nmstr = yytext
piece_pack()
piece_append('<AST_Name>')
piece_escape(yytext)
piece_pack()
return ~y_tab.NAME
}
- ^{OPTWS}{NL} {
- global linenum
- linenum += 1
- }
+ ^{OPTWS}{NL} state.linenum += 1
{OPTWS}{NL} {
- global linenum
#add_action(yytext)
- linenum += 1
+ state.linenum += 1
}
}
. #add_action(yytext)
{NL} {
- global linenum
- linenum += 1
+ state.linenum += 1
#add_action(yytext)
}
}
"*/" yy_pop_state()
"*"
[^*\n]
- {NL} {
- global linenum
- linenum += 1
- }
+ {NL} state.linenum += 1
}
<EXTENDED_COMMENT>{
")" yy_pop_state()
[^\n\)]+
- {NL} {
- global linenum
- linenum += 1
- }
+ {NL} state.linenum += 1
}
<LINEDIR>{
\n yy_pop_state()
[[:digit:]]+ {
- global linenum
- linenum = myctoi(yytext)
+ state.linenum = int(yytext)
}
\"[^"\n]*\" {
- free(infilename)
- infilename = xstrdup(yytext + 1)
- infilename[len(infilename) - 1] = ord('\0')
+ state.infilename = yytext[1:-1]
}
.
}
<CODEBLOCK>{
^"%}".*{NL} {
- global linenum
- linenum += 1
+ state.linenum += 1
piece_append('</AST_Text>')
piece_flush(len(yytext))
yy_pop_state()
[^\n%\[\]]* #add_action(yytext)
. #add_action(yytext)
{NL} {
- global linenum
- linenum += 1
+ state.linenum += 1
#add_action(yytext)
if indented_code:
piece_flush(len(yytext))
}
{NL} {
- global linenum
- linenum += 1
+ state.linenum += 1
#buf_strnappend(&top_buf, yytext, len(yytext))
}
([^{}\r\n\[\]]+)|[^{}\r\n] #buf_strnappend(&top_buf, yytext, len(yytext))
<<EOF>> {
- global linenum
- linenum = brace_start_line
- synerr('Unmatched \'{\'')
+ state.linenum = brace_start_line
+ state.synerr('Unmatched \'{\'')
yyterminate()
}
}
{WS}
{NOT_WS}[^\r\n]* {
- global nmdef, didadef
- nmdef = yytext.rstrip()
- #ndinstal(nmstr, nmdef)
+ global didadef
+ state.ndinstal(state.nmstr, yytext.rstrip('\t '))
didadef = True
}
{NL} {
- global linenum
if not didadef:
- synerr('incomplete name definition')
+ state.synerr('incomplete name definition')
BEGIN(INITIAL)
- linenum += 1
+ state.linenum += 1
}
}
<OPTION>{
{NL} {
- global linenum
- linenum += 1
+ state.linenum += 1
BEGIN(INITIAL)
}
{WS} {
markup_option('Interactive', option_sense)
}
lex-compat {
- #lex_compat = option_sense
+ state.lex_compat = option_sense
markup_option('LexCompat', option_sense)
}
posix-compat {
- #posix_compat = option_sense
+ state.posix_compat = option_sense
markup_option('PosixCompat', option_sense)
}
line {
\"[^"\n]*\" {
- global nmstr
- nmstr = yytext[1:-1]
+ state.nmstr = yytext[1:-1]
piece_pack()
piece_append('<AST_String>"<AST_Text>')
piece_escape(yytext[1:-1])
}
(([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|. {
- synerr('unrecognized %option: {0:s}'.format(yytext))
+ state.synerr('unrecognized %option: {0:s}'.format(yytext))
BEGIN(RECOVER)
}
}
<RECOVER>.*{NL} {
- global linenum
- linenum += 1
+ state.linenum += 1
BEGIN(INITIAL)
}
}
^{WS} {
- global indented_code, linenum
+ global indented_code
if not indented_code:
- linenum += 1
+ state.linenum += 1
#line_directive_out(None, 1)
#add_action('[' '[')
yy_push_state(CODEBLOCK)
}
^{NOT_WS}.* {
- global indented_code, linenum
+ global indented_code
if bracelevel <= 0:
yyless(0)
yy_set_bol(True)
BEGIN(SECT2)
else:
if not indented_code:
- linenum += 1
+ state.linenum += 1
#line_directive_out(None, 1)
#add_action('[' '[')
yy_push_state(CODEBLOCK)
. #add_action(yytext)
{NL} {
- global linenum
- linenum += 1
+ state.linenum += 1
#add_action(yytext)
}
}
<SECT2>{
- ^{OPTWS}{NL} {
- global linenum
- linenum += 1
- }
+ ^{OPTWS}{NL} state.linenum += 1
^{OPTWS}"%{" {
global indented_code, doing_codeblock, bracelevel
}
"{"/[[:digit:]] {
BEGIN(NUM)
- if lex_compat or posix_compat:
+ if state.lex_compat or state.posix_compat:
return y_tab.BEGIN_REPEAT_POSIX
else:
return y_tab.BEGIN_REPEAT_FLEX
"$"/([[:blank:]]|{NL}) return ord('$')
{WS}"%{" {
- global bracelevel, in_rule, doing_rule_action
+ global bracelevel, doing_rule_action
bracelevel = 1
BEGIN(PERCENT_BRACE_ACTION)
piece_flush(len(yytext) - 2)
piece_append('<AST_Section2_Rule_Action>')
- if in_rule:
+ if state.in_rule:
doing_rule_action = True
- in_rule = False
+ state.in_rule = False
piece_pack()
piece_escape(yytext[:2])
piece_pack()
abort()
}
{WS}"|".*{NL} {
- global linenum
if False: #_sf_stk[_sf_top_ix] & int(0x0004):
amt = int(strchr(yytext, ord('|')) - yytext)
yyless(amt)
else:
#add_action(']' ']')
continued_action = True
- linenum += 1
+ state.linenum += 1
i = 0
- while i < len(yytext) and (yytext[i] == '\t' or yytext[i] == ' '):
+ while i < len(yytext) and yytext[i] in '\t ':
i += 1
piece_flush(i)
piece_pack()
^{WS}
{WS} {
- global bracelevel, in_rule, doing_rule_action
+ global bracelevel, doing_rule_action
if False: #_sf_stk[_sf_top_ix] & int(0x0004):
pass
else:
bracelevel = 0
continued_action = False
BEGIN(ACTION)
- if in_rule:
+ if state.in_rule:
doing_rule_action = True
- in_rule = False
+ state.in_rule = False
piece_pack()
piece_escape(yytext)
piece_pack()
}
{OPTWS}{NL} {
- global linenum, bracelevel, in_rule, doing_rule_action
+ global bracelevel, doing_rule_action
if False: #_sf_stk[_sf_top_ix] & int(0x0004):
- linenum += 1
+ state.linenum += 1
else:
bracelevel = 0
continued_action = False
BEGIN(ACTION)
yyless(len(yytext) - 1)
- if in_rule:
+ if state.in_rule:
doing_rule_action = True
- in_rule = False
+ state.in_rule = False
piece_pack()
piece_escape(yytext)
piece_pack()
^"%%".* {
sectnum = 3
- BEGIN(SECT3_NOESCAPE if no_section3_escape else SECT3)
+ BEGIN(SECT3_NOESCAPE if state.no_section3_escape else SECT3)
#outn('/* Begin user sect3 */')
piece_pack()
piece_append('</AST_Section2>')
piece_escape(yytext)
piece_pack()
piece_append('<AST_Section3>')
- # for some reason flex requires an extra EOF after section 2:
- #return ~YY_NULL
+ # first EOF, we will call scanner until it's returned EOF twice
+ return ~YY_NULL
}
"["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})* {
- global nmstr
#cclval = None
- nmstr = yytext
- #if 0 and (cclval = ccllookup(nmstr)) != 0:
+ state.nmstr = yytext
+ #if 0 and (cclval = ccllookup(state.nmstr)) != 0:
# if input() != ord(']'):
- # synerr('bad character class')
- # #yylval = cclval
- # #cclreuse += 1
+ # state.synerr('bad character class')
+ # y_tab.yylval = cclval
+ # cclreuse += 1
# return y_tab.PREVCCL
if True: #else:
- #cclinstal(nmstr, lastccl + 1)
+ #cclinstal(state.nmstr, lastccl + 1)
yyless(1)
BEGIN(FIRSTCCL)
return ord('[')
* context.
*/
"{"{NAME}"}"[[:space:]]? {
- # fix this later
- #global nmstr
- #nmdefptr = None
- #end_is_ws = None
- #end_ch = None
- #end_ch = yytext[len(yytext) - 1]
- #end_is_ws = 1 if end_ch != ord('}') else 0
- #if len(yytext) - 1 < 2048:
- # strncpy(nmstr, yytext + 1, sizeof nmstr)
- #else:
- # synerr('Input line too long\n')
- # longjmp(flex_main_jmp_buf, 1 + 1)
- #nmstr[len(yytext) - 2 - end_is_ws] = ord('\0')
- #if (nmdefptr = ndlookup(nmstr)) == 0:
- # synerr('undefined definition {{{0:s}}}'.format(nmstr))
- #else:
- # len = len(nmdefptr)
- # if end_is_ws:
- # yyless(len(yytext) - 1)
- # if lex_compat or nmdefptr[0] == ord('^') or len > 0 and nmdefptr[len - 1] == ord('$') or end_is_ws and trlcontxt and not (_sf_stk[_sf_top_ix] & int(0x0004)):
- # i = len(nmdefptr)
- # while i > 0:
- # unput(nmdefptr[--i])
- # if nmdefptr[0] == ord('^'):
- # BEGIN(CARETISBOL)
- # else:
- # unput(ord(')'))
- # i = len(nmdefptr)
- # while i > 0:
- # unput(nmdefptr[--i])
- # if not lex_compat and not posix_compat:
- # unput(ord(':'))
- # unput(ord('?'))
- # unput(ord('('))
+ # Expand a {NAME} reference: look the name up and push its definition
+ # back onto the input so that it is rescanned as pattern text.
+ end_ch = yytext[-1]
+ # yytext is a str, so the last character must be compared with '}' itself;
+ # comparing it with ord('}') (an int) is always unequal and would treat
+ # every match as if it ended in whitespace.
+ end_is_ws = end_ch != '}'
+ # strip the braces, plus the trailing whitespace character when present
+ state.nmstr = yytext[1:-1 - int(end_is_ws)]
+ nmdef = state.ndlookup(state.nmstr)
+ if nmdef is None:
+ state.synerr('undefined definition {{{0:s}}}'.format(state.nmstr))
+ else:
+ if end_is_ws:
+ # give the trailing whitespace character back to the scanner
+ yyless(len(yytext) - 1)
+ if state.lex_compat or len(nmdef) and (nmdef[0] == '^' or nmdef[-1] == '$') or end_is_ws and y_tab.trlcontxt and True: #not (_sf_stk[_sf_top_ix] & int(0x0004)):
+ # these cases push the definition back verbatim, without parentheses
+ unput(nmdef)
+ if len(nmdef) and nmdef[0] == '^':
+ BEGIN(CARETISBOL)
+ else:
+ # otherwise wrap the definition in a group; the '?:' prefix is
+ # omitted in lex/posix compatibility modes
+ unput(
+ '({0:s}{1:s})'.format(
+ '' if state.lex_compat or state.posix_compat else '?:',
+ nmdef
+ )
+ )
}
"/*" {
}
"(?#" {
- if lex_compat or posix_compat:
+ if state.lex_compat or state.posix_compat:
yyless(1)
- sf_push()
+ #sf_push()
return ord('(')
else:
yy_push_state(EXTENDED_COMMENT)
}
"(?" {
- sf_push()
- if lex_compat or posix_compat:
+ #sf_push()
+ if state.lex_compat or state.posix_compat:
yyless(1)
else:
BEGIN(GROUP_WITH_PARAMS)
return ord('(')
}
"(" {
- sf_push()
+ #sf_push()
return ord('(')
}
")" {
- if _sf_top_ix > 0:
- sf_pop()
+ if True: #_sf_top_ix > 0:
+ #sf_pop()
return ord(')')
else:
- synerr('unbalanced parenthesis')
+ state.synerr('unbalanced parenthesis')
}
[/|*+?.(){}] return ord(yytext[0])
. {
- #yylval = ord(yytext[0])
+ y_tab.yylval = ord(yytext[0])
return y_tab.CHAR
}
<SC>{
- {OPTWS}{NL}{OPTWS} {
- global linenum
- linenum += 1
- }
+ {OPTWS}{NL}{OPTWS} state.linenum += 1
[,*] return ord(yytext[0])
">" {
BEGIN(SECT2)
return ord('>')
}
{SCNAME} {
- global nmstr
- nmstr = yytext
+ state.nmstr = yytext
piece_pack()
piece_append('<AST_Name>')
piece_escape(yytext)
piece_pack()
return ~y_tab.NAME
}
- . synerr('bad <start condition>: {0:s}'.format(yytext))
+ . state.synerr('bad <start condition>: {0:s}'.format(yytext))
}
<CARETISBOL>"^" {
<QUOTE>{
[^"\n] {
- #yylval = ord(yytext[0])
+ y_tab.yylval = ord(yytext[0])
return y_tab.CHAR
}
\" {
}
{NL} {
- global linenum
- synerr('missing quote')
+ state.synerr('missing quote')
BEGIN(SECT2)
- linenum += 1
+ state.linenum += 1
return ord('"')
}
}
"^"/("-"|"]") return ord('^')
. {
BEGIN(CCL)
- #yylval = ord(yytext[0])
+ y_tab.yylval = ord(yytext[0])
return y_tab.CHAR
}
}
<CCL>{
-/[^\]\n] return ord('-')
[^\]\n] {
- #yylval = ord(yytext[0])
+ y_tab.yylval = ord(yytext[0])
return y_tab.CHAR
}
"]" {
return ord(']')
}
.|{NL} {
- synerr('bad character class')
+ state.synerr('bad character class')
BEGIN(SECT2)
return ord(']')
}
return y_tab.CCE_NEG_XDIGIT
}
{CCL_EXPR} {
- synerr('bad character class expression: {0:s}'.format(yytext))
+ state.synerr('bad character class expression: {0:s}'.format(yytext))
BEGIN(CCL)
return y_tab.CCE_ALNUM
}
<NUM>{
[[:digit:]]+ {
- #yylval = myctoi(yytext)
+ y_tab.yylval = int(yytext)
return y_tab.NUMBER
}
"," return ord(',')
"}" {
BEGIN(SECT2)
- if lex_compat or posix_compat:
+ if state.lex_compat or state.posix_compat:
return y_tab.END_REPEAT_POSIX
else:
return y_tab.END_REPEAT_FLEX
}
. {
- synerr('bad character inside {}\'s')
+ state.synerr('bad character inside {}\'s')
BEGIN(SECT2)
return ord('}')
}
{NL} {
- global linenum
- synerr('missing }')
+ state.synerr('missing }')
BEGIN(SECT2)
- linenum += 1
+ state.linenum += 1
return ord('}')
}
}
. #add_action(yytext)
{NL} {
- global linenum, doing_rule_action, doing_codeblock
- linenum += 1
+ global doing_rule_action, doing_codeblock
+ state.linenum += 1
#add_action(yytext)
if bracelevel <= 0 or doing_codeblock and indented_code:
#if doing_rule_action:
BEGIN(ACTION_STRING)
}
{NL} {
- global linenum, doing_rule_action
- linenum += 1
+ global doing_rule_action
+ state.linenum += 1
#add_action(yytext)
if bracelevel <= 0:
if doing_rule_action:
(\\\n)* #add_action(yytext)
\\(\\\n)*. #add_action(yytext)
{NL} {
- global linenum
- linenum += 1
+ state.linenum += 1
#add_action(yytext)
if bracelevel <= 0:
BEGIN(SECT2)
/* Nick extra rules for action groups */
/* Nick added: ACTION_GROUP,ELEMENT_GROUP,DOUBLE_QUOTED,SINGLE_QUOTED */
<COMMENT,CODE_COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING,CHARACTER_CONSTANT,ACTION_GROUP,ELEMENT_GROUP,DOUBLE_QUOTED,SINGLE_QUOTED><<EOF>> {
- synerr('EOF encountered inside an action')
+ state.synerr('EOF encountered inside an action')
yyterminate()
}
<EXTENDED_COMMENT,GROUP_WITH_PARAMS,GROUP_MINUS_PARAMS><<EOF>> {
- synerr('EOF encountered inside pattern')
+ state.synerr('EOF encountered inside pattern')
yyterminate()
}
<SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ} {
- #yylval = myesc(str(yytext))
+ y_tab.yylval = state.myesc(yytext)
if YY_START() == FIRSTCCL:
BEGIN(CCL)
return y_tab.CHAR
\\(\\\n)*.
}
<ACTION_GROUP,ELEMENT_GROUP,SINGLE_QUOTED,DOUBLE_QUOTED>{
- {NL} {
- global linenum
- linenum += 1
- }
+ {NL} state.linenum += 1
.
}
-<*>.|\n synerr('bad character: {0:s}'.format(yytext))
+<*>.|\n state.synerr('bad character: {0:s}'.format(yytext))
%%
# return 1
#
#def set_input_file(file):
-# global linenum
# if file and strcmp(file, '-'):
-# infilename = xstrdup(file)
+# state.infilename = xstrdup(file)
# yyin = fopen(infilename, 'r')
# if yyin == None:
# lerr('can\'t open %s', file)
# else:
# yyin = stdin
-# infilename = xstrdup('<stdin>')
-# linenum = 1
+# state.infilename = xstrdup('<stdin>')
+# state.linenum = 1
def piece_append(str):
piece.append(str)
piece0 -= 1
piece_pack()
-# supposed to be somewhere else:
-in_rule = False
-linenum = 1
-nmstr = ''
-no_section3_escape = False
-def synerr(str):
- sys.stderr.write('{0:d}: {1:s}\n'.format(linenum, str))