accept[n_states] = n_acclist
accept_set = set()
for k in [j for i in threads0[prefix_slop:] for j in i]:
- acc = k >> 1
- if k & 1:
- if (acc | flex_dfa.FlexDFA.YY_TRAILING_HEAD_MASK) not in accept_set:
- # look back to start of trailing context, then accept
- acc |= flex_dfa.FlexDFA.YY_TRAILING_MASK
- # otherwise zero length trailing context, accept immediately
- else:
- # mark start of (hopefully safe) trailing context
- acc |= flex_dfa.FlexDFA.YY_TRAILING_HEAD_MASK
- if acc not in accept_set:
- if n_acclist >= acclist.shape[0]:
- # extend acclist
- new_acclist = numpy.zeros(
- (acclist.shape[0] * 2,),
- numpy.uint16
- )
- new_acclist[:acclist.shape[0]] = acclist
- acclist = new_acclist
- acclist[n_acclist] = acc
- n_acclist += 1
- accept_set.add(acc)
+ if k != -1: # ignore user-defined groups
+ acc = k >> 1
+ if k & 1:
+ if (
+ (acc | flex_dfa.FlexDFA.YY_TRAILING_HEAD_MASK) not in accept_set
+ ):
+ # look back to start of trailing context, then accept
+ acc |= flex_dfa.FlexDFA.YY_TRAILING_MASK
+ # otherwise zero length trailing context, accept immediately
+ else:
+ # mark start of (hopefully safe) trailing context
+ acc |= flex_dfa.FlexDFA.YY_TRAILING_HEAD_MASK
+ if acc not in accept_set:
+ if n_acclist >= acclist.shape[0]:
+ # extend acclist
+ new_acclist = numpy.zeros(
+ (acclist.shape[0] * 2,),
+ numpy.uint16
+ )
+ new_acclist[:acclist.shape[0]] = acclist
+ acclist = new_acclist
+ acclist[n_acclist] = acc
+ n_acclist += 1
+ accept_set.add(acc)
# calculate transition row from self.state character-to-action table
if n_states >= transitions.shape[0]:
+# Copyright (C) 2018 Nick Downing <nick@ndcode.org>
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 51
+# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+
import xml.etree.ElementTree
class Element(xml.etree.ElementTree._Element_Py):
else:
root[i - 1].tail = text
-def to_end_relative(root, pos, off):
- assert pos >= 0 and off >= 0
- off -= len(get_text(root, pos))
- pos -= len(root) + 1
- return pos, off
-
-def to_start_relative(root, pos, off):
- assert pos < 0 and off <= 0
- pos += len(root) + 1
- off += len(get_text(root, pos))
- return pos, off
+def to_text(root):
+  """Flatten `root` into a single string.
+
+  For each child index i this emits get_text(root, i) followed by the
+  recursively flattened child root[i]; the final piece is
+  get_text(root, len(root)), i.e. the text slot after the last child.
+  """
+  pieces = []
+  for i in range(len(root)):
+    # text slot in front of child i, then the child's own flattened text
+    pieces.append(get_text(root, i))
+    pieces.append(to_text(root[i]))
+  pieces.append(get_text(root, len(root)))
+  return ''.join(pieces)
+
+def concatenate(children, factory = Element, *args, **kwargs):
+  """Build one element out of the contents of several.
+
+  A new element is created with factory(*args, **kwargs).  For every item
+  of `children`, its text slot 0 is appended to the text slot currently at
+  the end of the result, and then its sub-elements are appended to the
+  result.  The sub-elements are the same objects, not copies.
+  """
+  root = factory(*args, **kwargs)
+  for child in children:
+    end = len(root)
+    # join the text at the seam before the new sub-elements are attached
+    seam_text = get_text(root, end) + get_text(child, 0)
+    set_text(root, end, seam_text)
+    root[end:] = child[:]
+  return root
import regex
def generate_flex(_ast, _element, home_dir, skel_file, out_file):
- _nfa = _ast.to_nfa()
+ # generate group_ref_data which emulates the old way where
+ # start = even, end = odd, remaining bits = flex rule index,
+ # ignoring user-defined groups by putting start = end = -1:
+ group_ref_data = []
+ for i in range(len(_ast.flex_rules)):
+ group_ref_data.extend(
+ [(-1, -1) for j in range(len(_ast.flex_rules[i].groups0))] +
+ [(i * 2, i * 2 + 1)] +
+ [(-1, -1) for j in range(len(_ast.flex_rules[i].groups1))]
+ )
+
+ _nfa = _ast.to_nfa(group_ref_data)
- # end of buffer expression (do it here because only necessary for flex)
- _regex = regex.RegexGroup(children = [regex.RegexEmpty()])
- _regex.post_process(len(_ast.actions_text))
- _regex.add_to_nfa(_nfa)
+ # end of buffer expression (do here because only necessary for flex)
+ eob_regex = regex.RegexGroup(children = [regex.RegexEmpty()])
+ eob_groups = []
+ eob_regex.post_process(eob_groups, caseless = _ast[0].caseless)
+ assert len(eob_groups) == 1
+ eob_regex.add_to_nfa(
+ _nfa,
+ [(len(_ast.flex_rules) * 2, len(_ast.flex_rules) * 2 + 1)]
+ )
_flex_dfa = _nfa.to_dfa().to_flex_dfa()
self.text = text
class YYBufferState(YYBufferList):
- def __init__(self, next = None, file_in = None):
+ def __init__(self, next = None, file_in = None, at_bol = True):
YYBufferList.__init__(self, next)
self.file_in = file_in
+ self.at_bol = at_bol
yyin = sys.stdin
yyout = sys.stdout
-yy_buffer_stack = [YYBufferState(None, None)]
+yy_buffer_stack = [YYBufferState()]
yystart = INITIAL
+yystart_stack = []
yy_threads0 = [None]
yy_threads1 = [None]
yy_prefix_slop = 1
yytext = ''
yytext_len = 0
+YY_NULL = 0
+
def REJECT():
raise YYReject()
yytext_len -= i
yy_buffer_stack[-1].next = YYBufferBlock(yy_buffer_stack[-1].next, 0, text)
+def ECHO():
+  """Write the current yytext to yyout."""
+  matched = yytext
+  yyout.write(matched)
+
def yy_rule_start():
global yytext, yytext_len
yytext = yy_group_text[:yy_group_stack[-1]]
yytext_len = yy_group_stack[-1]
del yy_group_stack[-2:]
+ # NOTE: strictly, at_bol should also be refreshed after yyless() and
+ # REJECT(), with the prior state saved in case they leave yytext empty;
+ # flex itself does not appear to do so, so we match flex for compatibility:
+ if len(yytext):
+ yy_buffer_stack[-1].at_bol = yytext[-1] == '\n'
def yy_group_end():
pass
+def BEGIN(start):
+  """Make `start` the value of the module-level yystart.
+
+  yylex() reads yystart when it picks its initial DFA action.
+  """
+  global yystart
+  yystart = start
+
+def YY_START():
+  """Return the current value of the module-level yystart."""
+  current = yystart
+  return current
+
+def yy_push_state(start):
+  """Save the current yystart on yystart_stack, then switch to `start`."""
+  global yystart
+  saved, yystart = yystart, start
+  yystart_stack.append(saved)
+
+def yy_pop_state():
+  """Restore yystart from the most recent entry of yystart_stack.
+
+  The entry is removed from the stack; popping an empty stack raises
+  whatever yystart_stack.pop() raises.
+  """
+  global yystart
+  restored = yystart_stack.pop()
+  yystart = restored
+
+def YY_AT_BOL():
+  """Return the at_bol flag of the buffer on top of yy_buffer_stack."""
+  current_buffer = yy_buffer_stack[-1]
+  return current_buffer.at_bol
+
+def yy_set_bol(at_bol):
+  """Set the at_bol flag of the buffer on top of yy_buffer_stack.
+
+  Any truthy `at_bol` sets the flag and any falsy value clears it.  The
+  value is stored as a strict bool because yylex() adds int(at_bol) to
+  yystart * 2 when indexing yy_dfa_start_action; storing a raw value such
+  as 2 would make that index point at the wrong entry.
+  """
+  yy_buffer_stack[-1].at_bol = bool(at_bol)
+
# GENERATE SECTION2
def yylex():
if block is not None:
block_pos = block.pos
- action = yy_dfa_start_action[yystart]
+ action = yy_dfa_start_action[
+ yystart * 2 + int(yy_buffer_stack[-1].at_bol)
+ ]
while action != -1:
state, transition = yy_dfa_actions[action]
#print('i', i, 'action', action, 'state', state, 'transition', transition)
self.text = text
class YYBufferState(YYBufferList):
- def __init__(self, next = None, file_in = None):
+ def __init__(self, next = None, file_in = None, at_bol = True):
YYBufferList.__init__(self, next)
self.file_in = file_in
+ self.at_bol = at_bol
yyin = sys.stdin
yyout = sys.stdout
-yy_buffer_stack = [YYBufferState(None, None)]
+yy_buffer_stack = [YYBufferState()]
yystart = INITIAL
+yystart_stack = []
yy_threads0 = [None]
yy_threads1 = [None]
yy_prefix_slop = 1
yy_element_token = None
yy_element_space = None
+YY_NULL = 0
+
def REJECT():
raise YYReject()
yytext_len -= i
yy_buffer_stack[-1].next = YYBufferBlock(yy_buffer_stack[-1].next, 0, text)
+def ECHO():
+  """Write the current yytext to yyout."""
+  matched = yytext
+  yyout.write(matched)
+
def yy_rule_start():
global yytext, yytext_len, yy_element_stack
yytext = yy_group_text[:yy_group_stack[-1]]
yytext_len = yy_group_stack[-1]
del yy_group_stack[-2:]
+ # NOTE: strictly, at_bol should also be refreshed after yyless() and
+ # REJECT(), with the prior state saved in case they leave yytext empty;
+ # flex itself does not appear to do so, so we match flex for compatibility:
+ if len(yytext):
+ yy_buffer_stack[-1].at_bol = yytext[-1] == '\n'
yy_element_stack.append([])
def yy_group_end():
element.set_text(_element, len(_element), yy_group_text[pos0:pos1])
return _element
+def BEGIN(start):
+  """Make `start` the value of the module-level yystart.
+
+  yylex() reads yystart when it picks its initial DFA action.
+  """
+  global yystart
+  yystart = start
+
+def YY_START():
+  """Return the current value of the module-level yystart."""
+  current = yystart
+  return current
+
+def yy_push_state(start):
+  """Save the current yystart on yystart_stack, then switch to `start`."""
+  global yystart
+  saved, yystart = yystart, start
+  yystart_stack.append(saved)
+
+def yy_pop_state():
+  """Restore yystart from the most recent entry of yystart_stack.
+
+  The entry is removed from the stack; popping an empty stack raises
+  whatever yystart_stack.pop() raises.
+  """
+  global yystart
+  restored = yystart_stack.pop()
+  yystart = restored
+
+def YY_AT_BOL():
+  """Return the at_bol flag of the buffer on top of yy_buffer_stack."""
+  current_buffer = yy_buffer_stack[-1]
+  return current_buffer.at_bol
+
+def yy_set_bol(at_bol):
+  """Set the at_bol flag of the buffer on top of yy_buffer_stack.
+
+  Any truthy `at_bol` sets the flag and any falsy value clears it.  The
+  value is stored as a strict bool because yylex() adds int(at_bol) to
+  yystart * 2 when indexing yy_dfa_start_action; storing a raw value such
+  as 2 would make that index point at the wrong entry.
+  """
+  yy_buffer_stack[-1].at_bol = bool(at_bol)
+
# GENERATE SECTION2
def yylex():
if block is not None:
block_pos = block.pos
- action = yy_dfa_start_action[yystart]
+ action = yy_dfa_start_action[
+ yystart * 2 + int(yy_buffer_stack[-1].at_bol)
+ ]
while action != -1:
state, transition = yy_dfa_actions[action]
#print('i', i, 'action', action, 'state', state, 'transition', transition)
+CFLAGS += -g
+
all: lex_yy.py cal flex0 flex1
# Python scanner test
# cal program
cal: y.tab.o
- ${CC} -o $@ $<
+ ${CC} ${CFLAGS} -o $@ $<
y.tab.o: y.tab.c lex.yy.c
# flex0 program
flex0: flex0.o
- gcc -o $@ $< -ll
+ ${CC} ${CFLAGS} -o $@ $< -ll
flex0.o: flex0.c
# flex1 program
flex1: flex1.o
- gcc -o $@ $< -ll
+ ${CC} ${CFLAGS} -o $@ $< -ll
flex1.o: flex1.c
+# Copyright (C) 2018 Nick Downing <nick@ndcode.org>
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 51
+# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+
import xml.etree.ElementTree
class Element(xml.etree.ElementTree._Element_Py):
else:
root[i - 1].tail = text
+def to_text(root):
+  """Flatten `root` into a single string.
+
+  For each child index i this emits get_text(root, i) followed by the
+  recursively flattened child root[i]; the final piece is
+  get_text(root, len(root)), i.e. the text slot after the last child.
+  """
+  pieces = []
+  for i in range(len(root)):
+    # text slot in front of child i, then the child's own flattened text
+    pieces.append(get_text(root, i))
+    pieces.append(to_text(root[i]))
+  pieces.append(get_text(root, len(root)))
+  return ''.join(pieces)
+
def concatenate(children, factory = Element, *args, **kwargs):
root = factory(*args, **kwargs)
for child in children: