name_to_start_condition,
all_start_conditions,
inclusive_start_conditions,
- parent_start_conditions
+ parent_start_conditions,
+ continued_action
):
raise NotImplementedException
name_to_start_condition,
all_start_conditions,
inclusive_start_conditions,
- parent_start_conditions
+ parent_start_conditions,
+ continued_action
):
section.code_blocks_text.append(self[0])
+ return continued_action
# GENERATE ELEMENT(list(ref) code_blocks_text) BEGIN
def __init__(
plex,
name_to_start_condition,
all_start_conditions,
- inclusive_start_conditions,
- parent_start_conditions
+ inclusive_start_conditions
):
+ parent_start_conditions = set()
+ continued_action = False
for i in self:
- i.post_process(
+ continued_action = i.post_process(
plex,
self,
name_to_start_condition,
all_start_conditions,
inclusive_start_conditions,
- parent_start_conditions
+ parent_start_conditions,
+ continued_action
)
+ assert not continued_action
class Section1(Section1Or2):
class Options(Item):
name_to_start_condition,
all_start_conditions,
inclusive_start_conditions,
- parent_start_conditions
+ parent_start_conditions,
+ continued_action
):
for i in self:
i.post_process(section)
+ return continued_action
class StartConditions(Item):
# GENERATE ELEMENT(bool exclusive) BEGIN
name_to_start_condition,
all_start_conditions,
inclusive_start_conditions,
- parent_start_conditions
+ parent_start_conditions,
+ continued_action
):
for i in self:
name = i.get_text()
eof_action = 0,
)
)
+ return continued_action
# GENERATE ELEMENT(bool caseless, bool default, bool ecs, bool meta_ecs, bool reject, bool stack, bool std_init, bool yymore, bool yy_top_state, bool yywrap) BEGIN
def __init__(
plex,
name_to_start_condition,
all_start_conditions,
- inclusive_start_conditions,
- parent_start_conditions
+ inclusive_start_conditions
):
self.caseless = False
self.default = True
plex,
name_to_start_condition,
all_start_conditions,
- inclusive_start_conditions,
- parent_start_conditions
+ inclusive_start_conditions
)
class Section2(Section1Or2):
name_to_start_condition,
all_start_conditions,
inclusive_start_conditions,
- parent_start_conditions
+ parent_start_conditions,
+ continued_action
):
+ assert not continued_action
if self[0].wildcard:
start_conditions = all_start_conditions
else:
name_to_start_condition[i.get_text()]
)
for i in self[1:]:
- i.post_process(
+ continued_action = i.post_process(
plex,
section,
name_to_start_condition,
all_start_conditions,
inclusive_start_conditions,
- start_conditions # parent_start_conditions
+ start_conditions, # parent_start_conditions
+ continued_action
)
-
+ assert not continued_action
+ return False
class Rule(Item):
class Action(element.Element):
- # GENERATE ELEMENT() BEGIN
+ # GENERATE ELEMENT(bool continued) BEGIN
def __init__(
self,
tag = 'PLex_Section2_Rule_Action',
attrib = {},
text = '',
- children = []
+ children = [],
+ continued = False
):
element.Element.__init__(
self,
text,
children
)
+ self.continued = (
+ element.deserialize_bool(continued)
+ if isinstance(continued, str) else
+ continued
+ )
+ def serialize(self, ref_list):
+ # Run the base Element serialization first, then store the `continued`
+ # flag as the 'continued' attribute, encoded by element.serialize_bool().
+ element.Element.serialize(self, ref_list)
+ self.set('continued', element.serialize_bool(self.continued))
+ def deserialize(self, ref_list):
+ # Run the base Element deserialization first, then rebuild `continued`
+ # from the 'continued' attribute; 'false' is passed to self.get() as the
+ # fallback value (presumably used when the attribute is absent).
+ element.Element.deserialize(self, ref_list)
+ self.continued = element.deserialize_bool(self.get('continued', 'false'))
def copy(self, factory = None):
+ # Copy through the base Element.copy(), constructing the result with Action
+ # unless the caller supplies its own factory.
result = element.Element.copy(
self,
Action if factory is None else factory
)
+ # the base copy is not shown setting `continued`, so carry it over here
+ result.continued = self.continued
return result
+ def repr_serialize(self, params):
+ # Let the base class append its own entries to `params`, then add a
+ # 'continued = ...' entry only when the flag is not False.
+ element.Element.repr_serialize(self, params)
+ if self.continued != False:
+ params.append(
+ 'continued = {0:s}'.format(repr(self.continued))
+ )
def __repr__(self):
params = []
self.repr_serialize(params)
name_to_start_condition,
all_start_conditions,
inclusive_start_conditions,
- parent_start_conditions
+ parent_start_conditions,
+ continued_action
):
if self[0].wildcard:
start_conditions = all_start_conditions
name_to_start_condition[i.get_text()]
)
if isinstance(self[1], PLex.Section2.Rule.EOFRule):
+ assert not continued_action
if len(start_conditions) == 0:
for i in all_start_conditions:
if plex.start_conditions[i].eof_action == 0:
plex.start_conditions[i].eof_action = (
len(plex.eof_actions_text)
)
- plex.eof_actions_text.append(self[2][0] if len(self) > 2 else PLex.Text()) # fix this later
+ assert not self[2].continued
+ plex.eof_actions_text.append(self[2][0])
elif isinstance(self[1], PLex.Section2.Rule.FLexRule):
for i in (
start_conditions
caseless = plex[0].caseless
) # trailing context regex
self[1].action = len(plex.actions_text)
- plex.actions_text.append(self[2][0] if len(self) > 2 else PLex.Text()) # fix this later
+ if self[2].continued:
+ continued_action = True
+ else:
+ plex.actions_text.append(self[2][0])
+ continued_action = False
+ #def to_text(node):
+ # return ''.join(
+ # [
+ # j
+ # for i in range(len(node))
+ # for j in [element.get_text(node, i), to_text(node[i])]
+ # ] +
+ # [element.get_text(node, len(node))]
+ # )
+ #text = '{0:s}/{1:s}'.format(to_text(self[1][0]), to_text(self[1][1]))
+ #element.set_text(
+ # plex.actions_text[-1],
+ # 0,
+ # 'fprintf(stderr, "%d >>>%s<<< {0:s}\\n", yy_start, yytext);\n{1:s}'.format(
+ # text.replace('\\', '\\\\').replace('"', '\\"').replace('%', '%%'),
+ # element.get_text(plex.actions_text[-1], 0)
+ # )
+ #)
else:
assert False
+ return continued_action
class StartConditions(element.Element):
# GENERATE ELEMENT(bool wildcard) BEGIN
name_to_start_condition = {'INITIAL': 0}
all_start_conditions = set([0])
inclusive_start_conditions = set([0])
- start_conditions = set()
# perform the semantic analysis pass
self[0].post_process(
self,
name_to_start_condition,
all_start_conditions,
- inclusive_start_conditions,
- start_conditions # parent_start_conditions
+ inclusive_start_conditions
)
self[1].post_process(
self,
name_to_start_condition,
all_start_conditions,
- inclusive_start_conditions,
- start_conditions # parent_start_conditions
+ inclusive_start_conditions
)
self.default_action = len(self.actions_text)
- self.actions_text.append(PLex.Text(text = 'ECHO;\n'))
-
+ self.actions_text.append(
+ PLex.Text(
+ text = (
+ 'ECHO;\n'
+ if self[0].default else
+ 'YY_FATAL_ERROR( "flex scanner jammed" );\n'
+ )
+ )
+ )
def to_nfa(self):
_nfa = nfa.NFA()
for i in self.start_conditions:
#element.serialize(plex, 'a.xml', 'utf-8')
#plex = element.deserialize('a.xml', ast.factory, 'utf-8')
plex.post_process()
-#element.serialize(plex, 'b.xml', 'utf-8')
-#plex = element.deserialize('b.xml', ast.factory, 'utf-8')
+# NOTE(review): this patch turns on a serialize/deserialize round trip of the
+# post-processed tree through 'b.xml' (a relative path) before generation.
+# Presumably this exercises serialization of the new `continued` attribute;
+# confirm it is meant to stay enabled rather than being leftover debugging.
+element.serialize(plex, 'b.xml', 'utf-8')
+plex = element.deserialize('b.xml', ast.factory, 'utf-8')
flex_dfa.generate(plex, skel_file, out_file)
new_accept[:self.accept.shape[0]] = self.accept
self.accept = new_accept
self.accept[n_states] = n_acclist
+ accept_set = set()
for k in [j for i in threads0[prefix_slop:] for j in i]:
acc = k >> 1
if k & 1:
- if (
- n_acclist == self.accept[n_states] or
- self.acclist[n_acclist - 1] != acc | FlexDFA.YY_TRAILING_HEAD_MASK
- ):
+ if (acc | FlexDFA.YY_TRAILING_HEAD_MASK) not in accept_set:
# look back to start of trailing context, then accept
acc |= FlexDFA.YY_TRAILING_MASK
# otherwise zero length trailing context, accept immediately
else:
# mark start of (hopefully safe) trailing context
acc |= FlexDFA.YY_TRAILING_HEAD_MASK
- if n_acclist >= self.acclist.shape[0]:
- # extend acclist
- new_acclist = numpy.zeros(
- (self.acclist.shape[0] * 2,),
- numpy.uint16
- )
- new_acclist[:self.acclist.shape[0]] = self.acclist
- self.acclist = new_acclist
- self.acclist[n_acclist] = acc
- n_acclist += 1
+ if acc not in accept_set:
+ if n_acclist >= self.acclist.shape[0]:
+ # extend acclist
+ new_acclist = numpy.zeros(
+ (self.acclist.shape[0] * 2,),
+ numpy.uint16
+ )
+ new_acclist[:self.acclist.shape[0]] = self.acclist
+ self.acclist = new_acclist
+ self.acclist[n_acclist] = acc
+ n_acclist += 1
+ accept_set.add(acc)
# calculate transition row from _dfa.state character-to-action table
if n_states >= transitions.shape[0]:
return tag_to_class.get(tag, element.Element)(tag, attrib, *args, **kwargs)
# GENERATE END
-# some of this should be moved into grammar.py:
-#if __name__ == '__main__':
-# import sys
-# import xml.etree.ElementTree
-#
-# regex = RegexAnd(children = [RegexRepeat(children = [RegexCharacterNot(
-#children = [RegexCharacter()], character_set = [0, 256])]), RegexGroup(children = [
-#RegexOr(children = [RegexOr(children = [RegexOr(children = [RegexGroup(children
-#= [RegexRepeat(children = [RegexCharacter(character_set = [9, 14, 32, 33])],
-#one_or_more = True)], group_index = 1, group_name = 'Whitespace'), RegexGroup(
-#children = [RegexRepeat(children = [RegexCharacter(character_set = [48, 58])],
-#one_or_more = True)], group_index = 2, group_name = 'Number')]), RegexGroup(
-#children = [RegexSequence(children = [RegexSequence(children = [RegexSequence(
-#children = [RegexEmpty(), RegexCharacter(character_set = [102, 103])]),
-#RegexCharacter(character_set = [111, 112])]), RegexCharacter(character_set = [114, 115])]
-#)], group_index = 3, group_name = 'For')]), RegexGroup(children = [
-#RegexSequence(children = [RegexCharacter(character_set = [65, 91, 95, 96, 97, 123]),
-#RegexRepeat(children = [RegexCharacter(character_set = [48, 58, 65, 91, 95, 96, 97,
-#123])])])], group_index = 4, group_name = 'Identifier')])], group_index = 0)])
-# #sys.stdout.write(
-# # wrap_repr(
-# # ' regex = {0:s}'.format(repr(regex).replace('regex.', '')),
-# # 79
-# # )
-# #)
-#
-# _nfa = regex.to_nfa()
-# #sys.stdout.write(
-# # wrap_repr(
-# # ' _nfa = {0:s}'.format(repr(_nfa).replace('regex.', '')),
-# # 79
-# # )
-# #)
-#
-# text = ' id 99id id99 for forex '
-# i = 0
-# while i < len(text):
-# print('text "{0:s}"'.format(text[i:i + 72].replace('\n', '$')))
-# thread = _nfa.match_text(text, i)
-# if thread is None:
-# print('no match')
-# break
-# i = thread[0] # end position of overall match
-# group_start = [-1 for j in range(len(_nfa.groups))]
-# group_end = [-1 for j in range(len(_nfa.groups))]
-# while thread is not None:
-# pos, mark, thread = thread
-# group = mark >> 1
-# if (mark & 1) == 0:
-# group_start[group] = pos
-# print(
-# 'group {0:d} name "{1:s}" text "{2:s}"'.format(
-# group,
-# _nfa.groups[group][0],
-# text[group_start[group]:group_end[group]].replace('\n', '$')
-# )
-# )
-# else:
-# group_end[group] = pos
-#
-# dfa = _nfa.to_dfa()
-# #sys.stdout.write(
-# # wrap_repr(
-# # ' dfa = {0:s}'.format(repr(dfa).replace('regex.', '')),
-# # 79
-# # )
-# #)
-#
-# text = ' id 99id id99 for forex '
-# i = 0
-# while i < len(text):
-# print('text "{0:s}"'.format(text[i:i + 72].replace('\n', '$')))
-# thread = dfa.match_text(text, i)
-# if thread is None:
-# print('no match')
-# break
-# i = thread[0] # end position of overall match
-# group_start = [-1 for j in range(len(dfa.groups))]
-# group_end = [-1 for j in range(len(dfa.groups))]
-# while thread is not None:
-# pos, mark, thread = thread
-# group = mark >> 1
-# if (mark & 1) == 0:
-# group_start[group] = pos
-# print(
-# 'group {0:d} name "{1:s}" text "{2:s}"'.format(
-# group,
-# dfa.groups[group][0],
-# text[group_start[group]:group_end[group]].replace('\n', '$')
-# )
-# )
-# else:
-# group_end[group] = pos
-#
+if __name__ == '__main__':
+  # Self-test: hand-build a small tokenizer regex (whitespace, number, the
+  # keyword "for" with an optional trailing "e", identifier), dump it, convert
+  # it to an NFA and then a DFA, and run both automata over a sample string.
+  import sys
+  import xml.etree.ElementTree
+  import wrap_repr
+
+  def _print_matches(automaton, text):
+    # Repeatedly match `text` from the current position with
+    # automaton.match_text() and print every group of every match.
+    # Shared by the NFA and the DFA runs, which previously duplicated
+    # this loop verbatim.
+    #
+    # automaton: object providing match_text(text, pos) and a `groups` list
+    # text: the string to scan
+    i = 0
+    while i < len(text):
+      print('text "{0:s}"'.format(text[i:i + 72].replace('\n', '$')))
+      thread = automaton.match_text(text, i)
+      if thread is None:
+        print('no match')
+        break
+      i = thread[0] # end position of overall match
+      group_start = [-1 for j in range(len(automaton.groups))]
+      group_end = [-1 for j in range(len(automaton.groups))]
+      # thread is a linked (pos, mark, rest) chain; the low bit of mark is 0
+      # for a group start and 1 for a group end, the other bits are the group
+      # NOTE(review): a group is printed when its start mark is seen, using
+      # the end recorded earlier -- presumably the chain lists marks from the
+      # end of the match backwards; verify against match_text()
+      while thread is not None:
+        pos, mark, thread = thread
+        group = mark >> 1
+        if (mark & 1) == 0:
+          group_start[group] = pos
+          print(
+            'group {0:d} name "{1:s}" text "{2:s}"'.format(
+              group,
+              automaton.groups[group][0],
+              text[group_start[group]:group_end[group]].replace('\n', '$')
+            )
+          )
+        else:
+          group_end[group] = pos
+
+  _regex = RegexAnd(
+    children = [
+      RegexRepeat(
+        children = [
+          RegexCharacterNot(
+            children = [
+              RegexCharacterLiteral()
+            ],
+            character_set = [0, 256]
+          )
+        ]
+      ),
+      RegexGroup(
+        children = [
+          RegexOr(
+            children = [
+              RegexOr(
+                children = [
+                  RegexOr(
+                    children = [
+                      RegexOr(
+                        children = [
+                          RegexNone(),
+                          RegexGroup(
+                            children = [
+                              RegexRepeat(
+                                children = [
+                                  RegexCharacterLiteral(
+                                    character_set = [9, 14, 32, 33]
+                                  )
+                                ],
+                                count0 = 1
+                              )
+                            ],
+                            index = 1,
+                            name = 'Whitespace'
+                          )
+                        ]
+                      ),
+                      RegexGroup(
+                        children = [
+                          RegexRepeat(
+                            children = [
+                              RegexCharacterLiteral(
+                                character_set = [48, 58]
+                              )
+                            ],
+                            count0 = 1
+                          )
+                        ],
+                        index = 2,
+                        name = 'Number'
+                      )
+                    ]
+                  ),
+                  RegexGroup(
+                    children = [
+                      RegexSequence(
+                        children = [
+                          RegexSequence(
+                            children = [
+                              RegexSequence(
+                                children = [
+                                  RegexSequence(
+                                    children = [
+                                      RegexEmpty(),
+                                      RegexCharacterLiteral(
+                                        character_set = [102, 103]
+                                      )
+                                    ]
+                                  ),
+                                  RegexCharacterLiteral(
+                                    character_set = [111, 112]
+                                  )
+                                ]
+                              ),
+                              RegexCharacterLiteral(
+                                character_set = [114, 115]
+                              )
+                            ]
+                          ),
+                          RegexRepeat(
+                            children = [
+                              RegexCharacterLiteral(
+                                character_set = [101, 102]
+                              )
+                            ],
+                            count0 = 0,
+                            count1 = 1
+                          )
+                        ]
+                      )
+                    ],
+                    index = 3,
+                    name = 'For'
+                  )
+                ]
+              ),
+              RegexGroup(
+                children = [
+                  RegexSequence(
+                    children = [
+                      RegexCharacterLiteral(
+                        character_set = [65, 91, 95, 96, 97, 123]
+                      ),
+                      RegexRepeat(
+                        children = [
+                          RegexCharacterLiteral(
+                            character_set = [48, 58, 65, 91, 95, 96, 97, 123]
+                          )
+                        ]
+                      )
+                    ]
+                  )
+                ],
+                index = 4,
+                name = 'Identifier'
+              )
+            ]
+          )
+        ],
+        index = 0
+      )
+    ]
+  )
+  sys.stdout.write(
+    wrap_repr.wrap_repr(
+      ' _regex = {0:s}'.format(repr(_regex).replace('regex.', '')),
+      79
+    )
+  )
+
+  # collect the regex's groups, then build the NFA from the regex
+  groups = []
+  _regex.add_to_groups(groups)
+  _nfa = nfa.NFA(groups)
+  _regex.add_to_nfa(_nfa)
+  sys.stdout.write(
+    wrap_repr.wrap_repr(
+      ' _nfa = {0:s}'.format(repr(_nfa).replace('nfa.', '')),
+      79
+    )
+  )
+
+  text = ' id 99id id99 for fore foree forex '
+  _print_matches(_nfa, text)
+
+  _dfa = _nfa.to_dfa()
+  sys.stdout.write(
+    wrap_repr.wrap_repr(
+      ' _dfa = {0:s}'.format(repr(_dfa).replace('dfa.', '')),
+      79
+    )
+  )
+
+  # same sample text, now matched with the DFA
+  _print_matches(_dfa, text)
+
+# move this into grammar.py:
# grammar = Grammar(children = [Grammar.Production(children = [RegexSequence(
#children = [RegexSequence(children = [RegexEmpty(), RegexCharacterRule(character_set
#= [288, 295], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [
#'whitespace_opt')]), RegexCharacter(character_set = [32, 33])])], nonterminal = 36)
#], n_terminals = 258)
# #sys.stdout.write(
-# # wrap_repr(
+# # wrap_repr.wrap_repr(
# # ' grammar = {0:s}'.format(repr(grammar).replace('regex.', '')),
# # 79
# # )
#
# lr1 = grammar.to_lr1()
# #sys.stdout.write(
-# # wrap_repr(
+# # wrap_repr.wrap_repr(
# # ' lr1 = {0:s}'.format(repr(lr1).replace('regex.', '')),
# # 79
# # )
#
# clr1 = lr1.to_clr1()
# #sys.stdout.write(
-# # wrap_repr(
+# # wrap_repr.wrap_repr(
# # ' clr1 = {0:s}'.format(repr(clr1).replace('regex.', '')),
# # 79
# # )
#
# lalr1 = lr1.to_lalr1()
# #sys.stdout.write(
-# # wrap_repr(
+# # wrap_repr.wrap_repr(
# # ' lalr1 = {0:s}'.format(repr(lalr1).replace('regex.', '')),
# # 79
# # )