return False # continued_action
class FLexRule(element.Element):
- # GENERATE ELEMENT(bool bol, int action) BEGIN
+ # GENERATE ELEMENT(bool bol, int group_index, list(ref) groups0, list(ref) groups1, int action) BEGIN
def __init__(
self,
tag = 'AST_Section2_Rule_FLexRule',
text = '',
children = [],
bol = False,
+ group_index = -1,
+ groups0 = [],
+ groups1 = [],
action = -1
):
element.Element.__init__(
if isinstance(bol, str) else
bol
)
+ self.group_index = (
+ element.deserialize_int(group_index)
+ if isinstance(group_index, str) else
+ group_index
+ )
+ self.groups0 = groups0
+ self.groups1 = groups1
self.action = (
element.deserialize_int(action)
if isinstance(action, str) else
def serialize(self, ref_list):
+ # write each generated field as a string attribute via self.set(); the two
+ # group lists are stored as space-separated element.serialize_ref() results
element.Element.serialize(self, ref_list)
self.set('bol', element.serialize_bool(self.bol))
+ self.set('group_index', element.serialize_int(self.group_index))
+ self.set(
+ 'groups0',
+ ' '.join([element.serialize_ref(i, ref_list) for i in self.groups0])
+ )
+ self.set(
+ 'groups1',
+ ' '.join([element.serialize_ref(i, ref_list) for i in self.groups1])
+ )
self.set('action', element.serialize_int(self.action))
def deserialize(self, ref_list):
+ # read the fields back from the attributes written by serialize(); absent
+ # attributes fall back to 'false', '-1' and '' (an empty list of refs)
element.Element.deserialize(self, ref_list)
self.bol = element.deserialize_bool(self.get('bol', 'false'))
+ self.group_index = element.deserialize_int(self.get('group_index', '-1'))
+ self.groups0 = [
+ element.deserialize_ref(i, ref_list)
+ for i in self.get('groups0', '').split()
+ ]
+ self.groups1 = [
+ element.deserialize_ref(i, ref_list)
+ for i in self.get('groups1', '').split()
+ ]
self.action = element.deserialize_int(self.get('action', '-1'))
def copy(self, factory = None):
result = element.Element.copy(
FLexRule if factory is None else factory
)
result.bol = self.bol
+ result.group_index = self.group_index
+ result.groups0 = self.groups0
+ result.groups1 = self.groups1
result.action = self.action
return result
def repr_serialize(self, params):
params.append(
'bol = {0:s}'.format(repr(self.bol))
)
+ if self.group_index != -1:
+ params.append(
+ 'group_index = {0:s}'.format(repr(self.group_index))
+ )
+ if len(self.groups0):
+ params.append(
+ 'groups0 = [{0:s}]'.format(
+ ', '.join([repr(i) for i in self.groups0])
+ )
+ )
+ if len(self.groups1):
+ params.append(
+ 'groups1 = [{0:s}]'.format(
+ ', '.join([repr(i) for i in self.groups1])
+ )
+ )
if self.action != -1:
params.append(
'action = {0:s}'.format(repr(self.action))
if not self.bol:
_ast.start_conditions[i].rules.append(self)
_ast.start_conditions[i].bol_rules.append(self)
+ self.groups0 = []
self[0].post_process(
+ self.groups0,
caseless = _ast[0].caseless
) # regex
+ self.groups1 = []
self[1].post_process(
+ self.groups1,
caseless = _ast[0].caseless
) # trailing context regex
self.action = len(_ast.actions_text)
# element.get_text(_ast.actions_text[-1], 0)
# )
#)
+ _ast.flex_rules.append(self)
+ self.group_index = _ast.n_groups
+ _ast.n_groups += len(self.groups0) + 1 + len(self.groups1)
return continued_action
# GENERATE ELEMENT() BEGIN
return 'ast.AST.Section3({0:s})'.format(', '.join(params))
# GENERATE END
- # GENERATE ELEMENT(list(ref) start_conditions, list(ref) actions_text, list(ref) eof_actions_text, int default_action) BEGIN
+ # GENERATE ELEMENT(list(ref) start_conditions, list(ref) actions_text, list(ref) eof_actions_text, int default_action, list(ref) flex_rules, int n_groups) BEGIN
def __init__(
self,
tag = 'AST',
start_conditions = [],
actions_text = [],
eof_actions_text = [],
- default_action = -1
+ default_action = -1,
+ flex_rules = [],
+ n_groups = -1
):
element.Element.__init__(
self,
if isinstance(default_action, str) else
default_action
)
+ self.flex_rules = flex_rules
+ self.n_groups = (
+ element.deserialize_int(n_groups)
+ if isinstance(n_groups, str) else
+ n_groups
+ )
def serialize(self, ref_list):
element.Element.serialize(self, ref_list)
self.set(
' '.join([element.serialize_ref(i, ref_list) for i in self.eof_actions_text])
)
self.set('default_action', element.serialize_int(self.default_action))
+ self.set(
+ 'flex_rules',
+ ' '.join([element.serialize_ref(i, ref_list) for i in self.flex_rules])
+ )
+ self.set('n_groups', element.serialize_int(self.n_groups))
def deserialize(self, ref_list):
element.Element.deserialize(self, ref_list)
self.start_conditions = [
for i in self.get('eof_actions_text', '').split()
]
self.default_action = element.deserialize_int(self.get('default_action', '-1'))
+ self.flex_rules = [
+ element.deserialize_ref(i, ref_list)
+ for i in self.get('flex_rules', '').split()
+ ]
+ self.n_groups = element.deserialize_int(self.get('n_groups', '-1'))
def copy(self, factory = None):
result = element.Element.copy(
self,
result.actions_text = self.actions_text
result.eof_actions_text = self.eof_actions_text
result.default_action = self.default_action
+ result.flex_rules = self.flex_rules
+ result.n_groups = self.n_groups
return result
def repr_serialize(self, params):
element.Element.repr_serialize(self, params)
params.append(
'default_action = {0:s}'.format(repr(self.default_action))
)
+ if len(self.flex_rules):
+ params.append(
+ 'flex_rules = [{0:s}]'.format(
+ ', '.join([repr(i) for i in self.flex_rules])
+ )
+ )
+ if self.n_groups != -1:
+ params.append(
+ 'n_groups = {0:s}'.format(repr(self.n_groups))
+ )
def __repr__(self):
params = []
self.repr_serialize(params)
self.eof_actions_text = [
AST.Text(text = '\t\t\t\tyyterminate();\n')
]
+ self.flex_rules = []
+ self.n_groups = 0
# variables that won't be serialized
name_to_start_condition = {'INITIAL': 0}
)
)
)
- def to_nfa(self):
+ def to_nfa(self, group_ref_data):
_nfa = nfa.NFA()
for i in self.start_conditions:
- for j in range(2):
- _regex = regex.RegexNone()
- for k in [i.rules, i.bol_rules][j]:
+ for j in [i.rules, i.bol_rules]:
+ _regex = regex.RegexNone(
+ n_groups = 0
+ )
+ regex_group_ref_data = []
+ for k in j:
+ ng = _regex.n_groups
+ ng0 = k[0].n_groups
+ ng1 = k[1].n_groups
_regex = regex.RegexOr(
children = [
_regex,
children = [
k[1]
],
- index = k.action
+ n_groups = 1 + ng1
)
- ]
+ ],
+ n_groups = ng0 + 1 + ng1
)
+ ],
+ n_groups = ng + ng0 + 1 + ng1
+ )
+ regex_group_ref_data.extend(
+ group_ref_data[
+ k.group_index:
+ k.group_index + len(k.groups0) + 1 + len(k.groups1)
]
)
+ ng = _regex.n_groups
_regex = regex.RegexAnd(
children = [
regex.RegexRepeat(
- count0 = 0,
children = [
regex.RegexCharacter(
- character_set = [0, 0x100]
+ character_set = [0, 0x100],
+ n_groups = 0
)
- ]
+ ],
+ n_groups = 0,
+ count0 = 0
),
regex.RegexOr(
children = [
regex.RegexSequence(
children = [
regex.RegexCharacter(
- character_set = [0, 0x100]
+ character_set = [0, 0x100],
+ n_groups = 0
),
regex.RegexGroup(
children = [
- regex.RegexEmpty()
+ regex.RegexEmpty(
+ n_groups = 0
+ )
],
- index = self.default_action
+ n_groups = 1
)
- ]
+ ],
+ n_groups = 1
)
- ]
+ ],
+ n_groups = ng + 1
)
- ]
+ ],
+ n_groups = ng + 1
)
- _regex.add_to_nfa(_nfa)
+ regex_group_ref_data.append(group_ref_data[-1])
+ _regex.add_to_nfa(_nfa, regex_group_ref_data)
return _nfa
# GENERATE FACTORY(regex.factory) BEGIN
n_characters = 0x100
class Regex(element.Element):
- # GENERATE ELEMENT() BEGIN
+ # GENERATE ELEMENT(int n_groups) BEGIN
def __init__(
self,
tag = 'Regex',
attrib = {},
text = '',
- children = []
+ children = [],
+ n_groups = -1
):
element.Element.__init__(
self,
text,
children
)
+ self.n_groups = (
+ element.deserialize_int(n_groups)
+ if isinstance(n_groups, str) else
+ n_groups
+ )
+ def serialize(self, ref_list):
+ # store n_groups as the 'n_groups' string attribute after the base class
+ # has serialized itself
+ element.Element.serialize(self, ref_list)
+ self.set('n_groups', element.serialize_int(self.n_groups))
+ def deserialize(self, ref_list):
+ # read n_groups back from the 'n_groups' attribute, using '-1' when the
+ # attribute is absent
+ element.Element.deserialize(self, ref_list)
+ self.n_groups = element.deserialize_int(self.get('n_groups', '-1'))
def copy(self, factory = None):
+ # copy through element.Element.copy (Regex is the factory unless the caller
+ # supplies one), then carry n_groups over to the result
result = element.Element.copy(
self,
Regex if factory is None else factory
)
+ result.n_groups = self.n_groups
return result
+ def repr_serialize(self, params):
+ # append 'name = value' strings to params; n_groups is only emitted when
+ # it differs from -1
+ element.Element.repr_serialize(self, params)
+ if self.n_groups != -1:
+ params.append(
+ 'n_groups = {0:s}'.format(repr(self.n_groups))
+ )
def __repr__(self):
+ # render as 'regex.Regex(...)' with the comma-joined params collected by
+ # repr_serialize()
params = []
self.repr_serialize(params)
return 'regex.Regex({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, caseless = False):
+ def post_process(self, groups, caseless = False):
+ # post-process every child with the same groups list and caseless flag;
+ # afterwards self.n_groups is the sum of the children's n_groups
+ # (RegexGroup.post_process is the override that appends to groups)
+ self.n_groups = 0
for i in self:
- group_index = i.post_process(group_index, caseless)
- return group_index
- def add_to_groups(self, groups):
- for i in self:
- i.add_to_groups(groups)
- def to_nfa_state(self, _nfa, next_state):
+ i.post_process(groups, caseless)
+ self.n_groups += i.n_groups
+ def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+ # the base class builds no NFA states; the Regex subclasses in this file
+ # provide their own to_nfa_state with this signature
raise NotImplementedError
- def add_to_nfa(self, _nfa):
- _nfa.start_state.append(self.to_nfa_state(_nfa, 0))
+ def add_to_nfa(self, _nfa, group_ref_data):
+ # build this regex into _nfa with group_index 0 and next_state 0, and
+ # append the state index returned by to_nfa_state to _nfa.start_state
+ _nfa.start_state.append(self.to_nfa_state(_nfa, group_ref_data, 0, 0))
class RegexNone(Regex):
# GENERATE ELEMENT() BEGIN
tag = 'RegexNone',
attrib = {},
text = '',
- children = []
+ children = [],
+ n_groups = -1
):
Regex.__init__(
self,
tag,
attrib,
text,
- children
+ children,
+ n_groups
)
def copy(self, factory = None):
result = Regex.copy(
self.repr_serialize(params)
return 'regex.RegexNone({0:s})'.format(', '.join(params))
# GENERATE END
- def to_nfa_state(self, _nfa, next_state):
+ def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+ # adds no states; -1 is the value that the to_nfa_state methods of RegexOr,
+ # RegexAnd, RegexSequence and RegexRepeat in this file test for
return -1
class RegexEmpty(Regex):
tag = 'RegexEmpty',
attrib = {},
text = '',
- children = []
+ children = [],
+ n_groups = -1
):
Regex.__init__(
self,
tag,
attrib,
text,
- children
+ children,
+ n_groups
)
def copy(self, factory = None):
result = Regex.copy(
self.repr_serialize(params)
return 'regex.RegexEmpty({0:s})'.format(', '.join(params))
# GENERATE END
- def to_nfa_state(self, _nfa, next_state):
+ def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+ # adds no states: the entry state is simply the caller's next_state
return next_state
class RegexCharacter(Regex):
attrib = {},
text = '',
children = [],
+ n_groups = -1,
character_set = []
):
Regex.__init__(
tag,
attrib,
text,
- children
+ children,
+ n_groups
)
self.character_set = (
[element.deserialize_int(i) for i in character_set.split()]
self.repr_serialize(params)
return 'regex.RegexCharacter({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, caseless = False):
- group_index = Regex.post_process(self, group_index, caseless)
+ def post_process(self, groups, caseless = False):
+ Regex.post_process(self, groups, caseless)
if caseless:
temp = bisect_set.bisect_set_and(
self.character_set,
[i ^ 0x20 for i in temp if i >= 0x60] +
[i ^ 0x20 for i in temp if i < 0x60]
)
- return group_index
- def to_nfa_state(self, _nfa, next_state):
+ def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+ # append one (STATE_CHARACTER, character_set, next_state) tuple to
+ # _nfa.states and return its index; group_ref_data and group_index are
+ # not used here
new_state = len(_nfa.states)
- _nfa.states.append((nfa.NFA.STATE_CHARACTER, self.character_set, next_state))
+ _nfa.states.append(
+ (nfa.NFA.STATE_CHARACTER, self.character_set, next_state)
+ )
return new_state
class RegexCharacterLiteral(RegexCharacter):
attrib = {},
text = '',
children = [],
+ n_groups = -1,
character_set = []
):
RegexCharacter.__init__(
attrib,
text,
children,
+ n_groups,
character_set
)
def copy(self, factory = None):
self.repr_serialize(params)
return 'regex.RegexCharacterLiteral({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 1, caseless = False):
- group_index = RegexCharacter.post_process(self, group_index, False)
+ def post_process(self, groups, caseless = False):
+ RegexCharacter.post_process(self, groups, False)
if caseless:
temp = bisect_set.bisect_set_and(
self.character_set,
[i ^ 0x20 for i in temp if i >= 0x60] +
[i ^ 0x20 for i in temp if i < 0x60]
)
- return group_index
class RegexCharacterRange(RegexCharacter):
# GENERATE ELEMENT() BEGIN
attrib = {},
text = '',
children = [],
+ n_groups = -1,
character_set = []
):
RegexCharacter.__init__(
attrib,
text,
children,
+ n_groups,
character_set
)
def copy(self, factory = None):
self.repr_serialize(params)
return 'regex.RegexCharacterRange({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, caseless = False):
- group_index = RegexCharacter.post_process(self, group_index, False)
+ def post_process(self, groups, caseless = False):
+ RegexCharacter.post_process(self, groups, False)
self.character_set = [self[0].character_set[0], self[1].character_set[-1]]
if caseless:
temp = bisect_set.bisect_set_and(
[i ^ 0x20 for i in temp if i >= 0x60] +
[i ^ 0x20 for i in temp if i < 0x60]
)
- return group_index
class RegexCharacterOr(RegexCharacter):
# GENERATE ELEMENT() BEGIN
attrib = {},
text = '',
children = [],
+ n_groups = -1,
character_set = []
):
RegexCharacter.__init__(
attrib,
text,
children,
+ n_groups,
character_set
)
def copy(self, factory = None):
self.repr_serialize(params)
return 'regex.RegexCharacterOr({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, caseless = False):
- group_index = RegexCharacter.post_process(self, group_index, caseless)
- self.character_set = bisect_set.bisect_set_or(self[0].character_set, self[1].character_set)
- return group_index
+ def post_process(self, groups, caseless = False):
+ # run the RegexCharacter post-processing first, then replace
+ # character_set with bisect_set_or of the two children's character sets
+ RegexCharacter.post_process(self, groups, caseless)
+ self.character_set = bisect_set.bisect_set_or(
+ self[0].character_set,
+ self[1].character_set
+ )
class RegexCharacterAnd(RegexCharacter):
# GENERATE ELEMENT() BEGIN
attrib = {},
text = '',
children = [],
+ n_groups = -1,
character_set = []
):
RegexCharacter.__init__(
attrib,
text,
children,
+ n_groups,
character_set
)
def copy(self, factory = None):
self.repr_serialize(params)
return 'regex.RegexCharacterAnd({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, caseless = False):
- group_index = RegexCharacter.post_process(self, group_index, caseless)
- self.character_set = bisect_set.bisect_set_and(self[0].character_set, self[1].character_set)
- return group_index
+ def post_process(self, groups, caseless = False):
+ # run the RegexCharacter post-processing first, then replace
+ # character_set with bisect_set_and of the two children's character sets
+ RegexCharacter.post_process(self, groups, caseless)
+ self.character_set = bisect_set.bisect_set_and(
+ self[0].character_set,
+ self[1].character_set
+ )
class RegexCharacterNot(RegexCharacter):
# GENERATE ELEMENT() BEGIN
attrib = {},
text = '',
children = [],
+ n_groups = -1,
character_set = []
):
RegexCharacter.__init__(
attrib,
text,
children,
+ n_groups,
character_set
)
def copy(self, factory = None):
self.repr_serialize(params)
return 'regex.RegexCharacterNot({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, caseless = False):
- group_index = RegexCharacter.post_process(self, group_index, caseless)
+ def post_process(self, groups, caseless = False):
+ # run the RegexCharacter post-processing first, then replace
+ # character_set with bisect_set_not of the single child's character set
+ RegexCharacter.post_process(self, groups, caseless)
self.character_set = bisect_set.bisect_set_not(self[0].character_set)
- return group_index
class RegexOr(Regex):
# GENERATE ELEMENT() BEGIN
tag = 'RegexOr',
attrib = {},
text = '',
- children = []
+ children = [],
+ n_groups = -1
):
Regex.__init__(
self,
tag,
attrib,
text,
- children
+ children,
+ n_groups
)
def copy(self, factory = None):
result = Regex.copy(
self.repr_serialize(params)
return 'regex.RegexOr({0:s})'.format(', '.join(params))
# GENERATE END
- def to_nfa_state(self, _nfa, next_state):
- child0_state = self[0].to_nfa_state(_nfa, next_state)
- child1_state = self[1].to_nfa_state(_nfa, next_state)
+ def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+ child0_state = self[0].to_nfa_state(
+ _nfa,
+ group_ref_data,
+ group_index,
+ next_state
+ )
+ child1_state = self[1].to_nfa_state(
+ _nfa,
+ group_ref_data,
+ group_index + self[0].n_groups,
+ next_state
+ )
if child0_state == -1:
return child1_state
if child1_state == -1:
tag = 'RegexAnd',
attrib = {},
text = '',
- children = []
+ children = [],
+ n_groups = -1
):
Regex.__init__(
self,
tag,
attrib,
text,
- children
+ children,
+ n_groups
)
def copy(self, factory = None):
result = Regex.copy(
self.repr_serialize(params)
return 'regex.RegexAnd({0:s})'.format(', '.join(params))
# GENERATE END
- def to_nfa_state(self, _nfa, next_state):
+ def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
join0_state = len(_nfa.states)
- _nfa.states.append(nfa.NFA.join0_state) # takes no arguments so use static one
+ _nfa.states.append(nfa.NFA.join0_state) # no arguments so use static one
join1_state = len(_nfa.states)
_nfa.states.append((nfa.NFA.STATE_JOIN1, next_state))
- child0_state = self[0].to_nfa_state(_nfa, join0_state)
+ child0_state = self[0].to_nfa_state(
+ _nfa,
+ group_ref_data,
+ group_index,
+ join0_state
+ )
if child0_state == -1:
return -1
- child1_state = self[1].to_nfa_state(_nfa, join1_state)
+ child1_state = self[1].to_nfa_state(
+ _nfa,
+ group_ref_data,
+ group_index + self[0].n_groups,
+ join1_state
+ )
if child1_state == -1:
return -1
new_state = len(_nfa.states)
tag = 'RegexSequence',
attrib = {},
text = '',
- children = []
+ children = [],
+ n_groups = -1
):
Regex.__init__(
self,
tag,
attrib,
text,
- children
+ children,
+ n_groups
)
def copy(self, factory = None):
result = Regex.copy(
self.repr_serialize(params)
return 'regex.RegexSequence({0:s})'.format(', '.join(params))
# GENERATE END
- def to_nfa_state(self, _nfa, next_state):
- child1_state = self[1].to_nfa_state(_nfa, next_state)
- if child1_state == -1:
+ def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+ # built back to front: self[1] is built first and its entry state becomes
+ # the next_state handed to self[0]; self[1] is given the group indices
+ # that follow self[0]'s (group_index + self[0].n_groups)
+ next_state = self[1].to_nfa_state(
+ _nfa,
+ group_ref_data,
+ group_index + self[0].n_groups,
+ next_state
+ )
+ # a -1 from self[1] is returned without building self[0]
+ if next_state == -1:
return -1
- return self[0].to_nfa_state(_nfa, child1_state)
+ return self[0].to_nfa_state(
+ _nfa,
+ group_ref_data,
+ group_index,
+ next_state
+ )
class RegexRepeat(Regex):
# GENERATE ELEMENT(int count0, int count1, bool non_greedy) BEGIN
attrib = {},
text = '',
children = [],
+ n_groups = -1,
count0 = -1,
count1 = -1,
non_greedy = False
tag,
attrib,
text,
- children
+ children,
+ n_groups
)
self.count0 = (
element.deserialize_int(count0)
self.repr_serialize(params)
return 'regex.RegexRepeat({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, caseless = False):
+ def post_process(self, groups, caseless = False):
# total hack which will be done in a Python action in future
if len(self) >= 2:
assert self[1].tag == 'Number'
self.count1 = self.count0
del self[1:]
# end total hack
- return Regex.post_process(self, group_index, caseless)
- def to_nfa_state(self, _nfa, next_state):
+ Regex.post_process(self, groups, caseless)
+ def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+ # states are built back to front: first the part after the count0 copies
+ # (a self-referencing state or a chain of skippable copies), then the
+ # count0 copies in front of it; every copy of the child is built with the
+ # same group_index
count0 = self.count0
count1 = self.count1
+ # count1 == -1: presumably "no upper bound" -- confirm against the parser
if count1 == -1:
+ # reserve a placeholder so the child can be given its index as next_state
new_state = len(_nfa.states)
_nfa.states.append(None)
- child_state = self[0].to_nfa_state(_nfa, new_state)
- if child_state == -1:
- new_state = next_state # note: unreachable state remains invalid (None)
- else:
+ child_state = self[0].to_nfa_state(
+ _nfa,
+ group_ref_data,
+ group_index,
+ new_state
+ )
+ # when the child returns -1 the placeholder stays None and next_state is
+ # left unchanged; otherwise the placeholder becomes a STATE_OR whose
+ # operand order depends on non_greedy
+ if child_state != -1:
_nfa.states[new_state] = (
(nfa.NFA.STATE_OR, next_state, child_state)
if self.non_greedy else
(nfa.NFA.STATE_OR, child_state, next_state)
)
+ next_state = new_state
else:
- new_state = next_state
+ # chain (count1 - count0) copies of the child, each entered through a
+ # STATE_OR whose other operand is done_state, the original next_state
+ done_state = next_state
for i in range(count1 - count0):
- child_state = self[0].to_nfa_state(_nfa, new_state)
+ child_state = self[0].to_nfa_state(
+ _nfa,
+ group_ref_data,
+ group_index,
+ next_state
+ )
if child_state == -1:
break
new_state = len(_nfa.states)
_nfa.states.append(
- (nfa.NFA.STATE_OR, next_state, child_state)
+ (nfa.NFA.STATE_OR, done_state, child_state)
if self.non_greedy else
- (nfa.NFA.STATE_OR, child_state, next_state)
+ (nfa.NFA.STATE_OR, child_state, done_state)
)
+ next_state = new_state
+ # put count0 copies of the child in front; stop at the first -1, which is
+ # then the value returned
for i in range(count0):
- new_state = self[0].to_nfa_state(_nfa, new_state)
- if new_state == -1:
+ next_state = self[0].to_nfa_state(
+ _nfa,
+ group_ref_data,
+ group_index,
+ next_state
+ )
+ if next_state == -1:
break
- return new_state
+ return next_state
class RegexGroup(Regex):
- class Attribute(element.Element):
- # GENERATE ELEMENT(str name, str value) BEGIN
+ # Regex node whose last child is bracketed by two STATE_MARK states when
+ # built into an NFA; it counts as one group in n_groups
+ # GENERATE ELEMENT() BEGIN
+ def __init__(
+ self,
+ tag = 'RegexGroup',
+ attrib = {},
+ text = '',
+ children = [],
+ n_groups = -1
+ ):
+ Regex.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children,
+ n_groups
+ )
+ def copy(self, factory = None):
+ result = Regex.copy(
+ self,
+ RegexGroup if factory is None else factory
+ )
+ return result
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'regex.RegexGroup({0:s})'.format(', '.join(params))
+ # GENERATE END
+ def post_process(self, groups, caseless = False):
+ # we use -1 here because named or action groups use self[0] for text
+ # this group is appended to groups before its content is processed, so
+ # it comes before any groups the content appends
+ groups.append(self)
+ self[-1].post_process(groups, caseless)
+ # one for this group plus whatever the content counted
+ self.n_groups = self[-1].n_groups + 1
+ def to_nfa_state(self, _nfa, group_ref_data, group_index, next_state):
+ # built back to front: the mark carrying group_ref_data[group_index][1]
+ # leads to next_state and is what the content continues to
+ new_state = len(_nfa.states)
+ _nfa.states.append(
+ (nfa.NFA.STATE_MARK, group_ref_data[group_index][1], next_state)
+ )
+ next_state = new_state
+ # the content's own groups start at group_index + 1
+ next_state = self[-1].to_nfa_state(
+ _nfa,
+ group_ref_data,
+ group_index + 1,
+ next_state
+ )
+ # -1 from the content is passed on (the mark appended above stays in
+ # _nfa.states)
+ if next_state == -1:
+ return -1
+ # the mark carrying group_ref_data[group_index][0] leads into the content
+ # and is the entry state returned
+ new_state = len(_nfa.states)
+ _nfa.states.append(
+ (nfa.NFA.STATE_MARK, group_ref_data[group_index][0], next_state)
+ )
+ return new_state
+
+# internal base class
+class Text(element.Element):
+ # Element subclass that adds get_text(); RegexGroupName.Text and
+ # RegexGroupAction.Text in this file derive from it
+ # GENERATE ELEMENT() BEGIN
+ def __init__(
+ self,
+ tag = 'Text',
+ attrib = {},
+ text = '',
+ children = []
+ ):
+ element.Element.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ def copy(self, factory = None):
+ result = element.Element.copy(
+ self,
+ Text if factory is None else factory
+ )
+ return result
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'regex.Text({0:s})'.format(', '.join(params))
+ # GENERATE END
+ def get_text(self):
+ # delegates to element.get_text with this element and index 0
+ return element.get_text(self, 0)
+
+class RegexGroupName(RegexGroup):
+ # RegexGroup subclass with its own tag and a nested Text element class; the
+ # patch gives it only generated boilerplate (no extra fields or overrides)
+ class Text(Text):
+ # GENERATE ELEMENT() BEGIN
def __init__(
self,
- tag = 'RegexGroup_Attribute',
+ tag = 'RegexGroupName_Text',
attrib = {},
text = '',
- children = [],
- name = '',
- value = ''
+ children = []
):
- element.Element.__init__(
+ Text.__init__(
self,
tag,
attrib,
text,
children
)
- self.name = name
- self.value = value
- def serialize(self, ref_list):
- element.Element.serialize(self, ref_list)
- self.set('name', element.serialize_str(self.name))
- self.set('value', element.serialize_str(self.value))
- def deserialize(self, ref_list):
- element.Element.deserialize(self, ref_list)
- self.name = element.deserialize_str(self.get('name', ''))
- self.value = element.deserialize_str(self.get('value', ''))
def copy(self, factory = None):
+ # NOTE(review): inside a method body the bare name Text resolves to the
+ # module-level Text class, not to this nested RegexGroupName.Text (class
+ # scopes are not visible from methods), so the default factory here is
+ # the base Text -- confirm that is intended; this is generator output
- result = element.Element.copy(
+ result = Text.copy(
self,
- Attribute if factory is None else factory
+ Text if factory is None else factory
)
- result.name = self.name
- result.value = self.value
return result
- def repr_serialize(self, params):
- element.Element.repr_serialize(self, params)
- if self.name != '':
- params.append(
- 'name = {0:s}'.format(repr(self.name))
- )
- if self.value != '':
- params.append(
- 'value = {0:s}'.format(repr(self.value))
- )
def __repr__(self):
params = []
self.repr_serialize(params)
- return 'regex.RegexGroup.Attribute({0:s})'.format(', '.join(params))
+ return 'regex.RegexGroupName.Text({0:s})'.format(', '.join(params))
# GENERATE END
- # GENERATE ELEMENT(int index, str name, list(ref) attributes) BEGIN
+ # GENERATE ELEMENT() BEGIN
def __init__(
self,
- tag = 'RegexGroup',
+ tag = 'RegexGroupName',
attrib = {},
text = '',
children = [],
- index = -1,
- name = '',
- attributes = []
+ n_groups = -1
):
- Regex.__init__(
+ RegexGroup.__init__(
self,
tag,
attrib,
text,
- children
- )
- self.index = (
- element.deserialize_int(index)
- if isinstance(index, str) else
- index
- )
- self.name = name
- self.attributes = attributes
- def serialize(self, ref_list):
- Regex.serialize(self, ref_list)
- self.set('index', element.serialize_int(self.index))
- self.set('name', element.serialize_str(self.name))
- self.set(
- 'attributes',
- ' '.join([element.serialize_ref(i, ref_list) for i in self.attributes])
+ children,
+ n_groups
)
- def deserialize(self, ref_list):
- Regex.deserialize(self, ref_list)
- self.index = element.deserialize_int(self.get('index', '-1'))
- self.name = element.deserialize_str(self.get('name', ''))
- self.attributes = [
- element.deserialize_ref(i, ref_list)
- for i in self.get('attributes', '').split()
- ]
def copy(self, factory = None):
- result = Regex.copy(
+ result = RegexGroup.copy(
self,
- RegexGroup if factory is None else factory
+ RegexGroupName if factory is None else factory
)
- result.index = self.index
- result.name = self.name
- result.attributes = self.attributes
return result
- def repr_serialize(self, params):
- Regex.repr_serialize(self, params)
- if self.index != -1:
- params.append(
- 'index = {0:s}'.format(repr(self.index))
- )
- if self.name != '':
- params.append(
- 'name = {0:s}'.format(repr(self.name))
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'regex.RegexGroupName({0:s})'.format(', '.join(params))
+ # GENERATE END
+
+class RegexGroupAction(RegexGroup):
+ # RegexGroup subclass with its own tag and a nested Text element class; the
+ # patch gives it only generated boilerplate (no extra fields or overrides)
+ class Text(Text):
+ # GENERATE ELEMENT() BEGIN
+ def __init__(
+ self,
+ tag = 'RegexGroupAction_Text',
+ attrib = {},
+ text = '',
+ children = []
+ ):
+ Text.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
)
- if len(self.attributes):
- params.append(
- 'attributes = [{0:s}]'.format(
- ', '.join([repr(i) for i in self.attributes])
- )
+ def copy(self, factory = None):
+ # NOTE(review): inside a method body the bare name Text resolves to the
+ # module-level Text class, not to this nested RegexGroupAction.Text
+ # (class scopes are not visible from methods), so the default factory
+ # here is the base Text -- confirm that is intended; generator output
+ result = Text.copy(
+ self,
+ Text if factory is None else factory
)
+ return result
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'regex.RegexGroupAction.Text({0:s})'.format(', '.join(params))
+ # GENERATE END
+
+ # GENERATE ELEMENT() BEGIN
+ def __init__(
+ self,
+ tag = 'RegexGroupAction',
+ attrib = {},
+ text = '',
+ children = [],
+ n_groups = -1
+ ):
+ RegexGroup.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children,
+ n_groups
+ )
+ def copy(self, factory = None):
+ result = RegexGroup.copy(
+ self,
+ RegexGroupAction if factory is None else factory
+ )
+ return result
def __repr__(self):
params = []
self.repr_serialize(params)
- return 'regex.RegexGroup({0:s})'.format(', '.join(params))
+ return 'regex.RegexGroupAction({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, caseless = False):
- # total hack which will be done in a Python action in future
- if len(self) >= 2:
- assert self[0].tag == 'GroupName'
- self.name = self[0].text[1:-1]
- del self[:1]
- # end total hack
- self.index = group_index
- group_index += 1
- return Regex.post_process(self, group_index, caseless)
- def add_to_groups(self, groups):
- assert len(groups) == self.index
- groups.append(
- (self.name, {i.name: i.value for i in self.attributes})
- )
- return Regex.add_to_groups(self, groups)
- def to_nfa_state(self, _nfa, next_state):
- mark_state = len(_nfa.states)
- _nfa.states.append((nfa.NFA.STATE_MARK, self.index * 2 + 1, next_state))
- child_state = self[0].to_nfa_state(_nfa, mark_state)
- if child_state == -1:
- return -1
- new_state = len(_nfa.states)
- _nfa.states.append((nfa.NFA.STATE_MARK, self.index * 2, child_state))
- return new_state
-
+
# GENERATE FACTORY(element.Element) BEGIN
tag_to_class = {
'Regex': Regex,
'RegexSequence': RegexSequence,
'RegexRepeat': RegexRepeat,
'RegexGroup': RegexGroup,
- 'RegexGroup_Attribute': RegexGroup.Attribute
+ 'Text': Text,
+ 'RegexGroupName': RegexGroupName,
+ 'RegexGroupName_Text': RegexGroupName.Text,
+ 'RegexGroupAction': RegexGroupAction,
+ 'RegexGroupAction_Text': RegexGroupAction.Text
}
def factory(tag, attrib = {}, *args, **kwargs):
+ # look tag up in tag_to_class, falling back to element.Element for unknown
+ # tags, and construct that class with the arguments as given
return tag_to_class.get(tag, element.Element)(tag, attrib, *args, **kwargs)
)
)
- groups = []
- _regex.add_to_groups(groups)
- _nfa = nfa.NFA(groups)
+ _nfa = nfa.NFA()
_regex.add_to_nfa(_nfa)
sys.stdout.write(
wrap_repr.wrap_repr(