import sys
# defines the alphabet size; set this to 0x110000 for Unicode
-chars = 0x100
+n_characters = 0x100
def character_set_or(character_set0, character_set1):
    """Return the union of two character sets.

    A character set is a sorted list of breakpoints of even length.
    Passing an odd number of breakpoints means "inside the set", an even
    number means "outside"; a single character c is therefore [c, c + 1].

    Parameters:
        character_set0: first breakpoint list
        character_set1: second breakpoint list

    Returns:
        A new breakpoint list describing the union; inputs are not modified.
    """
    # we walk the merged breakpoints of both children in ascending order;
    # at each breakpoint we evaluate the "or" (max) of the even/odd truth
    # values of each child and emit a breakpoint whenever the output's own
    # even/odd truth value has to flip to match
    result = []
    i = 0
    j = 0
    while True:
        # k = the smallest breakpoint not yet consumed from either child
        if i < len(character_set0):
            k = character_set0[i]
            if j < len(character_set1):
                k = min(k, character_set1[j])
        elif j < len(character_set1):
            k = character_set1[j]
        else:
            break
        # consume k from every child that has it (possibly both)
        if i < len(character_set0) and character_set0[i] == k:
            i += 1
        if j < len(character_set1) and character_set1[j] == k:
            j += 1
        # (index & 1) is the child's truth value just after k
        if (len(result) & 1) != max(i & 1, j & 1):
            result.append(k)
    # both children must have had an even number of breakpoints
    assert (i & 1) == 0 and (j & 1) == 0
    return result
def character_set_and(character_set0, character_set1):
    """Return the intersection of two character sets.

    A character set is a sorted list of breakpoints of even length.
    Passing an odd number of breakpoints means "inside the set", an even
    number means "outside"; a single character c is therefore [c, c + 1].

    Parameters:
        character_set0: first breakpoint list
        character_set1: second breakpoint list

    Returns:
        A new breakpoint list describing the intersection; inputs are not
        modified.
    """
    # we walk the merged breakpoints of both children in ascending order;
    # at each breakpoint we evaluate the "and" (min) of the even/odd truth
    # values of each child and emit a breakpoint whenever the output's own
    # even/odd truth value has to flip to match
    result = []
    i = 0
    j = 0
    while True:
        # k = the smallest breakpoint not yet consumed from either child
        if i < len(character_set0):
            k = character_set0[i]
            if j < len(character_set1):
                k = min(k, character_set1[j])
        elif j < len(character_set1):
            k = character_set1[j]
        else:
            break
        # consume k from every child that has it (possibly both)
        if i < len(character_set0) and character_set0[i] == k:
            i += 1
        if j < len(character_set1) and character_set1[j] == k:
            j += 1
        # (index & 1) is the child's truth value just after k
        if (len(result) & 1) != min(i & 1, j & 1):
            result.append(k)
    # both children must have had an even number of breakpoints
    assert (i & 1) == 0 and (j & 1) == 0
    return result
def character_set_not(character_set):
    """Return the complement of a character set within the alphabet.

    The alphabet spans breakpoints 0 .. n_characters (module-level
    constant).  The input list is copied, not modified.

    Parameters:
        character_set: sorted breakpoint list to complement

    Returns:
        A new breakpoint list describing the complement.
    """
    # calculate complement of the child set
    # if child set begins with [0], remove it, otherwise add [0] prefix
    # if child set ends with [n_characters], remove it, otherwise add
    # [n_characters] suffix
    # the suffix part is not totally necessary, but makes sure length is even
    # (the evenness is so that single character sets can always be [c, c + 1])
    result = list(character_set)
    if result[:1] == [0]:
        del result[:1]
    else:
        result[:0] = [0]
    if result[-1:] == [n_characters]:
        del result[-1:]
    else:
        result.append(n_characters)
    return result
class Regex(element.Element):
self.repr_serialize(params)
return 'regex.Regex({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, rule_char_sets = None):
+ def post_process(self, group_index = 0, rule_name_to_character_set = None):
for i in self:
- group_index = i.post_process(group_index, rule_char_sets)
+ group_index = i.post_process(group_index, rule_name_to_character_set)
return group_index
def to_groups(self, groups):
for i in self:
raise NotImplementedException
def add_to_nfa(self, nfa):
nfa.start_state.append(self.to_nfa_state(nfa, 0))
- def to_lr1_symbols(self, terminal_thres, symbols, lookaheads, group_bounds):
+ def to_lr1_symbols(self, n_terminals, symbols, lookaheads, group_bounds):
group_count = 0
for i in self:
group_count += (
- i.to_lr1_symbols(terminal_thres, symbols, lookaheads, group_bounds)
+ i.to_lr1_symbols(n_terminals, symbols, lookaheads, group_bounds)
)
return group_count # count of groups or ungrouped characters
return next_state
class RegexCharacter(Regex):
- # GENERATE ELEMENT(list(int) char_set) BEGIN
+ # GENERATE ELEMENT(list(int) character_set) BEGIN
def __init__(
self,
tag = 'RegexCharacter',
attrib = {},
text = '',
children = [],
- char_set = []
+ character_set = []
):
Regex.__init__(
self,
text,
children
)
- self.char_set = (
- [element.deserialize_int(i) for i in char_set.split()]
- if isinstance(char_set, str) else
- char_set
+ self.character_set = (
+ [element.deserialize_int(i) for i in character_set.split()]
+ if isinstance(character_set, str) else
+ character_set
)
def serialize(self, ref_list, indent = 0):
Regex.serialize(self, ref_list, indent)
self.set(
- 'char_set',
- ' '.join([element.serialize_int(i) for i in self.char_set])
+ 'character_set',
+ ' '.join([element.serialize_int(i) for i in self.character_set])
)
def deserialize(self, ref_list):
Regex.deserialize(self, ref_list)
- self.char_set = [
+ self.character_set = [
element.deserialize_int(i)
- for i in self.get('char_set', '').split()
+ for i in self.get('character_set', '').split()
]
def copy(self, factory = None):
result = Regex.copy(
self,
RegexCharacter if factory is None else factory
)
- result.char_set = self.char_set
+ result.character_set = self.character_set
return result
def repr_serialize(self, params):
Regex.repr_serialize(self, params)
- if len(self.char_set):
+ if len(self.character_set):
params.append(
- 'char_set = [{0:s}]'.format(
- ', '.join([repr(i) for i in self.char_set])
+ 'character_set = [{0:s}]'.format(
+ ', '.join([repr(i) for i in self.character_set])
)
)
def __repr__(self):
# GENERATE END
def to_nfa_state(self, nfa, next_state):
new_state = len(nfa.states)
- nfa.states.append((NFA.STATE_CHARACTER, self.char_set, next_state))
+ nfa.states.append((NFA.STATE_CHARACTER, self.character_set, next_state))
return new_state
- def to_lr1_symbols(self, terminal_thres, symbols, lookaheads, group_bounds):
+ def to_lr1_symbols(self, n_terminals, symbols, lookaheads, group_bounds):
terminal_set = []
nonterminal_set = []
i = 0
- while i < len(self.char_set):
- [j, k] = self.char_set[i:i + 2]
- if k > terminal_thres:
- if j < terminal_thres:
- terminal_set.extend([j, terminal_thres])
- nonterminal_set.extend([0, k - terminal_thres])
+ while i < len(self.character_set):
+ [j, k] = self.character_set[i:i + 2]
+ if k > n_terminals:
+ if j < n_terminals:
+ terminal_set.extend([j, n_terminals])
+ nonterminal_set.extend([0, k - n_terminals])
i += 2
- while i < len(self.char_set):
- [j, k] = self.char_set[i:i + 2]
- nonterminal_set.extend([j - terminal_thres, k - terminal_thres])
+ while i < len(self.character_set):
+ [j, k] = self.character_set[i:i + 2]
+ nonterminal_set.extend([j - n_terminals, k - n_terminals])
i += 2
break
terminal_set.extend([j, k])
attrib = {},
text = '',
children = [],
- char_set = []
+ character_set = []
):
RegexCharacter.__init__(
self,
attrib,
text,
children,
- char_set
+ character_set
)
def copy(self, factory = None):
result = RegexCharacter.copy(
self.repr_serialize(params)
return 'regex.RegexCharacterRange({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, rule_char_sets = None):
- group_index = RegexCharacter.post_process(self, group_index, rule_char_sets)
- self.char_set = [self[0].char_set[0], self[1].char_set[-1]]
+ def post_process(self, group_index = 0, rule_name_to_character_set = None):
+ group_index = RegexCharacter.post_process(self, group_index, rule_name_to_character_set)
+ self.character_set = [self[0].character_set[0], self[1].character_set[-1]]
return group_index
class RegexCharacterOr(RegexCharacter):
attrib = {},
text = '',
children = [],
- char_set = []
+ character_set = []
):
RegexCharacter.__init__(
self,
attrib,
text,
children,
- char_set
+ character_set
)
def copy(self, factory = None):
result = RegexCharacter.copy(
self.repr_serialize(params)
return 'regex.RegexCharacterOr({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, rule_char_sets = None):
- group_index = RegexCharacter.post_process(self, group_index, rule_char_sets)
- self.char_set = char_set_or(self[0].char_set, self[1].char_set)
+ def post_process(self, group_index = 0, rule_name_to_character_set = None):
+ group_index = RegexCharacter.post_process(self, group_index, rule_name_to_character_set)
+ self.character_set = character_set_or(self[0].character_set, self[1].character_set)
return group_index
class RegexCharacterAnd(RegexCharacter):
attrib = {},
text = '',
children = [],
- char_set = []
+ character_set = []
):
RegexCharacter.__init__(
self,
attrib,
text,
children,
- char_set
+ character_set
)
def copy(self, factory = None):
result = RegexCharacter.copy(
self.repr_serialize(params)
return 'regex.RegexCharacterAnd({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, rule_char_sets = None):
- group_index = RegexCharacter.post_process(self, group_index, rule_char_sets)
- self.char_set = char_set_and(self[0].char_set, self[1].char_set)
+ def post_process(self, group_index = 0, rule_name_to_character_set = None):
+ group_index = RegexCharacter.post_process(self, group_index, rule_name_to_character_set)
+ self.character_set = character_set_and(self[0].character_set, self[1].character_set)
return group_index
class RegexCharacterNot(RegexCharacter):
attrib = {},
text = '',
children = [],
- char_set = []
+ character_set = []
):
RegexCharacter.__init__(
self,
attrib,
text,
children,
- char_set
+ character_set
)
def copy(self, factory = None):
result = RegexCharacter.copy(
self.repr_serialize(params)
return 'regex.RegexCharacterNot({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, rule_char_sets = None):
- group_index = RegexCharacter.post_process(self, group_index, rule_char_sets)
- self.char_set = char_set_not(self[0].char_set)
+ def post_process(self, group_index = 0, rule_name_to_character_set = None):
+ group_index = RegexCharacter.post_process(self, group_index, rule_name_to_character_set)
+ self.character_set = character_set_not(self[0].character_set)
return group_index
class RegexCharacterRule(RegexCharacter):
attrib = {},
text = '',
children = [],
- char_set = [],
+ character_set = [],
rule_name = ''
):
RegexCharacter.__init__(
attrib,
text,
children,
- char_set
+ character_set
)
self.rule_name = rule_name
def serialize(self, ref_list, indent = 0):
self.repr_serialize(params)
return 'regex.RegexCharacterRule({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, rule_char_sets = None):
- if rule_char_sets is not None:
- self.char_set = rule_char_sets[self.rule_name]
- return RegexCharacter.post_process(self, group_index, rule_char_sets)
+ def post_process(self, group_index = 0, rule_name_to_character_set = None):
+ if rule_name_to_character_set is not None:
+ self.character_set = rule_name_to_character_set[self.rule_name]
+ return RegexCharacter.post_process(self, group_index, rule_name_to_character_set)
class RegexOr(Regex):
# GENERATE ELEMENT() BEGIN
self.repr_serialize(params)
return 'regex.RegexRepeat({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, rule_char_sets = None):
+ def post_process(self, group_index = 0, rule_name_to_character_set = None):
# total hack which will be done in a Python action in future
if len(self) >= 2:
assert self[1].tag == 'Number'
self.count1 = self.count0
del self[1:]
# end total hack
- return Regex.post_process(self, group_index, rule_char_sets)
+ return Regex.post_process(self, group_index, rule_name_to_character_set)
def to_nfa_state(self, nfa, next_state):
count0 = self.count0
count1 = self.count1
self.repr_serialize(params)
return 'regex.RegexGroup({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, rule_char_sets = None):
+ def post_process(self, group_index = 0, rule_name_to_character_set = None):
# total hack which will be done in a Python action in future
if len(self) >= 2:
assert self[0].tag == 'GroupName'
# end total hack
self.group_index = group_index
group_index += 1
- return Regex.post_process(self, group_index, rule_char_sets)
+ return Regex.post_process(self, group_index, rule_name_to_character_set)
def to_groups(self, groups):
assert len(groups) == self.group_index
groups.append(
new_state = len(nfa.states)
nfa.states.append((NFA.STATE_MARK, self.group_index * 2, child_state))
return new_state
- def to_lr1_symbols(self, terminal_thres, symbols, lookaheads, group_bounds):
+ def to_lr1_symbols(self, n_terminals, symbols, lookaheads, group_bounds):
group_start = len(symbols)
assert self.group_index == len(group_bounds)
group_bounds.append(None)
group_count = Regex.to_lr1_symbols(
self,
- terminal_thres,
+ n_terminals,
symbols,
lookaheads,
group_bounds
self.repr_serialize(params)
return 'regex.Grammar.Production({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, nonterminal, rule_char_sets):
+ def post_process(self, nonterminal, rule_name_to_character_set):
self.nonterminal = nonterminal
- self[0].post_process(0, rule_char_sets)
+ self[0].post_process(0, rule_name_to_character_set)
def add_to_lr1(self, lr1):
symbols = []
lookaheads = []
group_bounds = []
self[0].to_lr1_symbols(
- lr1.terminal_thres,
+ lr1.n_terminals,
symbols,
lookaheads,
group_bounds
)
)
- # GENERATE ELEMENT(int terminal_thres) BEGIN
+ # GENERATE ELEMENT(int n_terminals) BEGIN
def __init__(
self,
tag = 'Grammar',
attrib = {},
text = '',
children = [],
- terminal_thres = -1
+ n_terminals = -1
):
element.Element.__init__(
self,
text,
children
)
- self.terminal_thres = (
- element.deserialize_int(terminal_thres)
- if isinstance(terminal_thres, str) else
- terminal_thres
+ self.n_terminals = (
+ element.deserialize_int(n_terminals)
+ if isinstance(n_terminals, str) else
+ n_terminals
)
def serialize(self, ref_list, indent = 0):
element.Element.serialize(self, ref_list, indent)
- self.set('terminal_thres', element.serialize_int(self.terminal_thres))
+ self.set('n_terminals', element.serialize_int(self.n_terminals))
def deserialize(self, ref_list):
element.Element.deserialize(self, ref_list)
- self.terminal_thres = element.deserialize_int(self.get('terminal_thres', '-1'))
+ self.n_terminals = element.deserialize_int(self.get('n_terminals', '-1'))
def copy(self, factory = None):
result = element.Element.copy(
self,
Grammar if factory is None else factory
)
- result.terminal_thres = self.terminal_thres
+ result.n_terminals = self.n_terminals
return result
def repr_serialize(self, params):
element.Element.repr_serialize(self, params)
- if self.terminal_thres != -1:
+ if self.n_terminals != -1:
params.append(
- 'terminal_thres = {0:s}'.format(repr(self.terminal_thres))
+ 'n_terminals = {0:s}'.format(repr(self.n_terminals))
)
def __repr__(self):
params = []
self.repr_serialize(params)
return 'regex.Grammar({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, rule_char_sets):
+ def post_process(self, rule_name_to_character_set):
for i in range(len(self)):
- self[i].post_process(i, rule_char_sets)
+ self[i].post_process(i, rule_name_to_character_set)
def to_lr1(self):
- lr1 = LR1([], self.terminal_thres)
+ lr1 = LR1([], self.n_terminals)
for i in self:
i.add_to_lr1(lr1)
# propagate lookaheads
for j in range(0, len(nonterminal_set), 2):
for k in range(nonterminal_set[j], nonterminal_set[j + 1]):
child_initial_set, child_can_be_empty = lr1.productions[k][2][0]
- initial_set = char_set_or(initial_set, child_initial_set)
+ initial_set = character_set_or(initial_set, child_initial_set)
can_be_empty = can_be_empty or child_can_be_empty
# at this point can_be_empty refers to current symbol only
if can_be_empty:
next_initial_set, can_be_empty = lookaheads[i + 1]
- initial_set = char_set_or(initial_set, next_initial_set)
+ initial_set = character_set_or(initial_set, next_initial_set)
# at this point can_be_empty refers to all remaining symbols
if (initial_set, can_be_empty) != lookaheads[i]:
lookaheads[i] = (initial_set, can_be_empty)
class NFA:
# state_desc classes:
- # (STATE_CHARACTER, char_set, next_state)
+ # (STATE_CHARACTER, character_set, next_state)
# (STATE_OR, next_state0, next_state1)
# (STATE_AND, next_state0, next_state1)
# (STATE_JOIN0,)
def __init__(
self,
groups = [],
- states = [(STATE_CHARACTER, [0, chars], 0)],
+ states = [(STATE_CHARACTER, [0, n_characters], 0)],
start_state = [] # can have multiple NFAs in same container
):
# groups: list of group_desc
self.states = states
self.start_state = start_state
- def multistate_next(self, root_multistate, char):
+ def multistate_next(self, root_multistate, character):
# the deduplication works as effectively a second pass which goes
# over the multistate tree in pre-order, looking for OR-disjunctions
# of any depth and configuration, e.g. (a OR b) or (c OR d), and
return result
def advance(multistate, join_count, done_multistates):
- nonlocal char0, char1 # modifies nonlocal: transition
+ nonlocal character0, character1 # modifies nonlocal: transition
if multistate[0] == NFA.MULTISTATE_ACCEPT:
assert join_count == 0
len_transition = len(transition)
transition.append((DFA.TRANSITION_MOVE, 1))
result = NFA.accept_multistate # takes no arguments so use static one
elif multistate[0] == NFA.MULTISTATE_AND:
- if char >= 0:
+ if character >= 0:
_, _, state, child = multistate
state_desc = self.states[state]
assert state_desc[0] == NFA.STATE_CHARACTER
- _, char_set, next_state = state_desc
- k = bisect.bisect_right(char_set, char)
- if k > 0 and char0 < char_set[k - 1]:
- char0 = char_set[k - 1]
- if k < len(char_set) and char1 > char_set[k]:
- char1 = char_set[k]
+ _, character_set, next_state = state_desc
+ k = bisect.bisect_right(character_set, character)
+ if k > 0 and character0 < character_set[k - 1]:
+ character0 = character_set[k - 1]
+ if k < len(character_set) and character1 > character_set[k]:
+ character1 = character_set[k]
if (k & 1) == 0:
transition.append((DFA.TRANSITION_POP, child[1]))
return None
return result
transition = []
- char0 = 0
- char1 = chars
+ character0 = 0
+ character1 = n_characters
root_multistate = advance(root_multistate, 0, set())
- return root_multistate, transition, char0, char1
+ return root_multistate, transition, character0, character1
def multistate_accept(root_multistate):
i = 0
dfa.actions.append((next_state, list(transition)))
while len(dfa.states) < len(state_to_meaning):
- char = 0
+ character = 0
multistate = state_to_meaning[len(dfa.states)]
state_desc = ([], [], NFA.multistate_accept(multistate))
- while char < chars:
- next_multistate, transition, char0, char1 = self.multistate_next(
+ while character < n_characters:
+ next_multistate, transition, character0, character1 = self.multistate_next(
multistate,
- char
+ character
)
- assert char0 == char and char1 > char
+ assert character0 == character and character1 > character
if next_multistate is None:
action = -1
else:
action = len(action_to_meaning)
action_to_meaning.append(key)
meaning_to_action[key] = action
- state_desc[0].append(char1)
+ state_desc[0].append(character1)
state_desc[1].append(action)
- char = char1
+ character = character1
dfa.states.append(state_desc)
return dfa
def __init__(
self,
groups = [],
- states = [([chars], [0], 0)],
+ states = [([n_characters], [0], 0)],
actions = [(0, [])],
start_action = [] # can have multiple DFAs in same container
):
)
class LR1:
- def __init__(self, productions = [], terminal_thres = chars):
+ def __init__(self, productions = [], n_terminals = n_characters):
# productions: list of production
# production: (
# priority,
# priority: bit 0 = right to left, bits 1: = numeric priority
# symbols: list of symbol_desc
# symbol_desc: (terminal_set, nonterminal_set)
- # terminal_set: similar to char_set, even length list of pairs of breaks
- # nonterminal_set: as above but has terminal_thres subtracted from breaks
+ # terminal_set: similar to character_set, even length list of pairs of breaks
+ # nonterminal_set: as above but has n_terminals subtracted from breaks
# lookaheads: list of lookahead_desc, len(lookaheads) = len(symbols) + 1
# lookahead_desc: (initial_set, can_be_empty)
# initial_set: what terminals can occur at this position in symbols array,
# True (because all symbols from the end to the end can obviously be empty)
# group_bounds: list of group_bound
# group_bound: (start_index, end_index, tag, kwargs)
- # where start_index, end_index are indices into list of char_set,
+ # where start_index, end_index are indices into list of character_set,
# and tag, kwargs will be passed to apply_markup() hence factory(),
# noting that markup has to be applied in reverse order of the list
- # terminal_thres: offset to apply to productions[] index to get symbol
- # (char set code), also symbol for productions[0] = start production
+ # n_terminals: offset to apply to productions[] index to get symbol
+ # (character set code), also symbol for productions[0] = start production
self.productions = productions
- self.terminal_thres = terminal_thres
+ self.n_terminals = n_terminals
def lookahead_item_set_closure(self, items, item_to_index):
in_queue = [True for i in range(len(items))]
if len(nonterminal_set):
next_lookahead_set, next_can_be_empty = lookaheads[k + 1]
if next_can_be_empty:
- next_lookahead_set = char_set_or(
+ next_lookahead_set = character_set_or(
next_lookahead_set,
lookahead_set
)
key = (m, 0)
if key in item_to_index:
n = item_to_index[key]
- child_lookahead_set = char_set_or(
+ child_lookahead_set = character_set_or(
items[n][2],
next_lookahead_set
)
next_item_to_index = {}
reductions = set()
terminal0 = 0
- terminal1 = self.terminal_thres
+ terminal1 = self.n_terminals
for i, j, lookahead_set in items:
_, symbols, _, _ = self.productions[i]
if j < len(symbols):
return next_items, next_item_to_index, nonterminal0, nonterminal1
def parse_text(self, text, i):
- items = [(0, 0, [chars, chars + 1])] # EOF
+ items = [(0, 0, [n_characters, n_characters + 1])] # EOF
item_to_index = {(0, 0): 0}
value_stack = []
state_stack = []
- lookahead_char = ord(text[i]) if i < len(text) else chars # EOF
+ lookahead_character = ord(text[i]) if i < len(text) else n_characters # EOF
while True:
self.lookahead_item_set_closure(items, item_to_index)
value_stack.append(i)
state_stack.append(items)
items, item_to_index, reductions, _, _ = (
- self.lookahead_item_set_shift(items, lookahead_char)
+ self.lookahead_item_set_shift(items, lookahead_character)
)
if len(items) != 0:
if len(reductions) != 0:
)
)
i += 1
- lookahead_char = ord(text[i]) if i < len(text) else chars # EOF
+ lookahead_character = ord(text[i]) if i < len(text) else n_characters # EOF
elif len(reductions) != 0:
if len(reductions) != 1:
sys.stderr.write(
if pos < 0:
pos, off = element.to_start_relative(root, pos, off)
- items = [(0, 0, [chars, chars + 1])] # EOF
+ items = [(0, 0, [n_characters, n_characters + 1])] # EOF
item_to_index = {(0, 0): 0}
value_stack = []
state_stack = []
try:
next(yychunk_iter)
except StopIteration:
- lookahead_char = chars # EOF
+ lookahead_character = n_characters # EOF
break
text = element.get_text(root, pos)
else:
- lookahead_char = ord(text[off])
+ lookahead_character = ord(text[off])
while True:
self.lookahead_item_set_closure(items, item_to_index)
value_stack.append((pos, off))
state_stack.append(items)
items, item_to_index, reductions, _, _ = (
- self.lookahead_item_set_shift(items, lookahead_char)
+ self.lookahead_item_set_shift(items, lookahead_character)
)
if len(items) != 0:
if len(reductions) != 0:
try:
next(yychunk_iter)
except StopIteration:
- lookahead_char = chars # EOF
+ lookahead_character = n_characters # EOF
break
text = element.get_text(root, pos)
else:
- lookahead_char = ord(text[off])
+ lookahead_character = ord(text[off])
elif len(reductions) != 0:
if len(reductions) != 1:
sys.stderr.write(
(len(symbols), group_bounds)
for _, symbols, _, group_bounds in self.productions
],
- self.terminal_thres
+ self.n_terminals
)
- items = [(0, 0, [chars, chars + 1])] # EOF
+ items = [(0, 0, [n_characters, n_characters + 1])] # EOF
item_to_index = {(0, 0): 0}
self.lookahead_item_set_closure(items, item_to_index)
return state
terminal = 0
- while terminal < self.terminal_thres:
+ while terminal < self.n_terminals:
next_items, next_item_to_index, reductions, terminal0, terminal1 = (
self.lookahead_item_set_shift(items, terminal)
)
(len(symbols), group_bounds)
for _, symbols, _, group_bounds in self.productions
],
- self.terminal_thres
+ self.n_terminals
)
- items = [(0, 0, [chars, chars + 1])] # EOF
+ items = [(0, 0, [n_characters, n_characters + 1])] # EOF
item_to_index = {(0, 0): 0}
self.lookahead_item_set_closure(items, item_to_index)
state_items = state_to_items[state]
for i in range(len(new_items)):
j, k, lookahead_set = new_items[i]
- lookahead_set = char_set_or(lookahead_set, state_items[i][2])
+ lookahead_set = character_set_or(lookahead_set, state_items[i][2])
if lookahead_set != state_items[i][2]:
state_items[i] = (j, k, lookahead_set)
if not in_queue[state]:
return state
terminal = 0
- while terminal < self.terminal_thres:
+ while terminal < self.n_terminals:
next_items, next_item_to_index, reductions, terminal0, terminal1 = (
self.lookahead_item_set_shift(items, terminal)
)
def __repr__(self):
return 'regex.LR1({0:s}, {1:d})'.format(
repr(self.productions),
- self.terminal_thres
+ self.n_terminals
)
class LR1DFA:
- def __init__(self, states = [], productions = [], terminal_thres = chars):
+ def __init__(self, states = [], productions = [], n_terminals = n_characters):
# states: list of state_desc
# state_desc: (terminal breaks, actions, nonterminal breaks, gotos)
# action: shift = new state * 2, reduce = production * 2 + 1, error = -1
# len(symbols): how many states to pop stack to reduce this production
# group_bounds: list of group_bound
# group_bound: (start_index, end_index, tag, kwargs)
- # where start_index, end_index are indices into list of char_set,
+ # where start_index, end_index are indices into list of character_set,
# and tag, kwargs will be passed to apply_markup() hence factory(),
# noting that markup has to be applied in reverse order of the list
- # terminal_thres: offset to apply to productions[] index to get symbol
- # (char set code), also symbol for productions[0] = start production
+ # n_terminals: offset to apply to productions[] index to get symbol
+ # (character set code), also symbol for productions[0] = start production
self.states = states
self.productions = productions
- self.terminal_thres = terminal_thres
+ self.n_terminals = n_terminals
def parse_text(self, text, i):
state = 0
value_stack = []
state_stack = []
- lookahead_char = ord(text[i]) if i < len(text) else chars # EOF
+ lookahead_character = ord(text[i]) if i < len(text) else n_characters # EOF
while True:
value_stack.append(i)
state_stack.append(state)
action = self.states[state][1][
- bisect.bisect_right(self.states[state][0], lookahead_char)
+ bisect.bisect_right(self.states[state][0], lookahead_character)
]
if action == -1:
raise Exception(
if (action & 1) == 0:
state = action >> 1
i += 1
- lookahead_char = ord(text[i]) if i < len(text) else chars # EOF
+ lookahead_character = ord(text[i]) if i < len(text) else n_characters # EOF
else:
reduce = action >> 1
len_symbols, group_bounds = self.productions[reduce]
try:
next(yychunk_iter)
except StopIteration:
- lookahead_char = chars # EOF
+ lookahead_character = n_characters # EOF
break
text = element.get_text(root, pos)
else:
- lookahead_char = ord(text[off])
+ lookahead_character = ord(text[off])
while True:
value_stack.append((pos, off))
state_stack.append(state)
action = self.states[state][1][
- bisect.bisect_right(self.states[state][0], lookahead_char)
+ bisect.bisect_right(self.states[state][0], lookahead_character)
]
- #print('lookahead_char', lookahead_char, 'action', action)
+ #print('lookahead_character', lookahead_character, 'action', action)
if action == -1:
raise Exception(
'syntax error at {0:d},{1:d}: {2:s}'.format(pos, off, text[off:])
try:
next(yychunk_iter)
except StopIteration:
- lookahead_char = chars # EOF
+ lookahead_character = n_characters # EOF
break
text = element.get_text(root, pos)
else:
- lookahead_char = ord(text[off])
+ lookahead_character = ord(text[off])
else:
reduce = action >> 1
len_symbols, group_bounds = self.productions[reduce]
value_stack = []
state_stack = []
try:
- end_pos, end_off, lookahead_char = next(yylex_iter)
+ end_pos, end_off, lookahead_character = next(yylex_iter)
except StopIteration:
- lookahead_char = chars # EOF
+ lookahead_character = n_characters # EOF
end_pos, end_off = element.to_end_relative(root, pos, off)
while True:
value_stack.append((pos, off))
state_stack.append(state)
action = self.states[state][1][
- bisect.bisect_right(self.states[state][0], lookahead_char)
+ bisect.bisect_right(self.states[state][0], lookahead_character)
]
- #print('lookahead_char', lookahead_char, 'action', action)
+ #print('lookahead_character', lookahead_character, 'action', action)
if action == -1:
raise Exception(
- 'syntax error at {0:d},{1:d}: {2:d}'.format(pos, off, lookahead_char)
+ 'syntax error at {0:d},{1:d}: {2:d}'.format(pos, off, lookahead_character)
)
if (action & 1) == 0:
state = action >> 1
pos, off = element.to_start_relative(root, end_pos, end_off)
try:
- end_pos, end_off, lookahead_char = next(yylex_iter)
+ end_pos, end_off, lookahead_character = next(yylex_iter)
except StopIteration:
- lookahead_char = chars # EOF
+ lookahead_character = n_characters # EOF
#end_pos, end_off = element.to_end_relative(root, pos, off)
else:
reduce = action >> 1
return 'regex.LR1DFA({0:s}, {1:s}, {2:d})'.format(
repr(self.states),
repr(self.productions),
- self.terminal_thres
+ self.n_terminals
)
def wrap_repr(text, width):
import xml.etree.ElementTree
regex = RegexAnd(children = [RegexRepeat(children = [RegexCharacterNot(
-children = [RegexCharacter()], char_set = [0, 256])]), RegexGroup(children = [
+children = [RegexCharacter()], character_set = [0, 256])]), RegexGroup(children = [
RegexOr(children = [RegexOr(children = [RegexOr(children = [RegexGroup(children
-= [RegexRepeat(children = [RegexCharacter(char_set = [9, 14, 32, 33])],
+= [RegexRepeat(children = [RegexCharacter(character_set = [9, 14, 32, 33])],
one_or_more = True)], group_index = 1, group_name = 'Whitespace'), RegexGroup(
-children = [RegexRepeat(children = [RegexCharacter(char_set = [48, 58])],
+children = [RegexRepeat(children = [RegexCharacter(character_set = [48, 58])],
one_or_more = True)], group_index = 2, group_name = 'Number')]), RegexGroup(
children = [RegexSequence(children = [RegexSequence(children = [RegexSequence(
-children = [RegexEmpty(), RegexCharacter(char_set = [102, 103])]),
-RegexCharacter(char_set = [111, 112])]), RegexCharacter(char_set = [114, 115])]
+children = [RegexEmpty(), RegexCharacter(character_set = [102, 103])]),
+RegexCharacter(character_set = [111, 112])]), RegexCharacter(character_set = [114, 115])]
)], group_index = 3, group_name = 'For')]), RegexGroup(children = [
-RegexSequence(children = [RegexCharacter(char_set = [65, 91, 95, 96, 97, 123]),
-RegexRepeat(children = [RegexCharacter(char_set = [48, 58, 65, 91, 95, 96, 97,
+RegexSequence(children = [RegexCharacter(character_set = [65, 91, 95, 96, 97, 123]),
+RegexRepeat(children = [RegexCharacter(character_set = [48, 58, 65, 91, 95, 96, 97,
123])])])], group_index = 4, group_name = 'Identifier')])], group_index = 0)])
#sys.stdout.write(
# wrap_repr(
group_end[group] = pos
grammar = Grammar(children = [Grammar.Production(children = [RegexSequence(
-children = [RegexSequence(children = [RegexEmpty(), RegexCharacterRule(char_set
-= [288, 295], rule_name = 'whitespace_opt')]), RegexCharacterRule(char_set = [
+children = [RegexSequence(children = [RegexEmpty(), RegexCharacterRule(character_set
+= [288, 295], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [
259, 262], rule_name = 'expr0')])], nonterminal = 0), Grammar.Production(
-children = [RegexSequence(children = [RegexEmpty(), RegexCharacterRule(char_set
+children = [RegexSequence(children = [RegexEmpty(), RegexCharacterRule(character_set
= [262, 265], rule_name = 'expr1')])], nonterminal = 1), Grammar.Production(
children = [RegexSequence(children = [RegexEmpty(), RegexGroup(children = [
RegexSequence(children = [RegexSequence(children = [RegexSequence(children = [
-RegexSequence(children = [RegexEmpty(), RegexCharacterRule(char_set = [259, 262
-], rule_name = 'expr0')]), RegexCharacter(char_set = [43, 44])]),
-RegexCharacterRule(char_set = [288, 295], rule_name = 'whitespace_opt')]),
-RegexCharacterRule(char_set = [262, 265], rule_name = 'expr1')])], group_index
+RegexSequence(children = [RegexEmpty(), RegexCharacterRule(character_set = [259, 262
+], rule_name = 'expr0')]), RegexCharacter(character_set = [43, 44])]),
+RegexCharacterRule(character_set = [288, 295], rule_name = 'whitespace_opt')]),
+RegexCharacterRule(character_set = [262, 265], rule_name = 'expr1')])], group_index
= 0, group_name = 'Add')])], nonterminal = 2), Grammar.Production(children = [
RegexSequence(children = [RegexEmpty(), RegexGroup(children = [RegexSequence(
children = [RegexSequence(children = [RegexSequence(children = [RegexSequence(
-children = [RegexEmpty(), RegexCharacterRule(char_set = [259, 262], rule_name =
-'expr0')]), RegexCharacter(char_set = [45, 46])]), RegexCharacterRule(char_set
-= [288, 295], rule_name = 'whitespace_opt')]), RegexCharacterRule(char_set = [
+children = [RegexEmpty(), RegexCharacterRule(character_set = [259, 262], rule_name =
+'expr0')]), RegexCharacter(character_set = [45, 46])]), RegexCharacterRule(character_set
+= [288, 295], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [
262, 265], rule_name = 'expr1')])], group_index = 0, group_name = 'Subtract')])
], nonterminal = 3), Grammar.Production(children = [RegexSequence(children = [
-RegexEmpty(), RegexCharacterRule(char_set = [265, 268], rule_name = 'expr2')])
+RegexEmpty(), RegexCharacterRule(character_set = [265, 268], rule_name = 'expr2')])
], nonterminal = 4), Grammar.Production(children = [RegexSequence(children = [
RegexEmpty(), RegexGroup(children = [RegexSequence(children = [RegexSequence(
children = [RegexSequence(children = [RegexSequence(children = [RegexEmpty(),
-RegexCharacterRule(char_set = [262, 265], rule_name = 'expr1')]),
-RegexCharacter(char_set = [42, 43])]), RegexCharacterRule(char_set = [288, 295
-], rule_name = 'whitespace_opt')]), RegexCharacterRule(char_set = [265, 268],
+RegexCharacterRule(character_set = [262, 265], rule_name = 'expr1')]),
+RegexCharacter(character_set = [42, 43])]), RegexCharacterRule(character_set = [288, 295
+], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [265, 268],
rule_name = 'expr2')])], group_index = 0, group_name = 'Multiply')])],
nonterminal = 5), Grammar.Production(children = [RegexSequence(children = [
RegexEmpty(), RegexGroup(children = [RegexSequence(children = [RegexSequence(
children = [RegexSequence(children = [RegexSequence(children = [RegexEmpty(),
-RegexCharacterRule(char_set = [262, 265], rule_name = 'expr1')]),
-RegexCharacter(char_set = [47, 48])]), RegexCharacterRule(char_set = [288, 295
-], rule_name = 'whitespace_opt')]), RegexCharacterRule(char_set = [265, 268],
+RegexCharacterRule(character_set = [262, 265], rule_name = 'expr1')]),
+RegexCharacter(character_set = [47, 48])]), RegexCharacterRule(character_set = [288, 295
+], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [265, 268],
rule_name = 'expr2')])], group_index = 0, group_name = 'Divide')])],
nonterminal = 6), Grammar.Production(children = [RegexSequence(children = [
RegexSequence(children = [RegexEmpty(), RegexGroup(children = [RegexSequence(
-children = [RegexEmpty(), RegexCharacterRule(char_set = [268, 288], rule_name =
+children = [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name =
'number')])], group_index = 0, group_name = 'Number')]), RegexCharacterRule(
-char_set = [288, 295], rule_name = 'whitespace_opt')])], nonterminal = 7),
+character_set = [288, 295], rule_name = 'whitespace_opt')])], nonterminal = 7),
Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
RegexGroup(children = [RegexSequence(children = [RegexSequence(children = [
-RegexSequence(children = [RegexEmpty(), RegexCharacter(char_set = [45, 46])]),
-RegexCharacterRule(char_set = [288, 295], rule_name = 'whitespace_opt')]),
-RegexCharacterRule(char_set = [265, 268], rule_name = 'expr2')])], group_index
+RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [45, 46])]),
+RegexCharacterRule(character_set = [288, 295], rule_name = 'whitespace_opt')]),
+RegexCharacterRule(character_set = [265, 268], rule_name = 'expr2')])], group_index
= 0, group_name = 'Negate')])], nonterminal = 8), Grammar.Production(children =
[RegexSequence(children = [RegexSequence(children = [RegexSequence(children = [
RegexSequence(children = [RegexSequence(children = [RegexEmpty(),
-RegexCharacter(char_set = [40, 41])]), RegexCharacterRule(char_set = [288, 295
-], rule_name = 'whitespace_opt')]), RegexCharacterRule(char_set = [259, 262],
-rule_name = 'expr0')]), RegexCharacter(char_set = [41, 42])]),
-RegexCharacterRule(char_set = [288, 295], rule_name = 'whitespace_opt')])],
+RegexCharacter(character_set = [40, 41])]), RegexCharacterRule(character_set = [288, 295
+], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [259, 262],
+rule_name = 'expr0')]), RegexCharacter(character_set = [41, 42])]),
+RegexCharacterRule(character_set = [288, 295], rule_name = 'whitespace_opt')])],
nonterminal = 9), Grammar.Production(children = [RegexSequence(children = [
-RegexEmpty(), RegexCharacter(char_set = [48, 49])])], nonterminal = 10),
+RegexEmpty(), RegexCharacter(character_set = [48, 49])])], nonterminal = 10),
Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
-RegexCharacter(char_set = [49, 50])])], nonterminal = 11), Grammar.Production(
-children = [RegexSequence(children = [RegexEmpty(), RegexCharacter(char_set = [
+RegexCharacter(character_set = [49, 50])])], nonterminal = 11), Grammar.Production(
+children = [RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [
50, 51])])], nonterminal = 12), Grammar.Production(children = [RegexSequence(
-children = [RegexEmpty(), RegexCharacter(char_set = [51, 52])])], nonterminal =
+children = [RegexEmpty(), RegexCharacter(character_set = [51, 52])])], nonterminal =
13), Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
-RegexCharacter(char_set = [52, 53])])], nonterminal = 14), Grammar.Production(
-children = [RegexSequence(children = [RegexEmpty(), RegexCharacter(char_set = [
+RegexCharacter(character_set = [52, 53])])], nonterminal = 14), Grammar.Production(
+children = [RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [
53, 54])])], nonterminal = 15), Grammar.Production(children = [RegexSequence(
-children = [RegexEmpty(), RegexCharacter(char_set = [54, 55])])], nonterminal =
+children = [RegexEmpty(), RegexCharacter(character_set = [54, 55])])], nonterminal =
16), Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
-RegexCharacter(char_set = [55, 56])])], nonterminal = 17), Grammar.Production(
-children = [RegexSequence(children = [RegexEmpty(), RegexCharacter(char_set = [
+RegexCharacter(character_set = [55, 56])])], nonterminal = 17), Grammar.Production(
+children = [RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [
56, 57])])], nonterminal = 18), Grammar.Production(children = [RegexSequence(
-children = [RegexEmpty(), RegexCharacter(char_set = [57, 58])])], nonterminal =
+children = [RegexEmpty(), RegexCharacter(character_set = [57, 58])])], nonterminal =
19), Grammar.Production(children = [RegexSequence(children = [RegexSequence(
-children = [RegexEmpty(), RegexCharacterRule(char_set = [268, 288], rule_name =
-'number')]), RegexCharacter(char_set = [48, 49])])], nonterminal = 20),
+children = [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name =
+'number')]), RegexCharacter(character_set = [48, 49])])], nonterminal = 20),
Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(char_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(char_set = [49, 50])])], nonterminal = 21),
+= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+)]), RegexCharacter(character_set = [49, 50])])], nonterminal = 21),
Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(char_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(char_set = [50, 51])])], nonterminal = 22),
+= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+)]), RegexCharacter(character_set = [50, 51])])], nonterminal = 22),
Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(char_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(char_set = [51, 52])])], nonterminal = 23),
+= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+)]), RegexCharacter(character_set = [51, 52])])], nonterminal = 23),
Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(char_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(char_set = [52, 53])])], nonterminal = 24),
+= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+)]), RegexCharacter(character_set = [52, 53])])], nonterminal = 24),
Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(char_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(char_set = [53, 54])])], nonterminal = 25),
+= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+)]), RegexCharacter(character_set = [53, 54])])], nonterminal = 25),
Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(char_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(char_set = [54, 55])])], nonterminal = 26),
+= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+)]), RegexCharacter(character_set = [54, 55])])], nonterminal = 26),
Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(char_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(char_set = [55, 56])])], nonterminal = 27),
+= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+)]), RegexCharacter(character_set = [55, 56])])], nonterminal = 27),
Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(char_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(char_set = [56, 57])])], nonterminal = 28),
+= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+)]), RegexCharacter(character_set = [56, 57])])], nonterminal = 28),
Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(char_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(char_set = [57, 58])])], nonterminal = 29),
+= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
+)]), RegexCharacter(character_set = [57, 58])])], nonterminal = 29),
Grammar.Production(children = [RegexEmpty()], nonterminal = 30),
Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(char_set = [288, 295], rule_name =
-'whitespace_opt')]), RegexCharacter(char_set = [9, 10])])], nonterminal = 31),
+= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
+'whitespace_opt')]), RegexCharacter(character_set = [9, 10])])], nonterminal = 31),
Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(char_set = [288, 295], rule_name =
-'whitespace_opt')]), RegexCharacter(char_set = [10, 11])])], nonterminal = 32),
+= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
+'whitespace_opt')]), RegexCharacter(character_set = [10, 11])])], nonterminal = 32),
Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(char_set = [288, 295], rule_name =
-'whitespace_opt')]), RegexCharacter(char_set = [11, 12])])], nonterminal = 33),
+= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
+'whitespace_opt')]), RegexCharacter(character_set = [11, 12])])], nonterminal = 33),
Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(char_set = [288, 295], rule_name =
-'whitespace_opt')]), RegexCharacter(char_set = [12, 13])])], nonterminal = 34),
+= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
+'whitespace_opt')]), RegexCharacter(character_set = [12, 13])])], nonterminal = 34),
Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(char_set = [288, 295], rule_name =
-'whitespace_opt')]), RegexCharacter(char_set = [13, 14])])], nonterminal = 35),
+= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
+'whitespace_opt')]), RegexCharacter(character_set = [13, 14])])], nonterminal = 35),
Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(char_set = [288, 295], rule_name =
-'whitespace_opt')]), RegexCharacter(char_set = [32, 33])])], nonterminal = 36)
-], terminal_thres = 258)
+= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
+'whitespace_opt')]), RegexCharacter(character_set = [32, 33])])], nonterminal = 36)
+], n_terminals = 258)
#sys.stdout.write(
# wrap_repr(
# ' grammar = {0:s}'.format(repr(grammar).replace('regex.', '')),