# translate is yytranslate
self.translate = translate_terminals
- # rule_data[:, 0] is yyr1
- # rule_data[:, 1] is yyr2
+ # rules[:, 0] is yyr1
+ # rules[:, 1] is yyr2
# note: production 0 (start production) can't be reduced, null it out
- self.rule_data = numpy.concatenate(
+ self.rules = numpy.concatenate(
[
numpy.zeros((1, 2), numpy.int16),
numpy.stack(
#print(goto_table)
# entry_base indicates the most negative starting index we can expect
- # we maintain entry_size such that entry_used[entry_size:, :] == False
- # entry_data[i, 0] is yytable[i - entry_base]
- # entry_data[i, 1] is yycheck[i - entry_base]
- # entry_used[i, 0] == True if any vector has been stored starting at i
- # entry_used[0, 0] does not go True as it is -infinity and can be reused
- # entry_used[i, 1] == True if entry has been stored in entry_data[i, :]
+ # we maintain n_entries such that entries_used[n_entries:, :] == False
+ # entries[i, 0] is yytable[i - entry_base]
+ # entries[i, 1] is yycheck[i - entry_base]
+ # entries_used[i, 0] == True if any vector has been stored starting at i
+ # entries_used[0, 0] does not go True as it is -infinity and can be reused
+ # entries_used[i, 1] == True if entry has been stored in entries[i, :]
self.entry_base = max(n_terminals, len(lr1dfa.states))
- entry_size = self.entry_base
- entry_data = numpy.full((self.entry_base, 2), 0x8000, numpy.int16)
- entry_used = numpy.zeros((self.entry_base, 2), numpy.bool)
- entry_used[:, 1] = True # padding so we don't use any negative entries
+ n_entries = self.entry_base
+ entries = numpy.full((self.entry_base, 2), 0x8000, numpy.int16)
+ entries_used = numpy.zeros((self.entry_base, 2), numpy.bool)
+ entries_used[:, 1] = True # padding so we don't use any negative entries
# action_pointer is yypact, goto_pointer is yypgoto
def pack_matrix(data, mask):
- nonlocal entry_size, entry_data, entry_used # also uses self.entry_base
+ nonlocal n_entries, entries, entries_used # also uses self.entry_base
start_indices = numpy.zeros((data.shape[0],), numpy.int16)
for i in range(data.shape[0]):
size = indices[-1] + 1
# ensure arrays are at least large enough to find a spot
- while entry_data.shape[0] < entry_size + size:
- # extend entry_data, copying only entry_size entries
- new_entry_data = numpy.full(
- (entry_data.shape[0] * 2, 2),
+ while entries.shape[0] < n_entries + size:
+ # extend entries, copying only n_entries entries
+ new_entries = numpy.full(
+ (entries.shape[0] * 2, 2),
0x8000,
numpy.int16
)
- new_entry_data[:entry_size, :] = entry_data[:entry_size, :]
- entry_data = new_entry_data
+ new_entries[:n_entries, :] = entries[:n_entries, :]
+ entries = new_entries
- # extend entry_used, copying only entry_size entries
- new_entry_used = numpy.zeros(
- (entry_used.shape[0] * 2, 2),
+ # extend entries_used, copying only n_entries entries
+ new_entries_used = numpy.zeros(
+ (entries_used.shape[0] * 2, 2),
numpy.bool
)
- new_entry_used[:entry_size, :] = entry_used[:entry_size, :]
- entry_used = new_entry_used
+ new_entries_used[:n_entries, :] = entries_used[:n_entries, :]
+ entries_used = new_entries_used
# find a suitable spot and store differences from default
start_index = self.entry_base - indices[0]
- while start_index < entry_size:
+ while start_index < n_entries:
if (
- not entry_used[start_index, 0] and
- not numpy.any(entry_used[indices + start_index, 1])
+ not entries_used[start_index, 0] and
+ not numpy.any(entries_used[indices + start_index, 1])
):
break
start_index += 1
- entry_data[indices + start_index, 0] = data[i, indices]
- entry_data[indices + start_index, 1] = indices
- entry_used[start_index, 0] = True
- entry_used[indices + start_index, 1] = True
- if entry_size < start_index + size:
- entry_size = start_index + size
+ entries[indices + start_index, 0] = data[i, indices]
+ entries[indices + start_index, 1] = indices
+ entries_used[start_index, 0] = True
+ entries_used[indices + start_index, 1] = True
+ if n_entries < start_index + size:
+ n_entries = start_index + size
start_indices[i] = start_index
return start_indices
goto_table,
goto_table != self.default_goto[:, numpy.newaxis]
) - self.entry_base
- self.entry_data = numpy.zeros(
- (entry_size - self.entry_base, 2),
+ self.entries = numpy.zeros(
+ (n_entries - self.entry_base, 2),
numpy.int16
)
- self.entry_data[:, :] = entry_data[self.entry_base:entry_size, :]
+ self.entries[:, :] = entries[self.entry_base:n_entries, :]
# n_states == self.action_pointer.shape[0]
- # n_productions == self.rule_data.shape[0]
+ # n_productions == self.rules.shape[0]
# n_nonterminals == self.goto_pointer.shape[0]
self.n_terminals = n_terminals
elif line == '/* GENERATE TABLES */\n':
# yyrline (source line where rule is defined) not implemented yet
yyrline = numpy.zeros(
- (bison_lr1dfa.rule_data.shape[0],),
+ (bison_lr1dfa.rules.shape[0],),
numpy.int16
)
# YYFINAL
bison_lr1dfa.action_pointer.shape[0],
# YYLAST
- bison_lr1dfa.entry_data.shape[0] - 1,
+ bison_lr1dfa.entries.shape[0] - 1,
# YYNTOKENS
bison_lr1dfa.n_terminals,
# YYNNTS
bison_lr1dfa.goto_pointer.shape[0],
# YYNRULES
- bison_lr1dfa.rule_data.shape[0],
+ bison_lr1dfa.rules.shape[0],
# YYNSTATES
bison_lr1dfa.action_pointer.shape[0],
# YYMAXUTOK
','.join(
[
'{0:6d}'.format(j)
- for j in bison_lr1dfa.entry_data[i:i + 10, 0]
+ for j in bison_lr1dfa.entries[i:i + 10, 0]
]
)
)
- for i in range(0, bison_lr1dfa.entry_data.shape[0], 10)
+ for i in range(0, bison_lr1dfa.entries.shape[0], 10)
]
),
# yycheck
','.join(
[
'{0:6d}'.format(j)
- for j in bison_lr1dfa.entry_data[i:i + 10, 1]
+ for j in bison_lr1dfa.entries[i:i + 10, 1]
]
)
)
- for i in range(0, bison_lr1dfa.entry_data.shape[0], 10)
+ for i in range(0, bison_lr1dfa.entries.shape[0], 10)
]
),
# yystos
','.join(
[
'{0:6d}'.format(j)
- for j in bison_lr1dfa.rule_data[i:i + 10, 0]
+ for j in bison_lr1dfa.rules[i:i + 10, 0]
]
)
)
- for i in range(0, bison_lr1dfa.rule_data.shape[0], 10)
+ for i in range(0, bison_lr1dfa.rules.shape[0], 10)
]
),
# yyr2
','.join(
[
'{0:6d}'.format(j)
- for j in bison_lr1dfa.rule_data[i:i + 10, 1]
+ for j in bison_lr1dfa.rules[i:i + 10, 1]
]
)
)
- for i in range(0, bison_lr1dfa.rule_data.shape[0], 10)
+ for i in range(0, bison_lr1dfa.rules.shape[0], 10)
]
)
)
import getopt
import os
import sys
-import xml.etree.ElementTree
home_dir = os.path.dirname(sys.argv[0])
try:
['defines', 'defines=', 'outfile=', 'skel=']
)
except getopt.GetoptError as err:
- sys.stderr.write(str(err) + '\n')
+ sys.stderr.write('{0:s}\n'.format(str(err)))
sys.exit(1)
defines_file = None
+++ /dev/null
-import bisect
-import element
-import work
-import sys
-
-# defines the alphabet size, set this to 0x11000 for unicode
-n_characters = 0x100
-
-class DFA:
- # transition classes:
- # instructions to transform thread list from a multistate to next multistate
- # (TRANSITION_POP, n) j += n
- # (TRANSITION_DUP, n) threads0[j - n:j] = threads0[j:j + n]
- # j -= n
- # (TRANSITION_MARK, n, mark_value) threads0[j:j + n] = [
- # (i, mark, thread)
- # for thread in threads0[j:j + n]
- # ]
- # (TRANSITION_MOVE, n) threads1.extend(threads0[j:j + n])
- # j += n
- # (TRANSITION_DEL, n) del threads1[-n:]
-
- TRANSITION_POP = 0
- TRANSITION_DUP = 1
- TRANSITION_MARK = 2
- TRANSITION_MOVE = 3
- #TRANSITION_DEL = 4
-
- def __init__(
- self,
- groups = [],
- states = [([n_characters], [0], 0)],
- actions = [(0, [])],
- start_action = [] # can have multiple DFAs in same container
- ):
- # groups: list of group_desc
- # group_desc: (tag, kwargs)
- # tag, kwargs will be passed to apply_markup() hence factory()
- # states: list of state_desc
- # state_desc: (list of breaks, list of action to do, accept_thread)
- # actions: list of action_desc
- # action_desc: (state to go to next, compiled transition to do first)
- # accept_thread: which thread of thread list to use, -1 don't accept
- self.groups = groups
- self.states = states
- self.actions = actions
- self.start_action = start_action
-
- def match_text(self, text, i, start_index = 0):
- def transit(transition):
- nonlocal threads0, threads1, prefix_slop # note: also uses i
- j = prefix_slop
- for trans in transition:
- if trans[0] == DFA.TRANSITION_POP:
- j += trans[1]
- elif trans[0] == DFA.TRANSITION_DUP:
- while j < trans[1]:
- threads0[:0] = [None] * prefix_slop
- threads1[:0] = [None] * prefix_slop
- j += prefix_slop
- prefix_slop *= 2
- threads0[j - trans[1]:j] = threads0[j:j + trans[1]]
- j -= trans[1]
- elif trans[0] == DFA.TRANSITION_MARK:
- threads0[j:j + trans[1]] = [
- (i, trans[2], thread)
- for thread in threads0[j:j + trans[1]]
- ]
- elif trans[0] == DFA.TRANSITION_MOVE:
- threads1.extend(threads0[j:j + trans[1]])
- j += trans[1]
- #elif trans[0] == DFA.TRANSITION_DEL:
- # del threads1[-trans[1]:]
- else:
- assert False
- assert j == len(threads0)
- threads0, threads1 = threads1, threads0
- del threads1[prefix_slop:]
-
- threads0 = [None, None]
- threads1 = [None]
- prefix_slop = 1
-
- action = self.start_action[start_index]
- while action != -1:
- state, transition = self.actions[action]
- #print('i', i, 'action', action, 'state', state, 'transition', transition)
- transit(transition)
- if state == 0:
- # there is only one match, which is complete
- assert len(threads0) == prefix_slop + 1
- return threads0[prefix_slop]
- if i >= len(text):
- # return best match we have, but not incomplete match
- i = self.states[state][2]
- return (None if i == -1 else threads0[prefix_slop + i])
- action = self.states[state][1][
- bisect.bisect_right(self.states[state][0], ord(text[i]))
- ]
- i += 1
- return None
-
- def match_yychunk(self, root, pos, off, yychunk_iter, start_index = 0):
- if pos < 0:
- pos, off = element.to_start_relative(root, pos, off)
-
- def transit(transition):
- nonlocal threads0, threads1, prefix_slop # note: also uses pos, off
- j = prefix_slop
- for trans in transition:
- if trans[0] == DFA.TRANSITION_POP:
- j += trans[1]
- elif trans[0] == DFA.TRANSITION_DUP:
- while j < trans[1]:
- threads0[:0] = [None] * prefix_slop
- threads1[:0] = [None] * prefix_slop
- j += prefix_slop
- prefix_slop *= 2
- threads0[j - trans[1]:j] = threads0[j:j + trans[1]]
- j -= trans[1]
- elif trans[0] == DFA.TRANSITION_MARK:
- threads0[j:j + trans[1]] = [
- (pos, off, trans[2], thread)
- for thread in threads0[j:j + trans[1]]
- ]
- elif trans[0] == DFA.TRANSITION_MOVE:
- threads1.extend(threads0[j:j + trans[1]])
- j += trans[1]
- #elif trans[0] == DFA.TRANSITION_DEL:
- # del threads1[-trans[1]:]
- else:
- assert False
- assert j == len(threads0)
- threads0, threads1 = threads1, threads0
- del threads1[prefix_slop:]
-
- threads0 = [None, None]
- threads1 = [None]
- prefix_slop = 1
-
- action = self.start_action[start_index]
- text = element.get_text(root, pos)
- while action != -1:
- state, transition = self.actions[action]
- transit(transition)
- if state == 0:
- # there is only one match, which is complete
- assert len(threads0) == prefix_slop + 1
- return threads0[prefix_slop]
- while off >= len(text):
- if pos < len(root):
- pos += 1
- off = 0
- else:
- try:
- next(yychunk_iter)
- except StopIteration:
- # return best match we have, but not incomplete match
- i = self.states[state][2]
- return (None if i == -1 else threads0[prefix_slop + i])
- text = element.get_text(root, pos)
- #print(
- # 'state {0:d} pos {1:d} off {2:d} text "{3:s}"'.format(
- # state,
- # pos,
- # off,
- # text.replace('\n', '$')
- # )
- #)
- action = self.states[state][1][
- bisect.bisect_right(self.states[state][0], ord(text[off]))
- ]
- off += 1
- return None
-
- def yylex(self, root, pos, off, factory, yychunk_iter):
- if pos < 0:
- pos, off = element.to_start_relative(root, pos, off)
-
- while True:
- # note: pointers must be kept start relative during the below call,
- # because it extends the following text by calling the yychunk_iter
- thread = self.match_yychunk(root, pos, off, yychunk_iter)
- if thread is None:
- break
- stack = []
- while True:
- pos, off, mark_value, thread = thread
- group_index = mark_value >> 1
- if (mark_value & 1) != 0:
- end_pos, end_off = element.to_end_relative(root, pos, off)
- stack.append((end_pos, end_off, group_index))
- else:
- end_pos, end_off, temp = stack.pop()
- assert temp == group_index
- if len(stack) == 0:
- break
- tag, kwargs = self.groups[group_index]
- if tag != '':
- work.apply_markup(
- root,
- pos,
- off,
- end_pos,
- end_off,
- factory,
- tag,
- **kwargs
- )
- # note: pointers must be kept end relative during the below call,
- # because it modifies the preceding text by calling apply_markup()
- yield end_pos, end_off, group_index
- pos, off = element.to_start_relative(root, end_pos, end_off)
-
- def __repr__(self):
- return 'dfa.DFA({0:s}, {1:s}, {2:s}, {3:s})'.format(
- repr(self.groups),
- repr(self.states),
- repr(self.actions),
- repr(self.start_action)
- )
+++ /dev/null
-import bisect
-import bisect_set
-import element
-import lr1
-import work
-import sys
-
-# defines the alphabet size, set this to 0x11000 for unicode
-n_characters = 0x100
-
-class Grammar(element.Element):
- class Production(element.Element):
- class Symbol(element.Element):
- # GENERATE ELEMENT(list(int) terminal_set, list(int) nonterminal_set) BEGIN
- def __init__(
- self,
- tag = 'Grammar_Production_Symbol',
- attrib = {},
- text = '',
- children = [],
- terminal_set = [],
- nonterminal_set = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- self.terminal_set = (
- [element.deserialize_int(i) for i in terminal_set.split()]
- if isinstance(terminal_set, str) else
- terminal_set
- )
- self.nonterminal_set = (
- [element.deserialize_int(i) for i in nonterminal_set.split()]
- if isinstance(nonterminal_set, str) else
- nonterminal_set
- )
- def serialize(self, ref_list, indent = 0):
- element.Element.serialize(self, ref_list, indent)
- self.set(
- 'terminal_set',
- ' '.join([element.serialize_int(i) for i in self.terminal_set])
- )
- self.set(
- 'nonterminal_set',
- ' '.join([element.serialize_int(i) for i in self.nonterminal_set])
- )
- def deserialize(self, ref_list):
- element.Element.deserialize(self, ref_list)
- self.terminal_set = [
- element.deserialize_int(i)
- for i in self.get('terminal_set', '').split()
- ]
- self.nonterminal_set = [
- element.deserialize_int(i)
- for i in self.get('nonterminal_set', '').split()
- ]
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Symbol if factory is None else factory
- )
- result.terminal_set = self.terminal_set
- result.nonterminal_set = self.nonterminal_set
- return result
- def repr_serialize(self, params):
- element.Element.repr_serialize(self, params)
- if len(self.terminal_set):
- params.append(
- 'terminal_set = [{0:s}]'.format(
- ', '.join([repr(i) for i in self.terminal_set])
- )
- )
- if len(self.nonterminal_set):
- params.append(
- 'nonterminal_set = [{0:s}]'.format(
- ', '.join([repr(i) for i in self.nonterminal_set])
- )
- )
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'grammar.Grammar.Production.Symbol({0:s})'.format(', '.join(params))
- # GENERATE END
- def post_process(self, name_to_character_sets):
- pass
-
- class NamedSymbol(Symbol):
- # GENERATE ELEMENT(str name) BEGIN
- def __init__(
- self,
- tag = 'Grammar_Production_NamedSymbol',
- attrib = {},
- text = '',
- children = [],
- terminal_set = [],
- nonterminal_set = [],
- name = ''
- ):
- Grammar.Production.Symbol.__init__(
- self,
- tag,
- attrib,
- text,
- children,
- terminal_set,
- nonterminal_set
- )
- self.name = name
- def serialize(self, ref_list, indent = 0):
- Grammar.Production.Symbol.serialize(self, ref_list, indent)
- self.set('name', element.serialize_str(self.name))
- def deserialize(self, ref_list):
- Grammar.Production.Symbol.deserialize(self, ref_list)
- self.name = element.deserialize_str(self.get('name', ''))
- def copy(self, factory = None):
- result = Grammar.Production.Symbol.copy(
- self,
- NamedSymbol if factory is None else factory
- )
- result.name = self.name
- return result
- def repr_serialize(self, params):
- Grammar.Production.Symbol.repr_serialize(self, params)
- if self.name != '':
- params.append(
- 'name = {0:s}'.format(repr(self.name))
- )
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'grammar.Grammar.Production.NamedSymbol({0:s})'.format(', '.join(params))
- # GENERATE END
- def post_process(self, name_to_character_sets):
- self.terminal_set, self.nonterminal_set = (
- name_to_character_sets[self.name]
- )
-
- # GENERATE ELEMENT(int nonterminal, int precedence, int associativity) BEGIN
- def __init__(
- self,
- tag = 'Grammar_Production',
- attrib = {},
- text = '',
- children = [],
- nonterminal = -1,
- precedence = -1,
- associativity = -1
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- self.nonterminal = (
- element.deserialize_int(nonterminal)
- if isinstance(nonterminal, str) else
- nonterminal
- )
- self.precedence = (
- element.deserialize_int(precedence)
- if isinstance(precedence, str) else
- precedence
- )
- self.associativity = (
- element.deserialize_int(associativity)
- if isinstance(associativity, str) else
- associativity
- )
- def serialize(self, ref_list, indent = 0):
- element.Element.serialize(self, ref_list, indent)
- self.set('nonterminal', element.serialize_int(self.nonterminal))
- self.set('precedence', element.serialize_int(self.precedence))
- self.set('associativity', element.serialize_int(self.associativity))
- def deserialize(self, ref_list):
- element.Element.deserialize(self, ref_list)
- self.nonterminal = element.deserialize_int(self.get('nonterminal', '-1'))
- self.precedence = element.deserialize_int(self.get('precedence', '-1'))
- self.associativity = element.deserialize_int(self.get('associativity', '-1'))
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Production if factory is None else factory
- )
- result.nonterminal = self.nonterminal
- result.precedence = self.precedence
- result.associativity = self.associativity
- return result
- def repr_serialize(self, params):
- element.Element.repr_serialize(self, params)
- if self.nonterminal != -1:
- params.append(
- 'nonterminal = {0:s}'.format(repr(self.nonterminal))
- )
- if self.precedence != -1:
- params.append(
- 'precedence = {0:s}'.format(repr(self.precedence))
- )
- if self.associativity != -1:
- params.append(
- 'associativity = {0:s}'.format(repr(self.associativity))
- )
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'grammar.Grammar.Production({0:s})'.format(', '.join(params))
- # GENERATE END
- def post_process(self, nonterminal, name_to_character_sets):
- self.nonterminal = nonterminal
- for i in self:
- i.post_process(name_to_character_sets)
- def add_to_lr1(self, lr1):
- lr1.productions.append(
- (
- # precedence
- self.precedence * 2 + self.associativity,
- # symbols
- [(i.terminal_set, i.nonterminal_set) for i in self],
- # lookaheads (list of initial_set, can_be_empty)
- [([], False) for i in range(len(self))] + [([], True)],
- # group_bounds
- []
- )
- )
-
- # GENERATE ELEMENT(int n_terminals, int eof_terminal) BEGIN
- def __init__(
- self,
- tag = 'Grammar',
- attrib = {},
- text = '',
- children = [],
- n_terminals = -1,
- eof_terminal = -1
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- self.n_terminals = (
- element.deserialize_int(n_terminals)
- if isinstance(n_terminals, str) else
- n_terminals
- )
- self.eof_terminal = (
- element.deserialize_int(eof_terminal)
- if isinstance(eof_terminal, str) else
- eof_terminal
- )
- def serialize(self, ref_list, indent = 0):
- element.Element.serialize(self, ref_list, indent)
- self.set('n_terminals', element.serialize_int(self.n_terminals))
- self.set('eof_terminal', element.serialize_int(self.eof_terminal))
- def deserialize(self, ref_list):
- element.Element.deserialize(self, ref_list)
- self.n_terminals = element.deserialize_int(self.get('n_terminals', '-1'))
- self.eof_terminal = element.deserialize_int(self.get('eof_terminal', '-1'))
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Grammar if factory is None else factory
- )
- result.n_terminals = self.n_terminals
- result.eof_terminal = self.eof_terminal
- return result
- def repr_serialize(self, params):
- element.Element.repr_serialize(self, params)
- if self.n_terminals != -1:
- params.append(
- 'n_terminals = {0:s}'.format(repr(self.n_terminals))
- )
- if self.eof_terminal != -1:
- params.append(
- 'eof_terminal = {0:s}'.format(repr(self.eof_terminal))
- )
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'grammar.Grammar({0:s})'.format(', '.join(params))
- # GENERATE END
- def post_process(self, name_to_character_sets):
- for i in range(len(self)):
- self[i].post_process(i, name_to_character_sets)
- def to_lr1(self):
- _lr1 = lr1.LR1([], self.n_terminals, self.eof_terminal)
- for i in self:
- i.add_to_lr1(_lr1)
- # propagate lookaheads
- modified = True
- while modified:
- modified = False
- for _, symbols, lookaheads, _ in _lr1.productions:
- for i in range(len(symbols) - 1, -1, -1):
- initial_set, nonterminal_set = symbols[i]
- can_be_empty = False
- for j in range(0, len(nonterminal_set), 2):
- for k in range(nonterminal_set[j], nonterminal_set[j + 1]):
- child_initial_set, child_can_be_empty = _lr1.productions[k][2][0]
- initial_set = bisect_set.bisect_set_or(initial_set, child_initial_set)
- can_be_empty = can_be_empty or child_can_be_empty
- # at this point can_be_empty refers to current symbol only
- if can_be_empty:
- next_initial_set, can_be_empty = lookaheads[i + 1]
- initial_set = bisect_set.bisect_set_or(initial_set, next_initial_set)
- # at this point can_be_empty refers to all remaining symbols
- if (initial_set, can_be_empty) != lookaheads[i]:
- lookaheads[i] = (initial_set, can_be_empty)
- modified = True
- return _lr1
-
-# GENERATE FACTORY(element.Element) BEGIN
-tag_to_class = {
- 'Grammar': Grammar,
- 'Grammar_Production': Grammar.Production,
- 'Grammar_Production_Symbol': Grammar.Production.Symbol,
- 'Grammar_Production_NamedSymbol': Grammar.Production.NamedSymbol
-}
-def factory(tag, attrib = {}, *args, **kwargs):
- return tag_to_class.get(tag, element.Element)(tag, attrib, *args, **kwargs)
-# GENERATE END
+++ /dev/null
-#!/bin/sh
-if ./generate.py grammar <grammar.py >grammar.py.new && ! diff -q grammar.py grammar.py.new
-then
- mv grammar.py.new grammar.py
-else
- rm -f grammar.py.new
-fi
+++ /dev/null
-import bisect
-import dfa
-import element
-
-# defines the alphabet size, set this to 0x11000 for unicode
-n_characters = 0x100
-
-class NFA:
- # state_desc classes:
- # (STATE_CHARACTER, character_set, next_state)
- # (STATE_OR, next_state0, next_state1)
- # (STATE_AND, next_state0, next_state1)
- # (STATE_JOIN0,)
- # (STATE_JOIN1, next_state)
- # (STATE_MARK, mark_value, next_state)
-
- STATE_CHARACTER = 0
- STATE_OR = 1
- STATE_AND = 2
- STATE_JOIN0 = 3
- STATE_JOIN1 = 4
- STATE_MARK = 5
- join0_state = (STATE_JOIN0,)
-
- # multistate classes:
- # (MULTISTATE_ACCEPT, 1) accept, occupies one thread in list
- # (MULTISTATE_AND, n, state, child) and, occupies n threads in list
- # (MULTISTATE_OR, n, child0, child1)
- # n = sum of n from child states, that is, multistate[1] is the number of
- # (MULTISTATE_ACCEPT, 1) leaf states ultimately reachable from this subtree
-
- MULTISTATE_ACCEPT = 0
- MULTISTATE_AND = 1
- MULTISTATE_OR = 2
- accept_multistate = (MULTISTATE_ACCEPT, 1)
-
- def __init__(
- self,
- groups = [],
- states = [(STATE_CHARACTER, [0, n_characters], 0)],
- start_state = [] # can have multiple NFAs in same container
- ):
- # groups: list of group_desc
- # group_desc: (tag, kwargs)
- # tag, kwargs will be passed to apply_markup() hence factory()
- self.groups = groups
- self.states = states
- self.start_state = start_state
-
- def multistate_next(self, root_multistate, character):
- # the deduplication works as effectively a second pass which goes
- # over the multistate tree in pre-order, looking for OR-disjunctions
- # of any depth and configuration, e.g. (a OR b) or (c OR d), and
- # collecting the subexpressions e.g. a, b, c OR b, c, d, c OR d, and
- # failing any subexpression that's previously occurred in pre-order
- # (children of AND-conjunctions get a fresh empty subexpression set)
-
- # unfortunately it can't be done as a second pass literally, because
- # we have to generate the transition list, thus we need to know if a
- # subexpression is going to be failed, so we can delete it's threads
-
- # therefore the deduplication is tacked onto the end of the building
- # process, examining each built node just before the builder returns,
- # if already in the subexpression set we will rewrite the transition
- # list produced by the recursive calls, otherwise we will add it in
-
- # the problem is determining the subexpression set to pass into any
- # recursive calls, the caller's set will be sent unchanged by cases
- # that return the result unchanged or add an OR-disjuction, an empty
- # set will be sent by cases that add an AND-conjuction to the result
-
- # if we have added something to the node built by a recursive call,
- # we'll fall into the deduplication logic, otherwise just return it
-
- def advance1(multistate, join_count, done_multistates):
- # modifies nonlocal: transition
- assert multistate[0] == NFA.MULTISTATE_AND
- _, _, state, child = multistate
- if state == 0:
- # note that if we reach the accepting state, we must be a top-level
- # expression, and could not be part of an AND-conjunction (because
- # AND-conjunction terms always go to a join0 or join1 state first),
- # we remove ourselves to indicate to scanner that match is complete
- assert join_count == 0
- assert child == NFA.accept_multistate
- return advance(child, 0, done_multistates)
- state_desc = self.states[state]
- if state_desc[0] == NFA.STATE_CHARACTER:
- if join_count != 0:
- transition.append((dfa.DFA.TRANSITION_POP, child[1]))
- return None
- len_transition = len(transition)
- child = advance(child, 0, set())
- if child is None:
- return None
- result = (NFA.MULTISTATE_AND, child[1], state, child)
- elif state_desc[0] == NFA.STATE_OR:
- _, next_state0, next_state1 = state_desc
- len_transition = len(transition)
- transition.append((dfa.DFA.TRANSITION_DUP, child[1]))
- child0 = advance1(
- (NFA.MULTISTATE_AND, child[1], next_state0, child),
- join_count,
- done_multistates
- )
- child1 = advance1(
- (NFA.MULTISTATE_AND, child[1], next_state1, child),
- join_count,
- done_multistates
- )
- if child0 is None:
- return child1
- if child1 is None:
- return child0
- result = (NFA.MULTISTATE_OR, child0[1] + child1[1], child0, child1)
- elif state_desc[0] == NFA.STATE_AND:
- _, next_state0, next_state1 = state_desc
- return advance1(
- (
- NFA.MULTISTATE_AND,
- child[1],
- next_state0,
- (NFA.MULTISTATE_AND, child[1], next_state1, child)
- ),
- join_count,
- done_multistates
- )
- elif state_desc[0] == NFA.STATE_JOIN0:
- return advance(child, join_count + 1, done_multistates)
- elif state_desc[0] == NFA.STATE_JOIN1:
- _, next_state = state_desc
- if join_count == 0:
- transition.append((dfa.DFA.TRANSITION_POP, child[1]))
- return None
- return advance1(
- (NFA.MULTISTATE_AND, child[1], next_state, child),
- join_count - 1,
- done_multistates
- )
- elif state_desc[0] == NFA.STATE_MARK:
- _, mark_value, next_state = state_desc
- transition.append((dfa.DFA.TRANSITION_MARK, child[1], mark_value))
- return advance1(
- (NFA.MULTISTATE_AND, child[1], next_state, child),
- join_count,
- done_multistates
- )
- else:
- assert False
- if result in done_multistates:
- transition[len_transition:] = [(dfa.DFA.TRANSITION_POP, multistate[1])]
- return None
- done_multistates.add(result)
- return result
-
- def advance(multistate, join_count, done_multistates):
- nonlocal character0, character1 # modifies nonlocal: transition
- if multistate[0] == NFA.MULTISTATE_ACCEPT:
- assert join_count == 0
- len_transition = len(transition)
- transition.append((dfa.DFA.TRANSITION_MOVE, 1))
- result = NFA.accept_multistate # takes no arguments so use static one
- elif multistate[0] == NFA.MULTISTATE_AND:
- if character >= 0:
- _, _, state, child = multistate
- state_desc = self.states[state]
- assert state_desc[0] == NFA.STATE_CHARACTER
- _, character_set, next_state = state_desc
- k = bisect.bisect_right(character_set, character)
- if k > 0 and character0 < character_set[k - 1]:
- character0 = character_set[k - 1]
- if k < len(character_set) and character1 > character_set[k]:
- character1 = character_set[k]
- if (k & 1) == 0:
- transition.append((dfa.DFA.TRANSITION_POP, child[1]))
- return None
- multistate = (NFA.MULTISTATE_AND, child[1], next_state, child)
- return advance1(multistate, join_count, done_multistates)
- elif multistate[0] == NFA.MULTISTATE_OR:
- _, _, child0, child1 = multistate
- len_transition = len(transition)
- child0 = advance(child0, join_count, done_multistates)
- child1 = advance(child1, join_count, done_multistates)
- if child0 is None:
- return child1
- if child1 is None:
- return child0
- result = (NFA.MULTISTATE_OR, child0[1] + child1[1], child0, child1)
- else:
- assert False
- if result in done_multistates:
- transition[len_transition:] = [(dfa.DFA.TRANSITION_POP, multistate[1])]
- return None
- done_multistates.add(result)
- return result
-
- transition = []
- character0 = 0
- character1 = n_characters
- root_multistate = advance(root_multistate, 0, set())
- return root_multistate, transition, character0, character1
-
- def multistate_accept(root_multistate):
- i = 0
- def accept(multistate):
- nonlocal i
- if multistate[0] == NFA.MULTISTATE_ACCEPT:
- return True
- if multistate[0] == NFA.MULTISTATE_AND:
- _, _, _, child = multistate
- i += child[1]
- return False
- if multistate[0] == NFA.MULTISTATE_OR:
- _, _, child0, child1 = multistate
- return accept(child0) or accept(child1)
- assert False
- return i if accept(root_multistate) else -1
-
- def match_text(self, text, i, start_index = 0):
- def transit(transition):
- nonlocal threads0, threads1, prefix_slop # note: also uses i
- j = prefix_slop
- for trans in transition:
- if trans[0] == dfa.DFA.TRANSITION_POP:
- j += trans[1]
- elif trans[0] == dfa.DFA.TRANSITION_DUP:
- while j < trans[1]:
- threads0[:0] = [None] * prefix_slop
- threads1[:0] = [None] * prefix_slop
- j += prefix_slop
- prefix_slop *= 2
- threads0[j - trans[1]:j] = threads0[j:j + trans[1]]
- j -= trans[1]
- elif trans[0] == dfa.DFA.TRANSITION_MARK:
- threads0[j:j + trans[1]] = [
- (i, trans[2], thread)
- for thread in threads0[j:j + trans[1]]
- ]
- elif trans[0] == dfa.DFA.TRANSITION_MOVE:
- threads1.extend(threads0[j:j + trans[1]])
- j += trans[1]
- #elif trans[0] == dfa.DFA.TRANSITION_DEL:
- # del threads1[-trans[1]:]
- else:
- assert False
- assert j == len(threads0)
- threads0, threads1 = threads1, threads0
- del threads1[prefix_slop:]
-
- threads0 = [None, None]
- threads1 = [None]
- prefix_slop = 1
-
- start_state = self.start_state[start_index]
- if start_state == -1:
- return None
- next_multistate, transition, _, _ = self.multistate_next(
- (NFA.MULTISTATE_AND, 1, start_state, NFA.accept_multistate),
- -1
- )
- while next_multistate is not None:
- transit(transition)
- assert len(threads0) == prefix_slop + next_multistate[1]
- if next_multistate == NFA.accept_multistate:
- # there is only one match, which is complete
- assert len(threads0) == prefix_slop + 1
- return threads0[prefix_slop]
- if i >= len(text):
- # return best match we have, but not incomplete match
- i = NFA.multistate_accept(next_multistate)
- return (None if i == -1 else threads0[prefix_slop + i])
- next_multistate, transition, _, _ = (
- self.multistate_next(next_multistate, ord(text[i]))
- )
- i += 1
- return None
-
- def match_yychunk(self, root, pos, off, yychunk_iter, start_index = 0):
- if pos < 0:
- pos, off = element.to_start_relative(root, pos, off)
-
- def transit(transition):
- nonlocal threads0, threads1, prefix_slop # note: also uses pos, off
- j = prefix_slop
- for trans in transition:
- if trans[0] == dfa.DFA.TRANSITION_POP:
- j += trans[1]
- elif trans[0] == dfa.DFA.TRANSITION_DUP:
- while j < trans[1]:
- threads0[:0] = [None] * prefix_slop
- threads1[:0] = [None] * prefix_slop
- j += prefix_slop
- prefix_slop *= 2
- threads0[j - trans[1]:j] = threads0[j:j + trans[1]]
- j -= trans[1]
- elif trans[0] == dfa.DFA.TRANSITION_MARK:
- threads0[j:j + trans[1]] = [
- (pos, off, trans[2], thread)
- for thread in threads0[j:j + trans[1]]
- ]
- elif trans[0] == dfa.DFA.TRANSITION_MOVE:
- threads1.extend(threads0[j:j + trans[1]])
- j += trans[1]
- #elif trans[0] == dfa.DFA.TRANSITION_DEL:
- # del threads1[-trans[1]:]
- else:
- assert False
- assert j == len(threads0)
- threads0, threads1 = threads1, threads0
- del threads1[prefix_slop:]
-
- threads0 = [None, None]
- threads1 = [None]
- prefix_slop = 1
-
- start_state = self.start_state[start_index]
- if start_state == -1:
- return None
- next_multistate, transition, _, _ = self.multistate_next(
- (NFA.MULTISTATE_AND, 1, start_state, NFA.accept_multistate),
- -1
- )
- text = element.get_text(root, pos)
- while next_multistate is not None:
- transit(transition)
- assert len(threads0) == prefix_slop + next_multistate[1]
- if next_multistate == NFA.accept_multistate:
- # there is only one match, which is complete
- assert len(threads0) == prefix_slop + 1
- return threads0[prefix_slop]
- while off >= len(text):
- if pos < len(root):
- pos += 1
- off = 0
- else:
- try:
- next(yychunk_iter)
- except StopIteration:
- # return best match we have, but not incomplete match
- i = NFA.multistate_accept(next_multistate)
- return (None if i == -1 else threads0[prefix_slop + i])
- text = element.get_text(root, pos)
- next_multistate, transition, _, _ = (
- self.multistate_next(next_multistate, ord(text[off]))
- )
- off += 1
- return None
-
- def to_dfa(self):
- dfa = dfa.DFA(list(self.groups))
-
- accept_key = (NFA.accept_multistate, ())
- action_to_meaning = [accept_key]
- meaning_to_action = {accept_key: 0}
- state_to_meaning = [NFA.accept_multistate]
- meaning_to_state = {NFA.accept_multistate: 0}
-
- for start_state in self.start_state:
- if start_state == -1:
- start_action = -1
- else:
- next_multistate, transition, _, _ = self.multistate_next(
- (NFA.MULTISTATE_AND, 1, start_state, NFA.accept_multistate),
- -1
- )
- if next_multistate is None:
- start_action = -1
- else:
- start_key = (next_multistate, tuple(transition))
- start_action = len(action_to_meaning)
- meaning_to_action[start_key] = start_action
- action_to_meaning.append(start_key)
- dfa.start_action.append(start_action)
-
- while len(dfa.actions) < len(action_to_meaning):
- next_multistate, transition = action_to_meaning[len(dfa.actions)]
- if next_multistate in meaning_to_state:
- next_state = meaning_to_state[next_multistate]
- else:
- next_state = len(state_to_meaning)
- state_to_meaning.append(next_multistate)
- meaning_to_state[next_multistate] = next_state
- dfa.actions.append((next_state, list(transition)))
-
- while len(dfa.states) < len(state_to_meaning):
- character = 0
- multistate = state_to_meaning[len(dfa.states)]
- state_desc = ([], [], NFA.multistate_accept(multistate))
- while character < n_characters:
- next_multistate, transition, character0, character1 = self.multistate_next(
- multistate,
- character
- )
- assert character0 == character and character1 > character
- if next_multistate is None:
- action = -1
- else:
- # optimize transition (optional)
- i = 0
- j = 0
- while i < len(transition):
- if transition[i][0] == dfa.DFA.TRANSITION_POP:
- n = transition[i][1]
- i += 1
- while (
- i < len(transition) and
- transition[i][0] == dfa.DFA.TRANSITION_POP
- ):
- n += transition[i][1]
- i += 1
- transition[j] = (dfa.DFA.TRANSITION_POP, n)
- elif transition[i][0] == dfa.DFA.TRANSITION_MOVE:
- n = transition[i][1]
- i += 1
- while (
- i < len(transition) and
- transition[i][0] == dfa.DFA.TRANSITION_MOVE
- ):
- n += transition[i][1]
- i += 1
- transition[j] = (dfa.DFA.TRANSITION_MOVE, n)
- else:
- transition[j] = transition[i]
- i += 1
- j += 1
- del transition[j:]
- # end optimize transition
- key = (next_multistate, tuple(transition))
- if key in meaning_to_action:
- action = meaning_to_action[key]
- else:
- action = len(action_to_meaning)
- action_to_meaning.append(key)
- meaning_to_action[key] = action
- state_desc[0].append(character1)
- state_desc[1].append(action)
- character = character1
- dfa.states.append(state_desc)
- return dfa
-
- def __repr__(self):
- return 'nfa.NFA({0:s}, {1:s}, {2:s})'.format(
- repr(self.groups),
- repr(self.states),
- repr(self.start_state)
- )
+++ /dev/null
-import bisect_set
-import element
-import nfa
-
-# defines the alphabet size, set this to 0x11000 for unicode
-n_characters = 0x100
-
-class Regex(element.Element):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'Regex',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Regex if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'regex.Regex({0:s})'.format(', '.join(params))
- # GENERATE END
- def post_process(self, group_index = 0): #, rule_name_to_character_set = None):
- for i in self:
- group_index = i.post_process(group_index) #, rule_name_to_character_set)
- return group_index
- def to_groups(self, groups):
- for i in self:
- i.to_groups(groups)
- def to_nfa_state(self, nfa, next_state):
- raise NotImplementedException
- def add_to_nfa(self, nfa):
- nfa.start_state.append(self.to_nfa_state(nfa, 0))
- #def to_lr1_symbols(self, n_terminals, symbols, lookaheads, group_bounds):
- # group_count = 0
- # for i in self:
- # group_count += (
- # i.to_lr1_symbols(n_terminals, symbols, lookaheads, group_bounds)
- # )
- # return group_count # count of groups or ungrouped characters
-
-class RegexNone(Regex):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'RegexNone',
- attrib = {},
- text = '',
- children = []
- ):
- Regex.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = Regex.copy(
- self,
- RegexNone if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'regex.RegexNone({0:s})'.format(', '.join(params))
- # GENERATE END
- def to_nfa_state(self, nfa, next_state):
- return -1
-
-class RegexEmpty(Regex):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'RegexEmpty',
- attrib = {},
- text = '',
- children = []
- ):
- Regex.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = Regex.copy(
- self,
- RegexEmpty if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'regex.RegexEmpty({0:s})'.format(', '.join(params))
- # GENERATE END
- def to_nfa_state(self, nfa, next_state):
- return next_state
-
-class RegexCharacter(Regex):
- # GENERATE ELEMENT(list(int) character_set) BEGIN
- def __init__(
- self,
- tag = 'RegexCharacter',
- attrib = {},
- text = '',
- children = [],
- character_set = []
- ):
- Regex.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- self.character_set = (
- [element.deserialize_int(i) for i in character_set.split()]
- if isinstance(character_set, str) else
- character_set
- )
- def serialize(self, ref_list, indent = 0):
- Regex.serialize(self, ref_list, indent)
- self.set(
- 'character_set',
- ' '.join([element.serialize_int(i) for i in self.character_set])
- )
- def deserialize(self, ref_list):
- Regex.deserialize(self, ref_list)
- self.character_set = [
- element.deserialize_int(i)
- for i in self.get('character_set', '').split()
- ]
- def copy(self, factory = None):
- result = Regex.copy(
- self,
- RegexCharacter if factory is None else factory
- )
- result.character_set = self.character_set
- return result
- def repr_serialize(self, params):
- Regex.repr_serialize(self, params)
- if len(self.character_set):
- params.append(
- 'character_set = [{0:s}]'.format(
- ', '.join([repr(i) for i in self.character_set])
- )
- )
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'regex.RegexCharacter({0:s})'.format(', '.join(params))
- # GENERATE END
- def to_nfa_state(self, nfa, next_state):
- new_state = len(nfa.states)
- nfa.states.append((nfa.NFA.STATE_CHARACTER, self.character_set, next_state))
- return new_state
- #def to_lr1_symbols(self, n_terminals, symbols, lookaheads, group_bounds):
- # terminal_set = []
- # nonterminal_set = []
- # i = 0
- # while i < len(self.character_set):
- # [j, k] = self.character_set[i:i + 2]
- # if k > n_terminals:
- # if j < n_terminals:
- # terminal_set.extend([j, n_terminals])
- # nonterminal_set.extend([0, k - n_terminals])
- # i += 2
- # while i < len(self.character_set):
- # [j, k] = self.character_set[i:i + 2]
- # nonterminal_set.extend([j - n_terminals, k - n_terminals])
- # i += 2
- # break
- # terminal_set.extend([j, k])
- # i += 2
- # symbols.append((terminal_set, nonterminal_set))
- # lookaheads.append(([], False)) # initial_set, can_be_empty
- # return 1 # count of groups or ungrouped characters
-
-class RegexCharacterRange(RegexCharacter):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'RegexCharacterRange',
- attrib = {},
- text = '',
- children = [],
- character_set = []
- ):
- RegexCharacter.__init__(
- self,
- tag,
- attrib,
- text,
- children,
- character_set
- )
- def copy(self, factory = None):
- result = RegexCharacter.copy(
- self,
- RegexCharacterRange if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'regex.RegexCharacterRange({0:s})'.format(', '.join(params))
- # GENERATE END
- def post_process(self, group_index = 0): #, rule_name_to_character_set = None):
- group_index = RegexCharacter.post_process(self, group_index) #, rule_name_to_character_set)
- self.character_set = [self[0].character_set[0], self[1].character_set[-1]]
- return group_index
-
-class RegexCharacterOr(RegexCharacter):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'RegexCharacterOr',
- attrib = {},
- text = '',
- children = [],
- character_set = []
- ):
- RegexCharacter.__init__(
- self,
- tag,
- attrib,
- text,
- children,
- character_set
- )
- def copy(self, factory = None):
- result = RegexCharacter.copy(
- self,
- RegexCharacterOr if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'regex.RegexCharacterOr({0:s})'.format(', '.join(params))
- # GENERATE END
- def post_process(self, group_index = 0): #, rule_name_to_character_set = None):
- group_index = RegexCharacter.post_process(self, group_index) #, rule_name_to_character_set)
- self.character_set = bisect_set.bisect_set_or(self[0].character_set, self[1].character_set)
- return group_index
-
-class RegexCharacterAnd(RegexCharacter):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'RegexCharacterAnd',
- attrib = {},
- text = '',
- children = [],
- character_set = []
- ):
- RegexCharacter.__init__(
- self,
- tag,
- attrib,
- text,
- children,
- character_set
- )
- def copy(self, factory = None):
- result = RegexCharacter.copy(
- self,
- RegexCharacterAnd if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'regex.RegexCharacterAnd({0:s})'.format(', '.join(params))
- # GENERATE END
- def post_process(self, group_index = 0): #, rule_name_to_character_set = None):
- group_index = RegexCharacter.post_process(self, group_index) #, rule_name_to_character_set)
- self.character_set = bisect_set.bisect_set_and(self[0].character_set, self[1].character_set)
- return group_index
-
-class RegexCharacterNot(RegexCharacter):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'RegexCharacterNot',
- attrib = {},
- text = '',
- children = [],
- character_set = []
- ):
- RegexCharacter.__init__(
- self,
- tag,
- attrib,
- text,
- children,
- character_set
- )
- def copy(self, factory = None):
- result = RegexCharacter.copy(
- self,
- RegexCharacterNot if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'regex.RegexCharacterNot({0:s})'.format(', '.join(params))
- # GENERATE END
- def post_process(self, group_index = 0): #, rule_name_to_character_set = None):
- group_index = RegexCharacter.post_process(self, group_index) #, rule_name_to_character_set)
- self.character_set = bisect_set.bisect_set_not(self[0].character_set)
- return group_index
-
-#class RegexCharacterRule(RegexCharacter):
-# # GENERATE ELEMENT(str rule_name) BEGIN
-# def __init__(
-# self,
-# tag = 'RegexCharacterRule',
-# attrib = {},
-# text = '',
-# children = [],
-# character_set = [],
-# rule_name = ''
-# ):
-# RegexCharacter.__init__(
-# self,
-# tag,
-# attrib,
-# text,
-# children,
-# character_set
-# )
-# self.rule_name = rule_name
-# def serialize(self, ref_list, indent = 0):
-# RegexCharacter.serialize(self, ref_list, indent)
-# self.set('rule_name', element.serialize_str(self.rule_name))
-# def deserialize(self, ref_list):
-# RegexCharacter.deserialize(self, ref_list)
-# self.rule_name = element.deserialize_str(self.get('rule_name', ''))
-# def copy(self, factory = None):
-# result = RegexCharacter.copy(
-# self,
-# RegexCharacterRule if factory is None else factory
-# )
-# result.rule_name = self.rule_name
-# return result
-# def repr_serialize(self, params):
-# RegexCharacter.repr_serialize(self, params)
-# if self.rule_name != '':
-# params.append(
-# 'rule_name = {0:s}'.format(repr(self.rule_name))
-# )
-# def __repr__(self):
-# params = []
-# self.repr_serialize(params)
-# return 'regex.RegexCharacterRule({0:s})'.format(', '.join(params))
-# # GENERATE END
-# def post_process(self, group_index = 0, rule_name_to_character_set = None):
-# if rule_name_to_character_set is not None:
-# self.character_set = rule_name_to_character_set[self.rule_name]
-# return RegexCharacter.post_process(self, group_index, rule_name_to_character_set)
-
-class RegexOr(Regex):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'RegexOr',
- attrib = {},
- text = '',
- children = []
- ):
- Regex.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = Regex.copy(
- self,
- RegexOr if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'regex.RegexOr({0:s})'.format(', '.join(params))
- # GENERATE END
- def to_nfa_state(self, nfa, next_state):
- child0_state = self[0].to_nfa_state(nfa, next_state)
- child1_state = self[1].to_nfa_state(nfa, next_state)
- if child0_state == -1:
- return child1_state
- if child1_state == -1:
- return child0_state
- new_state = len(nfa.states)
- nfa.states.append((nfa.NFA.STATE_OR, child0_state, child1_state))
- return new_state
-
-class RegexAnd(Regex):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'RegexAnd',
- attrib = {},
- text = '',
- children = []
- ):
- Regex.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = Regex.copy(
- self,
- RegexAnd if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'regex.RegexAnd({0:s})'.format(', '.join(params))
- # GENERATE END
- def to_nfa_state(self, nfa, next_state):
- join0_state = len(nfa.states)
- nfa.states.append(nfa.NFA.join0_state) # takes no arguments so use static one
- join1_state = len(nfa.states)
- nfa.states.append((nfa.NFA.STATE_JOIN1, next_state))
- child0_state = self[0].to_nfa_state(nfa, join0_state)
- if child0_state == -1:
- return -1
- child1_state = self[1].to_nfa_state(nfa, join1_state)
- if child1_state == -1:
- return -1
- new_state = len(nfa.states)
- nfa.states.append((nfa.NFA.STATE_AND, child0_state, child1_state))
- return new_state
-
-class RegexSequence(Regex):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'RegexSequence',
- attrib = {},
- text = '',
- children = []
- ):
- Regex.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = Regex.copy(
- self,
- RegexSequence if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'regex.RegexSequence({0:s})'.format(', '.join(params))
- # GENERATE END
- def to_nfa_state(self, nfa, next_state):
- child1_state = self[1].to_nfa_state(nfa, next_state)
- if child1_state == -1:
- return -1
- return self[0].to_nfa_state(nfa, child1_state)
-
-class RegexRepeat(Regex):
- # GENERATE ELEMENT(int count0, int count1, bool non_greedy) BEGIN
- def __init__(
- self,
- tag = 'RegexRepeat',
- attrib = {},
- text = '',
- children = [],
- count0 = -1,
- count1 = -1,
- non_greedy = False
- ):
- Regex.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- self.count0 = (
- element.deserialize_int(count0)
- if isinstance(count0, str) else
- count0
- )
- self.count1 = (
- element.deserialize_int(count1)
- if isinstance(count1, str) else
- count1
- )
- self.non_greedy = (
- element.deserialize_bool(non_greedy)
- if isinstance(non_greedy, str) else
- non_greedy
- )
- def serialize(self, ref_list, indent = 0):
- Regex.serialize(self, ref_list, indent)
- self.set('count0', element.serialize_int(self.count0))
- self.set('count1', element.serialize_int(self.count1))
- self.set('non_greedy', element.serialize_bool(self.non_greedy))
- def deserialize(self, ref_list):
- Regex.deserialize(self, ref_list)
- self.count0 = element.deserialize_int(self.get('count0', '-1'))
- self.count1 = element.deserialize_int(self.get('count1', '-1'))
- self.non_greedy = element.deserialize_bool(self.get('non_greedy', 'false'))
- def copy(self, factory = None):
- result = Regex.copy(
- self,
- RegexRepeat if factory is None else factory
- )
- result.count0 = self.count0
- result.count1 = self.count1
- result.non_greedy = self.non_greedy
- return result
- def repr_serialize(self, params):
- Regex.repr_serialize(self, params)
- if self.count0 != -1:
- params.append(
- 'count0 = {0:s}'.format(repr(self.count0))
- )
- if self.count1 != -1:
- params.append(
- 'count1 = {0:s}'.format(repr(self.count1))
- )
- if self.non_greedy != False:
- params.append(
- 'non_greedy = {0:s}'.format(repr(self.non_greedy))
- )
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'regex.RegexRepeat({0:s})'.format(', '.join(params))
- # GENERATE END
- def post_process(self, group_index = 0): #, rule_name_to_character_set = None):
- # total hack which will be done in a Python action in future
- if len(self) >= 2:
- assert self[1].tag == 'Number'
- self.count0 = int(self[1].text)
- if len(self) >= 3:
- assert self[2].tag == 'Number'
- self.count1 = int(self[2].text)
- else:
- self.count1 = self.count0
- del self[1:]
- # end total hack
- return Regex.post_process(self, group_index) #, rule_name_to_character_set)
- def to_nfa_state(self, nfa, next_state):
- count0 = self.count0
- count1 = self.count1
- if count1 == -1:
- new_state = len(nfa.states)
- nfa.states.append(None)
- child_state = self[0].to_nfa_state(nfa, new_state)
- if child_state == -1:
- new_state = next_state # note: unreachable state remains invalid (None)
- else:
- nfa.states[new_state] = (
- (nfa.NFA.STATE_OR, next_state, child_state)
- if self.non_greedy else
- (nfa.NFA.STATE_OR, child_state, next_state)
- )
- else:
- new_state = next_state
- for i in range(count1 - count0):
- child_state = self[0].to_nfa_state(nfa, new_state)
- if child_state == -1:
- break
- new_state = len(nfa.states)
- nfa.states.append(
- (nfa.NFA.STATE_OR, next_state, child_state)
- if self.non_greedy else
- (nfa.NFA.STATE_OR, child_state, next_state)
- )
- for i in range(count0):
- new_state = self[0].to_nfa_state(nfa, new_state)
- if new_state == -1:
- break
- return new_state
-
-class RegexGroup(Regex):
- class Attribute(element.Element):
- # GENERATE ELEMENT(str name, str value) BEGIN
- def __init__(
- self,
- tag = 'RegexGroup_Attribute',
- attrib = {},
- text = '',
- children = [],
- name = '',
- value = ''
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- self.name = name
- self.value = value
- def serialize(self, ref_list, indent = 0):
- element.Element.serialize(self, ref_list, indent)
- self.set('name', element.serialize_str(self.name))
- self.set('value', element.serialize_str(self.value))
- def deserialize(self, ref_list):
- element.Element.deserialize(self, ref_list)
- self.name = element.deserialize_str(self.get('name', ''))
- self.value = element.deserialize_str(self.get('value', ''))
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Attribute if factory is None else factory
- )
- result.name = self.name
- result.value = self.value
- return result
- def repr_serialize(self, params):
- element.Element.repr_serialize(self, params)
- if self.name != '':
- params.append(
- 'name = {0:s}'.format(repr(self.name))
- )
- if self.value != '':
- params.append(
- 'value = {0:s}'.format(repr(self.value))
- )
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'regex.RegexGroup.Attribute({0:s})'.format(', '.join(params))
- # GENERATE END
-
- # GENERATE ELEMENT(int group_index, str group_name, list(ref) group_attributes) BEGIN
- def __init__(
- self,
- tag = 'RegexGroup',
- attrib = {},
- text = '',
- children = [],
- group_index = -1,
- group_name = '',
- group_attributes = []
- ):
- Regex.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- self.group_index = (
- element.deserialize_int(group_index)
- if isinstance(group_index, str) else
- group_index
- )
- self.group_name = group_name
- self.group_attributes = group_attributes
- def serialize(self, ref_list, indent = 0):
- Regex.serialize(self, ref_list, indent)
- self.set('group_index', element.serialize_int(self.group_index))
- self.set('group_name', element.serialize_str(self.group_name))
- self.set(
- 'group_attributes',
- ' '.join([element.serialize_ref(i, ref_list) for i in self.group_attributes])
- )
- def deserialize(self, ref_list):
- Regex.deserialize(self, ref_list)
- self.group_index = element.deserialize_int(self.get('group_index', '-1'))
- self.group_name = element.deserialize_str(self.get('group_name', ''))
- self.group_attributes = [
- element.deserialize_ref(i, ref_list)
- for i in self.get('group_attributes', '').split()
- ]
- def copy(self, factory = None):
- result = Regex.copy(
- self,
- RegexGroup if factory is None else factory
- )
- result.group_index = self.group_index
- result.group_name = self.group_name
- result.group_attributes = self.group_attributes
- return result
- def repr_serialize(self, params):
- Regex.repr_serialize(self, params)
- if self.group_index != -1:
- params.append(
- 'group_index = {0:s}'.format(repr(self.group_index))
- )
- if self.group_name != '':
- params.append(
- 'group_name = {0:s}'.format(repr(self.group_name))
- )
- if len(self.group_attributes):
- params.append(
- 'group_attributes = [{0:s}]'.format(
- ', '.join([repr(i) for i in self.group_attributes])
- )
- )
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'regex.RegexGroup({0:s})'.format(', '.join(params))
- # GENERATE END
- def post_process(self, group_index = 0): #, rule_name_to_character_set = None):
- # total hack which will be done in a Python action in future
- if len(self) >= 2:
- assert self[0].tag == 'GroupName'
- self.group_name = self[0].text[1:-1]
- del self[:1]
- # end total hack
- self.group_index = group_index
- group_index += 1
- return Regex.post_process(self, group_index) #, rule_name_to_character_set)
- def to_groups(self, groups):
- assert len(groups) == self.group_index
- groups.append(
- (self.group_name, {i.name: i.value for i in self.group_attributes})
- )
- return Regex.to_groups(self, groups)
- def to_nfa_state(self, nfa, next_state):
- mark_state = len(nfa.states)
- nfa.states.append((nfa.NFA.STATE_MARK, self.group_index * 2 + 1, next_state))
- child_state = self[0].to_nfa_state(nfa, mark_state)
- if child_state == -1:
- return -1
- new_state = len(nfa.states)
- nfa.states.append((nfa.NFA.STATE_MARK, self.group_index * 2, child_state))
- return new_state
- #def to_lr1_symbols(self, n_terminals, symbols, lookaheads, group_bounds):
- # group_start = len(symbols)
- # assert self.group_index == len(group_bounds)
- # group_bounds.append(None)
- # group_count = Regex.to_lr1_symbols(
- # self,
- # n_terminals,
- # symbols,
- # lookaheads,
- # group_bounds
- # )
- # group_bounds[self.group_index] = (
- # group_start,
- # group_count,
- # self.group_name,
- # {i.name: i.value for i in self.group_attributes}
- # )
- # return 1 # count of groups or ungrouped characters
-
-# GENERATE FACTORY(element.Element) BEGIN
-tag_to_class = {
- 'Regex': Regex,
- 'RegexNone': RegexNone,
- 'RegexEmpty': RegexEmpty,
- 'RegexCharacter': RegexCharacter,
- 'RegexCharacterRange': RegexCharacterRange,
- 'RegexCharacterOr': RegexCharacterOr,
- 'RegexCharacterAnd': RegexCharacterAnd,
- 'RegexCharacterNot': RegexCharacterNot,
- 'RegexOr': RegexOr,
- 'RegexAnd': RegexAnd,
- 'RegexSequence': RegexSequence,
- 'RegexRepeat': RegexRepeat,
- 'RegexGroup': RegexGroup,
- 'RegexGroup_Attribute': RegexGroup.Attribute
-}
-def factory(tag, attrib = {}, *args, **kwargs):
- return tag_to_class.get(tag, element.Element)(tag, attrib, *args, **kwargs)
-# GENERATE END
-
-if __name__ == '__main__':
- import sys
- import xml.etree.ElementTree
-
- regex = RegexAnd(children = [RegexRepeat(children = [RegexCharacterNot(
-children = [RegexCharacter()], character_set = [0, 256])]), RegexGroup(children = [
-RegexOr(children = [RegexOr(children = [RegexOr(children = [RegexGroup(children
-= [RegexRepeat(children = [RegexCharacter(character_set = [9, 14, 32, 33])],
-one_or_more = True)], group_index = 1, group_name = 'Whitespace'), RegexGroup(
-children = [RegexRepeat(children = [RegexCharacter(character_set = [48, 58])],
-one_or_more = True)], group_index = 2, group_name = 'Number')]), RegexGroup(
-children = [RegexSequence(children = [RegexSequence(children = [RegexSequence(
-children = [RegexEmpty(), RegexCharacter(character_set = [102, 103])]),
-RegexCharacter(character_set = [111, 112])]), RegexCharacter(character_set = [114, 115])]
-)], group_index = 3, group_name = 'For')]), RegexGroup(children = [
-RegexSequence(children = [RegexCharacter(character_set = [65, 91, 95, 96, 97, 123]),
-RegexRepeat(children = [RegexCharacter(character_set = [48, 58, 65, 91, 95, 96, 97,
-123])])])], group_index = 4, group_name = 'Identifier')])], group_index = 0)])
- #sys.stdout.write(
- # wrap_repr(
- # ' regex = {0:s}'.format(repr(regex).replace('regex.', '')),
- # 79
- # )
- #)
-
- nfa = regex.to_nfa()
- #sys.stdout.write(
- # wrap_repr(
- # ' nfa = {0:s}'.format(repr(nfa).replace('regex.', '')),
- # 79
- # )
- #)
-
- text = ' id 99id id99 for forex '
- i = 0
- while i < len(text):
- print('text "{0:s}"'.format(text[i:i + 72].replace('\n', '$')))
- thread = nfa.match_text(text, i)
- if thread is None:
- print('no match')
- break
- i = thread[0] # end position of overall match
- group_start = [-1 for j in range(len(nfa.groups))]
- group_end = [-1 for j in range(len(nfa.groups))]
- while thread is not None:
- pos, mark, thread = thread
- group = mark >> 1
- if (mark & 1) == 0:
- group_start[group] = pos
- print(
- 'group {0:d} name "{1:s}" text "{2:s}"'.format(
- group,
- nfa.groups[group][0],
- text[group_start[group]:group_end[group]].replace('\n', '$')
- )
- )
- else:
- group_end[group] = pos
-
- dfa = nfa.to_dfa()
- #sys.stdout.write(
- # wrap_repr(
- # ' dfa = {0:s}'.format(repr(dfa).replace('regex.', '')),
- # 79
- # )
- #)
-
- text = ' id 99id id99 for forex '
- i = 0
- while i < len(text):
- print('text "{0:s}"'.format(text[i:i + 72].replace('\n', '$')))
- thread = dfa.match_text(text, i)
- if thread is None:
- print('no match')
- break
- i = thread[0] # end position of overall match
- group_start = [-1 for j in range(len(dfa.groups))]
- group_end = [-1 for j in range(len(dfa.groups))]
- while thread is not None:
- pos, mark, thread = thread
- group = mark >> 1
- if (mark & 1) == 0:
- group_start[group] = pos
- print(
- 'group {0:d} name "{1:s}" text "{2:s}"'.format(
- group,
- dfa.groups[group][0],
- text[group_start[group]:group_end[group]].replace('\n', '$')
- )
- )
- else:
- group_end[group] = pos
-
- grammar = Grammar(children = [Grammar.Production(children = [RegexSequence(
-children = [RegexSequence(children = [RegexEmpty(), RegexCharacterRule(character_set
-= [288, 295], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [
-259, 262], rule_name = 'expr0')])], nonterminal = 0), Grammar.Production(
-children = [RegexSequence(children = [RegexEmpty(), RegexCharacterRule(character_set
-= [262, 265], rule_name = 'expr1')])], nonterminal = 1), Grammar.Production(
-children = [RegexSequence(children = [RegexEmpty(), RegexGroup(children = [
-RegexSequence(children = [RegexSequence(children = [RegexSequence(children = [
-RegexSequence(children = [RegexEmpty(), RegexCharacterRule(character_set = [259, 262
-], rule_name = 'expr0')]), RegexCharacter(character_set = [43, 44])]),
-RegexCharacterRule(character_set = [288, 295], rule_name = 'whitespace_opt')]),
-RegexCharacterRule(character_set = [262, 265], rule_name = 'expr1')])], group_index
-= 0, group_name = 'Add')])], nonterminal = 2), Grammar.Production(children = [
-RegexSequence(children = [RegexEmpty(), RegexGroup(children = [RegexSequence(
-children = [RegexSequence(children = [RegexSequence(children = [RegexSequence(
-children = [RegexEmpty(), RegexCharacterRule(character_set = [259, 262], rule_name =
-'expr0')]), RegexCharacter(character_set = [45, 46])]), RegexCharacterRule(character_set
-= [288, 295], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [
-262, 265], rule_name = 'expr1')])], group_index = 0, group_name = 'Subtract')])
-], nonterminal = 3), Grammar.Production(children = [RegexSequence(children = [
-RegexEmpty(), RegexCharacterRule(character_set = [265, 268], rule_name = 'expr2')])
-], nonterminal = 4), Grammar.Production(children = [RegexSequence(children = [
-RegexEmpty(), RegexGroup(children = [RegexSequence(children = [RegexSequence(
-children = [RegexSequence(children = [RegexSequence(children = [RegexEmpty(),
-RegexCharacterRule(character_set = [262, 265], rule_name = 'expr1')]),
-RegexCharacter(character_set = [42, 43])]), RegexCharacterRule(character_set = [288, 295
-], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [265, 268],
-rule_name = 'expr2')])], group_index = 0, group_name = 'Multiply')])],
-nonterminal = 5), Grammar.Production(children = [RegexSequence(children = [
-RegexEmpty(), RegexGroup(children = [RegexSequence(children = [RegexSequence(
-children = [RegexSequence(children = [RegexSequence(children = [RegexEmpty(),
-RegexCharacterRule(character_set = [262, 265], rule_name = 'expr1')]),
-RegexCharacter(character_set = [47, 48])]), RegexCharacterRule(character_set = [288, 295
-], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [265, 268],
-rule_name = 'expr2')])], group_index = 0, group_name = 'Divide')])],
-nonterminal = 6), Grammar.Production(children = [RegexSequence(children = [
-RegexSequence(children = [RegexEmpty(), RegexGroup(children = [RegexSequence(
-children = [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name =
-'number')])], group_index = 0, group_name = 'Number')]), RegexCharacterRule(
-character_set = [288, 295], rule_name = 'whitespace_opt')])], nonterminal = 7),
-Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
-RegexGroup(children = [RegexSequence(children = [RegexSequence(children = [
-RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [45, 46])]),
-RegexCharacterRule(character_set = [288, 295], rule_name = 'whitespace_opt')]),
-RegexCharacterRule(character_set = [265, 268], rule_name = 'expr2')])], group_index
-= 0, group_name = 'Negate')])], nonterminal = 8), Grammar.Production(children =
-[RegexSequence(children = [RegexSequence(children = [RegexSequence(children = [
-RegexSequence(children = [RegexSequence(children = [RegexEmpty(),
-RegexCharacter(character_set = [40, 41])]), RegexCharacterRule(character_set = [288, 295
-], rule_name = 'whitespace_opt')]), RegexCharacterRule(character_set = [259, 262],
-rule_name = 'expr0')]), RegexCharacter(character_set = [41, 42])]),
-RegexCharacterRule(character_set = [288, 295], rule_name = 'whitespace_opt')])],
-nonterminal = 9), Grammar.Production(children = [RegexSequence(children = [
-RegexEmpty(), RegexCharacter(character_set = [48, 49])])], nonterminal = 10),
-Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
-RegexCharacter(character_set = [49, 50])])], nonterminal = 11), Grammar.Production(
-children = [RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [
-50, 51])])], nonterminal = 12), Grammar.Production(children = [RegexSequence(
-children = [RegexEmpty(), RegexCharacter(character_set = [51, 52])])], nonterminal =
-13), Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
-RegexCharacter(character_set = [52, 53])])], nonterminal = 14), Grammar.Production(
-children = [RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [
-53, 54])])], nonterminal = 15), Grammar.Production(children = [RegexSequence(
-children = [RegexEmpty(), RegexCharacter(character_set = [54, 55])])], nonterminal =
-16), Grammar.Production(children = [RegexSequence(children = [RegexEmpty(),
-RegexCharacter(character_set = [55, 56])])], nonterminal = 17), Grammar.Production(
-children = [RegexSequence(children = [RegexEmpty(), RegexCharacter(character_set = [
-56, 57])])], nonterminal = 18), Grammar.Production(children = [RegexSequence(
-children = [RegexEmpty(), RegexCharacter(character_set = [57, 58])])], nonterminal =
-19), Grammar.Production(children = [RegexSequence(children = [RegexSequence(
-children = [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name =
-'number')]), RegexCharacter(character_set = [48, 49])])], nonterminal = 20),
-Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(character_set = [49, 50])])], nonterminal = 21),
-Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(character_set = [50, 51])])], nonterminal = 22),
-Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(character_set = [51, 52])])], nonterminal = 23),
-Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(character_set = [52, 53])])], nonterminal = 24),
-Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(character_set = [53, 54])])], nonterminal = 25),
-Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(character_set = [54, 55])])], nonterminal = 26),
-Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(character_set = [55, 56])])], nonterminal = 27),
-Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(character_set = [56, 57])])], nonterminal = 28),
-Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(character_set = [268, 288], rule_name = 'number'
-)]), RegexCharacter(character_set = [57, 58])])], nonterminal = 29),
-Grammar.Production(children = [RegexEmpty()], nonterminal = 30),
-Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
-'whitespace_opt')]), RegexCharacter(character_set = [9, 10])])], nonterminal = 31),
-Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
-'whitespace_opt')]), RegexCharacter(character_set = [10, 11])])], nonterminal = 32),
-Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
-'whitespace_opt')]), RegexCharacter(character_set = [11, 12])])], nonterminal = 33),
-Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
-'whitespace_opt')]), RegexCharacter(character_set = [12, 13])])], nonterminal = 34),
-Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
-'whitespace_opt')]), RegexCharacter(character_set = [13, 14])])], nonterminal = 35),
-Grammar.Production(children = [RegexSequence(children = [RegexSequence(children
-= [RegexEmpty(), RegexCharacterRule(character_set = [288, 295], rule_name =
-'whitespace_opt')]), RegexCharacter(character_set = [32, 33])])], nonterminal = 36)
-], n_terminals = 258)
- #sys.stdout.write(
- # wrap_repr(
- # ' grammar = {0:s}'.format(repr(grammar).replace('regex.', '')),
- # 79
- # )
- #)
-
- lr1 = grammar.to_lr1()
- #sys.stdout.write(
- # wrap_repr(
- # ' lr1 = {0:s}'.format(repr(lr1).replace('regex.', '')),
- # 79
- # )
- #)
-
- lr1.parse_text('(13 + 5 * 6) * 2', 0)
- root = element.Element('root', text = '(13 + 5 * 6) * 2')
- lr1.parse_yychunk(root, 0, 0, element.Element, iter([]))
- xml.etree.ElementTree.dump(root)
-
- clr1 = lr1.to_clr1()
- #sys.stdout.write(
- # wrap_repr(
- # ' clr1 = {0:s}'.format(repr(clr1).replace('regex.', '')),
- # 79
- # )
- #)
-
- clr1.parse_text('(13 + 5 * 6) * 2', 0)
- root = element.Element('root', text = '(13 + 5 * 6) * 2')
- clr1.parse_yychunk(root, 0, 0, element.Element, iter([]))
- xml.etree.ElementTree.dump(root)
-
- lalr1 = lr1.to_lalr1()
- #sys.stdout.write(
- # wrap_repr(
- # ' lalr1 = {0:s}'.format(repr(lalr1).replace('regex.', '')),
- # 79
- # )
- #)
-
- lalr1.parse_text('(13 + 5 * 6) * 2', 0)
- root = element.Element('root', text = '(13 + 5 * 6) * 2')
- lalr1.parse_yychunk(root, 0, 0, element.Element, iter([]))
- xml.etree.ElementTree.dump(root)
+++ /dev/null
-#!/bin/sh
-if ./generate.py regex <regex.py >regex.py.new && ! diff -q regex.py regex.py.new
-then
- mv regex.py.new regex.py
-else
- rm -f regex.py.new
-fi