self.repr_serialize(params)
return 'regex.Regex({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, rule_name_to_character_set = None):
+ def post_process(self, group_index = 0): #, rule_name_to_character_set = None):
+ # Thread group_index through every child's post_process in order and
+ # return the value handed back by the last child (the argument itself
+ # when there are no children).
for i in self:
- group_index = i.post_process(group_index, rule_name_to_character_set)
+ group_index = i.post_process(group_index) #, rule_name_to_character_set)
return group_index
def to_groups(self, groups):
for i in self:
raise NotImplementedException
def add_to_nfa(self, nfa):
nfa.start_state.append(self.to_nfa_state(nfa, 0))
- def to_lr1_symbols(self, n_terminals, symbols, lookaheads, group_bounds):
- group_count = 0
- for i in self:
- group_count += (
- i.to_lr1_symbols(n_terminals, symbols, lookaheads, group_bounds)
- )
- return group_count # count of groups or ungrouped characters
+ #def to_lr1_symbols(self, n_terminals, symbols, lookaheads, group_bounds):
+ # group_count = 0
+ # for i in self:
+ # group_count += (
+ # i.to_lr1_symbols(n_terminals, symbols, lookaheads, group_bounds)
+ # )
+ # return group_count # count of groups or ungrouped characters
class RegexNone(Regex):
# GENERATE ELEMENT() BEGIN
new_state = len(nfa.states)
nfa.states.append((NFA.STATE_CHARACTER, self.character_set, next_state))
return new_state
- def to_lr1_symbols(self, n_terminals, symbols, lookaheads, group_bounds):
- terminal_set = []
- nonterminal_set = []
- i = 0
- while i < len(self.character_set):
- [j, k] = self.character_set[i:i + 2]
- if k > n_terminals:
- if j < n_terminals:
- terminal_set.extend([j, n_terminals])
- nonterminal_set.extend([0, k - n_terminals])
- i += 2
- while i < len(self.character_set):
- [j, k] = self.character_set[i:i + 2]
- nonterminal_set.extend([j - n_terminals, k - n_terminals])
- i += 2
- break
- terminal_set.extend([j, k])
- i += 2
- symbols.append((terminal_set, nonterminal_set))
- lookaheads.append(([], False)) # initial_set, can_be_empty
- return 1 # count of groups or ungrouped characters
+ #def to_lr1_symbols(self, n_terminals, symbols, lookaheads, group_bounds):
+ # terminal_set = []
+ # nonterminal_set = []
+ # i = 0
+ # while i < len(self.character_set):
+ # [j, k] = self.character_set[i:i + 2]
+ # if k > n_terminals:
+ # if j < n_terminals:
+ # terminal_set.extend([j, n_terminals])
+ # nonterminal_set.extend([0, k - n_terminals])
+ # i += 2
+ # while i < len(self.character_set):
+ # [j, k] = self.character_set[i:i + 2]
+ # nonterminal_set.extend([j - n_terminals, k - n_terminals])
+ # i += 2
+ # break
+ # terminal_set.extend([j, k])
+ # i += 2
+ # symbols.append((terminal_set, nonterminal_set))
+ # lookaheads.append(([], False)) # initial_set, can_be_empty
+ # return 1 # count of groups or ungrouped characters
class RegexCharacterRange(RegexCharacter):
# GENERATE ELEMENT() BEGIN
self.repr_serialize(params)
return 'regex.RegexCharacterRange({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, rule_name_to_character_set = None):
- group_index = RegexCharacter.post_process(self, group_index, rule_name_to_character_set)
+ def post_process(self, group_index = 0): #, rule_name_to_character_set = None):
+ # Run RegexCharacter.post_process first, then build this node's
+ # character_set from its two children: the first entry of child 0's
+ # character_set and the last entry of child 1's character_set.
+ # Returns the group_index produced by the RegexCharacter call.
+ group_index = RegexCharacter.post_process(self, group_index) #, rule_name_to_character_set)
self.character_set = [self[0].character_set[0], self[1].character_set[-1]]
return group_index
self.repr_serialize(params)
return 'regex.RegexCharacterOr({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, rule_name_to_character_set = None):
- group_index = RegexCharacter.post_process(self, group_index, rule_name_to_character_set)
+ def post_process(self, group_index = 0): #, rule_name_to_character_set = None):
+ # Run RegexCharacter.post_process first, then set this node's
+ # character_set to character_set_or() of the two children's sets.
+ # Returns the group_index produced by the RegexCharacter call.
+ group_index = RegexCharacter.post_process(self, group_index) #, rule_name_to_character_set)
self.character_set = character_set_or(self[0].character_set, self[1].character_set)
return group_index
self.repr_serialize(params)
return 'regex.RegexCharacterAnd({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, rule_name_to_character_set = None):
- group_index = RegexCharacter.post_process(self, group_index, rule_name_to_character_set)
+ def post_process(self, group_index = 0): #, rule_name_to_character_set = None):
+ # Run RegexCharacter.post_process first, then set this node's
+ # character_set to character_set_and() of the two children's sets.
+ # Returns the group_index produced by the RegexCharacter call.
+ group_index = RegexCharacter.post_process(self, group_index) #, rule_name_to_character_set)
self.character_set = character_set_and(self[0].character_set, self[1].character_set)
return group_index
self.repr_serialize(params)
return 'regex.RegexCharacterNot({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, rule_name_to_character_set = None):
- group_index = RegexCharacter.post_process(self, group_index, rule_name_to_character_set)
+ def post_process(self, group_index = 0): #, rule_name_to_character_set = None):
+ # Run RegexCharacter.post_process first, then set this node's
+ # character_set to character_set_not() of the single child's set.
+ # Returns the group_index produced by the RegexCharacter call.
+ group_index = RegexCharacter.post_process(self, group_index) #, rule_name_to_character_set)
self.character_set = character_set_not(self[0].character_set)
return group_index
-class RegexCharacterRule(RegexCharacter):
- # GENERATE ELEMENT(str rule_name) BEGIN
- def __init__(
- self,
- tag = 'RegexCharacterRule',
- attrib = {},
- text = '',
- children = [],
- character_set = [],
- rule_name = ''
- ):
- RegexCharacter.__init__(
- self,
- tag,
- attrib,
- text,
- children,
- character_set
- )
- self.rule_name = rule_name
- def serialize(self, ref_list, indent = 0):
- RegexCharacter.serialize(self, ref_list, indent)
- self.set('rule_name', element.serialize_str(self.rule_name))
- def deserialize(self, ref_list):
- RegexCharacter.deserialize(self, ref_list)
- self.rule_name = element.deserialize_str(self.get('rule_name', ''))
- def copy(self, factory = None):
- result = RegexCharacter.copy(
- self,
- RegexCharacterRule if factory is None else factory
- )
- result.rule_name = self.rule_name
- return result
- def repr_serialize(self, params):
- RegexCharacter.repr_serialize(self, params)
- if self.rule_name != '':
- params.append(
- 'rule_name = {0:s}'.format(repr(self.rule_name))
- )
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'regex.RegexCharacterRule({0:s})'.format(', '.join(params))
- # GENERATE END
- def post_process(self, group_index = 0, rule_name_to_character_set = None):
- if rule_name_to_character_set is not None:
- self.character_set = rule_name_to_character_set[self.rule_name]
- return RegexCharacter.post_process(self, group_index, rule_name_to_character_set)
+#class RegexCharacterRule(RegexCharacter):
+# # GENERATE ELEMENT(str rule_name) BEGIN
+# def __init__(
+# self,
+# tag = 'RegexCharacterRule',
+# attrib = {},
+# text = '',
+# children = [],
+# character_set = [],
+# rule_name = ''
+# ):
+# RegexCharacter.__init__(
+# self,
+# tag,
+# attrib,
+# text,
+# children,
+# character_set
+# )
+# self.rule_name = rule_name
+# def serialize(self, ref_list, indent = 0):
+# RegexCharacter.serialize(self, ref_list, indent)
+# self.set('rule_name', element.serialize_str(self.rule_name))
+# def deserialize(self, ref_list):
+# RegexCharacter.deserialize(self, ref_list)
+# self.rule_name = element.deserialize_str(self.get('rule_name', ''))
+# def copy(self, factory = None):
+# result = RegexCharacter.copy(
+# self,
+# RegexCharacterRule if factory is None else factory
+# )
+# result.rule_name = self.rule_name
+# return result
+# def repr_serialize(self, params):
+# RegexCharacter.repr_serialize(self, params)
+# if self.rule_name != '':
+# params.append(
+# 'rule_name = {0:s}'.format(repr(self.rule_name))
+# )
+# def __repr__(self):
+# params = []
+# self.repr_serialize(params)
+# return 'regex.RegexCharacterRule({0:s})'.format(', '.join(params))
+# # GENERATE END
+# def post_process(self, group_index = 0, rule_name_to_character_set = None):
+# if rule_name_to_character_set is not None:
+# self.character_set = rule_name_to_character_set[self.rule_name]
+# return RegexCharacter.post_process(self, group_index, rule_name_to_character_set)
class RegexOr(Regex):
# GENERATE ELEMENT() BEGIN
self.repr_serialize(params)
return 'regex.RegexRepeat({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, rule_name_to_character_set = None):
+ def post_process(self, group_index = 0): #, rule_name_to_character_set = None):
# total hack which will be done in a Python action in future
if len(self) >= 2:
assert self[1].tag == 'Number'
self.count1 = self.count0
del self[1:]
# end total hack
- return Regex.post_process(self, group_index, rule_name_to_character_set)
+ return Regex.post_process(self, group_index) #, rule_name_to_character_set)
def to_nfa_state(self, nfa, next_state):
count0 = self.count0
count1 = self.count1
self.repr_serialize(params)
return 'regex.RegexGroup({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, group_index = 0, rule_name_to_character_set = None):
+ def post_process(self, group_index = 0): #, rule_name_to_character_set = None):
# total hack which will be done in a Python action in future
if len(self) >= 2:
assert self[0].tag == 'GroupName'
# end total hack
self.group_index = group_index
group_index += 1
- return Regex.post_process(self, group_index, rule_name_to_character_set)
+ return Regex.post_process(self, group_index) #, rule_name_to_character_set)
def to_groups(self, groups):
assert len(groups) == self.group_index
groups.append(
new_state = len(nfa.states)
nfa.states.append((NFA.STATE_MARK, self.group_index * 2, child_state))
return new_state
- def to_lr1_symbols(self, n_terminals, symbols, lookaheads, group_bounds):
- group_start = len(symbols)
- assert self.group_index == len(group_bounds)
- group_bounds.append(None)
- group_count = Regex.to_lr1_symbols(
- self,
- n_terminals,
- symbols,
- lookaheads,
- group_bounds
- )
- group_bounds[self.group_index] = (
- group_start,
- group_count,
- self.group_name,
- {i.name: i.value for i in self.group_attributes}
- )
- return 1 # count of groups or ungrouped characters
+ #def to_lr1_symbols(self, n_terminals, symbols, lookaheads, group_bounds):
+ # group_start = len(symbols)
+ # assert self.group_index == len(group_bounds)
+ # group_bounds.append(None)
+ # group_count = Regex.to_lr1_symbols(
+ # self,
+ # n_terminals,
+ # symbols,
+ # lookaheads,
+ # group_bounds
+ # )
+ # group_bounds[self.group_index] = (
+ # group_start,
+ # group_count,
+ # self.group_name,
+ # {i.name: i.value for i in self.group_attributes}
+ # )
+ # return 1 # count of groups or ungrouped characters
class Grammar(element.Element):
class Production(element.Element):
- # GENERATE ELEMENT(int nonterminal, int priority, bool right_to_left) BEGIN
+    class Symbol(element.Element):
+      # One child element of a Production, carrying two integer lists:
+      # terminal_set and nonterminal_set.
+      #
+      # NOTE(review): the region between the GENERATE markers looks like
+      # generator output; if so, the copy() fix below must also be made in
+      # the generator template or it will be lost on regeneration.
+      # NOTE(review): the [] defaults for terminal_set / nonterminal_set are
+      # stored by reference, so instances built with the defaults share one
+      # list object -- confirm nothing mutates these lists in place.
+      # GENERATE ELEMENT(list(int) terminal_set, list(int) nonterminal_set) BEGIN
+      def __init__(
+        self,
+        tag = 'Grammar_Production_Symbol',
+        attrib = {},
+        text = '',
+        children = [],
+        terminal_set = [],
+        nonterminal_set = []
+      ):
+        # terminal_set / nonterminal_set may each be given either as a list
+        # (stored as-is) or as a whitespace-separated string of integers
+        # (split and converted with element.deserialize_int).
+        element.Element.__init__(
+          self,
+          tag,
+          attrib,
+          text,
+          children
+        )
+        self.terminal_set = (
+          [element.deserialize_int(i) for i in terminal_set.split()]
+          if isinstance(terminal_set, str) else
+          terminal_set
+        )
+        self.nonterminal_set = (
+          [element.deserialize_int(i) for i in nonterminal_set.split()]
+          if isinstance(nonterminal_set, str) else
+          nonterminal_set
+        )
+      def serialize(self, ref_list, indent = 0):
+        # Store both lists as space-separated strings in the
+        # 'terminal_set' and 'nonterminal_set' attributes.
+        element.Element.serialize(self, ref_list, indent)
+        self.set(
+          'terminal_set',
+          ' '.join([element.serialize_int(i) for i in self.terminal_set])
+        )
+        self.set(
+          'nonterminal_set',
+          ' '.join([element.serialize_int(i) for i in self.nonterminal_set])
+        )
+      def deserialize(self, ref_list):
+        # Inverse of serialize(); a missing attribute yields an empty list.
+        element.Element.deserialize(self, ref_list)
+        self.terminal_set = [
+          element.deserialize_int(i)
+          for i in self.get('terminal_set', '').split()
+        ]
+        self.nonterminal_set = [
+          element.deserialize_int(i)
+          for i in self.get('nonterminal_set', '').split()
+        ]
+      def copy(self, factory = None):
+        # The class has to be named by its fully qualified path: names bound
+        # in a class body are not visible from inside method bodies, so a
+        # bare `Symbol` here raises NameError whenever factory is None.
+        result = element.Element.copy(
+          self,
+          Grammar.Production.Symbol if factory is None else factory
+        )
+        # the copy refers to the same list objects as the original
+        result.terminal_set = self.terminal_set
+        result.nonterminal_set = self.nonterminal_set
+        return result
+      def repr_serialize(self, params):
+        # Append 'name = [...]' fragments to params, omitting empty lists.
+        element.Element.repr_serialize(self, params)
+        if len(self.terminal_set):
+          params.append(
+            'terminal_set = [{0:s}]'.format(
+              ', '.join([repr(i) for i in self.terminal_set])
+            )
+          )
+        if len(self.nonterminal_set):
+          params.append(
+            'nonterminal_set = [{0:s}]'.format(
+              ', '.join([repr(i) for i in self.nonterminal_set])
+            )
+          )
+      def __repr__(self):
+        params = []
+        self.repr_serialize(params)
+        return 'regex.Grammar.Production.Symbol({0:s})'.format(', '.join(params))
+      # GENERATE END
+      def post_process(self, name_to_character_sets):
+        # Nothing to resolve for a plain symbol; the argument is ignored.
+        pass
+
+    class NamedSymbol(Symbol):
+      # A Symbol that additionally carries a name; post_process() replaces
+      # its terminal_set / nonterminal_set with the pair looked up under
+      # that name.
+      #
+      # NOTE(review): the region between the GENERATE markers looks like
+      # generator output; if so, the copy() fix below must also be made in
+      # the generator template or it will be lost on regeneration.
+      # GENERATE ELEMENT(str name) BEGIN
+      def __init__(
+        self,
+        tag = 'Grammar_Production_NamedSymbol',
+        attrib = {},
+        text = '',
+        children = [],
+        terminal_set = [],
+        nonterminal_set = [],
+        name = ''
+      ):
+        Grammar.Production.Symbol.__init__(
+          self,
+          tag,
+          attrib,
+          text,
+          children,
+          terminal_set,
+          nonterminal_set
+        )
+        self.name = name
+      def serialize(self, ref_list, indent = 0):
+        # Adds the 'name' attribute on top of what Symbol.serialize stores.
+        Grammar.Production.Symbol.serialize(self, ref_list, indent)
+        self.set('name', element.serialize_str(self.name))
+      def deserialize(self, ref_list):
+        # Inverse of serialize(); a missing 'name' attribute yields ''.
+        Grammar.Production.Symbol.deserialize(self, ref_list)
+        self.name = element.deserialize_str(self.get('name', ''))
+      def copy(self, factory = None):
+        # The class has to be named by its fully qualified path: names bound
+        # in a class body are not visible from inside method bodies, so a
+        # bare `NamedSymbol` here raises NameError whenever factory is None.
+        result = Grammar.Production.Symbol.copy(
+          self,
+          Grammar.Production.NamedSymbol if factory is None else factory
+        )
+        result.name = self.name
+        return result
+      def repr_serialize(self, params):
+        # Append a 'name = ...' fragment to params unless name is empty.
+        Grammar.Production.Symbol.repr_serialize(self, params)
+        if self.name != '':
+          params.append(
+            'name = {0:s}'.format(repr(self.name))
+          )
+      def __repr__(self):
+        params = []
+        self.repr_serialize(params)
+        return 'regex.Grammar.Production.NamedSymbol({0:s})'.format(', '.join(params))
+      # GENERATE END
+      def post_process(self, name_to_character_sets):
+        # name_to_character_sets is indexed by self.name and must yield a
+        # two-item value, unpacked as (terminal_set, nonterminal_set).
+        # presumably a dict, in which case an unknown name raises KeyError
+        # -- verify against the caller.
+        self.terminal_set, self.nonterminal_set = (
+          name_to_character_sets[self.name]
+        )
+
+ # GENERATE ELEMENT(int nonterminal, int precedence, int associativity) BEGIN
def __init__(
self,
tag = 'Grammar_Production',
text = '',
children = [],
nonterminal = -1,
- priority = -1,
- right_to_left = False
+ precedence = -1,
+ associativity = -1
):
element.Element.__init__(
self,
if isinstance(nonterminal, str) else
nonterminal
)
- self.priority = (
- element.deserialize_int(priority)
- if isinstance(priority, str) else
- priority
+ self.precedence = (
+ element.deserialize_int(precedence)
+ if isinstance(precedence, str) else
+ precedence
)
- self.right_to_left = (
- element.deserialize_bool(right_to_left)
- if isinstance(right_to_left, str) else
- right_to_left
+ self.associativity = (
+ element.deserialize_int(associativity)
+ if isinstance(associativity, str) else
+ associativity
)
def serialize(self, ref_list, indent = 0):
element.Element.serialize(self, ref_list, indent)
self.set('nonterminal', element.serialize_int(self.nonterminal))
- self.set('priority', element.serialize_int(self.priority))
- self.set('right_to_left', element.serialize_bool(self.right_to_left))
+ self.set('precedence', element.serialize_int(self.precedence))
+ self.set('associativity', element.serialize_int(self.associativity))
def deserialize(self, ref_list):
element.Element.deserialize(self, ref_list)
self.nonterminal = element.deserialize_int(self.get('nonterminal', '-1'))
- self.priority = element.deserialize_int(self.get('priority', '-1'))
- self.right_to_left = element.deserialize_bool(self.get('right_to_left', 'false'))
+ self.precedence = element.deserialize_int(self.get('precedence', '-1'))
+ self.associativity = element.deserialize_int(self.get('associativity', '-1'))
def copy(self, factory = None):
result = element.Element.copy(
self,
Production if factory is None else factory
)
result.nonterminal = self.nonterminal
- result.priority = self.priority
- result.right_to_left = self.right_to_left
+ result.precedence = self.precedence
+ result.associativity = self.associativity
return result
def repr_serialize(self, params):
element.Element.repr_serialize(self, params)
params.append(
'nonterminal = {0:s}'.format(repr(self.nonterminal))
)
- if self.priority != -1:
+ if self.precedence != -1:
params.append(
- 'priority = {0:s}'.format(repr(self.priority))
+ 'precedence = {0:s}'.format(repr(self.precedence))
)
- if self.right_to_left != False:
+ if self.associativity != -1:
params.append(
- 'right_to_left = {0:s}'.format(repr(self.right_to_left))
+ 'associativity = {0:s}'.format(repr(self.associativity))
)
def __repr__(self):
params = []
self.repr_serialize(params)
return 'regex.Grammar.Production({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, nonterminal, rule_name_to_character_set):
+ def post_process(self, nonterminal, name_to_character_sets):
+ # Record the given nonterminal on this production, then call
+ # post_process(name_to_character_sets) on every child in order.
self.nonterminal = nonterminal
- self[0].post_process(0, rule_name_to_character_set)
+ for i in self:
+ i.post_process(name_to_character_sets)
def add_to_lr1(self, lr1):
- symbols = []
- lookaheads = []
- group_bounds = []
- self[0].to_lr1_symbols(
- lr1.n_terminals,
- symbols,
- lookaheads,
- group_bounds
- )
- lookaheads.append(([], True)) # initial_set, can_be_empty (sentinel)
+ # Append one 4-tuple describing this production to lr1.productions.
+ # The tuple is built directly from this element's children; each child
+ # must provide terminal_set and nonterminal_set attributes.
lr1.productions.append(
(
- self.priority * 2 + int(self.right_to_left),
- symbols,
- lookaheads,
- group_bounds
+ # precedence (precedence and associativity combined into one int)
+ self.precedence * 2 + self.associativity,
+ # symbols (one (terminal_set, nonterminal_set) pair per child)
+ [(i.terminal_set, i.nonterminal_set) for i in self],
+ # lookaheads (list of initial_set, can_be_empty): one ([], False)
+ # placeholder per child, followed by a trailing ([], True) entry
+ [([], False) for i in range(len(self))] + [([], True)],
+ # group_bounds (always empty here)
+ []
)
)
- # GENERATE ELEMENT(int n_terminals, int eof_character) BEGIN
+ # GENERATE ELEMENT(int n_terminals, int eof_terminal) BEGIN
def __init__(
self,
tag = 'Grammar',
text = '',
children = [],
n_terminals = -1,
- eof_character = -1
+ eof_terminal = -1
):
element.Element.__init__(
self,
if isinstance(n_terminals, str) else
n_terminals
)
- self.eof_character = (
- element.deserialize_int(eof_character)
- if isinstance(eof_character, str) else
- eof_character
+ self.eof_terminal = (
+ element.deserialize_int(eof_terminal)
+ if isinstance(eof_terminal, str) else
+ eof_terminal
)
def serialize(self, ref_list, indent = 0):
element.Element.serialize(self, ref_list, indent)
self.set('n_terminals', element.serialize_int(self.n_terminals))
- self.set('eof_character', element.serialize_int(self.eof_character))
+ self.set('eof_terminal', element.serialize_int(self.eof_terminal))
def deserialize(self, ref_list):
element.Element.deserialize(self, ref_list)
self.n_terminals = element.deserialize_int(self.get('n_terminals', '-1'))
- self.eof_character = element.deserialize_int(self.get('eof_character', '-1'))
+ self.eof_terminal = element.deserialize_int(self.get('eof_terminal', '-1'))
def copy(self, factory = None):
result = element.Element.copy(
self,
Grammar if factory is None else factory
)
result.n_terminals = self.n_terminals
- result.eof_character = self.eof_character
+ result.eof_terminal = self.eof_terminal
return result
def repr_serialize(self, params):
element.Element.repr_serialize(self, params)
params.append(
'n_terminals = {0:s}'.format(repr(self.n_terminals))
)
- if self.eof_character != -1:
+ if self.eof_terminal != -1:
params.append(
- 'eof_character = {0:s}'.format(repr(self.eof_character))
+ 'eof_terminal = {0:s}'.format(repr(self.eof_terminal))
)
def __repr__(self):
params = []
self.repr_serialize(params)
return 'regex.Grammar({0:s})'.format(', '.join(params))
# GENERATE END
- def post_process(self, rule_name_to_character_set):
+ def post_process(self, name_to_character_sets):
+ # Call post_process on each child in order, passing the child's
+ # position as the first argument and name_to_character_sets as the
+ # second.
for i in range(len(self)):
- self[i].post_process(i, rule_name_to_character_set)
+ self[i].post_process(i, name_to_character_sets)
def to_lr1(self):
- lr1 = LR1([], self.n_terminals, self.eof_character)
+ lr1 = LR1([], self.n_terminals, self.eof_terminal)
for i in self:
i.add_to_lr1(lr1)
# propagate lookaheads
'RegexCharacterOr': RegexCharacterOr,
'RegexCharacterAnd': RegexCharacterAnd,
'RegexCharacterNot': RegexCharacterNot,
- 'RegexCharacterRule': RegexCharacterRule,
'RegexOr': RegexOr,
'RegexAnd': RegexAnd,
'RegexSequence': RegexSequence,
'RegexGroup': RegexGroup,
'RegexGroup_Attribute': RegexGroup.Attribute,
'Grammar': Grammar,
- 'Grammar_Production': Grammar.Production
+ 'Grammar_Production': Grammar.Production,
+ 'Grammar_Production_Symbol': Grammar.Production.Symbol,
+ 'Grammar_Production_NamedSymbol': Grammar.Production.NamedSymbol
}
def factory(tag, attrib = {}, *args, **kwargs):
+ # Build the element for `tag`: use the class registered in tag_to_class,
+ # or plain element.Element when the tag is not registered. The chosen
+ # class is called with (tag, attrib, *args, **kwargs).
return tag_to_class.get(tag, element.Element)(tag, attrib, *args, **kwargs)
def __init__(
self,
productions = [],
- n_terminals = n_characters,
- eof_character = n_characters
+ n_terminals = n_characters + 1,
+ eof_terminal = n_characters
):
# productions: list of production
# production: (
# noting that markup has to be applied in reverse order of the list
# n_terminals: offset to apply to productions[] index to get symbol
# (character set code), also symbol for productions[0] = start production
- # eof_character: usually == n_terminals (need not be valid terminal value)
+ # eof_terminal: usually == n_terminals - 1 (must be valid terminal value)
self.productions = productions
self.n_terminals = n_terminals
- self.eof_character = eof_character
+ self.eof_terminal = eof_terminal
def lookahead_item_set_closure(self, items, item_to_index):
in_queue = [True for i in range(len(items))]
return next_items, next_item_to_index, nonterminal0, nonterminal1
def parse_text(self, text, i):
- items = [(0, 0, [self.eof_character, self.eof_character + 1])]
+ items = [(0, 0, [self.eof_terminal, self.eof_terminal + 1])]
item_to_index = {(0, 0): 0}
value_stack = []
state_stack = []
- lookahead_character = ord(text[i]) if i < len(text) else self.eof_character
+ lookahead_character = ord(text[i]) if i < len(text) else self.eof_terminal
while True:
self.lookahead_item_set_closure(items, item_to_index)
value_stack.append(i)
)
)
i += 1
- lookahead_character = ord(text[i]) if i < len(text) else self.eof_character
+ lookahead_character = ord(text[i]) if i < len(text) else self.eof_terminal
elif len(reductions) != 0:
if len(reductions) != 1:
sys.stderr.write(
if pos < 0:
pos, off = element.to_start_relative(root, pos, off)
- items = [(0, 0, [self.eof_character, self.eof_character + 1])]
+ items = [(0, 0, [self.eof_terminal, self.eof_terminal + 1])]
item_to_index = {(0, 0): 0}
value_stack = []
state_stack = []
try:
next(yychunk_iter)
except StopIteration:
- lookahead_character = self.eof_character
+ lookahead_character = self.eof_terminal
break
text = element.get_text(root, pos)
else:
try:
next(yychunk_iter)
except StopIteration:
- lookahead_character = self.eof_character
+ lookahead_character = self.eof_terminal
break
text = element.get_text(root, pos)
else:
for _, symbols, _, group_bounds in self.productions
],
self.n_terminals,
- self.eof_character
+ self.eof_terminal
)
- items = [(0, 0, [self.eof_character, self.eof_character + 1])]
+ items = [(0, 0, [self.eof_terminal, self.eof_terminal + 1])]
item_to_index = {(0, 0): 0}
self.lookahead_item_set_closure(items, item_to_index)
for _, symbols, _, group_bounds in self.productions
],
self.n_terminals,
- self.eof_character
+ self.eof_terminal
)
- items = [(0, 0, [self.eof_character, self.eof_character + 1])]
+ items = [(0, 0, [self.eof_terminal, self.eof_terminal + 1])]
item_to_index = {(0, 0): 0}
self.lookahead_item_set_closure(items, item_to_index)
return 'regex.LR1({0:s}, {1:d}, {2:d})'.format(
repr(self.productions),
self.n_terminals,
- self.eof_character
+ self.eof_terminal
)
class LR1DFA:
self,
states = [],
productions = [],
- n_terminals = n_characters,
- eof_character = n_characters
+ n_terminals = n_characters + 1,
+ eof_terminal = n_characters
):
# states: list of state_desc
# state_desc: (terminal breaks, actions, nonterminal breaks, gotos)
# noting that markup has to be applied in reverse order of the list
# n_terminals: offset to apply to productions[] index to get symbol
# (character set code), also symbol for productions[0] = start production
- # eof_character: usually == n_terminals (need not be valid terminal value)
+ # eof_terminal: usually == n_terminals - 1 (must be valid terminal value)
self.states = states
self.productions = productions
self.n_terminals = n_terminals
- self.eof_character = eof_character
+ self.eof_terminal = eof_terminal
def parse_text(self, text, i):
state = 0
value_stack = []
state_stack = []
- lookahead_character = ord(text[i]) if i < len(text) else self.eof_character
+ lookahead_character = ord(text[i]) if i < len(text) else self.eof_terminal
while True:
value_stack.append(i)
state_stack.append(state)
if (action & 1) == 0:
state = action >> 1
i += 1
- lookahead_character = ord(text[i]) if i < len(text) else self.eof_character
+ lookahead_character = ord(text[i]) if i < len(text) else self.eof_terminal
else:
reduce = action >> 1
len_symbols, group_bounds = self.productions[reduce]
try:
next(yychunk_iter)
except StopIteration:
- lookahead_character = self.eof_character
+ lookahead_character = self.eof_terminal
break
text = element.get_text(root, pos)
else:
try:
next(yychunk_iter)
except StopIteration:
- lookahead_character = self.eof_character
+ lookahead_character = self.eof_terminal
break
text = element.get_text(root, pos)
else:
try:
end_pos, end_off, lookahead_character = next(yylex_iter)
except StopIteration:
- lookahead_character = self.eof_character
+ lookahead_character = self.eof_terminal
end_pos, end_off = element.to_end_relative(root, pos, off)
while True:
value_stack.append((pos, off))
try:
end_pos, end_off, lookahead_character = next(yylex_iter)
except StopIteration:
- lookahead_character = self.eof_character
+ lookahead_character = self.eof_terminal
#end_pos, end_off = element.to_end_relative(root, pos, off)
else:
reduce = action >> 1
repr(self.states),
repr(self.productions),
self.n_terminals,
- self.eof_character
+ self.eof_terminal
)
def wrap_repr(text, width):