class PYACC(element.Element):
# internal classes
class Symbol(element.Element):
- # GENERATE ELEMENT(str name, int token, list(int) character_set, int precedence, int associativity) BEGIN
+ # GENERATE ELEMENT(str name, list(int) character_set, int precedence, int associativity) BEGIN
def __init__(
self,
tag = 'PYACC_Symbol',
text = '',
children = [],
name = '',
- token = -1,
character_set = [],
precedence = -1,
associativity = -1
children
)
self.name = name
- self.token = (
- element.deserialize_int(token)
- if isinstance(token, str) else
- token
- )
self.character_set = (
[element.deserialize_int(i) for i in character_set.split()]
if isinstance(character_set, str) else
def serialize(self, ref_list, indent = 0):
element.Element.serialize(self, ref_list, indent)
self.set('name', element.serialize_str(self.name))
- self.set('token', element.serialize_int(self.token))
self.set(
'character_set',
' '.join([element.serialize_int(i) for i in self.character_set])
def deserialize(self, ref_list):
element.Element.deserialize(self, ref_list)
self.name = element.deserialize_str(self.get('name', ''))
- self.token = element.deserialize_int(self.get('token', '-1'))
self.character_set = [
element.deserialize_int(i)
for i in self.get('character_set', '').split()
Symbol if factory is None else factory
)
result.name = self.name
- result.token = self.token
result.character_set = self.character_set
result.precedence = self.precedence
result.associativity = self.associativity
params.append(
'name = {0:s}'.format(repr(self.name))
)
- if self.token != -1:
- params.append(
- 'token = {0:s}'.format(repr(self.token))
- )
if len(self.character_set):
params.append(
'character_set = [{0:s}]'.format(
self.symbol = len(pyacc.terminal_symbols)
character_to_symbol[character] = self.symbol
pyacc.terminal_symbols.append(
- PYACC.Symbol(
- token = character,
- character_set = [self.symbol, self.symbol + 1]
- )
+ PYACC.Symbol(character_set = [character, character + 1])
)
elif isinstance(self[0], PYACC.ID):
name = element.get_text(self[0], 0)
self.symbol = len(pyacc.terminal_symbols)
name_to_symbol[name] = self.symbol
pyacc.terminal_symbols.append(
- PYACC.Symbol(
- name = name,
- character_set = [self.symbol, self.symbol + 1]
- )
+ PYACC.Symbol(name = name)
)
else:
assert False
self.symbol = len(pyacc.terminal_symbols)
character_to_symbol[character] = self.symbol
pyacc.terminal_symbols.append(
- PYACC.Symbol(
- token = character,
- character_set = [self.symbol, self.symbol + 1]
- )
+ PYACC.Symbol(character_set = [character, character + 1])
)
elif isinstance(self[0], PYACC.ID):
name = element.get_text(self[0], 0)
self.symbol = ~len(pyacc.nonterminal_symbols)
name_to_symbol[name] = self.symbol
pyacc.nonterminal_symbols.append(
- PYACC.Symbol(
- name = name,
- character_set = []
- )
+ PYACC.Symbol(name = name, character_set = [])
)
else:
assert False
character_set.extend([character, character + 1])
pyacc.productions.append(self)
- def to_lr1_production(self, pyacc):
+ def add_to_lr1(self, pyacc, _lr1):
last_action = None
symbols = []
lookaheads = []
for i in self:
last_action = i.add_to_symbols(pyacc, last_action, symbols)
- return (
- # precedence
+ _lr1.productions.append(
(
- (pyacc.terminal_symbols[self.precedence_terminal].precedence << 2) |
- (pyacc.terminal_symbols[self.precedence_terminal].associativity & 3)
- if self.precedence_terminal != -1 else
- -1
- ),
- # symbols (list of terminal_set, nonterminal_set)
- symbols,
- # lookaheads (list of initial_set, can_be_empty)
- [([], False) for i in range(len(symbols))] + [([], True)],
- # ref_data
- last_action if last_action is not None else PYACC.BracedCode()
+ # precedence
+ (
+ (pyacc.terminal_symbols[self.precedence_terminal].precedence << 2) |
+ (pyacc.terminal_symbols[self.precedence_terminal].associativity & 3)
+ if self.precedence_terminal != -1 else
+ -1
+ ),
+ # symbols (list of terminal_set, nonterminal_set)
+ symbols,
+ # lookaheads (list of initial_set, can_be_empty)
+ [([], False) for i in range(len(symbols))] + [([], True)],
+ # ref_data
+ last_action if last_action is not None else PYACC.BracedCode()
+ )
)
class Section(element.Element):
self.symbol = ~len(pyacc.nonterminal_symbols)
name_to_symbol[name] = self.symbol
pyacc.nonterminal_symbols.append(
- PYACC.Symbol(
- name = name,
- character_set = []
- )
+ PYACC.Symbol(name = name, character_set = [])
)
else:
assert False
self.symbol = len(pyacc.terminal_symbols)
character_to_symbol[character] = self.symbol
pyacc.terminal_symbols.append(
- PYACC.Symbol(
- token = character,
- character_set = [self.symbol, self.symbol + 1]
- )
+ PYACC.Symbol(character_set = [character, character + 1])
)
elif isinstance(self[0], PYACC.ID):
name = element.get_text(self[0], 0)
self.symbol = len(pyacc.terminal_symbols)
name_to_symbol[name] = self.symbol
pyacc.terminal_symbols.append(
- PYACC.Symbol(
- name = name,
- character_set = [self.symbol, self.symbol + 1]
- )
+ PYACC.Symbol(name = name)
)
else:
assert False
if self.token != -1:
- assert pyacc.terminal_symbols[self.symbol].token == -1
- pyacc.terminal_symbols[self.symbol].token = self.token
+ assert len(pyacc.terminal_symbols[self.symbol].character_set) == 0
+ pyacc.terminal_symbols[self.symbol].character_set = (
+ [self.token, self.token + 1]
+ )
if precedence != -1:
assert pyacc.terminal_symbols[self.symbol].precedence == -1
pyacc.terminal_symbols[self.symbol].precedence = precedence
self.prologue_text = []
self.precedences = 0
self.terminal_symbols = [
- PYACC.Symbol(name = '$eof', token = 0, character_set = [0, 1]),
- PYACC.Symbol(name = 'error', character_set = [1, 2]),
- PYACC.Symbol(name = '$undefined', character_set = [2, 3])
+ PYACC.Symbol(name = '$eof', character_set = [0, 1]),
+ PYACC.Symbol(name = 'error'),
+ PYACC.Symbol(name = '$undefined')
]
self.nonterminal_symbols = []
self.start_nonterminal = -1
# fill in token numbers that are not characters or overridden by user
token = 0x100
for i in self.terminal_symbols:
- if i.token == -1:
- i.token = token
+ if len(i.character_set) == 0:
+ i.character_set = [token, token + 1]
token += 1
# fill in start nonterminal if not overridden by user
self.start_nonterminal = 0
def to_lr1(self):
- lr1_productions = [
- (
- # precedence
- -1,
- # symbols (list of terminal_set, nonterminal_set)
- [([], self.nonterminal_symbols[self.start_nonterminal].character_set)],
- # lookaheads (list of initial_set, can_be_empty)
- [([], False), ([], True)],
- # ref_data
- PYACC.BracedCode()
- )
- ]
+ _lr1 = lr1.LR1(
+ [
+ (
+ # precedence
+ -1,
+ # symbols (list of terminal_set, nonterminal_set)
+ [
+ (
+ [],
+ self.nonterminal_symbols[self.start_nonterminal].character_set
+ )
+ ],
+ # lookaheads (list of initial_set, can_be_empty)
+ [([], False), ([], True)],
+ # ref_data
+ PYACC.BracedCode()
+ )
+ ],
+ max([0] + [i.character_set[1] for i in self.terminal_symbols]),
+ 0
+ )
for i in self.productions:
- lr1_productions.append(i.to_lr1_production(self))
+ i.add_to_lr1(self, _lr1)
# propagate lookaheads
modified = True
while modified:
modified = False
- for _, symbols, lookaheads, _ in lr1_productions:
+ for _, symbols, lookaheads, _ in _lr1.productions:
for i in range(len(symbols) - 1, -1, -1):
initial_set, nonterminal_set = symbols[i]
can_be_empty = False
for j in range(0, len(nonterminal_set), 2):
for k in range(nonterminal_set[j], nonterminal_set[j + 1]):
- child_initial_set, child_can_be_empty = lr1_productions[k][2][0]
- initial_set = bisect_set.bisect_set_or(initial_set, child_initial_set)
+ child_initial_set, child_can_be_empty = _lr1.productions[k][2][0]
+ initial_set = bisect_set.bisect_set_or(
+ initial_set,
+ child_initial_set
+ )
can_be_empty = can_be_empty or child_can_be_empty
# at this point can_be_empty refers to current symbol only
if can_be_empty:
next_initial_set, can_be_empty = lookaheads[i + 1]
- initial_set = bisect_set.bisect_set_or(initial_set, next_initial_set)
+ initial_set = bisect_set.bisect_set_or(
+ initial_set,
+ next_initial_set
+ )
# at this point can_be_empty refers to all remaining symbols
if (initial_set, can_be_empty) != lookaheads[i]:
lookaheads[i] = (initial_set, can_be_empty)
modified = True
- return lr1.LR1(lr1_productions, len(self.terminal_symbols), 0)
+ return _lr1
# GENERATE FACTORY(element.Element) BEGIN
tag_to_class = {
nonterminal0 = nonterminal1
assert nonterminal0 == len(lr1dfa.productions)
- # permute and combine rows based on the nonterminal translate vector
+ # permute and combine columns/rows on the basis of the translate vectors
+ new_action_table = numpy.zeros(
+ (len(lr1dfa.states), n_terminals),
+ numpy.int16
+ )
+ new_action_table[:, translate_terminals] = action_table
+ action_table = new_action_table
new_goto_table = numpy.zeros(
(n_nonterminals, len(lr1dfa.states)),
numpy.int16
# this undoes yacc/bison's rather wasteful mapping of 0x00..0xff to literal
# characters, and also accommodates any token value overrides given by the
# user, yielding a consecutive set of terminal numbers that are really used
- # (matching pyacc.terminal_symbols[*].character_set, and hence the lr1dfa)
- reverse_translate_terminals = numpy.array(
- [i.token for i in pyacc.terminal_symbols],
- numpy.int16
- )
translate_terminals = numpy.full(
- (numpy.max(reverse_translate_terminals) + 1,),
+ (lr1dfa.n_terminals,),
2, # '$undefined'
numpy.int16
)
- translate_terminals[reverse_translate_terminals] = numpy.arange(
- len(pyacc.terminal_symbols),
- dtype = numpy.int16
- )
+ for i in range(len(pyacc.terminal_symbols)):
+ for j in range(0, len(pyacc.terminal_symbols[i].character_set), 2):
+ translate_terminals[
+ pyacc.terminal_symbols[i].character_set[j]:
+ pyacc.terminal_symbols[i].character_set[j + 1]
+ ] = i
# generate translate table for nonterminal symbols
# this is effectively a map from productions back to nonterminal symbols
# we do not generate an entry for the first production (start production)
- nonterminal = 0
translate_nonterminals = numpy.zeros(
(len(lr1dfa.productions) - 1,),
numpy.int16
)
- for i in pyacc.nonterminal_symbols:
- for j in range(0, len(i.character_set), 2):
+ for i in range(len(pyacc.nonterminal_symbols)):
+ for j in range(0, len(pyacc.nonterminal_symbols[i].character_set), 2):
translate_nonterminals[
- i.character_set[j] - 1:
- i.character_set[j + 1] - 1
- ] = nonterminal
- nonterminal += 1
+ pyacc.nonterminal_symbols[i].character_set[j] - 1:
+ pyacc.nonterminal_symbols[i].character_set[j + 1] - 1
+ ] = i
# translate and compress the tables
bison_lr1dfa = BisonLR1DFA(
'''.format(
','.join(
[
- '\n {0:s} = {1:d}'.format(i.name, i.token)
+ '\n {0:s} = {1:d}'.format(i.name, i.character_set[0])
for i in pyacc.terminal_symbols[3:]
if len(i.name)
]
'''.format(
''.join(
[
- '#define {0:s} {1:d}\n'.format(i.name, i.token)
+ '#define {0:s} {1:d}\n'.format(i.name, i.character_set[0])
for i in pyacc.terminal_symbols[3:]
if len(i.name)
]
'"{0:s}"'.format(i.name)
if len(i.name) else
'"\'{0:s}\'"'.format(
- chr(i.token)
- if i.token >= 0x20 else
- '\\\\x{0:02x}'.format(i.token)
+ chr(i.character_set[0])
+ if i.character_set[0] >= 0x20 else
+ '\\\\x{0:02x}'.format(i.character_set[0])
)
)
for i in pyacc.terminal_symbols