return 'ast.PYACC.Symbol({0:s})'.format(', '.join(params))
# GENERATE END
- class TerminalSymbol(Symbol):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'PYACC_TerminalSymbol',
- attrib = {},
- text = '',
- children = [],
- name = '',
- character_set = []
- ):
- PYACC.Symbol.__init__(
- self,
- tag,
- attrib,
- text,
- children,
- name,
- character_set
- )
- def copy(self, factory = None):
- result = PYACC.Symbol.copy(
- self,
- TerminalSymbol if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.PYACC.TerminalSymbol({0:s})'.format(', '.join(params))
- # GENERATE END
-
- class NonterminalSymbol(Symbol):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'PYACC_NonterminalSymbol',
- attrib = {},
- text = '',
- children = [],
- name = '',
- character_set = []
- ):
- PYACC.Symbol.__init__(
- self,
- tag,
- attrib,
- text,
- children,
- name,
- character_set
- )
- def copy(self, factory = None):
- result = PYACC.Symbol.copy(
- self,
- NonterminalSymbol if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.PYACC.NonterminalSymbol({0:s})'.format(', '.join(params))
- # GENERATE END
-
# syntax classes
class BracedCode(element.Element):
# GENERATE ELEMENT() BEGIN
assert isinstance(i, PYACC.ID)
token_name = element.get_text(i, 0)
assert token_name not in name_to_symbol
- name_to_symbol[token_name] = len(pyacc.symbols)
- pyacc.symbols.append(
- PYACC.TerminalSymbol(
+ name_to_symbol[token_name] = len(pyacc.terminal_symbols)
+ character = 0x100 + len(pyacc.terminal_symbols)
+ pyacc.terminal_symbols.append(
+ PYACC.Symbol(
name = token_name,
- character_set = [
- pyacc.grammar.n_terminals,
- pyacc.grammar.n_terminals + 1
- ]
+ character_set = [character, character + 1]
)
)
- pyacc.grammar.n_terminals += 1
class Type(Item):
class Symbols(element.Element):
for i in range(len(self)):
if isinstance(self[i], PYACC.Section2.Rules.RHS.Symbol):
if isinstance(self[i][0], PYACC.Char):
- text = self[i][0].get_text()
+ character = ord(self[i][0].get_text())
+ pyacc.characters_used.add(character)
expr = regex.RegexSequence(
children = [
expr,
regex.RegexCharacter(
- character_set = [ord(text), ord(text) + 1]
+ character_set = [character, character + 1]
)
]
)
else:
pyacc.actions_text.append('')
- character_set = pyacc.symbols[lhs_symbol].character_set
- if len(character_set) and character_set[-1] == len(pyacc.grammar):
- character_set[-1] = len(pyacc.grammar) + 1
+ character_set = pyacc.nonterminal_symbols[lhs_symbol].character_set
+ character = len(pyacc.grammar)
+ if len(character_set) and character_set[-1] == character:
+ character_set[-1] = character + 1
else:
- character_set.extend([len(pyacc.grammar), len(pyacc.grammar) + 1])
+ character_set.extend([character, character + 1])
pyacc.grammar.append(regex.Grammar.Production(children = [expr]))
# GENERATE ELEMENT() BEGIN
assert isinstance(self[0], PYACC.ID)
lhs_name = element.get_text(self[0], 0)
if lhs_name in name_to_symbol:
- lhs_symbol = name_to_symbol[lhs_name]
- assert isinstance(
- pyacc.symbols[lhs.symbol],
- PYACC.NonterminalSymbol
- )
+ i = name_to_symbol[lhs_name]
+ assert i < 0
+ lhs_symbol = ~i
else:
- lhs_symbol = len(pyacc.symbols)
- name_to_symbol[lhs_name] = lhs_symbol
- pyacc.symbols.append(
- PYACC.NonterminalSymbol(name = lhs_name, character_set = [])
+ lhs_symbol = len(pyacc.nonterminal_symbols)
+ name_to_symbol[lhs_name] = ~lhs_symbol
+ pyacc.nonterminal_symbols.append(
+ PYACC.Symbol(name = lhs_name, character_set = [])
)
for i in self[1:]:
i.post_process(
return 'ast.PYACC.Section3({0:s})'.format(', '.join(params))
# GENERATE END
- # GENERATE ELEMENT(list(str) prologue_text, list(ref) symbols, ref grammar, list(str) actions_text) BEGIN
+ # GENERATE ELEMENT(list(str) prologue_text, set(int) characters_used, list(ref) terminal_symbols, list(ref) nonterminal_symbols, ref grammar, list(str) actions_text) BEGIN
def __init__(
self,
tag = 'PYACC',
text = '',
children = [],
prologue_text = [],
- symbols = [],
+ characters_used = set(),
+ terminal_symbols = [],
+ nonterminal_symbols = [],
grammar = None,
actions_text = []
):
if isinstance(prologue_text, str) else
prologue_text
)
- self.symbols = symbols
+ self.characters_used = (
+ set([element.deserialize_int(i) for i in characters_used.split()])
+ if isinstance(characters_used, str) else
+ characters_used
+ )
+ self.terminal_symbols = terminal_symbols
+ self.nonterminal_symbols = nonterminal_symbols
self.grammar = grammar
self.actions_text = (
[element.deserialize_str(i) for i in actions_text.split()]
' '.join([element.serialize_str(i) for i in self.prologue_text])
)
self.set(
- 'symbols',
- ' '.join([element.serialize_ref(i, ref_list) for i in self.symbols])
+ 'characters_used',
+ ' '.join([element.serialize_int(i) for i in sorted(self.characters_used)])
+ )
+ self.set(
+ 'terminal_symbols',
+ ' '.join([element.serialize_ref(i, ref_list) for i in self.terminal_symbols])
+ )
+ self.set(
+ 'nonterminal_symbols',
+ ' '.join([element.serialize_ref(i, ref_list) for i in self.nonterminal_symbols])
)
self.set('grammar', element.serialize_ref(self.grammar, ref_list))
self.set(
element.deserialize_str(i)
for i in self.get('prologue_text', '').split()
]
- self.symbols = [
+ self.characters_used = set(
+ [
+ element.deserialize_int(i)
+ for i in self.get('characters_used', '').split()
+ ]
+ )
+ self.terminal_symbols = [
element.deserialize_ref(i, ref_list)
- for i in self.get('symbols', '').split()
+ for i in self.get('terminal_symbols', '').split()
+ ]
+ self.nonterminal_symbols = [
+ element.deserialize_ref(i, ref_list)
+ for i in self.get('nonterminal_symbols', '').split()
]
self.grammar = element.deserialize_ref(self.get('grammar', '-1'), ref_list)
self.actions_text = [
PYACC if factory is None else factory
)
result.prologue_text = self.prologue_text
- result.symbols = self.symbols
+ result.characters_used = self.characters_used
+ result.terminal_symbols = self.terminal_symbols
+ result.nonterminal_symbols = self.nonterminal_symbols
result.grammar = self.grammar
result.actions_text = self.actions_text
return result
', '.join([repr(i) for i in self.prologue_text])
)
)
- if len(self.symbols):
+ if len(self.characters_used):
+ params.append(
+ 'characters_used = set([{0:s}])'.format(
+ ', '.join([repr(i) for i in sorted(self.characters_used)])
+ )
+ )
+ if len(self.terminal_symbols):
+ params.append(
+ 'terminal_symbols = [{0:s}]'.format(
+ ', '.join([repr(i) for i in self.terminal_symbols])
+ )
+ )
+ if len(self.nonterminal_symbols):
params.append(
- 'symbols = [{0:s}]'.format(
- ', '.join([repr(i) for i in self.symbols])
+ 'nonterminal_symbols = [{0:s}]'.format(
+ ', '.join([repr(i) for i in self.nonterminal_symbols])
)
)
if self.grammar != None:
def post_process(self):
# variables that will be serialized
self.prologue_text = []
- self.symbols = [
- PYACC.NonterminalSymbol(name = 'error', character_set = [])
+ self.characters_used = set()
+ self.terminal_symbols = [
+ PYACC.Symbol(name = '$end', character_set = [0x100, 0x101]),
+ PYACC.Symbol(name = 'error', character_set = [0x101, 0x102]),
+ PYACC.Symbol(name = '$undefined', character_set = [0x102, 0x103])
]
+ self.nonterminal_symbols = []
self.grammar = regex.Grammar(
children = [
regex.Grammar.Production(
children = [
- regex.RegexCharacterRule()
+ regex.RegexSequence(
+ children = [
+ regex.RegexCharacterRule(),
+ regex.RegexCharacterRule(rule_name = '$end')
+ ]
+ )
]
)
- ],
- n_terminals = 0x102
+ ]
)
self.actions_text = []
# variables that won't be serialized
- name_to_symbol = {'error': 0}
+ # note: name_to_symbol values >= 0 index terminal_symbols, while values < 0
+ # are ~index into nonterminal_symbols ('$end' and '$undefined' are not stored)
+ name_to_symbol = {'error': 1}
# perform the semantic analysis pass
for i in self:
)
# if start symbol not specified, use first nonterminal defined in file
- if len(self.grammar[0][0].rule_name) == 0:
- for i in self.symbols:
- if isinstance(i, PYACC.NonterminalSymbol):
- self.grammar[0][0].rule_name = i.name
-
- # make nonterminal character_set offset by n_terminals which is now known
- for i in self.symbols:
- if isinstance(i, PYACC.NonterminalSymbol):
- i.character_set = [j + self.grammar.n_terminals for j in i.character_set]
+ if len(self.grammar[0][0][0].rule_name) == 0:
+ self.grammar[0][0][0].rule_name = self.nonterminal_symbols[0].name
- # look up all rule names and substitute appropriate character_set for each
+ # look up rule names and substitute appropriate character_set for each
+ self.grammar.n_terminals = 0x100 + len(self.terminal_symbols)
self.grammar.post_process(
- dict([(i.name, i.character_set) for i in self.symbols])
+ dict(
+ [
+ (i.name, i.character_set)
+ for i in self.terminal_symbols
+ ] +
+ [
+ (i.name, [self.grammar.n_terminals + j for j in i.character_set])
+ for i in self.nonterminal_symbols
+ ]
+ )
)
# GENERATE FACTORY(regex.factory) BEGIN
'Item': Item,
'PYACC': PYACC,
'PYACC_Symbol': PYACC.Symbol,
- 'PYACC_TerminalSymbol': PYACC.TerminalSymbol,
- 'PYACC_NonterminalSymbol': PYACC.NonterminalSymbol,
'PYACC_BracedCode': PYACC.BracedCode,
'PYACC_BracedPredicate': PYACC.BracedPredicate,
'PYACC_Char': PYACC.Char,