+import bisect_set
import element
-import grammar
+import lr1
class Item(element.Element):
# GENERATE ELEMENT() BEGIN
return 'ast.PYACC.ID({0:s})'.format(', '.join(params))
# GENERATE END
- class IDNone(element.Element):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'PYACC_IDNone',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- IDNone if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.PYACC.IDNone({0:s})'.format(', '.join(params))
- # GENERATE END
-
class Int(element.Element):
# GENERATE ELEMENT() BEGIN
def __init__(
return 'ast.PYACC.Int({0:s})'.format(', '.join(params))
# GENERATE END
- class IntNone(element.Element):
- # GENERATE ELEMENT() BEGIN
+ class Production(element.Element):
+ class Item(element.Element):
+ # GENERATE ELEMENT() BEGIN
+ def __init__(
+ self,
+ tag = 'PYACC_Production_Item',
+ attrib = {},
+ text = '',
+ children = []
+ ):
+ element.Element.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ def copy(self, factory = None):
+ result = element.Element.copy(
+ self,
+ Item if factory is None else factory
+ )
+ return result
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'ast.PYACC.Production.Item({0:s})'.format(', '.join(params))
+ # GENERATE END
+ def post_process(
+ self,
+ pyacc,
+ production,
+ character_to_symbol,
+ name_to_symbol,
+ last_terminal
+ ):
+ raise NotImplementedException
+ def add_to_symbols(self, pyacc, action, symbols):
+ return action
+
+ class Action(Item):
+ # GENERATE ELEMENT() BEGIN
+ def __init__(
+ self,
+ tag = 'PYACC_Production_Action',
+ attrib = {},
+ text = '',
+ children = []
+ ):
+ PYACC.Production.Item.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ def copy(self, factory = None):
+ result = PYACC.Production.Item.copy(
+ self,
+ Action if factory is None else factory
+ )
+ return result
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'ast.PYACC.Production.Action({0:s})'.format(', '.join(params))
+ # GENERATE END
+ def post_process(
+ self,
+ pyacc,
+ production,
+ character_to_symbol,
+ name_to_symbol,
+ last_terminal
+ ):
+ return last_terminal
+ def add_to_symbols(self, pyacc, action, symbols):
+ assert action is None
+ return self[0]
+
+ class DPrec(Item):
+ # GENERATE ELEMENT() BEGIN
+ def __init__(
+ self,
+ tag = 'PYACC_Production_DPrec',
+ attrib = {},
+ text = '',
+ children = []
+ ):
+ PYACC.Production.Item.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ def copy(self, factory = None):
+ result = PYACC.Production.Item.copy(
+ self,
+ DPrec if factory is None else factory
+ )
+ return result
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'ast.PYACC.Production.DPrec({0:s})'.format(', '.join(params))
+ # GENERATE END
+
+ class Empty(Item):
+ # GENERATE ELEMENT() BEGIN
+ def __init__(
+ self,
+ tag = 'PYACC_Production_Empty',
+ attrib = {},
+ text = '',
+ children = []
+ ):
+ PYACC.Production.Item.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ def copy(self, factory = None):
+ result = PYACC.Production.Item.copy(
+ self,
+ Empty if factory is None else factory
+ )
+ return result
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'ast.PYACC.Production.Empty({0:s})'.format(', '.join(params))
+ # GENERATE END
+
+ class Merge(Item):
+ # GENERATE ELEMENT() BEGIN
+ def __init__(
+ self,
+ tag = 'PYACC_Production_Merge',
+ attrib = {},
+ text = '',
+ children = []
+ ):
+ PYACC.Production.Item.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ def copy(self, factory = None):
+ result = PYACC.Production.Item.copy(
+ self,
+ Merge if factory is None else factory
+ )
+ return result
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'ast.PYACC.Production.Merge({0:s})'.format(', '.join(params))
+ # GENERATE END
+
+ class Prec(Item):
+ # GENERATE ELEMENT(int symbol) BEGIN
+ def __init__(
+ self,
+ tag = 'PYACC_Production_Prec',
+ attrib = {},
+ text = '',
+ children = [],
+ symbol = -1
+ ):
+ PYACC.Production.Item.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ self.symbol = (
+ element.deserialize_int(symbol)
+ if isinstance(symbol, str) else
+ symbol
+ )
+ def serialize(self, ref_list, indent = 0):
+ PYACC.Production.Item.serialize(self, ref_list, indent)
+ self.set('symbol', element.serialize_int(self.symbol))
+ def deserialize(self, ref_list):
+ PYACC.Production.Item.deserialize(self, ref_list)
+ self.symbol = element.deserialize_int(self.get('symbol', '-1'))
+ def copy(self, factory = None):
+ result = PYACC.Production.Item.copy(
+ self,
+ Prec if factory is None else factory
+ )
+ result.symbol = self.symbol
+ return result
+ def repr_serialize(self, params):
+ PYACC.Production.Item.repr_serialize(self, params)
+ if self.symbol != -1:
+ params.append(
+ 'symbol = {0:s}'.format(repr(self.symbol))
+ )
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'ast.PYACC.Production.Prec({0:s})'.format(', '.join(params))
+ # GENERATE END
+ def post_process(
+ self,
+ pyacc,
+ production,
+ character_to_symbol,
+ name_to_symbol,
+ last_terminal
+ ):
+ # make this contain Section1Or2.Symbol eventually, duplicate for now
+ if isinstance(self[0], PYACC.Char):
+ character = ord(self[0].get_text())
+ assert character != 0 # would conflict with YYEOF
+ if character in character_to_symbol:
+ self.symbol = character_to_symbol[character]
+ assert self.symbol >= 0
+ else:
+ self.symbol = len(pyacc.terminal_symbols)
+ character_to_symbol[character] = self.symbol
+ pyacc.terminal_symbols.append(
+ PYACC.Symbol(
+ token = character,
+ character_set = [self.symbol, self.symbol + 1]
+ )
+ )
+ elif isinstance(self[0], PYACC.ID):
+ name = element.get_text(self[0], 0)
+ if name in name_to_symbol:
+ self.symbol = name_to_symbol[name]
+ assert self.symbol >= 0
+ else:
+ self.symbol = len(pyacc.terminal_symbols)
+ name_to_symbol[name] = self.symbol
+ pyacc.terminal_symbols.append(
+ PYACC.Symbol(
+ name = name,
+ character_set = [self.symbol, self.symbol + 1]
+ )
+ )
+ else:
+ assert False
+ assert production.precedence_terminal == -1
+ production.precedence_terminal = self.symbol
+ return last_terminal
+
+ class Symbol(Item):
+ # GENERATE ELEMENT(int symbol) BEGIN
+ def __init__(
+ self,
+ tag = 'PYACC_Production_Symbol',
+ attrib = {},
+ text = '',
+ children = [],
+ symbol = -1
+ ):
+ PYACC.Production.Item.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ self.symbol = (
+ element.deserialize_int(symbol)
+ if isinstance(symbol, str) else
+ symbol
+ )
+ def serialize(self, ref_list, indent = 0):
+ PYACC.Production.Item.serialize(self, ref_list, indent)
+ self.set('symbol', element.serialize_int(self.symbol))
+ def deserialize(self, ref_list):
+ PYACC.Production.Item.deserialize(self, ref_list)
+ self.symbol = element.deserialize_int(self.get('symbol', '-1'))
+ def copy(self, factory = None):
+ result = PYACC.Production.Item.copy(
+ self,
+ Symbol if factory is None else factory
+ )
+ result.symbol = self.symbol
+ return result
+ def repr_serialize(self, params):
+ PYACC.Production.Item.repr_serialize(self, params)
+ if self.symbol != -1:
+ params.append(
+ 'symbol = {0:s}'.format(repr(self.symbol))
+ )
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'ast.PYACC.Production.Symbol({0:s})'.format(', '.join(params))
+ # GENERATE END
+ def post_process(
+ self,
+ pyacc,
+ production,
+ character_to_symbol,
+ name_to_symbol,
+ last_terminal
+ ):
+ if isinstance(self[0], PYACC.Char):
+ character = ord(self[0].get_text())
+ assert character != 0 # would conflict with YYEOF
+ if character in character_to_symbol:
+ self.symbol = character_to_symbol[character]
+ assert self.symbol >= 0
+ else:
+ self.symbol = len(pyacc.terminal_symbols)
+ character_to_symbol[character] = self.symbol
+ pyacc.terminal_symbols.append(
+ PYACC.Symbol(
+ token = character,
+ character_set = [self.symbol, self.symbol + 1]
+ )
+ )
+ elif isinstance(self[0], PYACC.ID):
+ name = element.get_text(self[0], 0)
+ if name in name_to_symbol:
+ self.symbol = name_to_symbol[name]
+ else:
+ self.symbol = ~len(pyacc.nonterminal_symbols)
+ name_to_symbol[name] = self.symbol
+ pyacc.nonterminal_symbols.append(
+ PYACC.Symbol(
+ name = name,
+ character_set = []
+ )
+ )
+ else:
+ assert False
+ return self.symbol if self.symbol >= 0 else last_terminal
+ def add_to_symbols(self, pyacc, action, symbols):
+ assert action is None
+ symbols.append(
+ (pyacc.terminal_symbols[self.symbol].character_set, [])
+ if self.symbol >= 0 else
+ ([], pyacc.nonterminal_symbols[~self.symbol].character_set)
+ )
+ return None
+
+ # GENERATE ELEMENT(int lhs_nonterminal, int precedence_terminal) BEGIN
def __init__(
self,
- tag = 'PYACC_IntNone',
+ tag = 'PYACC_Production',
attrib = {},
text = '',
- children = []
+ children = [],
+ lhs_nonterminal = -1,
+ precedence_terminal = -1
):
element.Element.__init__(
self,
text,
children
)
+ self.lhs_nonterminal = (
+ element.deserialize_int(lhs_nonterminal)
+ if isinstance(lhs_nonterminal, str) else
+ lhs_nonterminal
+ )
+ self.precedence_terminal = (
+ element.deserialize_int(precedence_terminal)
+ if isinstance(precedence_terminal, str) else
+ precedence_terminal
+ )
+ def serialize(self, ref_list, indent = 0):
+ element.Element.serialize(self, ref_list, indent)
+ self.set('lhs_nonterminal', element.serialize_int(self.lhs_nonterminal))
+ self.set('precedence_terminal', element.serialize_int(self.precedence_terminal))
+ def deserialize(self, ref_list):
+ element.Element.deserialize(self, ref_list)
+ self.lhs_nonterminal = element.deserialize_int(self.get('lhs_nonterminal', '-1'))
+ self.precedence_terminal = element.deserialize_int(self.get('precedence_terminal', '-1'))
def copy(self, factory = None):
result = element.Element.copy(
self,
- IntNone if factory is None else factory
+ Production if factory is None else factory
)
+ result.lhs_nonterminal = self.lhs_nonterminal
+ result.precedence_terminal = self.precedence_terminal
return result
+ def repr_serialize(self, params):
+ element.Element.repr_serialize(self, params)
+ if self.lhs_nonterminal != -1:
+ params.append(
+ 'lhs_nonterminal = {0:s}'.format(repr(self.lhs_nonterminal))
+ )
+ if self.precedence_terminal != -1:
+ params.append(
+ 'precedence_terminal = {0:s}'.format(repr(self.precedence_terminal))
+ )
def __repr__(self):
params = []
self.repr_serialize(params)
- return 'ast.PYACC.IntNone({0:s})'.format(', '.join(params))
+ return 'ast.PYACC.Production({0:s})'.format(', '.join(params))
# GENERATE END
+ def post_process(
+ self,
+ pyacc,
+ lhs_nonterminal,
+ character_to_symbol,
+ name_to_symbol
+ ):
+ self.lhs_nonterminal = lhs_nonterminal
+
+ self.precedence_terminal = -1
+ last_terminal = -1
+ for i in self:
+ last_terminal = i.post_process(
+ pyacc,
+ self,
+ character_to_symbol,
+ name_to_symbol,
+ last_terminal
+ )
+ if self.precedence_terminal == -1:
+ self.precedence_terminal = last_terminal
+
+ character_set = pyacc.nonterminal_symbols[
+ self.lhs_nonterminal
+ ].character_set
+ character = 1 + len(pyacc.productions)
+ if len(character_set) and character_set[-1] == character:
+ character_set[-1] = character + 1
+ else:
+ character_set.extend([character, character + 1])
+ pyacc.productions.append(self)
+
+ def to_lr1_production(self, pyacc):
+ action = None
+ symbols = []
+ lookaheads = []
+ for i in self:
+ action = i.add_to_symbols(pyacc, action, symbols)
+ return (
+ (
+ # precedence
+ (
+ (pyacc.terminal_symbols[self.precedence_terminal].precedence << 2) |
+ (pyacc.terminal_symbols[self.precedence_terminal].associativity & 3)
+ if self.precedence_terminal != -1 else
+ -1
+ ),
+ # symbols (list of terminal_set, nonterminal_set)
+ symbols,
+ # lookaheads (list of initial_set, can_be_empty)
+ [([], False) for i in range(len(symbols))] + [([], True)],
+ # group_bounds
+ []
+ ),
+ action if action is not None else PYACC.BracedCode()
+ )
+
class Section(element.Element):
# GENERATE ELEMENT() BEGIN
def __init__(
section,
character_to_symbol,
name_to_symbol,
+ False, # is_nonterminal
pyacc.precedences, # precedence
self.type # associativity
)
self.repr_serialize(params)
return 'ast.PYACC.Section1Or2.Start({0:s})'.format(', '.join(params))
# GENERATE END
+ def post_process(
+ self,
+ pyacc,
+ section,
+ character_to_symbol,
+ name_to_symbol
+ ):
+ self[0].post_process(
+ pyacc,
+ section,
+ character_to_symbol,
+ name_to_symbol,
+ True, # is_nonterminal
+ -1, # precedence
+ -1 # associativity
+ )
+ pyacc.start_nonterminal = ~self[0].symbol
class TagOrSymbol(element.Element):
# GENERATE ELEMENT() BEGIN
section,
character_to_symbol,
name_to_symbol,
+ is_nonterminal,
precedence,
associativity,
tag
section,
character_to_symbol,
name_to_symbol,
+ is_terminal,
precedence,
associativity,
tag
return self
class Symbol(TagOrSymbol):
- # GENERATE ELEMENT(int user_token) BEGIN
+ # GENERATE ELEMENT(int symbol, int token) BEGIN
def __init__(
self,
tag = 'PYACC_Section1Or2_Symbol',
attrib = {},
text = '',
children = [],
- user_token = -1
+ symbol = -1,
+ token = -1
):
PYACC.Section1Or2.TagOrSymbol.__init__(
self,
text,
children
)
- self.user_token = (
- element.deserialize_int(user_token)
- if isinstance(user_token, str) else
- user_token
+ self.symbol = (
+ element.deserialize_int(symbol)
+ if isinstance(symbol, str) else
+ symbol
+ )
+ self.token = (
+ element.deserialize_int(token)
+ if isinstance(token, str) else
+ token
)
def serialize(self, ref_list, indent = 0):
PYACC.Section1Or2.TagOrSymbol.serialize(self, ref_list, indent)
- self.set('user_token', element.serialize_int(self.user_token))
+ self.set('symbol', element.serialize_int(self.symbol))
+ self.set('token', element.serialize_int(self.token))
def deserialize(self, ref_list):
PYACC.Section1Or2.TagOrSymbol.deserialize(self, ref_list)
- self.user_token = element.deserialize_int(self.get('user_token', '-1'))
+ self.symbol = element.deserialize_int(self.get('symbol', '-1'))
+ self.token = element.deserialize_int(self.get('token', '-1'))
def copy(self, factory = None):
result = PYACC.Section1Or2.TagOrSymbol.copy(
self,
Symbol if factory is None else factory
)
- result.user_token = self.user_token
+ result.symbol = self.symbol
+ result.token = self.token
return result
def repr_serialize(self, params):
PYACC.Section1Or2.TagOrSymbol.repr_serialize(self, params)
- if self.user_token != -1:
+ if self.symbol != -1:
params.append(
- 'user_token = {0:s}'.format(repr(self.user_token))
+ 'symbol = {0:s}'.format(repr(self.symbol))
+ )
+ if self.token != -1:
+ params.append(
+ 'token = {0:s}'.format(repr(self.token))
)
def __repr__(self):
params = []
section,
character_to_symbol,
name_to_symbol,
+ is_nonterminal,
precedence,
associativity,
tag
):
- if isinstance(self[0], PYACC.Char):
- character = ord(self[0].get_text())
- assert character != 0 # would conflict with YYEOF
- if character in character_to_symbol:
- symbol = character_to_symbol[character]
- assert symbol >= 0
- else:
- symbol = len(pyacc.terminal_symbols)
- character_to_symbol[character] = symbol
- pyacc.terminal_symbols.append(
- PYACC.Symbol(
- token = character,
- character_set = [symbol, symbol + 1]
+ if is_nonterminal:
+ if isinstance(self[0], PYACC.ID):
+ name = element.get_text(self[0], 0)
+ if name in name_to_symbol:
+ self.symbol = name_to_symbol[name]
+ assert self.symbol < 0
+ else:
+ self.symbol = ~len(pyacc.nonterminal_symbols)
+ name_to_symbol[name] = self.symbol
+ pyacc.nonterminal_symbols.append(
+ PYACC.Symbol(
+ name = name,
+ character_set = []
+ )
)
- )
- print('symbol', symbol, 'token', character)
- elif isinstance(self[0], PYACC.ID):
- name = element.get_text(self[0], 0)
- if name in name_to_symbol:
- symbol = name_to_symbol[name]
- assert symbol >= 0
else:
- symbol = len(pyacc.terminal_symbols)
- name_to_symbol[name] = symbol
- pyacc.terminal_symbols.append(
- PYACC.Symbol(
- name = name,
- character_set = [symbol, symbol + 1]
- )
- )
+ assert False
+ # in this case ignore token, precedence and associativity
else:
- assert False
- if self.user_token != -1:
- assert pyacc.terminal_symbols[symbol].token == -1
- pyacc.terminal_symbols[symbol].token = self.user_token
- if precedence != -1:
- assert pyacc.terminal_symbols[symbol].precedence == -1
- pyacc.terminal_symbols[symbol].precedence = precedence
- if associativity != -1:
- assert pyacc.terminal_symbols[symbol].associativity == -1
- pyacc.terminal_symbols[symbol].associativity = associativity
+ if isinstance(self[0], PYACC.Char):
+ character = ord(self[0].get_text())
+ assert character != 0 # would conflict with YYEOF
+ if character in character_to_symbol:
+ self.symbol = character_to_symbol[character]
+ assert self.symbol >= 0
+ else:
+ self.symbol = len(pyacc.terminal_symbols)
+ character_to_symbol[character] = self.symbol
+ pyacc.terminal_symbols.append(
+ PYACC.Symbol(
+ token = character,
+ character_set = [self.symbol, self.symbol + 1]
+ )
+ )
+ elif isinstance(self[0], PYACC.ID):
+ name = element.get_text(self[0], 0)
+ if name in name_to_symbol:
+ self.symbol = name_to_symbol[name]
+ assert self.symbol >= 0
+ else:
+ self.symbol = len(pyacc.terminal_symbols)
+ name_to_symbol[name] = self.symbol
+ pyacc.terminal_symbols.append(
+ PYACC.Symbol(
+ name = name,
+ character_set = [self.symbol, self.symbol + 1]
+ )
+ )
+ else:
+ assert False
+ if self.token != -1:
+ assert pyacc.terminal_symbols[self.symbol].token == -1
+ pyacc.terminal_symbols[self.symbol].token = self.token
+ if precedence != -1:
+ assert pyacc.terminal_symbols[self.symbol].precedence == -1
+ pyacc.terminal_symbols[self.symbol].precedence = precedence
+ if associativity != -1:
+ assert pyacc.terminal_symbols[self.symbol].associativity == -1
+ pyacc.terminal_symbols[self.symbol].associativity = associativity
return tag
class TaggedSymbols(element.Element):
section,
character_to_symbol,
name_to_symbol,
+ is_terminal,
precedence,
associativity
):
section,
character_to_symbol,
name_to_symbol,
+ is_terminal,
precedence,
associativity,
tag
section,
character_to_symbol,
name_to_symbol,
+ False, # is_nonterminal
-1, # precedence
-1 # associativity
)
class Section2(Section1Or2):
class Rules(Item):
- class RHS(element.Element):
- class Action(element.Element):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'PYACC_Section2_Rules_RHS_Action',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Action if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.PYACC.Section2.Rules.RHS.Action({0:s})'.format(', '.join(params))
- # GENERATE END
-
- class DPrec(element.Element):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'PYACC_Section2_Rules_RHS_DPrec',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- DPrec if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.PYACC.Section2.Rules.RHS.DPrec({0:s})'.format(', '.join(params))
- # GENERATE END
-
- class Empty(element.Element):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'PYACC_Section2_Rules_RHS_Empty',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Empty if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.PYACC.Section2.Rules.RHS.Empty({0:s})'.format(', '.join(params))
- # GENERATE END
-
- class Merge(element.Element):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'PYACC_Section2_Rules_RHS_Merge',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Merge if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.PYACC.Section2.Rules.RHS.Merge({0:s})'.format(', '.join(params))
- # GENERATE END
-
- class Prec(element.Element):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'PYACC_Section2_Rules_RHS_Prec',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Prec if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.PYACC.Section2.Rules.RHS.Prec({0:s})'.format(', '.join(params))
- # GENERATE END
-
- class Symbol(element.Element):
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'PYACC_Section2_Rules_RHS_Symbol',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- Symbol if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.PYACC.Section2.Rules.RHS.Symbol({0:s})'.format(', '.join(params))
- # GENERATE END
-
- # GENERATE ELEMENT() BEGIN
- def __init__(
- self,
- tag = 'PYACC_Section2_Rules_RHS',
- attrib = {},
- text = '',
- children = []
- ):
- element.Element.__init__(
- self,
- tag,
- attrib,
- text,
- children
- )
- def copy(self, factory = None):
- result = element.Element.copy(
- self,
- RHS if factory is None else factory
- )
- return result
- def __repr__(self):
- params = []
- self.repr_serialize(params)
- return 'ast.PYACC.Section2.Rules.RHS({0:s})'.format(', '.join(params))
- # GENERATE END
- def post_process(
- self,
- pyacc,
- lhs_symbol,
- character_to_symbol,
- name_to_symbol
- ):
- production = grammar.Grammar.Production(
- nonterminal = len(pyacc.grammar)
- )
- for i in range(len(self)):
- if isinstance(self[i], PYACC.Section2.Rules.RHS.Symbol):
- if isinstance(self[i][0], PYACC.Char):
- character = ord(self[i][0].get_text())
- assert character != 0 # would conflict with YYEOF
- if character in character_to_symbol:
- symbol = character_to_symbol[character]
- assert symbol >= 0
- else:
- symbol = len(pyacc.terminal_symbols)
- character_to_symbol[character] = symbol
- pyacc.terminal_symbols.append(
- PYACC.Symbol(
- token = character,
- character_set = [symbol, symbol + 1]
- )
- )
- production.append(
- grammar.Grammar.Production.Symbol(
- terminal_set = pyacc.terminal_symbols[symbol].character_set
- )
- )
- elif isinstance(self[i][0], PYACC.ID):
- production.append(
- grammar.Grammar.Production.NamedSymbol(
- # (non)terminal_set will be filled in later once assigned
- name = element.get_text(self[i][0], 0)
- )
- )
- else:
- assert False
- elif isinstance(self[i], PYACC.Section2.Rules.RHS.Action):
- assert i == len(self) - 1
- assert isinstance(self[i][0], PYACC.BracedCode)
- pyacc.actions_braced_code.append(self[i][0])
- break
- else:
- pyacc.actions_braced_code.append(PYACC.BracedCode())
-
- character_set = pyacc.nonterminal_symbols[lhs_symbol].character_set
- character = len(pyacc.grammar)
- if len(character_set) and character_set[-1] == character:
- character_set[-1] = character + 1
- else:
- character_set.extend([character, character + 1])
- pyacc.grammar.append(production)
-
# GENERATE ELEMENT() BEGIN
def __init__(
self,
character_to_symbol,
name_to_symbol
):
- assert isinstance(self[0], PYACC.ID)
- lhs_name = element.get_text(self[0], 0)
- if lhs_name in name_to_symbol:
- i = name_to_symbol[lhs_name]
- assert i < 0
- lhs_symbol = ~i
- else:
- lhs_symbol = len(pyacc.nonterminal_symbols)
+ self[0].post_process(
+ pyacc,
+ section,
character_to_symbol,
- name_to_symbol[lhs_name] = ~lhs_symbol
- pyacc.nonterminal_symbols.append(
- PYACC.Symbol(name = lhs_name, character_set = [])
- )
+ name_to_symbol,
+ True, # is_nonterminal
+ -1, # precedence
+ -1, # associativity
+ None # tag
+ )
for i in self[1:]:
i.post_process(
pyacc,
- lhs_symbol,
+ ~self[0].symbol,
character_to_symbol,
name_to_symbol
)
return 'ast.PYACC.ValueReference({0:s})'.format(', '.join(params))
# GENERATE END
- # GENERATE ELEMENT(list(ref) prologue_text, int precedences, list(ref) terminal_symbols, list(ref) nonterminal_symbols, ref grammar, list(ref) actions_braced_code) BEGIN
+ # GENERATE ELEMENT(list(ref) prologue_text, int precedences, list(ref) terminal_symbols, list(ref) nonterminal_symbols, int start_nonterminal, list(ref) productions) BEGIN
def __init__(
self,
tag = 'PYACC',
precedences = -1,
terminal_symbols = [],
nonterminal_symbols = [],
- grammar = None,
- actions_braced_code = []
+ start_nonterminal = -1,
+ productions = []
):
element.Element.__init__(
self,
)
self.terminal_symbols = terminal_symbols
self.nonterminal_symbols = nonterminal_symbols
- self.grammar = grammar
- self.actions_braced_code = actions_braced_code
+ self.start_nonterminal = (
+ element.deserialize_int(start_nonterminal)
+ if isinstance(start_nonterminal, str) else
+ start_nonterminal
+ )
+ self.productions = productions
def serialize(self, ref_list, indent = 0):
element.Element.serialize(self, ref_list, indent)
self.set(
'nonterminal_symbols',
' '.join([element.serialize_ref(i, ref_list) for i in self.nonterminal_symbols])
)
- self.set('grammar', element.serialize_ref(self.grammar, ref_list))
+ self.set('start_nonterminal', element.serialize_int(self.start_nonterminal))
self.set(
- 'actions_braced_code',
- ' '.join([element.serialize_ref(i, ref_list) for i in self.actions_braced_code])
+ 'productions',
+ ' '.join([element.serialize_ref(i, ref_list) for i in self.productions])
)
def deserialize(self, ref_list):
element.Element.deserialize(self, ref_list)
element.deserialize_ref(i, ref_list)
for i in self.get('nonterminal_symbols', '').split()
]
- self.grammar = element.deserialize_ref(self.get('grammar', '-1'), ref_list)
- self.actions_braced_code = [
+ self.start_nonterminal = element.deserialize_int(self.get('start_nonterminal', '-1'))
+ self.productions = [
element.deserialize_ref(i, ref_list)
- for i in self.get('actions_braced_code', '').split()
+ for i in self.get('productions', '').split()
]
def copy(self, factory = None):
result = element.Element.copy(
result.precedences = self.precedences
result.terminal_symbols = self.terminal_symbols
result.nonterminal_symbols = self.nonterminal_symbols
- result.grammar = self.grammar
- result.actions_braced_code = self.actions_braced_code
+ result.start_nonterminal = self.start_nonterminal
+ result.productions = self.productions
return result
def repr_serialize(self, params):
element.Element.repr_serialize(self, params)
', '.join([repr(i) for i in self.nonterminal_symbols])
)
)
- if self.grammar != None:
+ if self.start_nonterminal != -1:
params.append(
- 'grammar = {0:s}'.format(repr(self.grammar))
+ 'start_nonterminal = {0:s}'.format(repr(self.start_nonterminal))
)
- if len(self.actions_braced_code):
+ if len(self.productions):
params.append(
- 'actions_braced_code = [{0:s}]'.format(
- ', '.join([repr(i) for i in self.actions_braced_code])
+ 'productions = [{0:s}]'.format(
+ ', '.join([repr(i) for i in self.productions])
)
)
def __repr__(self):
self.repr_serialize(params)
return 'ast.PYACC({0:s})'.format(', '.join(params))
# GENERATE END
+
def post_process(self):
# variables that will be serialized
self.prologue_text = []
PYACC.Symbol(name = '$undefined', character_set = [2, 3])
]
self.nonterminal_symbols = []
- self.grammar = grammar.Grammar(
- children = [
- grammar.Grammar.Production(
- children = [
- grammar.Grammar.Production.NamedSymbol()
- ],
- nonterminal = 0
- )
- ],
- eof_terminal = 0
- )
- self.actions_braced_code = []
+ self.start_nonterminal = -1
+ self.productions = []
# variables that won't be serialized
character_to_symbol = {} # indexed by int, always >= 0 (terminal)
# note: in name_to_symbol, >= 0 is terminal, < 0 is ~nonterminal
# (don't bother storing the '$undefined', it can't be looked up)
- name_to_symbol = {'error': 0}
+ name_to_symbol = {'error': 1}
# perform the semantic analysis pass
for i in self:
i.token = token
token += 1
- # if start symbol not specified, use first nonterminal defined in file
- if len(self.grammar[0][0].name) == 0:
- self.grammar[0][0].name = self.nonterminal_symbols[0].name
-
- # look up rule names and substitute appropriate character_set for each
- self.grammar.n_terminals = len(self.terminal_symbols)
- self.grammar.post_process(
- dict(
- [
- (i.name, (i.character_set, []))
- for i in self.terminal_symbols
- if len(i.name) # fix this later
- ] +
- [
- (i.name, ([], i.character_set))
- for i in self.nonterminal_symbols
- ]
+ # fill in start nonterminal if not overridden by user
+ if self.start_nonterminal == -1:
+ assert len(self.nonterminal_symbols) != 0
+ self.start_nonterminal = 0
+
+ def to_lr1(self):
+ lr1_productions = [
+ (
+ # precedence
+ -1,
+ # symbols (list of terminal_set, nonterminal_set)
+ [([], self.nonterminal_symbols[self.start_nonterminal].character_set)],
+ # lookaheads (list of initial_set, can_be_empty)
+ [([], False), ([], True)],
+ # group_bounds
+ []
)
- )
-
-# GENERATE FACTORY(grammar.factory) BEGIN
+ ]
+ actions = []
+ for i in self.productions:
+ lr1_production, action = i.to_lr1_production(self)
+ lr1_productions.append(lr1_production)
+ actions.append(action)
+
+ # propagate lookaheads
+ modified = True
+ while modified:
+ modified = False
+ for _, symbols, lookaheads, _ in lr1_productions:
+ for i in range(len(symbols) - 1, -1, -1):
+ initial_set, nonterminal_set = symbols[i]
+ can_be_empty = False
+ for j in range(0, len(nonterminal_set), 2):
+ for k in range(nonterminal_set[j], nonterminal_set[j + 1]):
+ child_initial_set, child_can_be_empty = lr1_productions[k][2][0]
+ initial_set = bisect_set.bisect_set_or(initial_set, child_initial_set)
+ can_be_empty = can_be_empty or child_can_be_empty
+ # at this point can_be_empty refers to current symbol only
+ if can_be_empty:
+ next_initial_set, can_be_empty = lookaheads[i + 1]
+ initial_set = bisect_set.bisect_set_or(initial_set, next_initial_set)
+ # at this point can_be_empty refers to all remaining symbols
+ if (initial_set, can_be_empty) != lookaheads[i]:
+ lookaheads[i] = (initial_set, can_be_empty)
+ modified = True
+
+ return (lr1.LR1(lr1_productions, len(self.terminal_symbols), 0), actions)
+
+# GENERATE FACTORY(element.Element) BEGIN
tag_to_class = {
'Item': Item,
'PYACC': PYACC,
'PYACC_Char': PYACC.Char,
'PYACC_Escape': PYACC.Escape,
'PYACC_ID': PYACC.ID,
- 'PYACC_IDNone': PYACC.IDNone,
'PYACC_Int': PYACC.Int,
- 'PYACC_IntNone': PYACC.IntNone,
+ 'PYACC_Production': PYACC.Production,
+ 'PYACC_Production_Item': PYACC.Production.Item,
+ 'PYACC_Production_Action': PYACC.Production.Action,
+ 'PYACC_Production_DPrec': PYACC.Production.DPrec,
+ 'PYACC_Production_Empty': PYACC.Production.Empty,
+ 'PYACC_Production_Merge': PYACC.Production.Merge,
+ 'PYACC_Production_Prec': PYACC.Production.Prec,
+ 'PYACC_Production_Symbol': PYACC.Production.Symbol,
'PYACC_Section': PYACC.Section,
'PYACC_StackLocation': PYACC.StackLocation,
'PYACC_StackReference': PYACC.StackReference,
'PYACC_Section1_YACC': PYACC.Section1.YACC,
'PYACC_Section2': PYACC.Section2,
'PYACC_Section2_Rules': PYACC.Section2.Rules,
- 'PYACC_Section2_Rules_RHS': PYACC.Section2.Rules.RHS,
- 'PYACC_Section2_Rules_RHS_Action': PYACC.Section2.Rules.RHS.Action,
- 'PYACC_Section2_Rules_RHS_DPrec': PYACC.Section2.Rules.RHS.DPrec,
- 'PYACC_Section2_Rules_RHS_Empty': PYACC.Section2.Rules.RHS.Empty,
- 'PYACC_Section2_Rules_RHS_Merge': PYACC.Section2.Rules.RHS.Merge,
- 'PYACC_Section2_Rules_RHS_Prec': PYACC.Section2.Rules.RHS.Prec,
- 'PYACC_Section2_Rules_RHS_Symbol': PYACC.Section2.Rules.RHS.Symbol,
'PYACC_Section3': PYACC.Section3,
'PYACC_ValueLocation': PYACC.ValueLocation,
'PYACC_ValueReference': PYACC.ValueReference
}
def factory(tag, attrib = {}, *args, **kwargs):
- return tag_to_class.get(tag, grammar.factory)(tag, attrib, *args, **kwargs)
+ return tag_to_class.get(tag, element.Element)(tag, attrib, *args, **kwargs)
# GENERATE END