self.repr_serialize(params)
return 'ast.PYACC.BracedCode({0:s})'.format(', '.join(params))
# GENERATE END
+ def get_text(self, len_production = 0):
+   # Return the text of this element as one string, built from one piece
+   # per child (in iteration order over self):
+   # - PYACC.StackReference -> '(yyvsp[N])' with N = index - len_production
+   # - PYACC.ValueReference -> '(yyval)'
+   # - PYACC.Escape         -> the single character chr(character)
+   # - any other child      -> whatever element.get_text(child, 0) returns
+   # The isinstance tests are made in exactly this order.
+   pieces = []
+   for child in self:
+     if isinstance(child, PYACC.StackReference):
+       piece = '(yyvsp[{0:d}])'.format(child.index - len_production)
+     elif isinstance(child, PYACC.ValueReference):
+       piece = '(yyval)'
+     elif isinstance(child, PYACC.Escape):
+       piece = chr(child.character)
+     else:
+       piece = element.get_text(child, 0)
+     pieces.append(piece)
+   return ''.join(pieces)
class BracedPredicate(element.Element):
# GENERATE ELEMENT() BEGIN
):
raise NotImplementedException
+ class StackLocation(element.Element):
+ # Element subclass with default tag 'PYACC_StackLocation' that carries a
+ # single integer attribute, index (default -1).
+ # NOTE(review): presumably this marks a location reference into the parser
+ # stack inside braced code -- confirm against the code that creates it.
+ # GENERATE ELEMENT(int index) BEGIN
+ def __init__(
+ self,
+ tag = 'PYACC_StackLocation',
+ attrib = {},
+ text = '',
+ children = [],
+ index = -1
+ ):
+ element.Element.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ # a str index is converted with element.deserialize_int(); any other
+ # value is stored unchanged
+ self.index = (
+ element.deserialize_int(index)
+ if isinstance(index, str) else
+ index
+ )
+ # serialize(): after the base class has run, store index in the 'index'
+ # attribute via element.serialize_int()
+ def serialize(self, ref_list, indent = 0):
+ element.Element.serialize(self, ref_list, indent)
+ self.set('index', element.serialize_int(self.index))
+ # deserialize(): after the base class has run, read index back from the
+ # 'index' attribute, using '-1' when the attribute is absent
+ def deserialize(self, ref_list):
+ element.Element.deserialize(self, ref_list)
+ self.index = element.deserialize_int(self.get('index', '-1'))
+ # copy(): delegate to element.Element.copy() with factory (StackLocation
+ # when factory is None), then carry index over to the result
+ # NOTE(review): the bare name StackLocation is looked up when copy() runs,
+ # while __repr__ below names this class 'ast.PYACC.StackLocation' --
+ # confirm the bare name resolves in the scope this class is defined in.
+ def copy(self, factory = None):
+ result = element.Element.copy(
+ self,
+ StackLocation if factory is None else factory
+ )
+ result.index = self.index
+ return result
+ # repr_serialize(): append the base class parameters to params, then
+ # 'index = ...' only when index differs from the default of -1
+ def repr_serialize(self, params):
+ element.Element.repr_serialize(self, params)
+ if self.index != -1:
+ params.append(
+ 'index = {0:s}'.format(repr(self.index))
+ )
+ # __repr__(): 'ast.PYACC.StackLocation(...)' with the comma-separated
+ # parameters collected by repr_serialize()
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'ast.PYACC.StackLocation({0:s})'.format(', '.join(params))
+ # GENERATE END
+
+ class StackReference(element.Element):
+ # Element subclass with default tag 'PYACC_StackReference' that carries a
+ # single integer attribute, index (default -1).
+ # NOTE(review): presumably this marks a reference to a value on the parser
+ # stack inside braced code -- confirm against the code that creates it.
+ # GENERATE ELEMENT(int index) BEGIN
+ def __init__(
+ self,
+ tag = 'PYACC_StackReference',
+ attrib = {},
+ text = '',
+ children = [],
+ index = -1
+ ):
+ element.Element.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ # a str index is converted with element.deserialize_int(); any other
+ # value is stored unchanged
+ self.index = (
+ element.deserialize_int(index)
+ if isinstance(index, str) else
+ index
+ )
+ # serialize(): after the base class has run, store index in the 'index'
+ # attribute via element.serialize_int()
+ def serialize(self, ref_list, indent = 0):
+ element.Element.serialize(self, ref_list, indent)
+ self.set('index', element.serialize_int(self.index))
+ # deserialize(): after the base class has run, read index back from the
+ # 'index' attribute, using '-1' when the attribute is absent
+ def deserialize(self, ref_list):
+ element.Element.deserialize(self, ref_list)
+ self.index = element.deserialize_int(self.get('index', '-1'))
+ # copy(): delegate to element.Element.copy() with factory (StackReference
+ # when factory is None), then carry index over to the result
+ # NOTE(review): the bare name StackReference is looked up when copy() runs,
+ # while __repr__ below names this class 'ast.PYACC.StackReference' --
+ # confirm the bare name resolves in the scope this class is defined in.
+ def copy(self, factory = None):
+ result = element.Element.copy(
+ self,
+ StackReference if factory is None else factory
+ )
+ result.index = self.index
+ return result
+ # repr_serialize(): append the base class parameters to params, then
+ # 'index = ...' only when index differs from the default of -1
+ def repr_serialize(self, params):
+ element.Element.repr_serialize(self, params)
+ if self.index != -1:
+ params.append(
+ 'index = {0:s}'.format(repr(self.index))
+ )
+ # __repr__(): 'ast.PYACC.StackReference(...)' with the comma-separated
+ # parameters collected by repr_serialize()
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'ast.PYACC.StackReference({0:s})'.format(', '.join(params))
+ # GENERATE END
+
class String(element.Element):
# GENERATE ELEMENT() BEGIN
def __init__(
self.repr_serialize(params)
return 'ast.PYACC.String({0:s})'.format(', '.join(params))
# GENERATE END
+ def get_text(self):
+   # Return the text of this element as one string, built from one piece
+   # per child (in iteration order over self): a PYACC.Escape child
+   # contributes the single character chr(character), any other child
+   # contributes whatever element.get_text(child, 0) returns.
+   pieces = []
+   for child in self:
+     if isinstance(child, PYACC.Escape):
+       pieces.append(chr(child.character))
+     else:
+       pieces.append(element.get_text(child, 0))
+   return ''.join(pieces)
class Tag(element.Element):
# GENERATE ELEMENT() BEGIN
if isinstance(self[i], PYACC.Section2.Rules.RHS.Symbol):
if isinstance(self[i][0], PYACC.Char):
character = ord(self[i][0].get_text())
+ assert character != 0 # would conflict with YYEOF
pyacc.characters_used.add(character)
expr = regex.RegexSequence(
children = [
):
pass
+ class ValueLocation(element.Element):
+ # Element subclass with default tag 'PYACC_ValueLocation'; it adds no
+ # attributes of its own on top of element.Element.
+ # NOTE(review): presumably this marks a reference to the location of the
+ # rule's result inside braced code -- confirm against the code that
+ # creates it.
+ # GENERATE ELEMENT() BEGIN
+ def __init__(
+ self,
+ tag = 'PYACC_ValueLocation',
+ attrib = {},
+ text = '',
+ children = []
+ ):
+ element.Element.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ # copy(): delegate to element.Element.copy() with factory (ValueLocation
+ # when factory is None) and return its result unchanged
+ # NOTE(review): the bare name ValueLocation is looked up when copy() runs,
+ # while __repr__ below names this class 'ast.PYACC.ValueLocation' --
+ # confirm the bare name resolves in the scope this class is defined in.
+ def copy(self, factory = None):
+ result = element.Element.copy(
+ self,
+ ValueLocation if factory is None else factory
+ )
+ return result
+ # __repr__(): 'ast.PYACC.ValueLocation(...)' with the comma-separated
+ # parameters collected by repr_serialize(), which this class does not
+ # define itself
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'ast.PYACC.ValueLocation({0:s})'.format(', '.join(params))
+ # GENERATE END
+
+ class ValueReference(element.Element):
+ # Element subclass with default tag 'PYACC_ValueReference'; it adds no
+ # attributes of its own on top of element.Element.
+ # NOTE(review): presumably this marks a reference to the rule's result
+ # value inside braced code -- confirm against the code that creates it.
+ # GENERATE ELEMENT() BEGIN
+ def __init__(
+ self,
+ tag = 'PYACC_ValueReference',
+ attrib = {},
+ text = '',
+ children = []
+ ):
+ element.Element.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ # copy(): delegate to element.Element.copy() with factory (ValueReference
+ # when factory is None) and return its result unchanged
+ # NOTE(review): the bare name ValueReference is looked up when copy() runs,
+ # while __repr__ below names this class 'ast.PYACC.ValueReference' --
+ # confirm the bare name resolves in the scope this class is defined in.
+ def copy(self, factory = None):
+ result = element.Element.copy(
+ self,
+ ValueReference if factory is None else factory
+ )
+ return result
+ # __repr__(): 'ast.PYACC.ValueReference(...)' with the comma-separated
+ # parameters collected by repr_serialize(), which this class does not
+ # define itself
+ def __repr__(self):
+ params = []
+ self.repr_serialize(params)
+ return 'ast.PYACC.ValueReference({0:s})'.format(', '.join(params))
+ # GENERATE END
+
+
# GENERATE ELEMENT(list(ref) prologue_text, set(int) characters_used, list(ref) terminal_symbols, list(ref) nonterminal_symbols, ref grammar, list(ref) actions_braced_code) BEGIN
def __init__(
self,
text,
children
)
- self.prologue_text = (
- [element.deserialize_str(i) for i in prologue_text.split()]
- if isinstance(prologue_text, str) else
- prologue_text
- )
+ self.prologue_text = prologue_text
self.characters_used = (
set([element.deserialize_int(i) for i in characters_used.split()])
if isinstance(characters_used, str) else
self.terminal_symbols = terminal_symbols
self.nonterminal_symbols = nonterminal_symbols
self.grammar = grammar
- self.actions_braced_code = (
- [element.deserialize_str(i) for i in actions_braced_code.split()]
- if isinstance(actions_braced_code, str) else
- actions_braced_code
- )
+ self.actions_braced_code = actions_braced_code
def serialize(self, ref_list, indent = 0):
element.Element.serialize(self, ref_list, indent)
self.set(
'prologue_text',
- ' '.join([element.serialize_str(i) for i in self.prologue_text])
+ ' '.join([element.serialize_ref(i, ref_list) for i in self.prologue_text])
)
self.set(
'characters_used',
self.set('grammar', element.serialize_ref(self.grammar, ref_list))
self.set(
'actions_braced_code',
- ' '.join([element.serialize_str(i) for i in self.actions_braced_code])
+ ' '.join([element.serialize_ref(i, ref_list) for i in self.actions_braced_code])
)
def deserialize(self, ref_list):
element.Element.deserialize(self, ref_list)
self.prologue_text = [
- element.deserialize_str(i)
+ element.deserialize_ref(i, ref_list)
for i in self.get('prologue_text', '').split()
]
self.characters_used = set(
]
self.grammar = element.deserialize_ref(self.get('grammar', '-1'), ref_list)
self.actions_braced_code = [
- element.deserialize_str(i)
+ element.deserialize_ref(i, ref_list)
for i in self.get('actions_braced_code', '').split()
]
def copy(self, factory = None):
self.prologue_text = []
self.characters_used = set()
self.terminal_symbols = [
- PYACC.Symbol(name = '$end', character_set = [0x100, 0x101]),
- PYACC.Symbol(name = 'error', character_set = [0x101, 0x102])
+ PYACC.Symbol(name = 'error', character_set = [0x100, 0x101]),
+ PYACC.Symbol(name = '$undefined', character_set = [0x101, 0x102])
]
self.nonterminal_symbols = []
self.grammar = regex.Grammar(
regex.RegexCharacterRule()
]
)
- ]
+ ],
+ n_terminals = 0,
+ eof_character = 0
)
self.actions_braced_code = []
# variables that won't be serialized
# note: in name_to_symbol, >= 0 is terminal, < 0 is ~nonterminal
- # (we do not bother storing the '$end' entry, it can't be looked up)
- name_to_symbol = {'error': 1}
+ # (don't bother storing the '$undefined', it can't be looked up)
+ name_to_symbol = {'error': 0}
# perform the semantic analysis pass
for i in self:
'PYACC_Int': PYACC.Int,
'PYACC_IntNone': PYACC.IntNone,
'PYACC_Section': PYACC.Section,
+ 'PYACC_StackLocation': PYACC.StackLocation,
+ 'PYACC_StackReference': PYACC.StackReference,
'PYACC_String': PYACC.String,
'PYACC_Tag': PYACC.Tag,
'PYACC_TagNone': PYACC.TagNone,
'PYACC_Section2_Rules_RHS_Merge': PYACC.Section2.Rules.RHS.Merge,
'PYACC_Section2_Rules_RHS_Prec': PYACC.Section2.Rules.RHS.Prec,
'PYACC_Section2_Rules_RHS_Symbol': PYACC.Section2.Rules.RHS.Symbol,
- 'PYACC_Section3': PYACC.Section3
+ 'PYACC_Section3': PYACC.Section3,
+ 'PYACC_ValueLocation': PYACC.ValueLocation,
+ 'PYACC_ValueReference': PYACC.ValueReference
}
def factory(tag, attrib = {}, *args, **kwargs):
  # Construct the element for `tag`: the constructor registered under that
  # tag in tag_to_class is used when there is one, otherwise regex.factory;
  # tag, attrib and all remaining arguments are forwarded unchanged.
  constructor = tag_to_class.get(tag, regex.factory)
  return constructor(tag, attrib, *args, **kwargs)
# - change the low-bit indication of shift/reduce to positive/negative
# we do it here after removing redundant columns, as it's more efficient
assert numpy.all(action_table != 0)
+ action_table[action_table == 1] = len(lr1dfa.states) << 1
+ action_table[action_table == -1] = 0
mask = (action_table & 1).astype(numpy.bool)
action_table >>= 1
- action_table[action_table == 0] = len(lr1dfa.states)
- action_table[action_table == -1] = 0
action_table[mask] = -action_table[mask]
assert numpy.all(goto_table != 0)
goto_table[goto_table == -1] = 0
# the nonterminals (for each nonterminal, one character per production)
lr1dfa = pyacc.grammar.to_lr1().to_lalr1()
- # squash this down to the set of character literals that are referenced,
- # the set of terminals, then only one character per nonterminal (hence
- # nonterminals referenced by pyacc.nonterminal_symbols[] index, rather
+ # squash this down to the set of terminals, then the set of character
+ # literals that are referenced, then only one character per nonterminal
+ # (nonterminals referenced by pyacc.nonterminal_symbols[] index, rather
# than the internal way as only the set of lr1dfa.productions[] indices)
# generate translate table for character literals and terminal symbols
- n_terminals = 1 # room for "$undefined"
+ n_terminals = 1 # room for '$eof'
translate_terminals = numpy.zeros(
(lr1dfa.n_terminals,),
numpy.int16
)
- for i in sorted(pyacc.characters_used):
- translate_terminals[i] = n_terminals
- n_terminals += 1
+ translate_terminals[1:0x100] = 2 # '$undefined'
for i in pyacc.terminal_symbols:
for j in range(0, len(i.character_set), 2):
translate_terminals[
i.character_set[j + 1]
] = n_terminals
n_terminals += 1
+ for i in sorted(pyacc.characters_used):
+ translate_terminals[i] = n_terminals
+ n_terminals += 1
# generate translate table for nonterminal symbols
# this is effectively a map from productions back to nonterminal symbols
)
# yytname (textual terminal/nonterminal name) wraps 70 columns
- x = 72
+ x = 70
yytname_lines = []
for i in (
- ['"$undefined"'] +
+ ['"$end"'] +
+ ['"{0:s}"'.format(i.name) for i in pyacc.terminal_symbols] +
[
'"\'{0:s}\'"'.format(
chr(i)
if i >= 0x20 else
- '\\x{0:02x}'.format(i)
+ '\\\\x{0:02x}'.format(i)
)
for i in sorted(pyacc.characters_used)
] +
- ['"{0:s}"'.format(i.name) for i in pyacc.terminal_symbols] +
['"{0:s}"'.format(i.name) for i in pyacc.nonterminal_symbols] +
['YY_NULLPTR']
):
- if x >= 72:
+ if x + len(i) >= 70:
yytname_lines.append([])
x = 0
yytname_lines[-1].append(i)
/* YYTRANSLATE[YYX] -- Symbol number corresponding to YYX as returned
by yylex, with out-of-bounds checking. */
-#define YYUNDEFTOK 0
+#define YYUNDEFTOK 2
#define YYMAXUTOK {6:d}
#define YYTRANSLATE(YYX) \\
/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM
as returned by yylex, without out-of-bounds checking. */
-static const yytype_uint16 yytranslate[] =
+static const yytype_int16 yytranslate[] =
{{{7:s}
}};
#if YYDEBUG
/* YYRLINE[YYN] -- Source line where rule number YYN was defined. */
-static const yytype_uint16 yyrline[] =
+static const yytype_int16 yyrline[] =
{{{8:s}
}};
#endif
# ifdef YYPRINT
/* YYTOKNUM[NUM] -- (External) token number corresponding to the
(internal) symbol number NUM (which must be that of a token). */
-static const yytype_uint16 yytoknum[] =
+static const yytype_int16 yytoknum[] =
{{{10:s}
}};
# endif
/* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM.
Performed when YYTABLE does not specify something else to do. Zero
means the default is an error. */
-static const yytype_uint16 yydefact[] =
+static const yytype_int16 yydefact[] =
{{{14:s}
}};
/* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If
positive, shift that token. If negative, reduce the rule whose
number is the opposite. If YYTABLE_NINF, syntax error. */
-static const yytype_uint16 yytable[] =
+static const yytype_int16 yytable[] =
{{{17:s}
}};
-static const yytype_uint16 yycheck[] =
+static const yytype_int16 yycheck[] =
{{{18:s}
}};
/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
symbol of state STATE-NUM. */
-static const yytype_uint16 yystos[] =
+static const yytype_int16 yystos[] =
{{{19:s}
}};
/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
-static const yytype_uint16 yyr1[] =
+static const yytype_int16 yyr1[] =
{{{20:s}
}};
/* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */
-static const yytype_uint16 yyr2[] =
+static const yytype_int16 yyr2[] =
{{{21:s}
}};
/* GENERATE TABLES END */
{1:s}
break;
'''.format(
- i,
- ''.join(
- [
- element.get_text(j, 0)
- for j in pyacc.actions_braced_code[i]
- ]
+ i + 1,
+ pyacc.actions_braced_code[i].get_text(
+ bison_lr1dfa.rule_data[i + 1, 1] # length of production
)
)
- for i in range(1, len(pyacc.actions_braced_code))
+ for i in range(len(pyacc.actions_braced_code))
if len(pyacc.actions_braced_code[i])
]
)
)
)
- # GENERATE ELEMENT(int n_terminals) BEGIN
+ # GENERATE ELEMENT(int n_terminals, int eof_character) BEGIN
def __init__(
self,
tag = 'Grammar',
attrib = {},
text = '',
children = [],
- n_terminals = -1
+ n_terminals = -1,
+ eof_character = -1
):
element.Element.__init__(
self,
if isinstance(n_terminals, str) else
n_terminals
)
+ self.eof_character = (
+ element.deserialize_int(eof_character)
+ if isinstance(eof_character, str) else
+ eof_character
+ )
def serialize(self, ref_list, indent = 0):
element.Element.serialize(self, ref_list, indent)
self.set('n_terminals', element.serialize_int(self.n_terminals))
+ self.set('eof_character', element.serialize_int(self.eof_character))
def deserialize(self, ref_list):
element.Element.deserialize(self, ref_list)
self.n_terminals = element.deserialize_int(self.get('n_terminals', '-1'))
+ self.eof_character = element.deserialize_int(self.get('eof_character', '-1'))
def copy(self, factory = None):
result = element.Element.copy(
self,
Grammar if factory is None else factory
)
result.n_terminals = self.n_terminals
+ result.eof_character = self.eof_character
return result
def repr_serialize(self, params):
element.Element.repr_serialize(self, params)
params.append(
'n_terminals = {0:s}'.format(repr(self.n_terminals))
)
+ if self.eof_character != -1:
+ params.append(
+ 'eof_character = {0:s}'.format(repr(self.eof_character))
+ )
def __repr__(self):
params = []
self.repr_serialize(params)
for i in range(len(self)):
self[i].post_process(i, rule_name_to_character_set)
def to_lr1(self):
- lr1 = LR1([], self.n_terminals)
+ lr1 = LR1([], self.n_terminals, self.eof_character)
for i in self:
i.add_to_lr1(lr1)
# propagate lookaheads
)
class LR1:
- def __init__(self, productions = [], n_terminals = n_characters):
+ def __init__(
+ self,
+ productions = [],
+ n_terminals = n_characters,
+ eof_character = n_characters
+ ):
# productions: list of production
# production: (
# priority,
# noting that markup has to be applied in reverse order of the list
# n_terminals: offset to apply to productions[] index to get symbol
# (character set code), also symbol for productions[0] = start production
+ # eof_character: usually == n_terminals (need not be valid terminal value)
self.productions = productions
self.n_terminals = n_terminals
+ self.eof_character = eof_character
def lookahead_item_set_closure(self, items, item_to_index):
in_queue = [True for i in range(len(items))]
return next_items, next_item_to_index, nonterminal0, nonterminal1
def parse_text(self, text, i):
- items = [(0, 0, [n_characters, n_characters + 1])] # EOF
+ items = [(0, 0, [self.eof_character, self.eof_character + 1])]
item_to_index = {(0, 0): 0}
value_stack = []
state_stack = []
- lookahead_character = ord(text[i]) if i < len(text) else n_characters # EOF
+ lookahead_character = ord(text[i]) if i < len(text) else self.eof_character
while True:
self.lookahead_item_set_closure(items, item_to_index)
value_stack.append(i)
)
)
i += 1
- lookahead_character = ord(text[i]) if i < len(text) else n_characters # EOF
+ lookahead_character = ord(text[i]) if i < len(text) else self.eof_character
elif len(reductions) != 0:
if len(reductions) != 1:
sys.stderr.write(
if pos < 0:
pos, off = element.to_start_relative(root, pos, off)
- items = [(0, 0, [n_characters, n_characters + 1])] # EOF
+ items = [(0, 0, [self.eof_character, self.eof_character + 1])]
item_to_index = {(0, 0): 0}
value_stack = []
state_stack = []
try:
next(yychunk_iter)
except StopIteration:
- lookahead_character = n_characters # EOF
+ lookahead_character = self.eof_character
break
text = element.get_text(root, pos)
else:
try:
next(yychunk_iter)
except StopIteration:
- lookahead_character = n_characters # EOF
+ lookahead_character = self.eof_character
break
text = element.get_text(root, pos)
else:
(len(symbols), group_bounds)
for _, symbols, _, group_bounds in self.productions
],
- self.n_terminals
+ self.n_terminals,
+ self.eof_character
)
- items = [(0, 0, [n_characters, n_characters + 1])] # EOF
+ items = [(0, 0, [self.eof_character, self.eof_character + 1])]
item_to_index = {(0, 0): 0}
self.lookahead_item_set_closure(items, item_to_index)
(len(symbols), group_bounds)
for _, symbols, _, group_bounds in self.productions
],
- self.n_terminals
+ self.n_terminals,
+ self.eof_character
)
- items = [(0, 0, [n_characters, n_characters + 1])] # EOF
+ items = [(0, 0, [self.eof_character, self.eof_character + 1])]
item_to_index = {(0, 0): 0}
self.lookahead_item_set_closure(items, item_to_index)
return lr1dfa
def __repr__(self):
- return 'regex.LR1({0:s}, {1:d})'.format(
+ return 'regex.LR1({0:s}, {1:d}, {2:d})'.format(
repr(self.productions),
- self.n_terminals
+ self.n_terminals,
+ self.eof_character
)
class LR1DFA:
- def __init__(self, states = [], productions = [], n_terminals = n_characters):
+ def __init__(
+ self,
+ states = [],
+ productions = [],
+ n_terminals = n_characters,
+ eof_character = n_characters
+ ):
# states: list of state_desc
# state_desc: (terminal breaks, actions, nonterminal breaks, gotos)
# action: shift = new state * 2, reduce = production * 2 + 1, error = -1
# noting that markup has to be applied in reverse order of the list
# n_terminals: offset to apply to productions[] index to get symbol
# (character set code), also symbol for productions[0] = start production
+ # eof_character: usually == n_terminals (need not be valid terminal value)
self.states = states
self.productions = productions
self.n_terminals = n_terminals
+ self.eof_character = eof_character
def parse_text(self, text, i):
state = 0
value_stack = []
state_stack = []
- lookahead_character = ord(text[i]) if i < len(text) else n_characters # EOF
+ lookahead_character = ord(text[i]) if i < len(text) else self.eof_character
while True:
value_stack.append(i)
state_stack.append(state)
if (action & 1) == 0:
state = action >> 1
i += 1
- lookahead_character = ord(text[i]) if i < len(text) else n_characters # EOF
+ lookahead_character = ord(text[i]) if i < len(text) else self.eof_character
else:
reduce = action >> 1
len_symbols, group_bounds = self.productions[reduce]
try:
next(yychunk_iter)
except StopIteration:
- lookahead_character = n_characters # EOF
+ lookahead_character = self.eof_character
break
text = element.get_text(root, pos)
else:
try:
next(yychunk_iter)
except StopIteration:
- lookahead_character = n_characters # EOF
+ lookahead_character = self.eof_character
break
text = element.get_text(root, pos)
else:
try:
end_pos, end_off, lookahead_character = next(yylex_iter)
except StopIteration:
- lookahead_character = n_characters # EOF
+ lookahead_character = self.eof_character
end_pos, end_off = element.to_end_relative(root, pos, off)
while True:
value_stack.append((pos, off))
try:
end_pos, end_off, lookahead_character = next(yylex_iter)
except StopIteration:
- lookahead_character = n_characters # EOF
+ lookahead_character = self.eof_character
#end_pos, end_off = element.to_end_relative(root, pos, off)
else:
reduce = action >> 1
assert state != -1
def __repr__(self):
- return 'regex.LR1DFA({0:s}, {1:s}, {2:d})'.format(
+ return 'regex.LR1DFA({0:s}, {1:s}, {2:d}, {3:d})'.format(
repr(self.states),
repr(self.productions),
- self.n_terminals
+ self.n_terminals,
+ self.eof_character
)
def wrap_repr(text, width):