#!/bin/sh
-if ./generate.py ast <ast.py >ast.py.new && ! diff -q ast.py ast.py.new
+if ./generate_ast.py ast <ast.py >ast.py.new && ! diff -q ast.py ast.py.new
then
mv ast.py.new ast.py
else
-import ast
-import element
-import numpy
-import sys
-
class BisonLR1DFA:
def __init__(
self,
- lr1dfa,
- n_terminals,
- translate_terminals,
- n_nonterminals,
- translate_nonterminals
+ translate,
+ rules,
+ accessing_symbols,
+ default_action,
+ default_goto,
+ entry_base,
+ entries,
+ action_pointer,
+ goto_pointer,
+ n_terminals
):
- #numpy.set_printoptions(threshold = numpy.nan)
- #print(translate_terminals)
- #print(translate_nonterminals)
-
- # translate is yytranslate
- self.translate = translate_terminals
-
- # rules[:, 0] is yyr1
- # rules[:, 1] is yyr2
- # note: production 0 (start production) can't be reduced, null it out
- self.rules = numpy.concatenate(
- [
- numpy.zeros((1, 2), numpy.int16),
- numpy.stack(
- [
- translate_nonterminals + n_terminals,
- numpy.array([i for i, _ in lr1dfa.productions[1:]], numpy.int16)
- ],
- 1
- )
- ],
- 0
- )
-
- # unpack tables into numpy arrays so we can manipulate them efficiently
- # note: the goto table is transposed with respect to the action table,
- # so the row in the table corresponds to the yypact[]/yypgoto[] index,
- # and the column in the table is what gets added to yypact[]/yypgoto[]
- action_table = numpy.zeros(
- (len(lr1dfa.states), lr1dfa.n_terminals),
- numpy.int16
- )
- goto_table = numpy.zeros(
- (len(lr1dfa.productions), len(lr1dfa.states)),
- numpy.int16
- )
- for i in range(len(lr1dfa.states)):
- terminal_breaks, actions, nonterminal_breaks, gotos = lr1dfa.states[i]
-
- terminal0 = 0
- for j in range(len(terminal_breaks)):
- terminal1 = terminal_breaks[j]
- action_table[i, terminal0:terminal1] = actions[j]
- terminal0 = terminal1
- assert terminal0 == lr1dfa.n_terminals
-
- nonterminal0 = 0
- for j in range(len(nonterminal_breaks)):
- nonterminal1 = nonterminal_breaks[j]
- goto_table[nonterminal0:nonterminal1, i] = gotos[j]
- nonterminal0 = nonterminal1
- assert nonterminal0 == len(lr1dfa.productions)
- assert numpy.all(action_table != 0)
- assert numpy.all(goto_table != 0)
- #print(action_table)
- #print(goto_table)
-
- # permute and combine columns/rows on the basis of the translate vectors
- new_action_table = numpy.zeros(
- (len(lr1dfa.states), n_terminals),
- numpy.int16
- )
- new_action_table[:, translate_terminals] = action_table
- action_table = new_action_table
- new_goto_table = numpy.zeros(
- (n_nonterminals, len(lr1dfa.states)),
- numpy.int16
- )
- new_goto_table[translate_nonterminals, :] = goto_table[1:] # ignore start
- goto_table = new_goto_table
-
- # manipulate the table entries as follows:
- # - ensure there is no shift or goto 0 (cannot return to starting state)
- # - replace reduce 0 (start production) with shift to a nonexistent state
- # - replace action or goto -1 (unrecognized terminal/nonterminal) with 0
- # - change the low-bit indication of shift/reduce to positive/negative
- # we do it here after removing redundant columns, as it's more efficient
- assert numpy.all(action_table != 0)
- action_table[action_table == 1] = len(lr1dfa.states) << 1
- action_table[action_table == -1] = 0
- mask = (action_table & 1).astype(numpy.bool)
- action_table >>= 1
- action_table[mask] = -action_table[mask]
- assert numpy.all(goto_table != 0)
- goto_table[goto_table == -1] = 0
-
- # find out which column the transition to each state occurs in, this
- # must be unique and is called the "accessing symbol" for the state
- accessing_table = numpy.any(
- numpy.concatenate(
- [action_table, goto_table.transpose()],
- 1
- )[:, :, numpy.newaxis] ==
- numpy.arange(
- 1,
- len(lr1dfa.states),
- dtype = numpy.int16
- )[numpy.newaxis, numpy.newaxis, :],
- 0
- )
- self.accessing_symbols = numpy.full(
- (len(lr1dfa.states),),
- 2, # '$undefined'
- numpy.int16
- )
- for i in range(1, len(lr1dfa.states)):
- accessing_symbol = numpy.nonzero(accessing_table[:, i - 1])[0]
- if accessing_symbol.shape[0] == 0:
- sys.stderr.write('warning: unreachable state {0:d}\n'.format(i))
- elif accessing_symbol.shape[0] == 1:
- self.accessing_symbols[i] = accessing_symbol[0]
- else:
- assert False
-
- # default_action is yydefact, default_goto is yydefgoto
- # find default reduce (most common negative value per action_table row)
- # and find default goto (most common non-zero value per goto_table row)
- # note: 0 is used as a last resort if there is no proper default value
- self.default_action = numpy.argmax(
- numpy.concatenate(
- [
- numpy.zeros((len(lr1dfa.states), 1), numpy.int16),
- numpy.sum(
- (
- action_table[:, :, numpy.newaxis] ==
- numpy.arange(
- -1,
- -len(lr1dfa.productions),
- -1,
- dtype = numpy.int16
- )[numpy.newaxis, numpy.newaxis, :]
- ).astype(numpy.int16),
- 1
- )
- ],
- 1
- ),
- 1
- )
- #print(action_table)
- #print(self.default_action)
- self.default_goto = numpy.argmax(
- numpy.concatenate(
- [
- numpy.zeros((n_nonterminals, 1), numpy.int16),
- numpy.sum(
- (
- goto_table[:, :, numpy.newaxis] ==
- numpy.arange(
- 1,
- len(lr1dfa.states),
- dtype = numpy.int16
- )[numpy.newaxis, numpy.newaxis, :]
- ).astype(numpy.int16),
- 1
- )
- ],
- 1
- ),
- 1
- )
- #print(goto_table)
- #print(self.default_goto)
-
- # fill in the zero entries in the tables with default reduce or goto
- for i in range(len(lr1dfa.states)):
- action_table[i, action_table[i, :] == 0] = -self.default_action[i]
- for i in range(n_nonterminals):
- goto_table[i, goto_table[i, :] == 0] = self.default_goto[i]
- #print(action_table)
- #print(goto_table)
-
- # entry_base indicates the most negative starting index we can expect
- # we maintain n_entries such that entries_used[n_entries:, :] == False
- # entries[i, 0] is yytable[i - entry_base]
- # entries[i, 1] is yycheck[i - entry_base]
- # entries_used[i, 0] == True if any vector has been stored starting at i
- # entries_used[0, 0] does not go True as it is -infinity and can be reused
- # entries_used[i, 1] == True if entry has been stored in entries[i, :]
- self.entry_base = max(n_terminals, len(lr1dfa.states))
- n_entries = self.entry_base
- entries = numpy.full((self.entry_base, 2), 0x8000, numpy.int16)
- entries_used = numpy.zeros((self.entry_base, 2), numpy.bool)
- entries_used[:, 1] = True # padding so we don't use any negative entries
-
- # action_pointer is yypact, goto_pointer is yypgoto
- def pack_matrix(data, mask):
- nonlocal n_entries, entries, entries_used # also uses self.entry_base
-
- start_indices = numpy.zeros((data.shape[0],), numpy.int16)
- for i in range(data.shape[0]):
- indices = numpy.nonzero(mask[i, :])[0]
- if indices.shape[0] == 0:
- start_index = 0
- else:
- size = indices[-1] + 1
-
- # ensure arrays are at least large enough to find a spot
- while entries.shape[0] < n_entries + size:
- # extend entries, copying only n_entries entries
- new_entries = numpy.full(
- (entries.shape[0] * 2, 2),
- 0x8000,
- numpy.int16
- )
- new_entries[:n_entries, :] = entries[:n_entries, :]
- entries = new_entries
-
- # extend entries_used, copying only n_entries entries
- new_entries_used = numpy.zeros(
- (entries_used.shape[0] * 2, 2),
- numpy.bool
- )
- new_entries_used[:n_entries, :] = entries_used[:n_entries, :]
- entries_used = new_entries_used
-
- # find a suitable spot and store differences from default
- start_index = self.entry_base - indices[0]
- while start_index < n_entries:
- if (
- not entries_used[start_index, 0] and
- not numpy.any(entries_used[indices + start_index, 1])
- ):
- break
- start_index += 1
- entries[indices + start_index, 0] = data[i, indices]
- entries[indices + start_index, 1] = indices
- entries_used[start_index, 0] = True
- entries_used[indices + start_index, 1] = True
- if n_entries < start_index + size:
- n_entries = start_index + size
-
- start_indices[i] = start_index
- return start_indices
- self.action_pointer = pack_matrix(
- action_table,
- action_table != (-self.default_action)[:, numpy.newaxis]
- ) - self.entry_base
- self.goto_pointer = pack_matrix(
- goto_table,
- goto_table != self.default_goto[:, numpy.newaxis]
- ) - self.entry_base
- self.entries = numpy.zeros(
- (n_entries - self.entry_base, 2),
- numpy.int16
- )
- self.entries[:, :] = entries[self.entry_base:n_entries, :]
-
- # n_states == self.action_pointer.shape[0]
- # n_productions == self.rules.shape[0]
- # n_nonterminals == self.goto_pointer.shape[0]
+ self.translate = translate
+ self.rules = rules
+ self.accessing_symbols = accessing_symbols
+ self.default_action = default_action
+ self.default_goto = default_goto
+ self.entry_base = entry_base
+ self.entries = entries
+ self.action_pointer = action_pointer
+ self.goto_pointer = goto_pointer
self.n_terminals = n_terminals
-escapes = {
- 0x07: '\\a',
- 0x08: '\\b',
- 0x09: '\\t',
- 0x0a: '\\n',
- 0x0b: '\\v',
- 0x0c: '\\f',
- 0x0d: '\\r',
- 0x22: '\\"',
- 0x5c: '\\\\'
-}
-def generate(pyacc, skel_file, out_file, defines_file = None):
- lr1dfa = pyacc.to_lr1().to_lalr1()
-
- # generate translate table for terminal symbols
- # this undoes yacc/bison's rather wasteful mapping of 0x00..0xff to literal
- # characters, and also accommodates any token value overrides given by the
- # user, yielding a consecutive set of terminal numbers that are really used
- n_terminals = 0
- translate_terminals = numpy.full(
- (lr1dfa.n_terminals,),
- 2, # '$undefined'
- numpy.int16
- )
- for i in pyacc.symbols:
- if i._type == ast.PYACC.Symbol.TYPE_TERMINAL:
- for j in range(0, len(i.character_set), 2):
- translate_terminals[
- i.character_set[j]:i.character_set[j + 1]
- ] = n_terminals
- n_terminals += 1
-
- # generate translate table for nonterminal symbols
- # this is effectively a map from productions back to nonterminal symbols
- # we do not generate an entry for the first production (start production)
- # we generate extra fake entries after end of the nonterminals for fake
- # productions due to midrule actions (which leave gaps in the numbering)
- n_nonterminals = 0
- translate_nonterminals = numpy.full(
- (len(lr1dfa.productions) - 1,),
- -1,
- numpy.int16
- )
- for i in pyacc.symbols:
- if i._type == ast.PYACC.Symbol.TYPE_NONTERMINAL:
- for j in range(0, len(i.character_set), 2):
- translate_nonterminals[
- i.character_set[j] - 1:i.character_set[j + 1] - 1
- ] = n_nonterminals
- n_nonterminals += 1
- midrule_actions = [translate_nonterminals == -1]
- n_midrule_actions = numpy.sum(midrule_actions)
- translate_nonterminals[midrule_actions] = numpy.arange(
- n_nonterminals,
- n_nonterminals + n_midrule_actions,
- dtype = numpy.int16
- )
-
- # translate and compress the tables
- bison_lr1dfa = BisonLR1DFA(
- lr1dfa,
- n_terminals,
- translate_terminals,
- n_nonterminals + n_midrule_actions,
- translate_nonterminals
- )
-
- def generate(
- skel_file,
- out_file,
- file_prefix,
- type_prefix,
- name_prefix,
- is_header
- ):
- with open(skel_file, 'r') as fin:
- with open(out_file, 'w+') as fout:
- line = fin.readline()
- while len(line):
- if line == '/* GENERATE YYPURE */\n':
- fout.write(
- '''/* GENERATE YYPURE BEGIN */
-#define YYPURE {0:d}
-/* GENERATE YYPURE END */
-'''.format(
- pyacc[0].api_pure
- ).replace('YY', type_prefix if is_header else 'YY').replace('yy', name_prefix if is_header else 'yy') # hack
- )
- elif line == '/* GENERATE TYPEPREFIX */\n':
- fout.write(
- '''/* GENERATE TYPEPREFIX BEGIN */
-{0:s}/* GENERATE TYPEPREFIX END */
-'''.format(
- '''/* Substitute the type names. */
-{0:s}'''.format(
- ''.join(
- [
- '#define YY{0:s} {1:s}{2:s}\n'.format(
- i,
- pyacc[0].type_prefix,
- i
- )
- for i in (
- ['STYPE'] +
- (['LTYPE'] if pyacc[0].locations else [])
- )
- ]
- )
- )
- if pyacc[0].type_prefix != 'YY' else
- ''
- )
- )
- elif line == '/* GENERATE NAMEPREFIX */\n':
- fout.write(
- '''/* GENERATE NAMEPREFIX BEGIN */
-{0:s}/* GENERATE NAMEPREFIX END */
-'''.format(
- '''/* Substitute the variable and function names. */
-{0:s}'''.format(
- ''.join(
- [
- '#define yy{0:s} {1:s}{2:s}\n'.format(
- i,
- pyacc[0].name_prefix,
- i
- )
- for i in (
- # note: nerrs is actually pure but do what bison does
- ['parse', 'lex', 'error', 'debug', 'nerrs'] +
- (
- []
- if pyacc[0].api_pure else
- ['lval', 'char'] +
- (['lloc'] if pyacc[0].locations else [])
- )
- )
- ]
- )
- )
- if pyacc[0].name_prefix != 'yy' else
- ''
- )
- )
- elif line == '/* GENERATE SECTION1TOP */\n':
- fout.write(
- '''/* GENERATE SECTION1TOP BEGIN */
-{0:s}/* GENERATE SECTION1TOP END */
-'''.format(
- ''.join(
- [
- '{0:s}\n'.format(i.get_text())
- for i in pyacc.top_text
- ]
- )
- )
- )
- elif line == '/* GENERATE SECTION1BEFOREUNION */\n':
- fout.write(
- '''/* GENERATE SECTION1BEFOREUNION BEGIN */
-{0:s}/* GENERATE SECTION1BEFOREUNION END */
-'''.format(
- ''.join(
- [
- '{0:s}\n'.format(i.get_text())
- for i in pyacc.before_union_text
- ]
- )
- )
- )
- elif line == '/* GENERATE YYERROR_VERBOSE */\n':
- fout.write(
- '''/* GENERATE YYERROR_VERBOSE BEGIN */
-#ifdef YYERROR_VERBOSE
-# undef YYERROR_VERBOSE
-# define YYERROR_VERBOSE 1
-#else
-# define YYERROR_VERBOSE {0:d}
-#endif
-/* GENERATE YYERROR_VERBOSE END */
-'''.format(
- int(pyacc[0].error_verbose)
- )
- )
- elif line == '/* GENERATE SECTION1REQUIRES */\n':
- fout.write(
- '''/* GENERATE SECTION1REQUIRES BEGIN */
-{0:s}/* GENERATE SECTION1REQUIRES END */
-'''.format(
- ''.join(
- [
- '{0:s}\n'.format(i.get_text())
- for i in pyacc.requires_text
- ]
- )
- )
- )
- elif line == '/* GENERATE YYDEBUG */\n':
- fout.write(
- '''/* GENERATE YYDEBUG BEGIN */
-#ifndef YYDEBUG
-# define YYDEBUG {0:d}
-#endif
-#if YYDEBUG
-extern int yydebug;
-#endif
-/* GENERATE YYDEBUG END */
-'''.format(
- int(pyacc[0].debug)
- ).replace('YY', type_prefix).replace('yy', name_prefix) # hack
- )
- elif line == '/* GENERATE TOKENSEQUAL */\n':
- fout.write(
- '''/* GENERATE TOKENSEQUAL BEGIN */{0:s}
-/* GENERATE TOKENSEQUAL END */
-'''.format(
- ','.join(
- [
- '\n {0:s} = {1:d}'.format(i.name, i.character_set[0])
- for i in pyacc.symbols[3:]
- if (
- i._type == ast.PYACC.Symbol.TYPE_TERMINAL and
- len(i.name)
- )
- ]
- )
- )
- )
- elif line == '/* GENERATE TOKENS */\n':
- fout.write(
- '''/* GENERATE TOKENS BEGIN */
-{0:s}/* GENERATE TOKENS END */
-'''.format(
- ''.join(
- [
- '#define {0:s} {1:d}\n'.format(i.name, i.character_set[0])
- for i in pyacc.symbols[3:]
- if (
- i._type == ast.PYACC.Symbol.TYPE_TERMINAL and
- len(i.name)
- )
- ]
- )
- )
- )
- elif line == '/* GENERATE YYSTYPE */\n':
- fout.write(
- '''/* GENERATE YYSTYPE BEGIN */
-#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
-{0:s}# define YYSTYPE_IS_TRIVIAL 1
-# define YYSTYPE_IS_DECLARED 1
-#endif
-/* GENERATE YYSTYPE END */
-'''.format(
- '''union YYSTYPE
-{{
-{0:s}}};
-typedef union YYSTYPE YYSTYPE;
-'''.format(
- ''.join(
- [
- '{0:s}\n'.format(i.get_text())
- for i in pyacc.union_text
- ]
- )
- )
- if len(pyacc.union_text) else
- '''typedef int YYSTYPE;
-'''
- ).replace('YY', type_prefix).replace('yy', name_prefix) # hack
- )
- elif line == '/* GENERATE YYLTYPE */\n':
- fout.write(
- '''/* GENERATE YYLTYPE BEGIN */
-{0:s}/* GENERATE YYLTYPE END */
-'''.format(
- '''#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
-typedef struct YYLTYPE YYLTYPE;
-struct YYLTYPE
-{
- int first_line;
- int first_column;
- int last_line;
- int last_column;
-};
-# define YYLTYPE_IS_DECLARED 1
-# define YYLTYPE_IS_TRIVIAL 1
-#endif
-'''
- if pyacc[0].locations else
- ''
- ).replace('YY', type_prefix).replace('yy', name_prefix) # hack
- )
- elif line == '/* GENERATE SECTION1AFTERUNION */\n':
- fout.write(
- '''/* GENERATE SECTION1AFTERUNION BEGIN */
-{0:s}/* GENERATE SECTION1AFTERUNION END */
-'''.format(
- ''.join(
- [
- '{0:s}\n'.format(i.get_text())
- for i in pyacc.after_union_text
- ]
- )
- )
- )
- elif line == '/* GENERATE TABLES */\n':
- # yyrline (source line where rule is defined) not implemented yet
- yyrline = numpy.zeros(
- (bison_lr1dfa.rules.shape[0],),
- numpy.int16
- )
-
- # yytname (textual terminal/nonterminal name) wraps 70 columns
- x = 70
- yytname_lines = []
- for i in (
- [
- (
- '"{0:s}"'.format(i.name)
- if len(i.name) else
- '"\'{0:s}\'"'.format(
- escapes[i.character_set[0]]
- if i.character_set[0] in escapes else
- chr(i.character_set[0])
- if i.character_set[0] >= 0x20 else
- '\\\\x{0:02x}'.format(i.character_set[0])
- )
- )
- for i in pyacc.symbols
- if i._type == ast.PYACC.Symbol.TYPE_TERMINAL
- ] +
- [
- '"{0:s}"'.format(i.name)
- for i in pyacc.symbols
- if i._type == ast.PYACC.Symbol.TYPE_NONTERMINAL
- ] +
- ['"$@{0:d}"'.format(i) for i in range(n_midrule_actions)] +
- ['YY_NULLPTR']
- ):
- if x + len(i) >= 70:
- yytname_lines.append([])
- x = 0
- yytname_lines[-1].append(i)
- x += len(i) + 2
-
- # yytoknum is the approximate reverse of bison_lr1dfa.translate,
- # do in reverse order so later duplicate entries get overwritten
- yytoknum = numpy.zeros((bison_lr1dfa.n_terminals,), numpy.int16)
- yytoknum[bison_lr1dfa.translate[::-1]] = numpy.arange(
- bison_lr1dfa.translate.shape[0] - 1,
- -1,
- -1,
- numpy.int16
- )
-
- fout.write(
- '''/* GENERATE TABLES BEGIN */
-/* YYFINAL -- State number of the termination state. */
-#define YYFINAL {0:d}
-/* YYLAST -- Last index in YYTABLE. */
-#define YYLAST {1:d}
-
-/* YYNTOKENS -- Number of terminals. */
-#define YYNTOKENS {2:d}
-/* YYNNTS -- Number of nonterminals. */
-#define YYNNTS {3:d}
-/* YYNRULES -- Number of rules. */
-#define YYNRULES {4:d}
-/* YYNSTATES -- Number of states. */
-#define YYNSTATES {5:d}
-
-/* YYTRANSLATE[YYX] -- Symbol number corresponding to YYX as returned
- by yylex, with out-of-bounds checking. */
-#define YYUNDEFTOK 2
-#define YYMAXUTOK {6:d}
-
-#define YYTRANSLATE(YYX) \\
- ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
-
-/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM
- as returned by yylex, without out-of-bounds checking. */
-static const yytype_int16 yytranslate[] =
-{{{7:s}
-}};
-
-#if YYDEBUG
- /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */
-static const yytype_int16 yyrline[] =
-{{{8:s}
-}};
-#endif
-
-#if YYDEBUG || YYERROR_VERBOSE || {9:d}
-/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
- First, the terminals, then, starting at YYNTOKENS, nonterminals. */
-static const char *const yytname[] =
-{{{10:s}
-}};
-#endif
-
-# ifdef YYPRINT
-/* YYTOKNUM[NUM] -- (External) token number corresponding to the
- (internal) symbol number NUM (which must be that of a token). */
-static const yytype_int16 yytoknum[] =
-{{{11:s}
-}};
-# endif
-
-#define YYPACT_NINF {12:d}
-
-#define yypact_value_is_default(Yystate) \\
- (!!((Yystate) == ({13:d})))
-
-#define YYTABLE_NINF -1
-
-#define yytable_value_is_error(Yytable_value) \\
- 0
-
- /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
- STATE-NUM. */
-static const yytype_int16 yypact[] =
-{{{14:s}
-}};
-
- /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM.
- Performed when YYTABLE does not specify something else to do. Zero
- means the default is an error. */
-static const yytype_int16 yydefact[] =
-{{{15:s}
-}};
-
- /* YYPGOTO[NTERM-NUM]. */
-static const yytype_int16 yypgoto[] =
-{{{16:s}
-}};
-
- /* YYDEFGOTO[NTERM-NUM]. */
-static const yytype_int16 yydefgoto[] =
-{{{17:s}
-}};
-
- /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If
- positive, shift that token. If negative, reduce the rule whose
- number is the opposite. If YYTABLE_NINF, syntax error. */
-static const yytype_int16 yytable[] =
-{{{18:s}
-}};
-
-static const yytype_int16 yycheck[] =
-{{{19:s}
-}};
-
- /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
- symbol of state STATE-NUM. */
-static const yytype_int16 yystos[] =
-{{{20:s}
-}};
-
- /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
-static const yytype_int16 yyr1[] =
-{{{21:s}
-}};
-
- /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */
-static const yytype_int16 yyr2[] =
-{{{22:s}
-}};
-/* GENERATE TABLES END */
-'''.format(
- # YYFINAL
- bison_lr1dfa.action_pointer.shape[0],
- # YYLAST
- bison_lr1dfa.entries.shape[0] - 1,
- # YYNTOKENS
- bison_lr1dfa.n_terminals,
- # YYNNTS
- bison_lr1dfa.goto_pointer.shape[0],
- # YYNRULES
- bison_lr1dfa.rules.shape[0],
- # YYNSTATES
- bison_lr1dfa.action_pointer.shape[0],
- # YYMAXUTOK
- bison_lr1dfa.translate.shape[0] - 1,
- # yytranslate
- ','.join(
- [
- '\n {0:s}'.format(
- ','.join(
- [
- '{0:6d}'.format(j)
- for j in bison_lr1dfa.translate[i:i + 10]
- ]
- )
- )
- for i in range(0, bison_lr1dfa.translate.shape[0], 10)
- ]
- ),
- # yyrline
- ','.join(
- [
- '\n{0:s}'.format(
- ','.join(
- [
- '{0:6d}'.format(j)
- for j in yyrline[i:i + 10]
- ]
- )
- )
- for i in range(0, yyrline.shape[0], 10)
- ]
- ),
- # YYERROR_VERBOSE (strangely the defined value is repeated)
- int(pyacc[0].error_verbose),
- # yytname
- ','.join(
- ['\n {0:s}'.format(', '.join(i)) for i in yytname_lines]
- ),
- # yytoknum
- ','.join(
- [
- '\n {0:s}'.format(
- ','.join(
- [
- '{0:6d}'.format(j)
- for j in yytoknum[i:i + 10]
- ]
- )
- )
- for i in range(0, yytoknum.shape[0], 10)
- ]
- ),
- # YYPACT_NINF
- -bison_lr1dfa.entry_base,
- # yypact_value_is_default
- -bison_lr1dfa.entry_base,
- # yypact
- ','.join(
- [
- '\n {0:s}'.format(
- ','.join(
- [
- '{0:6d}'.format(j)
- for j in bison_lr1dfa.action_pointer[i:i + 10]
- ]
- )
- )
- for i in range(0, bison_lr1dfa.action_pointer.shape[0], 10)
- ]
- ),
- # yydefact
- ','.join(
- [
- '\n {0:s}'.format(
- ','.join(
- [
- '{0:6d}'.format(j)
- for j in bison_lr1dfa.default_action[i:i + 10]
- ]
- )
- )
- for i in range(0, bison_lr1dfa.default_action.shape[0], 10)
- ]
- ),
- # yypgoto
- ','.join(
- [
- '\n {0:s}'.format(
- ','.join(
- [
- '{0:6d}'.format(j)
- for j in bison_lr1dfa.goto_pointer[i:i + 10]
- ]
- )
- )
- for i in range(0, bison_lr1dfa.goto_pointer.shape[0], 10)
- ]
- ),
- # yydefgoto
- ','.join(
- [
- '\n {0:s}'.format(
- ','.join(
- [
- '{0:6d}'.format(j)
- for j in bison_lr1dfa.default_goto[i:i + 10]
- ]
- )
- )
- for i in range(0, bison_lr1dfa.default_goto.shape[0], 10)
- ]
- ),
- # yytable
- ','.join(
- [
- '\n {0:s}'.format(
- ','.join(
- [
- '{0:6d}'.format(j)
- for j in bison_lr1dfa.entries[i:i + 10, 0]
- ]
- )
- )
- for i in range(0, bison_lr1dfa.entries.shape[0], 10)
- ]
- ),
- # yycheck
- ','.join(
- [
- '\n {0:s}'.format(
- ','.join(
- [
- '{0:6d}'.format(j)
- for j in bison_lr1dfa.entries[i:i + 10, 1]
- ]
- )
- )
- for i in range(0, bison_lr1dfa.entries.shape[0], 10)
- ]
- ),
- # yystos
- ','.join(
- [
- '\n {0:s}'.format(
- ','.join(
- [
- '{0:6d}'.format(j)
- for j in bison_lr1dfa.accessing_symbols[i:i + 10]
- ]
- )
- )
- for i in range(0, bison_lr1dfa.accessing_symbols.shape[0], 10)
- ]
- ),
- # yyr1
- ','.join(
- [
- '\n {0:s}'.format(
- ','.join(
- [
- '{0:6d}'.format(j)
- for j in bison_lr1dfa.rules[i:i + 10, 0]
- ]
- )
- )
- for i in range(0, bison_lr1dfa.rules.shape[0], 10)
- ]
- ),
- # yyr2
- ','.join(
- [
- '\n {0:s}'.format(
- ','.join(
- [
- '{0:6d}'.format(j)
- for j in bison_lr1dfa.rules[i:i + 10, 1]
- ]
- )
- )
- for i in range(0, bison_lr1dfa.rules.shape[0], 10)
- ]
- )
- ).replace('YYDEBUG', type_prefix + 'DEBUG') # hack
- )
- elif line == '/* GENERATE INITIALACTION */\n':
- fout.write(
- '''/* GENERATE INITIALACTION BEGIN */
-{0:s}/* GENERATE INITIALACTION END */
-'''.format(
- ''.join(
- [
- '{0:s}\n'.format(i.get_text())
- for i in pyacc.initial_action_text
- ]
- ).replace('(yyval)', '(yylval').replace('(yyloc)', '(yylloc)') # hack
- )
- )
- elif line == '/* GENERATE SECTION2 */\n':
- fout.write(
- '''/* GENERATE SECTION2 BEGIN */
-{0:s}/* GENERATE SECTION2 END */
-'''.format(
- '\n'.join(
- [
- ''' case {0:d}:
- {1:s}
- break;
-'''.format(
- i,
- lr1dfa.productions[i][1].get_text()
- )
- for i in range(len(lr1dfa.productions))
- if lr1dfa.productions[i][1] is not None
- ]
- )
- )
- )
- elif line == '/* GENERATE SECTION3 */\n':
- fout.write(
- '''/* GENERATE SECTION3 BEGIN */
-{0:s}/*GENERATE SECTION3 END */
-'''.format(
- '' if len(pyacc) < 3 else pyacc[2].get_text()
- )
- )
- else:
- if file_prefix != 'YY_YY_Y_':
- line = line.replace('YY_YY_Y_', file_prefix)
- if is_header:
- if type_prefix != 'YY':
- line = line.replace('YY_YY_Y_', 'YY').replace('YY', type_prefix)
- if name_prefix != 'yy':
- line = line.replace('yy', name_prefix)
- elif type_prefix != 'YY':
- line = line.replace('YYDEBUG', type_prefix + 'DEBUG')
- line = line.replace('YYSTYPE_IS_', type_prefix + 'STYPE_IS_')
- line = line.replace('YYLTYPE_IS_', type_prefix + 'LTYPE_IS_')
- fout.write(line)
- line = fin.readline()
- generate(
- skel_file,
- out_file,
- pyacc[0].type_prefix,
- pyacc[0].type_prefix,
- pyacc[0].name_prefix,
- False
- )
- if defines_file is not None:
- generate(
- '{0:s}.h'.format(
- skel_file[:-2] if skel_file[-2:] == '.c' else skel_file
- ),
- defines_file,
- pyacc[0].type_prefix,
- pyacc[0].type_prefix,
- pyacc[0].name_prefix,
- True
- )
+ # add some test routines here for parsing text, etc, similar to LR1DFA()
+ # add repr() routine, to use with wrap_repr() for basic serialization
import ast
import element
-import bison_lr1dfa
+import generate_bison
import getopt
import os
import sys
#element.serialize(pyacc, 'a.xml', 'utf-8')
#pyacc = element.deserialize('a.xml', ast.factory, 'utf-8')
pyacc.post_process()
-element.serialize(pyacc, 'b.xml', 'utf-8')
-pyacc = element.deserialize('b.xml', ast.factory, 'utf-8')
-bison_lr1dfa.generate(pyacc, skel_file, out_file, defines_file)
+#element.serialize(pyacc, 'b.xml', 'utf-8')
+#pyacc = element.deserialize('b.xml', ast.factory, 'utf-8')
+generate_bison.generate_bison(pyacc, skel_file, out_file, defines_file)
--- /dev/null
+import ast
+import numpy
+
+escapes = {
+ 0x07: '\\a',
+ 0x08: '\\b',
+ 0x09: '\\t',
+ 0x0a: '\\n',
+ 0x0b: '\\v',
+ 0x0c: '\\f',
+ 0x0d: '\\r',
+ 0x22: '\\"',
+ 0x5c: '\\\\'
+}
+
+def generate_bison(pyacc, skel_file, out_file, defines_file = None):
+ _lr1dfa = pyacc.to_lr1().to_lalr1()
+
+ # generate translate table for terminal symbols
+ # this undoes yacc/bison's rather wasteful mapping of 0x00..0xff to literal
+ # characters, and also accommodates any token value overrides given by the
+ # user, yielding a consecutive set of terminal numbers that are really used
+ n_terminals = 0
+ translate_terminals = numpy.full(
+ (_lr1dfa.n_terminals,),
+ 2, # '$undefined'
+ numpy.int16
+ )
+ for i in pyacc.symbols:
+ if i._type == ast.PYACC.Symbol.TYPE_TERMINAL:
+ for j in range(0, len(i.character_set), 2):
+ translate_terminals[
+ i.character_set[j]:i.character_set[j + 1]
+ ] = n_terminals
+ n_terminals += 1
+
+ # generate translate table for nonterminal symbols
+ # this is effectively a map from productions back to nonterminal symbols
+ # we do not generate an entry for the first production (start production)
+ # we generate extra fake entries after end of the nonterminals for fake
+ # productions due to midrule actions (which leave gaps in the numbering)
+ n_nonterminals = 0
+ translate_nonterminals = numpy.full(
+ (len(_lr1dfa.productions) - 1,),
+ -1,
+ numpy.int16
+ )
+ for i in pyacc.symbols:
+ if i._type == ast.PYACC.Symbol.TYPE_NONTERMINAL:
+ for j in range(0, len(i.character_set), 2):
+ translate_nonterminals[
+ i.character_set[j] - 1:i.character_set[j + 1] - 1
+ ] = n_nonterminals
+ n_nonterminals += 1
+ midrule_actions = [translate_nonterminals == -1]
+ n_midrule_actions = numpy.sum(midrule_actions)
+ translate_nonterminals[midrule_actions] = numpy.arange(
+ n_nonterminals,
+ n_nonterminals + n_midrule_actions,
+ dtype = numpy.int16
+ )
+
+ # translate and compress the tables
+ _bison_lr1dfa = _lr1dfa.to_bison_lr1dfa(
+ n_terminals,
+ translate_terminals,
+ n_nonterminals + n_midrule_actions,
+ translate_nonterminals
+ )
+
+ def generate(
+ skel_file,
+ out_file,
+ file_prefix,
+ type_prefix,
+ name_prefix,
+ is_header
+ ):
+ with open(skel_file, 'r') as fin:
+ with open(out_file, 'w+') as fout:
+ line = fin.readline()
+ while len(line):
+ if line == '/* GENERATE YYPURE */\n':
+ fout.write(
+ '''/* GENERATE YYPURE BEGIN */
+#define YYPURE {0:d}
+/* GENERATE YYPURE END */
+'''.format(
+ pyacc[0].api_pure
+ ).replace('YY', type_prefix if is_header else 'YY').replace('yy', name_prefix if is_header else 'yy') # hack
+ )
+ elif line == '/* GENERATE TYPEPREFIX */\n':
+ fout.write(
+ '''/* GENERATE TYPEPREFIX BEGIN */
+{0:s}/* GENERATE TYPEPREFIX END */
+'''.format(
+ '''/* Substitute the type names. */
+{0:s}'''.format(
+ ''.join(
+ [
+ '#define YY{0:s} {1:s}{2:s}\n'.format(
+ i,
+ pyacc[0].type_prefix,
+ i
+ )
+ for i in (
+ ['STYPE'] +
+ (['LTYPE'] if pyacc[0].locations else [])
+ )
+ ]
+ )
+ )
+ if pyacc[0].type_prefix != 'YY' else
+ ''
+ )
+ )
+ elif line == '/* GENERATE NAMEPREFIX */\n':
+ fout.write(
+ '''/* GENERATE NAMEPREFIX BEGIN */
+{0:s}/* GENERATE NAMEPREFIX END */
+'''.format(
+ '''/* Substitute the variable and function names. */
+{0:s}'''.format(
+ ''.join(
+ [
+ '#define yy{0:s} {1:s}{2:s}\n'.format(
+ i,
+ pyacc[0].name_prefix,
+ i
+ )
+ for i in (
+ # note: nerrs is actually pure but do what bison does
+ ['parse', 'lex', 'error', 'debug', 'nerrs'] +
+ (
+ []
+ if pyacc[0].api_pure else
+ ['lval', 'char'] +
+ (['lloc'] if pyacc[0].locations else [])
+ )
+ )
+ ]
+ )
+ )
+ if pyacc[0].name_prefix != 'yy' else
+ ''
+ )
+ )
+ elif line == '/* GENERATE SECTION1TOP */\n':
+ fout.write(
+ '''/* GENERATE SECTION1TOP BEGIN */
+{0:s}/* GENERATE SECTION1TOP END */
+'''.format(
+ ''.join(
+ [
+ '{0:s}\n'.format(i.get_text())
+ for i in pyacc.top_text
+ ]
+ )
+ )
+ )
+ elif line == '/* GENERATE SECTION1BEFOREUNION */\n':
+ fout.write(
+ '''/* GENERATE SECTION1BEFOREUNION BEGIN */
+{0:s}/* GENERATE SECTION1BEFOREUNION END */
+'''.format(
+ ''.join(
+ [
+ '{0:s}\n'.format(i.get_text())
+ for i in pyacc.before_union_text
+ ]
+ )
+ )
+ )
+ elif line == '/* GENERATE YYERROR_VERBOSE */\n':
+ fout.write(
+ '''/* GENERATE YYERROR_VERBOSE BEGIN */
+#ifdef YYERROR_VERBOSE
+# undef YYERROR_VERBOSE
+# define YYERROR_VERBOSE 1
+#else
+# define YYERROR_VERBOSE {0:d}
+#endif
+/* GENERATE YYERROR_VERBOSE END */
+'''.format(
+ int(pyacc[0].error_verbose)
+ )
+ )
+ elif line == '/* GENERATE SECTION1REQUIRES */\n':
+ fout.write(
+ '''/* GENERATE SECTION1REQUIRES BEGIN */
+{0:s}/* GENERATE SECTION1REQUIRES END */
+'''.format(
+ ''.join(
+ [
+ '{0:s}\n'.format(i.get_text())
+ for i in pyacc.requires_text
+ ]
+ )
+ )
+ )
+ elif line == '/* GENERATE YYDEBUG */\n':
+ fout.write(
+ '''/* GENERATE YYDEBUG BEGIN */
+#ifndef YYDEBUG
+# define YYDEBUG {0:d}
+#endif
+#if YYDEBUG
+extern int yydebug;
+#endif
+/* GENERATE YYDEBUG END */
+'''.format(
+ int(pyacc[0].debug)
+ ).replace('YY', type_prefix).replace('yy', name_prefix) # hack
+ )
+ elif line == '/* GENERATE TOKENSEQUAL */\n':
+ fout.write(
+ '''/* GENERATE TOKENSEQUAL BEGIN */{0:s}
+/* GENERATE TOKENSEQUAL END */
+'''.format(
+ ','.join(
+ [
+ '\n {0:s} = {1:d}'.format(i.name, i.character_set[0])
+ for i in pyacc.symbols[3:]
+ if (
+ i._type == ast.PYACC.Symbol.TYPE_TERMINAL and
+ len(i.name)
+ )
+ ]
+ )
+ )
+ )
+ elif line == '/* GENERATE TOKENS */\n':
+ fout.write(
+ '''/* GENERATE TOKENS BEGIN */
+{0:s}/* GENERATE TOKENS END */
+'''.format(
+ ''.join(
+ [
+ '#define {0:s} {1:d}\n'.format(i.name, i.character_set[0])
+ for i in pyacc.symbols[3:]
+ if (
+ i._type == ast.PYACC.Symbol.TYPE_TERMINAL and
+ len(i.name)
+ )
+ ]
+ )
+ )
+ )
+ elif line == '/* GENERATE YYSTYPE */\n':
+ fout.write(
+ '''/* GENERATE YYSTYPE BEGIN */
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+{0:s}# define YYSTYPE_IS_TRIVIAL 1
+# define YYSTYPE_IS_DECLARED 1
+#endif
+/* GENERATE YYSTYPE END */
+'''.format(
+ '''union YYSTYPE
+{{
+{0:s}}};
+typedef union YYSTYPE YYSTYPE;
+'''.format(
+ ''.join(
+ [
+ '{0:s}\n'.format(i.get_text())
+ for i in pyacc.union_text
+ ]
+ )
+ )
+ if len(pyacc.union_text) else
+ '''typedef int YYSTYPE;
+'''
+ ).replace('YY', type_prefix).replace('yy', name_prefix) # hack
+ )
+ elif line == '/* GENERATE YYLTYPE */\n':
+ fout.write(
+ '''/* GENERATE YYLTYPE BEGIN */
+{0:s}/* GENERATE YYLTYPE END */
+'''.format(
+ '''#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
+typedef struct YYLTYPE YYLTYPE;
+struct YYLTYPE
+{
+ int first_line;
+ int first_column;
+ int last_line;
+ int last_column;
+};
+# define YYLTYPE_IS_DECLARED 1
+# define YYLTYPE_IS_TRIVIAL 1
+#endif
+'''
+ if pyacc[0].locations else
+ ''
+ ).replace('YY', type_prefix).replace('yy', name_prefix) # hack
+ )
+ elif line == '/* GENERATE SECTION1AFTERUNION */\n':
+ fout.write(
+ '''/* GENERATE SECTION1AFTERUNION BEGIN */
+{0:s}/* GENERATE SECTION1AFTERUNION END */
+'''.format(
+ ''.join(
+ [
+ '{0:s}\n'.format(i.get_text())
+ for i in pyacc.after_union_text
+ ]
+ )
+ )
+ )
+ elif line == '/* GENERATE TABLES */\n':
+ # yyrline (source line where rule is defined) not implemented yet
+ yyrline = numpy.zeros(
+ (_bison_lr1dfa.rules.shape[0],),
+ numpy.int16
+ )
+
+ # yytname (textual terminal/nonterminal name) wraps 70 columns
+ x = 70
+ yytname_lines = []
+ for i in (
+ [
+ (
+ '"{0:s}"'.format(i.name)
+ if len(i.name) else
+ '"\'{0:s}\'"'.format(
+ escapes[i.character_set[0]]
+ if i.character_set[0] in escapes else
+ chr(i.character_set[0])
+ if i.character_set[0] >= 0x20 else
+ '\\\\x{0:02x}'.format(i.character_set[0])
+ )
+ )
+ for i in pyacc.symbols
+ if i._type == ast.PYACC.Symbol.TYPE_TERMINAL
+ ] +
+ [
+ '"{0:s}"'.format(i.name)
+ for i in pyacc.symbols
+ if i._type == ast.PYACC.Symbol.TYPE_NONTERMINAL
+ ] +
+ ['"$@{0:d}"'.format(i) for i in range(n_midrule_actions)] +
+ ['YY_NULLPTR']
+ ):
+ if x + len(i) >= 70:
+ yytname_lines.append([])
+ x = 0
+ yytname_lines[-1].append(i)
+ x += len(i) + 2
+
+ # yytoknum is the approximate reverse of _bison_lr1dfa.translate,
+ # do in reverse order so later duplicate entries get overwritten
+ yytoknum = numpy.zeros((_bison_lr1dfa.n_terminals,), numpy.int16)
+ yytoknum[_bison_lr1dfa.translate[::-1]] = numpy.arange(
+ _bison_lr1dfa.translate.shape[0] - 1,
+ -1,
+ -1,
+ numpy.int16
+ )
+
+ fout.write(
+ '''/* GENERATE TABLES BEGIN */
+/* YYFINAL -- State number of the termination state. */
+#define YYFINAL {0:d}
+/* YYLAST -- Last index in YYTABLE. */
+#define YYLAST {1:d}
+
+/* YYNTOKENS -- Number of terminals. */
+#define YYNTOKENS {2:d}
+/* YYNNTS -- Number of nonterminals. */
+#define YYNNTS {3:d}
+/* YYNRULES -- Number of rules. */
+#define YYNRULES {4:d}
+/* YYNSTATES -- Number of states. */
+#define YYNSTATES {5:d}
+
+/* YYTRANSLATE[YYX] -- Symbol number corresponding to YYX as returned
+ by yylex, with out-of-bounds checking. */
+#define YYUNDEFTOK 2
+#define YYMAXUTOK {6:d}
+
+#define YYTRANSLATE(YYX) \\
+ ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
+
+/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM
+ as returned by yylex, without out-of-bounds checking. */
+static const yytype_int16 yytranslate[] =
+{{{7:s}
+}};
+
+#if YYDEBUG
+ /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */
+static const yytype_int16 yyrline[] =
+{{{8:s}
+}};
+#endif
+
+#if YYDEBUG || YYERROR_VERBOSE || {9:d}
+/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
+ First, the terminals, then, starting at YYNTOKENS, nonterminals. */
+static const char *const yytname[] =
+{{{10:s}
+}};
+#endif
+
+# ifdef YYPRINT
+/* YYTOKNUM[NUM] -- (External) token number corresponding to the
+ (internal) symbol number NUM (which must be that of a token). */
+static const yytype_int16 yytoknum[] =
+{{{11:s}
+}};
+# endif
+
+#define YYPACT_NINF {12:d}
+
+#define yypact_value_is_default(Yystate) \\
+ (!!((Yystate) == ({13:d})))
+
+#define YYTABLE_NINF -1
+
+#define yytable_value_is_error(Yytable_value) \\
+ 0
+
+ /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
+ STATE-NUM. */
+static const yytype_int16 yypact[] =
+{{{14:s}
+}};
+
+ /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM.
+ Performed when YYTABLE does not specify something else to do. Zero
+ means the default is an error. */
+static const yytype_int16 yydefact[] =
+{{{15:s}
+}};
+
+ /* YYPGOTO[NTERM-NUM]. */
+static const yytype_int16 yypgoto[] =
+{{{16:s}
+}};
+
+ /* YYDEFGOTO[NTERM-NUM]. */
+static const yytype_int16 yydefgoto[] =
+{{{17:s}
+}};
+
+ /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If
+ positive, shift that token. If negative, reduce the rule whose
+ number is the opposite. If YYTABLE_NINF, syntax error. */
+static const yytype_int16 yytable[] =
+{{{18:s}
+}};
+
+static const yytype_int16 yycheck[] =
+{{{19:s}
+}};
+
+ /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
+ symbol of state STATE-NUM. */
+static const yytype_int16 yystos[] =
+{{{20:s}
+}};
+
+ /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
+static const yytype_int16 yyr1[] =
+{{{21:s}
+}};
+
+ /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */
+static const yytype_int16 yyr2[] =
+{{{22:s}
+}};
+/* GENERATE TABLES END */
+'''.format(
+ # YYFINAL
+ _bison_lr1dfa.action_pointer.shape[0],
+ # YYLAST
+ _bison_lr1dfa.entries.shape[0] - 1,
+ # YYNTOKENS
+ _bison_lr1dfa.n_terminals,
+ # YYNNTS
+ _bison_lr1dfa.goto_pointer.shape[0],
+ # YYNRULES
+ _bison_lr1dfa.rules.shape[0],
+ # YYNSTATES
+ _bison_lr1dfa.action_pointer.shape[0],
+ # YYMAXUTOK
+ _bison_lr1dfa.translate.shape[0] - 1,
+ # yytranslate
+ ','.join(
+ [
+ '\n {0:s}'.format(
+ ','.join(
+ [
+ '{0:6d}'.format(j)
+ for j in _bison_lr1dfa.translate[i:i + 10]
+ ]
+ )
+ )
+ for i in range(0, _bison_lr1dfa.translate.shape[0], 10)
+ ]
+ ),
+ # yyrline
+ ','.join(
+ [
+ '\n{0:s}'.format(
+ ','.join(
+ [
+ '{0:6d}'.format(j)
+ for j in yyrline[i:i + 10]
+ ]
+ )
+ )
+ for i in range(0, yyrline.shape[0], 10)
+ ]
+ ),
+ # YYERROR_VERBOSE (strangely the defined value is repeated)
+ int(pyacc[0].error_verbose),
+ # yytname
+ ','.join(
+ ['\n {0:s}'.format(', '.join(i)) for i in yytname_lines]
+ ),
+ # yytoknum
+ ','.join(
+ [
+ '\n {0:s}'.format(
+ ','.join(
+ [
+ '{0:6d}'.format(j)
+ for j in yytoknum[i:i + 10]
+ ]
+ )
+ )
+ for i in range(0, yytoknum.shape[0], 10)
+ ]
+ ),
+ # YYPACT_NINF
+ -_bison_lr1dfa.entry_base,
+ # yypact_value_is_default
+ -_bison_lr1dfa.entry_base,
+ # yypact
+ ','.join(
+ [
+ '\n {0:s}'.format(
+ ','.join(
+ [
+ '{0:6d}'.format(j)
+ for j in _bison_lr1dfa.action_pointer[i:i + 10]
+ ]
+ )
+ )
+ for i in range(0, _bison_lr1dfa.action_pointer.shape[0], 10)
+ ]
+ ),
+ # yydefact
+ ','.join(
+ [
+ '\n {0:s}'.format(
+ ','.join(
+ [
+ '{0:6d}'.format(j)
+ for j in _bison_lr1dfa.default_action[i:i + 10]
+ ]
+ )
+ )
+ for i in range(0, _bison_lr1dfa.default_action.shape[0], 10)
+ ]
+ ),
+ # yypgoto
+ ','.join(
+ [
+ '\n {0:s}'.format(
+ ','.join(
+ [
+ '{0:6d}'.format(j)
+ for j in _bison_lr1dfa.goto_pointer[i:i + 10]
+ ]
+ )
+ )
+ for i in range(0, _bison_lr1dfa.goto_pointer.shape[0], 10)
+ ]
+ ),
+ # yydefgoto
+ ','.join(
+ [
+ '\n {0:s}'.format(
+ ','.join(
+ [
+ '{0:6d}'.format(j)
+ for j in _bison_lr1dfa.default_goto[i:i + 10]
+ ]
+ )
+ )
+ for i in range(0, _bison_lr1dfa.default_goto.shape[0], 10)
+ ]
+ ),
+ # yytable
+ ','.join(
+ [
+ '\n {0:s}'.format(
+ ','.join(
+ [
+ '{0:6d}'.format(j)
+ for j in _bison_lr1dfa.entries[i:i + 10, 0]
+ ]
+ )
+ )
+ for i in range(0, _bison_lr1dfa.entries.shape[0], 10)
+ ]
+ ),
+ # yycheck
+ ','.join(
+ [
+ '\n {0:s}'.format(
+ ','.join(
+ [
+ '{0:6d}'.format(j)
+ for j in _bison_lr1dfa.entries[i:i + 10, 1]
+ ]
+ )
+ )
+ for i in range(0, _bison_lr1dfa.entries.shape[0], 10)
+ ]
+ ),
+ # yystos
+ ','.join(
+ [
+ '\n {0:s}'.format(
+ ','.join(
+ [
+ '{0:6d}'.format(j)
+ for j in _bison_lr1dfa.accessing_symbols[i:i + 10]
+ ]
+ )
+ )
+ for i in range(0, _bison_lr1dfa.accessing_symbols.shape[0], 10)
+ ]
+ ),
+ # yyr1
+ ','.join(
+ [
+ '\n {0:s}'.format(
+ ','.join(
+ [
+ '{0:6d}'.format(j)
+ for j in _bison_lr1dfa.rules[i:i + 10, 0]
+ ]
+ )
+ )
+ for i in range(0, _bison_lr1dfa.rules.shape[0], 10)
+ ]
+ ),
+ # yyr2
+ ','.join(
+ [
+ '\n {0:s}'.format(
+ ','.join(
+ [
+ '{0:6d}'.format(j)
+ for j in _bison_lr1dfa.rules[i:i + 10, 1]
+ ]
+ )
+ )
+ for i in range(0, _bison_lr1dfa.rules.shape[0], 10)
+ ]
+ )
+ ).replace('YYDEBUG', type_prefix + 'DEBUG') # hack
+ )
+ elif line == '/* GENERATE INITIALACTION */\n':
+ fout.write(
+ '''/* GENERATE INITIALACTION BEGIN */
+{0:s}/* GENERATE INITIALACTION END */
+'''.format(
+ ''.join(
+ [
+ '{0:s}\n'.format(i.get_text())
+ for i in pyacc.initial_action_text
+ ]
+ ).replace('(yyval)', '(yylval').replace('(yyloc)', '(yylloc)') # hack
+ )
+ )
+ elif line == '/* GENERATE SECTION2 */\n':
+ fout.write(
+ '''/* GENERATE SECTION2 BEGIN */
+{0:s}/* GENERATE SECTION2 END */
+'''.format(
+ '\n'.join(
+ [
+ ''' case {0:d}:
+ {1:s}
+ break;
+'''.format(
+ i,
+ _lr1dfa.productions[i][1].get_text()
+ )
+ for i in range(len(_lr1dfa.productions))
+ if _lr1dfa.productions[i][1] is not None
+ ]
+ )
+ )
+ )
+ elif line == '/* GENERATE SECTION3 */\n':
+ fout.write(
+ '''/* GENERATE SECTION3 BEGIN */
+{0:s}/*GENERATE SECTION3 END */
+'''.format(
+ '' if len(pyacc) < 3 else pyacc[2].get_text()
+ )
+ )
+ else:
+ if file_prefix != 'YY_YY_Y_':
+ line = line.replace('YY_YY_Y_', file_prefix)
+ if is_header:
+ if type_prefix != 'YY':
+ line = line.replace('YY_YY_Y_', 'YY').replace('YY', type_prefix)
+ if name_prefix != 'yy':
+ line = line.replace('yy', name_prefix)
+ elif type_prefix != 'YY':
+ line = line.replace('YYDEBUG', type_prefix + 'DEBUG')
+ line = line.replace('YYSTYPE_IS_', type_prefix + 'STYPE_IS_')
+ line = line.replace('YYLTYPE_IS_', type_prefix + 'LTYPE_IS_')
+ fout.write(line)
+ line = fin.readline()
+ generate(
+ skel_file,
+ out_file,
+ pyacc[0].type_prefix,
+ pyacc[0].type_prefix,
+ pyacc[0].name_prefix,
+ False
+ )
+ if defines_file is not None:
+ generate(
+ '{0:s}.h'.format(
+ skel_file[:-2] if skel_file[-2:] == '.c' else skel_file
+ ),
+ defines_file,
+ pyacc[0].type_prefix,
+ pyacc[0].type_prefix,
+ pyacc[0].name_prefix,
+ True
+ )
import bisect
+import bison_lr1dfa
import element
+import numpy
import work
import sys
self.n_terminals = n_terminals
self.eof_terminal = eof_terminal
+ def to_bison_lr1dfa(
+ self,
+ n_terminals,
+ translate_terminals,
+ n_nonterminals,
+ translate_nonterminals
+ ):
+ #numpy.set_printoptions(threshold = numpy.nan)
+ #print(translate_terminals)
+ #print(translate_nonterminals)
+
+ # translate is yytranslate
+ translate = translate_terminals
+
+ # rules[:, 0] is yyr1
+ # rules[:, 1] is yyr2
+ # note: production 0 (start production) can't be reduced, null it out
+ rules = numpy.concatenate(
+ [
+ numpy.zeros((1, 2), numpy.int16),
+ numpy.stack(
+ [
+ translate_nonterminals + n_terminals,
+ numpy.array([i for i, _ in self.productions[1:]], numpy.int16)
+ ],
+ 1
+ )
+ ],
+ 0
+ )
+
+ # unpack tables into numpy arrays so we can manipulate them efficiently
+ # note: the goto table is transposed with respect to the action table,
+ # so the row in the table corresponds to the yypact[]/yypgoto[] index,
+ # and the column in the table is what gets added to yypact[]/yypgoto[]
+ action_table = numpy.zeros(
+ (len(self.states), self.n_terminals),
+ numpy.int16
+ )
+ goto_table = numpy.zeros(
+ (len(self.productions), len(self.states)),
+ numpy.int16
+ )
+ for i in range(len(self.states)):
+ terminal_breaks, actions, nonterminal_breaks, gotos = self.states[i]
+
+ terminal0 = 0
+ for j in range(len(terminal_breaks)):
+ terminal1 = terminal_breaks[j]
+ action_table[i, terminal0:terminal1] = actions[j]
+ terminal0 = terminal1
+ assert terminal0 == self.n_terminals
+
+ nonterminal0 = 0
+ for j in range(len(nonterminal_breaks)):
+ nonterminal1 = nonterminal_breaks[j]
+ goto_table[nonterminal0:nonterminal1, i] = gotos[j]
+ nonterminal0 = nonterminal1
+ assert nonterminal0 == len(self.productions)
+ assert numpy.all(action_table != 0)
+ assert numpy.all(goto_table != 0)
+ #print(action_table)
+ #print(goto_table)
+
+ # permute and combine columns/rows on the basis of the translate vectors
+ new_action_table = numpy.zeros(
+ (len(self.states), n_terminals),
+ numpy.int16
+ )
+ new_action_table[:, translate_terminals] = action_table
+ action_table = new_action_table
+ new_goto_table = numpy.zeros(
+ (n_nonterminals, len(self.states)),
+ numpy.int16
+ )
+ new_goto_table[translate_nonterminals, :] = goto_table[1:] # ignore start
+ goto_table = new_goto_table
+
+ # manipulate the table entries as follows:
+ # - ensure there is no shift or goto 0 (cannot return to starting state)
+ # - replace reduce 0 (start production) with shift to a nonexistent state
+ # - replace action or goto -1 (unrecognized terminal/nonterminal) with 0
+ # - change the low-bit indication of shift/reduce to positive/negative
+ # we do it here after removing redundant columns, as it's more efficient
+ assert numpy.all(action_table != 0)
+ action_table[action_table == 1] = len(self.states) << 1
+ action_table[action_table == -1] = 0
+ mask = (action_table & 1).astype(numpy.bool)
+ action_table >>= 1
+ action_table[mask] = -action_table[mask]
+ assert numpy.all(goto_table != 0)
+ goto_table[goto_table == -1] = 0
+
+ # find out which column the transition to each state occurs in, this
+ # must be unique and is called the "accessing symbol" for the state
+ accessing_table = numpy.any(
+ numpy.concatenate(
+ [action_table, goto_table.transpose()],
+ 1
+ )[:, :, numpy.newaxis] ==
+ numpy.arange(
+ 1,
+ len(self.states),
+ dtype = numpy.int16
+ )[numpy.newaxis, numpy.newaxis, :],
+ 0
+ )
+ accessing_symbols = numpy.full(
+ (len(self.states),),
+ 2, # '$undefined'
+ numpy.int16
+ )
+ for i in range(1, len(self.states)):
+ accessing_symbol = numpy.nonzero(accessing_table[:, i - 1])[0]
+ if accessing_symbol.shape[0] == 0:
+ sys.stderr.write('warning: unreachable state {0:d}\n'.format(i))
+ elif accessing_symbol.shape[0] == 1:
+ accessing_symbols[i] = accessing_symbol[0]
+ else:
+ assert False
+
+ # default_action is yydefact, default_goto is yydefgoto
+ # find default reduce (most common negative value per action_table row)
+ # and find default goto (most common non-zero value per goto_table row)
+ # note: 0 is used as a last resort if there is no proper default value
+ default_action = numpy.argmax(
+ numpy.concatenate(
+ [
+ numpy.zeros((len(self.states), 1), numpy.int16),
+ numpy.sum(
+ (
+ action_table[:, :, numpy.newaxis] ==
+ numpy.arange(
+ -1,
+ -len(self.productions),
+ -1,
+ dtype = numpy.int16
+ )[numpy.newaxis, numpy.newaxis, :]
+ ).astype(numpy.int16),
+ 1
+ )
+ ],
+ 1
+ ),
+ 1
+ )
+ #print(action_table)
+ #print(default_action)
+ default_goto = numpy.argmax(
+ numpy.concatenate(
+ [
+ numpy.zeros((n_nonterminals, 1), numpy.int16),
+ numpy.sum(
+ (
+ goto_table[:, :, numpy.newaxis] ==
+ numpy.arange(
+ 1,
+ len(self.states),
+ dtype = numpy.int16
+ )[numpy.newaxis, numpy.newaxis, :]
+ ).astype(numpy.int16),
+ 1
+ )
+ ],
+ 1
+ ),
+ 1
+ )
+ #print(goto_table)
+ #print(default_goto)
+
+ # fill in the zero entries in the tables with default reduce or goto
+ for i in range(len(self.states)):
+ action_table[i, action_table[i, :] == 0] = -default_action[i]
+ for i in range(n_nonterminals):
+ goto_table[i, goto_table[i, :] == 0] = default_goto[i]
+ #print(action_table)
+ #print(goto_table)
+
+ # entry_base indicates the most negative starting index we can expect
+ # we maintain n_entries such that entries_used[n_entries:, :] == False
+ # entries[i, 0] is yytable[i - entry_base]
+ # entries[i, 1] is yycheck[i - entry_base]
+ # entries_used[i, 0] == True if any vector has been stored starting at i
+ # entries_used[0, 0] does not go True as it is -infinity and can be reused
+ # entries_used[i, 1] == True if entry has been stored in entries[i, :]
+ entry_base = max(n_terminals, len(self.states))
+ n_entries = entry_base
+ entries = numpy.full((entry_base, 2), 0x8000, numpy.int16)
+ entries_used = numpy.zeros((entry_base, 2), numpy.bool)
+ entries_used[:, 1] = True # padding so we don't use any negative entries
+
+ # action_pointer is yypact, goto_pointer is yypgoto
+ def pack_matrix(data, mask):
+ nonlocal n_entries, entries, entries_used # also uses entry_base
+
+ start_indices = numpy.zeros((data.shape[0],), numpy.int16)
+ for i in range(data.shape[0]):
+ indices = numpy.nonzero(mask[i, :])[0]
+ if indices.shape[0] == 0:
+ start_index = 0
+ else:
+ size = indices[-1] + 1
+
+ # ensure arrays are at least large enough to find a spot
+ while entries.shape[0] < n_entries + size:
+ # extend entries, copying only n_entries entries
+ new_entries = numpy.full(
+ (entries.shape[0] * 2, 2),
+ 0x8000,
+ numpy.int16
+ )
+ new_entries[:n_entries, :] = entries[:n_entries, :]
+ entries = new_entries
+
+ # extend entries_used, copying only n_entries entries
+ new_entries_used = numpy.zeros(
+ (entries_used.shape[0] * 2, 2),
+ numpy.bool
+ )
+ new_entries_used[:n_entries, :] = entries_used[:n_entries, :]
+ entries_used = new_entries_used
+
+ # find a suitable spot and store differences from default
+ start_index = entry_base - indices[0]
+ while start_index < n_entries:
+ if (
+ not entries_used[start_index, 0] and
+ not numpy.any(entries_used[indices + start_index, 1])
+ ):
+ break
+ start_index += 1
+ entries[indices + start_index, 0] = data[i, indices]
+ entries[indices + start_index, 1] = indices
+ entries_used[start_index, 0] = True
+ entries_used[indices + start_index, 1] = True
+ if n_entries < start_index + size:
+ n_entries = start_index + size
+
+ start_indices[i] = start_index
+ return start_indices
+ action_pointer = pack_matrix(
+ action_table,
+ action_table != (-default_action)[:, numpy.newaxis]
+ ) - entry_base
+ goto_pointer = pack_matrix(
+ goto_table,
+ goto_table != default_goto[:, numpy.newaxis]
+ ) - entry_base
+ new_entries = numpy.zeros(
+ (n_entries - entry_base, 2),
+ numpy.int16
+ )
+ new_entries[:, :] = entries[entry_base:n_entries, :]
+ entries = new_entries
+
+ # n_states == action_pointer.shape[0]
+ # n_productions == rules.shape[0]
+ # n_nonterminals == goto_pointer.shape[0]
+ return bison_lr1dfa.BisonLR1DFA(
+ translate,
+ rules,
+ accessing_symbols,
+ default_action,
+ default_goto,
+ entry_base,
+ entries,
+ action_pointer,
+ goto_pointer,
+ n_terminals
+ )
+
#def parse_text(self, text, i):
# state = 0
# value_stack = []