First cut at table generation and packing, almost OK but doesn't compile yet

author Nick Downing <downing.nick@gmail.com>

Thu, 5 Jul 2018 12:10:27 +0000 (22:10 +1000)

committer Nick Downing <downing.nick@gmail.com>

Thu, 5 Jul 2018 12:10:27 +0000 (22:10 +1000)
author Nick Downing <downing.nick@gmail.com>
Thu, 5 Jul 2018 12:10:27 +0000 (22:10 +1000)
committer Nick Downing <downing.nick@gmail.com>
Thu, 5 Jul 2018 12:10:27 +0000 (22:10 +1000)
diff --git a/ast.py b/ast.py

index 804098d..babcd76 100644 (file)
--- a/ast.py
+++ b/ast.py
@@ -389,12 +389,7 @@ class PYACC(element.Element):
        pyacc,
        name_to_symbol
      ):
-      for i in self:
-        i.post_process(
-          pyacc,
-          self,
-          name_to_symbol
-        )
+      raise NotImplementedException
  
    class String(element.Element):
      # GENERATE ELEMENT() BEGIN
@@ -535,6 +530,17 @@ class PYACC(element.Element):
        self.repr_serialize(params)
        return 'ast.PYACC.Section1Or2({0:s})'.format(', '.join(params))
      # GENERATE END
+    def post_process(
+      self,
+      pyacc,
+      name_to_symbol
+    ):
+      for i in self:
+        i.post_process(
+          pyacc,
+          self,
+          name_to_symbol
+        )
  
      class Code(Item):
        # GENERATE ELEMENT() BEGIN
@@ -1373,7 +1379,7 @@ class PYACC(element.Element):
          name_to_symbol
        ):
          assert isinstance(self[0], PYACC.Text)
-        pyacc.prologue_text.append(element.get_text(self[0], 0))
+        pyacc.prologue_text.append(self[0])
  
      class Require(Item):
        # GENERATE ELEMENT() BEGIN
@@ -1774,7 +1780,7 @@ class PYACC(element.Element):
              elif isinstance(i, PYACC.Section2.Rules.RHS.Action):
                assert i == len(self) - 1
                assert isinstance(self[i][0], PYACC.Text)
-              pyacc.actions_text.append(element.get_text(self[i][0], 0))
+              pyacc.actions_text.append(self[i][0])
                break
            else:
              pyacc.actions_text.append('')
@@ -1892,8 +1898,14 @@ class PYACC(element.Element):
        self.repr_serialize(params)
        return 'ast.PYACC.Section3({0:s})'.format(', '.join(params))
      # GENERATE END
+    def post_process(
+      self,
+      pyacc,
+      name_to_symbol
+    ):
+      pass
  
-  # GENERATE ELEMENT(list(str) prologue_text, set(int) characters_used, list(ref) terminal_symbols, list(ref) nonterminal_symbols, ref grammar, list(str) actions_text) BEGIN
+  # GENERATE ELEMENT(list(ref) prologue_text, set(int) characters_used, list(ref) terminal_symbols, list(ref) nonterminal_symbols, ref grammar, list(ref) actions_text) BEGIN
    def __init__(
      self,
      tag = 'PYACC',
@@ -2047,12 +2059,7 @@ class PYACC(element.Element):
        children = [
          regex.Grammar.Production(
            children = [
-            regex.RegexSequence(
-              children = [
-                regex.RegexCharacterRule(),
-                regex.RegexCharacterRule(rule_name = '$end')
-              ]
-            )
+            regex.RegexCharacterRule()
            ]
          )
        ]
@@ -2072,8 +2079,8 @@ class PYACC(element.Element):
        )
  
      # if start symbol not specified, use first nonterminal defined in file
-    if len(self.grammar[0][0][0].rule_name) == 0:
-      self.grammar[0][0][0].rule_name = self.nonterminal_symbols[0].name
+    if len(self.grammar[0][0].rule_name) == 0:
+      self.grammar[0][0].rule_name = self.nonterminal_symbols[0].name
  
      # look up rule names and substitute appropriate character_set for each
      self.grammar.n_terminals = 0x100 + len(self.terminal_symbols)
diff --git a/bison_lr1dfa.py b/bison_lr1dfa.py

index e358d3c..61fb89e 100644 (file)
--- a/bison_lr1dfa.py
+++ b/bison_lr1dfa.py
@@ -1,278 +1,423 @@
  import element
  import numpy
  import regex
+import sys
  
  class BisonLR1DFA:
-  def __init__(self, lr1dfa):
-    print(repr(lr1dfa))
-    assert False
-
-    # this is basically just a renumbering
-
-    # state numbers in the DFA become base/def numbers in the BisonLR1DFA,
-    # obviously with gaps in the numbering depending on how things fit
-    state_to_flex_base_def = {}
-
-    # action numbers in the DFA become state numbers in the BisonLR1DFA,
-    # with the start-action for each start-condition being copied into
-    # the correctly numbered slot (may cause duplicates), and then all
-    # actions reachable from these being copied to the subsequent slots
-    # (if start-action is reached again, uses the lower numbered copy)
-    flex_state_to_action = [0] + lr1dfa.start_action
-    action_to_flex_state = {-1: 0} # see comment about state -1 below
-    for i in range(len(flex_state_to_action)):
-      action = flex_state_to_action[i]
-      if action not in action_to_flex_state:
-        action_to_flex_state[action] = i
-
-    # last start-condition is really end-of-buffer (EOB), it has only
-    # a dummy rule that accepts the null string and executes EOB action
-    eob_state = len(lr1dfa.start_action)
-
-    # state 0 is the jam state, the EOB state will be added later on
-    self.states = [([], 0, 0)] # accept, base, def
-    self.entries = [(eob_state, 0)] + [(0, 0)] * 0x100 # nxt, chk
-
-    # full_entries[i, j] is transition on character j in state i
-    # in our way of thinking, 0 is don't care and -1 is failure
-    # however, in the flex way these are both 0 (don't care),
-    # the distinction being that failure has no associated action
-    # thus all entries of states are filled, with 0 as a catch-all
-    full_entries = numpy.zeros((0x100, 0x101), numpy.int16)
-    full_entries[0, 0] = eob_state # all states go to EOB on NUL
-    used = numpy.zeros(0x200, numpy.bool)
-    used[:0x101] = True # account for the jam (don't care) state
-
-    while len(self.states) < len(flex_state_to_action):
-      action = flex_state_to_action[len(self.states)]
-      state, transition = lr1dfa.actions[action]
-      #print('state', len(self.states), 'transition', transition)
-
-      del threads0[prefix_slop:]
-      transit(transition)
-      #print(threads0[prefix_slop:])
-      flex_accept = []
-      for k in [j for i in threads0[prefix_slop:] for j in i]:
-        if k & 1:
-          if (
-            len(flex_accept) > 0 and
-            flex_accept[-1] == (k >> 1) | BisonLR1DFA.YY_TRAILING_HEAD_MASK
-          ):
-            # zero length trailing context, accept immediately
-            flex_accept.append(k >> 1)
-          else:
-            # look back to start of trailing context, then accept
-            flex_accept.append((k >> 1) | BisonLR1DFA.YY_HEAD_MASK)
-        else:
-          # mark start of (hopefully safe) trailing context
-          flex_accept.append((k >> 1) | BisonLR1DFA.YY_TRAILING_HEAD_MASK)
-      #print(flex_accept)
-
-      if state in state_to_flex_base_def:
-        flex_base, flex_def = state_to_flex_base[state]
+  def __init__(
+    self,
+    lr1dfa,
+    n_terminals,
+    undefined_terminal,
+    translate_terminals,
+    n_nonterminals,
+    translate_nonterminals
+  ):
+    # translate is yytranslate
+    self.translate = translate_terminals
+
+    # rule_data[:, 0] is yyr1
+    # rule_data[:, 1] is yyr2
+    # note: production 0 (start production) can't be reduced, null it out
+    self.rule_data = numpy.concatenate(
+      [
+        numpy.zeros((1, 2), numpy.int16),
+        numpy.stack(
+          [
+            translate_nonterminals + n_terminals,
+            numpy.array([i for i, _ in lr1dfa.productions[1:]], numpy.int16)
+          ],
+          1
+        )
+      ],
+      0
+    )
+    print(self.translate)
+    print(self.rule_data)
+
+    # unpack tables into numpy arrays so we can manipulate them efficiently
+    # note: the goto table is transposed with respect to the action table,
+    # so the row in the table corresponds to the yypact[]/yypgoto[] index,
+    # and the column in the table is what gets added to yypact[]/yypgoto[]
+    orig_action_table = numpy.zeros(
+      (len(lr1dfa.states), lr1dfa.n_terminals),
+      numpy.int16
+    )
+    orig_goto_table = numpy.zeros(
+      (len(lr1dfa.productions), len(lr1dfa.states)),
+      numpy.int16
+    )
+    for i in range(len(lr1dfa.states)):
+      terminal_breaks, actions, nonterminal_breaks, gotos = lr1dfa.states[i]
+
+      terminal0 = 0
+      for j in range(len(terminal_breaks)):
+        terminal1 = terminal_breaks[j]
+        orig_action_table[i, terminal0:terminal1] = actions[j]
+        terminal0 = terminal1
+      assert terminal0 == lr1dfa.n_terminals
+
+      nonterminal0 = 0
+      for j in range(len(nonterminal_breaks)):
+        nonterminal1 = nonterminal_breaks[j]
+        orig_goto_table[nonterminal0:nonterminal1, i] = gotos[j]
+        nonterminal0 = nonterminal1
+      assert nonterminal0 == len(lr1dfa.productions)
+
+    # permute and combine columns/rows on the basis of the translate vectors
+    action_table = numpy.zeros(
+      (len(lr1dfa.states), n_terminals),
+      numpy.int16
+    )
+    action_table[:, translate_terminals] = orig_action_table
+    goto_table = numpy.zeros(
+      (n_nonterminals, len(lr1dfa.states)),
+      numpy.int16
+    )
+    goto_table[translate_nonterminals, :] = orig_goto_table[1:] # ignore start
+
+    # manipulate the table entries as follows:
+    # - ensure there is no shift or goto 0 (cannot return to starting state)
+    # - replace reduce 0 (start production) with shift to a nonexistent state
+    # - replace action or goto -1 (unrecognized terminal/nonterminal) with 0
+    # - change the low-bit indication of shift/reduce to positive/negative
+    # we do it here after removing redundant columns, as it's more efficient
+    assert numpy.all(action_table != 0)
+    mask = (action_table & 1).astype(numpy.bool)
+    action_table >>= 1
+    action_table[action_table == 0] = len(lr1dfa.states)
+    action_table[action_table == -1] = 0
+    action_table[mask] = -action_table[mask]
+    assert numpy.all(goto_table != 0)
+    goto_table[goto_table == -1] = 0
+    print(action_table)
+    print(goto_table)
+
+    # find out which column the transition to each state occurs in, this
+    # must be unique and is called the "accessing symbol" for the state
+    accessing_table = numpy.any(
+      numpy.concatenate(
+        [action_table, goto_table.transpose()],
+        1
+      )[:, :, numpy.newaxis] ==
+      numpy.arange(
+        1,
+        len(lr1dfa.states),
+        dtype = numpy.int16
+      )[numpy.newaxis, numpy.newaxis, :],
+      0
+    )
+    self.accessing_symbols = numpy.full(
+      (len(lr1dfa.states),),
+      undefined_terminal,
+      numpy.int16
+    )
+    for i in range(1, len(lr1dfa.states)):
+      accessing_symbol = numpy.nonzero(accessing_table[:, i - 1])[0]
+      if accessing_symbol.shape[0] == 0:
+        sys.stderr.write('warning: unreachable state {0:d}\n'.format(i))
+      elif accessing_symbol.shape[0] == 1:
+        self.accessing_symbols[i] = accessing_symbol[0]
        else:
-        # extend full_entries array if required
-        if len(self.states) >= full_entries.shape[0]:
-          new_full_entries = numpy.zeros(
-            (full_entries.shape[0] * 2, 0x101),
-            numpy.int16
+        assert False
+    #print(self.accessing_symbols)
+
+    # default_action is yydefact, default_goto is yydefgoto
+    # find default reduce (most common negative value per action_table row)
+    # and find default goto (most common non-zero value per goto_table row)
+    # note: 0 is used as a last resort if there is no proper default value
+    self.default_action = numpy.argmax(
+      numpy.concatenate(
+        [
+          numpy.zeros((len(lr1dfa.states), 1), numpy.int16),
+          numpy.sum(
+            (
+              action_table[:, :, numpy.newaxis] ==
+              numpy.arange(
+                1,
+                -len(lr1dfa.productions),
+                -1,
+                dtype = numpy.int16
+              )[numpy.newaxis, numpy.newaxis, :]
+            ).astype(numpy.int16),
+            1
            )
-          new_full_entries[:full_entries.shape[0], :] = full_entries
-          full_entries = new_full_entries
-
-        # calculate full entry from lr1dfa.state char-to-action table
-        breaks, actions, _ = lr1dfa.states[state]
-        char0 = 0
-        for i in range(len(breaks)):
-          char1 = breaks[i]
-          next_action = actions[i]
-          if next_action in action_to_flex_state:
-            next_flex_state = action_to_flex_state[next_action]
-          else:
-            next_flex_state = len(flex_state_to_action)
-            action_to_flex_state[next_action] = next_flex_state
-            flex_state_to_action.append(next_action)
-          full_entries[len(self.states), char0:char1] = next_flex_state
-          char0 = char1
-        assert char0 == 0x100
-
-        # remap NUL transition to 0x100 and replace with EOB transition
-        full_entries[len(self.states), 0x100] = \
-          full_entries[len(self.states), 0]
-        full_entries[len(self.states), 0] = eob_state
-        #print(len(self.states), full_entries[len(self.states), :])
-
-        # find most similar/earliest existing state to use as default
-        mask = (
-          full_entries[len(self.states):len(self.states) + 1, :] !=
-          full_entries[:len(self.states), :]
-        )
-        diff = numpy.sum(mask, 1)
-        flex_def = numpy.argmin(diff, 0)
-        if diff[flex_def] == 0: # exactly matching state
-          # can't use the normal similarity mechanism here, because it
-          # will choose a base of 0 (happens to correspond to the jam
-          # state) and this will make flex's inner loop abort early
-          # ... highlights various issues with flex's tables format,
-          # for instance that duplicate states may be unavoidable when
-          # start conditions are used, and that checking the base is
-          # not an ideal way to check if a state has the same transitions
-          # as the jam state, in fact a state can only share the same base
-          # as another state by COINCIDENCE due to the yy_chk[] issue!
-          # ... fix this by merging indistinguishable states (except for
-          # duplicate start conditions, which may have to use this hack)
-          flex_base = self.states[flex_def][1]
+        ],
+        1
+      ),
+      1
+    )
+    self.default_goto = numpy.argmax(
+      numpy.concatenate(
+        [
+          numpy.zeros((n_nonterminals, 1), numpy.int16),
+          numpy.sum(
+            (
+              goto_table[:, :, numpy.newaxis] ==
+              numpy.arange(
+                1,
+                len(lr1dfa.states),
+                dtype = numpy.int16
+              )[numpy.newaxis, numpy.newaxis, :]
+            ).astype(numpy.int16),
+            1
+          )
+        ],
+        1
+      ),
+      1
+    )
+
+    # fill in the zero entries in the tables with default reduce or goto
+    for i in range(len(lr1dfa.states)):
+      action_table[i, action_table[i, :] == 0] = -self.default_action[i]
+    for i in range(n_nonterminals):
+      goto_table[i, goto_table[i, :] == 0] = self.default_goto[i]
+
+    # entry_base indicates the most negative starting index we can expect
+    # we maintain entry_size such that entry_used[entry_size:, :] == False
+    # entry_data[i, 0] is yytable[i - entry_base]
+    # entry_data[i, 1] is yycheck[i - entry_base]
+    # entry_used[i, 0] == True if any vector has been stored starting at i
+    # entry_used[0, 0] does not go True as it is -infinity and can be reused
+    # entry_used[i, 1] == True if entry has been stored in entry_data[i, :]
+    self.entry_base = max(n_terminals, len(lr1dfa.states))
+    entry_size = self.entry_base
+    entry_data = numpy.full((self.entry_base, 2), 0x8000, numpy.int16)
+    entry_used = numpy.zeros((self.entry_base, 2), numpy.bool)
+    entry_used[:, 1] = True # padding so we don't use any negative entries
+
+    # action_pointer is yypact, goto_pointer is yypgoto
+    def pack_matrix(data, mask):
+      nonlocal entry_size, entry_data, entry_used # also uses self.entry_base
+
+      start_indices = numpy.zeros((data.shape[0],), numpy.int16)
+      for i in range(data.shape[0]):
+        indices = numpy.nonzero(mask[i, :])[0]
+        if indices.shape[0] == 0:
+          start_index = 0
          else:
-          mask = mask[flex_def, :]
-
-          # make sure used array is at least large enough to find a spot
-          while used.shape[0] < len(self.entries) + 0x101:
-            new_used = numpy.zeros((used.shape[0] * 2,), numpy.bool)
-            new_used[:used.shape[0]] = used
-            used = new_used
-
-          # find a suitable spot and store differences from default state
-          flex_base = 0
-          while flex_base < len(self.entries):
-            if not numpy.any(used[flex_base:flex_base + 0x101] & mask):
-              break
-            flex_base += 1
-          used[flex_base:flex_base + 0x101] |= mask
-          if len(self.entries) < flex_base + 0x101:
-            self.entries.extend(
-              [(0xffff, 0xffff)] * (flex_base + 0x101 - len(self.entries))
-            )
-          for i in numpy.nonzero(mask)[0]:
-            assert self.entries[flex_base + i] == (0xffff, 0xffff)
-            self.entries[flex_base + i] = (
-              full_entries[len(self.states), i],
-              len(self.states)
+          size = indices[-1] + 1
+
+          # ensure arrays are at least large enough to find a spot
+          while entry_data.shape[0] < entry_size + size:
+            # extend entry_data, copying only entry_size entries
+            new_entry_data = numpy.full(
+              (entry_data.shape[0] * 2, 2),
+              0x8000,
+              numpy.int16
              )
+            new_entry_data[:entry_size, :] = entry_data[:entry_size, :]
+            entry_data = new_entry_data
  
-      self.states.append((flex_accept, flex_base, flex_def))
-    #print(full_entries[:len(self.states), :])
-    #print(flex_state_to_action)
+            # extend entry_used, copying only entry_size entries
+            new_entry_used = numpy.zeros(
+              (entry_used.shape[0] * 2, 2),
+              numpy.bool
+            )
+            new_entry_used[:entry_size, :] = entry_used[:entry_size, :]
+            entry_used = new_entry_used
+   
+          # find a suitable spot and store differences from default
+          start_index = self.entry_base - indices[0]
+          while start_index < entry_size:
+            if (
+              not entry_used[start_index, 0] and
+              not numpy.any(entry_used[indices + start_index, 1])
+            ):
+              break
+            start_index += 1
+          entry_data[indices + start_index, 0] = data[i, indices]
+          entry_data[indices + start_index, 1] = indices
+          entry_used[start_index, 0] = True
+          entry_used[indices + start_index, 1] = True
+          if entry_size < start_index + size:
+            entry_size = start_index + size
+
+        start_indices[i] = start_index
+      return start_indices
+    self.action_pointer = pack_matrix(
+      action_table,
+      action_table != (-self.default_action)[:, numpy.newaxis]
+    ) - self.entry_base
+    self.goto_pointer = pack_matrix(
+      goto_table,
+      goto_table != self.default_goto[:, numpy.newaxis]
+    ) - self.entry_base
+    self.entry_data = numpy.zeros(
+      (entry_size - self.entry_base, 2),
+      numpy.int16
+    )
+    self.entry_data[:, :] = entry_data[self.entry_base:entry_size, :]
+
+    # n_states == self.action_pointer.shape[0]
+    # n_productions == self.rule_data.shape[0]
+    # n_nonterminals == self.goto_pointer.shape[0]
+    self.n_terminals = n_terminals
+    self.undefined_terminal = undefined_terminal
  
  def generate(pyacc, skel_file, out_file):
-  bison_lr1dfa = BisonLR1DFA(pyacc.grammar.to_lr1().to_lalr1())
+  # generate the tables using an expanded character set, consisting of:
+  # the full 0x100 character literals (whether they are referenced or not)
+  # the terminals with defined names (for each terminal, one character)
+  # the nonterminals (for each nonterminal, one character per production)
+  lr1dfa = pyacc.grammar.to_lr1().to_lalr1()
+
+  # squash this down to the set of character literals that are referenced,
+  # the set of terminals, then only one character per nonterminal (hence
+  # nonterminals referenced by pyacc.nonterminal_symbols[] index, rather
+  # than the internal way as only the set of lr1dfa.productions[] indices)
+
+  # generate translate table for character literals and terminal symbols
+  n_terminals = 0
+  translate_terminals = numpy.zeros(
+    (lr1dfa.n_terminals,),
+    numpy.int16
+  )
+  undefined_terminal = len(pyacc.characters_used) + 2
+  translate_terminals[:0x100] = undefined_terminal
+  for i in sorted(pyacc.characters_used):
+    translate_terminals[i] = n_terminals
+    n_terminals += 1
+  for i in pyacc.terminal_symbols:
+    for j in range(0, len(i.character_set), 2):
+      translate_terminals[
+        i.character_set[j]:
+        i.character_set[j + 1]
+      ] = n_terminals
+    n_terminals += 1
+
+  # generate translate table for nonterminal symbols
+  # this is effectively a map from productions back to nonterminal symbols
+  # we do not generate an entry for the first production (start production)
+  n_nonterminals = 0
+  translate_nonterminals = numpy.zeros(
+    (len(lr1dfa.productions) - 1,),
+    numpy.int16
+  )
+  for i in pyacc.nonterminal_symbols:
+    for j in range(0, len(i.character_set), 2):
+      translate_nonterminals[
+        i.character_set[j] - 1:
+        i.character_set[j + 1] - 1
+      ] = n_nonterminals
+    n_nonterminals += 1
+
+  # translate and compress the tables
+  bison_lr1dfa = BisonLR1DFA(
+    lr1dfa,
+    n_terminals,
+    undefined_terminal,
+    translate_terminals,
+    n_nonterminals,
+    translate_nonterminals
+  )
  
    with open(skel_file, 'r') as fin:
      with open(out_file, 'w+') as fout:
        line = fin.readline()
        while len(line):
-        if line == '/* GENERATE SECTION1 */\n':
+        if line == '/* GENERATE SECTION1FIRST */\n':
            fout.write(
-            '''/* GENERATE SECTION1 BEGIN */
-{0:s}/* GENERATE SECTION1 END*/
+            '''/* GENERATE SECTION1FIRST BEGIN */
+{0:s}/* GENERATE SECTION1FIRST END*/
  '''.format(
-              ''.join([element.get_text(i, 0) for i in pyacc[0].code_blocks])
+              ''.join([element.get_text(i, 0) for i in pyacc.prologue_text])
              )
            )
-        elif line == '/* GENERATE STARTCONDDECL */\n':
+        elif line == '/* GENERATE SECTION1SECOND */\n':
            fout.write(
-            '''/* GENERATE STARTCONDDECL BEGIN */
-{0:s}/* GENERATE STARTCONDDECL END*/
+            '''/* GENERATE SECTION1SECOND BEGIN */
+{0:s}/* GENERATE SECTION1SECOND END*/
  '''.format(
-              ''.join(
-                [
-                  '#define {0:s} {1:d}\n'.format(
-                    pyacc.start_conditions[i].name,
-                    i
-                  )
-                  for i in range(len(pyacc.start_conditions))
-                ]
-              )
+              '' # fix this later
              )
            )
          elif line == '/* GENERATE TABLES */\n':
-          yy_acclist = []
-          yy_accept = [0]
-          for flex_accept, _, _ in bison_lr1dfa.states:
-            yy_acclist.extend(flex_accept)
-            yy_accept.append(len(yy_acclist))
+          # yyrline (source line where rule is defined) not implemented yet
+          yyrline = numpy.zeros(
+            (bison_lr1dfa.rule_data.shape[0],),
+            numpy.int16
+          )
+
+          # yytname (textual terminal/nonterminal name) wraps 70 columns
+          x = 72
+          yytname_lines = []
+          for i in ( 
+            ['"{0:s}"'.format(i.name) for i in pyacc.terminal_symbols] +
+            ['"{0:s}"'.format(i.name) for i in pyacc.nonterminal_symbols] +
+            ['YY_NULLPTR']
+          ):
+            if x >= 72:
+              yytname_lines.append([])
+              x = 0
+            yytname_lines[-1].append(i)
+            x += len(i) + 2
+
+          # yytoknum is the approximate reverse of bison_lr1dfa.translate,
+          # do in reverse order so later duplicate entries get overwritten
+          yytoknum = numpy.zeros((bison_lr1dfa.n_terminals,), numpy.int16)
+          yytoknum[bison_lr1dfa.translate[::-1]] = numpy.arange(
+            bison_lr1dfa.translate.shape[0] - 1,
+            -1,
+            -1,
+            numpy.int16
+          )
+
            fout.write(
              '''/* GENERATE TABLES BEGIN */
  /* YYFINAL -- State number of the termination state.  */
-#define YYFINAL  2
+#define YYFINAL {0:d}
  /* YYLAST -- Last index in YYTABLE.  */
-#define YYLAST   0
+#define YYLAST {1:d}
  
  /* YYNTOKENS -- Number of terminals.  */
-#define YYNTOKENS  3
+#define YYNTOKENS {2:d}
  /* YYNNTS -- Number of nonterminals.  */
-#define YYNNTS  2
+#define YYNNTS {3:d}
  /* YYNRULES -- Number of rules.  */
-#define YYNRULES  2
+#define YYNRULES {4:d}
  /* YYNSTATES -- Number of states.  */
-#define YYNSTATES  3
+#define YYNSTATES {5:d}
  
  /* YYTRANSLATE[YYX] -- Symbol number corresponding to YYX as returned
     by yylex, with out-of-bounds checking.  */
-#define YYUNDEFTOK  2
-#define YYMAXUTOK   257
+#define YYUNDEFTOK {6:d}
+#define YYMAXUTOK {7:d}
  
  #define YYTRANSLATE(YYX)                                                \
    ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
  
  /* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM
     as returned by yylex, without out-of-bounds checking.  */
-static const yytype_uint8 yytranslate[] =
-{
-       0,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,     2,     2,     2,     1,     2
-};
+static const yytype_uint16 yytranslate[] =
+{{{8:s}
+}};
  
  #if YYDEBUG
    /* YYRLINE[YYN] -- Source line where rule number YYN was defined.  */
-static const yytype_uint8 yyrline[] =
-{
-       0,     3,     3
-};
+static const yytype_uint16 yyrline[] =
+{{{9:s}
+}};
  #endif
  
  #if YYDEBUG || YYERROR_VERBOSE || 0
  /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
     First, the terminals, then, starting at YYNTOKENS, nonterminals.  */
  static const char *const yytname[] =
-{
-  "$end", "error", "$undefined", "$accept", "start", YY_NULLPTR
-};
+{{{10:s}
+}};
  #endif
  
  # ifdef YYPRINT
  /* YYTOKNUM[NUM] -- (External) token number corresponding to the
     (internal) symbol number NUM (which must be that of a token).  */
  static const yytype_uint16 yytoknum[] =
-{
-       0,   256,   257
-};
+{{{11:s}
+}};
  # endif
  
-#define YYPACT_NINF -1
+#define YYPACT_NINF {12:d}
  
  #define yypact_value_is_default(Yystate) \
    (!!((Yystate) == (-1)))
@@ -284,143 +429,243 @@ static const yytype_uint16 yytoknum[] =
  
    /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
       STATE-NUM.  */
-static const yytype_int8 yypact[] =
-{
-      -1,     0,    -1
-};
+static const yytype_int16 yypact[] =
+{{{13:s}
+}};
  
    /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM.
       Performed when YYTABLE does not specify something else to do.  Zero
       means the default is an error.  */
-static const yytype_uint8 yydefact[] =
-{
-       2,     0,     1
-};
+static const yytype_uint16 yydefact[] =
+{{{14:s}
+}};
  
    /* YYPGOTO[NTERM-NUM].  */
-static const yytype_int8 yypgoto[] =
-{
-      -1,    -1
-};
+static const yytype_int16 yypgoto[] =
+{{{15:s}
+}};
  
    /* YYDEFGOTO[NTERM-NUM].  */
-static const yytype_int8 yydefgoto[] =
-{
-      -1,     1
-};
+static const yytype_int16 yydefgoto[] =
+{{{16:s}
+}};
  
    /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM.  If
       positive, shift that token.  If negative, reduce the rule whose
       number is the opposite.  If YYTABLE_NINF, syntax error.  */
-static const yytype_uint8 yytable[] =
-{
-       2
-};
+static const yytype_uint16 yytable[] =
+{{{17:s}
+}};
  
-static const yytype_uint8 yycheck[] =
-{
-       0
-};
+static const yytype_uint16 yycheck[] =
+{{{18:s}
+}};
  
    /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
       symbol of state STATE-NUM.  */
-static const yytype_uint8 yystos[] =
-{
-       0,     4,     0
-};
+static const yytype_uint16 yystos[] =
+{{{19:s}
+}};
  
    /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives.  */
-static const yytype_uint8 yyr1[] =
-{
-       0,     3,     4
-};
+static const yytype_uint16 yyr1[] =
+{{{20:s}
+}};
  
    /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN.  */
-static const yytype_uint8 yyr2[] =
-{
-       0,     2,     0
-};
-
-
+static const yytype_uint16 yyr2[] =
+{{{21:s}
+}};
  /* GENERATE TABLES END */
  '''.format(
-              len(pyacc.actions),
+              # YYFINAL
+              bison_lr1dfa.action_pointer.shape[0],
+              # YYLAST
+              bison_lr1dfa.entry_data.shape[0] - 1,
+              # YYNTOKENS
+              bison_lr1dfa.n_terminals,
+              # YYNNTS
+              bison_lr1dfa.goto_pointer.shape[0],
+              # YYNRULES
+              bison_lr1dfa.rule_data.shape[0],
+              # YYNSTATES
+              bison_lr1dfa.action_pointer.shape[0],
+              # YYUNDEFTOK
+              bison_lr1dfa.undefined_terminal,
+              # YYMAXUTOK
+              bison_lr1dfa.translate.shape[0] - 1,
+              # yytranslate
+              ','.join(
+                [
+                  '\n  {0:s}'.format(
+                    ','.join(
+                      [
+                        '{0:6d}'.format(j)
+                        for j in bison_lr1dfa.translate[i:i + 10]
+                      ]
+                    )
+                  )
+                  for i in range(0, bison_lr1dfa.translate.shape[0], 10)
+                ]
+              ),
+              # yyrline
+              ','.join(
+                [
+                  '\n{0:s}'.format(
+                    ','.join(
+                      [
+                        '{0:6d}'.format(j)
+                        for j in yyrline[i:i + 10]
+                      ]
+                    )
+                  )
+                  for i in range(0, yyrline.shape[0], 10)
+                ]
+              ),
+              # yytname
+              ','.join(
+                ['\n  {0:s}'.format(', '.join(i)) for i in yytname_lines]
+              ),
+              # yytoknum
                ','.join(
                  [
-                  '\n\t{0:s}'.format(
-                    ', '.join(
+                  '\n  {0:s}'.format(
+                    ','.join(
                        [
-                        '{0:5d}'.format(j)
-                        for j in yy_acclist[i:i + 10]
+                        '{0:6d}'.format(j)
+                        for j in yytoknum[i:i + 10]
                        ]
                      )
                    )
-                  for i in range(0, len(yy_acclist), 10)
+                  for i in range(0, yytoknum.shape[0], 10)
                  ]
                ),
+              # YYPACT_NINF
+              -bison_lr1dfa.entry_base,
+              # yypact
                ','.join(
                  [
-                  '\n\t{0:s}'.format(
-                    ', '.join(
+                  '\n  {0:s}'.format(
+                    ','.join(
                        [
-                        '{0:5d}'.format(j)
-                        for j in yy_accept[i:i + 10]
+                        '{0:6d}'.format(j)
+                        for j in bison_lr1dfa.action_pointer[i:i + 10]
                        ]
                      )
                    )
-                  for i in range(0, len(yy_accept), 10)
+                  for i in range(0, bison_lr1dfa.action_pointer.shape[0], 10)
                  ]
                ),
+              # yydefact
                ','.join(
                  [
-                  '\n\t{0:s}'.format(
-                    ', '.join(
+                  '\n  {0:s}'.format(
+                    ','.join(
                        [
-                        '{0:5d}'.format(j)
-                        for _, j, _ in bison_lr1dfa.states[i:i + 10]
+                        '{0:6d}'.format(j)
+                        for j in bison_lr1dfa.default_action[i:i + 10]
                        ]
                      )
                    )
-                  for i in range(0, len(bison_lr1dfa.states), 10)
+                  for i in range(0, bison_lr1dfa.default_action.shape[0], 10)
                  ]
                ),
+              # yypgoto
                ','.join(
                  [
-                  '\n\t{0:s}'.format(
-                    ', '.join(
+                  '\n  {0:s}'.format(
+                    ','.join(
                        [
-                        '{0:5d}'.format(j)
-                        for _, _, j in bison_lr1dfa.states[i:i + 10]
+                        '{0:6d}'.format(j)
+                        for j in bison_lr1dfa.goto_pointer[i:i + 10]
                        ]
                      )
                    )
-                  for i in range(0, len(bison_lr1dfa.states), 10)
+                  for i in range(0, bison_lr1dfa.goto_pointer.shape[0], 10)
                  ]
                ),
+              # yydefgoto
                ','.join(
                  [
-                  '\n\t{0:s}'.format(
-                    ', '.join(
+                  '\n  {0:s}'.format(
+                    ','.join(
                        [
-                        '{0:5d}'.format(j)
-                        for j, _ in bison_lr1dfa.entries[i:i + 10]
+                        '{0:6d}'.format(j)
+                        for j in bison_lr1dfa.default_goto[i:i + 10]
                        ]
                      )
                    )
-                  for i in range(0, len(bison_lr1dfa.entries), 10)
+                  for i in range(0, bison_lr1dfa.default_goto.shape[0], 10)
                  ]
                ),
+              # yytable
                ','.join(
                  [
-                  '\n\t{0:s}'.format(
-                    ', '.join(
+                  '\n  {0:s}'.format(
+                    ','.join(
                        [
-                        '{0:5d}'.format(j)
-                        for _, j in bison_lr1dfa.entries[i:i + 10]
+                        '{0:6d}'.format(j)
+                        for j in bison_lr1dfa.entry_data[i:i + 10, 0]
                        ]
                      )
                    )
-                  for i in range(0, len(bison_lr1dfa.entries), 10)
+                  for i in range(0, bison_lr1dfa.entry_data.shape[0], 10)
+                ]
+              ),
+              # yycheck
+              ','.join(
+                [
+                  '\n  {0:s}'.format(
+                    ','.join(
+                      [
+                        '{0:6d}'.format(j)
+                        for j in bison_lr1dfa.entry_data[i:i + 10, 1]
+                      ]
+                    )
+                  )
+                  for i in range(0, bison_lr1dfa.entry_data.shape[0], 10)
+                ]
+              ),
+              # yystos
+              ','.join(
+                [
+                  '\n  {0:s}'.format(
+                    ','.join(
+                      [
+                        '{0:6d}'.format(j)
+                        for j in bison_lr1dfa.accessing_symbols[i:i + 10]
+                      ]
+                    )
+                  )
+                  for i in range(0, bison_lr1dfa.accessing_symbols.shape[0], 10)
+                ]
+              ),
+              # yyr1
+              ','.join(
+                [
+                  '\n  {0:s}'.format(
+                    ','.join(
+                      [
+                        '{0:6d}'.format(j)
+                        for j in bison_lr1dfa.rule_data[i:i + 10, 0]
+                      ]
+                    )
+                  )
+                  for i in range(0, bison_lr1dfa.rule_data.shape[0], 10)
+                ]
+              ),
+              # yyr2
+              ','.join(
+                [
+                  '\n  {0:s}'.format(
+                    ','.join(
+                      [
+                        '{0:6d}'.format(j)
+                        for j in bison_lr1dfa.rule_data[i:i + 10, 1]
+                      ]
+                    )
+                  )
+                  for i in range(0, bison_lr1dfa.rule_data.shape[0], 10)
                  ]
                )
              )
@@ -430,22 +675,13 @@ static const yytype_uint8 yyr2[] =
              '''/* GENERATE SECTION2INITIAL BEGIN */
  {0:s}/* GENERATE SECTION2INITIAL END */
  '''.format(
-              ''.join([element.get_text(i, 0) for i in pyacc[1].code_blocks])
+              '' #.join([element.get_text(i, 0) for i in pyacc[1].code_blocks])
              )
            )
          elif line == '/* GENERATE SECTION2 */\n':
-          eof_action_to_start_conditions = [
-            [
-              j
-              for j in range(len(pyacc.start_conditions))
-              if pyacc.start_conditions[i].eof_action == j
-            ]
-            for i in range(len(pyacc.eof_actions))
-          ]
-          #print('eof_action_to_start_conditions', eof_action_to_start_conditions)
            fout.write(
              '''/* GENERATE SECTION2 BEGIN */
-{0:s}{1:s}/* GENERATE SECTION2 END */
+{0:s}/* GENERATE SECTION2 END */
  '''.format(
                ''.join(
                  [
@@ -454,26 +690,9 @@ YY_RULE_SETUP
  {1:s}  YY_BREAK
  '''.format(
                      i,
-                    element.get_text(pyacc.actions[i], 0)
-                  )
-                  for i in range(len(pyacc.actions))
-                ]
-              ),
-              ''.join(
-                [
-                  '{0:s}{1:s}'.format(
-                    ''.join(
-                      [
-                        '\t\t\tcase YY_STATE_EOF({0:s}):\n'.format(
-                          pyacc.start_conditions[j].name
-                        )
-                        for j in eof_action_to_start_conditions[i]
-                      ]
-                    ),
-                    element.get_text(pyacc.eof_actions[i], 0)
+                    element.get_text(pyacc.actions_text[i], 0)
                    )
-                  for i in range(len(pyacc.eof_actions))
-                  if len(eof_action_to_start_conditions[i]) > 0
+                  for i in range(1, len(pyacc.actions_text))
                  ]
                )
              )
@@ -483,7 +702,7 @@ YY_RULE_SETUP
              '''/* GENERATE SECTION3 BEGIN */
  {0:s}/*GENERATE SECTION3 END */
  '''.format(
-              '' if len(pyacc) < 3 else element.get_text(pyacc[2], 0)
+              '' if len(pyacc) < 3 else element.get_text(pyacc[2][0], 0)
              )
            )
          else:
author	Nick Downing <downing.nick@gmail.com>
	Thu, 5 Jul 2018 12:10:27 +0000 (22:10 +1000)
committer	Nick Downing <downing.nick@gmail.com>
	Thu, 5 Jul 2018 12:10:27 +0000 (22:10 +1000)
ast.py		patch \| blob \| history
bison_lr1dfa.py		patch \| blob \| history