Rename full_entries to transition_table, do compression step after construction

author Nick Downing <downing.nick@gmail.com>

Sun, 22 Jul 2018 01:14:47 +0000 (11:14 +1000)

committer Nick Downing <downing.nick@gmail.com>

Sun, 22 Jul 2018 02:26:30 +0000 (12:26 +1000)
author Nick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 01:14:47 +0000 (11:14 +1000)
committer Nick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 02:26:30 +0000 (12:26 +1000)
diff --git a/flex_dfa.py b/flex_dfa.py

index 61c0360..ba5d9d2 100644 (file)
--- a/flex_dfa.py
+++ b/flex_dfa.py
@@ -66,23 +66,25 @@ class FlexDFA:
      eob_state = len(_dfa.start_action)
  
      # state 0 is the jam state, the EOB state will be added later on
-    self.states = [([], 0, 0)] # accept, base, def
+    self.states = [(0, 0)] # base, def
+    self.states_accept = [[]] # accept
      self.entries = [(eob_state, 0)] + [(0, 0)] * 0x100 # nxt, chk
  
-    # full_entries[i, j] is transition on character j in state i
+    # transition_table[i, j] is transition on character j in state i
      # in our way of thinking, 0 is don't care and -1 is failure
      # however, in the flex way these are both 0 (don't care),
      # the distinction being that failure has no associated action
      # thus all entries of states are filled, with 0 as a catch-all
-    full_entries = numpy.zeros((0x100, 0x101), numpy.int16)
-    full_entries[0, 0] = eob_state # all states go to EOB on NUL
+    transition_table = numpy.zeros((0x100, 0x101), numpy.int16)
+    transition_table[0, 0] = eob_state # all states go to EOB on NUL
      used = numpy.zeros(0x200, numpy.bool)
      used[:0x101] = True # account for the jam (don't care) state
  
-    while len(self.states) < len(flex_state_to_action):
-      action = flex_state_to_action[len(self.states)]
+    n_states = 1
+    while n_states < len(flex_state_to_action):
+      action = flex_state_to_action[n_states]
        state, transition = _dfa.actions[action]
-      #print('state', len(self.states), 'transition', transition)
+      #print('state', n_states, 'transition', transition)
  
        del threads0[prefix_slop:]
        transit(transition)
@@ -103,15 +105,16 @@ class FlexDFA:
            # mark start of (hopefully safe) trailing context
            flex_accept.append((k >> 1) | FlexDFA.YY_TRAILING_HEAD_MASK)
        #print(flex_accept)
+      self.states_accept.append(flex_accept)
  
-      # extend full_entries array if required
-      if len(self.states) >= full_entries.shape[0]:
-        new_full_entries = numpy.zeros(
-          (full_entries.shape[0] * 2, 0x101),
+      # extend transition_table array if required
+      if n_states >= transition_table.shape[0]:
+        new_transition_table = numpy.zeros(
+          (transition_table.shape[0] * 2, 0x101),
            numpy.int16
          )
-        new_full_entries[:full_entries.shape[0], :] = full_entries
-        full_entries = new_full_entries
+        new_transition_table[:transition_table.shape[0], :] = transition_table
+        transition_table = new_transition_table
  
        # calculate full entry from _dfa.state character-to-action table
        breaks, actions, _ = _dfa.states[state]
@@ -125,20 +128,23 @@ class FlexDFA:
            next_flex_state = len(flex_state_to_action)
            action_to_flex_state[next_action] = next_flex_state
            flex_state_to_action.append(next_action)
-        full_entries[len(self.states), character0:character1] = next_flex_state
+        transition_table[n_states, character0:character1] = next_flex_state
          character0 = character1
        assert character0 == 0x100
  
        # remap NUL transition to 0x100 and replace with EOB transition
-      full_entries[len(self.states), 0x100] = \
-        full_entries[len(self.states), 0]
-      full_entries[len(self.states), 0] = eob_state
-      #print(len(self.states), full_entries[len(self.states), :])
+      transition_table[n_states, 0x100] = \
+        transition_table[n_states, 0]
+      transition_table[n_states, 0] = eob_state
+      #print(n_states, transition_table[len(self.states), :])
  
+      n_states += 1
+
+    while len(self.states) < n_states:
        # find most similar/earliest existing state to use as default
        mask = (
-        full_entries[len(self.states):len(self.states) + 1, :] !=
-        full_entries[:len(self.states), :]
+        transition_table[len(self.states):len(self.states) + 1, :] !=
+        transition_table[:len(self.states), :]
        )
        diff = numpy.sum(mask, 1)
        flex_def = numpy.argmin(diff, 0)
@@ -154,7 +160,7 @@ class FlexDFA:
          # as another state by COINCIDENCE due to the yy_chk[] issue!
          # ... fix this by merging indistinguishable states (except for
          # duplicate start conditions, which may have to use this hack)
-        flex_base = self.states[flex_def][1]
+        flex_base = self.states[flex_def][0]
        else:
          mask = mask[flex_def, :]
  
@@ -178,12 +184,12 @@ class FlexDFA:
          for i in numpy.nonzero(mask)[0]:
            assert self.entries[flex_base + i] == (0xffff, 0xffff)
            self.entries[flex_base + i] = (
-            full_entries[len(self.states), i],
+            transition_table[len(self.states), i],
              len(self.states)
            )
  
-      self.states.append((flex_accept, flex_base, flex_def))
-    #print(full_entries[:len(self.states), :])
+      self.states.append((flex_base, flex_def))
+    #print(transition_table[:len(self.states), :])
      #print(flex_state_to_action)
  
  def generate(plex, skel_file, out_file):
@@ -212,7 +218,7 @@ def generate(plex, skel_file, out_file):
          elif line == '/* GENERATE TABLES */\n':
            yy_acclist = []
            yy_accept = [0]
-          for flex_accept, _, _ in flex_dfa.states:
+          for flex_accept in flex_dfa.states_accept:
              yy_acclist.extend(flex_accept)
              yy_accept.append(len(yy_acclist))
            fout.write(
@@ -265,7 +271,7 @@ static const flex_int16_t yy_chk[] = {{{6:s}
                      ', '.join(
                        [
                          '{0:5d}'.format(j)
-                        for _, j, _ in flex_dfa.states[i:i + 10]
+                        for j, _ in flex_dfa.states[i:i + 10]
                        ]
                      )
                    )
@@ -278,7 +284,7 @@ static const flex_int16_t yy_chk[] = {{{6:s}
                      ', '.join(
                        [
                          '{0:5d}'.format(j)
-                        for _, _, j in flex_dfa.states[i:i + 10]
+                        for _, j in flex_dfa.states[i:i + 10]
                        ]
                      )
                    )
author	Nick Downing <downing.nick@gmail.com>
	Sun, 22 Jul 2018 01:14:47 +0000 (11:14 +1000)
committer	Nick Downing <downing.nick@gmail.com>
	Sun, 22 Jul 2018 02:26:30 +0000 (12:26 +1000)