Encode states in reverse order (larger distances first)

author Nick Downing <downing.nick@gmail.com>

Sun, 22 Jul 2018 09:22:35 +0000 (19:22 +1000)

committer Nick Downing <downing.nick@gmail.com>

Sun, 22 Jul 2018 09:22:35 +0000 (19:22 +1000)
author Nick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 09:22:35 +0000 (19:22 +1000)
committer Nick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 09:22:35 +0000 (19:22 +1000)
diff --git a/flex_dfa.py b/flex_dfa.py

index 1f01604..5052d69 100644 (file)
--- a/flex_dfa.py
+++ b/flex_dfa.py
@@ -165,14 +165,8 @@ class FlexDFA:
  
      # state 0 is the jam state, the EOB state will be added later on
      self.states = numpy.zeros((n_states, 2), numpy.int16) # base, def
-    self.entries = numpy.full((0x200, 2), -1, numpy.int16) # nxt, chk
-    self.entries[:0x101, :] = 0 # jam state just returns to jam state
-    self.entries[0, 0] = eob_state # except for the EOB transition
-    entries_used = numpy.zeros(0x200, numpy.bool)
-    entries_used[:0x101] = True # account for the jam (don't care) state
-    n_entries = 0x101
  
-    # compress the transition table
+    # calculate default states by constructing minimum spanning tree
      dist = numpy.sum(
        transitions[:, numpy.newaxis, :] != transitions[numpy.newaxis, :, :],
        2
@@ -184,13 +178,41 @@ class FlexDFA:
        ],
        1
      )
+    order = []
      for i in range(1, n_states):
        # Prim's algorithm: find most similar (done, todo) state pair
        temp = dist[:i, i:]
        done, todo = numpy.unravel_index(numpy.argmin(temp), temp.shape)
        todo += i
-      state_done = dist_key[done, 1]
-      state_todo = dist_key[todo, 1]
+      order.append((dist_key[done, 1], dist_key[todo, 1])) # state done, todo
+
+      # permute to make done states consecutive, in order of hop count
+      dist_key[todo, 0] = dist_key[done, 0] + 1
+      j = done + 1
+      while j < i:
+        if tuple(dist_key[todo, :]) < tuple(dist_key[j, :]):
+          break
+        j += 1
+      temp = numpy.copy(dist[todo, :])
+      dist[j + 1:todo + 1, :] = dist[j:todo, :]
+      dist[j, :] = temp
+      temp = numpy.copy(dist[:, todo])
+      dist[:, j + 1:todo + 1] = dist[:, j:todo]
+      dist[:, j] = temp
+      temp = numpy.copy(dist_key[todo, :])
+      dist_key[j + 1:todo + 1, :] = dist_key[j:todo, :]
+      dist_key[j, :] = temp
+
+    # encode states in reverse order (larger distances first)
+    self.entries = numpy.full((0x200, 2), -1, numpy.int16) # nxt, chk
+    self.entries[:0x101, :] = 0 # jam state just returns to jam state
+    self.entries[0, 0] = eob_state # except for the EOB transition
+    entries_used = numpy.zeros(0x200, numpy.bool)
+    entries_used[:0x101] = True # account for the jam (don't care) state
+    n_entries = 0x101
+    dupes = []
+    while len(order):
+      state_done, state_todo = order.pop()
        indices = numpy.nonzero(
          transitions[state_todo, :] != transitions[state_done, :]
        )[0]
@@ -198,7 +220,7 @@ class FlexDFA:
          # when copying another state, need to have the same base, though
          # the base will not matter since there will be no entries, it is
          # is because of the awkward way the compressed lookup is written
-        start_index = self.states[state_done, 0]
+        dupes.append((state_done, state_todo))
        else:
          # make sure entries array is at least large enough to find a spot
          while self.entries.shape[0] < n_entries + 0x101:
@@ -233,29 +255,16 @@ class FlexDFA:
          if n_entries < start_index + 0x101:
            n_entries = start_index + 0x101
  
-      self.states[state_todo, 0] = start_index
+        self.states[state_todo, 0] = start_index
+        self.states[state_todo, 1] = state_done
+    while len(dupes):
+      state_done, state_todo = dupes.pop()
+      self.states[state_todo, 0] = self.states[state_done, 0]
        self.states[state_todo, 1] = state_done
  
-      # permute to make done states consecutive, in order of hop count
-      dist_key[todo, 0] = dist_key[done, 0] + 1
-      j = done + 1
-      while j < i:
-        if tuple(dist_key[todo, :]) < tuple(dist_key[j, :]):
-          break
-        j += 1
-      temp = numpy.copy(dist[todo, :])
-      dist[j + 1:todo + 1, :] = dist[j:todo, :]
-      dist[j, :] = temp
-      temp = numpy.copy(dist[:, todo])
-      dist[:, j + 1:todo + 1] = dist[:, j:todo]
-      dist[:, j] = temp
-      temp = numpy.copy(dist_key[todo, :])
-      dist_key[j + 1:todo + 1, :] = dist_key[j:todo, :]
-      dist_key[j, :] = temp
-
      # finalize entries table
      self.entries = self.entries[:n_entries, :]
-    #print('n_entries', n_entries)
+    print('n_entries', n_entries)
  
  def generate(plex, skel_file, out_file):
    nfa = plex.to_nfa()
author	Nick Downing <downing.nick@gmail.com>
	Sun, 22 Jul 2018 09:22:35 +0000 (19:22 +1000)
committer	Nick Downing <downing.nick@gmail.com>
	Sun, 22 Jul 2018 09:22:35 +0000 (19:22 +1000)