Add Prim's minimal spanning tree algorithm to encode states in an optimal order

author Nick Downing <downing.nick@gmail.com>

Sun, 22 Jul 2018 08:27:04 +0000 (18:27 +1000)

committer Nick Downing <downing.nick@gmail.com>

Sun, 22 Jul 2018 08:27:04 +0000 (18:27 +1000)
author Nick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 08:27:04 +0000 (18:27 +1000)
committer Nick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 08:27:04 +0000 (18:27 +1000)
diff --git a/flex_dfa.py b/flex_dfa.py

index ff85938..1f01604 100644 (file)
--- a/flex_dfa.py
+++ b/flex_dfa.py
@@ -177,11 +177,22 @@ class FlexDFA:
        transitions[:, numpy.newaxis, :] != transitions[numpy.newaxis, :, :],
        2
      )
+    dist_key = numpy.stack(
+      [
+        numpy.zeros((n_states,), dtype = numpy.int16), # hop count
+        numpy.arange(0, n_states, dtype = numpy.int16) # permutation
+      ],
+      1
+    )
      for i in range(1, n_states):
-      # find most similar/earliest existing state to use as default
-      state_done = numpy.argmin(dist[:i, i], 0)
+      # Prim's algorithm: find most similar (done, todo) state pair
+      temp = dist[:i, i:]
+      done, todo = numpy.unravel_index(numpy.argmin(temp), temp.shape)
+      todo += i
+      state_done = dist_key[done, 1]
+      state_todo = dist_key[todo, 1]
        indices = numpy.nonzero(
-        transitions[i, :] != transitions[state_done, :]
+        transitions[state_todo, :] != transitions[state_done, :]
        )[0]
        if indices.shape[0] == 0:
          # when copying another state, need to have the same base, though
@@ -214,17 +225,37 @@ class FlexDFA:
            if not numpy.any(entries_used[indices + start_index]):
              break
            start_index += 1
-        self.entries[indices + start_index, 0] = transitions[i, indices]
-        self.entries[indices + start_index, 1] = i
+        self.entries[indices + start_index, 0] = (
+          transitions[state_todo, indices]
+        )
+        self.entries[indices + start_index, 1] = state_todo
          entries_used[indices + start_index] = True
          if n_entries < start_index + 0x101:
            n_entries = start_index + 0x101
  
-      self.states[i, 0] = start_index
-      self.states[i, 1] = state_done
+      self.states[state_todo, 0] = start_index
+      self.states[state_todo, 1] = state_done
+
+      # permute to make done states consecutive, in order of hop count
+      dist_key[todo, 0] = dist_key[done, 0] + 1
+      j = done + 1
+      while j < i:
+        if tuple(dist_key[todo, :]) < tuple(dist_key[j, :]):
+          break
+        j += 1
+      temp = numpy.copy(dist[todo, :])
+      dist[j + 1:todo + 1, :] = dist[j:todo, :]
+      dist[j, :] = temp
+      temp = numpy.copy(dist[:, todo])
+      dist[:, j + 1:todo + 1] = dist[:, j:todo]
+      dist[:, j] = temp
+      temp = numpy.copy(dist_key[todo, :])
+      dist_key[j + 1:todo + 1, :] = dist_key[j:todo, :]
+      dist_key[j, :] = temp
  
      # finalize entries table
      self.entries = self.entries[:n_entries, :]
+    #print('n_entries', n_entries)
  
  def generate(plex, skel_file, out_file):
    nfa = plex.to_nfa()
author	Nick Downing <downing.nick@gmail.com>
	Sun, 22 Jul 2018 08:27:04 +0000 (18:27 +1000)
committer	Nick Downing <downing.nick@gmail.com>
	Sun, 22 Jul 2018 08:27:04 +0000 (18:27 +1000)