Change states from a list of base, def pairs to a numpy array

author Nick Downing <downing.nick@gmail.com>

Sun, 22 Jul 2018 02:26:00 +0000 (12:26 +1000)

committer Nick Downing <downing.nick@gmail.com>

Sun, 22 Jul 2018 02:45:22 +0000 (12:45 +1000)
author Nick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 02:26:00 +0000 (12:26 +1000)
committer Nick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 02:45:22 +0000 (12:45 +1000)
diff --git a/flex_dfa.py b/flex_dfa.py

index 798c844..ecb0b00 100644 (file)
--- a/flex_dfa.py
+++ b/flex_dfa.py
@@ -66,7 +66,8 @@ class FlexDFA:
      eob_state = len(_dfa.start_action)
  
      # state 0 is the jam state, the EOB state will be added later on
-    self.states = [(0, 0)] # base, def
+    self.states = numpy.zeros((0x100, 2), numpy.int16) # base, def
+    n_states = 1
      self.acclist = numpy.zeros((0x100,), numpy.int16)
      n_acclist = 0
      self.accept = numpy.zeros((0x100,), numpy.int16)
@@ -168,27 +169,20 @@ class FlexDFA:
      self.accept[n_accept] = n_acclist
      self.accept = self.accept[:n_accept + 1]
  
-    while len(self.states) < n_accept:
+    # compress the transition table
+    while n_states < n_accept:
        # find most similar/earliest existing state to use as default
        mask = (
-        transition_table[len(self.states):len(self.states) + 1, :] !=
-        transition_table[:len(self.states), :]
+        transition_table[n_states:n_states + 1, :] !=
+        transition_table[:n_states, :]
        )
        diff = numpy.sum(mask, 1)
        flex_def = numpy.argmin(diff, 0)
-      if diff[flex_def] == 0: # exactly matching state
-        # can't use the normal similarity mechanism here, because it
-        # will choose a base of 0 (happens to correspond to the jam
-        # state) and this will make flex's inner loop abort early
-        # ... highlights various issues with flex's tables format,
-        # for instance that duplicate states may be unavoidable when
-        # start conditions are used, and that checking the base is
-        # not an ideal way to check if a state has the same transitions
-        # as the jam state, in fact a state can only share the same base
-        # as another state by COINCIDENCE due to the yy_chk[] issue!
-        # ... fix this by merging indistinguishable states (except for
-        # duplicate start conditions, which may have to use this hack)
-        flex_base = self.states[flex_def][0]
+      if diff[flex_def] == 0:
+        # when copying another state, need to have the same base, though
+        # the base will not matter since there will be no entries, it is
+        # is because of the awkward way the compressed lookup is written
+        flex_base = self.states[flex_def, 0]
        else:
          mask = mask[flex_def, :]
  
@@ -212,12 +206,25 @@ class FlexDFA:
          for i in numpy.nonzero(mask)[0]:
            assert self.entries[flex_base + i] == (0xffff, 0xffff)
            self.entries[flex_base + i] = (
-            transition_table[len(self.states), i],
-            len(self.states)
+            transition_table[n_states, i],
+            n_states
            )
  
-      self.states.append((flex_base, flex_def))
-    #print(transition_table[:len(self.states), :])
+      if n_states >= self.states.shape[0]:
+        new_states = numpy.zeros(
+          (self.states.shape[0] * 2, 2),
+          numpy.int16
+        )
+        new_states[:self.states.shape[0], :] = self.states
+        self.states = new_states
+      self.states[n_states, 0] = flex_base
+      self.states[n_states, 1] = flex_def
+      n_states += 1
+
+    # finalize states table
+    self.states = self.states[:n_states, :]
+
+    #print(transition_table[:n_states, :])
      #print(flex_state_to_action)
  
  def generate(plex, skel_file, out_file):
@@ -294,11 +301,11 @@ static const flex_int16_t yy_chk[] = {{{6:s}
                      ', '.join(
                        [
                          '{0:5d}'.format(j)
-                        for j, _ in flex_dfa.states[i:i + 10]
+                        for j in flex_dfa.states[i:i + 10, 0]
                        ]
                      )
                    )
-                  for i in range(0, len(flex_dfa.states), 10)
+                  for i in range(0, flex_dfa.states.shape[0], 10)
                  ]
                ),
                ','.join(
@@ -307,11 +314,11 @@ static const flex_int16_t yy_chk[] = {{{6:s}
                      ', '.join(
                        [
                          '{0:5d}'.format(j)
-                        for _, j in flex_dfa.states[i:i + 10]
+                        for j in flex_dfa.states[i:i + 10, 1]
                        ]
                      )
                    )
-                  for i in range(0, len(flex_dfa.states), 10)
+                  for i in range(0, flex_dfa.states.shape[0], 10)
                  ]
                ),
                ','.join(
author	Nick Downing <downing.nick@gmail.com>
	Sun, 22 Jul 2018 02:26:00 +0000 (12:26 +1000)
committer	Nick Downing <downing.nick@gmail.com>
	Sun, 22 Jul 2018 02:45:22 +0000 (12:45 +1000)