Change states from a list of base, def pairs to a numpy array
authorNick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 02:26:00 +0000 (12:26 +1000)
committerNick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 02:45:22 +0000 (12:45 +1000)
flex_dfa.py

index 798c844..ecb0b00 100644 (file)
@@ -66,7 +66,8 @@ class FlexDFA:
     eob_state = len(_dfa.start_action)
 
     # state 0 is the jam state, the EOB state will be added later on
-    self.states = [(0, 0)] # base, def
+    self.states = numpy.zeros((0x100, 2), numpy.int16) # base, def
+    n_states = 1
     self.acclist = numpy.zeros((0x100,), numpy.int16)
     n_acclist = 0
     self.accept = numpy.zeros((0x100,), numpy.int16)
@@ -168,27 +169,20 @@ class FlexDFA:
     self.accept[n_accept] = n_acclist
     self.accept = self.accept[:n_accept + 1]
 
-    while len(self.states) < n_accept:
+    # compress the transition table
+    while n_states < n_accept:
       # find most similar/earliest existing state to use as default
       mask = (
-        transition_table[len(self.states):len(self.states) + 1, :] !=
-        transition_table[:len(self.states), :]
+        transition_table[n_states:n_states + 1, :] !=
+        transition_table[:n_states, :]
       )
       diff = numpy.sum(mask, 1)
       flex_def = numpy.argmin(diff, 0)
-      if diff[flex_def] == 0: # exactly matching state
-        # can't use the normal similarity mechanism here, because it
-        # will choose a base of 0 (happens to correspond to the jam
-        # state) and this will make flex's inner loop abort early
-        # ... highlights various issues with flex's tables format,
-        # for instance that duplicate states may be unavoidable when
-        # start conditions are used, and that checking the base is
-        # not an ideal way to check if a state has the same transitions
-        # as the jam state, in fact a state can only share the same base
-        # as another state by COINCIDENCE due to the yy_chk[] issue!
-        # ... fix this by merging indistinguishable states (except for
-        # duplicate start conditions, which may have to use this hack)
-        flex_base = self.states[flex_def][0]
+      if diff[flex_def] == 0:
+        # when copying another state, need to have the same base, though
+        # the base will not matter since there will be no entries, it is
+        # is because of the awkward way the compressed lookup is written
+        flex_base = self.states[flex_def, 0]
       else:
         mask = mask[flex_def, :]
 
@@ -212,12 +206,25 @@ class FlexDFA:
         for i in numpy.nonzero(mask)[0]:
           assert self.entries[flex_base + i] == (0xffff, 0xffff)
           self.entries[flex_base + i] = (
-            transition_table[len(self.states), i],
-            len(self.states)
+            transition_table[n_states, i],
+            n_states
           )
 
-      self.states.append((flex_base, flex_def))
-    #print(transition_table[:len(self.states), :])
+      if n_states >= self.states.shape[0]:
+        new_states = numpy.zeros(
+          (self.states.shape[0] * 2, 2),
+          numpy.int16
+        )
+        new_states[:self.states.shape[0], :] = self.states
+        self.states = new_states
+      self.states[n_states, 0] = flex_base
+      self.states[n_states, 1] = flex_def
+      n_states += 1
+
+    # finalize states table
+    self.states = self.states[:n_states, :]
+
+    #print(transition_table[:n_states, :])
     #print(flex_state_to_action)
 
 def generate(plex, skel_file, out_file):
@@ -294,11 +301,11 @@ static const flex_int16_t yy_chk[] = {{{6:s}
                     ', '.join(
                       [
                         '{0:5d}'.format(j)
-                        for j, _ in flex_dfa.states[i:i + 10]
+                        for j in flex_dfa.states[i:i + 10, 0]
                       ]
                     )
                   )
-                  for i in range(0, len(flex_dfa.states), 10)
+                  for i in range(0, flex_dfa.states.shape[0], 10)
                 ]
               ),
               ','.join(
@@ -307,11 +314,11 @@ static const flex_int16_t yy_chk[] = {{{6:s}
                     ', '.join(
                       [
                         '{0:5d}'.format(j)
-                        for _, j in flex_dfa.states[i:i + 10]
+                        for j in flex_dfa.states[i:i + 10, 1]
                       ]
                     )
                   )
-                  for i in range(0, len(flex_dfa.states), 10)
+                  for i in range(0, flex_dfa.states.shape[0], 10)
                 ]
               ),
               ','.join(