Change entries from a list of nxt, chk pairs to a numpy array
authorNick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 03:49:41 +0000 (13:49 +1000)
committerNick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 03:51:19 +0000 (13:51 +1000)
flex_dfa.py

index e4b808a..ea85fa1 100644 (file)
@@ -163,9 +163,12 @@ class FlexDFA:
     # state 0 is the jam state, the EOB state will be added later on
     self.states = numpy.zeros((0x100, 2), numpy.int16) # base, def
     n_states = 1
-    self.entries = [(eob_state, 0)] + [(0, 0)] * 0x100 # nxt, chk
-    used = numpy.zeros(0x200, numpy.bool)
-    used[:0x101] = True # account for the jam (don't care) state
+    self.entries = numpy.full((0x200, 2), -1, numpy.int16) # nxt, chk
+    self.entries[:0x101, :] = 0 # jam state just returns to jam state
+    self.entries[0, 0] = eob_state # except for the EOB transition
+    entries_used = numpy.zeros(0x200, numpy.bool)
+    entries_used[:0x101] = True # account for the jam (don't care) state
+    n_entries = 0x101
 
     # compress the transition table
     while n_states < n_accept:
@@ -184,29 +187,33 @@ class FlexDFA:
       else:
         mask = mask[flex_def, :]
 
-        # make sure used array is at least large enough to find a spot
-        while used.shape[0] < len(self.entries) + 0x101:
-          new_used = numpy.zeros((used.shape[0] * 2,), numpy.bool)
-          new_used[:used.shape[0]] = used
-          used = new_used
+        # make sure entries_used array is at least large enough to find a spot
+        while self.entries.shape[0] < n_entries + 0x101:
+          new_entries = numpy.full(
+            (self.entries.shape[0] * 2, 2),
+            -1,
+            numpy.int16
+          )
+          new_entries[:self.entries.shape[0], :] = self.entries
+          self.entries = new_entries
+          new_entries_used = numpy.zeros(
+            (entries_used.shape[0] * 2,),
+            numpy.bool
+          )
+          new_entries_used[:entries_used.shape[0]] = entries_used
+          entries_used = new_entries_used
 
         # find a suitable spot and store differences from default state
         flex_base = 0
-        while flex_base < len(self.entries):
-          if not numpy.any(used[flex_base:flex_base + 0x101] & mask):
+        while flex_base < n_entries:
+          if not numpy.any(entries_used[flex_base:flex_base + 0x101] & mask):
             break
           flex_base += 1
-        used[flex_base:flex_base + 0x101] |= mask
-        if len(self.entries) < flex_base + 0x101:
-          self.entries.extend(
-            [(0xffff, 0xffff)] * (flex_base + 0x101 - len(self.entries))
-          )
+        entries_used[flex_base:flex_base + 0x101] |= mask
         for i in numpy.nonzero(mask)[0]:
-          assert self.entries[flex_base + i] == (0xffff, 0xffff)
-          self.entries[flex_base + i] = (
-            transition_table[n_states, i],
-            n_states
-          )
+          self.entries[flex_base + i, 0] = transition_table[n_states, i]
+          self.entries[flex_base + i, 1] = n_states
+        n_entries = max(n_entries, flex_base + 0x101)
 
       if n_states >= self.states.shape[0]:
         new_states = numpy.zeros(
@@ -219,12 +226,10 @@ class FlexDFA:
       self.states[n_states, 1] = flex_def
       n_states += 1
 
-    # finalize states table
+    # finalize states and entries tables
+    self.entries = self.entries[:n_entries, :]
     self.states = self.states[:n_states, :]
 
-    #print(transition_table[:n_states, :])
-    #print(flex_state_to_action)
-
 def generate(plex, skel_file, out_file):
   nfa = plex.to_nfa()
   eob_expr = regex.RegexGroup(children = [regex.RegexEmpty()])
@@ -325,11 +330,11 @@ static const flex_int16_t yy_chk[] = {{{6:s}
                     ', '.join(
                       [
                         '{0:5d}'.format(j)
-                        for j, _ in flex_dfa.entries[i:i + 10]
+                        for j in flex_dfa.entries[i:i + 10, 0]
                       ]
                     )
                   )
-                  for i in range(0, len(flex_dfa.entries), 10)
+                  for i in range(0, flex_dfa.entries.shape[0], 10)
                 ]
               ),
               ','.join(
@@ -338,11 +343,11 @@ static const flex_int16_t yy_chk[] = {{{6:s}
                     ', '.join(
                       [
                         '{0:5d}'.format(j)
-                        for _, j in flex_dfa.entries[i:i + 10]
+                        for j in flex_dfa.entries[i:i + 10, 1]
                       ]
                     )
                   )
-                  for i in range(0, len(flex_dfa.entries), 10)
+                  for i in range(0, flex_dfa.entries.shape[0], 10)
                 ]
               )
             )