Rename full_entries to transition_table, do compression step after construction
authorNick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 01:14:47 +0000 (11:14 +1000)
committerNick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 02:26:30 +0000 (12:26 +1000)
flex_dfa.py

index 61c0360..ba5d9d2 100644 (file)
@@ -66,23 +66,25 @@ class FlexDFA:
     eob_state = len(_dfa.start_action)
 
     # state 0 is the jam state, the EOB state will be added later on
-    self.states = [([], 0, 0)] # accept, base, def
+    self.states = [(0, 0)] # base, def
+    self.states_accept = [[]] # accept
     self.entries = [(eob_state, 0)] + [(0, 0)] * 0x100 # nxt, chk
 
-    # full_entries[i, j] is transition on character j in state i
+    # transition_table[i, j] is transition on character j in state i
     # in our way of thinking, 0 is don't care and -1 is failure
     # however, in the flex way these are both 0 (don't care),
     # the distinction being that failure has no associated action
     # thus all entries of states are filled, with 0 as a catch-all
-    full_entries = numpy.zeros((0x100, 0x101), numpy.int16)
-    full_entries[0, 0] = eob_state # all states go to EOB on NUL
+    transition_table = numpy.zeros((0x100, 0x101), numpy.int16)
+    transition_table[0, 0] = eob_state # all states go to EOB on NUL
     used = numpy.zeros(0x200, numpy.bool)
     used[:0x101] = True # account for the jam (don't care) state
 
-    while len(self.states) < len(flex_state_to_action):
-      action = flex_state_to_action[len(self.states)]
+    n_states = 1
+    while n_states < len(flex_state_to_action):
+      action = flex_state_to_action[n_states]
       state, transition = _dfa.actions[action]
-      #print('state', len(self.states), 'transition', transition)
+      #print('state', n_states, 'transition', transition)
 
       del threads0[prefix_slop:]
       transit(transition)
@@ -103,15 +105,16 @@ class FlexDFA:
           # mark start of (hopefully safe) trailing context
           flex_accept.append((k >> 1) | FlexDFA.YY_TRAILING_HEAD_MASK)
       #print(flex_accept)
+      self.states_accept.append(flex_accept)
 
-      # extend full_entries array if required
-      if len(self.states) >= full_entries.shape[0]:
-        new_full_entries = numpy.zeros(
-          (full_entries.shape[0] * 2, 0x101),
+      # extend transition_table array if required
+      if n_states >= transition_table.shape[0]:
+        new_transition_table = numpy.zeros(
+          (transition_table.shape[0] * 2, 0x101),
           numpy.int16
         )
-        new_full_entries[:full_entries.shape[0], :] = full_entries
-        full_entries = new_full_entries
+        new_transition_table[:transition_table.shape[0], :] = transition_table
+        transition_table = new_transition_table
 
       # calculate full entry from _dfa.state character-to-action table
       breaks, actions, _ = _dfa.states[state]
@@ -125,20 +128,23 @@ class FlexDFA:
           next_flex_state = len(flex_state_to_action)
           action_to_flex_state[next_action] = next_flex_state
           flex_state_to_action.append(next_action)
-        full_entries[len(self.states), character0:character1] = next_flex_state
+        transition_table[n_states, character0:character1] = next_flex_state
         character0 = character1
       assert character0 == 0x100
 
       # remap NUL transition to 0x100 and replace with EOB transition
-      full_entries[len(self.states), 0x100] = \
-        full_entries[len(self.states), 0]
-      full_entries[len(self.states), 0] = eob_state
-      #print(len(self.states), full_entries[len(self.states), :])
+      transition_table[n_states, 0x100] = \
+        transition_table[n_states, 0]
+      transition_table[n_states, 0] = eob_state
+      #print(n_states, transition_table[len(self.states), :])
 
+      n_states += 1
+
+    while len(self.states) < n_states:
       # find most similar/earliest existing state to use as default
       mask = (
-        full_entries[len(self.states):len(self.states) + 1, :] !=
-        full_entries[:len(self.states), :]
+        transition_table[len(self.states):len(self.states) + 1, :] !=
+        transition_table[:len(self.states), :]
       )
       diff = numpy.sum(mask, 1)
       flex_def = numpy.argmin(diff, 0)
@@ -154,7 +160,7 @@ class FlexDFA:
         # as another state by COINCIDENCE due to the yy_chk[] issue!
         # ... fix this by merging indistinguishable states (except for
         # duplicate start conditions, which may have to use this hack)
-        flex_base = self.states[flex_def][1]
+        flex_base = self.states[flex_def][0]
       else:
         mask = mask[flex_def, :]
 
@@ -178,12 +184,12 @@ class FlexDFA:
         for i in numpy.nonzero(mask)[0]:
           assert self.entries[flex_base + i] == (0xffff, 0xffff)
           self.entries[flex_base + i] = (
-            full_entries[len(self.states), i],
+            transition_table[len(self.states), i],
             len(self.states)
           )
 
-      self.states.append((flex_accept, flex_base, flex_def))
-    #print(full_entries[:len(self.states), :])
+      self.states.append((flex_base, flex_def))
+    #print(transition_table[:len(self.states), :])
     #print(flex_state_to_action)
 
 def generate(plex, skel_file, out_file):
@@ -212,7 +218,7 @@ def generate(plex, skel_file, out_file):
         elif line == '/* GENERATE TABLES */\n':
           yy_acclist = []
           yy_accept = [0]
-          for flex_accept, _, _ in flex_dfa.states:
+          for flex_accept in flex_dfa.states_accept:
             yy_acclist.extend(flex_accept)
             yy_accept.append(len(yy_acclist))
           fout.write(
@@ -265,7 +271,7 @@ static const flex_int16_t yy_chk[] = {{{6:s}
                     ', '.join(
                       [
                         '{0:5d}'.format(j)
-                        for _, j, _ in flex_dfa.states[i:i + 10]
+                        for j, _ in flex_dfa.states[i:i + 10]
                       ]
                     )
                   )
@@ -278,7 +284,7 @@ static const flex_int16_t yy_chk[] = {{{6:s}
                     ', '.join(
                       [
                         '{0:5d}'.format(j)
-                        for _, _, j in flex_dfa.states[i:i + 10]
+                        for _, j in flex_dfa.states[i:i + 10]
                       ]
                     )
                   )