Change states_accept (as list of lists) to acclist and accept (as numpy arrays)
authorNick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 01:57:33 +0000 (11:57 +1000)
committerNick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 02:26:34 +0000 (12:26 +1000)
flex_dfa.py

index ba5d9d2..798c844 100644 (file)
@@ -67,7 +67,10 @@ class FlexDFA:
 
     # state 0 is the jam state, the EOB state will be added later on
     self.states = [(0, 0)] # base, def
-    self.states_accept = [[]] # accept
+    self.acclist = numpy.zeros((0x100,), numpy.int16)
+    n_acclist = 0
+    self.accept = numpy.zeros((0x100,), numpy.int16)
+    n_accept = 1
     self.entries = [(eob_state, 0)] + [(0, 0)] * 0x100 # nxt, chk
 
     # transition_table[i, j] is transition on character j in state i
@@ -80,35 +83,48 @@ class FlexDFA:
     used = numpy.zeros(0x200, numpy.bool)
     used[:0x101] = True # account for the jam (don't care) state
 
-    n_states = 1
-    while n_states < len(flex_state_to_action):
-      action = flex_state_to_action[n_states]
+    while n_accept < len(flex_state_to_action):
+      action = flex_state_to_action[n_accept]
       state, transition = _dfa.actions[action]
-      #print('state', n_states, 'transition', transition)
+      #print('state', n_accept, 'transition', transition)
 
       del threads0[prefix_slop:]
       transit(transition)
       #print(threads0[prefix_slop:])
+      if n_accept >= self.accept.shape[0]:
+        new_accept = numpy.zeros(
+          (self.accept.shape[0] * 2,),
+          numpy.int16
+        )
+        new_accept[:self.accept.shape[0]] = self.accept
+        self.accept = new_accept
+      self.accept[n_accept] = n_acclist
       flex_accept = []
       for k in [j for i in threads0[prefix_slop:] for j in i]:
+        acc = k >> 1
         if k & 1:
           if (
-            len(flex_accept) > 0 and
-            flex_accept[-1] == (k >> 1) | FlexDFA.YY_TRAILING_HEAD_MASK
+            n_acclist == self.accept[n_accept] or
+            self.acclist[n_acclist - 1] != acc | FlexDFA.YY_TRAILING_HEAD_MASK
           ):
-            # zero length trailing context, accept immediately
-            flex_accept.append(k >> 1)
-          else:
             # look back to start of trailing context, then accept
-            flex_accept.append((k >> 1) | FlexDFA.YY_HEAD_MASK)
+            acc |= FlexDFA.YY_HEAD_MASK
+          # otherwise zero length trailing context, accept immediately
         else:
           # mark start of (hopefully safe) trailing context
-          flex_accept.append((k >> 1) | FlexDFA.YY_TRAILING_HEAD_MASK)
-      #print(flex_accept)
-      self.states_accept.append(flex_accept)
+          acc |= FlexDFA.YY_TRAILING_HEAD_MASK
+        if n_acclist >= self.acclist.shape[0]:
+          new_acclist = numpy.zeros(
+            (self.acclist.shape[0] * 2,),
+            numpy.int16
+          )
+          new_acclist[:self.acclist.shape[0]] = self.acclist
+          self.acclist = new_acclist
+        self.acclist[n_acclist] = acc
+        n_acclist += 1
 
       # extend transition_table array if required
-      if n_states >= transition_table.shape[0]:
+      if n_accept >= transition_table.shape[0]:
         new_transition_table = numpy.zeros(
           (transition_table.shape[0] * 2, 0x101),
           numpy.int16
@@ -128,19 +144,31 @@ class FlexDFA:
           next_flex_state = len(flex_state_to_action)
           action_to_flex_state[next_action] = next_flex_state
           flex_state_to_action.append(next_action)
-        transition_table[n_states, character0:character1] = next_flex_state
+        transition_table[n_accept, character0:character1] = next_flex_state
         character0 = character1
       assert character0 == 0x100
 
       # remap NUL transition to 0x100 and replace with EOB transition
-      transition_table[n_states, 0x100] = \
-        transition_table[n_states, 0]
-      transition_table[n_states, 0] = eob_state
-      #print(n_states, transition_table[len(self.states), :])
+      transition_table[n_accept, 0x100] = \
+        transition_table[n_accept, 0]
+      transition_table[n_accept, 0] = eob_state
+      #print(n_accept, transition_table[len(self.states), :])
 
-      n_states += 1
+      n_accept += 1
+
+    self.acclist = self.acclist[:n_acclist]
+
+    if n_accept >= self.accept.shape[0]:
+      new_accept = numpy.zeros(
+        (self.accept.shape[0] * 2,),
+        numpy.int16
+      )
+      new_accept[:self.accept.shape[0]] = self.accept
+      self.accept = new_accept
+    self.accept[n_accept] = n_acclist
+    self.accept = self.accept[:n_accept + 1]
 
-    while len(self.states) < n_states:
+    while len(self.states) < n_accept:
       # find most similar/earliest existing state to use as default
       mask = (
         transition_table[len(self.states):len(self.states) + 1, :] !=
@@ -216,11 +244,6 @@ def generate(plex, skel_file, out_file):
             )
           )
         elif line == '/* GENERATE TABLES */\n':
-          yy_acclist = []
-          yy_accept = [0]
-          for flex_accept in flex_dfa.states_accept:
-            yy_acclist.extend(flex_accept)
-            yy_accept.append(len(yy_acclist))
           fout.write(
             '''/* GENERATE TABLES BEGIN */
 #define YY_END_OF_BUFFER {0:d}
@@ -245,11 +268,11 @@ static const flex_int16_t yy_chk[] = {{{6:s}
                     ', '.join(
                       [
                         '{0:5d}'.format(j)
-                        for j in yy_acclist[i:i + 10]
+                        for j in flex_dfa.acclist[i:i + 10]
                       ]
                     )
                   )
-                  for i in range(0, len(yy_acclist), 10)
+                  for i in range(0, flex_dfa.acclist.shape[0], 10)
                 ]
               ),
               ','.join(
@@ -258,11 +281,11 @@ static const flex_int16_t yy_chk[] = {{{6:s}
                     ', '.join(
                       [
                         '{0:5d}'.format(j)
-                        for j in yy_accept[i:i + 10]
+                        for j in flex_dfa.accept[i:i + 10]
                       ]
                     )
                   )
-                  for i in range(0, len(yy_accept), 10)
+                  for i in range(0, flex_dfa.accept.shape[0], 10)
                 ]
               ),
               ','.join(