Remove useless state_to_flex_base_def (can't reuse base, def due to yy_chk[])
authorNick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 01:12:04 +0000 (11:12 +1000)
committerNick Downing <downing.nick@gmail.com>
Sun, 22 Jul 2018 01:12:04 +0000 (11:12 +1000)
flex_dfa.py

index 98ef6f7..61c0360 100644 (file)
@@ -49,12 +49,6 @@ class FlexDFA:
     threads1 = [None]
     prefix_slop = 1
 
-    # this is basically just a renumbering
-
-    # state numbers in the DFA become base/def numbers in the FlexDFA,
-    # obviously with gaps in the numbering depending on how things fit
-    state_to_flex_base_def = {}
-
     # action numbers in the DFA become state numbers in the FlexDFA,
     # with the start-action for each start-condition being copied into
     # the correctly numbered slot (may cause duplicates), and then all
@@ -110,86 +104,83 @@ class FlexDFA:
           flex_accept.append((k >> 1) | FlexDFA.YY_TRAILING_HEAD_MASK)
       #print(flex_accept)
 
-      if state in state_to_flex_base_def:
-        flex_base, flex_def = state_to_flex_base[state]
-      else:
-        # extend full_entries array if required
-        if len(self.states) >= full_entries.shape[0]:
-          new_full_entries = numpy.zeros(
-            (full_entries.shape[0] * 2, 0x101),
-            numpy.int16
-          )
-          new_full_entries[:full_entries.shape[0], :] = full_entries
-          full_entries = new_full_entries
+      # extend full_entries array if required
+      if len(self.states) >= full_entries.shape[0]:
+        new_full_entries = numpy.zeros(
+          (full_entries.shape[0] * 2, 0x101),
+          numpy.int16
+        )
+        new_full_entries[:full_entries.shape[0], :] = full_entries
+        full_entries = new_full_entries
 
-        # calculate full entry from _dfa.state character-to-action table
-        breaks, actions, _ = _dfa.states[state]
-        character0 = 0
-        for i in range(len(breaks)):
-          character1 = breaks[i]
-          next_action = actions[i]
-          if next_action in action_to_flex_state:
-            next_flex_state = action_to_flex_state[next_action]
-          else:
-            next_flex_state = len(flex_state_to_action)
-            action_to_flex_state[next_action] = next_flex_state
-            flex_state_to_action.append(next_action)
-          full_entries[len(self.states), character0:character1] = next_flex_state
-          character0 = character1
-        assert character0 == 0x100
+      # calculate full entry from _dfa.state character-to-action table
+      breaks, actions, _ = _dfa.states[state]
+      character0 = 0
+      for i in range(len(breaks)):
+        character1 = breaks[i]
+        next_action = actions[i]
+        if next_action in action_to_flex_state:
+          next_flex_state = action_to_flex_state[next_action]
+        else:
+          next_flex_state = len(flex_state_to_action)
+          action_to_flex_state[next_action] = next_flex_state
+          flex_state_to_action.append(next_action)
+        full_entries[len(self.states), character0:character1] = next_flex_state
+        character0 = character1
+      assert character0 == 0x100
 
-        # remap NUL transition to 0x100 and replace with EOB transition
-        full_entries[len(self.states), 0x100] = \
-          full_entries[len(self.states), 0]
-        full_entries[len(self.states), 0] = eob_state
-        #print(len(self.states), full_entries[len(self.states), :])
+      # remap NUL transition to 0x100 and replace with EOB transition
+      full_entries[len(self.states), 0x100] = \
+        full_entries[len(self.states), 0]
+      full_entries[len(self.states), 0] = eob_state
+      #print(len(self.states), full_entries[len(self.states), :])
 
-        # find most similar/earliest existing state to use as default
-        mask = (
-          full_entries[len(self.states):len(self.states) + 1, :] !=
-          full_entries[:len(self.states), :]
-        )
-        diff = numpy.sum(mask, 1)
-        flex_def = numpy.argmin(diff, 0)
-        if diff[flex_def] == 0: # exactly matching state
-          # can't use the normal similarity mechanism here, because it
-          # will choose a base of 0 (happens to correspond to the jam
-          # state) and this will make flex's inner loop abort early
-          # ... highlights various issues with flex's tables format,
-          # for instance that duplicate states may be unavoidable when
-          # start conditions are used, and that checking the base is
-          # not an ideal way to check if a state has the same transitions
-          # as the jam state, in fact a state can only share the same base
-          # as another state by COINCIDENCE due to the yy_chk[] issue!
-          # ... fix this by merging indistinguishable states (except for
-          # duplicate start conditions, which may have to use this hack)
-          flex_base = self.states[flex_def][1]
-        else:
-          mask = mask[flex_def, :]
+      # find most similar/earliest existing state to use as default
+      mask = (
+        full_entries[len(self.states):len(self.states) + 1, :] !=
+        full_entries[:len(self.states), :]
+      )
+      diff = numpy.sum(mask, 1)
+      flex_def = numpy.argmin(diff, 0)
+      if diff[flex_def] == 0: # exactly matching state
+        # can't use the normal similarity mechanism here, because it
+        # will choose a base of 0 (happens to correspond to the jam
+        # state) and this will make flex's inner loop abort early
+        # ... highlights various issues with flex's tables format,
+        # for instance that duplicate states may be unavoidable when
+        # start conditions are used, and that checking the base is
+        # not an ideal way to check if a state has the same transitions
+        # as the jam state, in fact a state can only share the same base
+        # as another state by COINCIDENCE due to the yy_chk[] issue!
+        # ... fix this by merging indistinguishable states (except for
+        # duplicate start conditions, which may have to use this hack)
+        flex_base = self.states[flex_def][1]
+      else:
+        mask = mask[flex_def, :]
 
-          # make sure used array is at least large enough to find a spot
-          while used.shape[0] < len(self.entries) + 0x101:
-            new_used = numpy.zeros((used.shape[0] * 2,), numpy.bool)
-            new_used[:used.shape[0]] = used
-            used = new_used
+        # make sure used array is at least large enough to find a spot
+        while used.shape[0] < len(self.entries) + 0x101:
+          new_used = numpy.zeros((used.shape[0] * 2,), numpy.bool)
+          new_used[:used.shape[0]] = used
+          used = new_used
 
-          # find a suitable spot and store differences from default state
-          flex_base = 0
-          while flex_base < len(self.entries):
-            if not numpy.any(used[flex_base:flex_base + 0x101] & mask):
-              break
-            flex_base += 1
-          used[flex_base:flex_base + 0x101] |= mask
-          if len(self.entries) < flex_base + 0x101:
-            self.entries.extend(
-              [(0xffff, 0xffff)] * (flex_base + 0x101 - len(self.entries))
-            )
-          for i in numpy.nonzero(mask)[0]:
-            assert self.entries[flex_base + i] == (0xffff, 0xffff)
-            self.entries[flex_base + i] = (
-              full_entries[len(self.states), i],
-              len(self.states)
-            )
+        # find a suitable spot and store differences from default state
+        flex_base = 0
+        while flex_base < len(self.entries):
+          if not numpy.any(used[flex_base:flex_base + 0x101] & mask):
+            break
+          flex_base += 1
+        used[flex_base:flex_base + 0x101] |= mask
+        if len(self.entries) < flex_base + 0x101:
+          self.entries.extend(
+            [(0xffff, 0xffff)] * (flex_base + 0x101 - len(self.entries))
+          )
+        for i in numpy.nonzero(mask)[0]:
+          assert self.entries[flex_base + i] == (0xffff, 0xffff)
+          self.entries[flex_base + i] = (
+            full_entries[len(self.states), i],
+            len(self.states)
+          )
 
       self.states.append((flex_accept, flex_base, flex_def))
     #print(full_entries[:len(self.states), :])