threads1 = [None]
prefix_slop = 1
- # this is basically just a renumbering
-
- # state numbers in the DFA become base/def numbers in the FlexDFA,
- # obviously with gaps in the numbering depending on how things fit
- state_to_flex_base_def = {}
-
# action numbers in the DFA become state numbers in the FlexDFA,
# with the start-action for each start-condition being copied into
# the correctly numbered slot (may cause duplicates), and then all
flex_accept.append((k >> 1) | FlexDFA.YY_TRAILING_HEAD_MASK)
#print(flex_accept)
- if state in state_to_flex_base_def:
- flex_base, flex_def = state_to_flex_base[state]
- else:
- # extend full_entries array if required
- if len(self.states) >= full_entries.shape[0]:
- new_full_entries = numpy.zeros(
- (full_entries.shape[0] * 2, 0x101),
- numpy.int16
- )
- new_full_entries[:full_entries.shape[0], :] = full_entries
- full_entries = new_full_entries
+ # extend full_entries array if required
+ if len(self.states) >= full_entries.shape[0]:
+ new_full_entries = numpy.zeros(
+ (full_entries.shape[0] * 2, 0x101),
+ numpy.int16
+ )
+ new_full_entries[:full_entries.shape[0], :] = full_entries
+ full_entries = new_full_entries
- # calculate full entry from _dfa.state character-to-action table
- breaks, actions, _ = _dfa.states[state]
- character0 = 0
- for i in range(len(breaks)):
- character1 = breaks[i]
- next_action = actions[i]
- if next_action in action_to_flex_state:
- next_flex_state = action_to_flex_state[next_action]
- else:
- next_flex_state = len(flex_state_to_action)
- action_to_flex_state[next_action] = next_flex_state
- flex_state_to_action.append(next_action)
- full_entries[len(self.states), character0:character1] = next_flex_state
- character0 = character1
- assert character0 == 0x100
+ # calculate full entry from _dfa.state character-to-action table
+ breaks, actions, _ = _dfa.states[state]
+ character0 = 0
+ for i in range(len(breaks)):
+ character1 = breaks[i]
+ next_action = actions[i]
+ if next_action in action_to_flex_state:
+ next_flex_state = action_to_flex_state[next_action]
+ else:
+ next_flex_state = len(flex_state_to_action)
+ action_to_flex_state[next_action] = next_flex_state
+ flex_state_to_action.append(next_action)
+ full_entries[len(self.states), character0:character1] = next_flex_state
+ character0 = character1
+ assert character0 == 0x100
- # remap NUL transition to 0x100 and replace with EOB transition
- full_entries[len(self.states), 0x100] = \
- full_entries[len(self.states), 0]
- full_entries[len(self.states), 0] = eob_state
- #print(len(self.states), full_entries[len(self.states), :])
+ # remap NUL transition to 0x100 and replace with EOB transition
+ full_entries[len(self.states), 0x100] = \
+ full_entries[len(self.states), 0]
+ full_entries[len(self.states), 0] = eob_state
+ #print(len(self.states), full_entries[len(self.states), :])
- # find most similar/earliest existing state to use as default
- mask = (
- full_entries[len(self.states):len(self.states) + 1, :] !=
- full_entries[:len(self.states), :]
- )
- diff = numpy.sum(mask, 1)
- flex_def = numpy.argmin(diff, 0)
- if diff[flex_def] == 0: # exactly matching state
- # can't use the normal similarity mechanism here, because it
- # will choose a base of 0 (happens to correspond to the jam
- # state) and this will make flex's inner loop abort early
- # ... highlights various issues with flex's tables format,
- # for instance that duplicate states may be unavoidable when
- # start conditions are used, and that checking the base is
- # not an ideal way to check if a state has the same transitions
- # as the jam state, in fact a state can only share the same base
- # as another state by COINCIDENCE due to the yy_chk[] issue!
- # ... fix this by merging indistinguishable states (except for
- # duplicate start conditions, which may have to use this hack)
- flex_base = self.states[flex_def][1]
- else:
- mask = mask[flex_def, :]
+ # find most similar/earliest existing state to use as default
+ mask = (
+ full_entries[len(self.states):len(self.states) + 1, :] !=
+ full_entries[:len(self.states), :]
+ )
+ diff = numpy.sum(mask, 1)
+ flex_def = numpy.argmin(diff, 0)
+ if diff[flex_def] == 0: # exactly matching state
+ # can't use the normal similarity mechanism here, because it
+ # will choose a base of 0 (happens to correspond to the jam
+ # state) and this will make flex's inner loop abort early
+ # ... highlights various issues with flex's tables format,
+ # for instance that duplicate states may be unavoidable when
+ # start conditions are used, and that checking the base is
+ # not an ideal way to check if a state has the same transitions
+ # as the jam state, in fact a state can only share the same base
+ # as another state by COINCIDENCE due to the yy_chk[] issue!
+ # ... fix this by merging indistinguishable states (except for
+ # duplicate start conditions, which may have to use this hack)
+ flex_base = self.states[flex_def][1]
+ else:
+ mask = mask[flex_def, :]
- # make sure used array is at least large enough to find a spot
- while used.shape[0] < len(self.entries) + 0x101:
- new_used = numpy.zeros((used.shape[0] * 2,), numpy.bool)
- new_used[:used.shape[0]] = used
- used = new_used
+ # make sure used array is at least large enough to find a spot
+ while used.shape[0] < len(self.entries) + 0x101:
+ new_used = numpy.zeros((used.shape[0] * 2,), numpy.bool)
+ new_used[:used.shape[0]] = used
+ used = new_used
- # find a suitable spot and store differences from default state
- flex_base = 0
- while flex_base < len(self.entries):
- if not numpy.any(used[flex_base:flex_base + 0x101] & mask):
- break
- flex_base += 1
- used[flex_base:flex_base + 0x101] |= mask
- if len(self.entries) < flex_base + 0x101:
- self.entries.extend(
- [(0xffff, 0xffff)] * (flex_base + 0x101 - len(self.entries))
- )
- for i in numpy.nonzero(mask)[0]:
- assert self.entries[flex_base + i] == (0xffff, 0xffff)
- self.entries[flex_base + i] = (
- full_entries[len(self.states), i],
- len(self.states)
- )
+ # find a suitable spot and store differences from default state
+ flex_base = 0
+ while flex_base < len(self.entries):
+ if not numpy.any(used[flex_base:flex_base + 0x101] & mask):
+ break
+ flex_base += 1
+ used[flex_base:flex_base + 0x101] |= mask
+ if len(self.entries) < flex_base + 0x101:
+ self.entries.extend(
+ [(0xffff, 0xffff)] * (flex_base + 0x101 - len(self.entries))
+ )
+ for i in numpy.nonzero(mask)[0]:
+ assert self.entries[flex_base + i] == (0xffff, 0xffff)
+ self.entries[flex_base + i] = (
+ full_entries[len(self.states), i],
+ len(self.states)
+ )
self.states.append((flex_accept, flex_base, flex_def))
#print(full_entries[:len(self.states), :])