YY_TRAILING_HEAD_MASK = 0x4000
def __init__(self, dfa):
+ # we use a modified version of the transition routine, we do not know
+ # how many threads are active, so we just create null threads as they
+ # are referred to (resulting threads have current marks but no history),
+ # each thread is a list in forward order, not a stack in reverse order
+ def transit(transition):
+ nonlocal threads0, threads1, prefix_slop # note: also uses i
+ j = prefix_slop
+ for trans in transition:
+ if len(threads0) < j + trans[1]:
+ threads0.extend([[] for k in range(j + trans[1] - len(threads0))])
+ if trans[0] == regex.DFA.TRANSITION_POP:
+ j += trans[1]
+ elif trans[0] == regex.DFA.TRANSITION_DUP:
+ while j < trans[1]:
+ threads0[:0] = [None] * prefix_slop
+ threads1[:0] = [None] * prefix_slop
+ j += prefix_slop
+ prefix_slop *= 2
+ threads0[j - trans[1]:j] = [
+ list(k)
+ for k in threads0[j:j + trans[1]]
+ ]
+ j -= trans[1]
+ elif trans[0] == regex.DFA.TRANSITION_MARK:
+ for k in range(j, j + trans[1]):
+ threads0[j].append(trans[2])
+ elif trans[0] == regex.DFA.TRANSITION_MOVE:
+ threads1.extend(threads0[j:j + trans[1]])
+ j += trans[1]
+ #elif trans[0] == regex.DFA.TRANSITION_DEL:
+ # del threads1[-trans[1]:]
+ else:
+ assert False
+ assert j == len(threads0)
+ threads0, threads1 = threads1, threads0
+ del threads1[prefix_slop:]
+
+ threads0 = [None]
+ threads1 = [None]
+ prefix_slop = 1
+
# this is basically just a renumbering
# state numbers in the DFA become base/def numbers in the FlexDFA,
state, transition = dfa.actions[action]
#print('state', len(self.states), 'transition', transition)
- # we collect marks without regard to which thread they refer to,
- # they should already be in priority order without any duplicates
- # (because the deduplication removes subsequent identical threads)
+ del threads0[prefix_slop:]
+ transit(transition)
+ #print(threads0[prefix_slop:])
flex_accept = []
- for j in [i[2] for i in transition if i[0] == regex.DFA.TRANSITION_MARK]:
- #print(j)
- if j & 1:
+ for k in [j for i in threads0[prefix_slop:] for j in i]:
+ if k & 1:
if (
len(flex_accept) > 0 and
- flex_accept[-1] == (j >> 1) | FlexDFA.YY_TRAILING_HEAD_MASK
+ flex_accept[-1] == (k >> 1) | FlexDFA.YY_TRAILING_HEAD_MASK
):
# zero length trailing context, accept immediately
- flex_accept.append(j >> 1)
+ flex_accept.append(k >> 1)
else:
# look back to start of trailing context, then accept
- flex_accept.append((j >> 1) | FlexDFA.YY_HEAD_MASK)
+ flex_accept.append((k >> 1) | FlexDFA.YY_HEAD_MASK)
else:
# mark start of (hopefully safe) trailing context
- flex_accept.append((j >> 1) | FlexDFA.YY_TRAILING_HEAD_MASK)
+ flex_accept.append((k >> 1) | FlexDFA.YY_TRAILING_HEAD_MASK)
#print(flex_accept)
if state in state_to_flex_base_def:
diff = numpy.sum(mask, 1)
flex_def = numpy.argmin(diff, 0)
if diff[flex_def] == 0: # exactly matching state
- # this is supposed to have been compressed by DFA already,
- # but can happen since we ignore the accept field of the DFA,
- # just copy the base and def fields from the matching state
- _, flex_base, flex_def = self.states[flex_def]
+ # can't use the normal similarity mechanism here, because it
+ # will choose a base of 0 (happens to correspond to the jam
+ # state) and this will make flex's inner loop abort early
+ # ... highlights various issues with flex's tables format,
+ # for instance that duplicate states may be unavoidable when
+ # start conditions are used, and that checking the base is
+ # not an ideal way to check if a state has the same transitions
+ # as the jam state, in fact a state can only share the same base
+ # as another state by COINCIDENCE due to the yy_chk[] issue!
+ # ... fix this by merging indistinguishable states (except for
+ # duplicate start conditions, which may have to use this hack)
+ flex_base = self.states[flex_def][1]
else:
mask = mask[flex_def, :]
self.states.append((flex_accept, flex_base, flex_def))
#print(full_entries[:len(self.states), :])
-
+ #print(flex_state_to_action)
+
if len(sys.argv) < 2:
sys.stdout.write(
'usage: {0:s} rules.l\n'.format(
dfa = nfa.to_dfa()
#print(dfa.start_action)
#print(dfa.actions[2])
+#print(dfa.match_text('1.0 + 5', 0))
+
flex_dfa = FlexDFA(dfa) #nfa.to_dfa())
with open('skel/lex.yy.c', 'r') as fin:
with open('lex.yy.c', 'w+') as fout: