#!/bin/sh
-if ./generate.py ast <ast.py >ast.py.new && ! diff -q ast.py ast.py.new
+if ./generate_ast.py ast <ast.py >ast.py.new && ! diff -q ast.py ast.py.new
then
mv ast.py.new ast.py
else
import ast
import element
-import flex_dfa
+import generate_flex
import getopt
import os
import sys
plex.post_process()
element.serialize(plex, 'b.xml', 'utf-8')
plex = element.deserialize('b.xml', ast.factory, 'utf-8')
-flex_dfa.generate(plex, skel_file, out_file)
+generate_flex.generate_flex(plex, skel_file, out_file)
import bisect
import element
+import flex_dfa
+import numpy
+import numpy_heap
import work
import sys
self.actions = actions
self.start_action = start_action
+ def to_flex_dfa(self):
+ # we use a modified version of the transition routine, we do not know
+ # how many threads are active, so we just create null threads as they
+ # are referred to (resulting threads have current marks but no history),
+ # each thread is a list in forward order, not a stack in reverse order
+ def transit(transition):
+ nonlocal threads0, threads1, prefix_slop # note: also uses i
+ j = prefix_slop
+ for trans in transition:
+ if len(threads0) < j + trans[1]:
+ threads0.extend([[] for k in range(j + trans[1] - len(threads0))])
+ if trans[0] == DFA.TRANSITION_POP:
+ j += trans[1]
+ elif trans[0] == DFA.TRANSITION_DUP:
+ while j < trans[1]:
+ threads0[:0] = [None] * prefix_slop
+ threads1[:0] = [None] * prefix_slop
+ j += prefix_slop
+ prefix_slop *= 2
+ threads0[j - trans[1]:j] = [
+ list(k)
+ for k in threads0[j:j + trans[1]]
+ ]
+ j -= trans[1]
+ elif trans[0] == DFA.TRANSITION_MARK:
+ for k in range(j, j + trans[1]):
+ threads0[j].append(trans[2])
+ elif trans[0] == DFA.TRANSITION_MOVE:
+ threads1.extend(threads0[j:j + trans[1]])
+ j += trans[1]
+ #elif trans[0] == DFA.TRANSITION_DEL:
+ # del threads1[-trans[1]:]
+ else:
+ assert False
+ assert j == len(threads0)
+ threads0, threads1 = threads1, threads0
+ del threads1[prefix_slop:]
+
+ threads0 = [None]
+ threads1 = [None]
+ prefix_slop = 1
+
+ # action numbers in the DFA become state numbers in the FlexDFA,
+ # with the start-action for each start-condition being copied into
+ # the correctly numbered slot (may cause duplicates), and then all
+ # actions reachable from these being copied to the subsequent slots
+ # (if start-action is reached again, uses the lower numbered copy)
+ flex_state_to_action = [0] + self.start_action
+ action_to_flex_state = {-1: 0} # see comment about state -1 below
+ for i in range(len(flex_state_to_action)):
+ action = flex_state_to_action[i]
+ if action not in action_to_flex_state:
+ action_to_flex_state[action] = i
+
+ # last start-condition is really end-of-buffer (EOB), it has only
+ # a dummy rule that accepts the null string and executes EOB action
+ eob_state = len(self.start_action)
+
+ # state 0 is the jam state, make it exist but have empty acclist
+ acclist = numpy.zeros((0x100,), numpy.uint16)
+ n_acclist = 0
+ accept = numpy.zeros((0x100,), numpy.uint16)
+ n_states = 1
+
+ # transitions[i, j] is transition on character j in state i
+ # in our way of thinking, 0 is don't care and -1 is failure
+ # however, in the flex way these are both 0 (don't care),
+ # the distinction being that failure has no associated action
+ # thus all entries of states are filled, with 0 as a catch-all
+ transitions = numpy.zeros((0x100, 0x101), numpy.uint16)
+
+ # generate acclist, accept and transition tables
+ while n_states < len(flex_state_to_action):
+ action = flex_state_to_action[n_states]
+ state, transition = self.actions[action]
+ #print('state', n_states, 'transition', transition)
+
+ del threads0[prefix_slop:]
+ transit(transition)
+ #print(threads0[prefix_slop:])
+ if n_states >= accept.shape[0]:
+ # extend accept
+ new_accept = numpy.zeros(
+ (accept.shape[0] * 2,),
+ numpy.uint16
+ )
+ new_accept[:accept.shape[0]] = accept
+ accept = new_accept
+ accept[n_states] = n_acclist
+ accept_set = set()
+ for k in [j for i in threads0[prefix_slop:] for j in i]:
+ acc = k >> 1
+ if k & 1:
+ if (acc | flex_dfa.FlexDFA.YY_TRAILING_HEAD_MASK) not in accept_set:
+ # look back to start of trailing context, then accept
+ acc |= flex_dfa.FlexDFA.YY_TRAILING_MASK
+ # otherwise zero length trailing context, accept immediately
+ else:
+ # mark start of (hopefully safe) trailing context
+ acc |= flex_dfa.FlexDFA.YY_TRAILING_HEAD_MASK
+ if acc not in accept_set:
+ if n_acclist >= acclist.shape[0]:
+ # extend acclist
+ new_acclist = numpy.zeros(
+ (acclist.shape[0] * 2,),
+ numpy.uint16
+ )
+ new_acclist[:acclist.shape[0]] = acclist
+ acclist = new_acclist
+ acclist[n_acclist] = acc
+ n_acclist += 1
+ accept_set.add(acc)
+
+ # calculate transition row from self.state character-to-action table
+ if n_states >= transitions.shape[0]:
+ # extend transitions
+ new_transitions = numpy.zeros(
+ (transitions.shape[0] * 2, 0x101),
+ numpy.uint16
+ )
+ new_transitions[:transitions.shape[0], :] = transitions
+ transitions = new_transitions
+ breaks, actions, _ = self.states[state]
+ character0 = 0
+ for i in range(len(breaks)):
+ character1 = breaks[i]
+ next_action = actions[i]
+ if next_action in action_to_flex_state:
+ next_flex_state = action_to_flex_state[next_action]
+ else:
+ next_flex_state = len(flex_state_to_action)
+ action_to_flex_state[next_action] = next_flex_state
+ flex_state_to_action.append(next_action)
+ transitions[n_states, character0:character1] = next_flex_state
+ character0 = character1
+ assert character0 == 0x100
+
+ n_states += 1
+
+ # finalize the acclist and accept tables
+ acclist = acclist[:n_acclist]
+ if n_states >= accept.shape[0]:
+ new_accept = numpy.zeros(
+ (accept.shape[0] * 2,),
+ numpy.uint16
+ )
+ new_accept[:accept.shape[0]] = accept
+ accept = new_accept
+ accept[n_states] = n_acclist
+ accept = accept[:n_states + 1]
+
+ # finalize the transitions table
+ transitions = transitions[:n_states, :]
+ transitions[:, 0x100] = transitions[:, 0]
+ transitions[:, 0] = eob_state
+
+ # calculate default states by constructing minimum spanning tree
+ # heap contains n states todo followed by n_states - n states done
+ # each heap entry is [distance, hop count, state done, state todo]
+ heap = numpy.zeros((n_states, 4), numpy.uint16)
+ heap[:-1, 0] = numpy.sum(transitions[1:, :] != transitions[:1, :], 1)
+ heap[:-1, 3] = numpy.arange(1, n_states, dtype = numpy.uint16)
+ numpy_heap.heapify(heap, n_states - 1)
+ for n in range(n_states - 2, 0, -1):
+ if n % 100 == 0:
+ print('mst', n)
+
+ key = tuple(heap[n, :])
+ heap[n, :] = heap[0, :]
+ numpy_heap.bubble_down(heap, 0, key, n)
+ hop_count = heap[n, 1] + 1 # proposed hop_count is current hop_count + 1
+ state_done = heap[n, 3] # proposed state_done is current state_todo
+ dist = numpy.sum(
+ transitions[heap[:n, 3], :] !=
+ transitions[state_done:state_done + 1, :],
+ 1
+ )
+ # although numpy cannot do lexicographic comparisons, check the
+ # first field via numpy to quickly generate a list of candidates
+ for i in numpy.nonzero(dist <= heap[:n, 0])[0]:
+ key = (dist[i], hop_count, state_done, heap[i, 3])
+ if key < tuple(heap[i, :]):
+ numpy_heap.bubble_up(heap, i, key)
+
+ # state 0 is the jam state, the EOB state will be added later on
+ states = numpy.zeros((n_states, 2), numpy.uint16) # base, def
+ entries = numpy.full((0x200, 2), -1, numpy.uint16) # nxt, chk
+ entries[:0x101, :] = 0 # jam state just returns to jam state
+ entries[0, 0] = eob_state # except for the EOB transition
+ entries_free = numpy.full(0x200, True, numpy.bool)
+ entries_free[:0x101] = False # account for the jam (don't care) state
+ n_entries = 0x101
+
+ # pack states in reverse order (larger distances first)
+ dupes = []
+ for i in range(n_states - 1):
+ if (n_states - i) % 100 == 0:
+ print('pack', n_states - i)
+
+ if heap[i, 0] == 0:
+ # when copying another state, need to have the same base, though
+ # the base will not matter since there will be no entries, it is
+ # is because of the awkward way the compressed lookup is written
+ dupes.append(i)
+ else:
+ state_done = heap[i, 2]
+ state_todo = heap[i, 3]
+ indices = numpy.nonzero(
+ transitions[state_todo, :] != transitions[state_done, :]
+ )[0]
+
+ # make sure entries array is at least large enough to find a spot
+ while entries.shape[0] < n_entries + 0x101:
+ # extend entries, copying only n_entries entries
+ new_entries = numpy.full(
+ (entries.shape[0] * 2, 2),
+ -1,
+ numpy.uint16
+ )
+ new_entries[:n_entries, :] = entries[:n_entries, :]
+ entries = new_entries
+
+ # extend entries_free, copying only n_entries entries
+ new_entries_free = numpy.full(
+ (entries_free.shape[0] * 2,),
+ True,
+ numpy.bool
+ )
+ new_entries_free[:n_entries] = entries_free[:n_entries]
+ entries_free = new_entries_free
+
+ # find a suitable spot and store differences from default state
+ # generate a list of candidates via numpy for the first slot only
+ for start_index in numpy.nonzero(
+ entries_free[indices[0]:n_entries]
+ )[0]:
+ indices_offset = indices + start_index
+ if numpy.all(entries_free[indices_offset]):
+ break
+ else:
+ start_index = n_entries
+ indices_offset = indices + start_index
+ entries[indices_offset, 0] = transitions[state_todo, indices]
+ entries[indices_offset, 1] = state_todo
+ entries_free[indices_offset] = False
+ if n_entries < start_index + 0x101:
+ n_entries = start_index + 0x101
+
+ states[state_todo, 0] = start_index
+ states[state_todo, 1] = state_done
+ while len(dupes):
+ if len(dupes) % 100 == 0:
+ print('dupe', len(dupes))
+
+ i = dupes.pop()
+ state_done = heap[i, 2]
+ state_todo = heap[i, 3]
+ states[state_todo, 0] = states[state_done, 0]
+ states[state_todo, 1] = state_done
+
+ # finalize entries table
+ entries = entries[:n_entries, :]
+ print('n_entries', n_entries)
+
+ return flex_dfa.FlexDFA(accept, acclist, states, entries)
+
def match_text(self, text, i, start_index = 0):
def transit(transition):
nonlocal threads0, threads1, prefix_slop # note: also uses i
-import dfa
-import element
-import numpy
-import numpy_heap
-import regex
-
class FlexDFA:
YY_TRAILING_MASK = 0x2000
YY_TRAILING_HEAD_MASK = 0x4000
- def __init__(self, _dfa):
- # we use a modified version of the transition routine, we do not know
- # how many threads are active, so we just create null threads as they
- # are referred to (resulting threads have current marks but no history),
- # each thread is a list in forward order, not a stack in reverse order
- def transit(transition):
- nonlocal threads0, threads1, prefix_slop # note: also uses i
- j = prefix_slop
- for trans in transition:
- if len(threads0) < j + trans[1]:
- threads0.extend([[] for k in range(j + trans[1] - len(threads0))])
- if trans[0] == dfa.DFA.TRANSITION_POP:
- j += trans[1]
- elif trans[0] == dfa.DFA.TRANSITION_DUP:
- while j < trans[1]:
- threads0[:0] = [None] * prefix_slop
- threads1[:0] = [None] * prefix_slop
- j += prefix_slop
- prefix_slop *= 2
- threads0[j - trans[1]:j] = [
- list(k)
- for k in threads0[j:j + trans[1]]
- ]
- j -= trans[1]
- elif trans[0] == dfa.DFA.TRANSITION_MARK:
- for k in range(j, j + trans[1]):
- threads0[j].append(trans[2])
- elif trans[0] == dfa.DFA.TRANSITION_MOVE:
- threads1.extend(threads0[j:j + trans[1]])
- j += trans[1]
- #elif trans[0] == dfa.DFA.TRANSITION_DEL:
- # del threads1[-trans[1]:]
- else:
- assert False
- assert j == len(threads0)
- threads0, threads1 = threads1, threads0
- del threads1[prefix_slop:]
-
- threads0 = [None]
- threads1 = [None]
- prefix_slop = 1
-
- # action numbers in the DFA become state numbers in the FlexDFA,
- # with the start-action for each start-condition being copied into
- # the correctly numbered slot (may cause duplicates), and then all
- # actions reachable from these being copied to the subsequent slots
- # (if start-action is reached again, uses the lower numbered copy)
- flex_state_to_action = [0] + _dfa.start_action
- action_to_flex_state = {-1: 0} # see comment about state -1 below
- for i in range(len(flex_state_to_action)):
- action = flex_state_to_action[i]
- if action not in action_to_flex_state:
- action_to_flex_state[action] = i
-
- # last start-condition is really end-of-buffer (EOB), it has only
- # a dummy rule that accepts the null string and executes EOB action
- eob_state = len(_dfa.start_action)
-
- # state 0 is the jam state, make it exist but have empty acclist
- self.acclist = numpy.zeros((0x100,), numpy.uint16)
- n_acclist = 0
- self.accept = numpy.zeros((0x100,), numpy.uint16)
- n_states = 1
-
- # transitions[i, j] is transition on character j in state i
- # in our way of thinking, 0 is don't care and -1 is failure
- # however, in the flex way these are both 0 (don't care),
- # the distinction being that failure has no associated action
- # thus all entries of states are filled, with 0 as a catch-all
- transitions = numpy.zeros((0x100, 0x101), numpy.uint16)
-
- # generate acclist, accept and transition tables
- while n_states < len(flex_state_to_action):
- action = flex_state_to_action[n_states]
- state, transition = _dfa.actions[action]
- #print('state', n_states, 'transition', transition)
-
- del threads0[prefix_slop:]
- transit(transition)
- #print(threads0[prefix_slop:])
- if n_states >= self.accept.shape[0]:
- # extend accept
- new_accept = numpy.zeros(
- (self.accept.shape[0] * 2,),
- numpy.uint16
- )
- new_accept[:self.accept.shape[0]] = self.accept
- self.accept = new_accept
- self.accept[n_states] = n_acclist
- accept_set = set()
- for k in [j for i in threads0[prefix_slop:] for j in i]:
- acc = k >> 1
- if k & 1:
- if (acc | FlexDFA.YY_TRAILING_HEAD_MASK) not in accept_set:
- # look back to start of trailing context, then accept
- acc |= FlexDFA.YY_TRAILING_MASK
- # otherwise zero length trailing context, accept immediately
- else:
- # mark start of (hopefully safe) trailing context
- acc |= FlexDFA.YY_TRAILING_HEAD_MASK
- if acc not in accept_set:
- if n_acclist >= self.acclist.shape[0]:
- # extend acclist
- new_acclist = numpy.zeros(
- (self.acclist.shape[0] * 2,),
- numpy.uint16
- )
- new_acclist[:self.acclist.shape[0]] = self.acclist
- self.acclist = new_acclist
- self.acclist[n_acclist] = acc
- n_acclist += 1
- accept_set.add(acc)
-
- # calculate transition row from _dfa.state character-to-action table
- if n_states >= transitions.shape[0]:
- # extend transitions
- new_transitions = numpy.zeros(
- (transitions.shape[0] * 2, 0x101),
- numpy.uint16
- )
- new_transitions[:transitions.shape[0], :] = transitions
- transitions = new_transitions
- breaks, actions, _ = _dfa.states[state]
- character0 = 0
- for i in range(len(breaks)):
- character1 = breaks[i]
- next_action = actions[i]
- if next_action in action_to_flex_state:
- next_flex_state = action_to_flex_state[next_action]
- else:
- next_flex_state = len(flex_state_to_action)
- action_to_flex_state[next_action] = next_flex_state
- flex_state_to_action.append(next_action)
- transitions[n_states, character0:character1] = next_flex_state
- character0 = character1
- assert character0 == 0x100
-
- n_states += 1
-
- # finalize the acclist and accept tables
- self.acclist = self.acclist[:n_acclist]
- if n_states >= self.accept.shape[0]:
- new_accept = numpy.zeros(
- (self.accept.shape[0] * 2,),
- numpy.uint16
- )
- new_accept[:self.accept.shape[0]] = self.accept
- self.accept = new_accept
- self.accept[n_states] = n_acclist
- self.accept = self.accept[:n_states + 1]
-
- # finalize the transitions table
- transitions = transitions[:n_states, :]
- transitions[:, 0x100] = transitions[:, 0]
- transitions[:, 0] = eob_state
-
- # calculate default states by constructing minimum spanning tree
- # heap contains n states todo followed by n_states - n states done
- # each heap entry is [distance, hop count, state done, state todo]
- heap = numpy.zeros((n_states, 4), numpy.uint16)
- heap[:-1, 0] = numpy.sum(transitions[1:, :] != transitions[:1, :], 1)
- heap[:-1, 3] = numpy.arange(1, n_states, dtype = numpy.uint16)
- numpy_heap.heapify(heap, n_states - 1)
- for n in range(n_states - 2, 0, -1):
- if n % 100 == 0:
- print('mst', n)
-
- key = tuple(heap[n, :])
- heap[n, :] = heap[0, :]
- numpy_heap.bubble_down(heap, 0, key, n)
- hop_count = heap[n, 1] + 1 # proposed hop_count is current hop_count + 1
- state_done = heap[n, 3] # proposed state_done is current state_todo
- dist = numpy.sum(
- transitions[heap[:n, 3], :] !=
- transitions[state_done:state_done + 1, :],
- 1
- )
- # although numpy cannot do lexicographic comparisons, check the
- # first field via numpy to quickly generate a list of candidates
- for i in numpy.nonzero(dist <= heap[:n, 0])[0]:
- key = (dist[i], hop_count, state_done, heap[i, 3])
- if key < tuple(heap[i, :]):
- numpy_heap.bubble_up(heap, i, key)
-
- # state 0 is the jam state, the EOB state will be added later on
- self.states = numpy.zeros((n_states, 2), numpy.uint16) # base, def
- self.entries = numpy.full((0x200, 2), -1, numpy.uint16) # nxt, chk
- self.entries[:0x101, :] = 0 # jam state just returns to jam state
- self.entries[0, 0] = eob_state # except for the EOB transition
- entries_free = numpy.full(0x200, True, numpy.bool)
- entries_free[:0x101] = False # account for the jam (don't care) state
- n_entries = 0x101
-
- # pack states in reverse order (larger distances first)
- dupes = []
- for i in range(n_states - 1):
- if (n_states - i) % 100 == 0:
- print('pack', n_states - i)
-
- if heap[i, 0] == 0:
- # when copying another state, need to have the same base, though
- # the base will not matter since there will be no entries, it is
- # is because of the awkward way the compressed lookup is written
- dupes.append(i)
- else:
- state_done = heap[i, 2]
- state_todo = heap[i, 3]
- indices = numpy.nonzero(
- transitions[state_todo, :] != transitions[state_done, :]
- )[0]
-
- # make sure entries array is at least large enough to find a spot
- while self.entries.shape[0] < n_entries + 0x101:
- # extend entries, copying only n_entries entries
- new_entries = numpy.full(
- (self.entries.shape[0] * 2, 2),
- -1,
- numpy.uint16
- )
- new_entries[:n_entries, :] = self.entries[:n_entries, :]
- self.entries = new_entries
-
- # extend entries_free, copying only n_entries entries
- new_entries_free = numpy.full(
- (entries_free.shape[0] * 2,),
- True,
- numpy.bool
- )
- new_entries_free[:n_entries] = entries_free[:n_entries]
- entries_free = new_entries_free
-
- # find a suitable spot and store differences from default state
- # generate a list of candidates via numpy for the first slot only
- for start_index in numpy.nonzero(
- entries_free[indices[0]:n_entries]
- )[0]:
- indices_offset = indices + start_index
- if numpy.all(entries_free[indices_offset]):
- break
- else:
- start_index = n_entries
- indices_offset = indices + start_index
- self.entries[indices_offset, 0] = transitions[state_todo, indices]
- self.entries[indices_offset, 1] = state_todo
- entries_free[indices_offset] = False
- if n_entries < start_index + 0x101:
- n_entries = start_index + 0x101
-
- self.states[state_todo, 0] = start_index
- self.states[state_todo, 1] = state_done
- while len(dupes):
- if len(dupes) % 100 == 0:
- print('dupe', len(dupes))
-
- i = dupes.pop()
- state_done = heap[i, 2]
- state_todo = heap[i, 3]
- self.states[state_todo, 0] = self.states[state_done, 0]
- self.states[state_todo, 1] = state_done
-
- # finalize entries table
- self.entries = self.entries[:n_entries, :]
- print('n_entries', n_entries)
-
-def generate(plex, skel_file, out_file):
- _nfa = plex.to_nfa()
- #print(len(_nfa.states), _nfa.start_state)
- eob_expr = regex.RegexGroup(children = [regex.RegexEmpty()])
- eob_expr.post_process(len(plex.actions_text))
- eob_expr.add_to_nfa(_nfa)
- _dfa = _nfa.to_dfa()
- #print(len(_dfa.states), len(_dfa.actions), _dfa.start_action)
- flex_dfa = FlexDFA(_dfa)
+ def __init__( self, accept, acclist, entries, states):
+ self.accept = accept
+ self.acclist = acclist
+ self.states = states
+ self.entries = entries
- if out_file is None:
- out_file = (
- plex[0].out_file
- if len(plex[0].outfile) else
- 'lex.{0:s}.c'.format(plex[0].prefix)
- )
- with open(skel_file, 'r') as fin:
- with open(out_file, 'w+') as fout:
- line = fin.readline()
- while len(line):
- if line == '/* GENERATE PREFIX */\n':
- fout.write(
- '''/* GENERATE PREFIX BEGIN */
-{0:s}/* GENERATE END */
-'''.format(
- ''
- if plex[0].prefix == 'yy' else
- ''.join(
- [
- '#define yy{0:s} {1:s}{2:s}\n'.format(
- i,
- plex[0].prefix,
- i
- )
- for i in [
- '_create_buffer',
- '_delete_buffer',
- '_scan_buffer',
- '_scan_string',
- '_scan_bytes',
- '_init_buffer',
- '_flush_buffer',
- '_load_buffer_state',
- '_switch_to_buffer',
- 'push_buffer_state',
- 'pop_buffer_state',
- 'ensure_buffer_stack',
- '_flex_debug',
- 'in',
- 'leng',
- 'lex',
- 'lineno',
- 'out',
- 'restart',
- 'text',
- 'wrap',
- 'alloc',
- 'realloc',
- 'free',
- '_create_buffer',
- '_delete_buffer',
- '_scan_buffer',
- '_scan_string',
- '_scan_bytes',
- '_init_buffer',
- '_flush_buffer',
- '_load_buffer_state',
- '_switch_to_buffer',
- 'push_buffer_state',
- 'pop_buffer_state',
- 'ensure_buffer_stack',
- 'lex',
- 'restart',
- 'lex_init',
- 'lex_init_extra',
- 'lex_destroy',
- 'get_debug',
- 'set_debug',
- 'get_extra',
- 'set_extra',
- 'get_in',
- 'set_in',
- 'get_out',
- 'set_out',
- 'get_leng',
- 'get_text',
- 'get_lineno',
- 'set_lineno',
- 'wrap',
- 'alloc',
- 'realloc',
- 'free',
- 'text',
- 'leng',
- 'in',
- 'out',
- '_flex_debug',
- 'lineno'
- ]
- ]
- )
- )
- )
- elif line == '/* GENERATE YYWRAP */\n':
- fout.write(
- '''/* GENERATE YYWRAP BEGIN */
-{0:s}/* GENERATE END */
-'''.format(
- ''
- if plex[0].yywrap else
- '''#define {0:s}wrap() (/*CONSTCOND*/1)
-#define YY_SKIP_YYWRAP
-'''.format(
- plex[0].prefix
- )
- )
- )
- elif line == '/* GENERATE TABLES */\n':
- fout.write(
- '''/* GENERATE TABLES BEGIN */
-#define YY_END_OF_BUFFER {0:d}
-static const flex_uint16_t yy_acclist[] = {{{1:s}
-}};
-static const flex_uint16_t yy_accept[] = {{{2:s}
-}};
-static const flex_uint16_t yy_base[] = {{{3:s}
-}};
-static const flex_uint16_t yy_def[] = {{{4:s}
-}};
-static const flex_uint16_t yy_nxt[] = {{{5:s}
-}};
-static const flex_uint16_t yy_chk[] = {{{6:s}
-}};
-/* GENERATE END */
-'''.format(
- len(plex.actions_text),
- ','.join(
- [
- '\n\t{0:s}'.format(
- ', '.join(
- [
- '{0:5d}'.format(j)
- for j in flex_dfa.acclist[i:i + 10]
- ]
- )
- )
- for i in range(0, flex_dfa.acclist.shape[0], 10)
- ]
- ),
- ','.join(
- [
- '\n\t{0:s}'.format(
- ', '.join(
- [
- '{0:5d}'.format(j)
- for j in flex_dfa.accept[i:i + 10]
- ]
- )
- )
- for i in range(0, flex_dfa.accept.shape[0], 10)
- ]
- ),
- ','.join(
- [
- '\n\t{0:s}'.format(
- ', '.join(
- [
- '{0:5d}'.format(j)
- for j in flex_dfa.states[i:i + 10, 0]
- ]
- )
- )
- for i in range(0, flex_dfa.states.shape[0], 10)
- ]
- ),
- ','.join(
- [
- '\n\t{0:s}'.format(
- ', '.join(
- [
- '{0:5d}'.format(j)
- for j in flex_dfa.states[i:i + 10, 1]
- ]
- )
- )
- for i in range(0, flex_dfa.states.shape[0], 10)
- ]
- ),
- ','.join(
- [
- '\n\t{0:s}'.format(
- ', '.join(
- [
- '{0:5d}'.format(j)
- for j in flex_dfa.entries[i:i + 10, 0]
- ]
- )
- )
- for i in range(0, flex_dfa.entries.shape[0], 10)
- ]
- ),
- ','.join(
- [
- '\n\t{0:s}'.format(
- ', '.join(
- [
- '{0:5d}'.format(j)
- for j in flex_dfa.entries[i:i + 10, 1]
- ]
- )
- )
- for i in range(0, flex_dfa.entries.shape[0], 10)
- ]
- )
- )
- )
- elif line == '/* GENERATE SECTION1 */\n':
- fout.write(
- '''/* GENERATE SECTION1 BEGIN */
-{0:s}/* GENERATE END */
-'''.format(
- ''.join([i.get_text() for i in plex[0].code_blocks_text])
- )
- )
- elif line == '/* GENERATE STARTCONDDECL */\n':
- fout.write(
- '''/* GENERATE STARTCONDDECL BEGIN */
-{0:s}/* GENERATE END*/
-'''.format(
- ''.join(
- [
- '#define {0:s} {1:d}\n'.format(
- plex.start_conditions[i].name,
- i
- )
- for i in range(len(plex.start_conditions))
- ]
- )
- )
- )
- elif line == '/* GENERATE SECTION2INITIAL */\n':
- fout.write(
- '''/* GENERATE SECTION2INITIAL BEGIN */
-{0:s}/* GENERATE END */
-'''.format(
- ''.join([i.get_text() for i in plex[1].code_blocks_text])
- )
- )
- elif line == '/* GENERATE SECTION2 */\n':
- eof_action_to_start_conditions = [
- [
- j
- for j in range(len(plex.start_conditions))
- if plex.start_conditions[j].eof_action == i
- ]
- for i in range(len(plex.eof_actions_text))
- ]
- #print('eof_action_to_start_conditions', eof_action_to_start_conditions)
- fout.write(
- '''/* GENERATE SECTION2 BEGIN */
-{0:s}{1:s}/* GENERATE END */
-'''.format(
- ''.join(
- [
- '''case {0:d}:
-YY_RULE_SETUP
-{1:s} YY_BREAK
-'''.format(
- i,
- plex.actions_text[i].get_text()
- )
- for i in range(len(plex.actions_text))
- ]
- ),
- ''.join(
- [
- '{0:s}{1:s}'.format(
- ''.join(
- [
- '\t\t\tcase YY_STATE_EOF({0:s}):\n'.format(
- plex.start_conditions[j].name
- )
- for j in eof_action_to_start_conditions[i]
- ]
- ),
- plex.eof_actions_text[i].get_text()
- )
- for i in range(len(plex.eof_actions_text))
- if len(eof_action_to_start_conditions[i]) > 0
- ]
- )
- )
- )
- elif line == '/* GENERATE SECTION3 */\n':
- fout.write(
- '''/* GENERATE SECTION3 BEGIN */
-{0:s}/* GENERATE END */
-'''.format(
- '' if len(plex) < 3 else plex[2].get_text()
- )
- )
- else:
- if plex[0].prefix != 'yy':
- line = line.replace('yywrap', '{0:s}wrap'.format(plex[0].prefix))
- fout.write(line)
- line = fin.readline()
+ # add some test routines here for scanning text, etc, similar to DFA()
+ # add repr() routine, to use with wrap_repr() for basic serialization
--- /dev/null
+import regex
+
+def generate_flex(plex, skel_file, out_file):
+ _nfa = plex.to_nfa()
+
+ # end of buffer expression (do it here because only necessary for flex)
+ _regex = regex.RegexGroup(children = [regex.RegexEmpty()])
+ _regex.post_process(len(plex.actions_text))
+ _regex.add_to_nfa(_nfa)
+
+ _flex_dfa = _nfa.to_dfa().to_flex_dfa()
+
+ if out_file is None:
+ out_file = (
+ plex[0].out_file
+ if len(plex[0].outfile) else
+ 'lex.{0:s}.c'.format(plex[0].prefix)
+ )
+ with open(skel_file, 'r') as fin:
+ with open(out_file, 'w+') as fout:
+ line = fin.readline()
+ while len(line):
+ if line == '/* GENERATE PREFIX */\n':
+ fout.write(
+ '''/* GENERATE PREFIX BEGIN */
+{0:s}/* GENERATE END */
+'''.format(
+ ''
+ if plex[0].prefix == 'yy' else
+ ''.join(
+ [
+ '#define yy{0:s} {1:s}{2:s}\n'.format(
+ i,
+ plex[0].prefix,
+ i
+ )
+ for i in [
+ '_create_buffer',
+ '_delete_buffer',
+ '_scan_buffer',
+ '_scan_string',
+ '_scan_bytes',
+ '_init_buffer',
+ '_flush_buffer',
+ '_load_buffer_state',
+ '_switch_to_buffer',
+ 'push_buffer_state',
+ 'pop_buffer_state',
+ 'ensure_buffer_stack',
+ '_flex_debug',
+ 'in',
+ 'leng',
+ 'lex',
+ 'lineno',
+ 'out',
+ 'restart',
+ 'text',
+ 'wrap',
+ 'alloc',
+ 'realloc',
+ 'free',
+ '_create_buffer',
+ '_delete_buffer',
+ '_scan_buffer',
+ '_scan_string',
+ '_scan_bytes',
+ '_init_buffer',
+ '_flush_buffer',
+ '_load_buffer_state',
+ '_switch_to_buffer',
+ 'push_buffer_state',
+ 'pop_buffer_state',
+ 'ensure_buffer_stack',
+ 'lex',
+ 'restart',
+ 'lex_init',
+ 'lex_init_extra',
+ 'lex_destroy',
+ 'get_debug',
+ 'set_debug',
+ 'get_extra',
+ 'set_extra',
+ 'get_in',
+ 'set_in',
+ 'get_out',
+ 'set_out',
+ 'get_leng',
+ 'get_text',
+ 'get_lineno',
+ 'set_lineno',
+ 'wrap',
+ 'alloc',
+ 'realloc',
+ 'free',
+ 'text',
+ 'leng',
+ 'in',
+ 'out',
+ '_flex_debug',
+ 'lineno'
+ ]
+ ]
+ )
+ )
+ )
+ elif line == '/* GENERATE YYWRAP */\n':
+ fout.write(
+ '''/* GENERATE YYWRAP BEGIN */
+{0:s}/* GENERATE END */
+'''.format(
+ ''
+ if plex[0].yywrap else
+ '''#define {0:s}wrap() (/*CONSTCOND*/1)
+#define YY_SKIP_YYWRAP
+'''.format(
+ plex[0].prefix
+ )
+ )
+ )
+ elif line == '/* GENERATE TABLES */\n':
+ fout.write(
+ '''/* GENERATE TABLES BEGIN */
+#define YY_END_OF_BUFFER {0:d}
+static const flex_uint16_t yy_acclist[] = {{{1:s}
+}};
+static const flex_uint16_t yy_accept[] = {{{2:s}
+}};
+static const flex_uint16_t yy_base[] = {{{3:s}
+}};
+static const flex_uint16_t yy_def[] = {{{4:s}
+}};
+static const flex_uint16_t yy_nxt[] = {{{5:s}
+}};
+static const flex_uint16_t yy_chk[] = {{{6:s}
+}};
+/* GENERATE END */
+'''.format(
+ len(plex.actions_text),
+ ','.join(
+ [
+ '\n\t{0:s}'.format(
+ ', '.join(
+ [
+ '{0:5d}'.format(j)
+ for j in _flex_dfa.acclist[i:i + 10]
+ ]
+ )
+ )
+ for i in range(0, _flex_dfa.acclist.shape[0], 10)
+ ]
+ ),
+ ','.join(
+ [
+ '\n\t{0:s}'.format(
+ ', '.join(
+ [
+ '{0:5d}'.format(j)
+ for j in _flex_dfa.accept[i:i + 10]
+ ]
+ )
+ )
+ for i in range(0, _flex_dfa.accept.shape[0], 10)
+ ]
+ ),
+ ','.join(
+ [
+ '\n\t{0:s}'.format(
+ ', '.join(
+ [
+ '{0:5d}'.format(j)
+ for j in _flex_dfa.states[i:i + 10, 0]
+ ]
+ )
+ )
+ for i in range(0, _flex_dfa.states.shape[0], 10)
+ ]
+ ),
+ ','.join(
+ [
+ '\n\t{0:s}'.format(
+ ', '.join(
+ [
+ '{0:5d}'.format(j)
+ for j in _flex_dfa.states[i:i + 10, 1]
+ ]
+ )
+ )
+ for i in range(0, _flex_dfa.states.shape[0], 10)
+ ]
+ ),
+ ','.join(
+ [
+ '\n\t{0:s}'.format(
+ ', '.join(
+ [
+ '{0:5d}'.format(j)
+ for j in _flex_dfa.entries[i:i + 10, 0]
+ ]
+ )
+ )
+ for i in range(0, _flex_dfa.entries.shape[0], 10)
+ ]
+ ),
+ ','.join(
+ [
+ '\n\t{0:s}'.format(
+ ', '.join(
+ [
+ '{0:5d}'.format(j)
+ for j in _flex_dfa.entries[i:i + 10, 1]
+ ]
+ )
+ )
+ for i in range(0, _flex_dfa.entries.shape[0], 10)
+ ]
+ )
+ )
+ )
+ elif line == '/* GENERATE SECTION1 */\n':
+ fout.write(
+ '''/* GENERATE SECTION1 BEGIN */
+{0:s}/* GENERATE END */
+'''.format(
+ ''.join([i.get_text() for i in plex[0].code_blocks_text])
+ )
+ )
+ elif line == '/* GENERATE STARTCONDDECL */\n':
+ fout.write(
+ '''/* GENERATE STARTCONDDECL BEGIN */
+{0:s}/* GENERATE END*/
+'''.format(
+ ''.join(
+ [
+ '#define {0:s} {1:d}\n'.format(
+ plex.start_conditions[i].name,
+ i
+ )
+ for i in range(len(plex.start_conditions))
+ ]
+ )
+ )
+ )
+ elif line == '/* GENERATE SECTION2INITIAL */\n':
+ fout.write(
+ '''/* GENERATE SECTION2INITIAL BEGIN */
+{0:s}/* GENERATE END */
+'''.format(
+ ''.join([i.get_text() for i in plex[1].code_blocks_text])
+ )
+ )
+ elif line == '/* GENERATE SECTION2 */\n':
+ eof_action_to_start_conditions = [
+ [
+ j
+ for j in range(len(plex.start_conditions))
+ if plex.start_conditions[j].eof_action == i
+ ]
+ for i in range(len(plex.eof_actions_text))
+ ]
+ #print('eof_action_to_start_conditions', eof_action_to_start_conditions)
+ fout.write(
+ '''/* GENERATE SECTION2 BEGIN */
+{0:s}{1:s}/* GENERATE END */
+'''.format(
+ ''.join(
+ [
+ '''case {0:d}:
+YY_RULE_SETUP
+{1:s} YY_BREAK
+'''.format(
+ i,
+ plex.actions_text[i].get_text()
+ )
+ for i in range(len(plex.actions_text))
+ ]
+ ),
+ ''.join(
+ [
+ '{0:s}{1:s}'.format(
+ ''.join(
+ [
+ '\t\t\tcase YY_STATE_EOF({0:s}):\n'.format(
+ plex.start_conditions[j].name
+ )
+ for j in eof_action_to_start_conditions[i]
+ ]
+ ),
+ plex.eof_actions_text[i].get_text()
+ )
+ for i in range(len(plex.eof_actions_text))
+ if len(eof_action_to_start_conditions[i]) > 0
+ ]
+ )
+ )
+ )
+ elif line == '/* GENERATE SECTION3 */\n':
+ fout.write(
+ '''/* GENERATE SECTION3 BEGIN */
+{0:s}/* GENERATE END */
+'''.format(
+ '' if len(plex) < 3 else plex[2].get_text()
+ )
+ )
+ else:
+ if plex[0].prefix != 'yy':
+ line = line.replace('yywrap', '{0:s}wrap'.format(plex[0].prefix))
+ fout.write(line)
+ line = fin.readline()
#!/bin/sh
-if ./generate.py regex <regex.py >regex.py.new && ! diff -q regex.py regex.py.new
+if ./generate_ast.py regex <regex.py >regex.py.new && ! diff -q regex.py regex.py.new
then
mv regex.py.new regex.py
else