skel/skel_flex.c.orig
tests/*.c
tests/*.o
+tests/*.py
tests/*.xml
tests/cal
tests/flex0
import ast
import element
import generate_flex
+import generate_py
import getopt
import os
import sys
home_dir = os.path.dirname(sys.argv[0])
try:
- opts, args = getopt.getopt(sys.argv[1:], 'o:S:', ['outfile=', 'skel='])
+ opts, args = getopt.getopt(
+ sys.argv[1:],
+ 'o:pS:',
+ ['outfile=', 'python', 'skel=']
+ )
except getopt.GetoptError as err:
sys.stderr.write('{0:s}\n'.format(str(err)))
sys.exit(1)
out_file = None
-skel_file = os.path.join(home_dir, 'skel/skel_flex.c')
+python = False
+skel_file = None
for opt, arg in opts:
if opt == '-o' or opt == '--outfile':
out_file = arg
+ elif opt == '-p' or opt == '--python':
+ python = True
elif opt == '-S' or opt == '--skel':
skel_file = arg
else:
assert False
if len(args) < 1:
sys.stdout.write(
- 'usage: {0:s} rules.l\n'.format(
+ 'usage: {0:s} [options] rules.l\n'.format(
sys.argv[0]
)
)
#element.serialize(plex, 'a.xml', 'utf-8')
#plex = element.deserialize('a.xml', ast.factory, 'utf-8')
plex.post_process()
-element.serialize(plex, 'b.xml', 'utf-8')
-plex = element.deserialize('b.xml', ast.factory, 'utf-8')
-generate_flex.generate_flex(plex, skel_file, out_file)
+#element.serialize(plex, 'b.xml', 'utf-8')
+#plex = element.deserialize('b.xml', ast.factory, 'utf-8')
+(generate_py.generate_py if python else generate_flex.generate_flex)(
+ plex,
+ home_dir,
+ skel_file,
+ out_file
+)
def __init__(
self,
groups = [],
- states = [([n_characters], [0], 0)],
+ states = [([n_characters], [0], [0])],
actions = [(0, [])],
start_action = [] # can have multiple DFAs in same container
):
# group_desc: (tag, kwargs)
# tag, kwargs will be passed to apply_markup() hence factory()
# states: list of state_desc
- # state_desc: (list of breaks, list of action to do, accept_thread)
+ # state_desc: (list of breaks, list of action to do, accept_threads)
# actions: list of action_desc
# action_desc: (state to go to next, compiled transition to do first)
- # accept_thread: which thread of thread list to use, -1 don't accept
+ # accept_threads: list of accepting thread numbers (in thread list)
self.groups = groups
self.states = states
self.actions = actions
if state == 0:
# there is only one match, which is complete
assert len(threads0) == prefix_slop + 1
+ assert self.states[state][2] == [0]
return threads0[prefix_slop]
if i >= len(text):
# return best match we have, but not incomplete match
- i = self.states[state][2]
- return (None if i == -1 else threads0[prefix_slop + i])
+ accept = self.states[state][2]
+ return threads0[prefix_slop + accept[0]] if len(accept) else None
action = self.states[state][1][
bisect.bisect_right(self.states[state][0], ord(text[i]))
]
if state == 0:
# there is only one match, which is complete
assert len(threads0) == prefix_slop + 1
+ assert self.states[state][2] == [0]
return threads0[prefix_slop]
while off >= len(text):
if pos < len(root):
next(yychunk_iter)
except StopIteration:
# return best match we have, but not incomplete match
- i = self.states[state][2]
- return (None if i == -1 else threads0[prefix_slop + i])
+ accept = self.states[state][2]
+ return threads0[prefix_slop + accept[0]] if len(accept) else None
text = element.get_text(root, pos)
#print(
# 'state {0:d} pos {1:d} off {2:d} text "{3:s}"'.format(
+import os
import regex
-def generate_flex(plex, skel_file, out_file):
+def generate_flex(plex, home_dir, skel_file, out_file):
_nfa = plex.to_nfa()
# end of buffer expression (do it here because only necessary for flex)
_flex_dfa = _nfa.to_dfa().to_flex_dfa()
+ if skel_file is None:
+ skel_file = os.path.join(home_dir, 'skel/skel_flex.c')
if out_file is None:
out_file = (
plex[0].outfile
--- /dev/null
+import os
+import wrap_repr
+
def plex_text_to_python(plex_text, indent):
    """Re-indent an embedded code fragment for inclusion in generated Python.

    The fragment text is taken from ``plex_text.get_text()``.  If, ignoring
    surrounding whitespace, it is wrapped in ``{`` ... ``}``, only the part
    between the braces is used.  Leading and trailing blank lines are
    dropped, the leading run of spaces/tabs of the first remaining line is
    removed from every line, and ``indent`` is put in its place.

    Parameters:
        plex_text: object exposing ``get_text()`` that returns a ``str``.
        indent: string placed in front of every non-blank output line.

    Returns:
        The re-indented fragment with every line terminated by a newline,
        or ``''`` when the fragment holds no code.

    Raises:
        ValueError: a line containing code is indented less than (or
            differently from) the first line, so it cannot be re-indented.
    """
    text = plex_text.get_text()
    stripped = text.strip()
    # a braced action contributes only what lies between the braces
    if stripped[:1] == '{' and stripped[-1:] == '}':
        text = stripped[1:-1]
    lines = text.rstrip().split('\n')

    # discard whitespace-only lines at both ends
    first = 0
    last = len(lines)
    while first < last and not lines[first].strip():
        first += 1
    while last > first and not lines[last - 1].strip():
        last -= 1
    lines = lines[first:last]
    if not lines:
        return '' #{0:s}pass\n'.format(indent)

    # the first line's leading spaces/tabs define the indent to remove
    head = lines[0]
    prefix = head[:len(head) - len(head.lstrip(' \t'))]

    result = []
    for line in lines:
        if not line:
            result.append('\n')
        elif line.startswith(prefix):
            result.append('{0:s}{1:s}\n'.format(indent, line[len(prefix):]))
        elif not line.strip():
            # whitespace-only line shorter than the prefix (e.g. stray
            # trailing blanks): it is a blank line, not an indent error
            result.append('\n')
        else:
            raise ValueError(
                'inconsistent indentation in embedded code: {0!r}'.format(line)
            )
    return ''.join(result)
+
def generate_py(plex, home_dir, skel_file, out_file):
    """Write a Python scanner by expanding the GENERATE lines of a skeleton.

    The skeleton is copied to the output line by line.  A line that is
    exactly one of the known ``# GENERATE ...`` markers is replaced by a
    ``... BEGIN`` / ``# GENERATE END`` delimited section built from ``plex``;
    every other line is copied unchanged.

    Parameters:
        plex: parsed scanner specification; its NFA is converted to a DFA
            whose tables are embedded in the output.
        home_dir: directory used to locate the default skeleton.
        skel_file: skeleton path, or None for ``skel/skel_py.py`` under
            ``home_dir``.
        out_file: output path, or None to use ``plex[0].outfile`` when it is
            non-empty and ``lex_<prefix>.py`` otherwise.
    """
    _dfa = plex.to_nfa().to_dfa()

    if skel_file is None:
        skel_file = os.path.join(home_dir, 'skel/skel_py.py')
    if out_file is None:
        if len(plex[0].outfile):
            out_file = plex[0].outfile
        else:
            out_file = 'lex_{0:s}.py'.format(plex[0].prefix)

    # each builder is called only when its marker actually occurs in the
    # skeleton, so sections the skeleton does not ask for are never evaluated

    def section1():
        body = ''.join(
            [plex_text_to_python(block, '') for block in plex[0].code_blocks_text]
        )
        return '# GENERATE SECTION1 BEGIN\n{0:s}# GENERATE END\n'.format(body)

    def start_cond_decl():
        conditions = plex.start_conditions
        body = ''.join(
            [
                '{0:s} = {1:d}\n'.format(conditions[n].name, n)
                for n in range(len(conditions))
            ]
        )
        return '# GENERATE STARTCONDDECL BEGIN\n{0:s}# GENERATE END\n'.format(
            body
        )

    def section2():
        # DFA tables, each wrapped to 79 columns
        groups = wrap_repr.wrap_repr(
            'yy_dfa_groups = {0:s}'.format(repr(_dfa.groups)), 79
        )
        states = wrap_repr.wrap_repr(
            'yy_dfa_states = {0:s}'.format(repr(_dfa.states)), 79
        )
        actions = wrap_repr.wrap_repr(
            'yy_dfa_actions = {0:s}'.format(repr(_dfa.actions)), 79
        )
        start_action = wrap_repr.wrap_repr(
            'yy_dfa_start_action = {0:s}'.format(repr(_dfa.start_action)), 79
        )
        # one function per rule action, falling through to YYContinue
        action_defs = ''.join(
            [
                'def yy_action{0:d}():\n{1:s} raise YYContinue()\n'.format(
                    n, plex_text_to_python(plex.actions_text[n], ' ')
                )
                for n in range(len(plex.actions_text))
            ]
        )
        action_names = ','.join(
            [
                '\n yy_action{0:d}'.format(n)
                for n in range(len(plex.actions_text))
            ]
        )
        # one function per EOF action, falling through to return 0
        eof_defs = ''.join(
            [
                'def yy_eof_action{0:d}():\n{1:s} return 0\n'.format(
                    n, plex_text_to_python(plex.eof_actions_text[n], ' ')
                )
                for n in range(len(plex.eof_actions_text))
            ]
        )
        # table indexed by start condition
        eof_names = ','.join(
            [
                '\n yy_eof_action{0:d}'.format(condition.eof_action)
                for condition in plex.start_conditions
            ]
        )
        return '# GENERATE SECTION2 BEGIN\n{0:s}{1:s}{2:s}{3:s}{4:s}yy_actions = [{5:s}\n]\n{6:s}yy_eof_actions = [{7:s}\n]\n# GENERATE END\n'.format(
            groups,
            states,
            actions,
            start_action,
            action_defs,
            action_names,
            eof_defs,
            eof_names
        )

    def section2_initial():
        body = ''.join(
            [plex_text_to_python(block, ' ') for block in plex[1].code_blocks_text]
        )
        return ' # GENERATE SECTION2INITIAL BEGIN\n{0:s} # GENERATE END\n'.format(
            body
        )

    def section3():
        body = '' if len(plex) < 3 else plex_text_to_python(plex[2], '')
        return '# GENERATE SECTION3 BEGIN\n{0:s}# GENERATE END\n'.format(body)

    # marker lines are matched exactly, including leading blanks and newline
    builders = {
        '# GENERATE SECTION1\n': section1,
        '# GENERATE STARTCONDDECL\n': start_cond_decl,
        '# GENERATE SECTION2\n': section2,
        ' # GENERATE SECTION2INITIAL\n': section2_initial,
        '# GENERATE SECTION3\n': section3,
    }

    with open(skel_file, 'r') as fin, open(out_file, 'w+') as fout:
        for line in fin:
            builder = builders.get(line)
            if builder is None:
                #if plex[0].prefix != 'yy':
                #  line = line.replace('yywrap', '{0:s}wrap'.format(plex[0].prefix))
                fout.write(line)
            else:
                fout.write(builder())
def multistate_accept(root_multistate):
 i = 0
+ result = [] # thread numbers collected from MULTISTATE_ACCEPT nodes, in traversal order
 def accept(multistate):
- nonlocal i
+ nonlocal i # running thread number; result is only appended to, so it needs no nonlocal
 if multistate[0] == NFA.MULTISTATE_ACCEPT:
- return True
- if multistate[0] == NFA.MULTISTATE_AND:
- _, _, _, child = multistate
- i += child[1]
- return False
- if multistate[0] == NFA.MULTISTATE_OR:
+ result.append(i) # record this accepting node's thread number
+ i += 1
+ elif multistate[0] == NFA.MULTISTATE_AND:
+ i += multistate[1] # NOTE(review): presumably multistate[1] is the thread count of the AND node - confirm
+ elif multistate[0] == NFA.MULTISTATE_OR:
 _, _, child0, child1 = multistate
- return accept(child0) or accept(child1)
- assert False
- return i if accept(root_multistate) else -1
+ accept(child0) # both children are always visited, so every accepting node is reported
+ accept(child1)
+ else:
+ assert False
+ accept(root_multistate)
+ return result # empty list when nothing accepts; the callers shown test len(accept)
def match_text(self, text, i, start_index = 0):
def transit(transition):
return threads0[prefix_slop]
if i >= len(text):
# return best match we have, but not incomplete match
- i = NFA.multistate_accept(next_multistate)
- return (None if i == -1 else threads0[prefix_slop + i])
+ accept = NFA.multistate_accept(next_multistate)
+ return threads0[prefix_slop + accept[0]] if len(accept) else None
next_multistate, transition, _, _ = (
self.multistate_next(next_multistate, ord(text[i]))
)
next(yychunk_iter)
except StopIteration:
# return best match we have, but not incomplete match
- i = NFA.multistate_accept(next_multistate)
- return (None if i == -1 else threads0[prefix_slop + i])
+ accept = NFA.multistate_accept(next_multistate)
+ return threads0[prefix_slop + accept[0]] if len(accept) else None
text = element.get_text(root, pos)
next_multistate, transition, _, _ = (
self.multistate_next(next_multistate, ord(text[off]))
--- /dev/null
+import bisect
+import sys
+
+# GENERATE SECTION1
+
+# GENERATE STARTCONDDECL
+
+class YYContinue(Exception): # caught in yylex(), which then goes on to scan the next token
+ pass
+
+class YYTerminate(Exception): # raised by yyterminate(); yylex() returns 0 when it catches this
+ pass
+
+class YYBufferList: # base class of YYBufferBlock and YYBufferState: an object with a `next` link
+ def __init__(self, next = None):
+ self.next = next # following YYBufferBlock, or None at the end of the chain
+
+class YYBufferBlock(YYBufferList): # one chunk of input text (yylex() creates one per line read)
+ def __init__(self, next = None, pos = 0, text = ''):
+ YYBufferList.__init__(self, next)
+ self.pos = pos # index into text of the first character not yet consumed into yytext
+ self.text = text
+
+class YYBufferState(YYBufferList): # one entry of yy_buffer_stack; `next` is the head of its block chain
+ def __init__(self, next = None, file_in = None):
+ YYBufferList.__init__(self, next)
+ self.file_in = file_in # yylex() switches its input file to this when it falls back to this entry
+
+yyin = sys.stdin
+yyout = sys.stdout
+yy_buffer_stack = [YYBufferState(None, None)]
+
+yystart = INITIAL
+yy_threads0 = [None]
+yy_threads1 = [None]
+yy_prefix_slop = 1
+
+yytext = None
+
+def yyterminate(): # for use inside actions: unwinds to yylex(), which then returns 0
+ raise YYTerminate()
+
+# GENERATE SECTION2
+
+def yylex(): # scans one token; returns what the matched action (or EOF action) returns, or 0 if that action raises YYTerminate
+ global yyin, yy_threads0, yy_threads1, yy_prefix_slop, yytext
+
+ # GENERATE SECTION2INITIAL
+
+ while True: # one pass per match attempt; repeats when the action raises YYContinue
+ assert len(yy_threads0) == yy_prefix_slop
+ assert len(yy_threads1) == yy_prefix_slop
+ yy_threads0.append(None) # the single initial thread for this attempt
+
+ i = 0 # characters consumed so far in this attempt
+ buffer_ptr = len(yy_buffer_stack) - 1 # start reading at the top of the buffer stack
+ block_prev = yy_buffer_stack[buffer_ptr]
+ block = block_prev.next
+ if block is not None:
+ block_pos = block.pos # local read position; block.pos itself only advances once a match is taken
+ file_in = yyin
+
+ action = yy_dfa_start_action[yystart] # entry action for the current start condition
+ while action != -1: # NOTE(review): -1 presumably means "no transition" in the DFA tables - confirm
+ state, transition = yy_dfa_actions[action]
+ #print('i', i, 'action', action, 'state', state, 'transition', transition)
+
+ j = yy_prefix_slop # cursor into yy_threads0, starting just past the slop entries
+ for trans in transition:
+ if trans[0] == 0: #DFA.TRANSITION_POP:
+ j += trans[1]
+ elif trans[0] == 1: #DFA.TRANSITION_DUP:
+ while j < trans[1]: # not enough room below j: prepend None entries to both lists and double the slop
+ yy_threads0[:0] = [None] * yy_prefix_slop
+ yy_threads1[:0] = [None] * yy_prefix_slop
+ j += yy_prefix_slop
+ yy_prefix_slop *= 2
+ yy_threads0[j - trans[1]:j] = yy_threads0[j:j + trans[1]] # copy the next trans[1] threads into the slots just below j
+ j -= trans[1]
+ elif trans[0] == 2: #DFA.TRANSITION_MARK:
+ yy_threads0[j:j + trans[1]] = [
+ (i, trans[2], thread) # (position, mark value, previous thread)
+ for thread in yy_threads0[j:j + trans[1]]
+ ]
+ elif trans[0] == 3: #DFA.TRANSITION_MOVE:
+ yy_threads1.extend(yy_threads0[j:j + trans[1]]) # hand the next trans[1] threads over to the other list
+ j += trans[1]
+ #elif trans[0] == DFA.TRANSITION_DEL:
+ # del yy_threads1[-trans[1]:]
+ else:
+ assert False
+ assert j == len(yy_threads0) # every thread must have been visited by the transition
+ yy_threads0, yy_threads1 = yy_threads1, yy_threads0 # the list just built becomes the current one
+ del yy_threads1[yy_prefix_slop:] # cut the other list back to the slop entries only
+
+ if state == 0:
+ # there is only one match, which is complete
+ assert len(yy_threads0) == yy_prefix_slop + 1
+ assert yy_dfa_states[state][2] == [0]
+ break
+
+ while block is None or block_pos >= len(block.text): # loop until a character is available at block_pos, or input runs out
+ if block is None:
+ text = file_in.readline() # no buffered text left: read another line from the current file
+ if len(text):
+ block = YYBufferBlock(None, 0, text)
+ block_pos = 0
+ block_prev.next = block # append the new block to the chain
+ else: # readline() returned '': continue with the next lower buffer stack entry
+ buffer_ptr -= 1
+ if buffer_ptr < 0:
+ break # EOF
+ block_prev = yy_buffer_stack[buffer_ptr]
+ block = block_prev.next
+ if block is not None:
+ block_pos = block.pos
+ file_in = yy_buffer_stack[buffer_ptr].file_in
+ else: # current block has no characters left at block_pos: step to the following block
+ block_prev = block
+ block = block_prev.next
+ if block is not None:
+ block_pos = block.pos
+ else: # NOTE(review): presumably the else of the while above (a character is available) - nesting is not visible here, confirm
+ #print('block_pos', block_pos, 'block.text', block.text)
+ action = yy_dfa_states[state][1][
+ bisect.bisect_right(
+ yy_dfa_states[state][0],
+ ord(block.text[block_pos])
+ )
+ ]
+ i += 1
+ block_pos += 1
+ continue
+ # EOF
+ if i == 0: # nothing consumed before EOF: run the EOF action of the current start condition
+ del yy_threads0[yy_prefix_slop:]
+ try:
+ return yy_eof_actions[yystart]()
+ except YYTerminate:
+ return 0
+ break
+
+ accept = yy_dfa_states[state][2] # accepting thread numbers of the final state
+ if len(accept) == 0:
+ del yy_threads0[yy_prefix_slop:]
+ raise Exception('scanner jammed')
+ _, _, thread = yy_threads0[yy_prefix_slop + accept[0]] # first accepting thread; drop its outermost mark record
+ del yy_threads0[yy_prefix_slop:]
+ #print('thread', thread)
+ i, mark, thread = thread # i is now the number of characters to consume; mark selects the action
+ assert thread is None
+
+ yytext = ''
+ while len(yytext) < i: # move the i matched characters out of the buffer blocks into yytext
+ block = yy_buffer_stack[-1].next
+ while block is None or block.pos >= len(block.text):
+ if block is None: # this buffer has no blocks left: drop it and use the one below
+ yy_buffer_stack.pop()
+ block = yy_buffer_stack[-1].next
+ yyin = yy_buffer_stack[-1].file_in
+ else: # block fully consumed: unlink it from the chain
+ block = block.next
+ yy_buffer_stack[-1].next = block
+ j = min(i - len(yytext), len(block.text) - block.pos)
+ yytext += block.text[block.pos:block.pos + j]
+ block.pos += j
+ #print('yytext', yytext)
+
+ try:
+ return yy_actions[mark >> 1]() # the action index is mark >> 1
+ except YYContinue:
+ pass
+ except YYTerminate:
+ return 0
+
+# GENERATE SECTION3
-all: cal flex0 flex1
+all: lex_yy.py cal flex0 flex1
+
+# Python scanner test
+lex_yy.py: cal_py.l
+ ../../bootstrap_flex.git/src/flex -o /dev/null $< 2>$<.xml
+ ../bootstrap_plex.py --python $<.xml
# cal program
cal: y.tab.o
# other
clean:
- rm -f *.c *.o *.xml cal flex0 flex1
+ rm -f *.c *.o *.py *.xml cal flex0 flex1
--- /dev/null
+%{
+# this is section 1
+NUM = 0x100
+yylval = None
+%}
+
+DIGIT [0-9]+\.?|[0-9]*\.[0-9]+
+
+%option noecs nometa-ecs noyywrap reject yymore
+
+%%
+
+ # this is section 2 initial
+
+[ ]
+{DIGIT} {
+ global yylval
+ yylval = float(yytext)
+ return NUM
+}
+\n|. {
+ return ord(yytext[0])
+}
+
+%%
+
+# this is section 3
+if __name__ == '__main__':
+ token = yylex()
+ while token != 0:
+ if token == NUM:
+ print('NUM', yylval)
+ else:
+ print('{0:02x}'.format(token))
+ token = yylex()