import element
import regex
+class Name(element.Element):
+ # Element subclass whose default tag is 'Name'; it adds no state of its own.
+ # The driver script reads a Name's text with element.get_text(j, 0) to obtain
+ # a start-condition name, both inside StartCondDecl elements and in a rule's
+ # explicit start-condition list.
+ # NOTE(review): the attrib = {} and children = [] defaults are shared mutable
+ # objects; this is only safe if element.Element.__init__ copies them -- confirm.
+ # GENERATE ELEMENT() BEGIN
+ def __init__(
+ self,
+ tag = 'Name',
+ attrib = {},
+ text = '',
+ children = []
+ ):
+ # forward all four arguments unchanged to the base-class constructor
+ element.Element.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ def copy(self, factory = None):
+ # delegate to element.Element.copy, passing Name as the factory unless the
+ # caller supplies a different one
+ result = element.Element.copy(
+ self,
+ Name if factory is None else factory
+ )
+ return result
+ def __repr__(self):
+ # repr_serialize fills params; they are joined into 'ast.Name(...)'
+ params = []
+ self.repr_serialize(params)
+ return 'ast.Name({0:s})'.format(', '.join(params))
+ # GENERATE END
+
class Section1(element.Element):
# GENERATE ELEMENT() BEGIN
def __init__(
for i in self:
i.process(options)
+class StartCondDecl(element.Element):
+ # Element subclass (default tag 'StartCondDecl') carrying one extra field,
+ # the boolean `exclusive`. The driver script iterates its children, asserts
+ # each is an ast.Name, registers one start condition per child, and adds the
+ # condition to the inclusive set when `exclusive` is false.
+ # NOTE(review): the attrib = {} and children = [] defaults are shared mutable
+ # objects; this is only safe if element.Element.__init__ copies them -- confirm.
+ # GENERATE ELEMENT(bool exclusive) BEGIN
+ def __init__(
+ self,
+ tag = 'StartCondDecl',
+ attrib = {},
+ text = '',
+ children = [],
+ exclusive = False
+ ):
+ element.Element.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ # `exclusive` may be given as a str, in which case it is converted with
+ # element.deserialize_bool; any other value is stored as-is
+ self.exclusive = (
+ element.deserialize_bool(exclusive)
+ if isinstance(exclusive, str) else
+ exclusive
+ )
+ def serialize(self, ref_list, indent = 0):
+ # run the base-class serialize, then store `exclusive` as the 'exclusive'
+ # attribute using element.serialize_bool
+ element.Element.serialize(self, ref_list, indent)
+ self.set('exclusive', element.serialize_bool(self.exclusive))
+ def deserialize(self, ref_list):
+ # run the base-class deserialize, then read the 'exclusive' attribute back,
+ # using the string 'false' when the attribute is absent
+ element.Element.deserialize(self, ref_list)
+ self.exclusive = element.deserialize_bool(self.get('exclusive', 'false'))
+ def copy(self, factory = None):
+ # delegate to element.Element.copy (StartCondDecl is the default factory)
+ # and carry the `exclusive` flag over to the result
+ result = element.Element.copy(
+ self,
+ StartCondDecl if factory is None else factory
+ )
+ result.exclusive = self.exclusive
+ return result
+ def repr_serialize(self, params):
+ # append 'exclusive = ...' only when the flag differs from its default False
+ element.Element.repr_serialize(self, params)
+ if self.exclusive != False:
+ params.append(
+ 'exclusive = {0:s}'.format(repr(self.exclusive))
+ )
+ def __repr__(self):
+ # repr_serialize fills params; they are joined into 'ast.StartCondDecl(...)'
+ params = []
+ self.repr_serialize(params)
+ return 'ast.StartCondDecl({0:s})'.format(', '.join(params))
+ # GENERATE END
+
class Section2(element.Element):
# GENERATE ELEMENT() BEGIN
def __init__(
return 'ast.StartCondNone({0:s})'.format(', '.join(params))
# GENERATE END
+class StartCond(element.Element):
+ # Element subclass whose default tag is 'StartCond'; it adds no state of its
+ # own.
+ # NOTE(review): presumably the explicit start-condition list of a rule (the
+ # counterpart of StartCondNone, whose children are ast.Name elements) --
+ # confirm against the parser that builds the tree.
+ # NOTE(review): the attrib = {} and children = [] defaults are shared mutable
+ # objects; this is only safe if element.Element.__init__ copies them -- confirm.
+ # GENERATE ELEMENT() BEGIN
+ def __init__(
+ self,
+ tag = 'StartCond',
+ attrib = {},
+ text = '',
+ children = []
+ ):
+ # forward all four arguments unchanged to the base-class constructor
+ element.Element.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ def copy(self, factory = None):
+ # delegate to element.Element.copy, passing StartCond as the factory unless
+ # the caller supplies a different one
+ result = element.Element.copy(
+ self,
+ StartCond if factory is None else factory
+ )
+ return result
+ def __repr__(self):
+ # repr_serialize fills params; they are joined into 'ast.StartCond(...)'
+ params = []
+ self.repr_serialize(params)
+ return 'ast.StartCond({0:s})'.format(', '.join(params))
+ # GENERATE END
+
class BOLRule(element.Element):
# GENERATE ELEMENT() BEGIN
def __init__(
return 'ast.Rule({0:s})'.format(', '.join(params))
# GENERATE END
+class Action(element.Element):
+ # Element subclass whose default tag is 'Action'; it adds no state of its own.
+ # The driver script expects the fourth child of each ast.Rule to be an Action,
+ # builds the default EOF and ECHO actions as Action(text = ...), and reads the
+ # action code back with element.get_text(action, 0).
+ # NOTE(review): the attrib = {} and children = [] defaults are shared mutable
+ # objects; this is only safe if element.Element.__init__ copies them -- confirm.
+ # GENERATE ELEMENT() BEGIN
+ def __init__(
+ self,
+ tag = 'Action',
+ attrib = {},
+ text = '',
+ children = []
+ ):
+ # forward all four arguments unchanged to the base-class constructor
+ element.Element.__init__(
+ self,
+ tag,
+ attrib,
+ text,
+ children
+ )
+ def copy(self, factory = None):
+ # delegate to element.Element.copy, passing Action as the factory unless the
+ # caller supplies a different one
+ result = element.Element.copy(
+ self,
+ Action if factory is None else factory
+ )
+ return result
+ def __repr__(self):
+ # repr_serialize fills params; they are joined into 'ast.Action(...)'
+ params = []
+ self.repr_serialize(params)
+ return 'ast.Action({0:s})'.format(', '.join(params))
+ # GENERATE END
+
class Section3(element.Element):
# GENERATE ELEMENT() BEGIN
def __init__(
# GENERATE FACTORY(regex.factory) BEGIN
tag_to_class = {
+ 'Name': Name,
'Section1': Section1,
'CodeBlock': CodeBlock,
'Option': Option,
'Options_Reject': Options.Reject,
'Options_YYMore': Options.YYMore,
'Options_YYWrap': Options.YYWrap,
+ 'StartCondDecl': StartCondDecl,
'Section2': Section2,
'StartCondNone': StartCondNone,
+ 'StartCond': StartCond,
'BOLRule': BOLRule,
'EOFRule': EOFRule,
'Rule': Rule,
+ 'Action': Action,
'Section3': Section3
}
def factory(tag, attrib = {}, *args, **kwargs):
import ast
import element
+import getopt
#import lex
import numpy
+import os
import re
import regex
import sys
self.states.append((flex_accept, flex_base, flex_def))
#print(full_entries[:len(self.states), :])
#print(flex_state_to_action)
-
-if len(sys.argv) < 2:
+
+# Directory containing this script; used to locate the default skeleton file.
+home_dir = os.path.dirname(sys.argv[0])
+# Command line: [-o FILE | --outfile=FILE] [-S FILE | --skel=FILE] rules.l
+try:
+  opts, args = getopt.getopt(sys.argv[1:], 'o:S:', ['outfile=', 'skel='])
+except getopt.GetoptError as err:
+  # terminate the message with a newline so that the shell prompt (or any
+  # later diagnostic) does not run on from the end of it
+  sys.stderr.write('{0:s}\n'.format(str(err)))
+  sys.exit(1)
+
+# Defaults: write lex.yy.c in the current directory, read the skeleton that
+# sits next to this script.
+out_file = 'lex.yy.c'
+skel_file = os.path.join(home_dir, 'skel/lex.yy.c')
+for opt, arg in opts:
+  if opt == '-o' or opt == '--outfile':
+    out_file = arg
+  elif opt == '-S' or opt == '--skel':
+    skel_file = arg
+  else:
+    # getopt only returns options named in the spec above
+    assert False
+if len(args) < 1:
sys.stdout.write(
'usage: {0:s} rules.l\n'.format(
sys.argv[0]
)
)
sys.exit(1)
+in_file = args[0]
+print(in_file, out_file, skel_file)
#root = element.Element('root')
#mark = []
#macro_dict = {}
-#with open(sys.argv[1]) as fin:
+#with open(in_file) as fin:
# assert not yacc.yyparse(
# root,
# mark,
# for i in node:
# post_process(i)
#post_process(root)
-with open(sys.argv[1] + '.xml') as fin:
+with open(in_file + '.xml') as fin:
root = element.deserialize(fin, ast.factory)
#xml.etree.ElementTree.dump(root)
-options = Options()
-assert isinstance(root[0], ast.Section1)
-for i in root[0]:
- if isinstance(i, ast.Options):
- i.process(options)
-#print(options.yywrap)
-
class StartCondition:
+ # Plain record for one start condition.
+ # name: the condition's identifier (e.g. 'INITIAL').
+ # eof_action: index into the eof_actions list; the script creates every
+ # condition with 0, which is the slot of the default yyterminate() action.
def __init__(self, name, eof_action):
self.name = name
self.eof_action = eof_action
+# Start-condition tables. name_to_start_condition maps a condition name to its
+# index in start_conditions; inclusive_start_conditions holds the indices of
+# the non-exclusive conditions. 'INITIAL' is index 0 and is inclusive.
+name_to_start_condition = {'INITIAL': 0}
+inclusive_start_conditions = set([0])
start_conditions = [StartCondition('INITIAL', 0)]
-start_condition_exprs = [regex.RegexNone(), regex.RegexNone()]
-actions = []
-eof_actions = ['\t\t\t\tyyterminate();\n']
-assert isinstance(root[1], ast.Section2)
-for i in root[1]:
+# The first two children of the root must be Section1 and Section2; a missing
+# third child is replaced by an empty Section3 so later code need not test for it.
+section1 = root[0]
+assert isinstance(section1, ast.Section1)
+section2 = root[1]
+assert isinstance(section2, ast.Section2)
+if len(root) < 3:
+ section3 = ast.Section3()
+else:
+ section3 = root[2]
+ assert isinstance(section3, ast.Section3)
+
+# Section1 pass: apply option elements to `options` and register every start
+# condition named in a StartCondDecl. New conditions start with eof_action 0.
+# NOTE(review): a duplicate condition name is only caught by an assert, which
+# is skipped when Python runs with -O.
+options = Options()
+for i in section1:
+ if isinstance(i, ast.Options):
+ i.process(options)
+ elif isinstance(i, ast.StartCondDecl):
+ for j in i:
+ assert isinstance(j, ast.Name)
+ name = element.get_text(j, 0)
+ assert name not in name_to_start_condition
+ name_to_start_condition[name] = len(start_conditions)
+ if not i.exclusive:
+ inclusive_start_conditions.add(len(start_conditions))
+ start_conditions.append(StartCondition(name, 0))
+
+# eof_actions[0] is the default EOF action (yyterminate). Every start condition
+# gets two expression slots, created empty here as RegexNone.
+actions = []
+eof_actions = [ast.Action(text = '\t\t\t\tyyterminate();\n')]
+start_condition_exprs = [
+ regex.RegexNone()
+ for i in range(len(start_conditions) * 2) # normal followed by BOL expr
+]
+for i in section2:
+ # Section2 pass. Only ast.Rule children are handled by the code visible here.
+ # Children of a rule as read below: i[0] start-condition list, i[1] the
+ # expression (possibly an EOFRule or a BOLRule wrapper), i[2] trailing
+ # context, i[3] action.
if isinstance(i, ast.Rule):
- assert isinstance(i[0], ast.StartCondNone)
+ if isinstance(i[0], ast.StartCondNone):
+ # no explicit list: the rule targets the inclusive conditions (this is the
+ # shared set object itself, not a copy)
+ default = True
+ rule_start_conditions = inclusive_start_conditions
+ else:
+ # explicit list: look every name up; an undeclared name raises KeyError
+ default = False
+ rule_start_conditions = set()
+ for j in i[0]:
+ assert isinstance(j, ast.Name)
+ rule_start_conditions.add(
+ name_to_start_condition[element.get_text(j, 0)]
+ )
rule_expr = i[1]
+ rule_trailing_context = i[2]
+ assert isinstance(rule_trailing_context, regex.Regex)
+ rule_action = i[3]
+ assert isinstance(rule_action, ast.Action)
if isinstance(rule_expr, ast.EOFRule):
- assert isinstance(i[2], regex.RegexEmpty)
- assert start_conditions[0].eof_action is None
- start_conditions[0].eof_action = len(eof_actions)
- eof_actions.append(i[3])
+ # EOF rule: must have no trailing context (RegexNone). Each targeted
+ # condition is pointed at the slot the new action will occupy in eof_actions.
+ # NOTE(review): indentation is collapsed in this view, so it cannot be
+ # confirmed here whether eof_actions.append runs once per rule (presumably)
+ # or once per start condition -- check against the real file.
+ assert isinstance(rule_trailing_context, regex.RegexNone)
+ for j in rule_start_conditions:
+ if default and start_conditions[j].eof_action != 0:
+ continue # rule applies to start conditions with no EOF rule yet
+ assert start_conditions[j].eof_action == 0
+ start_conditions[j].eof_action = len(eof_actions)
+ eof_actions.append(rule_action)
else:
if isinstance(rule_expr, ast.BOLRule):
bol_rule = True
rule_expr = rule_expr[0]
else:
bol_rule = False
+ assert isinstance(rule_expr, regex.Regex)
+ # the rule becomes: expression followed by a group wrapping the trailing
+ # context
rule_expr = regex.RegexSequence(
children = [
rule_expr,
regex.RegexGroup(
children = [
- i[2] # trailing context
+ rule_trailing_context
]
)
]
)
+ # len(actions) is the index this rule's action will occupy in `actions`
rule_expr.post_process(len(actions))
- for j in range(int(bol_rule), 2):
- start_condition_exprs[j] = regex.RegexOr(
- children = [
- start_condition_exprs[j],
- rule_expr
- ]
- )
- actions.append(i[3])
+ # Slot 2*j is condition j's normal expression and slot 2*j+1 its BOL
+ # expression: a BOL rule is OR-ed only into slot 2*j+1, any other rule into
+ # both slots.
+ for j in rule_start_conditions:
+ for k in range(j * 2 + int(bol_rule), j * 2 + 2):
+ start_condition_exprs[k] = regex.RegexOr(
+ children = [
+ start_condition_exprs[k],
+ rule_expr
+ ]
+ )
+ actions.append(rule_action)
nfa = regex.NFA()
for i in range(len(start_condition_exprs)):
# make expr match as much as possible
+ # add default rule to match one char
start_condition_exprs[i] = regex.RegexAnd(
children = [
regex.RegexRepeat(
)
]
),
- start_condition_exprs[i]
+ regex.RegexOr(
+ children = [
+ start_condition_exprs[i],
+ regex.RegexSequence(
+ children = [
+ regex.RegexCharacter(
+ char_set = [0, 0x100]
+ ),
+ regex.RegexGroup(
+ group_index = len(actions),
+ children = [
+ regex.RegexEmpty()
+ ]
+ )
+ ]
+ )
+ ]
+ )
]
)
- print('i', i, 'expr', repr(start_condition_exprs[i]))
+ #print('i', i, 'expr', repr(start_condition_exprs[i]))
start_condition_exprs[i].add_to_nfa(nfa)
+actions.append(ast.Action(text = 'ECHO;\n'))
eob_expr = regex.RegexGroup(children = [regex.RegexEmpty()])
eob_expr.post_process(len(actions))
-print('eob expr', repr(eob_expr))
+#print('eob expr', repr(eob_expr))
eob_expr.add_to_nfa(nfa)
dfa = nfa.to_dfa()
#print(dfa.match_text('1.0 + 5', 0))
flex_dfa = FlexDFA(dfa) #nfa.to_dfa())
-with open('skel/lex.yy.c', 'r') as fin:
- with open('lex.yy.c', 'w+') as fout:
+with open(skel_file, 'r') as fin:
+ with open(out_file, 'w+') as fout:
line = fin.readline()
while len(line):
if line == '/* GENERATE SECTION1 */\n':
fout.write(
'''/* GENERATE SECTION1 BEGIN */
-{0:s}/*GENERATE SECTION1 END*/
+{0:s}/* GENERATE SECTION1 END*/
'''.format(
''.join(
[
element.get_text(i, 0)
- for i in root[0]
+ for i in section1
if isinstance(i, ast.CodeBlock)
]
)
)
)
+ elif line == '/* GENERATE STARTCONDDECL */\n':
+ fout.write(
+ '''/* GENERATE STARTCONDDECL BEGIN */
+{0:s}/* GENERATE STARTCONDDECL END*/
+'''.format(
+ ''.join(
+ [
+ '#define {0:s} {1:d}\n'.format(start_conditions[i].name, i)
+ for i in range(len(start_conditions))
+ ]
+ )
+ )
+ )
elif line == '/* GENERATE TABLES */\n':
yy_acclist = []
yy_accept = [0]
elif line == '/* GENERATE SECTION2INITIAL */\n':
fout.write(
'''/* GENERATE SECTION2INITIAL BEGIN */
-/* GENERATE SECTION2INITIAL END */
+{0:s}/* GENERATE SECTION2INITIAL END */
'''.format(
''.join(
[
element.get_text(i, 0)
- for i in root[1]
+ for i in section2
if isinstance(i, ast.CodeBlock)
]
)
[
j
for j in range(len(start_conditions))
-if start_conditions[i].eof_action == i
+ # i indexes eof_actions and j indexes start_conditions: collect the start
+ # conditions whose eof_action slot is i. Indexing start_conditions with i
+ # raised IndexError as soon as there were more EOF actions than start
+ # conditions, and grouped conditions under the wrong action otherwise.
+ if start_conditions[j].eof_action == i
]
for i in range(len(eof_actions))
]
+ #print('eof_action_to_start_conditions', eof_action_to_start_conditions)
fout.write(
'''/* GENERATE SECTION2 BEGIN */
{0:s}{1:s}/* GENERATE SECTION2 END */
for j in eof_action_to_start_conditions[i]
]
),
- eof_actions[i]
+ element.get_text(eof_actions[i], 0)
)
for i in range(len(eof_actions))
if len(eof_action_to_start_conditions[i]) > 0
)
)
elif line == '/* GENERATE SECTION3 */\n':
- assert len(root) < 2 or isinstance(root[2], ast.Section3)
fout.write(
'''/* GENERATE SECTION3 BEGIN */
{0:s}/*GENERATE SECTION3 END */
'''.format(
- element.get_text(root[2], 0) if len(root) >= 3 else ''
+ element.get_text(section3, 0)
)
)
else: