From 84741d5c1f0fd72a0b91813a4b534475eab45709 Mon Sep 17 00:00:00 2001 From: Nick Downing Date: Sun, 12 Aug 2018 12:14:30 +1000 Subject: [PATCH] Minor adjustments, change xml_to_l.py to scan_to_l.py, make it fix token names --- .gitignore | 7 +-- l_to_python.py | 31 +++++++++---- n.sh | 5 +- scan_to_l.py | 124 +++++++++++++++++++++++++++++++++++++++++++++++++ xml_to_l.py | 60 ------------------------ 5 files changed, 151 insertions(+), 76 deletions(-) create mode 100755 scan_to_l.py delete mode 100755 xml_to_l.py diff --git a/.gitignore b/.gitignore index 984a9d3..d9555a6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,9 @@ __pycache__ -a -b -c a.c a.i tests/*.l.xml +tests/*.l.new.xml +tests/*.l.new tests/*.y.xml +tests/*.y.new.xml +tests/*.y.new diff --git a/l_to_python.py b/l_to_python.py index 056491c..4f11487 100755 --- a/l_to_python.py +++ b/l_to_python.py @@ -26,6 +26,13 @@ def my_rstrip(text, indent): i -= 1 return text[:i].rstrip('\t ') + indent + text[i:] +context = ast.Context() +context.translate_identifier['BEGIN'] = 'self.BEGIN' +context.translate_identifier['yylval'] = 'ref_data.yylval' +context.translate_identifier['yytext'] = 'self.yytext' +context.translate_identifier['yy_pop_state'] = 'self.yy_pop_state' +context.translate_identifier['yy_push_state'] = 'self.yy_push_state' + actions = [] with open('a.c', 'w') as fout: def extract(i, parent, indent): @@ -44,7 +51,12 @@ with open('a.c', 'w') as fout: initial = False else: child_indent = indent - if ( + if i.tag == 'PLex_Section1_StartConditions': + for j in i: + assert j.tag == 'PLex_Name' + text = element.get_text(j, 0) + context.translate_identifier[text] = 'DFA.{0:s}'.format(text) + elif ( i.tag == 'PLex_Section2_Rule' or i.tag == 'PLex_Section2_Rule_FLexRule' ): @@ -103,13 +115,6 @@ with open('a.c', 'w') as fout: actions.append((i, parent, indent, initial)) extract(root, None, '') -context = ast.Context() -context.translate_identifier['BEGIN'] = 'self.BEGIN' -context.translate_identifier['yylval'] = 'yy.yylval' -context.translate_identifier['yytext'] = 'yy.yytext' -context.translate_identifier['yy_pop_state'] = 'self.yy_pop_state' -context.translate_identifier['yy_push_state'] = 'self.yy_push_state' - os.system('gcc -I tests/flex_h -E a.c >a.i') with open('a.i') as fin: for i, parent, indent, initial in actions: @@ -143,12 +148,18 @@ with open('a.i') as fin: prefix = element.get_text(parent, 2).rstrip('\t ') if len(text) == 0: text = '\n' + elif text == '\n': + pass elif text.index('\n') == len(text) - 1: prefix += ' /*COLUMN32*/ ' - text = text.lstrip('\t ') + text = 'pass {0:s}'.format(text.lstrip('\t ')) else: prefix += ' ' - text = '{{\n{0:s}{1:s}}}\n'.format(text, indent) + text = '{{\n{0:s} pass\n{1:s}{2:s}}}\n'.format( + indent, + text, + indent + ) element.set_text(parent, 2, prefix) element.set_text(i[0], 0, text) diff --git a/n.sh b/n.sh index a2cceca..7f3024d 100755 --- a/n.sh +++ b/n.sh @@ -1,5 +1,4 @@ #!/bin/sh ../bootstrap_flex.git/src/flex tests/scan.l 2>tests/scan.l.xml -./l_to_python.py a -./xml_to_l.py b -diff --unified --ignore-space-change tests/scan.l b >c +./l_to_python.py tests/scan.l.new.xml +./scan_to_l.py tests/scan.l.new diff --git a/scan_to_l.py b/scan_to_l.py new file mode 100755 index 0000000..d1f5291 --- /dev/null +++ b/scan_to_l.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 + +import element +import sys +import xml.etree.ElementTree + +root = xml.etree.ElementTree.parse(sys.stdin).getroot() + +def replace_in_action(i): + if i.tag == 'PLex_Section2_Rule_Action' and len(i): + assert i[0].tag == 'PLex_Text' + text = element.get_text(i[0], 0) + + # see tests/parse.y + text = text.replace('CHAR', 'y_tab.LR1.CHAR') + text = text.replace('NUMBER', 'y_tab.LR1.NUMBER') + text = text.replace('SECTEND', 'y_tab.LR1.SECTEND') + text = text.replace('SCDECL', 'y_tab.LR1.SCDECL') + text = text.replace('Xy_tab.LR1.SCDECL', 'y_tab.LR1.XSCDECL') + text = text.replace('NAME', 'y_tab.LR1.NAME') + text = text.replace('PREVCCL', 'y_tab.LR1.PREVCCL') + text = text.replace('EOF_OP', 'y_tab.LR1.EOF_OP') + text = text.replace('TOK_OPTION', 'y_tab.LR1.TOK_OPTION') + text = text.replace('TOK_OUTFILE', 'y_tab.LR1.TOK_OUTFILE') + text = text.replace('TOK_PREFIX', 'y_tab.LR1.TOK_PREFIX') + text = text.replace('TOK_YYCLASS', 'y_tab.LR1.TOK_YYCLASS') + text = text.replace('TOK_HEADER_FILE', 'y_tab.LR1.TOK_HEADER_FILE') + text = text.replace('TOK_EXTRA_TYPE', 'y_tab.LR1.TOK_EXTRA_TYPE') + text = text.replace('TOK_TABLES_FILE', 'y_tab.LR1.TOK_TABLES_FILE') + + text = text.replace('CCE_ALNUM', 'y_tab.LR1.CCE_ALNUM') + text = text.replace('CCE_ALPHA', 'y_tab.LR1.CCE_ALPHA') + text = text.replace('CCE_BLANK', 'y_tab.LR1.CCE_BLANK') + text = text.replace('CCE_CNTRL', 'y_tab.LR1.CCE_CNTRL') + text = text.replace('CCE_DIGIT', 'y_tab.LR1.CCE_DIGIT') + text = text.replace('CCE_GRAPH', 'y_tab.LR1.CCE_GRAPH') + text = text.replace('CCE_LOWER', 'y_tab.LR1.CCE_LOWER') + text = text.replace('CCE_PRINT', 'y_tab.LR1.CCE_PRINT') + text = text.replace('CCE_PUNCT', 'y_tab.LR1.CCE_PUNCT') + text = text.replace('CCE_SPACE', 'y_tab.LR1.CCE_SPACE') + text = text.replace('CCE_UPPER', 'y_tab.LR1.CCE_UPPER') + text = text.replace('CCE_XDIGIT', 'y_tab.LR1.CCE_XDIGIT') + + text = text.replace('CCE_NEG_ALNUM', 'y_tab.LR1.CCE_NEG_ALNUM') + text = text.replace('CCE_NEG_ALPHA', 'y_tab.LR1.CCE_NEG_ALPHA') + text = text.replace('CCE_NEG_BLANK', 'y_tab.LR1.CCE_NEG_BLANK') + text = text.replace('CCE_NEG_CNTRL', 'y_tab.LR1.CCE_NEG_CNTRL') + text = text.replace('CCE_NEG_DIGIT', 'y_tab.LR1.CCE_NEG_DIGIT') + text = text.replace('CCE_NEG_GRAPH', 'y_tab.LR1.CCE_NEG_GRAPH') + text = text.replace('CCE_NEG_LOWER', 'y_tab.LR1.CCE_NEG_LOWER') + text = text.replace('CCE_NEG_PRINT', 'y_tab.LR1.CCE_NEG_PRINT') + text = text.replace('CCE_NEG_PUNCT', 'y_tab.LR1.CCE_NEG_PUNCT') + text = text.replace('CCE_NEG_SPACE', 'y_tab.LR1.CCE_NEG_SPACE') + text = text.replace('CCE_NEG_UPPER', 'y_tab.LR1.CCE_NEG_UPPER') + text = text.replace('CCE_NEG_XDIGIT', 'y_tab.LR1.CCE_NEG_XDIGIT') + + text = text.replace('CCL_OP_DIFF', 'y_tab.LR1.CCL_OP_DIFF') + text = text.replace('CCL_OP_UNION', 'y_tab.LR1.CCL_OP_UNION') + + text = text.replace('BEGIN_REPEAT_POSIX', 'y_tab.LR1.BEGIN_REPEAT_POSIX') + text = text.replace('END_REPEAT_POSIX', 'y_tab.LR1.END_REPEAT_POSIX') + text = text.replace('BEGIN_REPEAT_FLEX', 'y_tab.LR1.BEGIN_REPEAT_FLEX') + text = text.replace('END_REPEAT_FLEX', 'y_tab.LR1.END_REPEAT_FLEX') + + element.set_text(i[0], 0, text) + else: + for j in i: + replace_in_action(j) +replace_in_action(root) + +text = element.to_text(root) + +# see tests/scan.l +text = text.replace(r'{WS}[[:blank:]]+', '{WS}') +text = text.replace(r'{WS}([[:blank:]]+)', '{WS}') +text = text.replace(r'{OPTWS}[[:blank:]]*', '{OPTWS}') +text = text.replace(r'{OPTWS}([[:blank:]]*)', '{OPTWS}') +text = text.replace(r'{NOT_WS}[^[:blank:]\r\n]', '{NOT_WS}') +text = text.replace(r'{NOT_WS}([^[:blank:]\r\n])', '{NOT_WS}') + +text = text.replace(r'{NL}\r?\n', '{NL}') +text = text.replace(r'{NL}(\r?\n)', '{NL}') + +text = text.replace(r'{NAME}([[:alpha:]_][[:alnum:]_-]*)', '{NAME}') +text = text.replace(r'{NAME}(([[:alpha:]_][[:alnum:]_-]*))', '{NAME}') +text = text.replace(r'{NOT_NAME}[^[:alpha:]_*\n]+', '{NOT_NAME}') +text = text.replace(r'{NOT_NAME}([^[:alpha:]_*\n]+)', '{NOT_NAME}') + +text = text.replace(r'{SCNAME}{NAME}', '{SCNAME}') +text = text.replace(r'{SCNAME}({NAME})', '{SCNAME}') + +text = text.replace(r'{ESCSEQ}(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))', '{ESCSEQ}') +text = text.replace(r'{ESCSEQ}((\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2})))', '{ESCSEQ}') + +text = text.replace(r'{FIRST_CCL_CHAR}([^\\\n]|{ESCSEQ})', '{FIRST_CCL_CHAR}') +text = text.replace(r'{FIRST_CCL_CHAR}(([^\\\n]|{ESCSEQ}))', '{FIRST_CCL_CHAR}') +text = text.replace(r'{CCL_CHAR}([^\\\n\]]|{ESCSEQ})', '{CCL_CHAR}') +text = text.replace(r'{CCL_CHAR}(([^\\\n\]]|{ESCSEQ}))', '{CCL_CHAR}') +text = text.replace(r'{CCL_EXPR}("[:"^?[[:alpha:]]+":]")', '{CCL_EXPR}') +text = text.replace(r'{CCL_EXPR}(("[:"^?[[:alpha:]]+":]"))', '{CCL_EXPR}') + +text = text.replace(r'{LEXOPT}[aceknopr]', '{LEXOPT}') +text = text.replace(r'{LEXOPT}([aceknopr])', '{LEXOPT}') + +text = text.replace(r'{M4QSTART}"[""["', '{M4QSTART}') +text = text.replace(r'{M4QSTART}("[""[")', '{M4QSTART}') +text = text.replace(r'{M4QEND}"]""]"', '{M4QEND}') +text = text.replace(r'{M4QEND}("]""]")', '{M4QEND}') + +# we can only calculate column numbering once all substitutions done +i = 0 +j = text.find(' /*COLUMN32*/ ', i) +while j != -1: + k = text.rfind('\n', 0, j) + col = j - k - 1 + if col >= 32: + tab = ' ' + else: + tab = '\t' * ((32 - col + 7) // 8) + text = text[:j] + tab + text[j + 14:] + i = j + j = text.find(' /*COLUMN32*/ ', i) + +sys.stdout.write(text) diff --git a/xml_to_l.py b/xml_to_l.py deleted file mode 100755 index 7685908..0000000 --- a/xml_to_l.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python3 - -import element -import sys -import xml.etree.ElementTree - -root = xml.etree.ElementTree.parse(sys.stdin).getroot() -text = element.to_text(root) - -# see tests/scan.l -text = text.replace(r'{WS}[[:blank:]]+', '{WS}') -text = text.replace(r'{WS}([[:blank:]]+)', '{WS}') -text = text.replace(r'{OPTWS}[[:blank:]]*', '{OPTWS}') -text = text.replace(r'{OPTWS}([[:blank:]]*)', '{OPTWS}') -text = text.replace(r'{NOT_WS}[^[:blank:]\r\n]', '{NOT_WS}') -text = text.replace(r'{NOT_WS}([^[:blank:]\r\n])', '{NOT_WS}') - -text = text.replace(r'{NL}\r?\n', '{NL}') -text = text.replace(r'{NL}(\r?\n)', '{NL}') - -text = text.replace(r'{NAME}([[:alpha:]_][[:alnum:]_-]*)', '{NAME}') -text = text.replace(r'{NAME}(([[:alpha:]_][[:alnum:]_-]*))', '{NAME}') -text = text.replace(r'{NOT_NAME}[^[:alpha:]_*\n]+', '{NOT_NAME}') -text = text.replace(r'{NOT_NAME}([^[:alpha:]_*\n]+)', '{NOT_NAME}') - -text = text.replace(r'{SCNAME}{NAME}', '{SCNAME}') -text = text.replace(r'{SCNAME}({NAME})', '{SCNAME}') - -text = text.replace(r'{ESCSEQ}(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))', '{ESCSEQ}') -text = text.replace(r'{ESCSEQ}((\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2})))', '{ESCSEQ}') - -text = text.replace(r'{FIRST_CCL_CHAR}([^\\\n]|{ESCSEQ})', '{FIRST_CCL_CHAR}') -text = text.replace(r'{FIRST_CCL_CHAR}(([^\\\n]|{ESCSEQ}))', '{FIRST_CCL_CHAR}') -text = text.replace(r'{CCL_CHAR}([^\\\n\]]|{ESCSEQ})', '{CCL_CHAR}') -text = text.replace(r'{CCL_CHAR}(([^\\\n\]]|{ESCSEQ}))', '{CCL_CHAR}') -text = text.replace(r'{CCL_EXPR}("[:"^?[[:alpha:]]+":]")', '{CCL_EXPR}') -text = text.replace(r'{CCL_EXPR}(("[:"^?[[:alpha:]]+":]"))', '{CCL_EXPR}') - -text = text.replace(r'{LEXOPT}[aceknopr]', '{LEXOPT}') -text = text.replace(r'{LEXOPT}([aceknopr])', '{LEXOPT}') - -text = text.replace(r'{M4QSTART}"[""["', '{M4QSTART}') -text = text.replace(r'{M4QSTART}("[""[")', '{M4QSTART}') -text = text.replace(r'{M4QEND}"]""]"', '{M4QEND}') -text = text.replace(r'{M4QEND}("]""]")', '{M4QEND}') - -i = 0 -j = text.find(' /*COLUMN32*/ ', i) -while j != -1: - k = text.rfind('\n', 0, j) - col = j - k - 1 - if col >= 32: - tab = ' ' - else: - tab = '\t' * ((32 - col + 7) // 8) - text = text[:j] + tab + text[j + 14:] - i = j - j = text.find(' /*COLUMN32*/ ', i) - -sys.stdout.write(text) -- 2.34.1