From 84741d5c1f0fd72a0b91813a4b534475eab45709 Mon Sep 17 00:00:00 2001
From: Nick Downing <downing.nick@gmail.com>
Date: Sun, 12 Aug 2018 12:14:30 +1000
Subject: [PATCH] Minor adjustments, change xml_to_l.py to scan_to_l.py, make
 it fix token names

---
 .gitignore     |   7 +--
 l_to_python.py |  31 +++++++++----
 n.sh           |   5 +-
 scan_to_l.py   | 124 +++++++++++++++++++++++++++++++++++++++++++++++++
 xml_to_l.py    |  60 ------------------------
 5 files changed, 151 insertions(+), 76 deletions(-)
 create mode 100755 scan_to_l.py
 delete mode 100755 xml_to_l.py
diff --git a/.gitignore b/.gitignore
index 984a9d3..d9555a6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,9 @@
 __pycache__
-a
-b
-c
 a.c
 a.i
 tests/*.l.xml
+tests/*.l.new.xml
+tests/*.l.new
 tests/*.y.xml
+tests/*.y.new.xml
+tests/*.y.new
diff --git a/l_to_python.py b/l_to_python.py
index 056491c..4f11487 100755
--- a/l_to_python.py
+++ b/l_to_python.py
@@ -26,6 +26,13 @@ def my_rstrip(text, indent):
     i -= 1
   return text[:i].rstrip('\t ') + indent + text[i:]
 
+context = ast.Context()  # created before extract() runs, because extract() registers start-condition names in it
+context.translate_identifier['BEGIN'] = 'self.BEGIN'  # key: identifier as written in an action; value: its replacement (presumably applied by the ast module -- confirm)
+context.translate_identifier['yylval'] = 'ref_data.yylval'
+context.translate_identifier['yytext'] = 'self.yytext'
+context.translate_identifier['yy_pop_state'] = 'self.yy_pop_state'
+context.translate_identifier['yy_push_state'] = 'self.yy_push_state'
+
 actions = []
 with open('a.c', 'w') as fout:
   def extract(i, parent, indent):
@@ -44,7 +51,12 @@ with open('a.c', 'w') as fout:
       initial = False
     else:
       child_indent = indent
-      if (
+      if i.tag == 'PLex_Section1_StartConditions':
+        for j in i:
+          assert j.tag == 'PLex_Name'
+          text = element.get_text(j, 0)
+          context.translate_identifier[text] = 'DFA.{0:s}'.format(text)
+      elif (
         i.tag == 'PLex_Section2_Rule' or
         i.tag == 'PLex_Section2_Rule_FLexRule'
       ):
@@ -103,13 +115,6 @@ with open('a.c', 'w') as fout:
     actions.append((i, parent, indent, initial))
   extract(root, None, '')
 
-context = ast.Context()
-context.translate_identifier['BEGIN'] = 'self.BEGIN'
-context.translate_identifier['yylval'] = 'yy.yylval'
-context.translate_identifier['yytext'] = 'yy.yytext'
-context.translate_identifier['yy_pop_state'] = 'self.yy_pop_state'
-context.translate_identifier['yy_push_state'] = 'self.yy_push_state'
-
 os.system('gcc -I tests/flex_h -E a.c >a.i')
 with open('a.i') as fin:
   for i, parent, indent, initial in actions:
@@ -143,12 +148,18 @@ with open('a.i') as fin:
       prefix = element.get_text(parent, 2).rstrip('\t ')
       if len(text) == 0:
         text = '\n'
+      elif text == '\n':
+        pass
       elif text.index('\n') == len(text) - 1:
         prefix += ' /*COLUMN32*/ '
-        text = text.lstrip('\t ')
+        text = 'pass {0:s}'.format(text.lstrip('\t '))
       else:
         prefix += ' '
-        text = '{{\n{0:s}{1:s}}}\n'.format(text, indent)
+        text = '{{\n{0:s}  pass\n{1:s}{2:s}}}\n'.format(
+          indent,
+          text,
+          indent
+        )
       element.set_text(parent, 2, prefix)
       element.set_text(i[0], 0, text)
 
diff --git a/n.sh b/n.sh
index a2cceca..7f3024d 100755
--- a/n.sh
+++ b/n.sh
@@ -1,5 +1,4 @@
 #!/bin/sh
 ../bootstrap_flex.git/src/flex tests/scan.l 2>tests/scan.l.xml
-./l_to_python.py <tests/scan.l.xml >a
-./xml_to_l.py <a >b
-diff --unified --ignore-space-change tests/scan.l b >c
+./l_to_python.py <tests/scan.l.xml >tests/scan.l.new.xml  # output is XML again; it is the input of the next step
+./scan_to_l.py <tests/scan.l.new.xml >tests/scan.l.new  # render the XML as .l text (the old automatic diff against tests/scan.l is gone)
diff --git a/scan_to_l.py b/scan_to_l.py
new file mode 100755
index 0000000..d1f5291
--- /dev/null
+++ b/scan_to_l.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+
+import element
+import sys
+import xml.etree.ElementTree
+
+root = xml.etree.ElementTree.parse(sys.stdin).getroot()  # the XML document is read from stdin
+
+def replace_in_action(i):  # i: XML element; rewrites rule-action text in place, walking the tree recursively
+  if i.tag == 'PLex_Section2_Rule_Action' and len(i):  # an action element with at least one child
+    assert i[0].tag == 'PLex_Text'
+    text = element.get_text(i[0], 0)
+    # NOTE(review): these are plain substring replacements, so a token name that occurs inside a longer identifier is rewritten too
+    # qualify every parser token name with y_tab.LR1. (token list: see tests/parse.y)
+    text = text.replace('CHAR', 'y_tab.LR1.CHAR')
+    text = text.replace('NUMBER', 'y_tab.LR1.NUMBER')
+    text = text.replace('SECTEND', 'y_tab.LR1.SECTEND')
+    text = text.replace('SCDECL', 'y_tab.LR1.SCDECL')
+    text = text.replace('Xy_tab.LR1.SCDECL', 'y_tab.LR1.XSCDECL')  # repair XSCDECL, which the SCDECL replace just above split apart
+    text = text.replace('NAME', 'y_tab.LR1.NAME')
+    text = text.replace('PREVCCL', 'y_tab.LR1.PREVCCL')
+    text = text.replace('EOF_OP', 'y_tab.LR1.EOF_OP')
+    text = text.replace('TOK_OPTION', 'y_tab.LR1.TOK_OPTION')
+    text = text.replace('TOK_OUTFILE', 'y_tab.LR1.TOK_OUTFILE')
+    text = text.replace('TOK_PREFIX', 'y_tab.LR1.TOK_PREFIX')
+    text = text.replace('TOK_YYCLASS', 'y_tab.LR1.TOK_YYCLASS')
+    text = text.replace('TOK_HEADER_FILE', 'y_tab.LR1.TOK_HEADER_FILE')
+    text = text.replace('TOK_EXTRA_TYPE', 'y_tab.LR1.TOK_EXTRA_TYPE')
+    text = text.replace('TOK_TABLES_FILE', 'y_tab.LR1.TOK_TABLES_FILE')
+
+    text = text.replace('CCE_ALNUM', 'y_tab.LR1.CCE_ALNUM')
+    text = text.replace('CCE_ALPHA', 'y_tab.LR1.CCE_ALPHA')
+    text = text.replace('CCE_BLANK', 'y_tab.LR1.CCE_BLANK')
+    text = text.replace('CCE_CNTRL', 'y_tab.LR1.CCE_CNTRL')
+    text = text.replace('CCE_DIGIT', 'y_tab.LR1.CCE_DIGIT')
+    text = text.replace('CCE_GRAPH', 'y_tab.LR1.CCE_GRAPH')
+    text = text.replace('CCE_LOWER', 'y_tab.LR1.CCE_LOWER')
+    text = text.replace('CCE_PRINT', 'y_tab.LR1.CCE_PRINT')
+    text = text.replace('CCE_PUNCT', 'y_tab.LR1.CCE_PUNCT')
+    text = text.replace('CCE_SPACE', 'y_tab.LR1.CCE_SPACE')
+    text = text.replace('CCE_UPPER', 'y_tab.LR1.CCE_UPPER')
+    text = text.replace('CCE_XDIGIT', 'y_tab.LR1.CCE_XDIGIT')
+
+    text = text.replace('CCE_NEG_ALNUM', 'y_tab.LR1.CCE_NEG_ALNUM')
+    text = text.replace('CCE_NEG_ALPHA', 'y_tab.LR1.CCE_NEG_ALPHA')
+    text = text.replace('CCE_NEG_BLANK', 'y_tab.LR1.CCE_NEG_BLANK')
+    text = text.replace('CCE_NEG_CNTRL', 'y_tab.LR1.CCE_NEG_CNTRL')
+    text = text.replace('CCE_NEG_DIGIT', 'y_tab.LR1.CCE_NEG_DIGIT')
+    text = text.replace('CCE_NEG_GRAPH', 'y_tab.LR1.CCE_NEG_GRAPH')
+    text = text.replace('CCE_NEG_LOWER', 'y_tab.LR1.CCE_NEG_LOWER')
+    text = text.replace('CCE_NEG_PRINT', 'y_tab.LR1.CCE_NEG_PRINT')
+    text = text.replace('CCE_NEG_PUNCT', 'y_tab.LR1.CCE_NEG_PUNCT')
+    text = text.replace('CCE_NEG_SPACE', 'y_tab.LR1.CCE_NEG_SPACE')
+    text = text.replace('CCE_NEG_UPPER', 'y_tab.LR1.CCE_NEG_UPPER')
+    text = text.replace('CCE_NEG_XDIGIT', 'y_tab.LR1.CCE_NEG_XDIGIT')
+
+    text = text.replace('CCL_OP_DIFF', 'y_tab.LR1.CCL_OP_DIFF')
+    text = text.replace('CCL_OP_UNION', 'y_tab.LR1.CCL_OP_UNION')
+
+    text = text.replace('BEGIN_REPEAT_POSIX', 'y_tab.LR1.BEGIN_REPEAT_POSIX')
+    text = text.replace('END_REPEAT_POSIX', 'y_tab.LR1.END_REPEAT_POSIX')
+    text = text.replace('BEGIN_REPEAT_FLEX', 'y_tab.LR1.BEGIN_REPEAT_FLEX')
+    text = text.replace('END_REPEAT_FLEX', 'y_tab.LR1.END_REPEAT_FLEX')
+
+    element.set_text(i[0], 0, text)  # store the rewritten text back on the same child
+  else:  # not an action (or an action without children): descend into the children instead
+    for j in i:
+      replace_in_action(j)
+replace_in_action(root)  # start the walk at the document root
+
+text = element.to_text(root)  # the whole tree as one string; everything below is plain string editing
+# Each pair below collapses "{X}" followed by a fixed regex (once as-is, once with an extra pair of parentheses) to "{X}";
+# presumably the regex is the definition's expansion as rendered by to_text() -- see tests/scan.l to confirm
+text = text.replace(r'{WS}[[:blank:]]+', '{WS}')
+text = text.replace(r'{WS}([[:blank:]]+)', '{WS}')
+text = text.replace(r'{OPTWS}[[:blank:]]*', '{OPTWS}')
+text = text.replace(r'{OPTWS}([[:blank:]]*)', '{OPTWS}')
+text = text.replace(r'{NOT_WS}[^[:blank:]\r\n]', '{NOT_WS}')
+text = text.replace(r'{NOT_WS}([^[:blank:]\r\n])', '{NOT_WS}')
+
+text = text.replace(r'{NL}\r?\n', '{NL}')
+text = text.replace(r'{NL}(\r?\n)', '{NL}')
+
+text = text.replace(r'{NAME}([[:alpha:]_][[:alnum:]_-]*)', '{NAME}')
+text = text.replace(r'{NAME}(([[:alpha:]_][[:alnum:]_-]*))', '{NAME}')
+text = text.replace(r'{NOT_NAME}[^[:alpha:]_*\n]+', '{NOT_NAME}')
+text = text.replace(r'{NOT_NAME}([^[:alpha:]_*\n]+)', '{NOT_NAME}')
+
+text = text.replace(r'{SCNAME}{NAME}', '{SCNAME}')  # order matters: {NAME} was already collapsed above
+text = text.replace(r'{SCNAME}({NAME})', '{SCNAME}')
+
+text = text.replace(r'{ESCSEQ}(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))', '{ESCSEQ}')
+text = text.replace(r'{ESCSEQ}((\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2})))', '{ESCSEQ}')
+
+text = text.replace(r'{FIRST_CCL_CHAR}([^\\\n]|{ESCSEQ})', '{FIRST_CCL_CHAR}')  # likewise relies on {ESCSEQ} being collapsed first
+text = text.replace(r'{FIRST_CCL_CHAR}(([^\\\n]|{ESCSEQ}))', '{FIRST_CCL_CHAR}')
+text = text.replace(r'{CCL_CHAR}([^\\\n\]]|{ESCSEQ})', '{CCL_CHAR}')
+text = text.replace(r'{CCL_CHAR}(([^\\\n\]]|{ESCSEQ}))', '{CCL_CHAR}')
+text = text.replace(r'{CCL_EXPR}("[:"^?[[:alpha:]]+":]")', '{CCL_EXPR}')
+text = text.replace(r'{CCL_EXPR}(("[:"^?[[:alpha:]]+":]"))', '{CCL_EXPR}')
+
+text = text.replace(r'{LEXOPT}[aceknopr]', '{LEXOPT}')
+text = text.replace(r'{LEXOPT}([aceknopr])', '{LEXOPT}')
+
+text = text.replace(r'{M4QSTART}"[""["', '{M4QSTART}')
+text = text.replace(r'{M4QSTART}("[""[")', '{M4QSTART}')
+text = text.replace(r'{M4QEND}"]""]"', '{M4QEND}')
+text = text.replace(r'{M4QEND}("]""]")', '{M4QEND}')
+
+# replace each ' /*COLUMN32*/ ' marker with padding; done last because the substitutions above change line lengths
+i = 0
+j = text.find(' /*COLUMN32*/ ', i)
+while j != -1:
+  k = text.rfind('\n', 0, j)  # index of the newline before the marker, or -1 on the first line
+  col = j - k - 1  # 0-based character offset of the marker within its line
+  if col >= 32:
+    tab = ' '  # already at or past column 32: a single space
+  else:
+    tab = '\t' * ((32 - col + 7) // 8)  # one tab per 8 columns still missing, rounded up (col counts characters, so earlier tabs on the line are not expanded)
+  text = text[:j] + tab + text[j + 14:]  # 14 == len(' /*COLUMN32*/ ')
+  i = j  # resume the search where the marker was
+  j = text.find(' /*COLUMN32*/ ', i)
+
+sys.stdout.write(text)  # the finished text goes to stdout
diff --git a/xml_to_l.py b/xml_to_l.py
deleted file mode 100755
index 7685908..0000000
--- a/xml_to_l.py
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/usr/bin/env python3
-
-import element
-import sys
-import xml.etree.ElementTree
-
-root = xml.etree.ElementTree.parse(sys.stdin).getroot()
-text = element.to_text(root)
-
-# see tests/scan.l
-text = text.replace(r'{WS}[[:blank:]]+', '{WS}')
-text = text.replace(r'{WS}([[:blank:]]+)', '{WS}')
-text = text.replace(r'{OPTWS}[[:blank:]]*', '{OPTWS}')
-text = text.replace(r'{OPTWS}([[:blank:]]*)', '{OPTWS}')
-text = text.replace(r'{NOT_WS}[^[:blank:]\r\n]', '{NOT_WS}')
-text = text.replace(r'{NOT_WS}([^[:blank:]\r\n])', '{NOT_WS}')
-
-text = text.replace(r'{NL}\r?\n', '{NL}')
-text = text.replace(r'{NL}(\r?\n)', '{NL}')
-
-text = text.replace(r'{NAME}([[:alpha:]_][[:alnum:]_-]*)', '{NAME}')
-text = text.replace(r'{NAME}(([[:alpha:]_][[:alnum:]_-]*))', '{NAME}')
-text = text.replace(r'{NOT_NAME}[^[:alpha:]_*\n]+', '{NOT_NAME}')
-text = text.replace(r'{NOT_NAME}([^[:alpha:]_*\n]+)', '{NOT_NAME}')
-
-text = text.replace(r'{SCNAME}{NAME}', '{SCNAME}')
-text = text.replace(r'{SCNAME}({NAME})', '{SCNAME}')
-
-text = text.replace(r'{ESCSEQ}(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))', '{ESCSEQ}')
-text = text.replace(r'{ESCSEQ}((\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2})))', '{ESCSEQ}')
-
-text = text.replace(r'{FIRST_CCL_CHAR}([^\\\n]|{ESCSEQ})', '{FIRST_CCL_CHAR}')
-text = text.replace(r'{FIRST_CCL_CHAR}(([^\\\n]|{ESCSEQ}))', '{FIRST_CCL_CHAR}')
-text = text.replace(r'{CCL_CHAR}([^\\\n\]]|{ESCSEQ})', '{CCL_CHAR}')
-text = text.replace(r'{CCL_CHAR}(([^\\\n\]]|{ESCSEQ}))', '{CCL_CHAR}')
-text = text.replace(r'{CCL_EXPR}("[:"^?[[:alpha:]]+":]")', '{CCL_EXPR}')
-text = text.replace(r'{CCL_EXPR}(("[:"^?[[:alpha:]]+":]"))', '{CCL_EXPR}')
-
-text = text.replace(r'{LEXOPT}[aceknopr]', '{LEXOPT}')
-text = text.replace(r'{LEXOPT}([aceknopr])', '{LEXOPT}')
-
-text = text.replace(r'{M4QSTART}"[""["', '{M4QSTART}')
-text = text.replace(r'{M4QSTART}("[""[")', '{M4QSTART}')
-text = text.replace(r'{M4QEND}"]""]"', '{M4QEND}')
-text = text.replace(r'{M4QEND}("]""]")', '{M4QEND}')
-
-i = 0
-j = text.find(' /*COLUMN32*/ ', i)
-while j != -1:
-  k = text.rfind('\n', 0, j)
-  col = j - k - 1
-  if col >= 32:
-    tab = ' '
-  else:
-    tab = '\t' * ((32 - col + 7) // 8)
-  text = text[:j] + tab + text[j + 14:]
-  i = j
-  j = text.find(' /*COLUMN32*/ ', i)
-
-sys.stdout.write(text)
-- 
2.34.1