scan_to_l.py

   1 #!/usr/bin/env python3
   2
import re
import sys
import xml.etree.ElementTree
   5
   6 def get_text(root, i):
   7   if i < 0:
   8     i += len(root) + 1
   9   text = root.text if i == 0 else root[i - 1].tail
  10   return '' if text is None else text
  11
  12 def set_text(root, i, text):
  13   if i < 0:
  14     i += len(root) + 1
  15   if len(text) == 0:
  16     text = None
  17   if i == 0:
  18     root.text = text
  19   else:
  20     root[i - 1].tail = text
  21
  22 def to_text(root):
  23   return ''.join(
  24     [
  25       j
  26       for i in range(len(root))
  27       for j in [get_text(root, i), to_text(root[i])]
  28     ] +
  29     [get_text(root, len(root))]
  30   )
  31
  32 root = xml.etree.ElementTree.parse(sys.stdin).getroot()[0]
  33
  34 def replace_in_action(i):
  35   if i.tag == 'AST_Section2_Rule_Action' and len(i):
  36     assert i[0].tag == 'AST_Text'
  37     text = get_text(i[0], 0)
  38
  39     # see tests/parse.y
  40     text = text.replace('CHAR', 'y_tab.CHAR')
  41     text = text.replace('NUMBER', 'y_tab.NUMBER')
  42     text = text.replace('SECTEND', 'y_tab.SECTEND')
  43     text = text.replace('SCDECL', 'y_tab.SCDECL')
  44     text = text.replace('Xy_tab.SCDECL', 'y_tab.XSCDECL')
  45     text = text.replace('NAME', 'y_tab.NAME')
  46     text = text.replace('PREVCCL', 'y_tab.PREVCCL')
  47     text = text.replace('EOF_OP', 'y_tab.EOF_OP')
  48     text = text.replace('TOK_OPTION', 'y_tab.TOK_OPTION')
  49     text = text.replace('TOK_OUTFILE', 'y_tab.TOK_OUTFILE')
  50     text = text.replace('TOK_PREFIX', 'y_tab.TOK_PREFIX')
  51     text = text.replace('TOK_YYCLASS', 'y_tab.TOK_YYCLASS')
  52     text = text.replace('TOK_HEADER_FILE', 'y_tab.TOK_HEADER_FILE')
  53     text = text.replace('TOK_EXTRA_TYPE', 'y_tab.TOK_EXTRA_TYPE')
  54     text = text.replace('TOK_TABLES_FILE', 'y_tab.TOK_TABLES_FILE')
  55
  56     text = text.replace('CCE_ALNUM', 'y_tab.CCE_ALNUM')
  57     text = text.replace('CCE_ALPHA', 'y_tab.CCE_ALPHA')
  58     text = text.replace('CCE_BLANK', 'y_tab.CCE_BLANK')
  59     text = text.replace('CCE_CNTRL', 'y_tab.CCE_CNTRL')
  60     text = text.replace('CCE_DIGIT', 'y_tab.CCE_DIGIT')
  61     text = text.replace('CCE_GRAPH', 'y_tab.CCE_GRAPH')
  62     text = text.replace('CCE_LOWER', 'y_tab.CCE_LOWER')
  63     text = text.replace('CCE_PRINT', 'y_tab.CCE_PRINT')
  64     text = text.replace('CCE_PUNCT', 'y_tab.CCE_PUNCT')
  65     text = text.replace('CCE_SPACE', 'y_tab.CCE_SPACE')
  66     text = text.replace('CCE_UPPER', 'y_tab.CCE_UPPER')
  67     text = text.replace('CCE_XDIGIT', 'y_tab.CCE_XDIGIT')
  68
  69     text = text.replace('CCE_NEG_ALNUM', 'y_tab.CCE_NEG_ALNUM')
  70     text = text.replace('CCE_NEG_ALPHA', 'y_tab.CCE_NEG_ALPHA')
  71     text = text.replace('CCE_NEG_BLANK', 'y_tab.CCE_NEG_BLANK')
  72     text = text.replace('CCE_NEG_CNTRL', 'y_tab.CCE_NEG_CNTRL')
  73     text = text.replace('CCE_NEG_DIGIT', 'y_tab.CCE_NEG_DIGIT')
  74     text = text.replace('CCE_NEG_GRAPH', 'y_tab.CCE_NEG_GRAPH')
  75     text = text.replace('CCE_NEG_LOWER', 'y_tab.CCE_NEG_LOWER')
  76     text = text.replace('CCE_NEG_PRINT', 'y_tab.CCE_NEG_PRINT')
  77     text = text.replace('CCE_NEG_PUNCT', 'y_tab.CCE_NEG_PUNCT')
  78     text = text.replace('CCE_NEG_SPACE', 'y_tab.CCE_NEG_SPACE')
  79     text = text.replace('CCE_NEG_UPPER', 'y_tab.CCE_NEG_UPPER')
  80     text = text.replace('CCE_NEG_XDIGIT', 'y_tab.CCE_NEG_XDIGIT')
  81
  82     text = text.replace('CCL_OP_DIFF', 'y_tab.CCL_OP_DIFF')
  83     text = text.replace('CCL_OP_UNION', 'y_tab.CCL_OP_UNION')
  84
  85     text = text.replace('BEGIN_REPEAT_POSIX', 'y_tab.BEGIN_REPEAT_POSIX')
  86     text = text.replace('END_REPEAT_POSIX', 'y_tab.END_REPEAT_POSIX')
  87     text = text.replace('BEGIN_REPEAT_FLEX', 'y_tab.BEGIN_REPEAT_FLEX')
  88     text = text.replace('END_REPEAT_FLEX', 'y_tab.END_REPEAT_FLEX')
  89
  90     set_text(i[0], 0, text)
  91   else:
  92     for j in i:
  93       replace_in_action(j)
  94 replace_in_action(root)
  95
  96 text = to_text(root)
  97
  98 # see tests/scan.l
  99 text = text.replace(r'{WS}[[:blank:]]+', '{WS}')
 100 text = text.replace(r'{WS}(?:[[:blank:]]+)', '{WS}')
 101 text = text.replace(r'{OPTWS}[[:blank:]]*', '{OPTWS}')
 102 text = text.replace(r'{OPTWS}(?:[[:blank:]]*)', '{OPTWS}')
 103 text = text.replace(r'{NOT_WS}[^[:blank:]\r\n]', '{NOT_WS}')
 104 text = text.replace(r'{NOT_WS}(?:[^[:blank:]\r\n])', '{NOT_WS}')
 105
 106 text = text.replace(r'{NL}\r?\n', '{NL}')
 107 text = text.replace(r'{NL}(?:\r?\n)', '{NL}')
 108
 109 text = text.replace(r'{NAME}([[:alpha:]_][[:alnum:]_-]*)', '{NAME}')
 110 text = text.replace(r'{NAME}(?:([[:alpha:]_][[:alnum:]_-]*))', '{NAME}')
 111 text = text.replace(r'{NOT_NAME}[^[:alpha:]_*\n]+', '{NOT_NAME}')
 112 text = text.replace(r'{NOT_NAME}(?:[^[:alpha:]_*\n]+)', '{NOT_NAME}')
 113
 114 text = text.replace(r'{SCNAME}{NAME}', '{SCNAME}')
 115 text = text.replace(r'{SCNAME}(?:{NAME})', '{SCNAME}')
 116
 117 text = text.replace(r'{ESCSEQ}(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))', '{ESCSEQ}')
 118 text = text.replace(r'{ESCSEQ}(?:(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2})))', '{ESCSEQ}')
 119
 120 text = text.replace(r'{FIRST_CCL_CHAR}([^\\\n]|{ESCSEQ})', '{FIRST_CCL_CHAR}')
 121 text = text.replace(r'{FIRST_CCL_CHAR}(?:([^\\\n]|{ESCSEQ}))', '{FIRST_CCL_CHAR}')
 122 text = text.replace(r'{CCL_CHAR}([^\\\n\]]|{ESCSEQ})', '{CCL_CHAR}')
 123 text = text.replace(r'{CCL_CHAR}(?:([^\\\n\]]|{ESCSEQ}))', '{CCL_CHAR}')
 124 text = text.replace(r'{CCL_EXPR}("[:"^?[[:alpha:]]+":]")', '{CCL_EXPR}')
 125 text = text.replace(r'{CCL_EXPR}(?:("[:"^?[[:alpha:]]+":]"))', '{CCL_EXPR}')
 126
 127 text = text.replace(r'{LEXOPT}[aceknopr]', '{LEXOPT}')
 128 text = text.replace(r'{LEXOPT}(?:[aceknopr])', '{LEXOPT}')
 129
 130 text = text.replace(r'{M4QSTART}"[""["', '{M4QSTART}')
 131 text = text.replace(r'{M4QSTART}(?:"[""[")', '{M4QSTART}')
 132 text = text.replace(r'{M4QEND}"]""]"', '{M4QEND}')
 133 text = text.replace(r'{M4QEND}(?:"]""]")', '{M4QEND}')
 134
 135 # we can only calculate column numbering once all substitutions done
 136 i = 0
 137 j = text.find(' /*COLUMN32*/ ', i)
 138 while j != -1:
 139   k = text.rfind('\n', 0, j)
 140   col = j - k - 1
 141   if col >= 32:
 142     tab = ' '
 143   else:
 144     tab = '\t' * ((32 - col + 7) // 8)
 145   text = text[:j] + tab + text[j + 14:]
 146   i = j
 147   j = text.find(' /*COLUMN32*/ ', i)
 148
 149 sys.stdout.write(text)