Fix several bugs to get Python scanner/parser basically working, processes ../tests...
[pilex.git] / generate_flex.py
1 import os
2 import regex
3
# Symbol suffixes that get a `#define yyNAME <prefix>NAME` line when the
# scanner prefix is not 'yy'.  Order and repeated entries are deliberately
# kept exactly as they were so the emitted text does not change.
_PREFIX_SYMBOLS = (
  '_create_buffer',
  '_delete_buffer',
  '_scan_buffer',
  '_scan_string',
  '_scan_bytes',
  '_init_buffer',
  '_flush_buffer',
  '_load_buffer_state',
  '_switch_to_buffer',
  'push_buffer_state',
  'pop_buffer_state',
  'ensure_buffer_stack',
  '_flex_debug',
  'in',
  'leng',
  'lex',
  'lineno',
  'out',
  'restart',
  'text',
  'wrap',
  'alloc',
  'realloc',
  'free',
  '_create_buffer',
  '_delete_buffer',
  '_scan_buffer',
  '_scan_string',
  '_scan_bytes',
  '_init_buffer',
  '_flush_buffer',
  '_load_buffer_state',
  '_switch_to_buffer',
  'push_buffer_state',
  'pop_buffer_state',
  'ensure_buffer_stack',
  'lex',
  'restart',
  'lex_init',
  'lex_init_extra',
  'lex_destroy',
  'get_debug',
  'set_debug',
  'get_extra',
  'set_extra',
  'get_in',
  'set_in',
  'get_out',
  'set_out',
  'get_leng',
  'get_text',
  'get_lineno',
  'set_lineno',
  'wrap',
  'alloc',
  'realloc',
  'free',
  'text',
  'leng',
  'in',
  'out',
  '_flex_debug',
  'lineno'
)

def _generated_section(tag, body, end_marker = '/* GENERATE END */'):
  """Return `body` framed by the BEGIN marker for `tag` and `end_marker`."""
  return '/* GENERATE {0:s} BEGIN */\n{1:s}{2:s}\n'.format(
    tag,
    body,
    end_marker
  )

def _c_table(name, values):
  """Return the C definition of the flex_uint16_t array called `name`.

  `values` must support slicing and `.shape[0]`; its entries are written
  right-aligned to width 5, ten per line, each line starting with a tab.
  """
  rows = [
    '\n\t' + ', '.join(['{0:5d}'.format(v) for v in values[i:i + 10]])
    for i in range(0, values.shape[0], 10)
  ]
  return 'static const flex_uint16_t {0:s}[] = {{{1:s}\n}};\n'.format(
    name,
    ','.join(rows)
  )

def _prefix_text(_ast):
  """Return the `#define yyNAME <prefix>NAME` lines, or '' for prefix 'yy'."""
  prefix = _ast[0].prefix
  if prefix == 'yy':
    return ''
  return ''.join(
    [
      '#define yy{0:s} {1:s}{2:s}\n'.format(symbol, prefix, symbol)
      for symbol in _PREFIX_SYMBOLS
    ]
  )

def _yywrap_text(_ast):
  """Return the constant-1 wrap macro lines, or '' when yywrap is enabled."""
  if _ast[0].yywrap:
    return ''
  return '#define {0:s}wrap() (/*CONSTCOND*/1)\n#define YY_SKIP_YYWRAP\n'.format(
    _ast[0].prefix
  )

def _tables_text(_ast, _flex_dfa):
  """Return the YY_END_OF_BUFFER define followed by the six scanner tables."""
  return (
    '#define YY_END_OF_BUFFER {0:d}\n'.format(len(_ast.actions_text)) +
    _c_table('yy_acclist', _flex_dfa.acclist) +
    _c_table('yy_accept', _flex_dfa.accept) +
    # column 0 / column 1 of the per-state and per-entry arrays
    _c_table('yy_base', _flex_dfa.states[:, 0]) +
    _c_table('yy_def', _flex_dfa.states[:, 1]) +
    _c_table('yy_nxt', _flex_dfa.entries[:, 0]) +
    _c_table('yy_chk', _flex_dfa.entries[:, 1])
  )

def _start_cond_text(_ast):
  """Return one `#define NAME index` line per start condition."""
  return ''.join(
    [
      '#define {0:s} {1:d}\n'.format(cond.name, i)
      for i, cond in enumerate(_ast.start_conditions)
    ]
  )

def _section2_text(_ast):
  """Return the rule `case` blocks followed by the EOF action blocks."""
  # NOTE(review): the whitespace before YY_BREAK is reproduced as three
  # spaces; confirm against the upstream file, where it may be a tab.
  rule_cases = ''.join(
    [
      'case {0:d}:\nYY_RULE_SETUP\n{1:s}   YY_BREAK\n'.format(
        i,
        action.get_text()
      )
      for i, action in enumerate(_ast.actions_text)
    ]
  )
  eof_cases = []
  for i, eof_action in enumerate(_ast.eof_actions_text):
    labels = [
      '\t\t\tcase YY_STATE_EOF({0:s}):\n'.format(cond.name)
      for cond in _ast.start_conditions
      if cond.eof_action == i
    ]
    # an EOF action that no start condition refers to is not emitted
    if labels:
      eof_cases.append(''.join(labels) + eof_action.get_text())
  return rule_cases + ''.join(eof_cases)

def generate_flex(_ast, _element, home_dir, skel_file, out_file):
  """Write a flex-style C scanner by filling in a skeleton file.

  The skeleton is copied line by line to the output.  A line that consists
  solely of a `/* GENERATE <NAME> */` marker is replaced by the generated
  text for that section; any other line is copied through, with `yywrap`
  renamed to `<prefix>wrap` when the prefix is not 'yy'.

  Parameters:
    _ast: parsed scanner specification.  This function reads
      `flex_rules`, `actions_text`, `eof_actions_text` and
      `start_conditions`, calls `to_nfa()`, and indexes it: `_ast[0]`
      supplies `caseless`, `prefix`, `outfile`, `yywrap` and
      `code_blocks_text`, `_ast[1]` supplies `code_blocks_text`, and
      `_ast[2]` (if present) supplies the section 3 text.
    _element: not used by this function.
    home_dir: directory containing `skel/skel_flex.c`, consulted only when
      `skel_file` is None.
    skel_file: path of the skeleton to read, or None for the default.
    out_file: path to write, or None to use `_ast[0].outfile` if non-empty
      and otherwise `lex.<prefix>.c`.
  """
  # generate group_ref_data which emulates the old way where
  # start = even, end = odd, remaining bits = flex rule index,
  # ignoring user-defined groups by putting start = end = -1:
  group_ref_data = []
  for i, rule in enumerate(_ast.flex_rules):
    group_ref_data.extend(
      [(-1, -1)] * len(rule.groups0) +
      [(i * 2, i * 2 + 1)] +
      [(-1, -1)] * len(rule.groups1)
    )

  _nfa = _ast.to_nfa(group_ref_data)

  # end of buffer expression (do here because only necessary for flex);
  # it is numbered as one extra rule after the real flex rules
  eob_regex = regex.RegexGroup(children = [regex.RegexEmpty()])
  eob_groups = []
  eob_regex.post_process(eob_groups, caseless = _ast[0].caseless)
  assert len(eob_groups) == 1
  n_rules = len(_ast.flex_rules)
  eob_regex.add_to_nfa(_nfa, [(n_rules * 2, n_rules * 2 + 1)])

  _flex_dfa = _nfa.to_dfa().to_flex_dfa()

  if skel_file is None:
    skel_file = os.path.join(home_dir, 'skel/skel_flex.c')
  if out_file is None:
    outfile = _ast[0].outfile
    out_file = (
      outfile
    if len(outfile) else
      'lex.{0:s}.c'.format(_ast[0].prefix)
    )

  # marker line -> callable producing its replacement; the text is built
  # only when (and each time) the marker actually occurs in the skeleton
  generators = {
    '/* GENERATE PREFIX */\n':
      lambda: _generated_section('PREFIX', _prefix_text(_ast)),
    '/* GENERATE YYWRAP */\n':
      lambda: _generated_section('YYWRAP', _yywrap_text(_ast)),
    '/* GENERATE TABLES */\n':
      lambda: _generated_section('TABLES', _tables_text(_ast, _flex_dfa)),
    '/* GENERATE SECTION1 */\n':
      lambda: _generated_section(
        'SECTION1',
        ''.join([i.get_text() for i in _ast[0].code_blocks_text])
      ),
    # this section's end marker has no space before the closing `*/`
    '/* GENERATE STARTCONDDECL */\n':
      lambda: _generated_section(
        'STARTCONDDECL',
        _start_cond_text(_ast),
        '/* GENERATE END*/'
      ),
    '/* GENERATE SECTION2INITIAL */\n':
      lambda: _generated_section(
        'SECTION2INITIAL',
        ''.join([i.get_text() for i in _ast[1].code_blocks_text])
      ),
    '/* GENERATE SECTION2 */\n':
      lambda: _generated_section('SECTION2', _section2_text(_ast)),
    '/* GENERATE SECTION3 */\n':
      lambda: _generated_section(
        'SECTION3',
        '' if len(_ast) < 3 else _ast[2].get_text()
      )
  }

  # replacement for `yywrap` in pass-through lines; None means no renaming
  prefix = _ast[0].prefix
  renamed_wrap = None if prefix == 'yy' else '{0:s}wrap'.format(prefix)

  with open(skel_file, 'r') as fin:
    # plain 'w': the output is only written, never read back
    with open(out_file, 'w') as fout:
      for line in fin:
        generate = generators.get(line)
        if generate is not None:
          fout.write(generate())
          continue
        if renamed_wrap is not None:
          line = line.replace('yywrap', renamed_wrap)
        fout.write(line)