Pristine Ack-5.5
[Ack-5.5.git] / util / ceg / EM_parser / common / mylex.c
1 #include "Lpars.h"
2 #include <stdio.h>
3 #include <ctype.h>
4
/* This file contains the function mylex(), which recognizes the following
 * tokens:
 *      EOFILE
 *      C_INSTR         - 'C_loc', 'C_lol', etc.
 *      DEF_C_INSTR     - 'C_loe..', 'C_ste..', '..icon', '..fcon', etc.
 *      CONDITION       - C-expression, for example: '$1 == 481'
 *      ARROW           - '==>'
 *      CALL            - C-style function call, for example: 'error( 17)'
 *      ASSEM_INSTR     - C-style string, for example: '"mov r0, (r1)"'
 *      DEFAULT         - 'default'
 *      ERROR           - An error occurred in one of the tokens.
 *
 * If the input matches none of these tokens, the next character is returned.
 *
 * Besides mylex() the following variables are exported:
 *
 *      char yytext[];  - Contains the string representation of the current
 *                        token.
 *      char *next;     - Points to the first free position in yytext[].
 */
25
26
#define YYTEXT  65536
char yytext[YYTEXT];    /* string-buffer for the current token */
char *next;             /* points to the first free position in yytext[] */

/* Character-level input routines, defined outside this file. */
extern char scanc();    /* delivers the next input character */
extern int backc();     /* pushes one character back onto the input.
                         * NOTE(review): declared 'int' because that is what
                         * the implicit declaration used so far implied;
                         * confirm against the definition.
                         */

#define FALSE   0
#define TRUE    1

int CD_pos = FALSE;     /* 'CD_pos' is used as a flag to signal whether it is
                         * possible to match a CONDITION or DEFAULT token at
                         * this moment. Thus mylex() knows about the grammar
                         * of the "EM_table"!!
                         * This flag is needed because CALL is a subset of
                         * CONDITION.
                         */
int CALL_pos = FALSE;   /* Needed to distinguish between
                         *      C_INSTR CONDITION       and     CALL
                         */

/* Declarations for the routines that mylex() calls before their definitions
 * appear further down in this file.  They are old-style (no parameter lists)
 * to match the K&R definitions.
 */
static char skip_space();
static int read_string();
int arrow();
int _default();
int read_ident();
int read_call();
int read_condition();
int is_C_INSTR();
int is_DEF_C_INSTR();
49
50 int mylex()
51 {
52         char c;
53
54         static int special = FALSE;     /* rule with conditions + default ? */
55
56         next = yytext;
57         c = *next++ = skip_space();
58         switch ( c) {
59           case EOF : next = yytext;
60                      return( 0);
61
62           case '"' : read_string();
63                      return( ASSEM_INSTR);
64
65           case '.' : c = scanc();
66                      backc( c);
67                      if ( c != '.') {   /* Just a plain '.', not something like
68                                          * '..icon'.
69                                          */
70                         if ( special)
71                                 CD_pos = TRUE;
72                         CALL_pos = FALSE;
73                         return( '.');
74                      }
75                      break;
76
77           case ';' : return( ';');
78
79           case '=' : if ( arrow()) {
80                         CD_pos = FALSE;
81                         CALL_pos = TRUE;
82                         return( ARROW);
83                      }
84                      break;
85
86           case 'd' : if ( CD_pos && _default()) {
87                         CD_pos = FALSE;
88                         special = FALSE;
89                         return( DEFAULT);
90                      }
91                      break;
92         }
93         /* Possible tokens at this place : CONDITION, CALL, C_INSTR,
94          * DEF_C_INSTR
95          */
96
97         if ( CD_pos) {
98                 read_condition();
99                 CD_pos = FALSE;
100                 special = TRUE;
101                 return( CONDITION);
102         }
103         if ( isalpha( c)) {
104                 read_ident();
105                 if ( CALL_pos) {
106                         c = skip_space();
107                         if ( c == '(') {
108                                 *next++ = c;
109                                 read_call();
110                                 return( CALL);
111                         }
112                         else {
113                                 backc( c);
114                                 return( ERROR);
115                         }
116                 }
117                 else {
118                         if ( is_DEF_C_INSTR( yytext)) {
119                                 CD_pos = TRUE;
120                                 return( DEF_C_INSTR);
121                         }
122                         if ( is_C_INSTR( yytext)) {
123                                 CD_pos = TRUE;
124                                 return( C_INSTR);
125                         }
126                         return( ERROR);
127                 }
128         }
129         if ( c == '.') {
130                 c = scanc();
131                 if ( c == '.') {
132                         *next++ = '.';
133                         read_ident();
134                         if ( is_DEF_C_INSTR( yytext)) {
135                                 CD_pos = TRUE;
136                                 return( DEF_C_INSTR);
137                         }
138                         return( ERROR);
139                 }
140                 else {
141                         backc( c);
142                         return( '.');
143                 }
144         }
145         return( c);
146 }
147
/* isletter() - return 1 if 'c' may occur in an identifier (a letter, a digit
 * or an underscore), 0 otherwise.
 */
static int isletter( c)
char c;
{
        /* <ctype.h> functions are only defined for values representable as
         * unsigned char (or EOF); a plain char may be negative.
         */
        unsigned char uc = (unsigned char) c;

        return( isalpha( uc) || isdigit( uc) || c == '_');
}
153
/* skip_space() - read characters with scanc() until one is found that is
 * not white space, and return that character (which may be EOF).
 */
static char skip_space()
{
        char c;

        do
                c = scanc();
        /* EOF is tested separately so that isspace() only ever sees a
         * value representable as unsigned char.
         */
        while ( c != EOF && isspace( (unsigned char) c));
        return( c);
}
162
163
164 /* first character has been read */
165
166
167 static read_string()
168
169 /* match something like "mov r0, (r1)".
170  * strip the double quotes off! Inside a string, the character '"' must
171  * be preceded by a '\'.
172  */
173 {
174         next--;
175         while( ( *next = scanc()) != '"' || *(next-1) == '\\')
176                 next++;
177 }
178
179 int arrow() /* '==>' */
180 {
181         if ( ( *next++ = scanc()) == '=')
182                 if ( ( *next++ = scanc()) == '>')
183                         return( TRUE);
184                 else
185                         backc( *--next);
186         else
187                 backc( *--next);
188         return( FALSE);
189 }
190
191 int _default() /* 'default' */
192 {
193         char c;
194
195         if ( ( *next++ = scanc()) == 'e')
196             if ( ( *next++ = scanc()) == 'f')
197                 if ( ( *next++ = scanc()) == 'a')
198                     if ( ( *next++ = scanc()) == 'u')
199                         if ( ( *next++ = scanc()) == 'l')
200                             if ( ( *next++ = scanc()) == 't')
201                                 if ( !isletter( c = skip_space())) {
202                                         backc( c);
203                                         return( TRUE);
204                                 }
205                                 else
206                                         backc( c);
207                             else
208                                 backc( *--next);
209                         else
210                             backc( *--next);
211                     else
212                         backc( *--next);
213                 else
214                     backc( *--next);
215             else
216                 backc( *--next);
217         else
218             backc( *--next);
219         return( FALSE);
220 }
221
222 read_ident()
223 {
224         char c;
225
226         while ( isletter( c = scanc()))
227                 *next++ = c;
228         backc( c);
229 }
230
231 read_call()
232 {
233         int n = 1;
234
235         while ( TRUE)
236                 switch( *next++ = scanc()) {
237                   case EOF : return;
238
239                   case '(' : n++;
240                              break;
241
242                   case ')' : n--;
243                              if ( n == 0)
244                                 return;
245                              break;
246                 }
247 }
248
249 read_condition()
250
251 /* A CONDITION is followed by '==>'
252  */
253 {
254         while ( TRUE) {
255                 switch ( *next++ = scanc()) {
256                   case EOF : return;
257
258                   case '=' : if ( arrow()) {
259                                 backc( '>');
260                                 backc( '=');
261                                 backc( '=');
262                                 next -= 3;
263                                 return;
264                              }
265                              break;
266                 }
267         }
268 }
269
/* is_C_INSTR() - return 1 if 'str' starts with "C_" (as in 'C_loc'),
 * 0 otherwise.  'str' must be a '\0'-terminated string.
 */
int is_C_INSTR( str)
char *str;
{
        /* str[1] is only inspected when str[0] is 'C', so an empty string
         * is handled safely.
         */
        return( str[0] == 'C' && str[1] == '_');
}
278
279 is_DEF_C_INSTR( str)
280 char *str;
281
282 /* yytext[] contains either '..[letter]*' ( 2 dots possibly followed by an
283  * identifer) * or '[letter]+' ( just an identifier)
284  * Try to match something like 'C_loe..' or '..icon'
285  */
286 {
287         if ( *str == '.' && *(str+1) == '.')
288                 return( next > yytext+1);
289
290         if ( ( *next++ = scanc()) == '.')
291                 if ( ( *next++ = scanc()) == '.')
292                         return( next > yytext+1);
293                 else
294                         backc( *--next);
295         else
296                 backc( *--next);
297         return( FALSE);
298 }