Pristine Ack-5.5
[Ack-5.5.git] / util / cpp / LLlex.c
1 /*
2  * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
3  * See the copyright notice in the ACK home directory, in the file "Copyright".
4  */
5 /* $Id: LLlex.c,v 1.9 1994/06/24 10:17:46 ceriel Exp $ */
6 /*                  L E X I C A L   A N A L Y Z E R                     */
7
8 #include        "idfsize.h"
9 #include        "numsize.h"
10 #include        "strsize.h"
11
12 #include        <alloc.h>
13 #include        "input.h"
14 #include        "idf.h"
15 #include        "LLlex.h"
16 #include        "Lpars.h"
17 #include        "class.h"
18 #include        "bits.h"
19
20 /* Data about the token yielded */
21 struct token dot;
22
23 int ReplaceMacros = 1;          /* replacing macros                     */
24 int AccFileSpecifier = 0;       /* return filespecifier <...>           */
25 int AccDefined = 0;             /* accept "defined(...)"                */
26 int UnknownIdIsZero = 0;        /* interpret unknown id as integer 0    */
27
28 char *string_token();
29 char *strcpy();
30
31 PushLex()
32 {
33         DOT = 0;
34 }
35
/*	PopLex() is the counterpart of PushLex(); it has nothing to do.
*/
PopLex()
{
	/* intentionally empty */
}
38
39 int
40 LLlex()
41 {
42         return (DOT != EOF) ? GetToken(&dot) : EOF;
43 }
44
45 #define BUFSIZ 1024
46
47 int
48 GetToken(ptok)
49         register struct token *ptok;
50 {
51         char buf[BUFSIZ];
52         register int c, nch;
53
54 again:  /* rescan the input after an error or replacement       */
55         LoadChar(c);
56         if ((c & 0200) && c != EOI)
57                 fatal("non-ascii '\\%03o' read", c & 0377);
58         switch (class(c)) {     /* detect character class       */
59         case STNL:
60                 LineNumber++;
61                 return ptok->tk_symb = EOF;
62         case STSKIP:
63                 goto again;
64         case STGARB:            /* garbage character                    */
65                 if (c == '\\') {
66                         /* a '\\' is allowed in #if/#elif expression    */
67                         LoadChar(c);
68                         if (class(c) == STNL) { /* vt , ff ?    */
69                                 ++LineNumber;
70                                 goto again;
71                         }
72                         PushBack();
73                         c = '\\';
74                 }
75                 if (040 < c && c < 0177)
76                         error("garbage char %c", c);
77                 else
78                         error("garbage char \\%03o", c);
79                 goto again;
80         case STSIMP:    /* a simple character, no part of compound token*/
81                 if (c == '/') { /* probably the start of comment        */
82                         LoadChar(c);
83                         if (c == '*') { /* start of comment     */
84                                 skipcomment();
85                                 goto again;
86                         }
87                         else {
88                                 PushBack();
89                                 c = '/';        /* restore c    */
90                         }
91                 }
92                 return ptok->tk_symb = c;
93         case STCOMP:    /* maybe the start of a compound token          */
94                 LoadChar(nch);                  /* character lookahead  */
95                 switch (c) {
96                 case '!':
97                         if (nch == '=')
98                                 return ptok->tk_symb = NOTEQUAL;
99                         PushBack();
100                         return ptok->tk_symb = c;
101                 case '&':
102                         if (nch == '&')
103                                 return ptok->tk_symb = AND;
104                         PushBack();
105                         return ptok->tk_symb = c;
106                 case '<':
107                         if (AccFileSpecifier) {
108                                 PushBack();     /* pushback nch */
109                                 ptok->tk_str =
110                                         string_token("file specifier", '>');
111                                 return ptok->tk_symb = FILESPECIFIER;
112                         }
113                         if (nch == '<')
114                                 return ptok->tk_symb = LEFT;
115                         if (nch == '=')
116                                 return ptok->tk_symb = LESSEQ;
117                         PushBack();
118                         return ptok->tk_symb = c;
119                 case '=':
120                         if (nch != '=') {
121                                 PushBack();
122                                 error("missing =");
123                         }
124                         return ptok->tk_symb = EQUAL;
125                 case '>':
126                         if (nch == '=')
127                                 return ptok->tk_symb = GREATEREQ;
128                         if (nch == '>')
129                                 return ptok->tk_symb = RIGHT;
130                         PushBack();
131                         return ptok->tk_symb = c;
132                 case '|':
133                         if (nch == '|')
134                                 return ptok->tk_symb = OR;
135                         PushBack();
136                         return ptok->tk_symb = c;
137                 }
138         case STIDF:
139         {
140                 extern int idfsize;             /* ??? */
141                 register char *tg = &buf[0];
142                 register char *maxpos = &buf[idfsize];
143                 register struct idf *idef;
144
145 #define tstmac(bx)      if (!(bits[c] & bx)) goto nomac
146 #define cpy             if (Unstacked) EnableMacros(); *tg++ = c
147 #define load            LoadChar(c); if (!in_idf(c)) goto endidf
148
149 #ifdef DOBITS
150                 cpy; tstmac(bit0); load;
151                 cpy; tstmac(bit1); load;
152                 cpy; tstmac(bit2); load;
153                 cpy; tstmac(bit3); load;
154                 cpy; tstmac(bit4); load;
155                 cpy; tstmac(bit5); load;
156                 cpy; tstmac(bit6); load;
157                 cpy; tstmac(bit7); load;
158 #endif
159
160                 for(;;) {
161                         if (tg < maxpos) {
162                                 cpy;
163                         }
164                         load;
165                 }
166         endidf:
167                 PushBack();
168                 *tg = '\0';     /* mark the end of the identifier */
169                 if (ReplaceMacros) {
170                         idef = findidf(buf);
171                         if ((idef && idef->id_macro && replace(idef))) {
172                                 goto again;
173                         }
174                 }
175         nomac:
176                 LoadChar(c);
177                 while (in_idf(c)) {
178                         if (tg < maxpos) *tg++ = c;
179                         LoadChar(c);
180                 }
181                 PushBack();
182                 *tg++ = '\0';   /* mark the end of the identifier       */
183                 if (UnknownIdIsZero) {
184                         ptok->tk_val = 0;
185                         return ptok->tk_symb = INTEGER;
186                 }
187                 ptok->tk_str = Malloc(tg - buf);
188                 strcpy(ptok->tk_str, buf);
189                 return ptok->tk_symb = IDENTIFIER;
190         }
191         case STCHAR:                            /* character constant   */
192         {
193                 register arith val = 0;
194                 register int size = 0;
195
196                 LoadChar(c);
197                 if (c == '\'')
198                         error("character constant too short");
199                 else
200                 while (c != '\'') {
201                         if (c == '\n') {
202                                 error("newline in character constant");
203                                 PushBack();
204                                 break;
205                         }
206                         if (c == '\\') {
207                                 LoadChar(c);
208                                 if (c == '\n') {
209                                         LineNumber++;
210                                 }
211                                 c = quoted(c);
212                         }
213                         if (c >= 128) c -= 256;
214                         val = val*256 + c;
215                         size++;
216                         LoadChar(c);
217                 }
218                 if (size > sizeof(arith))
219                         error("character constant too long");
220                 ptok->tk_val = val;
221                 return ptok->tk_symb = INTEGER;
222         }
223         case STNUM:
224         {
225                 register char *np = &buf[1];
226                 register int base = 10;
227                 register int vch;
228                 register arith val = 0;
229
230                 if (c == '0') {
231                         *np++ = c;
232                         LoadChar(c);
233                         if (c == 'x' || c == 'X') {
234                                 base = 16;
235                                 LoadChar(c);
236                         }
237                         else
238                                 base = 8;
239                 }
240                 while (vch = val_in_base(c, base), vch >= 0) {
241                         val = val*base + vch;
242                         if (np < &buf[NUMSIZE])
243                                 *np++ = c;
244                         LoadChar(c);
245                 }
246                 if (c == 'l' || c == 'L')
247                         LoadChar(c);
248                 PushBack();
249                 ptok->tk_val = val;
250                 return ptok->tk_symb = INTEGER;
251         }
252         case STSTR:
253                 ptok->tk_str = string_token("string", '"');
254                 return ptok->tk_symb = STRING;
255         case STEOI:                     /* end of text on source file   */
256                 return ptok->tk_symb = EOF;
257         default:
258                 crash("Impossible character class");
259         }
260         /*NOTREACHED*/
261 }
262
263 skipcomment()
264 {
265         register int c;
266
267         NoUnstack++;
268         LoadChar(c);
269         do {
270                 while (c != '*') {
271                         if (class(c) == STNL)
272                                 ++LineNumber;
273                         else
274                         if (c == EOI) {
275                                 NoUnstack--;
276                                 return;
277                         }
278                         LoadChar(c);
279                 }
280                 /* Last Character seen was '*' */
281                 LoadChar(c);
282         } while (c != '/');
283         NoUnstack--;
284 }
285
286 char *
287 string_token(nm, stop_char)
288         char *nm;
289 {
290         register int c;
291         register unsigned int str_size;
292         register char *str = Malloc(str_size = ISTRSIZE);
293         register int pos = 0;
294         
295         LoadChar(c);
296         while (c != stop_char) {
297                 if (c == '\n') {
298                         error("newline in %s", nm);
299                         PushBack();
300                         break;
301                 }
302                 if (c == EOI) {
303                         error("end-of-file inside %s", nm);
304                         break;
305                 }
306                 if (c == '\\') {
307                         LoadChar(c);
308                         if (c == '\n') {
309                                 LineNumber++;
310                                 LoadChar(c);
311                                 continue;
312                         }
313                         c = quoted(c);
314                 }
315                 str[pos++] = c;
316                 if (pos == str_size)
317                         str = Realloc(str, str_size <<= 1);
318                 LoadChar(c);
319         }
320         str[pos++] = '\0'; /* for filenames etc. */
321         str = Realloc(str, pos);
322         return str;
323 }
324
/*	quoted() translates an escape sequence into the character it
	denotes. The character following the backslash is passed in c.
	An octal escape consumes at most three digits from the input;
	the letters n, t, b, r and f map to their control characters and
	every other character stands for itself. The result is reduced
	to eight bits.
*/
int
quoted(c)
	register int c;
{
	if (is_oct(c)) {
		/* octal escape: c holds the first digit */
		register int value = 0, extra = 0;

		do {
			value = value*8 + (c-'0');
			LoadChar(c);
		} while (is_oct(c) && ++extra < 3);
		PushBack();	/* the character that ended the digits */
		c = value;
	}
	else if (c == 'n')
		c = '\n';
	else if (c == 't')
		c = '\t';
	else if (c == 'b')
		c = '\b';
	else if (c == 'r')
		c = '\r';
	else if (c == 'f')
		c = '\f';
	return c&0377;
}
364
/*	val_in_base() returns the value of the character c when read as
	a digit in the given base, or -1 when c is not a digit of that
	base. A digit whose value is not below the base is rejected, so
	'8' and '9' are not octal digits and the letters a-f (in either
	case) are digits only when the base exceeds their value.
*/
int
val_in_base(c, base)
	register int c;
	int base;
{
	register int digit;

	if (c >= '0' && c <= '9')
		digit = c - '0';
	else if (c >= 'a' && c <= 'f')
		digit = c - 'a' + 10;
	else if (c >= 'A' && c <= 'F')
		digit = c - 'A' + 10;
	else
		return -1;
	return digit < base ? digit : -1;
}