Pristine Ack-5.5
[Ack-5.5.git] / lang / cem / cpp.ansi / LLlex.c
1 /*
2  * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
3  * See the copyright notice in the ACK home directory, in the file "Copyright".
4  */
5 /* $Id: LLlex.c,v 1.6 1994/06/24 11:36:19 ceriel Exp $ */
6 /*                  L E X I C A L   A N A L Y Z E R                     */
7
8 #include        "idfsize.h"
9 #include        "numsize.h"
10 #include        "strsize.h"
11
12 #include        <alloc.h>
13 #include        "input.h"
14 #include        "arith.h"
15 #include        "macro.h"
16 #include        "idf.h"
17 #include        "LLlex.h"
18 #include        "Lpars.h"
19 #include        "class.h"
20 #include        "bits.h"
21
22 #define BUFSIZ  1024
23
24 struct token dot;
25
26 int ReplaceMacros = 1;          /* replacing macros                     */
27 int AccDefined = 0;             /* accept "defined(...)"                */
28 int UnknownIdIsZero = 0;        /* interpret unknown id as integer 0    */
29 int Unstacked = 0;              /* an unstack is done                   */
30 int AccFileSpecifier = 0;       /* return filespecifier <...>           */
31 int LexSave = 0;                /* last character read by GetChar       */
32 extern int InputLevel;          /* # of current macro expansions        */
33
34 extern char     *string_token();
35 extern char     *strcpy();
36 extern arith    char_constant();
37 #define         FLG_ESEEN       0x01    /* possibly a floating point number */
38 #define         FLG_DOTSEEN     0x02    /* certainly a floating point number */
39
40 int
41 LLlex()
42 {
43         return (DOT != EOF) ? GetToken(&dot) : EOF;
44 }
45
46
47 int
48 GetToken(ptok)
49         register struct token *ptok;
50 {
51         /*      GetToken() is the actual token recognizer. It calls the
52                 control line interpreter if it encounters a "\n{w}*#"
53                 combination. Macro replacement is also performed if it is
54                 needed.
55         */
56         char buf[BUFSIZ];
57         register int ch, nch;
58
59 again:  /* rescan the input after an error or replacement       */
60         ch = GetChar();
61         /* rescan, the following character has been read        */
62         if ((ch & 0200) && ch != EOI) /* stop on non-ascii character */
63                 fatal("non-ascii '\\%03o' read", ch & 0377);
64         /* keep track of the place of the token in the file     */
65
66         switch (class(ch)) {    /* detect character class       */
67         case STNL:              /* newline, vertical space or formfeed  */
68                 LineNumber++;
69                 return ptok->tk_symb = EOF;
70         case STSKIP:            /* just skip the skip characters        */
71                 goto again;
72         case STGARB:            /* garbage character                    */
73 garbage:
74                 if (040 < ch && ch < 0177)
75                         error("garbage char %c", ch);
76                 else
77                         error("garbage char \\%03o", ch);
78                 goto again;
79         case STSIMP:    /* a simple character, no part of compound token*/
80                 return ptok->tk_symb = ch;
81         case STCOMP:    /* maybe the start of a compound token          */
82                 nch = GetChar();                /* character lookahead  */
83                 switch (ch) {
84                 case '!':
85                         if (nch == '=')
86                                 return ptok->tk_symb = NOTEQUAL;
87                         UnGetChar();
88                         return ptok->tk_symb = ch;
89                 case '&':
90                         if (nch == '&')
91                                 return ptok->tk_symb = AND;
92                         else if (nch == '=')
93                                 return ptok->tk_symb = ANDAB;
94                         UnGetChar();
95                         return ptok->tk_symb = ch;
96                 case '+':
97                         if (nch == '+')
98                                 return ptok->tk_symb = PLUSPLUS;
99                         else if (nch == '=')
100                                 return ptok->tk_symb = PLUSAB;
101                         UnGetChar();
102                         return ptok->tk_symb = ch;
103                 case '-':
104                         if (nch == '-')
105                                 return ptok->tk_symb = MINMIN;
106                         else if (nch == '>')
107                                 return ptok->tk_symb = ARROW;
108                         else if (nch == '=')
109                                 return ptok->tk_symb = MINAB;
110                         UnGetChar();
111                         return ptok->tk_symb = ch;
112                 case '<':
113                         if (AccFileSpecifier) {
114                                 UnGetChar();    /* pushback nch */
115                                 ptok->tk_str =
116                                         string_token("file specifier", '>');
117                                 return ptok->tk_symb = FILESPECIFIER;
118                         } else if (nch == '<') {
119                                 if ((nch = GetChar()) == '=')
120                                         return ptok->tk_symb = LEFTAB;
121                                 UnGetChar();
122                                 return ptok->tk_symb = LEFT;
123                         } else if (nch == '=')
124                                 return ptok->tk_symb = LESSEQ;
125                         UnGetChar();
126                         return ptok->tk_symb = ch;
127                 case '=':
128                         if (nch == '=')
129                                 return ptok->tk_symb = EQUAL;
130                         UnGetChar();
131                         return ptok->tk_symb = ch;
132                 case '>':
133                         if (nch == '=')
134                                 return ptok->tk_symb = GREATEREQ;
135                         else if (nch == '>') {
136                                 if ((nch = GetChar()) == '=')
137                                         return ptok->tk_symb = RIGHTAB;
138                                 UnGetChar();
139                                 return ptok->tk_symb = RIGHT;
140                         }
141                         UnGetChar();
142                         return ptok->tk_symb = ch;
143                 case '|':
144                         if (nch == '|')
145                                 return ptok->tk_symb = OR;
146                         else if (nch == '=')
147                                 return ptok->tk_symb = ORAB;
148                         UnGetChar();
149                         return ptok->tk_symb = ch;
150                 case '%':
151                         if (nch == '=')
152                                 return ptok->tk_symb = MODAB;
153                         UnGetChar();
154                         return ptok->tk_symb = ch;
155                 case '*':
156                         if (nch == '=')
157                                 return ptok->tk_symb = TIMESAB;
158                         UnGetChar();
159                         return ptok->tk_symb = ch;
160                 case '^':
161                         if (nch == '=')
162                                 return ptok->tk_symb = XORAB;
163                         UnGetChar();
164                         return ptok->tk_symb = ch;
165                 case '/':
166                         if (nch == '*' && !InputLevel) {
167                                 skipcomment();
168                                 goto again;
169                         }
170                         else if (nch == '=')
171                                 return ptok->tk_symb = DIVAB;
172                         UnGetChar();
173                         return ptok->tk_symb = ch;
174                 default:
175                         crash("bad class for char 0%o", ch);
176                         /* NOTREACHED */
177                 }
178         case STCHAR:                            /* character constant   */
179                 ptok->tk_val = char_constant("character");
180                 return ptok->tk_symb = INTEGER;
181         case STSTR:                                     /* string       */
182                 ptok->tk_str = string_token("string", '"');
183                 return ptok->tk_symb = STRING;
184         case STELL:             /* wide character constant/string prefix */
185                 nch = GetChar();
186                 if (nch == '"') {
187                         ptok->tk_str =
188                                 string_token("wide character string", '"');
189                         return ptok->tk_symb = STRING;
190                 } else if (nch == '\'') {
191                         ptok->tk_val = char_constant("wide character");
192                         return ptok->tk_symb = INTEGER;
193                 }
194                 UnGetChar();
195                 /* fallthrough */
196         case STIDF:
197         {
198                 extern int idfsize;             /* ??? */
199                 register char *tg = &buf[0];
200                 register char *maxpos = &buf[idfsize];
201                 int NoExpandNext = 0;
202
203 #define tstmac(bx)      if (!(bits[ch] & bx)) goto nomac
204 #define cpy             *tg++ = ch
205 #define load            (ch = GetChar()); if (!in_idf(ch)) goto endidf
206
207                 if (Unstacked) EnableMacros();  /* unstack macro's when allowed. */
208                 if (ch == NOEXPM)  {
209                         NoExpandNext = 1;
210                         ch = GetChar();
211                 }
212 #ifdef DOBITS
213                 cpy; tstmac(bit0); load;
214                 cpy; tstmac(bit1); load;
215                 cpy; tstmac(bit2); load;
216                 cpy; tstmac(bit3); load;
217                 cpy; tstmac(bit4); load;
218                 cpy; tstmac(bit5); load;
219                 cpy; tstmac(bit6); load;
220                 cpy; tstmac(bit7); load;
221 #endif
222                 for(;;) {
223                         if (tg < maxpos) {
224                                 cpy;
225
226                         }
227                         load;
228                 }
229         endidf:
230                 /*if (ch != EOI) UnGetChar();*/
231                 UnGetChar();
232                 *tg++ = '\0';   /* mark the end of the identifier       */
233                 if (ReplaceMacros) {
234                         register struct idf *idef = findidf(buf);
235
236                         if (idef && idef->id_macro && !NoExpandNext) {
237                                 if (replace(idef))
238                                         goto again;
239                         }
240                 }
241
242         nomac:                  /* buf can already be null-terminated. soit */
243                 ch = GetChar();
244                 while (in_idf(ch)) {
245                         if (tg < maxpos) *tg++ = ch;
246                         ch = GetChar();
247                 }
248                 UnGetChar();
249                 *tg++ = '\0';   /* mark the end of the identifier       */
250
251                 NoExpandNext = 0;
252                 if (UnknownIdIsZero) {
253                         ptok->tk_val = (arith)0;
254                         return ptok->tk_symb = INTEGER;
255                 }
256                 ptok->tk_str = Malloc((unsigned)(tg - buf));
257                 strcpy(ptok->tk_str, buf);
258                 return IDENTIFIER;
259         }
260         case STNUM:                             /* a numeric constant   */
261         {                       /* it may only be an integer constant */
262                 register int base = 10, vch;
263                 register arith val = 0;
264                 int ovfl = 0;
265                 arith ubound = ~(1<<(sizeof(arith)*8-1))/(base/2);
266
267                 /* Since the preprocessor only knows integers and has
268                  * nothing to do with ellipsis we just return when the
269                  * pp-number starts with a '.'
270                  */
271                 if (ch == '.') {
272                         return ptok->tk_symb = ch;
273                 }
274                 if (ch == '0') {
275                         ch = GetChar();
276                         if (ch == 'x' || ch == 'X') {
277                                 base = 16;
278                                 ch = GetChar();
279                         } else {
280                                 base = 8;
281                         }
282
283                 }
284                 while ((vch = val_in_base(ch, base)) >= 0) {
285                         if (val < 0 || val > ubound) ovfl++;
286                         val *= base;
287                         if (val < 0 && val + vch >= 0) ovfl++;
288                         val += vch;
289                         ch = GetChar();
290                 }
291                 ptok->tk_unsigned = 0;
292                 if (ch == 'u' || ch == 'U') {
293                         ptok->tk_unsigned = 1;
294                         ch = GetChar();
295                         if (ch == 'l' || ch == 'L') {
296                                 ch = GetChar();
297                         }
298                 }
299                 else if (ch == 'l' || ch == 'L') {
300                         ch = GetChar();
301                         if (ch == 'u' || ch == 'U') {
302                                 ptok->tk_unsigned = 1;
303                                 ch = GetChar();
304                         }
305                 }
306                 if (ovfl) {
307                         warning("overflow in constant");
308                         ptok->tk_unsigned = 1;
309                 }
310                 else if (val < 0) {
311                         /* give warning??? */
312                         ptok->tk_unsigned = 1;
313                 }
314                 UnGetChar();
315                 ptok->tk_val = val;
316                 return ptok->tk_symb = INTEGER;
317         }
318         case STEOI:                     /* end of text on source file   */
319                 return ptok->tk_symb = EOF;
320         case STMSPEC:
321                 if (!InputLevel) goto garbage;
322                 if (ch == TOKSEP) goto again;
323                 /* fallthrough shouldn't happen */
324         default:                                /* this cannot happen   */
325                 crash("bad class for char 0%o", ch);
326         }
327         /*NOTREACHED*/
328 }
329
330 skipcomment()
331 {
332         /*      The last character read has been the '*' of '/_*'.  The
333                 characters, except NL and EOI, between '/_*' and the first
334                 occurring '*_/' are not interpreted.
335                 NL only affects the LineNumber.  EOI is not legal.
336
337                 Important note: it is not possible to stop skipping comment
338                 beyond the end-of-file of an included file.
339                 EOI is returned by LoadChar only on encountering EOF of the
340                 top-level file...
341         */
342         register int c;
343
344         NoUnstack++;
345         c = GetChar();
346         do {
347                 while (c != '*') {
348                         if (class(c) == STNL) {
349                                 ++LineNumber;
350                         } else if (c == EOI) {
351                                 NoUnstack--;
352                                 return;
353                         }
354                         c = GetChar();
355                 } /* last Character seen was '*' */
356                 c = GetChar();
357         } while (c != '/');
358         NoUnstack--;
359 }
360
361 arith
362 char_constant(nm)
363         char *nm;
364 {
365         register arith val = 0;
366         register int ch;
367         int size = 0;
368
369         ch = GetChar();
370         if (ch == '\'')
371                 error("%s constant too short", nm);
372         else
373         while (ch != '\'') {
374                 if (ch == '\n') {
375                         error("newline in %s constant", nm);
376                         LineNumber++;
377                         break;
378                 }
379                 if (ch == '\\')
380                         ch = quoted(GetChar());
381                 if (ch >= 128) ch -= 256;
382                 if (size < sizeof(arith))
383                         val |= ch << (8 * size);
384                 size++;
385                 ch = GetChar();
386         }
387         if (size > sizeof(arith))
388                 error("%s constant too long", nm);
389         else if (size > 1)
390                 strict("%s constant includes more than one character", nm);
391         return val;
392 }
393
394 char *
395 string_token(nm, stop_char)
396         char *nm;
397 {
398         register int ch;
399         register int str_size;
400         register char *str = Malloc((unsigned) (str_size = ISTRSIZE));
401         register int pos = 0;
402         
403         ch = GetChar();
404         while (ch != stop_char) {
405                 if (ch == '\n') {
406                         error("newline in %s", nm);
407                         LineNumber++;
408                         break;
409                 }
410                 if (ch == EOI) {
411                         error("end-of-file inside %s", nm);
412                         break;
413                 }
414                 if (ch == '\\' && !AccFileSpecifier)
415                         ch = quoted(GetChar());
416                 str[pos++] = ch;
417                 if (pos == str_size)
418                         str = Realloc(str, (unsigned)(str_size <<= 1));
419                 ch = GetChar();
420         }
421         str[pos++] = '\0'; /* for filenames etc. */
422         str = Realloc(str, (unsigned)pos);
423         return str;
424 }
425
int
quoted(ch)
	register int ch;
{
	/*	quoted() translates an escape sequence into the character
		it stands for.  `ch' is the character that followed the
		backslash; further characters are read with GetChar() when
		the sequence is octal (at most three digits) or hexadecimal
		(\x followed by any number of hex digits).  Characters with
		no special meaning stand for themselves.  The result is
		reduced to 8 bits.
	*/
	register int result;

	if (is_oct(ch)) {			/* a quoted octal */
		register int ndigits = 0;

		result = 0;
		do {
			result = result*8 + (ch-'0');
			ch = GetChar();
		} while (is_oct(ch) && ++ndigits < 3);
		UnGetChar();
		return result&0377;
	}

	switch (ch) {				/* a quoted char */
	case 'n':	result = '\n';	break;
	case 't':	result = '\t';	break;
	case 'b':	result = '\b';	break;
	case 'r':	result = '\r';	break;
	case 'f':	result = '\f';	break;
	case 'a':	result = '\007'; break;	/* alert */
	case 'v':	result = '\013'; break;	/* vertical tab */
	case 'x':				/* quoted hex */
	{
		register int digit;

		result = 0;
		while ((digit = val_in_base(GetChar(), 16)) != -1)
			result = result * 16 + digit;
		UnGetChar();	/* give back the character ending the digits */
		break;
	}
	default:
		result = ch;
		break;
	}
	return result&0377;
}
484
485
int
val_in_base(ch, base)
	register int ch;
{
	/*	val_in_base() returns the value of the digit `ch' in the
		given base (8, 10 or 16), or -1 when `ch' is not a digit of
		that base.  Any other base is a fatal error.
	*/
	switch (base) {
	case 8:
		/* octal digits are '0' up to and including '7' */
		return (is_dig(ch) && ch < '8') ? ch - '0' : -1;
	case 10:
		return is_dig(ch) ? ch - '0' : -1;
	case 16:
		/* the mask makes 'a'-'f' and 'A'-'F' both yield 10-15 */
		return is_dig(ch) ? ch - '0'
			: is_hex(ch) ? (ch - 'a' + 10) & 017
			: -1;
	default:
		fatal("(val_in_base) illegal base value %d", base);
		/* NOTREACHED */
	}
	return -1;	/* only reached if fatal() should ever return */
}
504
505
506 int
507 GetChar()
508 {
509         /*      The routines GetChar and trigraph parses the trigraph
510                 sequences and removes occurences of \\\n.
511         */
512         register int ch;
513
514 again:
515         LoadChar(ch);
516
517         /* possible trigraph sequence */
518         if (ch == '?')
519                 ch = trigraph();
520
521         /* \\\n are removed from the input stream */
522         if (ch == '\\') {
523                 LoadChar(ch);
524                 if (ch == '\n') {
525                         ++LineNumber;
526                         goto again;
527                 }
528                 PushBack();
529                 ch = '\\';
530         }
531         return(LexSave = ch);
532 }
533
534
int
trigraph()
{
	/*	trigraph() is called when a '?' has been read.  If the next
		two characters complete a trigraph sequence, the character
		it denotes is returned.  Otherwise everything read here is
		pushed back and the '?' itself is returned.
	*/
	register int ch;

	LoadChar(ch);
	if (ch != '?') {
		PushBack();
		return '?';
	}

	LoadChar(ch);
	switch (ch) {		/* its a trigraph */
	case '=':	return '#';
	case '(':	return '[';
	case '/':	return '\\';
	case ')':	return ']';
	case '\'':	return '^';
	case '<':	return '{';
	case '!':	return '|';
	case '>':	return '}';
	case '-':	return '~';
	}

	/* no trigraph: give back the third and the second character */
	PushBack();
	PushBack();
	return '?';
}
575         return('?');
576 }