2 * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
3 * See the copyright notice in the ACK home directory, in the file "Copyright".
5 /* $Id: LLlex.c,v 1.6 1994/06/24 11:36:19 ceriel Exp $ */
6 /* L E X I C A L A N A L Y Z E R */
26 int ReplaceMacros = 1; /* nonzero: macros are being replaced (initially on) */
27 int AccDefined = 0; /* nonzero: accept "defined(...)" (initially off) */
28 int UnknownIdIsZero = 0; /* nonzero: an unknown id is returned as an INTEGER token with value 0 */
29 int Unstacked = 0; /* set when an unstack is done; tested before EnableMacros() is called */
30 int AccFileSpecifier = 0; /* nonzero: return <...> as one FILESPECIFIER token, read up to '>' */
31 int LexSave = 0; /* last character read by GetChar */
32 extern int InputLevel; /* # of current macro expansions */
34 extern char *string_token(); /* old-style declaration; called below with a description and a stop character */
35 extern char *strcpy(); /* NOTE(review): hand-written declaration of the library routine; <string.h> would supply the prototype - confirm */
36 extern arith char_constant(); /* old-style declaration; called below with a description, result stored in tk_val */
37 #define FLG_ESEEN 0x01 /* possibly a floating point number */
38 #define FLG_DOTSEEN 0x02 /* certainly a floating point number */
43 return (DOT != EOF) ? GetToken(&dot) : EOF;
49 register struct token *ptok;
51 /* GetToken() is the actual token recognizer. It calls the
52 control line interpreter if it encounters a "\n{w}*#"
53 combination. Macro replacement is also performed if it is
59 again: /* rescan the input after an error or replacement */
61 /* rescan, the following character has been read */
62 if ((ch & 0200) && ch != EOI) /* stop on non-ascii character */
63 fatal("non-ascii '\\%03o' read", ch & 0377);
64 /* keep track of the place of the token in the file */
66 switch (class(ch)) { /* detect character class */
67 case STNL: /* newline, vertical space or formfeed */
69 return ptok->tk_symb = EOF;
70 case STSKIP: /* just skip the skip characters */
72 case STGARB: /* garbage character */
74 if (040 < ch && ch < 0177)
75 error("garbage char %c", ch);
77 error("garbage char \\%03o", ch);
79 case STSIMP: /* a simple character, no part of compound token*/
80 return ptok->tk_symb = ch;
81 case STCOMP: /* maybe the start of a compound token */
82 nch = GetChar(); /* character lookahead */
86 return ptok->tk_symb = NOTEQUAL;
88 return ptok->tk_symb = ch;
91 return ptok->tk_symb = AND;
93 return ptok->tk_symb = ANDAB;
95 return ptok->tk_symb = ch;
98 return ptok->tk_symb = PLUSPLUS;
100 return ptok->tk_symb = PLUSAB;
102 return ptok->tk_symb = ch;
105 return ptok->tk_symb = MINMIN;
107 return ptok->tk_symb = ARROW;
109 return ptok->tk_symb = MINAB;
111 return ptok->tk_symb = ch;
113 if (AccFileSpecifier) {
114 UnGetChar(); /* pushback nch */
116 string_token("file specifier", '>');
117 return ptok->tk_symb = FILESPECIFIER;
118 } else if (nch == '<') {
119 if ((nch = GetChar()) == '=')
120 return ptok->tk_symb = LEFTAB;
122 return ptok->tk_symb = LEFT;
123 } else if (nch == '=')
124 return ptok->tk_symb = LESSEQ;
126 return ptok->tk_symb = ch;
129 return ptok->tk_symb = EQUAL;
131 return ptok->tk_symb = ch;
134 return ptok->tk_symb = GREATEREQ;
135 else if (nch == '>') {
136 if ((nch = GetChar()) == '=')
137 return ptok->tk_symb = RIGHTAB;
139 return ptok->tk_symb = RIGHT;
142 return ptok->tk_symb = ch;
145 return ptok->tk_symb = OR;
147 return ptok->tk_symb = ORAB;
149 return ptok->tk_symb = ch;
152 return ptok->tk_symb = MODAB;
154 return ptok->tk_symb = ch;
157 return ptok->tk_symb = TIMESAB;
159 return ptok->tk_symb = ch;
162 return ptok->tk_symb = XORAB;
164 return ptok->tk_symb = ch;
166 if (nch == '*' && !InputLevel) {
171 return ptok->tk_symb = DIVAB;
173 return ptok->tk_symb = ch;
175 crash("bad class for char 0%o", ch);
178 case STCHAR: /* character constant */
179 ptok->tk_val = char_constant("character");
180 return ptok->tk_symb = INTEGER;
181 case STSTR: /* string */
182 ptok->tk_str = string_token("string", '"');
183 return ptok->tk_symb = STRING;
184 case STELL: /* wide character constant/string prefix */
188 string_token("wide character string", '"');
189 return ptok->tk_symb = STRING;
190 } else if (nch == '\'') {
191 ptok->tk_val = char_constant("wide character");
192 return ptok->tk_symb = INTEGER;
198 extern int idfsize; /* ??? */
199 register char *tg = &buf[0];
200 register char *maxpos = &buf[idfsize];
201 int NoExpandNext = 0;
203 #define tstmac(bx) if (!(bits[ch] & bx)) goto nomac
204 #define cpy *tg++ = ch
205 #define load (ch = GetChar()); if (!in_idf(ch)) goto endidf
207 if (Unstacked) EnableMacros(); /* unstack macro's when allowed. */
213 cpy; tstmac(bit0); load;
214 cpy; tstmac(bit1); load;
215 cpy; tstmac(bit2); load;
216 cpy; tstmac(bit3); load;
217 cpy; tstmac(bit4); load;
218 cpy; tstmac(bit5); load;
219 cpy; tstmac(bit6); load;
220 cpy; tstmac(bit7); load;
230 /*if (ch != EOI) UnGetChar();*/
232 *tg++ = '\0'; /* mark the end of the identifier */
234 register struct idf *idef = findidf(buf);
236 if (idef && idef->id_macro && !NoExpandNext) {
242 nomac: /* buf can already be null-terminated. soit */
245 if (tg < maxpos) *tg++ = ch;
249 *tg++ = '\0'; /* mark the end of the identifier */
252 if (UnknownIdIsZero) {
253 ptok->tk_val = (arith)0;
254 return ptok->tk_symb = INTEGER;
256 ptok->tk_str = Malloc((unsigned)(tg - buf));
257 strcpy(ptok->tk_str, buf);
260 case STNUM: /* a numeric constant */
261 { /* it may only be an integer constant */
262 register int base = 10, vch;
263 register arith val = 0;
265 arith ubound = ~(1<<(sizeof(arith)*8-1))/(base/2);
267 /* Since the preprocessor only knows integers and has
268 * nothing to do with ellipsis we just return when the
269 * pp-number starts with a '.'
272 return ptok->tk_symb = ch;
276 if (ch == 'x' || ch == 'X') {
284 while ((vch = val_in_base(ch, base)) >= 0) {
285 if (val < 0 || val > ubound) ovfl++;
287 if (val < 0 && val + vch >= 0) ovfl++;
291 ptok->tk_unsigned = 0;
292 if (ch == 'u' || ch == 'U') {
293 ptok->tk_unsigned = 1;
295 if (ch == 'l' || ch == 'L') {
299 else if (ch == 'l' || ch == 'L') {
301 if (ch == 'u' || ch == 'U') {
302 ptok->tk_unsigned = 1;
307 warning("overflow in constant");
308 ptok->tk_unsigned = 1;
311 /* give warning??? */
312 ptok->tk_unsigned = 1;
316 return ptok->tk_symb = INTEGER;
318 case STEOI: /* end of text on source file */
319 return ptok->tk_symb = EOF;
321 if (!InputLevel) goto garbage;
322 if (ch == TOKSEP) goto again;
323 /* fallthrough shouldn't happen */
324 default: /* this cannot happen */
325 crash("bad class for char 0%o", ch);
332 /* The last character read has been the '*' of '/_*'. The
333 characters, except NL and EOI, between '/_*' and the first
334 occurring '*_/' are not interpreted.
335 NL only affects the LineNumber. EOI is not legal.
337 Important note: it is not possible to stop skipping comment
338 beyond the end-of-file of an included file.
339 EOI is returned by LoadChar only on encountering EOF of the
348 if (class(c) == STNL) {
350 } else if (c == EOI) {
355 } /* last Character seen was '*' */
365 register arith val = 0;
371 error("%s constant too short", nm);
375 error("newline in %s constant", nm);
380 ch = quoted(GetChar());
381 if (ch >= 128) ch -= 256;
382 if (size < sizeof(arith))
383 val |= ch << (8 * size);
387 if (size > sizeof(arith))
388 error("%s constant too long", nm);
390 strict("%s constant includes more than one character", nm);
395 string_token(nm, stop_char)
399 register int str_size;
400 register char *str = Malloc((unsigned) (str_size = ISTRSIZE));
401 register int pos = 0;
404 while (ch != stop_char) {
406 error("newline in %s", nm);
411 error("end-of-file inside %s", nm);
414 if (ch == '\\' && !AccFileSpecifier)
415 ch = quoted(GetChar());
418 str = Realloc(str, (unsigned)(str_size <<= 1));
421 str[pos++] = '\0'; /* for filenames etc. */
422 str = Realloc(str, (unsigned)pos);
430 /* quoted() replaces an escaped character sequence by the
433 /* first char after backslash already in ch */
434 if (!is_oct(ch)) { /* a quoted char */
451 case 'a': /* alert */
454 case 'v': /* vertical tab */
457 case 'x': /* quoted hex */
459 register int hex = 0;
464 if (vch = val_in_base(ch, 16), vch == -1)
466 hex = hex * 16 + vch;
472 } else { /* a quoted octal */
473 register int oct = 0, cnt = 0;
476 oct = oct*8 + (ch-'0');
478 } while (is_oct(ch) && ++cnt < 3);
487 val_in_base(ch, base) /* visible branches yield the digit value of ch, or -1 when ch is not a digit */
492 return (is_dig(ch) && ch < '9') ? ch - '0' : -1; /* NOTE(review): only '9' is rejected, so '8' yields 8; if this is the base-8 branch the bound is presumably meant to be '8' - confirm */
494 return is_dig(ch) ? ch - '0' : -1;
496 return is_dig(ch) ? ch - '0'
497 : is_hex(ch) ? (ch - 'a' + 10) & 017 /* in ASCII the 017 mask also maps 'A'..'F' onto 10..15 */
500 fatal("(val_in_base) illegal base value %d", base); /* reports a base the function does not handle */
509 /* The routines GetChar and trigraph parse the trigraph
510 sequences and remove occurrences of \\\n.
517 /* possible trigraph sequence */
521 /* \\\n are removed from the input stream */
531 return(LexSave = ch);
543 switch (ch) { /* its a trigraph */