2 * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
3 * See the copyright notice in the ACK home directory, in the file "Copyright".
5 /* $Id: LLlex.c,v 3.27 1994/06/24 12:01:36 ceriel Exp $ */
6 /* L E X I C A L A N A L Y Z E R */
26 /* Data about the token yielded */
/* NOTE(review): `dot' is the token that is copied into and back out of
   LexStack when the scanner state is saved and restored. Going by the
   names and by the LLlex() commentary on look-ahead and putting tokens
   aside, `ahead' is presumably the look-ahead token and `aside' a token
   that has been put aside -- confirm against LLlex().
*/
27 struct token dot, ahead, aside;
28 int token_nmb = 0; /* number of the ahead token */
29 int tk_nmb_at_last_syn_err = -5/*ERR_SHADOW*/;
30 /* token number at last syntax error */
/* Mode flags consulted by the scanner; all are plain ints used as booleans. */
33 int ReplaceMacros = 1; /* replacing macros */
34 int AccDefined = 0; /* accept "defined(...)" */
35 int UnknownIdIsZero = 0; /* interpret unknown id as integer 0 */
36 int Unstacked = 0; /* an unstack is done */
38 int SkipEscNewline = 0; /* how to interpret backslash-newline */
39 int AccFileSpecifier = 0; /* return filespecifier <...> */
40 int EoiForNewline = 0; /* return EOI upon encountering newline */
41 int File_Inserted = 0; /* a file has just been inserted */
/* Defined elsewhere. While it is zero, the comment skipper reports the
   start, the characters and the end of each comment through the
   lint_start_comment(), lint_comment_char() and lint_end_comment() hooks.
*/
43 extern int lint_skip_comment;
/* Number of slots in LexStack, i.e. how many saved `dot' tokens can be
   outstanding at once.
*/
46 #define MAX_LL_DEPTH 2
/* Save area for `dot': a slot is filled with LexStack[LexSP++] = dot and
   emptied again with dot = LexStack[--LexSP].
   NOTE(review): no bounds check on LexSP is visible in this excerpt --
   confirm that callers never nest deeper than MAX_LL_DEPTH.
*/
48 static struct token LexStack[MAX_LL_DEPTH];
51 /* In PushLex() the actions are taken in order to initialise or
52 re-initialise the lexical scanner.
53 E.g. at the invocation of a sub-parser that uses LLlex(), the
54 state of the current parser should be saved.
59 ASSERT(ASIDE == 0); /* ASIDE = 0; */
61 LexStack[LexSP++] = dot;
67 dot = LexStack[--LexSP];
73 /* LLlex() plays the role of Lexical Analyzer for the C parser.
74 The look-ahead and putting aside of tokens are taken into
77 if (ASIDE) { /* a token is put aside */
81 else { /* read ahead and return the old one */
86 /* the following test is performed due to the dual
87 task of LLlex(): it is also called for parsing the
88 restricted constant expression following a #if or
89 #elif. The newline character causes EOF to be
90 returned in this case to stop the LLgen parsing task.
100 char *string_token();
104 register struct token *ptok;
106 /* GetToken() is the actual token recognizer. It calls the
107 control line interpreter if it encounters a "\n#"
108 combination. Macro replacement is also performed if it is
111 char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];
112 register int ch, nch;
121 again: /* rescan the input after an error or replacement */
123 if (Unstacked) EnableMacros();
126 go_on: /* rescan, the following character has been read */
127 if ((ch & 0200) && ch != EOI) /* stop on non-ascii character */
128 fatal("non-ascii '\\%03o' read", ch & 0377);
129 /* keep track of the place of the token in the file */
130 ptok->tk_file = FileName;
131 ptok->tk_line = LineNumber;
133 switch (class(ch)) { /* detect character class */
134 case STNL: /* newline, vertical space or formfeed */
136 LineNumber++; /* also at vs and ff */
137 ptok->tk_file = FileName;
138 ptok->tk_line = LineNumber;
139 if (EoiForNewline) /* called in control line */
140 /* a newline in a control line indicates the
141 end-of-information of the line.
143 return ptok->tk_symb = EOI;
144 while (LoadChar(ch), ch == '#') { /* a control line follows */
151 /* We have to loop here, because in
152 `domacro' the nl, vt or ff is read. The
153 character following it may again be a `#'.
156 case STSKIP: /* just skip the skip characters */
158 case STGARB: /* garbage character */
160 if (SkipEscNewline && (ch == '\\')) {
161 /* a '\\' is allowed in #if/#elif expression */
163 if (class(ch) == STNL) { /* vt , ff ? */
171 if (040 < ch && ch < 0177)
172 lexerror("garbage char %c", ch);
174 lexerror("garbage char \\%03o", ch);
176 case STSIMP: /* a simple character, no part of compound token*/
177 if (ch == '/') { /* probably the start of comment */
179 if (ch == '*') { /* start of comment */
185 ch = '/'; /* restore ch */
188 return ptok->tk_symb = ch;
189 case STCOMP: /* maybe the start of a compound token */
190 LoadChar(nch); /* character lookahead */
194 return ptok->tk_symb = NOTEQUAL;
196 return ptok->tk_symb = ch;
199 return ptok->tk_symb = AND;
201 return ptok->tk_symb = ch;
204 return ptok->tk_symb = PLUSPLUS;
206 return ptok->tk_symb = ch;
209 return ptok->tk_symb = MINMIN;
211 return ptok->tk_symb = ARROW;
213 return ptok->tk_symb = ch;
215 if (AccFileSpecifier) {
216 PushBack(); /* pushback nch */
217 ptok->tk_bts = string_token("file specifier",
218 '>', &(ptok->tk_len));
219 return ptok->tk_symb = FILESPECIFIER;
222 return ptok->tk_symb = LEFT;
224 return ptok->tk_symb = LESSEQ;
226 return ptok->tk_symb = ch;
229 return ptok->tk_symb = EQUAL;
231 return ptok->tk_symb = ch;
234 return ptok->tk_symb = GREATEREQ;
236 return ptok->tk_symb = RIGHT;
238 return ptok->tk_symb = ch;
241 return ptok->tk_symb = OR;
243 return ptok->tk_symb = ch;
247 register char *tg = &buf[0];
248 register int pos = -1;
250 register struct idf *idef;
251 extern int idfsize; /* ??? */
254 do { /* read the identifier */
255 if (++pos < idfsize) {
257 if (Unstacked) EnableMacros();
260 hash = ENHASH(hash, ch, pos);
263 } while (in_idf(ch));
264 hash = STOPHASH(hash);
267 *tg++ = '\0'; /* mark the end of the identifier */
268 idef = ptok->tk_idf = idf_hashed(buf, tg - buf, hash);
269 idef->id_file = ptok->tk_file;
270 idef->id_line = ptok->tk_line;
272 if (idef->id_macro && ReplaceMacros && replace(idef))
273 /* macro replacement should be performed */
275 if (UnknownIdIsZero && idef->id_reserved != SIZEOF) {
276 ptok->tk_ival = (arith)0;
278 return ptok->tk_symb = INTEGER;
282 idef->id_reserved ? idef->id_reserved
283 : idef->id_def && idef->id_def->df_sc == TYPEDEF ?
289 case STCHAR: /* character constant */
291 register arith val = 0;
296 lexerror("character constant too short");
300 lexerror("newline in character constant");
310 if (ch >= 128) ch -= 256;
315 if (size > (int)int_size)
316 lexerror("character constant too long");
319 return ptok->tk_symb = INTEGER;
321 case STSTR: /* string */
322 ptok->tk_bts = string_token("string", '"', &(ptok->tk_len));
323 return ptok->tk_symb = STRING;
324 case STNUM: /* a numeric constant */
326 /* It should be noted that 099 means 81(decimal) and
327 099.5 means 99.5 . This severely limits the tricks
328 we can use to scan a numeric value.
330 register char *np = &buf[1];
331 register int base = 10;
333 register arith val = 0;
335 if (ch == '.') { /* an embarrassing ambiguity */
339 if (!is_dig(vch)) /* just a `.' */
340 return ptok->tk_symb = ch;
342 /* in the rest of the compiler, all floats
343 have to start with a digit.
346 return ptok->tk_symb = ch;
352 if (ch == 'x' || ch == 'X') {
359 while (vch = val_in_base(ch, base), vch >= 0) {
360 val = val*base + vch;
361 if (np < &buf[NUMSIZE])
365 if (ch == 'l' || ch == 'L') {
367 ptok->tk_fund = LONG;
368 return ptok->tk_symb = INTEGER;
371 if (base == 16 || !(ch == '.' || ch == 'e' || ch == 'E'))
376 /* The semantic analyser must know if the
377 integral constant is given in octal/hexa-
378 decimal form, in which case its type is
379 UNSIGNED, or in decimal form, in which case
380 its type is signed, indicated by
384 (base == 10 || (base == 8 && val == (arith)0))
385 ? INTEGER : UNSIGNED;
386 return ptok->tk_symb = INTEGER;
388 /* where's the test for the length of the integral ??? */
391 if (np < &buf[NUMSIZE])
396 if (np < &buf[NUMSIZE])
400 if (ch == 'e' || ch == 'E') {
401 if (np < &buf[NUMSIZE])
404 if (ch == '+' || ch == '-') {
405 if (np < &buf[NUMSIZE])
410 lexerror("malformed floating constant");
411 if (np < &buf[NUMSIZE])
415 if (np < &buf[NUMSIZE])
422 buf[0] = '-'; /* good heavens... */
423 if (np == &buf[NUMSIZE+1]) {
424 lexerror("floating constant too long");
425 ptok->tk_fval = Salloc("-0.0",(unsigned) 5) + 1;
428 ptok->tk_fval = Salloc(buf,(unsigned) (np - buf)) + 1;
429 return ptok->tk_symb = FLOATING;
432 case STEOI: /* end of text on source file */
433 return ptok->tk_symb = EOI;
434 default: /* this cannot happen */
435 crash("bad class for char 0%o", ch);
442 /* The last character read has been the '*' of '/_*'. The
443 characters, except NL and EOI, between '/_*' and the first
444 occurring '*_/' are not interpreted.
445 NL only affects the LineNumber. EOI is not legal.
447 Important note: it is not possible to stop skipping comment
448 beyond the end-of-file of an included file.
449 EOI is returned by LoadChar only on encountering EOF of the
457 if (! lint_skip_comment) {
458 lint_start_comment();
459 lint_comment_char(c);
464 if (class(c) == STNL)
470 if (! lint_skip_comment) lint_end_comment();
476 if (! lint_skip_comment) lint_comment_char(c);
478 } /* last Character seen was '*' */
481 if (! lint_skip_comment) lint_comment_char(c);
485 if (! lint_skip_comment) lint_end_comment();
491 string_token(nm, stop_char, plen)
496 register int str_size;
497 register char *str = Malloc((unsigned) (str_size = ISTRSIZE));
498 register int pos = 0;
501 while (ch != stop_char) {
503 lexerror("newline in %s", nm);
508 lexerror("end-of-file inside %s", nm);
522 str = Srealloc(str, (unsigned) (str_size += RSTRSIZE));
525 str[pos++] = '\0'; /* for filenames etc. */
534 /* quoted() replaces an escaped character sequence by the
537 /* first char after backslash already in ch */
538 if (!is_oct(ch)) { /* a quoted char */
557 else { /* a quoted octal */
558 register int oct = 0, cnt = 0;
561 oct = oct*8 + (ch-'0');
563 } while (is_oct(ch) && ++cnt < 3);
572 val_in_base(ch, base)
576 is_dig(ch) ? ch - '0'
578 : is_hex(ch) ? (ch - 'a' + 10) & 017