2 * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
3 * See the copyright notice in the ACK home directory, in the file "Copyright".
5 /* $Id: LLlex.c,v 1.9 1994/06/24 10:17:46 ceriel Exp $ */
6 /* L E X I C A L A N A L Y Z E R */
20 /* Data about the token yielded */
23 int ReplaceMacros = 1; /* nonzero: macro replacement is on (the initial setting) */
24 int AccFileSpecifier = 0; /* nonzero: read a <...> file specifier and return it as FILESPECIFIER */
25 int AccDefined = 0; /* nonzero: accept "defined(...)" */
26 int UnknownIdIsZero = 0; /* nonzero: interpret unknown id as integer 0 (token INTEGER, not IDENTIFIER) */
42 return (DOT != EOF) ? GetToken(&dot) : EOF;
49 register struct token *ptok;
54 again: /* rescan the input after an error or replacement */
56 if ((c & 0200) && c != EOI)
57 fatal("non-ascii '\\%03o' read", c & 0377);
58 switch (class(c)) { /* detect character class */
61 return ptok->tk_symb = EOF;
64 case STGARB: /* garbage character */
66 /* a '\\' is allowed in #if/#elif expression */
68 if (class(c) == STNL) { /* vt , ff ? */
75 if (040 < c && c < 0177)
76 error("garbage char %c", c);
78 error("garbage char \\%03o", c);
80 case STSIMP: /* a simple character, no part of compound token*/
81 if (c == '/') { /* probably the start of comment */
83 if (c == '*') { /* start of comment */
89 c = '/'; /* restore c */
92 return ptok->tk_symb = c;
93 case STCOMP: /* maybe the start of a compound token */
94 LoadChar(nch); /* character lookahead */
98 return ptok->tk_symb = NOTEQUAL;
100 return ptok->tk_symb = c;
103 return ptok->tk_symb = AND;
105 return ptok->tk_symb = c;
107 if (AccFileSpecifier) {
108 PushBack(); /* pushback nch */
110 string_token("file specifier", '>');
111 return ptok->tk_symb = FILESPECIFIER;
114 return ptok->tk_symb = LEFT;
116 return ptok->tk_symb = LESSEQ;
118 return ptok->tk_symb = c;
124 return ptok->tk_symb = EQUAL;
127 return ptok->tk_symb = GREATEREQ;
129 return ptok->tk_symb = RIGHT;
131 return ptok->tk_symb = c;
134 return ptok->tk_symb = OR;
136 return ptok->tk_symb = c;
140 extern int idfsize; /* ??? */
141 register char *tg = &buf[0];
142 register char *maxpos = &buf[idfsize];
143 register struct idf *idef;
145 #define tstmac(bx) if (!(bits[c] & bx)) goto nomac /* leave for nomac when bits[c] lacks flag bx */
146 #define cpy if (Unstacked) EnableMacros(); *tg++ = c /* two statements: only EnableMacros() is conditional, c is always stored */
147 #define load LoadChar(c); if (!in_idf(c)) goto endidf /* fetch next char into c; leave for endidf when in_idf(c) is false */
150 cpy; tstmac(bit0); load;
151 cpy; tstmac(bit1); load;
152 cpy; tstmac(bit2); load;
153 cpy; tstmac(bit3); load;
154 cpy; tstmac(bit4); load;
155 cpy; tstmac(bit5); load;
156 cpy; tstmac(bit6); load;
157 cpy; tstmac(bit7); load;
168 *tg = '\0'; /* mark the end of the identifier */
171 if ((idef && idef->id_macro && replace(idef))) {
178 if (tg < maxpos) *tg++ = c;
182 *tg++ = '\0'; /* mark the end of the identifier */
183 if (UnknownIdIsZero) {
185 return ptok->tk_symb = INTEGER;
187 ptok->tk_str = Malloc(tg - buf);
188 strcpy(ptok->tk_str, buf);
189 return ptok->tk_symb = IDENTIFIER;
191 case STCHAR: /* character constant */
193 register arith val = 0;
194 register int size = 0;
198 error("character constant too short");
202 error("newline in character constant");
213 if (c >= 128) c -= 256;
218 if (size > sizeof(arith))
219 error("character constant too long");
221 return ptok->tk_symb = INTEGER;
225 register char *np = &buf[1];
226 register int base = 10;
228 register arith val = 0;
233 if (c == 'x' || c == 'X') {
240 while (vch = val_in_base(c, base), vch >= 0) {
241 val = val*base + vch;
242 if (np < &buf[NUMSIZE])
246 if (c == 'l' || c == 'L')
250 return ptok->tk_symb = INTEGER;
253 ptok->tk_str = string_token("string", '"');
254 return ptok->tk_symb = STRING;
255 case STEOI: /* end of text on source file */
256 return ptok->tk_symb = EOF;
258 crash("Impossible character class");
271 if (class(c) == STNL)
280 /* Last Character seen was '*' */
287 string_token(nm, stop_char)
291 register unsigned int str_size;
292 register char *str = Malloc(str_size = ISTRSIZE);
293 register int pos = 0;
296 while (c != stop_char) {
298 error("newline in %s", nm);
303 error("end-of-file inside %s", nm);
317 str = Realloc(str, str_size <<= 1);
320 str[pos++] = '\0'; /* for filenames etc. */
321 str = Realloc(str, pos);
329 /* quoted() replaces an escaped character sequence by the
332 /* first char after backslash already in c */
333 if (!is_oct(c)) { /* a quoted char */
352 else { /* a quoted octal */
353 register int oct = 0, cnt = 0;
356 oct = oct*8 + (c-'0');
358 } while (is_oct(c) && ++cnt < 3);
371 is_dig(c) ? c - '0' :
373 is_hex(c) ? (c - 'a' + 10) & 017 :