2 * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
3 * See the copyright notice in the ACK home directory, in the file "Copyright".
5 * Author: Ceriel J.H. Jacobs
8 /* L E X I C A L A N A L Y S E R F O R M O D U L A - 2 */
10 /* $Id: LLlex.c,v 1.70 1996/11/19 09:12:35 ceriel Exp $ */
33 extern char *getwdir();
38 int idfsize = IDFSIZE;
45 int tk_nmb_at_last_syn_err = -ERR_SHADOW;
47 extern char options[];
48 extern int flt_status;
53 /* Skip Modula-2 comments (* ... *).
54 Note that comments may be nested (par. 3.5).
57 register int CommentLevel = 0;
64 /* Foreign; This definition module has an
65 implementation in another language.
66 In this case, don't generate prefixes in front
67 of the names. Also, don't generate call to
68 initialization routine.
70 ForeignFlag = D_FOREIGN;
75 case 'A': /* Extra array bound checks, on or off */
76 case 'R': /* Range checks, on or off */
78 int on_on_minus = ch == 'R';
81 options[ch] = on_on_minus;
85 options[ch] = !on_on_minus;
96 if (!(ch & 0200) && class(ch) == STNL) {
102 else if (ch == '(') {
104 if (ch == '*') CommentLevel++;
107 else if (ch == '*') {
111 if (CommentLevel < 0) break;
115 else if (ch == EOI) {
116 lexerror("unterminated comment");
124 STATIC struct string *
127 /* Read a Modula-2 string, delimited by the character "upto".
130 register struct string *str = (struct string *)
131 Malloc((unsigned) sizeof(struct string));
136 str->s_str = p = Malloc((unsigned int) ISTRSIZE);
137 while (LoadChar(ch), ch != upto) {
138 if (!(ch & 0200) && class(ch) == STNL) {
139 lexerror("newline in string");
147 lexerror("end-of-file in string");
151 if (p - str->s_str == len) {
152 str->s_str = Realloc(str->s_str,
153 (unsigned int) len + RSTRSIZE);
154 p = str->s_str + len;
158 str->s_length = p - str->s_str;
159 len = (str->s_length+(int)word_size) & ~((int)word_size-1);
160 while (p - str->s_str < len) {
163 str->s_str = Realloc(str->s_str, (unsigned) len);
164 if (str->s_length == 0) str->s_length = 1;
165 /* ??? string length at least 1 ??? */
169 static char *s_error = "illegal line directive";
176 while (LoadChar(ch), (ch & 0200) && ch != EOI) {
177 error("non-ascii '\\%03o' read", ch & 0377);
182 CheckForLineDirective()
184 register int ch = getch();
187 register char *c = buf;
196 * Do not skip newlines
199 if (class(ch) == STNL || class(ch) == STEOI) {
204 } while (class(ch) != STNUM);
205 while (class(ch) == STNUM) {
206 i = i*10 + (ch - '0');
209 while (ch != '"' && class(ch) != STNL && class(ch) != STEOI)
215 if (c < &buf[IDFSIZE]) *c++ = ch;
216 if (class(ch) == STNL || class(ch) == STEOI) {
225 } while (class(ch) != STNL && class(ch) != STEOI);
227 * Remember the file name
229 if (class(ch) == STNL && strcmp(FileName,buf)) {
230 FileName = Salloc(buf,(unsigned) strlen(buf) + 1);
231 WorkingDir = getwdir(FileName);
234 if (class(ch) == STEOI) {
248 if (class(ch) == STIDF) {
249 lexerror("token separator required between identifier and number");
258 /* LLlex() is the Lexical Analyzer.
259 The putting aside of tokens is taken into account.
261 register t_token *tk = ˙
262 char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 2];
263 register int ch, nch;
265 toktype = error_type;
267 if (ASIDE) { /* a token is put aside */
276 tk->tk_lineno = LineNumber;
285 CheckForLineDirective();
292 if ((unsigned) ch - 040 < 0137) {
293 lexerror("garbage char %c", ch);
295 else lexerror("garbage char \\%03o", ch);
307 if (ch == '&') return tk->tk_symb = AND;
308 if (ch == '~') return tk->tk_symb = NOT;
309 return tk->tk_symb = ch;
317 return tk->tk_symb = UPTO;
323 return tk->tk_symb = BECOMES;
329 return tk->tk_symb = LESSEQUAL;
332 return tk->tk_symb = '#';
338 return tk->tk_symb = GREATEREQUAL;
343 crash("(LLlex, STCOMP)");
346 return tk->tk_symb = ch;
350 register char *tag = &buf[0];
354 if (tag - buf < idfsize) *tag++ = ch;
356 if (ch == '_' && *(tag-1) == '_') {
357 lexerror("an identifier may not contain two consecutive underscores");
363 if (*(tag - 1) == '_') {
364 lexerror("last character of an identifier may not be an underscore");
367 tk->TOK_IDF = id = str2idf(buf, 1);
368 return tk->tk_symb = id->id_reserved ? id->id_reserved : IDENT;
372 register struct string *str = GetString(ch);
374 if (str->s_length == 1) {
375 tk->TOK_INT = *(str->s_str) & 0377;
381 tk->tk_data.tk_str = str;
382 if (! fit((arith)(str->s_length), (int) word_size)) {
383 lexerror("string too long");
385 toktype = standard_type(T_STRING, 1, (arith)(str->s_length));
387 return tk->tk_symb = STRING;
392 /* The problem arising with the "parsing" of a number
393 is that we don't know the base in advance so we
394 have to read the number with the help of a rather
395 complex finite automaton.
397 enum statetp {Oct,OptHex,Hex,Dec,OctEndOrHex,End,OptReal,Real};
398 register enum statetp state;
399 register int base = 8;
400 register char *np = &buf[0];
403 state = is_oct(ch) ? Oct : Dec;
409 if (np < &buf[NUMSIZE]) *np++ = ch;
412 if (ch == 'B' || ch == 'C') {
420 if (np < &buf[NUMSIZE]) {
425 if (ch == 'D') state = OptHex;
426 else if (is_hex(ch)) state = Hex;
427 else if (ch == '.') state = OptReal;
430 if (ch == 'H') base = 16;
438 if (np < &buf[NUMSIZE]) *np++ = 'D';
450 if (np < &buf[NUMSIZE]) *np++ = ch;
456 lexerror("H expected after hex number");
462 if (np < &buf[NUMSIZE]) *np++ = ch;
482 if (np >= &buf[NUMSIZE]) {
484 lexerror("constant too long");
487 /* The upperbound will be the same as
488 when computed with something like
489 max(unsigned long) / base (when base
490 is even). The problem is that
491 unsigned long or unsigned arith is
492 not accepted by all compilers
494 arith ubound = max_int[sizeof(arith)]
497 while (*np == '0') np++;
507 c = *np++ - 'A' + 10;
509 if (tk->TOK_INT < 0 ||
510 tk->TOK_INT > ubound) {
513 tk->TOK_INT = tk->TOK_INT*base;
514 if (tk->TOK_INT < 0 &&
515 tk->TOK_INT + c >= 0) {
522 if (ch == 'C' && base == 8) {
524 if (ovfl != 0 || tk->TOK_INT>255 ||
526 lexwarning(W_ORDINARY, "character constant out of range");
529 return tk->tk_symb = INTEGER;
539 if (ch == 'D' && (options['l'] || base == 10)) {
541 /* Local extension: LONGCARD exists,
542 so internally also longintorcard_type
545 toktype = longcard_type;
546 if (ovfl == 0 && tk->TOK_INT >= 0 &&
547 tk->TOK_INT<=max_int[(int)long_size]) {
548 toktype = longintorcard_type;
550 else if (! chk_bounds(tk->TOK_INT,
551 full_mask[(int)long_size],
558 tk->TOK_INT > max_int[(int)long_size] ||
562 toktype = longint_type;
565 else if (ovfl == 0 && tk->TOK_INT >= 0 &&
566 tk->TOK_INT<=max_int[(int)int_size]) {
567 toktype = intorcard_type;
569 else if (! chk_bounds(tk->TOK_INT,
570 full_mask[(int)int_size],
575 lexwarning(W_ORDINARY, "overflow in constant");
577 return tk->tk_symb = INTEGER;
581 /* The '.' could be the first of the '..'
582 token. At this point, we need a
583 look-ahead of two characters.
587 /* Indeed the '..' token
598 if (state == Real) break;
601 /* a real real constant */
602 if (np < &buf[NUMSIZE]) *np++ = '.';
609 if (np < &buf[NUMSIZE]) *np++ = ch;
614 toktype = longreal_type;
616 if (ch == '+' || ch == '-' || is_dig(ch)) {
624 if (np < &buf[NUMSIZE]) *np++ = ch;
626 if (ch == '+' || ch == '-') {
627 /* Signed scalefactor
629 if (np < &buf[NUMSIZE]) *np++ = ch;
634 if (np < &buf[NUMSIZE]) *np++ = ch;
636 } while (is_dig(ch));
639 lexerror("bad scale factor");
646 tk->tk_data.tk_real = new_real();
647 if (np >= &buf[NUMSIZE]) {
648 tk->TOK_RSTR = Salloc("0.0", 4);
649 lexerror("real constant too long");
651 else tk->TOK_RSTR = Salloc(buf, (unsigned) (np - buf));
653 flt_str2flt(tk->TOK_RSTR, &(tk->TOK_RVAL));
654 if (flt_status == FLT_OVFL) {
655 lexwarning(W_ORDINARY, "overflow in floating point constant");
657 return tk->tk_symb = REAL;
663 return tk->tk_symb = -1;
667 crash("(LLlex) Impossible character class");