Pristine Ack-5.5
[Ack-5.5.git] / lang / m2 / comp / LLlex.c
1 /*
2  * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
3  * See the copyright notice in the ACK home directory, in the file "Copyright".
4  *
5  * Author: Ceriel J.H. Jacobs
6  */
7
8 /* L E X I C A L   A N A L Y S E R   F O R   M O D U L A - 2 */
9
10 /* $Id: LLlex.c,v 1.70 1996/11/19 09:12:35 ceriel Exp $ */
11
12 #include        "debug.h"
13 #include        "idfsize.h"
14 #include        "numsize.h"
15 #include        "strsize.h"
16
17 #include        <alloc.h>
18 #include        <em_arith.h>
19 #include        <em_label.h>
20 #include        <assert.h>
21
22 #include        "LLlex.h"
23 #include        "input.h"
24 #include        "f_info.h"
25 #include        "Lpars.h"
26 #include        "class.h"
27 #include        "idf.h"
28 #include        "def.h"
29 #include        "type.h"
30 #include        "warning.h"
31 #include        "errout.h"
32
33 extern char *getwdir();
34
35 t_token         dot,
36                 aside;
37 t_type          *toktype;
38 int             idfsize = IDFSIZE;
39 int             ForeignFlag;
40 #ifdef DEBUG
41 extern int      cntlines;
42 #endif
43
44 int     token_nmb = 0;
45 int     tk_nmb_at_last_syn_err = -ERR_SHADOW;
46
47 extern char     options[];
48 extern int      flt_status;
49
50 STATIC
51 SkipComment()
52 {
53         /*      Skip Modula-2 comments (* ... *).
54                 Note that comments may be nested (par. 3.5).
55         */
56         register int ch, c;
57         register int CommentLevel = 0;
58
59         LoadChar(ch);
60         if (ch == '$') {
61                 LoadChar(ch);
62                 switch(ch) {
63                 case 'F':
64                         /* Foreign; This definition module has an
65                            implementation in another language.
66                            In this case, don't generate prefixes in front
67                            of the names. Also, don't generate call to
68                            initialization routine.
69                         */
70                         ForeignFlag = D_FOREIGN;
71                         break;
72                 case 'U':
73                         inidf['_'] = 1;
74                         break;
75                 case 'A': /* Extra array bound checks, on or off */
76                 case 'R': /* Range checks, on or off */
77                 {
78                         int on_on_minus = ch == 'R';
79                         LoadChar(c);
80                         if (c == '-') {
81                                 options[ch] = on_on_minus;
82                                 break;
83                         }
84                         if (c == '+') {
85                                 options[ch] = !on_on_minus;
86                                 break;
87                         }
88                         ch = c;
89                 }
90                         /* fall through */
91                 default:
92                         break;
93                 }
94         }
95         for (;;) {
96                 if (!(ch & 0200) && class(ch) == STNL) {
97                         LineNumber++;
98 #ifdef DEBUG
99                         cntlines++;
100 #endif
101                 }
102                 else if (ch == '(') {
103                         LoadChar(ch);
104                         if (ch == '*') CommentLevel++;
105                         else continue;
106                 }
107                 else if (ch == '*') {
108                         LoadChar(ch);
109                         if (ch == ')') {
110                                 CommentLevel--;
111                                 if (CommentLevel < 0) break;
112                         }
113                         else continue;
114                 }
115                 else if (ch == EOI) {
116                         lexerror("unterminated comment");
117                         PushBack();
118                         break;
119                 }
120                 LoadChar(ch);
121         }
122 }
123
124 STATIC struct string *
125 GetString(upto)
126 {
127         /*      Read a Modula-2 string, delimited by the character "upto".
128         */
129         register int ch;
130         register struct string *str = (struct string *)
131                         Malloc((unsigned) sizeof(struct string));
132         register char *p;
133         register int len;
134         
135         len = ISTRSIZE;
136         str->s_str = p = Malloc((unsigned int) ISTRSIZE);
137         while (LoadChar(ch), ch != upto)        {
138                 if (!(ch & 0200) && class(ch) == STNL)  {
139                         lexerror("newline in string");
140                         LineNumber++;
141 #ifdef DEBUG
142                         cntlines++;
143 #endif
144                         break;
145                 }
146                 if (ch == EOI)  {
147                         lexerror("end-of-file in string");
148                         break;
149                 }
150                 *p++ = ch;
151                 if (p - str->s_str == len)      {
152                         str->s_str = Realloc(str->s_str,
153                                 (unsigned int) len + RSTRSIZE);
154                         p = str->s_str + len;
155                         len += RSTRSIZE;
156                 }
157         }
158         str->s_length = p - str->s_str;
159         len = (str->s_length+(int)word_size) & ~((int)word_size-1);
160         while (p - str->s_str < len) {
161                 *p++ = '\0';
162         }
163         str->s_str = Realloc(str->s_str, (unsigned) len);
164         if (str->s_length == 0) str->s_length = 1;
165         /* ??? string length at least 1 ??? */
166         return str;
167 }
168
169 static char *s_error = "illegal line directive";
170
171 STATIC int
172 getch()
173 {
174         register int ch;
175
176         while (LoadChar(ch), (ch & 0200) && ch != EOI) {
177                 error("non-ascii '\\%03o' read", ch & 0377);
178         }
179         return ch;
180 }
181
182 CheckForLineDirective()
183 {
184         register int ch = getch();
185         register int    i = 0;
186         char            buf[IDFSIZE];
187         register char   *c = buf;
188
189
190         if (ch != '#') {
191                 PushBack();
192                 return;
193         }
194         do {    /*
195                  * Skip to next digit
196                  * Do not skip newlines
197                  */
198                 ch = getch();
199                 if (class(ch) == STNL || class(ch) == STEOI) {
200                         LineNumber++;
201                         error(s_error);
202                         return;
203                 }
204         } while (class(ch) != STNUM);
205         while (class(ch) == STNUM)  {
206                 i = i*10 + (ch - '0');
207                 ch = getch();
208         }
209         while (ch != '"' && class(ch) != STNL && class(ch) != STEOI)
210                 ch = getch();
211         if (ch == '"') {
212                 c = buf;
213                 do {
214                         ch = getch();
215                         if (c < &buf[IDFSIZE]) *c++ = ch;
216                         if (class(ch) == STNL || class(ch) == STEOI) {
217                                 LineNumber++;
218                                 error(s_error);
219                                 return;
220                         }
221                 } while (ch != '"');
222                 *--c = '\0';
223                 do {
224                         ch = getch();
225                 } while (class(ch) != STNL && class(ch) != STEOI);
226                 /*
227                  * Remember the file name
228                  */
229                 if (class(ch) == STNL && strcmp(FileName,buf)) {
230                         FileName = Salloc(buf,(unsigned) strlen(buf) + 1);
231                         WorkingDir = getwdir(FileName);
232                 }
233         }
234         if (class(ch) == STEOI) {
235                 error(s_error);
236                 return;
237         }
238         LineNumber = i;
239 }
240
241 STATIC
242 CheckForLet()
243 {
244         register int ch;
245
246         LoadChar(ch);
247         if (ch != EOI) {
248                 if (class(ch) == STIDF) {
249                         lexerror("token separator required between identifier and number");
250                 }
251                 PushBack();
252         }
253 }
254
255 int
256 LLlex()
257 {
258         /*      LLlex() is the Lexical Analyzer.
259                 The putting aside of tokens is taken into account.
260         */
261         register t_token *tk = &dot;
262         char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 2];
263         register int ch, nch;
264
265         toktype = error_type;
266
267         if (ASIDE)      {       /* a token is put aside         */
268                 *tk = aside;
269                 ASIDE = 0;
270                 return tk->tk_symb;
271         }
272
273         token_nmb++;
274 again:
275         ch = getch();
276         tk->tk_lineno = LineNumber;
277
278         switch (class(ch))      {
279
280         case STNL:
281                 LineNumber++;
282 #ifdef DEBUG
283                 cntlines++;
284 #endif
285                 CheckForLineDirective();
286                 goto again;
287
288         case STSKIP:
289                 goto again;
290
291         case STGARB:
292                 if ((unsigned) ch - 040 < 0137) {
293                         lexerror("garbage char %c", ch);
294                 }
295                 else    lexerror("garbage char \\%03o", ch);
296                 goto again;
297
298         case STSIMP:
299                 if (ch == '(')  {
300                         LoadChar(nch);
301                         if (nch == '*') {
302                                 SkipComment();
303                                 goto again;
304                         }
305                         PushBack();
306                 }
307                 if (ch == '&') return tk->tk_symb = AND;
308                 if (ch == '~') return tk->tk_symb = NOT;
309                 return tk->tk_symb = ch;
310
311         case STCOMP:
312                 LoadChar(nch);
313                 switch (ch)     {
314
315                 case '.':
316                         if (nch == '.') {
317                                 return tk->tk_symb = UPTO;
318                         }
319                         break;
320
321                 case ':':
322                         if (nch == '=') {
323                                 return tk->tk_symb = BECOMES;
324                         }
325                         break;
326
327                 case '<':
328                         if (nch == '=') {
329                                 return tk->tk_symb = LESSEQUAL;
330                         }
331                         if (nch == '>') {
332                                 return tk->tk_symb = '#';
333                         }
334                         break;
335
336                 case '>':
337                         if (nch == '=') {
338                                 return tk->tk_symb = GREATEREQUAL;
339                         }
340                         break;
341
342                 default :
343                         crash("(LLlex, STCOMP)");
344                 }
345                 PushBack();
346                 return tk->tk_symb = ch;
347
348         case STIDF:
349         {
350                 register char *tag = &buf[0];
351                 register t_idf *id;
352
353                 do      {
354                         if (tag - buf < idfsize) *tag++ = ch;
355                         LoadChar(ch);
356                         if (ch == '_' && *(tag-1) == '_') {
357                                 lexerror("an identifier may not contain two consecutive underscores");
358                         }
359                 } while(in_idf(ch));
360
361                 PushBack();
362                 *tag = '\0';
363                 if (*(tag - 1) == '_') {
364                         lexerror("last character of an identifier may not be an underscore");
365                 }
366
367                 tk->TOK_IDF = id = str2idf(buf, 1);
368                 return tk->tk_symb = id->id_reserved ? id->id_reserved : IDENT;
369         }
370
371         case STSTR: {
372                 register struct string *str = GetString(ch);
373
374                 if (str->s_length == 1) {
375                         tk->TOK_INT = *(str->s_str) & 0377;
376                         toktype = char_type;
377                         free(str->s_str);
378                         free((char *) str);
379                 }
380                 else {
381                         tk->tk_data.tk_str = str;
382                         if (! fit((arith)(str->s_length), (int) word_size)) {
383                                 lexerror("string too long");
384                         }
385                         toktype = standard_type(T_STRING, 1, (arith)(str->s_length));
386                 }
387                 return tk->tk_symb = STRING;
388                 }
389
390         case STNUM:
391         {
392                 /*      The problem arising with the "parsing" of a number
393                         is that we don't know the base in advance so we
394                         have to read the number with the help of a rather
395                         complex finite automaton.
396                 */
397                 enum statetp {Oct,OptHex,Hex,Dec,OctEndOrHex,End,OptReal,Real};
398                 register enum statetp state;
399                 register int base = 8;
400                 register char *np = &buf[0];
401
402                 *np++ = ch;
403                 state = is_oct(ch) ? Oct : Dec;
404                 LoadChar(ch);
405                 for (;;) {
406                         switch(state) {
407                         case Oct:
408                                 while (is_oct(ch))      {
409                                         if (np < &buf[NUMSIZE]) *np++ = ch;
410                                         LoadChar(ch);
411                                 }
412                                 if (ch == 'B' || ch == 'C') {
413                                         state = OctEndOrHex;
414                                         break;
415                                 }
416                                 /* Fall Through */
417                         case Dec:
418                                 base = 10;
419                                 while (is_dig(ch))      {
420                                         if (np < &buf[NUMSIZE]) {
421                                                 *np++ = ch;
422                                         }
423                                         LoadChar(ch);
424                                 }
425                                 if (ch == 'D') state = OptHex;
426                                 else if (is_hex(ch)) state = Hex;
427                                 else if (ch == '.') state = OptReal;
428                                 else {
429                                         state = End;
430                                         if (ch == 'H') base = 16;
431                                         else PushBack();
432                                 }
433                                 break;
434
435                         case OptHex:
436                                 LoadChar(ch);
437                                 if (is_hex(ch)) {
438                                         if (np < &buf[NUMSIZE]) *np++ = 'D';
439                                         state = Hex;
440                                 }
441                                 else {
442                                         state = End;
443                                         ch = 'D';
444                                         PushBack();
445                                 }
446                                 break;
447
448                         case Hex:
449                                 while (is_hex(ch))      {
450                                         if (np < &buf[NUMSIZE]) *np++ = ch;
451                                         LoadChar(ch);
452                                 }
453                                 base = 16;
454                                 state = End;
455                                 if (ch != 'H') {
456                                         lexerror("H expected after hex number");
457                                         PushBack();
458                                 }
459                                 break;
460
461                         case OctEndOrHex:
462                                 if (np < &buf[NUMSIZE]) *np++ = ch;
463                                 LoadChar(ch);
464                                 if (ch == 'H') {
465                                         base = 16;
466                                         state = End;
467                                         break;
468                                 }
469                                 if (is_hex(ch)) {
470                                         state = Hex;
471                                         break;
472                                 }
473                                 PushBack();
474                                 ch = *--np;
475                                 *np++ = '\0';
476                                 /* Fall through */
477                                 
478                         case End: {
479                                 int ovfl = 0;
480
481                                 *np = '\0';
482                                 if (np >= &buf[NUMSIZE]) {
483                                         tk->TOK_INT = 1;
484                                         lexerror("constant too long");
485                                 }
486                                 else {
487                                         /* The upperbound will be the same as
488                                            when computed with something like
489                                            max(unsigned long) / base (when base
490                                            is even). The problem is that
491                                            unsigned long or unsigned arith is
492                                            not accepted by all compilers
493                                         */
494                                         arith ubound = max_int[sizeof(arith)]
495                                                         / (base >> 1);
496                                         np = &buf[0];
497                                         while (*np == '0') np++;
498                                         tk->TOK_INT = 0;
499                                         while (*np) {
500                                                 int c;
501
502                                                 if (is_dig(*np)) {
503                                                         c = *np++ - '0';
504                                                 }
505                                                 else {
506                                                         assert(is_hex(*np));
507                                                         c = *np++ - 'A' + 10;
508                                                 }
509                                                 if (tk->TOK_INT < 0 ||
510                                                     tk->TOK_INT > ubound) {
511                                                         ovfl++;
512                                                 }
513                                                 tk->TOK_INT = tk->TOK_INT*base;
514                                                 if (tk->TOK_INT < 0 &&
515                                                     tk->TOK_INT + c >= 0) {
516                                                         ovfl++;
517                                                 }
518                                                 tk->TOK_INT += c;
519                                         }
520                                 }
521                                 toktype = card_type;
522                                 if (ch == 'C' && base == 8) {
523                                         toktype = char_type;
524                                         if (ovfl != 0 || tk->TOK_INT>255 ||
525                                             tk->TOK_INT < 0) {
526 lexwarning(W_ORDINARY, "character constant out of range");
527                                         }
528                                         CheckForLet();
529                                         return tk->tk_symb = INTEGER;
530                                 }
531                                 if (options['l']) {
532                                         if (base != 10) {
533                                                 LoadChar(ch);
534                                                 if (ch != 'D') {
535                                                         PushBack();
536                                                 }
537                                         }
538                                 }
539                                 if (ch == 'D' && (options['l'] || base == 10)) {
540                                     if (options['l']) {
541                                         /* Local extension: LONGCARD exists,
542                                            so internally also longintorcard_type
543                                            exists.
544                                         */
545                                         toktype = longcard_type;
546                                         if (ovfl == 0 && tk->TOK_INT >= 0 &&
547                                             tk->TOK_INT<=max_int[(int)long_size]) {
548                                             toktype = longintorcard_type;
549                                         }
550                                         else if (! chk_bounds(tk->TOK_INT,
551                                                       full_mask[(int)long_size],
552                                                       T_CARDINAL)) {
553                                             ovfl = 1;
554                                         }
555                                     }
556                                     else {
557                                         if (ovfl != 0 ||
558                                             tk->TOK_INT > max_int[(int)long_size] ||
559                                             tk->TOK_INT < 0) {
560                                                 ovfl = 1;
561                                         }
562                                         toktype = longint_type;
563                                     }
564                                 }
565                                 else if (ovfl == 0 && tk->TOK_INT >= 0 &&
566                                          tk->TOK_INT<=max_int[(int)int_size]) {
567                                         toktype = intorcard_type;
568                                 }
569                                 else if (! chk_bounds(tk->TOK_INT,
570                                                       full_mask[(int)int_size],
571                                                       T_CARDINAL)) {
572                                         ovfl = 1;
573                                 }
574                                 if (ovfl)
575 lexwarning(W_ORDINARY, "overflow in constant");
576                                 CheckForLet();
577                                 return tk->tk_symb = INTEGER;
578                                 }
579
580                         case OptReal:
581                                 /*      The '.' could be the first of the '..'
582                                         token. At this point, we need a
583                                         look-ahead of two characters.
584                                 */
585                                 LoadChar(ch);
586                                 if (ch == '.') {
587                                         /*      Indeed the '..' token
588                                         */
589                                         PushBack();
590                                         PushBack();
591                                         state = End;
592                                         base = 10;
593                                         break;
594                                 }
595                                 state = Real;
596                                 break;
597                         }
598                         if (state == Real) break;
599                 }
600
601                 /* a real real constant */
602                 if (np < &buf[NUMSIZE]) *np++ = '.';
603
604                 toktype = real_type;
605
606                 while (is_dig(ch)) {
607                         /*      Fractional part
608                         */
609                         if (np < &buf[NUMSIZE]) *np++ = ch;
610                         LoadChar(ch);
611                 }
612
613                 if (ch == 'D') {
614                         toktype = longreal_type;
615                         LoadChar(ch);
616                         if (ch == '+' || ch == '-' || is_dig(ch)) {
617                                 ch = 'E';
618                                 PushBack();
619                         }
620                 }
621                 if (ch == 'E') {
622                         /*      Scale factor
623                         */
624                         if (np < &buf[NUMSIZE]) *np++ = ch;
625                         LoadChar(ch);
626                         if (ch == '+' || ch == '-') {
627                                 /*      Signed scalefactor
628                                 */
629                                 if (np < &buf[NUMSIZE]) *np++ = ch;
630                                 LoadChar(ch);
631                         }
632                         if (is_dig(ch)) {
633                                 do {
634                                         if (np < &buf[NUMSIZE]) *np++ = ch;
635                                         LoadChar(ch);
636                                 } while (is_dig(ch));
637                         }
638                         else {
639                                 lexerror("bad scale factor");
640                         }
641                 }
642
643                 *np++ = '\0';
644                 PushBack();
645
646                 tk->tk_data.tk_real = new_real();
647                 if (np >= &buf[NUMSIZE]) {
648                         tk->TOK_RSTR = Salloc("0.0", 4);
649                         lexerror("real constant too long");
650                 }
651                 else    tk->TOK_RSTR = Salloc(buf, (unsigned) (np - buf));
652                 CheckForLet();
653                 flt_str2flt(tk->TOK_RSTR, &(tk->TOK_RVAL));
654                 if (flt_status == FLT_OVFL) {
655 lexwarning(W_ORDINARY, "overflow in floating point constant");
656                 }
657                 return tk->tk_symb = REAL;
658
659                 /*NOTREACHED*/
660         }
661
662         case STEOI:
663                 return tk->tk_symb = -1;
664
665         case STCHAR:
666         default:
667                 crash("(LLlex) Impossible character class");
668                 /*NOTREACHED*/
669         }
670         /*NOTREACHED*/
671 }