Pristine Ack-5.5
[Ack-5.5.git] / lang / pc / comp / LLlex.c
1 /* L E X I C A L   A N A L Y S E R   F O R   I S O - P A S C A L */
2
3 #include        "debug.h"
4 #include        "idfsize.h"
5 #include        "numsize.h"
6 #include        "strsize.h"
7
8 #include        <alloc.h>
9 #include        <em_arith.h>
10 #include        <em_label.h>
11
12 #include        "LLlex.h"
13 #include        "Lpars.h"
14 #include        "class.h"
15 #include        "const.h"
16 #include        "f_info.h"
17 #include        "idf.h"
18 #include        "input.h"
19 #include        "main.h"
20 #include        "type.h"
21
22 extern long     str2long();
23 extern char     *Malloc();
24
25 #define TO_LOWER(ch)    (ch |= ( ch>='A' && ch<='Z' ) ? 0x0020 : 0)
26
27 #ifdef DEBUG
28 extern int cntlines;
29 #endif
30
31 int idfsize = IDFSIZE;
32 struct token    dot,
33                 aside;
34
35 struct type     *toktype,
36                 *asidetype;
37
38 static int      eofseen;
39
40 int tokenseen = 0;      /* Some comment-options must precede any program text */
41
42 /* Warning: The options specified inside comments take precedence over
43  * the ones on the command line.
44  */
45 CommentOptions()
46 {
47         register int ch, ci;
48         int     on_on_minus = 0;
49         /* Parse options inside comments */
50
51         do {
52                 LoadChar(ch);
53                 ci = ch;
54                 switch ( ci ) {
55                 case 'c':               /* for strings */
56                 case 'd':               /* for longs */
57                 case 's':               /* check for standard */
58                 case 'u':               /* for underscores */
59                 case 'C':               /* for different cases */
60                 case 'U':               /* for underscores */
61                         if( tokenseen ) {
62                                 lexwarning("the '%c' option must precede any program text", ci);
63                                 break;
64                         }
65
66                         LoadChar(ch);
67                         if( ci == 's' && options[ci] && ch == '-')
68                                 lexwarning("option '%c-' overrides previous one", ci);
69                         if( ch == '-' ) options[ci] = 0;
70                         else if( ch == '+' ) options[ci] = 1;
71                         else PushBack();
72                         break;
73
74                 case 'l':       ci = 'L' ;      /* for indexing */
75                         /* fall through */
76                 case 'L':                       /* FIL & LIN instructions */
77                 case 'R':                       /* range checks */
78                 case 'a':                       /* assertions */
79                         on_on_minus = 1;
80                         /* fall through */
81                 case 't':                       /* tracing */
82                 case 'A':                       /* extra array range-checks */
83                         LoadChar(ch);
84                         if( ch == '-' ) options[ci] = on_on_minus;
85                         else if( ch == '+' ) options[ci] = !on_on_minus;
86                         else PushBack();
87                         on_on_minus = 0;
88                         break;
89
90                 case 'i':
91                 {
92                         register int i=0;
93
94                         LoadChar(ch);
95                         while( ch >= '0' && ch <= '9' ) {
96                                 i = 10 * i + (ch - '0');
97                                 LoadChar(ch);
98                         }
99                         PushBack();
100                         if( tokenseen ) {
101                                 lexwarning("the '%c' option must precede any program text", ci);
102                                 break;
103                         }
104                         if( i <= 0 ) {
105                                 lexwarning("bad '%c' option", ci);
106                                 break;
107                         }
108                         max_intset = i;
109                         break;
110                 }
111
112                 default:
113                         break;
114                 }
115                 LoadChar(ch);
116         } while (ch == ',' );
117
118         PushBack();
119 }
120
121
122 STATIC
123 SkipComment()
124 {
125         /*      Skip ISO-Pascal comments (* ... *) or { ... }.
126                 Note :
127                         comments may not be nested (ISO 6.1.8).
128                         (* and { are interchangeable, so are *) and }.
129         */
130         register int ch;
131
132         LoadChar(ch);
133         if (ch == '$') CommentOptions();
134         for (;;)        {
135                 if( class(ch) == STNL ) {
136                         LineNumber++;
137 #ifdef DEBUG
138                         cntlines++;
139 #endif
140                 }
141                 else if( ch == '*' )    {
142                         LoadChar(ch);
143                         if( ch == ')' ) return;         /* *) */
144                         else continue;
145                 }
146                 else if( ch == '}' ) return;
147                 else if( ch == EOI )    {
148                         lexerror("unterminated comment");
149                         break;
150                 }
151                 LoadChar(ch);
152         }
153 }
154
155 STATIC struct string *
156 GetString( delim )
157 register int delim;
158 {
159         /*      Read a Pascal string, delimited by the character ' or ".
160         */
161         register int ch;
162         register struct string *str = (struct string *)
163                                 Malloc((unsigned) sizeof(struct string));
164         register char *p;
165         register int len = ISTRSIZE;
166         
167         str->s_str = p = Malloc((unsigned int) ISTRSIZE);
168         for( ; ; )      {
169                 LoadChar(ch);
170                 if( ch & 0200 ) {
171                         fatal("non-ascii '\\%03o' read", ch & 0377);
172                         /*NOTREACHED*/
173                 }
174                 if( class(ch) == STNL ) {
175                         lexerror("newline in string");
176                         LineNumber++;
177 #ifdef DEBUG
178                         cntlines++;
179 #endif
180                         break;
181                 }
182                 if( ch == EOI ) {
183                         lexerror("end-of-file in string");
184                         break;
185                 }
186                 if( ch == delim )       {
187                         LoadChar(ch);
188                         if( ch != delim )
189                                 break;
190                 }
191                 *p++ = ch;
192                 if( p - str->s_str == len )     {
193                         extern char *Srealloc();
194
195                         str->s_str = Srealloc(str->s_str,
196                                         (unsigned int) len + RSTRSIZE);
197                         p = str->s_str + len;
198                         len += RSTRSIZE;
199                 }
200         }
201         if( ch == EOI ) eofseen = 1;
202         else PushBack();
203
204         str->s_length = p - str->s_str;
205         *p++ = '\0';
206
207         /* ISO 6.1.7: string length at least 1 */
208         if( str->s_length == 0 )        {
209                 lexerror("character-string: at least one character expected");
210                 str->s_length = 1;
211         }
212
213         return str;
214 }
215
216 static char *s_error = "illegal line directive";
217
218 CheckForLineDirective()
219 {
220         register int    ch;
221         register int    i = 0;
222         char            buf[IDFSIZE + 2];
223         register char   *c = buf;
224
225         LoadChar(ch);
226
227         if( ch != '#' ) {
228                 PushBack();
229                 return;
230         }
231         do {    /*
232                  * Skip to next digit. Do not skip newlines.
233                  */
234                 LoadChar(ch);
235                 if( class(ch) == STNL ) {
236                         LineNumber++;
237                         lexerror(s_error);
238                         return;
239                 }
240                 else if( ch == EOI ) {
241                         eofseen = 1;
242                         break;
243                 }
244         } while( class(ch) != STNUM );
245         while( class(ch) == STNUM )     {
246                 i = i * 10 + (ch - '0');
247                 LoadChar(ch);
248         }
249         if( ch == EOI ) {
250                 eofseen = 1;
251         }
252         while( ch != '"' && ch != EOI && class(ch) != STNL) LoadChar(ch);
253         if( ch == '"' ) {
254                 do {
255                         LoadChar(ch);
256                         *c++ = ch;
257                         if( class(ch) == STNL ) {
258                                 LineNumber++;
259                                 error(s_error);
260                                 return;
261                         }
262                 } while( ch != '"' );
263                 *--c = '\0';
264                 do {
265                         LoadChar(ch);
266                 } while( class(ch) != STNL );
267                 /*
268                  * Remember the filename
269                  */
270                  if( !eofseen && strcmp(FileName, buf) ) {
271                         FileName = Salloc(buf,(unsigned) strlen(buf) + 1);
272                 }
273         }
274         if( eofseen ) {
275                 error(s_error);
276                 return;
277         }
278         LineNumber = i;
279 }
280
281 int
282 LLlex()
283 {
284         /*      LLlex() is the Lexical Analyzer.
285                 The putting aside of tokens is taken into account.
286         */
287         register struct token *tk = &dot;
288         register int ch, nch;
289
290         toktype = error_type;
291
292         if( ASIDE )     {       /* a token is put aside */
293                 *tk = aside;
294                 toktype = asidetype;
295                 ASIDE = 0;
296                 return tk->tk_symb;
297         }
298
299         tk->tk_lineno = LineNumber;
300
301 again1:
302         if( eofseen )   {
303                 eofseen = 0;
304                 ch = EOI;
305         }
306         else    {
307 again:
308                 LoadChar(ch);
309                 if( !options['C'] )             /* -C : cases are different */
310                         TO_LOWER(ch);
311
312                 if( (ch & 0200) && ch != EOI ) {
313                         fatal("non-ascii '\\%03o' read", ch & 0377);
314                         /*NOTREACHED*/
315                 }
316         }
317
318         switch( class(ch) )     {
319
320         case STNL:
321                 LineNumber++;
322                 tk->tk_lineno++;
323 #ifdef DEBUG
324                 cntlines++;
325 #endif
326                 CheckForLineDirective();
327                 goto again1;
328
329         case STSKIP:
330                 goto again;
331
332         case STGARB:
333                 if( !tokenseen && (ch == '"' || ch == '_') ) {
334                         return tk->tk_symb = ch;
335                 }
336                 if( (unsigned) ch < 0177 )
337                         lexerror("garbage char %c", ch);
338                 else
339                         crash("(LLlex) garbage char \\%03o", ch);
340                 goto again;
341
342         case STSIMP:
343                 if( ch == '(' ) {
344                         LoadChar(nch);
345                         if( nch == '*' )        {               /* (* */
346                                 SkipComment();
347                                 tk->tk_lineno = LineNumber;
348                                 goto again1;
349                         }
350                         if( nch == '.' )                        /* (. is [ */
351                                 return tk->tk_symb = '[';
352                         if( nch == EOI ) eofseen = 1;
353                         else PushBack();
354                 }
355                 else if( ch == '{' )    {
356                         SkipComment();
357                         tk->tk_lineno = LineNumber;
358                         goto again1;
359                 }
360                 else if( ch == '@' ) ch = '^';          /* @ is ^ */
361
362                 return tk->tk_symb = ch;
363
364         case STCOMP:
365                 LoadChar(nch);
366                 switch( ch )    {
367
368                 case '.':
369                         if( nch == '.' )                        /* .. */
370                                 return tk->tk_symb = UPTO;
371                         if( nch == ')' )                        /* .) is ] */
372                                 return tk->tk_symb = ']';
373                         break;
374
375                 case ':':
376                         if( nch == '=' )                        /* := */
377                                 return tk->tk_symb = BECOMES;
378                         break;
379
380                 case '<':
381                         if( nch == '=' )                        /* <= */
382                                 return tk->tk_symb = LESSEQUAL;
383                         if( nch == '>' )                        /* <> */
384                                 return tk->tk_symb = NOTEQUAL;
385                         break;
386
387                 case '>':
388                         if( nch == '=' )                        /* >= */
389                                 return tk->tk_symb = GREATEREQUAL;
390                         break;
391
392                 default :
393                         crash("(LLlex, STCOMP)");
394                         /*NOTREACHED*/
395                 }
396                 if( nch == EOI ) eofseen = 1;
397                 else PushBack();
398                 return tk->tk_symb = ch;
399
400         case STIDF:     {
401                 char buf[IDFSIZE + 1];
402                 register char *tag = &buf[0];
403                 register struct idf *id;
404                 extern struct idf *str2idf();
405
406                 do      {
407                         if( !options['C'] )     /* -C : cases are different */
408                                 TO_LOWER(ch);
409                         if( tag - buf < idfsize )
410                                 *tag++ = ch;
411                         LoadChar(ch);
412                 } while( in_idf(ch) );
413                 *tag = '\0';
414
415                 if( ch == EOI ) eofseen = 1;
416                 else PushBack();
417
418                 if( buf[0] == '_' ) lexerror("underscore starts identifier");
419                 tk->TOK_IDF = id = str2idf(buf, 1);
420                 return tk->tk_symb = id->id_reserved ? id->id_reserved : IDENT;
421         }
422
423         case STSTR:     {
424                 register struct string *str = GetString(ch);
425
426                 if( str->s_length == 1 && ch == '\'')   {
427 #ifdef DEBUG
428                         if( options['l'] )      {
429                                 /* to prevent LexScan from crashing */
430                                 tk->tk_data.tk_str = str;
431                                 return tk->tk_symb = STRING;
432                         }
433 #endif
434                         tk->TOK_INT = *(str->s_str) & 0377;
435                         toktype = char_type;
436                         free(str->s_str);
437                         free((char *) str);
438                 }
439                 else    {
440                         if( ch == '\'' )        {
441                                 tk->tk_data.tk_str = str;
442                                 toktype = standard_type(T_STRINGCONST, 1, str->s_length);
443                         }
444                         else    {
445                                 tk->tk_data.tk_str = str;
446                                 toktype = string_type;
447                         }
448                 }
449                 return tk->tk_symb = STRING;
450         }
451
452         case STNUM:     {
453 #define INT_MODE        0
454 #define REAL_MODE       1
455
456                 char buf[NUMSIZE+2];
457                 register char *np = &buf[1];
458                 register int state = INT_MODE;
459                 extern char *Salloc();
460
461                 buf[0] = '-';
462                 do      {
463                         if( np <= &buf[NUMSIZE] )
464                                 *np++ = ch;
465                         LoadChar(ch);
466                 } while( is_dig(ch) );
467
468                 if( ch == '.' ) {
469                         LoadChar(ch);
470                         if( is_dig(ch) )        {
471                                 if( np <= &buf[NUMSIZE] )
472                                         *np++ = '.';
473                                 do      {
474                                         /* fractional part */
475                                         if( np <= &buf[NUMSIZE] )
476                                                 *np++ = ch;
477                                         LoadChar(ch);
478                                 } while( is_dig(ch) );
479                                 state = REAL_MODE;
480                         }
481                         else    {
482                                 PushBack();
483                                 PushBack();
484                                 goto end;
485                         }
486                                 
487                 }
488                 if( ch == 'e' || ch == 'E' )    {
489                         char *tp = np;          /* save position in string */
490
491                         /* scale factor */
492                         if( np <= &buf[NUMSIZE] )
493                                 *np++ = ch;
494                         LoadChar(ch);
495                         if( ch == '+' || ch == '-' )    {
496                                 /* signed scale factor */
497                                 if( np <= &buf[NUMSIZE] )
498                                         *np++ = ch;
499                                 LoadChar(ch);
500                         }
501                         if( is_dig(ch) )        {
502                                 do      {
503                                         if( np <= &buf[NUMSIZE] )
504                                                 *np++ = ch;
505                                         LoadChar(ch);
506                                 } while( is_dig(ch) );
507                                 state = REAL_MODE;
508                         }
509                         else    {
510                                 PushBack();
511                                 PushBack();
512                                 if( np - tp == 2 )      /* sign */
513                                         PushBack();
514                                 np = tp;                /* restore position */
515                                 goto end;
516                         }
517                 }
518                 /* syntax of number is correct */
519                 if( ch == EOI ) eofseen = 1;
520                 else PushBack();
521         end:
522                 *np++ = '\0';
523
524                 if( state == INT_MODE ) {
525                         if( np > &buf[NUMSIZE+1] )      {
526                                 tk->TOK_INT = 1;
527                                 lexerror("constant too long");
528                         }
529                         else    {
530                                 np = &buf[1];
531                                 while (*np == '0')      /* skip leading zeros */
532                                         np++;
533                                 tk->TOK_INT = str2long(np, 10);
534                                 if( tk->TOK_INT < 0 ||
535                                     strlen(np) > strlen(maxint_str) ||
536                                         strlen(np) == strlen(maxint_str) &&
537                                         strcmp(np, maxint_str) > 0 )
538                                              lexwarning("overflow in constant");
539                         }
540                         toktype = int_type;
541                         return tk->tk_symb = INTEGER;
542                 }
543                 /* REAL_MODE */
544                 tk->tk_data.tk_real = (struct real *)
545                                                 Malloc(sizeof(struct real));
546                 /* allocate struct for inverse */
547                 tk->TOK_RIV = (struct real *) Malloc(sizeof(struct real));
548                 tk->TOK_RIV->r_inverse = tk->tk_data.tk_real;
549                 tk->TOK_RLA = 0;
550                 tk->TOK_RIV->r_lab = 0;
551
552                 if( np > &buf[NUMSIZE+1] )      {
553                         tk->TOK_REL = Salloc("0.0", 4);
554                         tk->TOK_RIV->r_real = tk->TOK_REL;
555                         lexerror("floating constant too long");
556                 }
557                 else {
558                         tk->TOK_RIV->r_real = Salloc(buf,(unsigned) (np - buf));
559                         tk->TOK_REL = tk->TOK_RIV->r_real + 1;
560                 }
561
562                 toktype = real_type;
563                 return tk->tk_symb = REAL;
564
565                 /*NOTREACHED*/
566         }
567
568         case STEOI:
569                 return tk->tk_symb = -1;
570
571         case STCHAR:
572         default:
573                 crash("(LLlex) Impossible character class");
574                 /*NOTREACHED*/
575         }
576         /*NOTREACHED*/
577 }