Pristine Ack-5.5
[Ack-5.5.git] / lang / cem / cemcom / LLlex.c
1 /*
2  * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
3  * See the copyright notice in the ACK home directory, in the file "Copyright".
4  */
5 /* $Id: LLlex.c,v 3.27 1994/06/24 12:01:36 ceriel Exp $ */
6 /*                  L E X I C A L   A N A L Y Z E R                     */
7
8 #include        "lint.h"
9 #include        <alloc.h>
10 #include        "nofloat.h"
11 #include        "idfsize.h"
12 #include        "numsize.h"
13 #include        "debug.h"
14 #include        "strsize.h"
15 #include        "nopp.h"
16 #include        "input.h"
17 #include        "arith.h"
18 #include        "def.h"
19 #include        "idf.h"
20 #include        "LLlex.h"
21 #include        "Lpars.h"
22 #include        "class.h"
23 #include        "assert.h"
24 #include        "sizes.h"
25
26 /* Data about the token yielded */
27 struct token dot, ahead, aside;
28 int token_nmb = 0;              /* number of the ahead token */
29 int tk_nmb_at_last_syn_err = -5/*ERR_SHADOW*/;
30                                 /* token number at last syntax error */
31
32 #ifndef NOPP
33 int ReplaceMacros = 1;          /* replacing macros                     */
34 int AccDefined = 0;             /* accept "defined(...)"                */
35 int UnknownIdIsZero = 0;        /* interpret unknown id as integer 0    */
36 int Unstacked = 0;              /* an unstack is done                   */
37 #endif
38 int SkipEscNewline = 0;         /* how to interpret backslash-newline   */
39 int AccFileSpecifier = 0;       /* return filespecifier <...>           */
40 int EoiForNewline = 0;          /* return EOI upon encountering newline */
41 int File_Inserted = 0;          /* a file has just been inserted        */
42 #ifdef LINT
43 extern int lint_skip_comment;
44 #endif
45
46 #define MAX_LL_DEPTH    2
47
48 static struct token LexStack[MAX_LL_DEPTH];
49 static LexSP = 0;
50
51 /*      In PushLex() the actions are taken in order to initialise or
52         re-initialise the lexical scanner.
53         E.g. at the invocation of a sub-parser that uses LLlex(), the
54         state of the current parser should be saved.
55 */
56 PushLex()
57 {
58         ASSERT(LexSP < 2);
59         ASSERT(ASIDE == 0);     /* ASIDE = 0;   */
60         GetToken(&ahead);
61         LexStack[LexSP++] = dot;
62 }
63
64 PopLex()
65 {
66         ASSERT(LexSP > 0);
67         dot = LexStack[--LexSP];
68 }
69
70 int
71 LLlex()
72 {
73         /*      LLlex() plays the role of Lexical Analyzer for the C parser.
74                 The look-ahead and putting aside of tokens are taken into
75                 account.
76         */
77         if (ASIDE) {    /* a token is put aside         */
78                 dot = aside;
79                 ASIDE = 0;
80         }
81         else {          /* read ahead and return the old one    */
82 #ifdef  LINT
83                 lint_comment_ahead();
84 #endif  /* LINT */
85                 dot = ahead;
86                 /*      the following test is performed due to the dual
87                         task of LLlex(): it is also called for parsing the
88                         restricted constant expression following a #if or
89                         #elif.  The newline character causes EOF to be
90                         returned in this case to stop the LLgen parsing task.
91                 */
92                 if (DOT != EOI)
93                         GetToken(&ahead);
94                 else
95                         DOT = EOF;
96         }
97         return DOT;
98 }
99
100 char *string_token();
101
102 int
103 GetToken(ptok)
104         register struct token *ptok;
105 {
106         /*      GetToken() is the actual token recognizer. It calls the
107                 control line interpreter if it encounters a "\n#"
108                 combination. Macro replacement is also performed if it is
109                 needed.
110         */
111         char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];
112         register int ch, nch;
113
114         token_nmb++;
115
116         if (File_Inserted) {
117                 File_Inserted = 0;
118                 goto firstline;
119         }
120
121 again:  /* rescan the input after an error or replacement       */
122 #ifndef NOPP
123         if (Unstacked) EnableMacros();
124 #endif
125         LoadChar(ch);
126 go_on:  /* rescan, the following character has been read        */
127         if ((ch & 0200) && ch != EOI) /* stop on non-ascii character */
128                 fatal("non-ascii '\\%03o' read", ch & 0377);
129         /* keep track of the place of the token in the file     */
130         ptok->tk_file = FileName;
131         ptok->tk_line = LineNumber;
132
133         switch (class(ch)) {    /* detect character class       */
134         case STNL:              /* newline, vertical space or formfeed  */
135 firstline:
136                 LineNumber++;                   /* also at vs and ff    */
137                 ptok->tk_file = FileName;
138                 ptok->tk_line = LineNumber;
139                 if (EoiForNewline)      /* called in control line       */
140                         /*      a newline in a control line indicates the
141                                 end-of-information of the line.
142                         */
143                         return ptok->tk_symb = EOI;
144                 while (LoadChar(ch), ch == '#') { /* a control line follows */
145                         domacro();
146                         if (File_Inserted) {
147                                 File_Inserted = 0;
148                                 goto firstline;
149                         }
150                 }
151                         /*      We have to loop here, because in
152                                 `domacro' the nl, vt or ff is read. The
153                                 character following it may again be a `#'.
154                         */
155                 goto go_on;
156         case STSKIP:            /* just skip the skip characters        */
157                 goto again;
158         case STGARB:            /* garbage character                    */
159 #ifndef NOPP
160                 if (SkipEscNewline && (ch == '\\')) {
161                         /* a '\\' is allowed in #if/#elif expression    */
162                         LoadChar(ch);
163                         if (class(ch) == STNL) {        /* vt , ff ?    */
164                                 ++LineNumber;
165                                 goto again;
166                         }
167                         PushBack();
168                         ch = '\\';
169                 }
170 #endif /* NOPP */
171                 if (040 < ch && ch < 0177)
172                         lexerror("garbage char %c", ch);
173                 else
174                         lexerror("garbage char \\%03o", ch);
175                 goto again;
176         case STSIMP:    /* a simple character, no part of compound token*/
177                 if (ch == '/') { /* probably the start of comment       */
178                         LoadChar(ch);
179                         if (ch == '*') { /* start of comment */
180                                 skipcomment();
181                                 goto again;
182                         }
183                         else {
184                                 PushBack();
185                                 ch = '/';       /* restore ch   */
186                         }
187                 }
188                 return ptok->tk_symb = ch;
189         case STCOMP:    /* maybe the start of a compound token          */
190                 LoadChar(nch);                  /* character lookahead  */
191                 switch (ch) {
192                 case '!':
193                         if (nch == '=')
194                                 return ptok->tk_symb = NOTEQUAL;
195                         PushBack();
196                         return ptok->tk_symb = ch;
197                 case '&':
198                         if (nch == '&')
199                                 return ptok->tk_symb = AND;
200                         PushBack();
201                         return ptok->tk_symb = ch;
202                 case '+':
203                         if (nch == '+')
204                                 return ptok->tk_symb = PLUSPLUS;
205                         PushBack();
206                         return ptok->tk_symb = ch;
207                 case '-':
208                         if (nch == '-')
209                                 return ptok->tk_symb = MINMIN;
210                         if (nch == '>')
211                                 return ptok->tk_symb = ARROW;
212                         PushBack();
213                         return ptok->tk_symb = ch;
214                 case '<':
215                         if (AccFileSpecifier) {
216                                 PushBack();     /* pushback nch */
217                                 ptok->tk_bts = string_token("file specifier",
218                                                         '>', &(ptok->tk_len));
219                                 return ptok->tk_symb = FILESPECIFIER;
220                         }
221                         if (nch == '<')
222                                 return ptok->tk_symb = LEFT;
223                         if (nch == '=')
224                                 return ptok->tk_symb = LESSEQ;
225                         PushBack();
226                         return ptok->tk_symb = ch;
227                 case '=':
228                         if (nch == '=')
229                                 return ptok->tk_symb = EQUAL;
230                         PushBack();
231                         return ptok->tk_symb = ch;
232                 case '>':
233                         if (nch == '=')
234                                 return ptok->tk_symb = GREATEREQ;
235                         if (nch == '>')
236                                 return ptok->tk_symb = RIGHT;
237                         PushBack();
238                         return ptok->tk_symb = ch;
239                 case '|':
240                         if (nch == '|')
241                                 return ptok->tk_symb = OR;
242                         PushBack();
243                         return ptok->tk_symb = ch;
244                 }
245         case STIDF:
246         {
247                 register char *tg = &buf[0];
248                 register int pos = -1;
249                 register int hash;
250                 register struct idf *idef;
251                 extern int idfsize;             /* ??? */
252
253                 hash = STARTHASH();
254                 do      {                       /* read the identifier  */
255                         if (++pos < idfsize) {
256 #ifndef NOPP
257                                 if (Unstacked) EnableMacros();
258 #endif
259                                 *tg++ = ch;
260                                 hash = ENHASH(hash, ch, pos);
261                         }
262                         LoadChar(ch);
263                 } while (in_idf(ch));
264                 hash = STOPHASH(hash);
265                 if (ch != EOI)
266                         PushBack();
267                 *tg++ = '\0';   /* mark the end of the identifier       */
268                 idef = ptok->tk_idf = idf_hashed(buf, tg - buf, hash);
269                 idef->id_file = ptok->tk_file;
270                 idef->id_line = ptok->tk_line;
271 #ifndef NOPP
272                 if (idef->id_macro && ReplaceMacros && replace(idef))
273                         /* macro replacement should be performed        */
274                         goto again;
275                 if (UnknownIdIsZero && idef->id_reserved != SIZEOF) {
276                         ptok->tk_ival = (arith)0;
277                         ptok->tk_fund = INT;
278                         return ptok->tk_symb = INTEGER;
279                 }
280 #endif /* NOPP */
281                 ptok->tk_symb = (
282                         idef->id_reserved ? idef->id_reserved
283                         : idef->id_def && idef->id_def->df_sc == TYPEDEF ?
284                                 TYPE_IDENTIFIER
285                         : IDENTIFIER
286                 );
287                 return IDENTIFIER;
288         }
289         case STCHAR:                            /* character constant   */
290         {
291                 register arith val = 0;
292                 int size = 0;
293
294                 LoadChar(ch);
295                 if (ch == '\'')
296                         lexerror("character constant too short");
297                 else
298                 while (ch != '\'') {
299                         if (ch == '\n') {
300                                 lexerror("newline in character constant");
301                                 PushBack();
302                                 break;
303                         }
304                         if (ch == '\\') {
305                                 LoadChar(ch);
306                                 if (ch == '\n')
307                                         LineNumber++;
308                                 ch = quoted(ch);
309                         }
310                         if (ch >= 128) ch -= 256;
311                         val = val*256 + ch;
312                         size++;
313                         LoadChar(ch);
314                 }
315                 if (size > (int)int_size)
316                         lexerror("character constant too long");
317                 ptok->tk_ival = val;
318                 ptok->tk_fund = INT;
319                 return ptok->tk_symb = INTEGER;
320         }
321         case STSTR:                                     /* string       */
322                 ptok->tk_bts = string_token("string", '"', &(ptok->tk_len));
323                 return ptok->tk_symb = STRING;
324         case STNUM:                             /* a numeric constant   */
325         {
326                 /*      It should be noted that 099 means 81(decimal) and
327                         099.5 means 99.5 . This severely limits the tricks
328                         we can use to scan a numeric value.
329                 */
330                 register char *np = &buf[1];
331                 register int base = 10;
332                 register int vch;
333                 register arith val = 0;
334
335                 if (ch == '.') {        /* an embarrassing ambiguity */
336 #ifndef NOFLOAT
337                         LoadChar(vch);
338                         PushBack();
339                         if (!is_dig(vch))       /* just a `.'   */
340                                 return ptok->tk_symb = ch;
341                         *np++ = '0';
342                         /*      in the rest of the compiler, all floats
343                                 have to start with a digit.
344                         */
345 #else /* NOFLOAT */
346                         return ptok->tk_symb = ch;
347 #endif /* NOFLOAT */
348                 }
349                 if (ch == '0') {
350                         *np++ = ch;
351                         LoadChar(ch);
352                         if (ch == 'x' || ch == 'X') {
353                                 base = 16;
354                                 LoadChar(ch);
355                         }
356                         else
357                                 base = 8;
358                 }
359                 while (vch = val_in_base(ch, base), vch >= 0) {
360                         val = val*base + vch;
361                         if (np < &buf[NUMSIZE])
362                                 *np++ = ch;
363                         LoadChar(ch);
364                 }
365                 if (ch == 'l' || ch == 'L') {
366                         ptok->tk_ival = val;
367                         ptok->tk_fund = LONG;
368                         return ptok->tk_symb = INTEGER;
369                 }
370 #ifndef NOFLOAT
371                 if (base == 16 || !(ch == '.' || ch == 'e' || ch == 'E'))
372 #endif /* NOFLOAT */
373                 {
374                         PushBack();
375                         ptok->tk_ival = val;
376                         /*      The semantic analyser must know if the
377                                 integral constant is given in octal/hexa-
378                                 decimal form, in which case its type is
379                                 UNSIGNED, or in decimal form, in which case
380                                 its type is signed, indicated by
381                                 the fund INTEGER.
382                         */
383                         ptok->tk_fund = 
384                                 (base == 10 || (base == 8 && val == (arith)0))
385                                         ? INTEGER : UNSIGNED;
386                         return ptok->tk_symb = INTEGER;
387                 }
388                 /* where's the test for the length of the integral ???  */
389 #ifndef NOFLOAT
390                 if (ch == '.'){
391                         if (np < &buf[NUMSIZE])
392                                 *np++ = ch;
393                         LoadChar(ch);
394                 }
395                 while (is_dig(ch)){
396                         if (np < &buf[NUMSIZE])
397                                 *np++ = ch;
398                         LoadChar(ch);
399                 }
400                 if (ch == 'e' || ch == 'E') {
401                         if (np < &buf[NUMSIZE])
402                                 *np++ = ch;
403                         LoadChar(ch);
404                         if (ch == '+' || ch == '-') {
405                                 if (np < &buf[NUMSIZE])
406                                         *np++ = ch;
407                                 LoadChar(ch);
408                         }
409                         if (!is_dig(ch)) {
410                                 lexerror("malformed floating constant");
411                                 if (np < &buf[NUMSIZE])
412                                         *np++ = ch;
413                         }
414                         while (is_dig(ch)) {
415                                 if (np < &buf[NUMSIZE])
416                                         *np++ = ch;
417                                 LoadChar(ch);
418                         }
419                 }
420                 PushBack();
421                 *np++ = '\0';
422                 buf[0] = '-';   /* good heavens...      */
423                 if (np == &buf[NUMSIZE+1]) {
424                         lexerror("floating constant too long");
425                         ptok->tk_fval = Salloc("-0.0",(unsigned) 5) + 1;
426                 }
427                 else
428                         ptok->tk_fval = Salloc(buf,(unsigned) (np - buf)) + 1;
429                 return ptok->tk_symb = FLOATING;
430 #endif /* NOFLOAT */
431         }
432         case STEOI:                     /* end of text on source file   */
433                 return ptok->tk_symb = EOI;
434         default:                                /* this cannot happen   */
435                 crash("bad class for char 0%o", ch);
436         }
437         /*NOTREACHED*/
438 }
439
440 skipcomment()
441 {
442         /*      The last character read has been the '*' of '/_*'.  The
443                 characters, except NL and EOI, between '/_*' and the first
444                 occurring '*_/' are not interpreted.
445                 NL only affects the LineNumber.  EOI is not legal.
446
447                 Important note: it is not possible to stop skipping comment
448                 beyond the end-of-file of an included file.
449                 EOI is returned by LoadChar only on encountering EOF of the
450                 top-level file...
451         */
452         register int c;
453
454         NoUnstack++;
455         LoadChar(c);
456 #ifdef  LINT
457         if (! lint_skip_comment) {
458                 lint_start_comment();
459                 lint_comment_char(c);
460         }
461 #endif  /* LINT */
462         do {
463                 while (c != '*') {
464                         if (class(c) == STNL)
465                                 ++LineNumber;
466                         else
467                         if (c == EOI) {
468                                 NoUnstack--;
469 #ifdef  LINT
470                                 if (! lint_skip_comment) lint_end_comment();
471 #endif  /* LINT */
472                                 return;
473                         }
474                         LoadChar(c);
475 #ifdef  LINT
476                         if (! lint_skip_comment) lint_comment_char(c);
477 #endif  /* LINT */
478                 } /* last Character seen was '*' */
479                 LoadChar(c);
480 #ifdef  LINT
481                 if (! lint_skip_comment) lint_comment_char(c);
482 #endif  /* LINT */
483         } while (c != '/');
484 #ifdef  LINT
485         if (! lint_skip_comment) lint_end_comment();
486 #endif  /* LINT */
487         NoUnstack--;
488 }
489
490 char *
491 string_token(nm, stop_char, plen)
492         char *nm;
493         int *plen;
494 {
495         register int ch;
496         register int str_size;
497         register char *str = Malloc((unsigned) (str_size = ISTRSIZE));
498         register int pos = 0;
499         
500         LoadChar(ch);
501         while (ch != stop_char) {
502                 if (ch == '\n') {
503                         lexerror("newline in %s", nm);
504                         PushBack();
505                         break;
506                 }
507                 if (ch == EOI) {
508                         lexerror("end-of-file inside %s", nm);
509                         break;
510                 }
511                 if (ch == '\\') {
512                         LoadChar(ch);
513                         if (ch == '\n') {
514                                 LineNumber++;
515                                 LoadChar(ch);
516                                 continue;
517                         }
518                         ch = quoted(ch);
519                 }
520                 str[pos++] = ch;
521                 if (pos == str_size)
522                         str = Srealloc(str, (unsigned) (str_size += RSTRSIZE));
523                 LoadChar(ch);
524         }
525         str[pos++] = '\0'; /* for filenames etc. */
526         *plen = pos;
527         return str;
528 }
529
530 int
531 quoted(ch)
532         register int ch;
533 {       
534         /*      quoted() replaces an escaped character sequence by the
535                 character meant.
536         */
537         /* first char after backslash already in ch */
538         if (!is_oct(ch)) {              /* a quoted char */
539                 switch (ch) {
540                 case 'n':
541                         ch = '\n';
542                         break;
543                 case 't':
544                         ch = '\t';
545                         break;
546                 case 'b':
547                         ch = '\b';
548                         break;
549                 case 'r':
550                         ch = '\r';
551                         break;
552                 case 'f':
553                         ch = '\f';
554                         break;
555                 }
556         }
557         else {                          /* a quoted octal */
558                 register int oct = 0, cnt = 0;
559
560                 do {
561                         oct = oct*8 + (ch-'0');
562                         LoadChar(ch);
563                 } while (is_oct(ch) && ++cnt < 3);
564                 PushBack();
565                 ch = oct;
566         }
567         return ch&0377;
568 }
569
570 /* provisional */
571 int
572 val_in_base(ch, base)
573         register int ch;
574 {
575         return
576                 is_dig(ch) ? ch - '0'
577                 : base != 16 ? -1
578                 : is_hex(ch) ? (ch - 'a' + 10) & 017
579                 : -1;
580 }