Pristine Ack-5.5
[Ack-5.5.git] / lang / cem / cemcom.ansi / LLlex.c
1 /*
2  * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
3  * See the copyright notice in the ACK home directory, in the file "Copyright".
4  */
5 /* $Id: LLlex.c,v 1.27 1994/06/27 07:57:57 ceriel Exp $ */
6 /*                  L E X I C A L   A N A L Y Z E R                     */
7
8 #include        "debug.h"
9 #include        "lint.h"
10 #include        <alloc.h>
11 #include        "idfsize.h"
12 #include        "numsize.h"
13 #include        "strsize.h"
14 #include        "nopp.h"
15 #include        "input.h"
16 #include        "arith.h"
17 #include        "def.h"
18 #include        "macro.h"
19 #include        "idf.h"
20 #include        "LLlex.h"
21 #include        "Lpars.h"
22 #include        "class.h"
23 #include        "assert.h"
24 #include        "sizes.h"
25 #include        "specials.h"    /* registration of special identifiers */
26
27 /* Data about the token yielded */
28 struct token dot, ahead, aside;
29 int token_nmb = 0;              /* number of the ahead token */
30 int tk_nmb_at_last_syn_err = -5/*ERR_SHADOW*/;
31                                 /* token number at last syntax error */
32 int idfsize = IDFSIZE;
33 char sp_occurred[SP_TOTAL+1];
34
35 #ifndef NOPP
36 int ReplaceMacros = 1;          /* replacing macros                     */
37 int AccDefined = 0;             /* accept "defined(...)"                */
38 int UnknownIdIsZero = 0;        /* interpret unknown id as integer 0    */
39 int Unstacked = 0;              /* an unstack is done                   */
40 extern int InputLevel;
41 #endif
42 int AccFileSpecifier = 0;       /* return filespecifier <...>           */
43 int EoiForNewline = 0;          /* return EOI upon encountering newline */
44 int File_Inserted = 0;          /* a file has just been inserted        */
45 int LexSave = 0;                /* last character read by GetChar       */
46 #define MAX_LL_DEPTH    2
47
48 #define FLG_ESEEN       0x01    /* possibly a floating point number */
49 #define FLG_DOTSEEN     0x02    /* certainly a floating point number */
50 extern arith full_mask[];
51
52 #ifdef LINT
53 extern int lint_skip_comment;
54 #endif
55
56 #ifndef NOPP
57 static struct token LexStack[MAX_LL_DEPTH];
58 static LexSP = 0;
59
60 /*      In PushLex() the actions are taken in order to initialise or
61         re-initialise the lexical scanner.
62         E.g. at the invocation of a sub-parser that uses LLlex(), the
63         state of the current parser should be saved.
64 */
65 PushLex()
66 {
67         ASSERT(LexSP < MAX_LL_DEPTH);
68         ASSERT(ASIDE == 0);     /* ASIDE = 0;   */
69         GetToken(&ahead);
70         LexStack[LexSP++] = dot;
71 }
72
73 PopLex()
74 {
75         ASSERT(LexSP > 0);
76         dot = LexStack[--LexSP];
77 }
78 #endif  /* NOPP */
79
80 int
81 LLlex()
82 {
83         /*      LLlex() plays the role of Lexical Analyzer for the C parser.
84                 The look-ahead and putting aside of tokens are taken into
85                 account.
86         */
87         if (ASIDE) {    /* a token is put aside         */
88                 dot = aside;
89                 ASIDE = 0;
90         }
91         else {          /* read ahead and return the old one    */
92 #ifdef  LINT
93                 lint_comment_ahead();
94 #endif  /* LINT */
95                 dot = ahead;
96                 /*      the following test is performed due to the dual
97                         task of LLlex(): it is also called for parsing the
98                         restricted constant expression following a #if or
99                         #elif.  The newline character causes EOF to be
100                         returned in this case to stop the LLgen parsing task.
101                 */
102                 if (DOT != EOI)
103                         GetToken(&ahead);
104                 else
105                         DOT = EOF;
106         }
107         return DOT;
108 }
109
110
111 char    *string_token();
112 arith   char_constant();
113
114 int
115 GetToken(ptok)
116         register struct token *ptok;
117 {
118         /*      GetToken() is the actual token recognizer. It calls the
119                 control line interpreter if it encounters a "\n{w}*#"
120                 combination. Macro replacement is also performed if it is
121                 needed.
122         */
123         char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];
124         register int ch, nch;
125
126         token_nmb++;
127
128         if (File_Inserted) {
129                 File_Inserted = 0;
130                 goto firstline;
131         }
132
133 again:  /* rescan the input after an error or replacement       */
134         ch = GetChar();
135 go_on:  /* rescan, the following character has been read        */
136         if ((ch & 0200) && ch != EOI) /* stop on non-ascii character */
137         {
138                 fatal("non-ascii '\\%03o' read", ch & 0377);
139         }
140         /* keep track of the place of the token in the file     */
141         ptok->tk_file = FileName;
142         ptok->tk_line = LineNumber;
143
144         switch (class(ch)) {    /* detect character class       */
145         case STNL:              /* newline, vertical space or formfeed  */
146 firstline:
147                 LineNumber++;                   /* also at vs and ff    */
148                 ptok->tk_file = FileName;
149                 ptok->tk_line = LineNumber;
150                 if (EoiForNewline)      /* called in control line       */
151                         /*      a newline in a control line indicates the
152                                 end-of-information of the line.
153                         */
154                         return ptok->tk_symb = EOI;
155
156                 while ((ch = GetChar()),
157                         (ch == '#'
158 #ifndef NOPP
159                         || ch == '/'
160 #endif
161                         || class(ch) == STSKIP)) {
162                         /* blanks are allowed before hashes */
163                         if (ch == '#') {
164                                 /* a control line follows */
165                                 domacro();
166 #ifndef NOPP
167                                 if (File_Inserted) {
168                                         File_Inserted = 0;
169                                         goto firstline;
170                                 }
171                         } else if (ch == '/') {
172                                 if ((GetChar() == '*') && !InputLevel) {
173                                         skipcomment();
174                                 } else {
175                                         UnGetChar();
176                                         break;
177                                 }
178 #endif  /* NOPP */
179                         }
180                 }
181                         /*      We have to loop here, because in
182                                 `domacro' the nl, vt or ff is read. The
183                                 character following it may again be a `#'.
184                         */
185                 goto go_on;
186         case STSKIP:            /* just skip the skip characters        */
187                 goto again;
188         case STGARB:            /* garbage character                    */
189 #ifndef NOPP
190 garbage:
191 #endif
192                 if (040 < ch && ch < 0177) {
193                         return ptok->tk_symb = ch;
194                 } else {
195                         lexerror("garbage char \\%03o", ch);
196                 }
197                 goto again;
198         case STSIMP:    /* a simple character, no part of compound token*/
199                 return ptok->tk_symb = ch;
200         case STCOMP:    /* maybe the start of a compound token          */
201                 nch = GetChar();                /* character lookahead  */
202                 switch (ch) {
203                 case '!':
204                         if (nch == '=')
205                                 return ptok->tk_symb = NOTEQUAL;
206                         break;
207                 case '&':
208                         if (nch == '&')
209                                 return ptok->tk_symb = AND;
210                         if (nch == '=')
211                                 return ptok->tk_symb = ANDAB;
212                         break;
213                 case '+':
214                         if (nch == '+')
215                                 return ptok->tk_symb = PLUSPLUS;
216                         if (nch == '=')
217                                 return ptok->tk_symb = PLUSAB;
218                         break;
219                 case '-':
220                         if (nch == '-')
221                                 return ptok->tk_symb = MINMIN;
222                         if (nch == '>')
223                                 return ptok->tk_symb = ARROW;
224                         if (nch == '=')
225                                 return ptok->tk_symb = MINAB;
226                         break;
227                 case '<':
228                         if (AccFileSpecifier) {
229                                 UnGetChar();    /* pushback nch */
230                                 ptok->tk_bts = string_token("file specifier",
231                                                         '>', &(ptok->tk_len));
232                                 return ptok->tk_symb = FILESPECIFIER;
233                         }
234                         if (nch == '<') {
235                                 if ((nch = GetChar()) == '=')
236                                         return ptok->tk_symb = LEFTAB;
237                                 UnGetChar();
238                                 return ptok->tk_symb = LEFT;
239                         }
240                         if (nch == '=')
241                                 return ptok->tk_symb = LESSEQ;
242                         break;
243                 case '=':
244                         if (nch == '=')
245                                 return ptok->tk_symb = EQUAL;
246                         break;
247                 case '>':
248                         if (nch == '=')
249                                 return ptok->tk_symb = GREATEREQ;
250                         if (nch == '>') {
251                                 if ((nch = GetChar()) == '=')
252                                         return ptok->tk_symb = RIGHTAB;
253                                 UnGetChar();
254                                 return ptok->tk_symb = RIGHT;
255                         }
256                         break;
257                 case '|':
258                         if (nch == '|')
259                                 return ptok->tk_symb = OR;
260                         if (nch == '=')
261                                 return ptok->tk_symb = ORAB;
262                         break;
263                 case '%':
264                         if (nch == '=')
265                                 return ptok->tk_symb = MODAB;
266                         break;
267                 case '*':
268                         if (nch == '=')
269                                 return ptok->tk_symb = TIMESAB;
270                         break;
271                 case '^':
272                         if (nch == '=')
273                                 return ptok->tk_symb = XORAB;
274                         break;
275                 case '/':
276 #ifndef NOPP
277                         if (nch == '*' && !InputLevel) {
278                                 skipcomment();
279                                 goto again;
280                         }
281 #endif
282                         if (nch == '=')
283                                 return ptok->tk_symb = DIVAB;
284                         break;
285                 default:
286                         crash("bad class for char 0%o", ch);
287                         /* NOTREACHED */
288                 }
289                 UnGetChar();
290                 return ptok->tk_symb = ch;
291         case STCHAR:                            /* character constant   */
292                 ptok->tk_ival = char_constant("character");
293                 ptok->tk_fund = INT;
294                 return ptok->tk_symb = INTEGER;
295         case STSTR:                                     /* string       */
296                 ptok->tk_bts = string_token("string", '"', &(ptok->tk_len));
297                 ptok->tk_fund = CHAR;           /* string of characters */
298                 return ptok->tk_symb = STRING;
299         case STELL:             /* wide character constant/string prefix */
300                 nch = GetChar();
301                 if (nch == '"') {
302                         ptok->tk_bts = string_token("wide character string",
303                                         '"', &(ptok->tk_len));
304                         ptok->tk_fund = WCHAR;  /* string of wide characters */
305                         return ptok->tk_symb = STRING;
306                 } else if (nch == '\'') {
307                         ptok->tk_ival = char_constant("wide character");
308                         ptok->tk_fund = INT;
309                         return ptok->tk_symb = INTEGER;
310                 }
311                 UnGetChar();
312                 /* fallthrough */
313         case STIDF:
314         {
315                 register char *tg = &buf[0];
316                 register int pos = -1;
317                 register struct idf *idef;
318                 extern int idfsize;             /* ??? */
319 #ifndef NOPP
320                 int NoExpandNext = 0;
321
322                 if (Unstacked) EnableMacros();  /* unstack macro's when allowed. */
323                 if (ch == NOEXPM)  {
324                         NoExpandNext = 1;
325                         ch = GetChar();
326                 }
327 #endif
328                 do      {                       /* read the identifier  */
329                         if (++pos < idfsize) {
330                                 *tg++ = ch;
331                         }
332                         ch = GetChar();
333                 } while (in_idf(ch));
334
335                 if (ch != EOI)
336                         UnGetChar();
337                 *tg++ = '\0';   /* mark the end of the identifier       */
338                 idef = ptok->tk_idf = str2idf(buf, 1);
339                 sp_occurred[idef->id_special] = 1;
340                 idef->id_file = ptok->tk_file;
341                 idef->id_line = ptok->tk_line;
342 #ifndef NOPP
343                 if (idef->id_macro && ReplaceMacros && !NoExpandNext) {
344                         if (replace(idef))
345                                         goto again;
346                 }
347                 if (UnknownIdIsZero && idef->id_reserved != SIZEOF) {
348                         ptok->tk_ival = (arith)0;
349                         ptok->tk_fund = INT;
350                         return ptok->tk_symb = INTEGER;
351                 }
352 #endif /* NOPP */
353                 ptok->tk_symb = (
354                         idef->id_reserved
355                             ? idef->id_reserved
356                             : idef->id_def && idef->id_def->df_sc == TYPEDEF
357                                 ? TYPE_IDENTIFIER
358                                 : IDENTIFIER
359                 );
360                 return IDENTIFIER;
361         }
362         case STNUM:                             /* a numeric constant   */
363         {
364                 register int siz_left = NUMSIZE - 1;
365                 register char *np = &buf[0];
366                 int flags = 0;
367
368 #define store(ch)       if (--siz_left >= 0)            \
369                                 *np++ = ch;
370
371                 if (ch == '.') {
372                         /*      An embarrasing ambiguity. We have either a
373                                 pp-number, a field operator, an ELLIPSIS or
374                                 an error (..).
375                         */
376                         ch = GetChar();
377                         if (!is_dig(ch)) {      /* . or ... */
378                                 if (ch == '.') {
379                                         if ((ch = GetChar()) == '.')
380                                                 return ptok->tk_symb = ELLIPSIS;
381                                         UnGetChar();            /* not '.' */
382                                         ChPushBack('.');        /* sigh ... */
383                                 } else
384                                         UnGetChar();            /* not '.' */
385                                 return ptok->tk_symb = '.';
386                         }
387                         UnGetChar();
388                         ch = '.';
389                         flags |= FLG_DOTSEEN;
390                  }
391                 store(ch);
392                 ch = GetChar();
393                 while(in_idf(ch) || ch == '.') {
394                         store(ch);
395                         if (ch == '.') flags |= FLG_DOTSEEN;
396                         if (ch == 'e' || ch == 'E') {
397                                 flags |= FLG_ESEEN;
398                                 ch = GetChar();
399                                 if (ch == '+' || ch == '-') {
400                                         flags |= FLG_DOTSEEN;   /* trick */
401                                         store(ch);
402                                         ch = GetChar();
403                                 }
404                         } else ch = GetChar();
405                 }
406                 store('\0');
407                 UnGetChar();
408
409                 np = &buf[0];
410                 ch = *np++;
411                 if (siz_left < 0) {
412                         lexerror("number too long");
413                         if ((flags & FLG_DOTSEEN)
414                             || (flags & FLG_ESEEN
415                                 && !(ch == '0'
416                                     && (*np == 'x' || *np == 'X')))) {
417                             ptok->tk_fval = Salloc("0.0", (unsigned) 4);
418                             ptok->tk_fund = DOUBLE;
419                             return ptok->tk_symb = FLOATING;
420                         }
421                         ptok->tk_ival = 1;
422                         ptok->tk_fund = ULONG;
423                         ptok->tk_symb = INTEGER;
424                 }
425                 /* Now, the pp-number must be converted into a token */
426                 if ((flags & FLG_DOTSEEN)
427                         || (flags & FLG_ESEEN
428                             && !(ch == '0' && (*np == 'x' || *np == 'X')))) {
429                         strflt2tok(&buf[0], ptok);
430                         return ptok->tk_symb = FLOATING;
431                 }
432                 strint2tok(&buf[0], ptok);
433                 return ptok->tk_symb = INTEGER;
434         }
435         case STEOI:                     /* end of text on source file   */
436                 return ptok->tk_symb = EOI;
437 #ifndef NOPP
438         case STMSPEC:
439                 if (!InputLevel) goto garbage;
440                 if (ch == TOKSEP) goto again;
441                 /* fallthrough shouldn't happen */
442 #endif
443         default:                                /* this cannot happen   */
444                 crash("bad class for char 0%o", ch);
445         }
446         /*NOTREACHED*/
447 }
448
449 #ifndef NOPP
450 skipcomment()
451 {
452         /*      The last character read has been the '*' of '/_*'.  The
453                 characters, except NL and EOI, between '/_*' and the first
454                 occurring '*_/' are not interpreted.
455                 NL only affects the LineNumber.  EOI is not legal.
456
457                 Important note: it is not possible to stop skipping comment
458                 beyond the end-of-file of an included file.
459                 EOI is returned by LoadChar only on encountering EOF of the
460                 top-level file...
461         */
462         register int c, oldc = '\0';
463
464         NoUnstack++;
465         c = GetChar();
466 #ifdef  LINT
467         if (! lint_skip_comment) {
468                 lint_start_comment();
469                 lint_comment_char(c);
470         }
471 #endif  /* LINT */
472         do {
473                 while (c != '*') {
474                         if (class(c) == STNL) {
475                                 ++LineNumber;
476                         } else if (c == EOI) {
477                                 NoUnstack--;
478 #ifdef  LINT
479                                 if (! lint_skip_comment) lint_end_comment();
480 #endif  /* LINT */
481                                 return;
482                         }
483                         oldc = c;
484                         c = GetChar();
485 #ifdef  LINT
486                         if (! lint_skip_comment) lint_comment_char(c);
487 #endif  /* LINT */
488                 } /* last Character seen was '*' */
489                 c = GetChar();
490                 if ( c != '/' && oldc == '/')
491                         lexwarning("comment inside comment ?");
492                 oldc = '*';
493 #ifdef  LINT
494                 if (! lint_skip_comment) lint_comment_char(c);
495 #endif  /* LINT */
496         } while (c != '/');
497 #ifdef  LINT
498         if (! lint_skip_comment) lint_end_comment();
499 #endif  /* LINT */
500         NoUnstack--;
501 }
502 #endif  /* NOPP */
503
504 arith
505 char_constant(nm)
506         char *nm;
507 {
508         register arith val = 0;
509         register int ch;
510         int size = 0;
511
512         ch = GetChar();
513         if (ch == '\'')
514                 lexerror("%s constant too short", nm);
515         else
516         while (ch != '\'') {
517                 if (ch == '\n') {
518                         lexerror("newline in %s constant", nm);
519                         LineNumber++;
520                         break;
521                 }
522                 if (ch == '\\')
523                         ch = quoted(GetChar());
524                 if (ch >= 128) ch -= 256;
525                 if (size < (int)int_size)
526                         val |= ch << 8 * size;
527                 size++;
528                 ch = GetChar();
529         }
530         if (size > 1)
531                 lexstrict("%s constant includes more than one character", nm);
532         if (size > (int)int_size)
533                 lexerror("%s constant too long", nm);
534         return val;
535 }
536
537 char *
538 string_token(nm, stop_char, plen)
539         char *nm;
540         int *plen;
541 {
542         register int ch;
543         register int str_size;
544         register char *str = Malloc((unsigned) (str_size = ISTRSIZE));
545         register int pos = 0;
546         
547         ch = GetChar();
548         while (ch != stop_char) {
549                 if (ch == '\n') {
550                         lexerror("newline in %s", nm);
551                         LineNumber++;
552                         break;
553                 }
554                 if (ch == EOI) {
555                         lexerror("end-of-file inside %s", nm);
556                         break;
557                 }
558                 if (ch == '\\' && !AccFileSpecifier)
559                         ch = quoted(GetChar());
560                 str[pos++] = ch;
561                 if (pos == str_size)
562                         str = Realloc(str, (unsigned) (str_size += RSTRSIZE));
563                 ch = GetChar();
564         }
565         str[pos++] = '\0'; /* for filenames etc. */
566         *plen = pos;
567         return str;
568 }
569
/*	quoted() replaces an escaped character sequence by the character
	meant.  `ch' is the first character after the backslash; further
	characters are read through GetChar() when needed:
	  - n t b r f a v	the corresponding control character;
	  - x			all hexadecimal digits that follow;
	  - an octal digit	up to three octal digits in total;
	  - anything else	stands for itself.
	The result is reduced to 8 bits.
*/
int
quoted(ch)
	register int ch;
{
	/* first char after backslash already in ch */
	if (!is_oct(ch)) {		/* a quoted char */
		switch (ch) {
		case 'n':
			ch = '\n';
			break;
		case 't':
			ch = '\t';
			break;
		case 'b':
			ch = '\b';
			break;
		case 'r':
			ch = '\r';
			break;
		case 'f':
			ch = '\f';
			break;
		case 'a':		/* alert */
			ch = '\007';
			break;
		case 'v':		/* vertical tab */
			ch = '\013';
			break;
		case 'x':		/* quoted hex */
		{
			register int hex = 0;
			register int vch;

			for (;;) {
				ch = GetChar();
				if ((vch = hex_val(ch)) == -1)
					break;
				/*	Only the low 8 bits are returned, so
					reduce at every step: the result is
					the same and a long digit sequence
					can no longer overflow the int.
				*/
				hex = (hex * 16 + vch) & 0377;
			}
			UnGetChar();	/* the first non-hex character */
			ch = hex;
			break;
		}
		}
	}
	else {				/* a quoted octal */
		register int oct = 0, cnt = 0;

		do {
			oct = oct*8 + (ch-'0');
			ch = GetChar();
		} while (is_oct(ch) && ++cnt < 3);
		UnGetChar();		/* the character after the digits */
		ch = oct;
	}
	return ch&0377;
}
629
630
/*	hex_val() yields the numeric value of the digit `ch': 0..9 for
	characters accepted by is_dig(), the value of the letter for
	other characters accepted by is_hex(), and -1 for anything else.
*/
int
hex_val(ch)
	register int ch;
{
	if (is_dig(ch))
		return ch - '0';
	if (is_hex(ch))
		return (ch - 'a' + 10) & 017;
	return -1;
}
639
640
641 int
642 GetChar()
643 {
644         /*      The routines GetChar and trigraph parses the trigraph
645                 sequences and removes occurences of \\\n.
646         */
647         register int ch;
648
649 #ifndef NOPP
650 again:
651 #endif
652         LoadChar(ch);
653
654 #ifndef NOPP
655         /* possible trigraph sequence */
656         if (ch == '?')
657                 ch = trigraph();
658
659         /* \<newline> is removed from the input stream */
660         if (ch == '\\') {
661                 LoadChar(ch);
662                 if (ch == '\n') {
663                         ++LineNumber;
664                         goto again;
665                 }
666                 PushBack();
667                 ch = '\\';
668         }
669 #endif
670         return(LexSave = ch);
671 }
672
673 #ifndef NOPP
/*	trigraph() is called when a '?' has just been read.  If the next
	two characters complete a trigraph sequence, the character that
	the sequence denotes is returned.  Otherwise everything read here
	is pushed back and the '?' itself is returned.
*/
int
trigraph()
{
	register int c;

	LoadChar(c);
	if (c != '?') {
		PushBack();
		return('?');
	}

	LoadChar(c);
	switch (c) {		/* it's a trigraph */
	case '=':	return('#');
	case '(':	return('[');
	case '/':	return('\\');
	case ')':	return(']');
	case '\'':	return('^');
	case '<':	return('{');
	case '!':	return('|');
	case '>':	return('}');
	case '-':	return('~');
	}

	/* no trigraph: undo both reads */
	PushBack();
	PushBack();
	return('?');
}
716 #endif
717
718 /* strflt2tok only checks the syntax of the floating-point number and
719  * selects the right type for the number.
720  */
721 strflt2tok(fltbuf, ptok)
722 char fltbuf[];
723 struct token *ptok;
724 {
725         register char *cp = fltbuf;
726         int malformed = 0;
727
728         while (is_dig(*cp)) cp++;
729         if (*cp == '.') {
730                 cp++;
731                 while (is_dig(*cp)) cp++;
732         }
733         if (*cp == 'e' || *cp == 'E') {
734                 cp++;
735                 if (*cp == '+' || *cp == '-')
736                         cp++;
737                 if (!is_dig(*cp)) malformed++;
738                 while (is_dig(*cp)) cp++;
739         }
740         if (*cp == 'f' || *cp == 'F') {
741                 if (*(cp + 1)) malformed++;
742                 *cp = '\0';
743                 ptok->tk_fund = FLOAT;
744         } else if (*cp == 'l' || *cp == 'L') {
745                 if (*(cp + 1)) malformed++;
746                 *cp = '\0';
747                 ptok->tk_fund = LNGDBL;
748         } else {
749                 if (*cp) malformed++;
750                 ptok->tk_fund = DOUBLE;
751         }
752         if (malformed) {
753                 lexerror("malformed floating constant");
754                 ptok->tk_fval = Salloc("0.0", (unsigned) 4);
755         } else {
756                 ptok->tk_fval = Salloc(fltbuf, (unsigned) (cp - fltbuf + 1));
757         }
758 }
759
760 strint2tok(intbuf, ptok)
761 char intbuf[];
762 struct token *ptok;
763 {
764         register char *cp = intbuf;
765         int base = 10;
766         arith val = 0, dig, ubound;
767         int uns_flg = 0, lng_flg = 0, malformed = 0, ovfl = 0;
768         int fund;
769
770         ASSERT(*cp != '-');
771         if (*cp == '0') {
772                 cp++;
773                 if (*cp == 'x' || *cp == 'X') {
774                         cp++;
775                         base = 16;
776                 } else base = 8;
777         }
778         /* The upperbound will be the same as when computed with
779          * max_unsigned_arith / base (since base is even). The problem here
780          * is that unsigned arith is not accepted by all compilers.
781          */
782         ubound = max_arith / (base / 2);
783
784         while (is_hex(*cp)) {
785                 dig = hex_val(*cp);
786                 if (dig >= base) {
787                         malformed++;                    /* ignore */
788                 }
789                 else {
790                         if (val < 0 || val > ubound) ovfl++;
791                         val *= base;
792                         if (val < 0 && val + dig >= 0) ovfl++;
793                         val += dig;
794                 }
795                 cp++;
796         }
797
798         while (*cp) {
799                 if (*cp == 'l' || *cp == 'L') lng_flg++;
800                 else if (*cp == 'u' || *cp == 'U') uns_flg++;
801                 else break;
802                 cp++;
803         }
804         if (*cp) {
805             malformed++;
806         }
807         if (malformed) {
808                 lexerror("malformed %s integer constant",
809                                 (base == 10 ? "decimal"
810                                             : (base == 8 ? "octal"
811                                                         : "hexadecimal")));
812         } else {
813                 if (lng_flg > 1)
814                         lexerror("only one long suffix allowed");
815                 if (uns_flg > 1)
816                         lexerror("only one unsigned suffix allowed");
817         }
818         if (ovfl) {
819                 lexwarning("overflow in constant");
820                 fund = ULONG;
821         } else if (!lng_flg && (val & full_mask[(int)int_size]) == val) {
822                 if (val >= 0 && val <= max_int) {
823                         fund = INT;
824                 } else if (int_size == long_size) {
825                         fund = UNSIGNED;
826                 } else if (base == 10 && !uns_flg)
827                         fund = LONG;
828                 else    fund = UNSIGNED;
829         } else if((val & full_mask[(int)long_size]) == val) {
830                 if (val >= 0) fund = LONG;
831                 else fund = ULONG;
832         } else {        /* sizeof(arith) is greater than long_size */
833                 ASSERT(arith_size > long_size);
834                 lexwarning("constant too large for target machine");
835                 /* cut the size to prevent further complaints */
836                 val &= full_mask[(int)long_size];
837                 fund = ULONG;
838         }
839         if (lng_flg) {
840                 /* fund can't be INT */
841                 if (fund == UNSIGNED) fund = ULONG;
842         }
843         if (uns_flg) {
844             if (fund == INT) fund = UNSIGNED;
845             else if (fund == LONG) fund = ULONG;
846         }
847         ptok->tk_fund = fund;
848         ptok->tk_ival = val;
849 }