util/LLgen/src/tokens.g

   1 /* Copyright (c) 1991 by the Vrije Universiteit, Amsterdam, the Netherlands.
   2  * For full copyright and restrictions on use see the file COPYING in the top
   3  * level of the LLgen tree.
   4  */
   5
   6 /*
   7  *  L L G E N
   8  *
   9  *  An Extended LL(1) Parser Generator
  10  *
  11  *  Author : Ceriel J.H. Jacobs
  12  */
  13
  14 /*
  15  * tokens.g
  16  * Defines the tokens for the grammar of LLgen.
  17  * The lexical analyser and LLmessage are also included here.
  18  */
  19
  20 {
  21 # include "types.h"
  22 # include "io.h"
  23 # include "extern.h"
  24 # include "assert.h"
  25 # include "cclass.h"
  26
  27 # ifndef NORCSID
  28 static string   rcsidc = "$Id: tokens.g,v 2.17 1997/02/21 15:44:44 ceriel Exp $";
  29 # endif
  30
  31 /* Routines defined in this file: */
  32 extern int      scanner();      /* lexical analyser; returns the next token number */
  33 extern          LLmessage();    /* reports a token that was deleted or inserted */
  34 extern int      input();        /* reads one character */
  35 extern          unput();        /* pushes one character back for input() */
  36 extern          skipcomment();  /* reads past a comment, copying it to fact on request */
  37 # ifdef LINE_DIRECTIVE
  38 STATIC          linedirective();        /* reads a line directive: line number and file name */
  39 # endif
  40 STATIC string   cpy();          /* writes a description of a token into a buffer */
  41 STATIC string   vallookup();    /* maps a keyword token number to its spelling */
  42 STATIC          copyact();      /* copies a bracketed piece of C text to fact */
  43
  44 static int      nparams;        /* set by copyact(): number of parameters it counted */
  45 }
  46 /* Token classes: tokens that stand for a whole group of input texts */
  47
  48 %token  C_IDENT ;       /* lextoken.t_string contains the identifier read */
  49 %token  C_NUMBER ;      /* lextoken.t_num contains the number read */
  50 %token  C_LITERAL ;     /* lextoken.t_string contains the literal read */
  51 %token  C_EXPR ;        /* A C expression (%if or %while), copied by copyact() */
  52 %token  C_PARAMS ;      /* formal or actual parameters; lextoken.t_num is their count */
  53 %token  C_ACTION ;      /* a C action, copied by copyact() */
  54
  55 /* Keywords; their spellings are listed in the table resword[] */
  56
  57 %token  C_TOKEN ;
  58 %token  C_START ;
  59 %token  C_IF ;
  60 %token  C_WHILE ;
  61 %token  C_PERSISTENT ;
  62 %token  C_FIRST ;
  63 %token  C_LEXICAL ;
  64 %token  C_PREFIX ;
  65 %token  C_ONERROR ;
  66 %token  C_AVOID ;
  67 %token  C_PREFER ;
  68 %token  C_DEFAULT ;
  69 %token  C_SUBSTART ;    /* in resword[] only if NON_CORRECTING is defined */
  70 %token  C_ERRONEOUS ;   /* in resword[] only if NON_CORRECTING is defined */
  71 %token  C_ILLEGAL ;     /* in resword[] only if NON_CORRECTING is defined */
  72
  73 %lexical scanner ;
  74
  75 {
  76
  77 /*
  78  * One entry of the keyword table: a spelling and its token number
  79  */
  80
  81 typedef struct keyword {
  82         string  w_word;         /* spelling, in lower case; 0 in the last entry */
  83         int     w_value;        /* token number; 0 in the last entry */
  84 } t_keyw, *p_keyw;
  85
  86 /*
  87  * The list of keywords; the most often used keywords come first.
  88  * Linear search is used, as there are not many keywords.
  89  */
  90
  91 static t_keyw resword[] = {
  92         { "token",      C_TOKEN         },
  93         { "avoid",      C_AVOID         },
  94         { "prefer",     C_PREFER        },
  95         { "persistent", C_PERSISTENT    },
  96         { "default",    C_DEFAULT       },
  97         { "if",         C_IF            },
  98         { "while",      C_WHILE         },
  99         { "first",      C_FIRST         },
 100         { "start",      C_START         },
 101         { "lexical",    C_LEXICAL       },
 102         { "onerror",    C_ONERROR       },
 103         { "prefix",     C_PREFIX        },
 104 #ifdef NON_CORRECTING
 105         { "substart",   C_SUBSTART      },
 106         { "erroneous",  C_ERRONEOUS     },
 107         { "illegal",    C_ILLEGAL       },
 108 #endif
 109         { 0,            0               }       /* end marker for both search loops */
 110 };
 111
 112 static t_token  savedtok;       /* saved lextoken after an insertion; t_tokno == 0 if empty */
 113 # ifdef LINE_DIRECTIVE
 114 static  int     nostartline;    /* = 0 if the char last read by input() started a line */
 115 # endif
 116
 117 STATIC
 118 copyact(ch1,ch2,flag,level) char ch1,ch2; {
 119         /*
 120          * Copy an action to file f. Opening bracket is ch1, closing bracket
 121          * is ch2.
 122          * If flag & 1, copy opening and closing parameters too.
 123          * If flag & 2, don't allow ','.
 124          */
 125         static int      text_seen = 0;
 126         register        FILE *f;
 127         register        ch;             /* Current char */
 128         register        match;          /* used to read strings */
 129         int             saved = linecount;
 130                                         /* save linecount */
 131         int             sav_strip = strip_grammar;
 132
 133         f = fact;
 134         if (ch1 == '{' || flag != 1) strip_grammar = 0;
 135         if (!level) {
 136                 text_seen = 0;
 137                 nparams = 0;                    /* count comma's */
 138                 putc('\0',f);
 139                 fprintf(f,"# line %d \"%s\"\n", linecount,f_input);
 140         }
 141         if (level || (flag & 1)) putc(ch1,f);
 142         for (;;) {
 143                 ch = input();
 144                 if (ch == ch2) {
 145                         if (!level) {
 146                                 if (text_seen) nparams++;
 147                         }
 148                         if (level || (flag & 1)) putc(ch,f);
 149                         if (strip_grammar != sav_strip) {
 150                                 if (ch1 == '{' || flag != 1) putchar(ch);
 151                         }
 152                         strip_grammar = sav_strip;
 153                         return;
 154                 }
 155                 switch(ch) {
 156                   case ')':
 157                   case '}':
 158                   case ']':
 159                         error(linecount,"Parentheses mismatch");
 160                         break;
 161                   case '(':
 162                         text_seen = 1;
 163                         copyact('(',')',flag,level+1);
 164                         continue;
 165                   case '{':
 166                         text_seen = 1;
 167                         copyact('{','}',flag,level+1);
 168                         continue;
 169                   case '[':
 170                         text_seen = 1;
 171                         copyact('[',']',flag,level+1);
 172                         continue;
 173                   case '/':
 174                         ch = input();
 175                         unput(ch);
 176                         if (ch == '*') {
 177                                 putc('/', f);
 178                                 skipcomment(1);
 179                                 continue;
 180                         }
 181                         ch = '/';
 182                         text_seen = 1;
 183                         break;
 184                   case ';':
 185                   case ',':
 186                         if (! level && text_seen) {
 187                                 text_seen = 0;
 188                                 nparams++;
 189                                 if (ch == ',' && (flag & 2)) {
 190                                         warning(linecount, "Parameters may not be separated with a ','");
 191                                         ch = ';';
 192                                 }
 193                         }
 194                         break;
 195                   case '\'':
 196                   case '"' :
 197                         /*
 198                          * watch out for brackets in strings, they do not
 199                          * count !
 200                          */
 201                         text_seen = 1;
 202                         match = ch;
 203                         putc(ch,f);
 204                         while((ch = input())) {
 205                                 if (ch == match) break;
 206                                 if (ch == '\\') {
 207                                         putc(ch,f);
 208                                         ch = input();
 209                                 }
 210                                 if (ch == '\n') {
 211                                         error(linecount,"Newline in string");
 212                                         unput(match);
 213                                 }
 214                                 putc(ch,f);
 215                         }
 216                         if (ch == match) break;
 217                         /* Fall through */
 218                     case EOF :
 219                         if (!level) error(saved,"Action does not terminate");
 220                         strip_grammar = sav_strip;
 221                         return;
 222                     default:
 223                         if (c_class[ch] != ISSPA) text_seen = 1;
 224                 }
 225                 putc(ch,f);
 226         }
 227 }
 228
 229 scanner() {
 230         /*
 231          * Lexical analyser, what else
 232          */
 233         register int    ch;             /* Current char */
 234         register char *p = ltext;
 235         int             reserved = 0;   /* reserved word? */
 236         char            *max = &ltext[LTEXTSZ - 1];
 237         static int      nextexpr;
 238         int             expect_expr = nextexpr;
 239         long            off;
 240
 241         nextexpr = 0;
 242         if (savedtok.t_tokno) {
 243                                 /* A token has been inserted.
 244                                  * Now deliver the last lextoken again
 245                                  */
 246                 lextoken = savedtok;
 247                 savedtok.t_tokno = 0;
 248                 return lextoken.t_tokno;
 249         }
 250         for (;;) {
 251                 ch = input();
 252                 if (ch == EOF) return ch;
 253 # ifdef LINE_DIRECTIVE
 254                 if (ch == '#' && !nostartline) {
 255                         linedirective();
 256                         continue;
 257                 }
 258 # endif
 259                 switch(c_class[ch]) {
 260                   case ISACT :
 261                         if (ch == '{') {
 262                                 copyact('{', '}', in_production, 0);
 263                                 return C_ACTION;
 264                         }
 265                         assert(ch == '(');
 266                         if (expect_expr) {
 267                                 copyact('(', ')', 1, 0);
 268                                 return C_EXPR;
 269                         }
 270                         off = ftell(fact);
 271                         copyact('(', ')', in_production != 0 ? 0 : 2, 0);
 272                         if (nparams == 0) fseek(fact, off, 0);
 273                         lextoken.t_num = nparams;
 274                         return C_PARAMS;
 275                   case ISLIT :
 276                         for (;;) {
 277                                 ch = input();
 278                                 if (ch == '\n' || ch == EOF) {
 279                                         error(linecount,"Missing '");
 280                                         break;
 281                                 }
 282                                 if (ch == '\'') break;
 283                                 if (ch == '\\') {
 284                                         *p++ = ch;
 285                                         ch = input();
 286                                 }
 287                                 *p++ = ch;
 288                                 if (p > max) p--;
 289                         }
 290                         *p = '\0';
 291                         lextoken.t_string = ltext;
 292                         return C_LITERAL;
 293                   case ISCOM :
 294                         skipcomment(0);
 295                         /* Fall through */
 296                   case ISSPA :
 297                         continue;
 298                   case ISDIG : {
 299                         register i = 0;
 300                         do {
 301                                 i = 10 * i + (ch - '0');
 302                                 ch= input();
 303                         } while (c_class[ch] == ISDIG);
 304                         lextoken.t_num = i;
 305                         unput(ch);
 306                         return C_NUMBER; }
 307                   default:
 308                         return ch;
 309                   case ISKEY :
 310                         reserved = 1;
 311                         ch = input();
 312                         /* Fall through */
 313                   case ISLET :
 314                         do {
 315                                 if (reserved && ch >= 'A' && ch <= 'Z') {
 316                                         ch += 'a' - 'A';
 317                                 }
 318                                 *p++ = ch;
 319                                 if (p > max) p--;
 320                                 ch = input();
 321                         } while (c_class[ch] == ISDIG || c_class[ch] == ISLET);
 322                         unput(ch);
 323                         *p = '\0';
 324                         if (reserved) { /*
 325                                          * Now search for the keyword
 326                                          */
 327                                 register p_keyw w;
 328
 329                                 w = resword;
 330                                 while (w->w_word) {
 331                                         if (! strcmp(ltext,w->w_word)) {
 332                                                 /*
 333                                                  * Return token number.
 334                                                  */
 335                                                 if (w->w_value == C_IF ||
 336                                                     w->w_value == C_WHILE) {
 337                                                         nextexpr = 1;
 338                                                 }
 339                                                 return w->w_value;
 340                                         }
 341                                         w++;
 342                                 }
 343                                 error(linecount,"Illegal reserved word");
 344                         }
 345                         lextoken.t_string = ltext;
 346                         return C_IDENT;
 347                 }
 348         }
 349 }
 350
 351 static int      backupc;        /* char saved by unput(); 0 if there is none */
 352 static int      nonline;        /* = 0 if last char read was a newline, or none was read yet */
 353
 354 input() {
 355         /*
 356          * Low level input routine, used by all other input routines
 357          */
 358         register        c;
 359
 360         if (c = backupc) {
 361                         /* Last char was "unput()". Deliver it again
 362                          */
 363                 backupc = 0;
 364                 return c;
 365         }
 366         if ((c = getc(finput)) == EOF) {
 367                 nonline = 0;
 368                 return c;
 369         }
 370 # ifdef LINE_DIRECTIVE
 371         nostartline = 1;
 372 # endif
 373         if (!nonline) {
 374                 linecount++;
 375 # ifdef LINE_DIRECTIVE
 376                 nostartline = 0;
 377 # endif
 378                 nonline = 1;
 379         }
 380         if (c == '\n') nonline = 0;
 381         if (strip_grammar) putchar(c);
 382         return c;
 383 }
 384
 385 unput(c) {
 386         /*
 387          * "unread" c
 388          */
 389         backupc = c;
 390 }
 391
 392 skipcomment(flag) {
 393         /*
 394          * Skip comment. If flag != 0, the comment is inside a fragment
 395          * of C-code, so keep it.
 396          */
 397         register int    ch;
 398         int             saved;  /* line count on which comment starts */
 399
 400         saved = linecount;
 401         if (input() != '*') error(linecount,"Illegal comment");
 402         if (flag) putc('*', fact);
 403         do {
 404                 ch = input();
 405                 if (flag) putc(ch, fact);
 406                 while (ch == '*') {
 407                         ch = input();
 408                         if (flag) putc(ch, fact);
 409                         if (ch == '/') return;
 410                 }
 411         } while (ch != EOF);
 412         error(saved,"Comment does not terminate");
 413 }
 414
 # ifdef LINE_DIRECTIVE
 STATIC
 linedirective() {
         /*
          * Read a line directive. The '#' has been read by the caller.
          * The directive holds a line number, optionally followed by a
          * file name between double quotes. On success linecount is set
          * to the number and f_input to the file name, if one is given
          * that differs from the current one.
          * A directive without a number, or with a file name that is not
          * closed on the same line, gives an error and changes nothing.
          * End of input is treated like the end of the line, and a file
          * name that does not fit in ltext is truncated.
          */
         register int    ch;
         register int    i;
         string          s_error = "Illegal line directive";
         string          store();
         register string c;
         string          max = &ltext[LTEXTSZ - 1];
                                 /* place kept for the null byte */

         do {    /*
                  * Skip to next digit
                  * Do not skip newlines
                  */
                 ch = input();
         } while (ch != '\n' && ch != EOF && c_class[ch] != ISDIG);
         if (ch == '\n' || ch == EOF) {
                 error(linecount,s_error);
                 return;
         }
         i = 0;
         do  {
                 i = i*10 + (ch - '0');
                 ch = input();
         } while (ch != EOF && c_class[ch] == ISDIG);
         /* look for a file name on the rest of the line */
         while (ch != '\n' && ch != '"' && ch != EOF) ch = input();
         if (ch == '"') {
                 c = ltext;
                 for (;;) {
                         ch = input();
                         if (ch == '"' || ch == '\n' || ch == EOF) break;
                         if (c < max) *c++ = ch;
                 }
                 if (ch != '"') {
                         /* the closing quote is missing */
                         error(linecount,s_error);
                         return;
                 }
                 *c = '\0';
                 /* the rest of the line is not used */
                 do {
                         ch = input();
                 } while (ch != '\n' && ch != EOF);
                 /*
                  * Remember the file name
                  */
                 if (strcmp(f_input,ltext)) f_input = store(ltext);
         }
         linecount = i;
 }
 # endif
 464
 465 STATIC string
 466 vallookup(s) {
 467         /*
 468          * Look up the keyword that has token number s
 469          */
 470         register p_keyw p = resword;
 471
 472         while (p->w_value) {
 473                 if (p->w_value == s) return p->w_word;
 474                 p++;
 475         }
 476         return 0;
 477 }
 478
 479 STATIC string
 480 cpy(s,p,inserted) register string p; {
 481         /*
 482          * Create a piece of error message for token s and put it at p.
 483          * inserted = 0 if the token s was deleted (in which case we have
 484          * attributes), else it was inserted
 485          */
 486         register string t = 0;
 487
 488         switch(s) {
 489           case C_IDENT :
 490                 if (!inserted) t = lextoken.t_string;
 491                 else t = "identifier";
 492                 break;
 493           case C_NUMBER :
 494                 t = "number";
 495                 break;
 496           case C_LITERAL :
 497                 if (!inserted) {
 498                         *p++ = '\'';
 499                         t = lextoken.t_string;
 500                         break;
 501                 }
 502                 t = "literal";
 503                 break;
 504           case C_ACTION:
 505                 t = "C action";
 506                 break;
 507           case C_PARAMS:
 508                 t = "C parameter section";
 509                 break;
 510           case C_EXPR:
 511                 t = "C expression";
 512                 break;
 513           case EOFILE :
 514                 t = "end-of-file";
 515                 break;
 516         }
 517         if (!t && (t = vallookup(s))) {
 518                 *p++ = '%';
 519         }
 520         if (t) {        /*
 521                          * We have a string for the token. Copy it
 522                          */
 523                 while (*t) *p++ = *t++;
 524                 if (s == C_LITERAL && !inserted) {
 525                         *p++ = '\'';
 526                 }
 527                 return p;
 528         }
 529         /*
 530          * The token is a literal
 531          */
 532         *p++ = '\'';
 533         if (s >= 040 && s <= 0176) *p++ = s;
 534         else {
 535             *p++ = '\\';
 536             switch(s) {
 537               case '\b' : *p++ = 'b'; break;
 538               case '\f' : *p++ = 'f'; break;
 539               case '\n' : *p++ = 'n'; break;
 540               case '\r' : *p++ = 'r'; break;
 541               case '\t' : *p++ = 't'; break;
 542               default : *p++='0'+((s&0377)>>6); *p++='0'+((s>>3)&07);
 543                         *p++='0'+(s&07);
 544             }
 545         }
 546         *p++ = '\'';
 547         return p;
 548 }
 549
 550 string strcpy();
 551
 552 LLmessage(d) {
 553         /*
 554          * d is either 0, in which case the current token has been deleted,
 555          * or non-zero, in which case it represents a token that is inserted
 556          * before the current token
 557          */
 558         register string s,t;
 559         char            buf[128];
 560
 561         nerrors++;
 562         s = buf;
 563         if (d < 0) {
 564                 strcpy(buf, "end-of-file expected");
 565         }
 566         else if (d == 0) {
 567 #ifdef LLNONCORR
 568                 t = " unexpected";
 569 #else
 570                 t = " deleted";
 571 #endif
 572                 s = cpy(LLsymb,s,0);
 573                 do *s++ = *t; while (*t++);
 574         } else {
 575                 s = cpy(d,s,1);
 576                 t = " inserted in front of ";
 577                 do *s++ = *t++; while (*t);
 578                 s = cpy(LLsymb,s,0);
 579                 *s = '\0';
 580         }
 581         if (d > 0) {    /*
 582                          * Save the current token and make up some
 583                          * attributes for the inserted token
 584                          */
 585                 savedtok = lextoken;
 586                 savedtok.t_tokno = LLsymb;
 587                 if (d == C_IDENT) lextoken.t_string = "dummy_identifier";
 588                 else if (d == C_LITERAL) lextoken.t_string = "dummy_literal";
 589                 else if (d == C_NUMBER) lextoken.t_num = 1;
 590         }
 591 #ifdef LLNONCORR
 592         else
 593 #endif
 594         error(linecount, "%s", buf);
 595                         /* Don't change this line to
 596                          * error(linecount, buf).
 597                          * The string in "buf" might contain '%' ...
 598                          */
 599 #ifdef LLNONCORR
 600         in_production = 1;
 601                         /* To prevent warnings from copyact */
 602 #endif
 603 }
 604 }