1 /* Copyright (c) 1991 by the Vrije Universiteit, Amsterdam, the Netherlands.
2 * For full copyright and restrictions on use see the file COPYING in the top
3 * level of the LLgen tree.
9 * An Extended LL(1) Parser Generator
11 * Author : Ceriel J.H. Jacobs
16 * Defines the tokens for the grammar of LLgen.
17 * The lexical analyser and LLmessage are also included here.
28 static string rcsidc = "$Id: tokens.g,v 2.17 1997/02/21 15:44:44 ceriel Exp $";
31 /* Here are defined : */
37 # ifdef LINE_DIRECTIVE
38 STATIC linedirective();
41 STATIC string vallookup();
48 %token C_IDENT ; /* lextoken.t_string contains the identifier read */
49 %token C_NUMBER ; /* lextoken.t_num contains the number read */
50 %token C_LITERAL ; /* lextoken.t_string contains the literal read */
51 %token C_EXPR ; /* A C expression (%if or %while) */
52 %token C_PARAMS ; /* formal or actual parameters */
53 %token C_ACTION ; /* a C action */
78 * Structure for a keyword
81 typedef struct keyword {
87 * The list of keywords; the most frequently used keywords come first.
88 * Linear search is used, as there are not many keywords.
91 static t_keyw resword[] = {
94 { "prefer", C_PREFER },
95 { "persistent", C_PERSISTENT },
96 { "default", C_DEFAULT },
100 { "start", C_START },
101 { "lexical", C_LEXICAL },
102 { "onerror", C_ONERROR },
103 { "prefix", C_PREFIX },
104 #ifdef NON_CORRECTING
105 { "substart", C_SUBSTART },
106 { "erroneous", C_ERRONEOUS },
107 { "illegal", C_ILLEGAL },
112 static t_token savedtok; /* to save lextoken in case of an insertion */
113 # ifdef LINE_DIRECTIVE
114 static int nostartline; /* = 0 if at the start of a line */
118 copyact(ch1,ch2,flag,level) char ch1,ch2; {
120 * Copy an action to file f. Opening bracket is ch1, closing bracket
122 * If flag & 1, copy opening and closing parameters too.
123 * If flag & 2, don't allow ','.
125 static int text_seen = 0;
127 register ch; /* Current char */
128 register match; /* used to read strings */
129 int saved = linecount;
131 int sav_strip = strip_grammar;
134 if (ch1 == '{' || flag != 1) strip_grammar = 0;
137 nparams = 0; /* count comma's */
139 fprintf(f,"# line %d \"%s\"\n", linecount,f_input);
141 if (level || (flag & 1)) putc(ch1,f);
146 if (text_seen) nparams++;
148 if (level || (flag & 1)) putc(ch,f);
149 if (strip_grammar != sav_strip) {
150 if (ch1 == '{' || flag != 1) putchar(ch);
152 strip_grammar = sav_strip;
159 error(linecount,"Parentheses mismatch");
163 copyact('(',')',flag,level+1);
167 copyact('{','}',flag,level+1);
171 copyact('[',']',flag,level+1);
186 if (! level && text_seen) {
189 if (ch == ',' && (flag & 2)) {
190 warning(linecount, "Parameters may not be separated with a ','");
198 * watch out for brackets in strings, they do not
204 while((ch = input())) {
205 if (ch == match) break;
211 error(linecount,"Newline in string");
216 if (ch == match) break;
219 if (!level) error(saved,"Action does not terminate");
220 strip_grammar = sav_strip;
223 if (c_class[ch] != ISSPA) text_seen = 1;
231 * Lexical analyser, what else
233 register int ch; /* Current char */
234 register char *p = ltext;
235 int reserved = 0; /* reserved word? */
236 char *max = &ltext[LTEXTSZ - 1]; /* points at element LTEXTSZ - 1 of ltext; presumably the limit for p while filling ltext (TODO confirm) */
238 int expect_expr = nextexpr;
242 if (savedtok.t_tokno) {
243 /* A token has been inserted.
244 * Now deliver the last lextoken again
247 savedtok.t_tokno = 0;
248 return lextoken.t_tokno;
252 if (ch == EOF) return ch;
253 # ifdef LINE_DIRECTIVE
254 if (ch == '#' && !nostartline) {
259 switch(c_class[ch]) {
262 copyact('{', '}', in_production, 0);
267 copyact('(', ')', 1, 0);
271 copyact('(', ')', in_production != 0 ? 0 : 2, 0);
272 if (nparams == 0) fseek(fact, off, 0);
273 lextoken.t_num = nparams;
278 if (ch == '\n' || ch == EOF) {
279 error(linecount,"Missing '");
282 if (ch == '\'') break;
291 lextoken.t_string = ltext;
301 i = 10 * i + (ch - '0');
303 } while (c_class[ch] == ISDIG);
315 if (reserved && ch >= 'A' && ch <= 'Z') {
321 } while (c_class[ch] == ISDIG || c_class[ch] == ISLET);
325 * Now search for the keyword
331 if (! strcmp(ltext,w->w_word)) {
333 * Return token number.
335 if (w->w_value == C_IF ||
336 w->w_value == C_WHILE) {
343 error(linecount,"Illegal reserved word");
345 lextoken.t_string = ltext;
351 static int backupc; /* for unput() */
352 static int nonline; /* cleared to 0 by the low-level input routine when the char read is '\n'; presumably non-zero otherwise (TODO confirm) */
356 * Low level input routine, used by all other input routines
361 /* Last char was "unput()". Deliver it again
366 if ((c = getc(finput)) == EOF) {
370 # ifdef LINE_DIRECTIVE
375 # ifdef LINE_DIRECTIVE
380 if (c == '\n') nonline = 0;
381 if (strip_grammar) putchar(c);
394 * Skip comment. If flag != 0, the comment is inside a fragment
395 * of C-code, so keep it.
398 int saved; /* line count on which comment starts */
401 if (input() != '*') error(linecount,"Illegal comment");
402 if (flag) putc('*', fact);
405 if (flag) putc(ch, fact);
408 if (flag) putc(ch, fact);
409 if (ch == '/') return;
412 error(saved,"Comment does not terminate");
415 # ifdef LINE_DIRECTIVE
419 * Read a line directive
423 string s_error = "Illegal line directive";
429 * Do not skip newlines
432 } while (ch != '\n' && c_class[ch] != ISDIG);
434 error(linecount,s_error);
439 i = i*10 + (ch - '0');
441 } while (c_class[ch] == ISDIG);
442 while (ch != '\n' && ch != '"') ch = input();
447 } while (ch != '"' && ch != '\n');
449 error(linecount,s_error);
455 } while (ch != '\n');
457 * Remember the file name
459 if (strcmp(f_input,ltext)) f_input = store(ltext);
468 * Look up the keyword that has token number s
470 register p_keyw p = resword;
473 if (p->w_value == s) return p->w_word;
480 cpy(s,p,inserted) register string p; {
482 * Create a piece of error message for token s and put it at p.
483 * inserted = 0 if the token s was deleted (in which case we have
484 * attributes), else it was inserted
486 register string t = 0;
490 if (!inserted) t = lextoken.t_string;
491 else t = "identifier";
499 t = lextoken.t_string;
508 t = "C parameter section";
517 if (!t && (t = vallookup(s))) {
521 * We have a string for the token. Copy it
523 while (*t) *p++ = *t++;
524 if (s == C_LITERAL && !inserted) {
530 * The token is a literal
533 if (s >= 040 && s <= 0176) *p++ = s;
537 case '\b' : *p++ = 'b'; break;
538 case '\f' : *p++ = 'f'; break;
539 case '\n' : *p++ = 'n'; break;
540 case '\r' : *p++ = 'r'; break;
541 case '\t' : *p++ = 't'; break;
542 default : *p++='0'+((s&0377)>>6); *p++='0'+((s>>3)&07);
554 * d is either 0, in which case the current token has been deleted,
555 * or non-zero, in which case it represents a token that is inserted
556 * before the current token
564 strcpy(buf, "end-of-file expected");
573 do *s++ = *t; while (*t++);
576 t = " inserted in front of ";
577 do *s++ = *t++; while (*t);
582 * Save the current token and make up some
583 * attributes for the inserted token
586 savedtok.t_tokno = LLsymb;
587 if (d == C_IDENT) lextoken.t_string = "dummy_identifier";
588 else if (d == C_LITERAL) lextoken.t_string = "dummy_literal";
589 else if (d == C_NUMBER) lextoken.t_num = 1;
594 error(linecount, "%s", buf);
595 /* Don't change this line to
596 * error(linecount, buf).
597 * The string in "buf" might contain '%' ...
601 /* To prevent warnings from copyact */