Pristine Ack-5.5
[Ack-5.5.git] / util / LLgen / src / LLgen.g
1 /* Copyright (c) 1991 by the Vrije Universiteit, Amsterdam, the Netherlands.
2  * For full copyright and restrictions on use see the file COPYING in the top
3  * level of the LLgen tree.
4  */
5
6 /*
7  *  L L G E N
8  *
9  *  An Extended LL(1) Parser Generator
10  *
11  *  Author : Ceriel J.H. Jacobs
12  */
13
14 /*
15  * LLgen.g
16  * Defines the grammar of LLgen.
17  * Some routines that build the internal structure are also included
18  */
19
20 {
21 # include "types.h"
22 # include "io.h"
23 # include "extern.h"
24 # include "assert.h"
25 # include "cclass.h"
26
27 # ifndef NORCSID
28 static string   rcsid = "$Id: LLgen.g,v 2.28 1997/02/21 11:27:38 ceriel Exp $";
29 # endif
30 p_mem           alloc(), ralloc();
31 string          store();
32 p_gram          search();
33 long            ftell();
34
35 static int      acount;                 /* count #of global actions */
36 static p_term t_list;
37 static int t_cnt;
38 static p_gram   alt_table;
39 static int      n_alts;
40 static int      max_alts;
41 #define ALTINCR 32
42
43 static p_gram   rule_table;
44 static int      n_rules;
45 static int      max_rules;
46 #define RULEINCR        32
47
48 /* Here are defined : */
49 STATIC          newnorder();
50 STATIC          newtorder();
51 STATIC          mkalt();
52 STATIC          mkterm();
53 STATIC p_gram   copyrule();
54 /* and of course LLparse() */
55
56 STATIC
57 newnorder(index) {
58         static int porder;
59
60         if (norder != -1) {
61                 nonterms[porder].n_next = index;
62         }
63         else    norder = index;
64         porder = index;
65         nonterms[porder].n_next = -1;
66 }
67
68 STATIC
69 newtorder(index) {
70         static int porder;
71
72         if (torder != -1) {
73                 tokens[porder].t_next = index;
74         }
75         else    torder = index;
76         porder = index;
77         tokens[porder].t_next = -1;
78 }
79
80 p_init()
81 {
82         alt_table = (p_gram )alloc(ALTINCR*sizeof(t_gram));
83         n_alts = 0;
84         max_alts = ALTINCR;
85         rule_table = (p_gram )alloc(RULEINCR*sizeof(t_gram));
86         n_rules = 0;
87         max_rules = RULEINCR;
88 }
89
90 }
91
92 %start  LLparse, spec;
93
94 spec    :               {       acount = 0; p_init(); }
95           [ %persistent def ]*
96                         {       /*
97                                  * Put an endmarker in temporary file
98                                  */
99                                 putc('\0', fact);
100                                 putc('\0', fact);
101                                 free((p_mem) rule_table);
102                                 free((p_mem) alt_table);
103                         }
104         ;
105
106 def                     {       register string p; }
107         : rule
108           /*
109            * A grammar rule
110            */
111         | C_TOKEN listel [ ',' listel ]* ';'
112           /*
113            * A token declaration
114            */
115         | C_START C_IDENT
116                         {       p = store(lextoken.t_string); }
117           ',' C_IDENT
118           /*
119            * A start symbol declaration
120            */
121                         {       /*
122                                  * Put the declaration in the list
123                                  * of start symbols
124                                  */
125                                 register p_gram temp;
126                                 register p_start ff;
127
128                                 temp = search(NONTERM,lextoken.t_string,BOTH);
129                                 ff = (p_start) alloc(sizeof(t_start));
130                                 ff->ff_nont = g_getcont(temp);
131                                 ff->ff_name = p;
132                                 ff->ff_next = start;
133                                 start = ff;
134                                 while (ff = ff->ff_next) {
135                                         if (! strcmp(p, ff->ff_name)) {
136                                                 error(linecount, "\"%s\" already used in a %%start", p);
137                                                 break;
138                                         }
139                                 }
140                         }
141           ';'
142         | C_LEXICAL C_IDENT
143           /*
144            * Declaration of a name for the lexical analyser.
145            * May appear only once
146            */
147                         {       if (!lexical) {
148                                         lexical = store(lextoken.t_string);
149                                 }
150                                 else    error(linecount,"Duplicate %%lexical");
151                         }
152           ';'
153         | C_PREFIX C_IDENT
154           /*
155            * Prefix of external names (default: LL)
156            */
157                         {       if (!prefix) {
158                                         prefix = store(lextoken.t_string);
159                                         if (strlen(prefix) > 6) {
160                                                 error(linecount,
161                                                         "%%prefix too long");
162                                                 prefix[6] = 0;
163                                         }
164                                 }
165                                 else    error(linecount,"Duplicate %%prefix");
166                         }
167           ';'
168         | C_ONERROR C_IDENT
169                         {
170 #ifdef NON_CORRECTING
171                                 if (non_corr) {
172                                         warning(linecount, "%%onerror conflicts with -n option");
173                                 }
174                                 else
175 #endif
176                                   if (! onerror) {
177                                         onerror = store(lextoken.t_string);
178                                 }
179                                 else    error(linecount,"Duplicate %%onerror");
180                         }
181           ';'
182         | C_ACTION      {       acount++; }
183           /*
184            * A global C-declaration
185            */
186         | firsts
187           /*
188            * declarations for macros
189            */
190         ;
191
192 listel  : C_IDENT       {       p_gram temp = search(TERMINAL,lextoken.t_string,ENTERING);
193                                 newtorder(g_getcont(temp));
194                                 tokens[g_getcont(temp)].t_lineno = linecount;
195                         }
196         ;
197
198 rule                    {       register p_nont p;
199                                 p_gram rr;
200                                 register p_gram temp;
201                         }
202         : /*
203            * grammar for a production rule
204            */
205           C_IDENT       {       temp = search(NONTERM,lextoken.t_string,BOTH);
206                                 p = &nonterms[g_getcont(temp)];
207                                 if (p->n_rule) {
208                                         error(linecount,
209 "Nonterminal %s already defined", lextoken.t_string);
210                                 }
211                                 /*
212                                  * Remember the order in which the nonterminals
213                                  * were defined. Code must be generated in that
214                                  * order to keep track with the actions on the
215                                  * temporary file
216                                  */
217                                 newnorder(p - nonterms);
218                                 p->n_count = acount;
219                                 acount = 0;
220                                 p->n_lineno = linecount;
221                                 p->n_off = ftell(fact);
222                         }
223           [ C_PARAMS    {       if (lextoken.t_num > 0) {
224                                         p->n_flags |= PARAMS;
225                                         if (lextoken.t_num > 15) {
226                                                 error(linecount,"Too many parameters");
227                                         }
228                                         else    setntparams(p,lextoken.t_num);
229                                 }
230                         }
231           ]?
232           [ C_ACTION    {       p->n_flags |= LOCALS; }
233           ]?
234           ':'           {       in_production = 1; }
235           productions(&rr) ';'
236                         {       in_production = 0; }
237                         /*
238                          * Do not use p->n_rule now! The nonterms array
239                          * might have been re-allocated.
240                          */
241                         {       nonterms[g_getcont(temp)].n_rule = rr;}
242         ;
243
244 productions(p_gram *p;)
245         /*
246          * One or more alternatives
247          */
248         {       p_gram          prod;
249                 int             conflres = 0;
250                 int             t = 0;
251                 int             haddefault = 0;
252                 int             altcnt = 0;
253                 int             o_lc, n_lc;
254         } :
255                         {       o_lc = linecount; }
256           simpleproduction(p,&conflres)
257                         {       if (conflres & DEF) haddefault = 1; }
258           [
259             [ '|'       {       n_lc = linecount; }
260               simpleproduction(&prod,&t)
261                         {       if (n_alts >= max_alts-2) {
262                                         alt_table = (p_gram ) ralloc(
263                                                 (p_mem) alt_table,
264                                                 (unsigned)(max_alts+=ALTINCR)*sizeof(t_gram));
265                                 }
266                                 if (t & DEF) {
267                                         if (haddefault) {
268                                                 error(n_lc,
269                 "More than one %%default in alternation");
270                                         }
271                                         haddefault = 1;
272                                 }
273                                 mkalt(*p,conflres,o_lc,&alt_table[n_alts++]);
274                                 altcnt++;
275                                 o_lc = n_lc;
276                                 conflres = t;
277                                 t = 0;
278                                 *p = prod;
279                         }
280             ]+          {       if (conflres & (COND|PREFERING|AVOIDING)) {
281                                         error(n_lc,
282                 "Resolver on last alternative not allowed");
283                                 }
284                                 mkalt(*p,conflres,n_lc,&alt_table[n_alts++]);
285                                 altcnt++;
286                                 g_settype((&alt_table[n_alts]),EORULE);
287                                 *p = copyrule(&alt_table[n_alts-altcnt],altcnt+1);
288                         }
289           |
290                         {       if (conflres & (COND|PREFERING|AVOIDING)) {
291                                         error(o_lc,
292                 "No alternation conflict resolver allowed here");
293                                 }
294                                 /*
295                                 if (conflres & DEF) {
296                                         error(o_lc,
297                 "No %%default allowed here");
298                                 }
299                                 */
300                         }
301           ]
302                         {       n_alts -= altcnt; }
303         ;
304 {
305
306 STATIC
307 mkalt(prod,condition,lc,res) p_gram prod; register p_gram res; {
308         /*
309          * Create an alternation and initialise it.
310          */
311         register p_link         l;
312         static p_link list;
313         static int cnt;
314
315         if (! cnt) {
316                 cnt = 50;
317                 list = (p_link) alloc(50 * sizeof(t_link));
318         }
319         cnt--;
320         l = list++;
321         l->l_rule = prod;
322         l->l_flag = condition;
323         g_setlink(res,l);
324         g_settype(res,ALTERNATION);
325         res->g_lineno = lc;
326         nalts++;
327 }
328 }
329
330 simpleproduction(p_gram *p; register int *conflres;)
331         {       t_gram          elem;
332                 int             elmcnt = 0;
333                 int             cnt, kind;
334                 int             termdeleted = 0;
335         } :
336           [ C_DEFAULT   {       *conflres |= DEF; }
337           ]?
338           [
339             /*
340              * Optional conflict reslover
341              */
342               C_IF C_EXPR {     *conflres |= COND; }
343             | C_PREFER  {       *conflres |= PREFERING; }
344             | C_AVOID   {       *conflres |= AVOIDING; }
345           ]?
346           [ C_ILLEGAL   {
347 #ifdef NON_CORRECTING
348                                 if (n_rules >= max_rules-2) {
349                                         rule_table = (p_gram) ralloc(
350                                                   (p_mem) rule_table,
351                                                   (unsigned)(max_rules+=RULEINCR)*sizeof(t_gram));
352                                 }
353                                 elmcnt++;
354                                 rule_table[n_rules++] =
355                                     *search(TERMINAL, "LLILLEGAL", BOTH);
356                                 if (*conflres & DEF) {
357                                         error(linecount, "%%illegal not allowed in %%default rule");
358                                 }
359 #endif
360                         }
361           ]?
362           [ %persistent elem(&elem)
363                         {       if (n_rules >= max_rules-2) {
364                                         rule_table = (p_gram) ralloc(
365                                                   (p_mem) rule_table,
366                                                   (unsigned)(max_rules+=RULEINCR)*sizeof(t_gram));
367                                 }
368                                 kind = FIXED;
369                                 cnt = 0;
370                         }
371             [ repeats(&kind, &cnt)
372                         {       if (g_gettype(&elem) != TERM) {
373                                         rule_table[n_rules] = elem;
374                                         g_settype((&rule_table[n_rules+1]),EORULE);
375                                         mkterm(copyrule(&rule_table[n_rules],2),
376                                                0,
377                                                elem.g_lineno,
378                                                &elem);
379                                 }
380                         }
381             |
382                         { if (g_gettype(&elem) == TERM) {
383                                 register p_term q = g_getterm(&elem);
384
385                                 if (! (q->t_flags & RESOLVER) &&
386                                     g_gettype(q->t_rule) != ALTERNATION &&
387                                     g_gettype(q->t_rule) != EORULE) {
388                                     while (g_gettype(q->t_rule) != EORULE) {
389                                         rule_table[n_rules++] = *q->t_rule++;
390                                         elmcnt++;
391                                         if (n_rules >= max_rules-2) {
392                                             rule_table = (p_gram) ralloc(
393                                                   (p_mem) rule_table,
394                                                   (unsigned)(max_rules+=RULEINCR)*sizeof(t_gram));
395                                         }
396                                     }
397                                     elem = *--(q->t_rule);
398                                     n_rules--;
399                                     elmcnt--;
400                                     if (q == t_list - 1) {
401                                         t_list--;
402                                         nterms--;
403                                         t_cnt++;
404                                     }
405                                     termdeleted = 1;
406                                 }
407                           }
408                         }
409             ]           {       if (!termdeleted && g_gettype(&elem) == TERM) {
410                                         register p_term q;
411
412                                         q = g_getterm(&elem);
413                                         r_setkind(q,kind);
414                                         r_setnum(q,cnt);
415                                         if ((q->t_flags & RESOLVER) &&
416                                             (kind == PLUS || kind == FIXED)) {
417                                                 error(linecount,
418                 "%%while not allowed in this term");
419                                         }
420                                         /*
421                                          * A persistent fixed term is the same
422                                          * as a non-persistent fixed term.
423                                          * Should we complain?
424                                         if ((q->t_flags & PERSISTENT) &&
425                                             kind == FIXED) {
426                                                 error(linecount,
427                                                         "Illegal %%persistent");
428                                         }
429                                         */
430                                 }
431                                 termdeleted = 0;
432                                 elmcnt++;
433                                 rule_table[n_rules++] = elem;
434                         }
435           ]*            {       register p_term q;
436
437                                 g_settype((&rule_table[n_rules]),EORULE);
438                                 *p = 0;
439                                 n_rules -= elmcnt;
440                                 if (g_gettype(&rule_table[n_rules]) == TERM &&
441                                     elmcnt == 1) {
442                                         q = g_getterm(&rule_table[n_rules]);
443                                         if (r_getkind(q) == FIXED &&
444                                             r_getnum(q) == 0) {
445                                                 *p = q->t_rule;
446                                         }
447                                 }
448                                 if (!*p) *p = copyrule(&rule_table[n_rules],
449                                                 elmcnt+1);
450                         }
451         ;
452 {
453
454 STATIC
455 mkterm(prod,flags,lc,result) p_gram prod; register p_gram result; {
456         /*
457          * Create a term, initialise it and return
458          * a grammar element containing it
459          */
460         register p_term         q;
461
462         if (! t_cnt) {
463                 t_cnt = 50;
464                 t_list = (p_term) alloc(50 * sizeof(t_term));
465         }
466         t_cnt--;
467         q = t_list++;
468         q->t_rule = prod;
469         q->t_contains = 0;
470         q->t_flags = flags;
471         g_settype(result,TERM);
472         g_setterm(result,q);
473         result->g_lineno = lc;
474         nterms++;
475 }
476 }
477
478 elem (register p_gram pres;)
479         {       register int    t = 0;
480                 p_gram          p1;
481                 int             ln;
482                 p_gram          pe;
483 #ifdef NON_CORRECTING
484                 int             erroneous = 0;
485 #endif
486         } :
487           '['           {       ln = linecount; }
488           [ C_WHILE C_EXPR      {       t |= RESOLVER; }
489           ]?
490           [ C_PERSISTENT        {       t |= PERSISTENT; }
491           ]?
492           productions(&p1)
493           ']'           {
494                                 mkterm(p1,t,ln,pres);
495                         }
496         |
497           [ C_ERRONEOUS         {
498 #ifdef NON_CORRECTING
499                                         erroneous = 1;
500 #endif
501                                 }
502           ]?
503
504           [
505           C_IDENT       {       pe = search(UNKNOWN,lextoken.t_string,BOTH);
506                                 *pres = *pe;
507 #ifdef NON_CORRECTING
508                                 if (erroneous) {
509                                         if (g_gettype(pres) != TERMINAL){
510                                                 warning(linecount,
511                                                         "Erroneous only allowed on terminal");
512                                                 erroneous = 0;
513                                         }
514                                         else
515                                                 pres->g_erroneous = 1;
516                                 }
517 #endif
518
519                         }
520           [ C_PARAMS    {       if (lextoken.t_num > 15) {
521                                         error(linecount,"Too many parameters");
522                                 } else  g_setnpar(pres,lextoken.t_num);
523                                 if (g_gettype(pres) == TERMINAL) {
524                                         error(linecount,
525                                                 "Terminal with parameters");
526                                 }
527                         }
528           ]?
529         | C_LITERAL     {       pe = search(LITERAL,lextoken.t_string,BOTH);
530                                 *pres = *pe;
531 #ifdef NON_CORRECTING
532                                 if (erroneous)
533                                         pres->g_erroneous = 1;
534 #endif
535                         }
536           ]
537         |               {       g_settype(pres,ACTION);
538                                 pres->g_lineno = linecount;
539 #ifdef NON_CORRECTING
540                                 g_setsubparse(pres, (p_start) 0);
541 #endif
542                         }
543
544           [ C_SUBSTART
545
546                         {
547 #ifdef NON_CORRECTING
548                                 nsubstarts++;
549 #endif
550                         }
551
552             C_IDENT
553                         {
554 #ifdef NON_CORRECTING
555                                 register p_gram temp;
556                                 register p_start subp;
557
558                                 temp = search(NONTERM,lextoken.t_string,BOTH);
559                                 subp = (p_start) alloc (sizeof(t_start));
560
561                                 subp->ff_nont = g_getcont(temp);
562                                 subp->ff_name = (string) 0;
563                                 subp->ff_next = (p_start) 0;
564                                 g_setsubparse(pres, subp);
565 #endif
566                         }
567
568                 [ ',' C_IDENT
569                         {
570 #ifdef NON_CORRECTING
571                                 register p_gram temp;
572                                 register p_start ff;
573
574                                 temp = search(NONTERM,lextoken.t_string,BOTH);
575
576                                 ff = g_getsubparse(pres);
577                                 while (ff) {
578                                         if (ff->ff_nont == g_getcont(temp)) {
579                                                 warning(linecount, "\"%s\" used twice in %%substart", lextoken.t_string);
580                                                 break;
581                                         }
582                                         ff = ff->ff_next;
583
584                                 }
585
586                                 ff = (p_start) alloc(sizeof(t_start));
587                                 ff->ff_nont = g_getcont(temp);
588                                 ff->ff_name = (string) 0;
589                                 ff->ff_next = g_getsubparse(pres);
590                                 g_setsubparse(pres, ff);
591 #endif
592                         }
593
594                ]* ';'
595            ]?
596
597           C_ACTION
598         ;
599
600 repeats(int *kind; int *cnt;)   {       int t1 = 0; } :
601           [
602             '?'         {       *kind = OPT; }
603           | [ '*'       {       *kind = STAR; }
604             | '+'       {       *kind = PLUS; }
605             ]
606             number(&t1)?
607                         {       if (t1 == 1) {
608                                         t1 = 0;
609                                         if (*kind == STAR) *kind = OPT;
610                                         if (*kind == PLUS) *kind = FIXED;
611                                 }
612                         }
613           | number(&t1)
614           ]             {       *cnt = t1; }
615         ;
616
617 number(int *t;)
618         : C_NUMBER
619                         {       *t = lextoken.t_num;
620                                 if (*t <= 0 || *t >= 8192) {
621                                         error(linecount,"Illegal number");
622                                 }
623                         }
624         ;
625
626 firsts  {       register string p; }
627         : C_FIRST C_IDENT
628                         {       p = store(lextoken.t_string); }
629           ',' C_IDENT ';'
630                         {       /*
631                                  * Store this %first in the list belonging
632                                  * to this input file
633                                  */
634                                 p_gram temp;
635                                 register p_first ff;
636
637                                 temp = search(NONTERM,lextoken.t_string,BOTH);
638                                 ff = (p_first) alloc(sizeof(t_first));
639                                 ff->ff_nont = g_getcont(temp);
640                                 ff->ff_name = p;
641                                 ff->ff_next = pfile->f_firsts;
642                                 pfile->f_firsts = ff;
643                         }
644         ;
645 {
646
647 STATIC p_gram
648 copyrule(p,length) register p_gram p; {
649         /*
650          * Returns a pointer to a grammar rule that was created in
651          * p. The space pointed to by p can now be reused
652          */
653         register p_gram t;
654         p_gram rule;
655
656         t = (p_gram) alloc((unsigned) length * sizeof(t_gram));
657         rule = t;
658         while (length--) {
659                 *t++ = *p++;
660         }
661         return rule;
662 }
663 }