1 /* Copyright (c) 1991 by the Vrije Universiteit, Amsterdam, the Netherlands.
2 * For full copyright and restrictions on use see the file COPYING in the top
3 * level of the LLgen tree.
9 * An Extended LL(1) Parser Generator
11 * Author : Ceriel J.H. Jacobs
16 * Defines routines to compute FIRST, FOLLOW etc.
17 * Also checks the continuation grammar from the specified grammar.
27 static string rcsid = "$Id: compute.c,v 2.13 1997/02/21 11:27:46 ceriel Exp $";
31 typedef struct lngth {
32 /* Structure used to compute the shortest possible
33 * length of a terminal production of a rule.
34 * In case of a tie, the second field is used.
38 } t_length, *p_length;
40 /* Defined in this file : */
53 STATIC do_lengthcomp();
61 STATIC int do_safes();
63 STATIC int nc_nfirst();
65 STATIC int nc_nfollow();
71 * Does all the work, by calling other routines (divide and conquer)
77 co_trans(nempty); /* Which nonterminals produce empty? */
78 co_trans(nfirst); /* Computes first sets */
80 * Compute FOLLOW sets.
81 * First put EOFILE in the follow set of the start nonterminals.
83 for (st = start; st; st = st->ff_next) {
84 p = &nonterms[st->ff_nont];
89 * Compute the sets which determine which alternative to choose
92 for (p = nonterms; p < maxnt; p++) {
93 co_dirsymb(p->n_follow,p->n_rule);
96 * Compute the minimum length of productions of nonterminals,
97 * and then determine the default choices
101 * Compute the contains sets
103 for (p = nonterms; p < maxnt; p++) do_contains(p);
104 for (p = nonterms; p < maxnt; p++) contains(p->n_rule, (p_set) 0);
106 * Compute the safety of each nonterminal and term.
107 * The safety gives an answer to the question whether a scan is done,
108 * and how it should be handled.
110 for (p = nonterms; p < maxnt; p++) {
112 * Don't know anything yet
114 setntsafe(p, NOSAFETY);
115 setntout(p, NOSAFETY);
117 for (st = start; st; st = st->ff_next) {
119 * But start symbols are called with lookahead done
121 p = &nonterms[st->ff_nont];
122 setntsafe(p,SCANDONE);
126 #ifdef NON_CORRECTING
130 /* compute the union of the first sets of all start symbols
131 Used to compute the nc-first-sets when -s option is given */
132 start_firsts = get_set();
133 for (st = start; st; st = st->ff_next) {
134 s = setunion(start_firsts, (&nonterms[st->ff_nont])->n_first);
139 /* compute the non_corr first sets for all nonterminals and terms */
142 for (st = start; st; st = st->ff_next) {
143 p = &nonterms[st->ff_nont];
144 PUTIN(p->n_nc_follow,0);
146 co_trans(nc_nfollow);
152 fputs("Safeties:\n", stderr);
153 for (p = nonterms; p < maxnt; p++) {
154 fprintf(stderr, "%s\t%d\t%d\n",
166 * Allocate space for the sets. Also determine which files use
167 * which nonterminals, and determine which nonterminals can be
174 int n = NINTS(NBYTES(nnonterms));
177 for (f = files; f < maxfiles; f++) {
179 f->f_used = s = (p_set) alloc((unsigned)n*sizeof(*(f->f_used)));
180 for (i = n; i; i--) *s++ = 0;
181 for (i = f->f_nonterminals; i != -1; i = p->n_next) {
183 p->n_flags |= GENSTATIC;
184 p->n_first = get_set();
185 #ifdef NON_CORRECTING
186 p->n_nc_first = get_set();
187 p->n_nc_follow = get_set();
189 p->n_follow = get_set();
190 walk(f->f_used, p->n_rule);
193 for (f = files; f < maxfiles; f++) {
194 for (i = f->f_nonterminals; i != -1; i = p->n_next) {
198 for (f2 = files; f2 < maxfiles; f2++) {
199 if (f2 != f && IN(f2->f_used, i)) {
200 p->n_flags &= ~GENSTATIC;
205 for (st = start; st; st = st->ff_next) {
206 nonterms[st->ff_nont].n_flags &= ~GENSTATIC;
211 walk(u, p) p_set u; register p_gram p; {
213 * Walk through the grammar rule p, allocating sets
217 switch (g_gettype(p)) {
222 q->t_first = get_set();
223 #ifdef NON_CORRECTING
224 q->t_nc_first = get_set();
225 q->t_nc_follow = get_set();
227 q->t_follow = get_set();
234 l->l_symbs = get_set();
235 #ifdef NON_CORRECTING
236 l->l_nc_symbs = get_set();
238 l->l_others = get_set();
242 register int i = g_getcont(p);
254 co_trans(fc) int (*fc)(); {
260 for (p = nonterms; p < maxnt; p++) {
261 if ((*fc)(p)) change = 1;
267 nempty(p) register p_nont p; {
268 if (!(p->n_flags & EMPTY) && empty(p->n_rule)) {
275 empty(p) register p_gram p; {
277 * Does the rule pointed to by p produce empty ?
281 switch (g_gettype(p)) {
288 if (r_getkind(q) == STAR
289 || r_getkind(q) == OPT
290 || empty(q->t_rule) ) break;
293 if (empty(g_getlink(p)->l_rule)) {
296 if (g_gettype(p+1) == EORULE) return 0;
299 if (nonterms[g_getcont(p)].n_flags & EMPTY) {
312 nfirst(p) register p_nont p; {
313 return first(p->n_first, p->n_rule, 0);
316 #ifdef NON_CORRECTING
317 STATIC int nc_nfirst(p) register p_nont p; {
318 return nc_first(p->n_nc_first, p->n_rule, 0);
323 first(setp,p,flag) p_set setp; register p_gram p; {
325 * Compute the FIRST set of rule p.
326 * If flag = 0, also the first sets for terms and alternations in
327 * the rule p are computed.
328 * The FIRST set is put in setp.
329 * return 1 if the set refered to by "setp" changed
331 register s; /* Will gather return value */
332 int noenter;/* when set, unables entering of elements into
333 * setp. This is necessary to walk through the
340 switch (g_gettype(p)) {
348 if (first(q->t_first,q->t_rule,0))/*nothing*/;
350 if (!noenter) s |= setunion(setp,q->t_first);
352 if (r_getkind(q) == STAR ||
353 r_getkind(q) == OPT ||
354 empty(q->t_rule)) continue;
361 if (first(l->l_symbs,l->l_rule,0))/*nothing*/;
364 s |= setunion(setp,l->l_symbs);
366 if (g_gettype(p+1) == EORULE) return s;
374 if ((noenter == 0) && !IN(setp,g_getcont(p))) {
376 PUTIN(setp,g_getcont(p));
383 n = &nonterms[g_getcont(p)];
385 s |= setunion(setp,n->n_first);
386 if (ntneeded) NTPUTIN(setp,g_getcont(p));
389 if (n->n_flags & EMPTY) continue;
400 #ifdef NON_CORRECTING
402 nc_first(setp,p,flag) p_set setp; register p_gram p; {
404 * Compute the non_corr FIRST set of rule p.
405 * If flag = 0, also the non_corr first sets for terms and
406 * alternations in the rule p are computed.
407 * The non_corr FIRST set is put in setp.
408 * return 1 if the set refered to by "setp" changed
409 * If the -s flag was given, the union of the first-sets of all
410 * start symbols is used whenever an action occurs. Else, only the
411 * first-sets of startsynbols in the %substart are used
414 register s; /* Will gather return value */
415 int noenter;/* when set, unables entering of elements into
416 * setp. This is necessary to walk through the
423 switch (g_gettype(p)) {
431 if (nc_first(q->t_nc_first,q->t_rule,0))/*nothing*/;
433 if (!noenter) s |= setunion(setp,q->t_nc_first);
435 if (r_getkind(q) == STAR ||
436 r_getkind(q) == OPT ||
437 empty(q->t_rule)) continue;
444 if (nc_first(l->l_nc_symbs,l->l_rule,0))/*nothing*/;
447 s |= setunion(setp,l->l_nc_symbs);
449 if (g_gettype(p+1) == EORULE) return s;
454 register p_start subp;
458 s |= setunion(setp, start_firsts);
460 for (subp = g_getsubparse(p); subp;
461 subp = subp->ff_next)
462 s |= setunion(setp, (&nonterms[subp->ff_nont])->n_nc_first);
470 if (g_getcont(p) == g_getcont(illegal_gram)) {
471 /* Ignore for this set. */
475 if ((noenter == 0) && !IN(setp,g_getcont(p))) {
477 PUTIN(setp,g_getcont(p));
484 n = &nonterms[g_getcont(p)];
486 s |= setunion(setp,n->n_nc_first);
487 if (ntneeded) NTPUTIN(setp,g_getcont(p));
490 if (n->n_flags & EMPTY) continue;
503 nfollow(p) register p_nont p; {
504 return follow(p->n_follow, p->n_rule);
508 follow(setp,p) p_set setp; register p_gram p; {
510 * setp is the follow set for the rule p.
511 * Compute the follow sets in the rule p from this set.
512 * Return 1 if a follow set of a nonterminal changed.
514 register s; /* Will gather return value */
518 switch (g_gettype(p)) {
527 * If what follows the term can be empty,
528 * everything that can follow the whole
529 * rule can also follow the term
531 s |= setunion(q->t_follow,setp);
534 * Everything that can start the rest of the rule
535 * can follow the term
537 s |= first(q->t_follow,p+1,1);
538 if (r_getkind(q) == STAR ||
539 r_getkind(q) == PLUS ||
542 * If the term involves a repetition
543 * of possibly more than one,
544 * everything that can start the term
545 * can also follow it.
547 s |= follow(q->t_first,q->t_rule);
550 * Now propagate the set computed sofar
552 s |= follow(q->t_follow, q->t_rule);
556 * Just propagate setp
558 s |= follow(setp,g_getlink(p)->l_rule);
563 n = &nonterms[g_getcont(p)];
564 s |= first(n->n_follow,p+1,1);
567 * If the rest of p can produce empty,
568 * everything that follows p can follow
571 s |= setunion(n->n_follow,setp);
579 #ifdef NON_CORRECTING
582 nc_nfollow(p) register p_nont p; {
583 return follow(p->n_nc_follow, p->n_rule);
587 nc_follow(setp,p) p_set setp; register p_gram p; {
589 * setp is the follow set for the rule p.
590 * Compute the follow sets in the rule p from this set.
591 * Return 1 if a follow set of a nonterminal changed.
593 register s; /* Will gather return value */
597 switch (g_gettype(p)) {
606 * If what follows the term can be empty,
607 * everything that can follow the whole
608 * rule can also follow the term
610 s |= setunion(q->t_nc_follow,setp);
613 * Everything that can start the rest of the rule
614 * can follow the term
616 s |= nc_first(q->t_nc_follow,p+1,1);
617 if (r_getkind(q) == STAR ||
618 r_getkind(q) == PLUS ||
621 * If the term involves a repetition
622 * of possibly more than one,
623 * everything that can start the term
624 * can also follow it.
626 s |= nc_follow(q->t_nc_first,q->t_rule);
629 * Now propagate the set computed sofar
631 s |= nc_follow(q->t_nc_follow, q->t_rule);
635 * Just propagate setp
637 s |= nc_follow(setp,g_getlink(p)->l_rule);
642 n = &nonterms[g_getcont(p)];
643 s |= nc_first(n->n_nc_follow,p+1,1);
646 * If the rest of p can produce empty,
647 * everything that follows p can follow
650 s |= setunion(n->n_nc_follow,setp);
661 co_dirsymb(setp,p) p_set setp; register p_gram p; {
663 * Walk the rule p, doing the work for alternations
665 register p_gram s = 0;
668 switch (g_gettype(p)) {
675 co_dirsymb(q->t_follow,q->t_rule);
680 * Save first alternative
684 co_dirsymb(setp,l->l_rule);
685 if (empty(l->l_rule)) {
687 * If the rule can produce empty, everything
688 * that follows it can also start it
690 setunion(l->l_symbs,setp);
692 if (g_gettype(p+1) == EORULE) {
694 * Every alternation is implemented as a
695 * choice between two alternatives :
696 * this one or one of the following.
697 * The l_others set will contain the starters
698 * of the other alternatives
709 co_others(p) register p_gram p; {
711 * compute the l_others-sets for the list of alternatives
714 register p_link l1,l2;
719 setunion(l1->l_others,l2->l_symbs);
720 if (g_gettype(p+1) != EORULE) {
722 * First compute l2->l_others
726 * and then l1->l_others
728 setunion(l1->l_others,l2->l_others);
732 static p_length length;
733 # define INFINITY 32767
739 register p_length pl = &length[p - nonterms];
743 complength(p->n_rule, pl);
744 return pl->cnt < INFINITY && x == INFINITY;
750 * Compute the minimum length of a terminal production for each
752 * This length consists of two fields: the number of terminals,
753 * and a number that is composed of
754 * - the number of this alternative
755 * - a crude measure of the number of terms and nonterminals in the
756 * production of this shortest string.
758 register p_length pl;
762 length = (p_length) alloc((unsigned) (nnonterms * sizeof(*length)));
763 for (pl = &length[nnonterms-1]; pl >= length; pl--) {
764 pl->val = pl->cnt = INFINITY;
767 co_trans(ncomplength);
770 for (p = nonterms; p < maxnt; p++, pl++) {
771 if (pl->cnt == INFINITY) {
772 p->n_flags |= RECURSIVE;
774 setdefaults(p->n_rule);
776 free ((p_mem) length);
780 complength(p,le) register p_gram p; p_length le; {
782 * Walk grammar rule p, computing minimum lengths
793 switch (g_gettype(p)) {
796 #ifdef NON_CORRECTING
797 if (g_getcont(p) == g_getcont(illegal_gram)) {
798 add(&X, INFINITY, 0);
808 while (g_gettype(p) != EORULE) {
812 complength(l->l_rule,&i);
814 if (l->l_flag & DEF) {
818 if (compare(&i, &X) < 0) {
833 if ((q->t_flags&PERSISTENT) ||
834 rep==FIXED || rep==PLUS) {
835 complength(q->t_rule,&i);
836 add(&X, i.cnt, i.val);
837 if (rep == FIXED && r_getnum(q) > 0) {
838 for (rep = r_getnum(q) - 1;
840 add(&X, i.cnt, i.val);
846 int nn = g_getcont(p);
847 register p_length pl = &length[nn];
852 complength(nonterms[nn].n_rule,pl);
855 else if (x == -1) x = INFINITY;
865 add(a, c, v) register p_length a; {
867 if (a->cnt == INFINITY || c == INFINITY) {
876 compare(a, b) register p_length a, b; {
877 if (a->cnt != b->cnt) return a->cnt - b->cnt;
878 return a->val - b->val;
882 setdefaults(p) register p_gram p; {
884 switch(g_gettype(p)) {
888 setdefaults(g_getterm(p)->t_rule);
891 register p_link l, l1;
892 int temp = 0, temp1, cnt = 0;
895 count.cnt = INFINITY;
896 count.val = INFINITY;
902 complength(l->l_rule,&i);
904 if (l->l_flag & DEF) temp = 1;
905 temp1 = compare(&i, &count);
907 (temp1 == 0 && l1->l_flag & AVOIDING)) {
911 setdefaults(l->l_rule);
912 } while (g_gettype(p) != EORULE);
914 /* No user specified default */
924 do_contains(n) register p_nont n; {
926 * Compute the total set of symbols that nonterminal n can
930 if (n->n_contains == 0) {
931 n->n_contains = get_set();
932 contains(n->n_rule,n->n_contains);
934 * If the rule can produce empty, delete all symbols that
935 * can follow the rule as well as be in the rule.
936 * This is needed because the contains-set may only contain
937 * symbols that are guaranteed to be eaten by the rule!
938 * Otherwise, the generated parser may loop forever
940 if (n->n_flags & EMPTY) {
941 setminus(n->n_contains,n->n_follow);
944 * But the symbols that can start the rule are always
947 setunion(n->n_contains,n->n_first);
952 contains(p,set) register p_gram p; register p_set set; {
954 * Does the real computation of the contains-sets
958 switch (g_gettype(p)) {
967 if ((q->t_flags & PERSISTENT) ||
968 rep == PLUS || rep == FIXED) {
970 * In these cases, the term belongs to the
971 * continuation grammar.
972 * Otherwise, q->t_contains is just
975 if (!q->t_contains) {
976 q->t_contains = get_set();
978 contains(q->t_rule,q->t_contains);
979 if (rep != FIXED || empty(q->t_rule)) {
980 setminus(q->t_contains,q->t_follow);
982 setunion(q->t_contains,q->t_first);
984 contains(q->t_rule, (p_set) 0);
985 q->t_contains = q->t_first;
987 if (set) setunion(set,q->t_contains);
992 n = &nonterms[g_getcont(p)];
995 setunion(set, n->n_contains);
996 if (ntneeded) NTPUTIN(set, g_getcont(p));
1004 (l->l_flag & DEF) ? set : (p_set) 0);
1011 hulp = g_getcont(p);
1012 assert(hulp < ntokens);
1020 STATIC int nsafes(p) register p_nont p; {
1026 if (i != NOSAFETY) {
1027 i = do_safes(p->n_rule, i, &ch);
1028 if (i < SCANDONE) i = SCANDONE;
1029 /* After a nonterminal, we only know whether a scan was done
1032 if (getntout(p) != i) {
1041 do_safes(p,safe,ch) register p_gram p; register int *ch; {
1043 * Walk the grammar rule, doing the computation described in the
1044 * comment of the procedure above this one.
1049 switch (g_gettype(p)) {
1064 retval = do_safes(q->t_rule,
1065 t_safety(rep,i,q->t_flags&PERSISTENT,safe),ch);
1067 safe = t_after(rep, i, retval);
1069 case ALTERNATION : {
1074 while (g_gettype(p) == ALTERNATION) {
1077 if (safe > SAFE && (l->l_flag & DEF)) {
1078 i = do_safes(l->l_rule,SAFESCANDONE,ch);
1080 else i = do_safes(l->l_rule,SAFE,ch);
1081 if (retval == -1) retval = i;
1082 else if (i != retval) {
1083 if (i == NOSCANDONE ||
1084 retval == NOSCANDONE) {
1087 else if (i > retval) retval = i;
1093 register int nsafe, osafe;
1095 n = &nonterms[g_getcont(p)];
1096 nsafe = getntsafe(n);
1099 if (safe == NOSAFETY) safe = SCANDONE;
1100 if (osafe == nsafe) break;
1101 if (nsafe == NOSAFETY) {
1103 setntsafe(n, osafe);
1106 if (osafe == NOSCANDONE || nsafe == NOSCANDONE) {
1107 if (nsafe != SCANDONE) {
1109 setntsafe(n, SCANDONE);
1113 if (osafe > nsafe) {
1114 setntsafe(n, osafe);
1125 t_safety(rep, count, persistent, safety) {
1127 if (safety == NOSCANDONE) safety = SCANDONE;
1132 if (!persistent || safety < SAFESCANDONE) return SAFE;
1133 return SAFESCANDONE;
1135 if (persistent) return SAFESCANDONE;
1139 if (safety > SAFESCANDONE) return safety;
1140 return SAFESCANDONE;
1144 if (!count) return safety;
1150 t_after(rep, count, outsafety) {
1151 if (count == 0 && (rep == STAR || rep == PLUS)) {
1152 return SAFESCANDONE;
1155 if (outsafety <= SAFESCANDONE) return SAFESCANDONE;