From 73153b484bab081f5a03671ace46ade99167a92a Mon Sep 17 00:00:00 2001 From: ceriel Date: Wed, 2 Oct 1985 22:20:04 +0000 Subject: [PATCH] Initial revision --- util/LLgen/src/compute.c | 812 ++++++++++++++++++++++++++++++++++++ util/LLgen/src/gencode.c | 877 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 1689 insertions(+) create mode 100644 util/LLgen/src/compute.c create mode 100644 util/LLgen/src/gencode.c diff --git a/util/LLgen/src/compute.c b/util/LLgen/src/compute.c new file mode 100644 index 000000000..04a1e36b7 --- /dev/null +++ b/util/LLgen/src/compute.c @@ -0,0 +1,812 @@ +/* + * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands. + * + * This product is part of the Amsterdam Compiler Kit. + * + * Permission to use, sell, duplicate or disclose this software must be + * obtained in writing. Requests for such permissions may be sent to + * + * Dr. Andrew S. Tanenbaum + * Wiskundig Seminarium + * Vrije Universiteit + * Postbox 7161 + * 1007 MC Amsterdam + * The Netherlands + * + */ + +/* + * L L G E N + * + * An Extended LL(1) Parser Generator + * + * Author : Ceriel J.H. Jacobs + */ + +/* + * compute.c + * Defines routines to compute FIRST, FOLLOW etc. + * Also checks the continuation grammar from the specified grammar. + */ + +# include "types.h" +# include "tunable.h" +# include "extern.h" +# include "sets.h" +# include "assert.h" +# ifndef NDEBUG +# include "io.h" +# endif + +# ifndef NORCSID +static string rcsid2 = "$Header$"; +# endif + +p_set setalloc(); + +/* Defined in this file : */ +extern createsets(); +STATIC walk(); +extern co_empty(); +extern empty(); +extern co_first(); +STATIC first(); +extern co_follow(); +STATIC follow(); +extern co_symb(); +STATIC co_dirsymb(); +STATIC co_others(); +STATIC do_checkdefault(); +STATIC checkdefault(); +extern co_contains(); +STATIC do_contains(); +STATIC contains(); +extern co_safes(); +STATIC int do_safes(); + +createsets() { + /* + * Allocate space for the sets + */ + register p_nont p; + + for (p = nonterms; p < maxnt; p++) { + p->n_first = setalloc(setsize); + p->n_follow = setalloc(setsize); + walk(p->n_rule); + } +} + +STATIC +walk(p) register p_gram p; { + /* + * Walk through the grammar rule p, allocating sets + */ + + for (;;) { + switch (g_gettype(p)) { + case TERM : { + register p_term q; + + q = (p_term) pentry[g_getcont(p)]; + q->t_first = setalloc(setsize); + q->t_follow = setalloc(setsize); + walk(q->t_rule); + break; } + case ALTERNATION : { + register p_link l; + + l = (p_link) pentry[g_getcont(p)]; + l->l_symbs = setalloc(setsize); + walk(l->l_rule); + break; } + case EORULE : + return; + } + p++; + } +} + +co_empty() { + /* + * Which nonterminals produce the empty string ? + */ + register int change; + register p_nont p; + + change = 1; + while (change) { + change = 0; + for (p=nonterms; p < maxnt; p++) { + if ((!(p->n_flags & EMPTY)) && empty(p->n_rule)) { + p->n_flags |= EMPTY; + change = 1; + } + } + } +} + +empty(p) register p_gram p; { + /* + * Does the rule pointed to by p produce empty ? + */ + + for (;;) { + switch (g_gettype(p)) { + case EORULE : + return 1; + case TERM : { + register p_term q; + + q = (p_term) pentry[g_getcont(p)]; + if (r_getkind(&(q->t_reps)) == STAR + || r_getkind(&(q->t_reps)) == OPT + || empty(q->t_rule) ) break; + return 0; } + case ALTERNATION : + if (empty(((p_link)pentry[g_getcont(p)])->l_rule)) { + return 1; + } + if (g_gettype(p+1) == EORULE) return 0; + break; + case NONTERM : + if (nonterms[g_getnont(p)].n_flags & EMPTY) { + break; + } + /* Fall through */ + case TERMINAL : + return 0; + } + p++; + } +} + +co_first() { + /* + * Compute the FIRST set for each nonterminal + */ + + register p_nont p; + register int change; + + change = 1; + while (change) { + change = 0; + for (p = nonterms; p < maxnt; p++) { + if (first(p->n_first,p->n_rule,0)) change = 1; + } + } +} + +STATIC +first(setp,p,flag) p_set setp; register p_gram p; { + /* + * Compute the FIRST set of rule p. + * If flag = 0, also the first sets for terms and alternations in + * the rule p are computed. + * The FIRST set is put in setp. + * return 1 if any of the sets changed + */ + register s; /* Will gather return value */ + int noenter;/* when set, unables entering of elements into + * setp. This is necessary to walk through the + * rest of rule p. + */ + + s = 0; + noenter = 0; + for (;;) { + switch (g_gettype(p)) { + case EORULE : + return s; + case TERM : { + register p_term q; + + q = (p_term) pentry[g_getcont(p)]; + if (flag == 0) s |= first(q->t_first,q->t_rule,0); + if (!noenter) s |= setunion(setp,q->t_first,setsize); + p++; + if (r_getkind(&(q->t_reps)) == STAR + || r_getkind(&(q->t_reps)) == OPT + || empty(q->t_rule) ) continue; + break; } + case ALTERNATION : { + register p_link l; + + l = (p_link) pentry[g_getcont(p)]; + if (flag == 0) s |= first(l->l_symbs,l->l_rule,0); + if (noenter == 0) { + s |= setunion(setp,l->l_symbs,setsize); + } + if (g_gettype(p+1) == EORULE) return s; + } + /* Fall Through */ + case ACTION : + p++; + continue; + case TERMINAL : + if ((noenter == 0) && !IN(setp,g_getcont(p))) { + s = 1; + PUTIN(setp,g_getcont(p)); + } + p++; + break; + case NONTERM : { + register p_nont n; + + n = &nonterms[g_getnont(p)]; + if (noenter == 0) { + s |= setunion(setp,n->n_first,setsize); + if (ntneeded && ! NTIN(setp,n-nonterms)) { + s = 1; + NTPUTIN(setp,n-nonterms); + } + } + p++; + if (n->n_flags & EMPTY) continue; + break; } + } + if (flag == 0) { + noenter = 1; + continue; + } + return s; + } +} + +co_follow() { + /* + * Compute the follow set for each nonterminal + */ + + register p_nont p; + register change; + register i; + p_start st; + + /* + * First put EOFILE in the follow set of the start symbols + */ + for (st = start; st; st = st->ff_next) PUTIN(st->ff_nont->n_follow,0); + change = 1; + i = 1; + while (change) { + change = 0; + for (p = nonterms; p < maxnt; p++) { + if (follow(p->n_follow,p->n_rule,i)) change = 1; + } + i = 0; + } +} + +STATIC +follow(setp,p,flag) p_set setp; register p_gram p; { + /* + * setp is the follow set for the rule p. + * Compute the follow sets in the rule p from this set. + * Return 1 if any set changed + * flag controls the use of "first" in the computation. + * It should be 1 the first time a rule is done, 0 otherwise. + */ + register s; /* Will gather return value */ + + s = 0; + for (;;) { + switch (g_gettype(p)) { + case EORULE : + return s; + case TERM : { + register p_term q; + + q = (p_term) pentry[g_getcont(p)]; + if (empty(p+1)) { + /* + * If what follows the term can be empty, + * everything that can follow the whole + * rule can also follow the term + */ + s |= setunion(q->t_follow,setp,setsize); + } + /* + * Everything that can start the rest of the rule + * can follow the term + */ + if (flag) s |= first(q->t_follow,p+1,1); + if (r_getkind(&(q->t_reps)) == STAR + || r_getkind(&(q->t_reps)) == PLUS + || r_getnum(&(q->t_reps)) ) { + /* + * If the term involves a repetition + * of possibly more than one, + * everything that can start the term + * can also follow it. + */ + s |= follow(q->t_first,q->t_rule,flag); + } + /* + * Now propagate the set computed sofar + */ + s |= follow(q->t_follow, q->t_rule,flag); + break; } + case ALTERNATION : + /* + * Just propagate setp + */ + s |= follow(setp,((p_link)pentry[g_getcont(p)])->l_rule, + flag); + break; + case NONTERM : { + register p_nont n; + + n = &nonterms[g_getnont(p)]; + if (flag) s |= first(n->n_follow,p+1,1); + if (empty(p+1)) { + /* + * If the rest of p can produce empty, + * everything that follows p can follow + * the nonterminal + */ + s |= setunion(n->n_follow,setp,setsize); + } + break; } + } + p++; + } +} + +co_symb() { + /* + * Compute the sets which determine which alternative to choose + * in case of a choice + * Also check the continuation grammar and see if rules do scan + * ahead. + */ + register p_nont p; + + for (p = nonterms; p < maxnt; p++) { + co_dirsymb(p->n_follow,p->n_rule); + } + for (p = nonterms; p < maxnt; p++) { + do_checkdefault(p); + } +} + +STATIC +co_dirsymb(setp,p) p_set setp; register p_gram p; { + /* + * Walk the rule p, doing the work for alternations + */ + register p_gram s = 0; + + for (;;) { + switch (g_gettype(p)) { + case EORULE : + return; + case TERM : { + register p_term q; + + q = (p_term) pentry[g_getcont(p)]; + co_dirsymb(q->t_follow,q->t_rule); + break; } + case ALTERNATION : { + register p_link l; + /* + * Save first alternative + */ + if (!s) s = p; + l = (p_link) pentry[g_getcont(p)]; + l->l_others = setalloc(setsize); + co_dirsymb(setp,l->l_rule); + if (empty(l->l_rule)) { + /* + * If the rule can produce empty, everything + * that follows it can also start it + */ + setunion(l->l_symbs,setp,setsize); + } + if (g_gettype(p+1) == EORULE) { + /* + * Every alternation is implemented as a + * choice between two alternatives : + * this one or one of the following. + * The l_others set will contain the starters + * of the other alternatives + */ + co_others(s); + return; + } } + } + p++; + } +} + +STATIC +co_others(p) register p_gram p; { + /* + * compute the l_others-sets for the list of alternatives + * indicated by p + */ + register p_link l1,l2; + + l1 = (p_link) pentry[g_getcont(p)]; + p++; + l2 = (p_link) pentry[g_getcont(p)]; + setunion(l1->l_others,l2->l_symbs,setsize); + if (g_gettype(p+1) != EORULE) { + /* + * First compute l2->l_others + */ + co_others(p); + /* + * and then l1->l_others + */ + setunion(l1->l_others,l2->l_others,setsize); + } +} + +STATIC +do_checkdefault(p) register p_nont p; { + /* + * check the continuation rule for nonterminal p, unless + * this is already being(!) done + */ + if (p->n_flags & BUSY) { + /* + * Error situation, recursion in continuation grammar + */ + p->n_flags ^= (RECURSIVE|BUSY); + return; + } + if (p->n_flags & CONTIN) { + /* + * Was already done + */ + return; + } + /* + * Now mark it as busy, and check the rule + */ + p->n_flags |= BUSY; + checkdefault(p->n_rule); + /* + * Now release the busy mark, and mark it as done + */ + p->n_flags ^= (CONTIN|BUSY); + return; +} + +STATIC +checkdefault(p) register p_gram p; { + /* + * Walk grammar rule p, checking the continuation grammar + */ + register p_link l; + register p_term q; + + for (;;) { + switch (g_gettype(p)) { + case EORULE : + return; + case ALTERNATION : + l = (p_link) pentry[g_getcont(p)]; + if (l->l_flag & DEF) { + /* + * This alternative belongs to the + * continuation grammar, so check it + */ + checkdefault(l->l_rule); + return; + } + break; + case TERM : + q = (p_term) pentry[g_getcont(p)]; + /* + * First check the rest of the rule + */ + checkdefault(p+1); + /* + * Now check the term if it belongs to the + * continuation grammar + */ + if (r_getkind(&(q->t_reps))==FIXED || + r_getkind(&(q->t_reps))==PLUS) { + checkdefault(q->t_rule); + return; + } + /* + * Here we have OPT or STAR + * Only in the continuation grammar if %persistent + */ + if (q->t_flags & PERSISTENT) { + checkdefault(q->t_rule); + } + return; + case NONTERM : + /* + * Check the continuation grammar for this nonterminal. + * Note that the nonterminal we are working on is + * marked as busy, so that direct or indirect recursion + * can be detected + */ + do_checkdefault(&nonterms[g_getnont(p)]); + break; + } + p++; + } +} + +co_contains() { + /* + * Compute the contains sets + */ + register p_nont p; + register p_set dummy; + + for (p = nonterms; p < maxnt; p++) do_contains(p); + dummy = setalloc(setsize); +# ifndef NDEBUG + if (debug) fputs("Contains 1 done\n", stderr); +# endif + free(dummy); + for (p = nonterms; p < maxnt; p++) contains(p->n_rule, (p_set) 0); +# ifndef NDEBUG + if (debug) fputs("Contains 2 done\n", stderr); +# endif + dummy = setalloc(setsize); + free(dummy); +} + +STATIC +do_contains(n) register p_nont n; { + /* + * Compute the total set of symbols that nonterminal n can + * produce + */ + + if (n->n_contains == 0) { + n->n_contains = setalloc(setsize); + contains(n->n_rule,n->n_contains); + /* + * If the rule can produce empty, delete all symbols that + * can follow the rule as well as be in the rule. + * This is needed because the contains-set may only contain + * symbols that are guaranteed to be eaten by the rule! + * Otherwise, the generated parser may loop forever + */ + if (n->n_flags & EMPTY) { + setminus(n->n_contains,n->n_follow,setsize); + } + /* + * But the symbols that can start the rule are always + * eaten + */ + setunion(n->n_contains,n->n_first,setsize); + } +} + +STATIC +contains(p,set) register p_gram p; register p_set set; { + /* + * Does the real computation of the contains-sets + */ + + for (;;) { + switch (g_gettype(p)) { + case EORULE : + return; + case TERM : { + register p_term q; + + q = (p_term) pentry[g_getcont(p)]; + if ((q->t_flags & PERSISTENT) || + r_getkind(&(q->t_reps)) == PLUS || + r_getkind(&(q->t_reps)) == FIXED) { + /* + * In these cases, the term belongs to the + * continuation grammar. + * Otherwise, q->t_contains is just + * q->t_first + */ + if (!q->t_contains) { + q->t_contains = setalloc(setsize); + } + contains(q->t_rule,q->t_contains); + if (empty(q->t_rule)) { + /* + * Same trouble as mentioned in the + * routine do_contains + */ + setminus(q->t_contains,q->t_follow, + setsize); + } + setunion(q->t_contains,q->t_first,setsize); + } else { + contains(q->t_rule, (p_set) 0); + q->t_contains = q->t_first; + } + if (set) setunion(set,q->t_contains,setsize); + break; } + case NONTERM : { + register p_nont n; + + n = &nonterms[g_getnont(p)]; + do_contains(n); + if(set) setunion(set, n->n_contains,setsize); + break; } + case ALTERNATION : { + register p_link l; + + l = (p_link) pentry[g_getcont(p)]; + contains(l->l_rule, + (l->l_flag & DEF) ? set : (p_set) 0); + break; } + case TERMINAL : { + register hulp; + + if (set) { + hulp = g_getcont(p); + assert(hulp < nterminals); + PUTIN(set,hulp); + }} + } + p++; + } +} + +static int change; + +co_safes() { + /* + * Compute the safety of each nonterminal and term. + * The safety gives an answer to the question whether a scan is done, + * and how it should be handled. + */ + + register p_nont p; + register i; + register p_start st; + + for (p = nonterms; p < maxnt; p++) { + /* + * Don't know anything yet + */ + setntsafe(&(p->n_flags), NOSAFETY); + } + for (st = start; st; st = st->ff_next) { + /* + * But start symbols are called with lookahead done + */ + p = st->ff_nont; + setntsafe(&(p->n_flags),SCANDONE); + } + change = 1; + while (change) { + change = 0; + for (p = nonterms; p < maxnt; p++) { + i = getntsafe(p->n_flags); + if (i == NOSAFETY) { + change = 1; + continue; + } + if (do_safes(p->n_rule, i) == 2 && + !(p->n_flags & NNOSCAN)) { + p->n_flags |= NNOSCAN; + change = 1; + } + } + } +} + +STATIC int +do_safes(p,safe) register p_gram p; { + /* + * Walk the grammar rule, doing the computation described in the + * comment of the procedure above this one. + */ + register retval; + + if (safe == NOSCANDONE) retval = 2; + else retval = 1; + for (;;) { + switch (g_gettype(p)) { + case ACTION: + p++; + continue; + case TERMINAL: + retval = 2; + break; + case TERM : { + register p_term q; + int i,rep; + + q = (p_term) pentry[g_getcont(p)]; + i = r_getnum(&(q->t_reps)); + rep = r_getkind(&(q->t_reps)); + retval = do_safes(q->t_rule, + t_safety(rep,i,q->t_flags&PERSISTENT,safe)); + if (retval == 2 && (!(q->t_flags & TNOSCAN))) { + q->t_flags |= TNOSCAN; + } + if (rep != FIXED || retval == 0) retval = 1; + break; } + case ALTERNATION : { + register p_link l; + int f, i; + + f = 1; + while (g_gettype(p) == ALTERNATION) { + l = (p_link) pentry[g_getcont(p)]; + if (safe && (l->l_flag & DEF)) { + i = do_safes(l->l_rule,SAFESCANDONE); + } + else i = do_safes(l->l_rule,SAFE); + if (f) retval = i; + else retval &= i; + p++; + f = 0; + } + return retval; } + case NONTERM : { + register p_nont n; + int nsafe; + + n = &nonterms[g_getnont(p)]; + nsafe = getntsafe(n->n_flags); + if (!(n->n_flags & NNOSCAN)) retval = 1; + else retval = 2; + if (safe == nsafe) break; + if (nsafe == NOSAFETY) { + change = 1; + setntsafe(&(n->n_flags), safe); + break; + } + if (safe == NOSCANDONE || nsafe == NOSCANDONE) { + if (nsafe != SCANDONE) { + change = 1; + setntsafe(&(n->n_flags), SCANDONE); + } + break; + } + if (safe > nsafe) { + setntsafe(&(n->n_flags), safe); + change = 1; + } + break; } + case EORULE : + return retval; + } + p++; + if (retval == 1) safe = SCANDONE; + else safe = NOSCANDONE; + } +} + +t_safety(rep, count, persistent, safety) { + + switch(rep) { + default: + assert(0); + case OPT: + if (!persistent) return SAFE; + if (safety < SAFESCANDONE) return safety; + return SAFESCANDONE; + case STAR: + if (persistent) return SAFESCANDONE; + return SAFE; + case PLUS: + if (safety == NOSCANDONE) safety = SCANDONE; + if (persistent) { + if (safety > SAFESCANDONE) return safety; + return SAFESCANDONE; + } + if (safety > SAFE) return safety; + return SAFE; + case FIXED: + if (!count) { + if (safety == NOSCANDONE) safety = SCANDONE; + return safety; + } + return SCANDONE; + } + /* NOTREACHED */ +} diff --git a/util/LLgen/src/gencode.c b/util/LLgen/src/gencode.c new file mode 100644 index 000000000..260f776bd --- /dev/null +++ b/util/LLgen/src/gencode.c @@ -0,0 +1,877 @@ +/* + * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands. + * + * This product is part of the Amsterdam Compiler Kit. + * + * Permission to use, sell, duplicate or disclose this software must be + * obtained in writing. Requests for such permissions may be sent to + * + * Dr. Andrew S. Tanenbaum + * Wiskundig Seminarium + * Vrije Universiteit + * Postbox 7161 + * 1007 MC Amsterdam + * The Netherlands + * + */ + +/* + * L L G E N + * + * An Extended LL(1) Parser Generator + * + * Author : Ceriel J.H. Jacobs + */ + +/* + * gencode.c + * Defines the routine "gencode", which generates the parser + * we wanted so badly. + * This file is a mess, it should be cleaned up some time. + */ + +# include "types.h" +# include "io.h" +# include "tunable.h" +# include "extern.h" +# include "sets.h" +# include "assert.h" + +# ifndef NORCSID +static string rcsid3 = "$Header$"; +# endif NORCSID + +/* + * Some codestrings used more than once + */ + +static string c_arrend = "0 };\n"; +static string c_close = "}\n"; +static string c_LLptrmin = "LLptr++;\n"; +static string c_break = "break;\n"; +static string c_read = "LLread();\n"; + +/* Some constants used for reading from the action file */ +# define ENDDECL 0400 +# define IDENT 0401 + +static int nlabel; /* count for the generation of labels */ +static int nvar; /* count for generation of variables */ +static int pushlist[100]; +static int *ppushlist; +static int *lists,*maxlists,*plists; +p_mem ralloc(),alloc(); + +/* In this file the following routines are defined: */ +extern gencode(); +STATIC opentemp(); +STATIC geninclude(); +STATIC genrecovery(); +STATIC string genname(); +STATIC generate(); +STATIC prset(); +STATIC macro(); +STATIC controlline(); +STATIC getparams(); +STATIC gettok(); +STATIC rulecode(); +STATIC int dopush(); +STATIC pushcode(); +STATIC getaction(); +STATIC codeforterm(); +STATIC genifhead(); +STATIC gencases(); +STATIC genpush(); + +/* Macro to print a terminal */ +# define PTERM(f,p) fprintf(f,(p)->h_num<0400?"'%s'":"%s",(p)->h_name) + +gencode(argc) { + register p_file p = files; + + /* Generate include file Lpars.h */ + geninclude(); + + /* Set up for code generation */ + if ((fact = fopen(f_temp,"r")) == NULL) { + fatal(0,e_noopen,f_temp); + } + lists = (int *) alloc(50 * sizeof(int)); + plists = lists; + maxlists = lists+49; + + /* For every source file .... */ + while (argc--) { + /* Open temporary */ + f_input = p->f_name; + opentemp(f_input); + /* generate code ... */ + copyfile(0); + generate(p); + getaction(2); + if (ferror(fpars) != 0) { + fatal(0,"Write error on temporary"); + } + fclose(fpars); + /* And install */ + install(genname(p->f_name),p->f_name); + p++; + } + genrecovery(); +} + +STATIC +opentemp(str) string str; { + + if ((fpars = fopen(f_pars,"w")) == NULL) { + fatal(0,e_noopen,f_pars); + } + fprintf(fpars,LLgenid,str ? str : "."); +} + +STATIC +geninclude() { + register p_entry p; + register FILE *f; + + opentemp((string) 0); + f = fpars; + for (p = h_entry; p < max_t_ent; p++) { + if (p->h_num >= 0400) { + fprintf(f,"# define %s %d\n", p->h_name,p->h_num); + } + } + if (ferror(f) != 0) { + fatal(0,"write error on temporary"); + } + fclose(f); + install(HFILE, (string) 0); +} + +STATIC +genrecovery() { + register FILE *f; + register p_entry t; + register int i; + register int *q; + int index[256+NTERMINALS]; + register p_start st; + + opentemp((string) 0); + f = fpars; + copyfile(0); + /* Now generate the routines that call the startsymbols */ + fputs("#define LLSTSIZ 1024\n",f); + for (i = 1, st = start; st; st = st->ff_next) { + i++; + fputs(st->ff_name, f); + fputs("() {\n\tint LLstack[LLSTSIZ];\n\tLLnewlevel(LLstack);\n\tLLread();\n", f); + if (g_gettype(st->ff_nont->n_rule) == ALTERNATION) { + fprintf(f, "\tLLxx.LLx_p--; *LLxx.LLx_p = %d;\n", + findindex(&(st->ff_nont->n_contains))); + } + fprintf(f, "\tL%d_%s();\n", + st->ff_nont-nonterms, + (min_nt_ent+(st->ff_nont-nonterms))->h_name); + if (st->ff_nont->n_flags & NNOSCAN) { + fputs("\tLLscan(EOFILE);\n",f); + } + else fputs("\tif (LLsymb != EOFILE) LLerror(EOFILE);\n",f); + fputs("\tLLoldlevel();\n}\n",f); + } + fprintf(f,"#define LL_MAX %d\n#define LL_LEXI %s\n", i, lexical); + fputs("static short LLlists[] = {\n", f); + /* Now generate lists */ + q = lists; + while (q < plists) { + fprintf(f,"%d,\n",*q++); + } + fputs(c_arrend, f); + /* Now generate the sets */ + fputs("char LLsets[] = {\n",f); + for (i = 0; i < maxptr-setptr; i++) prset(setptr[i]); + fputs(c_arrend, f); + for (q = index; q <= &index[255 + NTERMINALS];) *q++ = -1; + for (t = h_entry; t < max_t_ent; t++) { + index[t->h_num] = t - h_entry; + } + fputs("short LLindex[] = {\n",f); + for (q = index; q <= &index[assval-1]; q++) { + fprintf(f, "%d,\n", *q); + } + fputs(c_arrend, f); + copyfile(1); + if (ferror(f) != 0) { + fatal(0,"write error on temporary"); + } + fclose(f); + install(RFILE, (string) 0); +} + +STATIC +generate(f) p_file f; { + /* + * Generates a parsing routine for every nonterminal + */ + register short *s; + register p_nont p; + register FILE *fd; + int i; + p_first ff; + int mustpop; + + /* Generate first sets */ + for (ff = f->f_firsts; ff; ff = ff->ff_next) { + macro(ff->ff_name,ff->ff_nont); + } + + /* For every nonterminal generate a function */ + fd = fpars; + for (s = f->f_start; s <= f->f_end; s++) { + p = &nonterms[*s]; + /* Generate functions in the order in which the nonterminals + * were defined in the grammar. This is important, because + * of synchronisation with the action file + */ + while (p->n_count--) getaction(1); + if (p->n_flags & PARAMS) controlline(); + fprintf(fd,"L%d_%s (",*s,(min_nt_ent + *s)->h_name); + if (p->n_flags & PARAMS) getparams(); + else fputs(") {\n", fd); + fputs("register struct LLxx *LLx = &LLxx;\n#ifdef lint\nLLx=LLx;\n#endif\n", fd); + if (p->n_flags & LOCALS) getaction(1); + i = getntsafe(p->n_flags); + mustpop = 0; + if (g_gettype(p->n_rule) == ALTERNATION) { + mustpop = 1; + if (i == NOSCANDONE) { + fputs(c_read, fd); + i = SCANDONE; + } + } + nlabel = 1; + rulecode(p->n_rule, + i, + !(p->n_flags & NNOSCAN), mustpop); + fputs(c_close, fd); + } +} + +STATIC +prset(p) p_set p; { + register int k; + register unsigned i; + int j; + + j = NBYTES(nterminals); + for (;;) { + i = (unsigned) *p++; + for (k = 0; k < sizeof(int); k++) { + fprintf(fpars,"0%o,",(i & 0377)); + i >>= 8; + if (--j == 0) { + fputs("\n",fpars); + return; + } + } + } + /* NOTREACHED */ +} + +STATIC +macro(s,n) string s; p_nont n; { + register FILE *f; + int i; + + f = fpars; + i = findindex(&(n->n_first)); + fprintf(f,"#define %s(x) ", s); + if (i < 0) { + fprintf(f, "((x) == %d)\n", -i); + return; + } + fprintf(f,"LLfirst((x), %d)\n", i); +} + +STATIC +controlline() { + /* Copy a compiler control line */ + register int l; + register FILE *f1,*f2; + + f1 = fact; f2 = fpars; + l = getc(f1); + assert(l == '\0'); + l = getc(f1); putc(l,f2); + assert( l == '#' ) ; + do { + l = getc(f1); + putc(l,f2); + } while ( l != '\n' ) ; +} + +STATIC +getparams() { + /* getparams is called if a nonterminal has parameters. The names + * of the parameters have to be found, and they should be declared + */ + long off; + register int l; + register FILE *f; + long ftell(); + char first; + + first = ' '; + f = fpars; + /* save offset in file to be able to copy the declaration later */ + off = ftell(fact); + /* First pass through declaration, find the parameter names */ + while ((l = gettok()) != ENDDECL) { + if (l == ';' || l == ',') { + /* + * The last identifier found before a ';' or a ',' + * must be a parameter + */ + fprintf(f,"%c%s", first, ltext); + first = ','; + } + } + fputs(") ",f); + /* + * Now copy the declarations + */ + fseek(fact,off,0); + getaction(0); + fputs(" {\n",f); +} + +STATIC +gettok() { + /* Read from the action file. */ + register int ch; + register string c; + register FILE *f; + + f = fact; + ch = getc(f); + switch(ch) { + case '\n': + ch = getc(f); + if (ch != EOF) { + ungetc(ch,f); + if (ch != '\0') return '\n'; + } + return ENDDECL; + case '\0': + ungetc(ch,f); + /* Fall through */ + case EOF : + return ENDDECL; + default : + if (isalpha(ch) || ch == '_') { + c = ltext; + while (isalnum(ch) || ch == '_') { + *c++ = ch; + if (c-ltext >= LTEXTSZ) --c; + ch = getc(f); + } + if (ch != EOF) ungetc(ch,f); + *c = '\0'; + return IDENT; + } + return ch; + } +} + +STATIC +rulecode(p,safety,mustscan,mustpop) register p_gram p; { + /* + * Code for a production rule. + */ + + register int toplevel = 1; + register FILE *f; + + /* + * First generate code to push the contains sets of this rule + * on a stack + */ + ppushlist = pushlist; + if (dopush(p,safety,1) > 0) pushcode(); + f = fpars; + for (;;) { + switch (g_gettype(p)) { + case EORULE : + if (mustscan && safety == NOSCANDONE) { + fputs(c_read,f); + } + return; + case TERMINAL : { + register p_entry t; + + t = &h_entry[g_getcont(p)]; + if (toplevel == 0 && safety != NOSCANDONE) { + fputs(c_LLptrmin,f); + } + if (safety == SAFE) { + fputs("LL_SAFE(",f); + } + else if (safety <= SCANDONE) { + fputs("LL_SCANDONE(",f); + } + else if (safety == NOSCANDONE) { + if (toplevel != 0) { + fputs("LL_T_NOSCANDONE(", f); + } + else fputs("LL_N_NOSCANDONE(", f); + } + PTERM(f,t); + fputs(");\n", f); + if (safety == SAFE && toplevel > 0) { + safety = NOSCANDONE; + toplevel = -1; + p++; + continue; + } + safety = NOSCANDONE; + break; } + case NONTERM : { + p_entry t; + register p_nont n; + int params; + + n = &nonterms[g_getnont(p)]; + t= min_nt_ent+(n-nonterms); + if (safety == NOSCANDONE && + getntsafe(n->n_flags) < NOSCANDONE) fputs(c_read, f); + if (toplevel == 0 && + g_gettype(n->n_rule) != ALTERNATION) { + fputs(c_LLptrmin, f); + } + params = g_getnpar(p); + if (params) controlline(); + fprintf(f,"L%d_%s(",n-nonterms, t->h_name); + if (params) getaction(0); + fputs(");\n",f); + safety = NOSCANDONE; + if (!(n->n_flags & NNOSCAN)) safety = SCANDONE; + break; } + case TERM : + safety = codeforterm((p_term) pentry[g_getcont(p)], + safety, + toplevel); + break; + case ACTION : + getaction(1); + p++; + continue; + case ALTERNATION : + alternation(p, safety, mustscan,mustpop, 0); + return; + } + p++; + toplevel = 0; + } +} + +alternation(p, safety, mustscan, mustpop, lb) register p_gram p; { + register FILE *f = fpars; + register p_link l; + int hulp, hulp1,hulp2; + int var; + int haddefault = 0; + int unsafe = 1; + p_set set; + p_set setalloc(); + + assert(safety < NOSCANDONE); + l = (p_link) pentry[g_getcont(p)]; + hulp = nlabel++; + hulp1 = nlabel++; + hulp2 = nlabel++; + var = nvar++; + if (!lb) lb = hulp1; + if (safety <= SAFESCANDONE) unsafe = 0; + if (unsafe && hulp1 == lb) fprintf(f,"L_%d: ", hulp1); + else if (mustpop) { + mustpop = 0; + fputs(c_LLptrmin, f); + } + if (unsafe) { + fprintf(f,"{ int LL_%d = 0;\n", var); + } + fputs("switch(LLcsymb) {\n", f); + while (g_gettype(p) != EORULE) { + l = (p_link) pentry[g_getcont(p)]; + if (unsafe && (l->l_flag & DEF)) { + haddefault = 1; + fprintf(f, +"default: if (!LL_%d && LLskip()) {LL_%d = 1; goto L_%d;}\ngoto L_%d;\n", + var, var, lb, hulp2); + } + if (l->l_flag & COND) { + set = setalloc(tsetsize); + setunion(set, l->l_others, tsetsize); + setintersect(set, l->l_symbs, tsetsize); + setminus(l->l_symbs, set, tsetsize); + setminus(l->l_others, set, tsetsize); + gencases(set); + free((p_mem) set); + controlline(); + fputs("if (!",f); + getaction(0); + fprintf(f,") goto L_%d;\n", hulp); + } + if (!unsafe && (l->l_flag & DEF)) { + fputs("default:\n", f); + haddefault = 1; + } + else gencases(l->l_symbs); + if (l->l_flag & DEF) { + if (unsafe) { + fprintf(f,"L_%d: ;\n", hulp2); + } + if (mustpop) fputs(c_LLptrmin, f); + rulecode(l->l_rule, SAFESCANDONE, mustscan, 0); + } + else { + if (mustpop) fputs(c_LLptrmin, f); + rulecode(l->l_rule, SAFE, mustscan, 0); + } + fputs(c_break,f); + if (l->l_flag & COND) { + if (!haddefault) { + fputs("default:\n", f); + } + else { + gencases(l->l_others); + safety = SAFE; + } + fprintf(f,"L_%d : ;\n",hulp); + p++; + if (!unsafe && g_gettype(p+1) == EORULE) { + if (mustpop) fputs(c_LLptrmin, f); + rulecode(((p_link)pentry[g_getcont(p)])->l_rule, + safety,mustscan,0); + } + else alternation(p,safety,mustscan,mustpop,lb); + break; + } + p++; + } + fputs(c_close, f); + if (unsafe) fputs(c_close, f); + return; +} + +STATIC int +dopush(p,safety,toplevel) register p_gram p; { + /* + * The safety only matters if toplevel != 0 + */ + register int count = 0; + + for (;;) { + switch(g_gettype(p)) { + case EORULE : + case ALTERNATION : + return count; + case TERM : { + register p_term q; + + q = (p_term) pentry[g_getcont(p)]; + count += dopush(p+1,SCANDONE,0); + *ppushlist++ = findindex(&(q->t_contains)); + return count+1; } + case TERMINAL : + if (toplevel > 0 && safety == SAFE) { + count += dopush(p+1,NOSCANDONE,-1); + } + else count += dopush(p+1, NOSCANDONE, 0); + if (toplevel != 0) { + return count; + } + *ppushlist++ = -h_entry[g_getcont(p)].h_num; + return count + 1; + case NONTERM : { + register p_nont n; + + n = &nonterms[g_getnont(p)]; + count += dopush(p+1, SCANDONE, 0); + if (toplevel == 0 || + g_gettype(n->n_rule) == ALTERNATION) { + *ppushlist++ = findindex(&n->n_contains); + count++; + } + return count; } + } + p++; + } +} + +# define max(a,b) ((a) < (b) ? (b) : (a)) +STATIC +pushcode() { + register int i,j,k; + register int *p = pushlist; + + if ((i = ppushlist - p) == 0) return; + if (i <= 2) { + genpush(p[0]); + if (i == 2) genpush(p[1]); + return; + } + fprintf(fpars,"\LLlpush(%d, %d);\n",plists-lists, i); + if (maxlists - plists < i) { + j = plists - lists; + k = maxlists-lists+max(i+1,50); + lists = (int *) ralloc( (p_mem)lists, + (unsigned)(k+1)*sizeof(int)); + plists = lists+j; + maxlists = lists+k; + } + while (i--) { + *plists++ = *p++; + } +} + +STATIC +getaction(flag) { + /* Read an action from the action file. + * flag = 1 if it is an action, + * 0 when reading parameters + */ + register int match,ch; + register FILE *f; + register int newline; + int mark = 0; + + if (flag == 1) { + controlline(); + } + f = fpars; + newline = 0; + for (;;) { + ch = gettok(); + switch(ch) { + case ENDDECL: + if (flag != 2) break; + ch = getc(fact); + assert(ch == '\0'); + fputs("\n",f); + if (mark) return; + mark = 1; + continue; + case '\n': + newline = 1; + break; + case '\'' : + case '"' : + if (newline) { + newline = 0; + } + match = ch; + putc(ch,f); + while (ch = getc(fact)) { + if (ch == match) break; + if (ch == '\\') { + putc(ch,f); + ch = getc(fact); + } + putc(ch,f); + } + break; + case IDENT : + if (newline) { + newline = 0; + } + fputs(ltext,f); + continue; + } + mark = 0; + if (ch == ENDDECL) break; + if (newline && ch != '\n') { + newline = 0; + } + putc(ch,f); + } + if (flag) fputs("\n",f); +} + +STATIC +codeforterm(q,safety,toplevel) register p_term q; { + /* + * Generate code for a term + */ + register FILE *f; + register int i; + register int rep; + int persistent; + int noscan; + + f = fpars; + i = r_getnum(&(q->t_reps)); + rep = r_getkind(&(q->t_reps)); + persistent = (q->t_flags & PERSISTENT); + noscan = (q->t_flags & TNOSCAN); + if (safety == NOSCANDONE && (rep != FIXED || i == 0)) { + fputs(c_read, f); + if (rep == FIXED && g_gettype(q->t_rule) != ALTERNATION) { + fputs(c_LLptrmin, f); + } + } + if (i) { + /* N > 0, so generate fixed forloop */ + fprintf(f,"{\nregister LL_i = %d;\n",i); + fputs("for (;;) {\nif (!LL_i--) {\nLLptr++;\n", f); + fputs("break;\n}\n", f); + if (rep == FIXED) { + if (noscan && safety == NOSCANDONE) { + fputs(c_read,f); + } + } + } + else if (rep != OPT && rep != FIXED) { + /* '+' or '*', so generate infinite loop */ + fputs("for (;;) {\n",f); + } + if (rep == STAR || rep == OPT) { + genifhead(q,rep); + } + rulecode(q->t_rule,t_safety(rep,i,persistent,safety), + rep != FIXED || !noscan, + rep == FIXED && i == 0 && g_gettype(q->t_rule) == ALTERNATION); + /* in the case of '+', the if is after the code for the rule */ + if (rep == PLUS) { + if (!persistent) { + fprintf(f, "*LLptr = %d;\n", findindex(&(q->t_first))); + } + genifhead(q,rep); + } + if (rep != OPT && rep != FIXED) fputs("continue;\n", f); + if (rep != FIXED) { + fputs(c_close, f); /* Close switch */ + if (rep != OPT) { + fputs("break;\n", f); + } + } + if (rep != OPT && (rep != FIXED || i > 0)) { + fputs(c_close, f); /* Close for */ + if (i > 0) { + fputs(c_close, f);/* Close Register ... */ + } + } + if (rep != FIXED || !noscan) return SCANDONE; + return NOSCANDONE; +} + +STATIC +genifhead(q,rep) register p_term q; { + /* + * Generate if statement for term q + */ + register FILE *f; + p_set p1; + p_set setalloc(); + int hulp, hulp1; + + f = fpars; + hulp = nlabel++; + hulp1 = nlabel++; + fprintf(f, "L_%d : switch(LLcsymb) {\n", hulp); + if (q->t_flags & RESOLVER) { + p1 = setalloc(tsetsize); + setunion(p1,q->t_first,tsetsize); + setintersect(p1,q->t_follow,tsetsize); + /* + * p1 now points to a set containing the conflicting + * symbols + */ + setminus(q->t_first, p1, tsetsize); + setminus(q->t_follow, p1, tsetsize); + gencases(p1); + free((p_mem) p1); + controlline(); + fputs("if (", f); + getaction(0); + fprintf(f, ") goto L_%d;\n", hulp1); + } + gencases(q->t_follow); + fputs("LLptr++; break;\n", f); + fprintf(f, "default: if (!LLnext()) goto L_%d;\n", hulp); + gencases(q->t_first); + if (q->t_flags & RESOLVER) { + fprintf(f, "L_%d : ;\n", hulp1); + } + if (rep == OPT) { + fputs(c_LLptrmin,f); + } +} + +STATIC +gencases(setp) register p_set setp; { + /* + * setp points to a bitset indicating which cases must + * be generated. + * YECH, the PCC compiler does not accept many cases without statements + * inbetween, so after every case label an empty statement is + * generated. + * The C-grammar used by PCC is really stupid on this point : + * it contains the rule + * statement : label statement + * which is right-recursive, and as is well known, LALR parsers don't + * handle these things very good. + * The grammar should have been written : + * labeledstatement : labels statement ; + * labels : labels label | ; + */ + register p_entry p; + register i; + + p = h_entry; + for (i=0; i < nterminals; i++) { + if (IN(setp,i)) { + fprintf(fpars, + p->h_num<0400 ? "case /* '%s' */ %d : ;\n" + : "case /* %s */ %d : ;\n", + p->h_name, i); + } + p++; + } +} + +STATIC +genpush(d) { + + fprintf(fpars, "LLptr--;\n*LLptr = %d;\n",d); +} + +static char namebuf[20]; + +STATIC string +genname(s) string s; { + /* + * Generate a target file name from the + * source file name s. + */ + register string c,d; + + c = namebuf; + while (*s) { + if (*s == '/') { + while (*s == '/') s++; + if (*s) c = namebuf; + else break; + } + *c++ = *s++; + } + for (d = c; --d > namebuf;) if (*d == '.') break; + if (d == namebuf) d = c; + if (d >= &namebuf[12]) { + fatal(0,"%s : filename too long",namebuf); + } + *d++ = '.'; + *d++ = 'c'; + *d = '\0'; + return namebuf; +} -- 2.34.1