From f055d610d31e83043583534a0c6c0a6ae1b25ac7 Mon Sep 17 00:00:00 2001 From: ceriel Date: Mon, 25 Nov 1985 15:47:51 +0000 Subject: [PATCH] Completely new version, generating a much faster parser. --- util/LLgen/src/LLgen.g | 316 ++++++++++-------- util/LLgen/src/Makefile | 22 +- util/LLgen/src/alloc.c | 49 ++- util/LLgen/src/check.c | 311 +++++++++--------- util/LLgen/src/compute.c | 673 +++++++++++++++++++++------------------ util/LLgen/src/extern.h | 28 +- util/LLgen/src/gencode.c | 581 ++++++++++++++++++--------------- util/LLgen/src/global.c | 21 +- util/LLgen/src/io.h | 1 - util/LLgen/src/main.c | 174 +++++----- util/LLgen/src/name.c | 272 ++++++++-------- util/LLgen/src/reach.c | 25 +- util/LLgen/src/sets.c | 155 ++++----- util/LLgen/src/sets.h | 10 +- util/LLgen/src/tokens.g | 246 +++++++------- util/LLgen/src/types.h | 79 +++-- 16 files changed, 1574 insertions(+), 1389 deletions(-) diff --git a/util/LLgen/src/LLgen.g b/util/LLgen/src/LLgen.g index d69687288..c842ee5b4 100644 --- a/util/LLgen/src/LLgen.g +++ b/util/LLgen/src/LLgen.g @@ -26,33 +26,64 @@ /* * LLgen.g * Defines the grammar of LLgen. - * Some routines that are included build the internal structure + * Some routines that build the internal structure are also included */ { # include "types.h" # include "io.h" -# include "tunable.h" # include "extern.h" # include "assert.h" +# include "cclass.h" # ifndef NORCSID -static string rcsidb = "$Header$"; +static string rcsid = "$Header$"; # endif -p_mem alloc(); +p_mem alloc(), new_mem(); string store(); -t_gram search(); +p_gram search(); static int nparams; /* parameter count for nonterminals */ -static t_gram elem; /* temporary space */ static int acount; /* count #of global actions */ +static t_info term_info, + link_info; +static p_order order, + maxorder; /* Here are defined : */ +extern a_init(); +STATIC p_order neworder(); STATIC copyact(); -STATIC unsigned get(); -STATIC t_gram mkalt(); -STATIC t_gram mkterm(); +STATIC mkalt(); +STATIC mkterm(); STATIC p_gram copyrule(); +/* and of course LLparse() */ + +a_init() { + term_info.i_esize = sizeof (t_term); + term_info.i_incr = 50; + link_info.i_esize = sizeof (t_link); + link_info.i_incr = 50; + name_init(); +} + +STATIC p_order +neworder(index) { + register p_order po; + + if ((po = order) == maxorder) { + po = (p_order) alloc(20 * sizeof(*order)); + maxorder = po + 20; + } + order = po + 1; + po->o_next = 0; + po->o_index = index; + if (porder) { + porder->o_next = po; + } + else sorder = po; + return po; +} } %start LLparse, spec; @@ -62,7 +93,7 @@ spec : { acount = 0; } { /* * Put an endmarker in temporary file */ - putc('\0',fact); putc('\0',fact); + fprintf(fact,"%c%c",'\0', '\0'); } ; @@ -85,12 +116,12 @@ def { register string p; } * Put the declaration in the list * of start symbols */ - t_gram temp; + register p_gram temp; register p_start ff; temp = search(NONTERM,lextoken.t_string,BOTH); ff = (p_start) alloc(sizeof(t_start)); - ff->ff_nont = &nonterms[g_getnont(&temp)]; + ff->ff_nont = g_getnont(temp); ff->ff_name = p; ff->ff_next = start; start = ff; @@ -98,7 +129,8 @@ def { register string p; } | C_LEXICAL C_IDENT { if (!lexical) { lexical = store(lextoken.t_string); - } else error(linecount,"Duplicate %%lexical"); + } + else error(linecount,"Duplicate %%lexical"); } ';' /* @@ -118,17 +150,18 @@ def { register string p; } listel : C_IDENT { search(TERMINAL,lextoken.t_string,ENTERING); } ; -rule { register p_nont p;} +rule { register p_nont p; + p_gram rr; + register p_gram temp; + } : /* * grammar for a production rule */ - C_IDENT { t_gram temp; - - temp = search(NONTERM,lextoken.t_string,BOTH); - p = &nonterms[g_getnont(&temp)]; + C_IDENT { temp = search(NONTERM,lextoken.t_string,BOTH); + p = &nonterms[g_getnont(temp)]; if (p->n_rule) { error(linecount, - "nonterminal already defined"); +"nonterminal %s already defined", lextoken.t_string); } /* * Remember the order in which the nonterminals @@ -136,20 +169,26 @@ rule { register p_nont p;} * order to keep track with the actions on the * temporary file */ - *maxorder++ = p - nonterms; + porder = neworder(p - nonterms); p->n_count = acount; acount = 0; p->n_lineno = linecount; } [ params(2) { p->n_flags |= PARAMS; - if (nparams > 7) { + if (nparams > 15) { error(linecount,"Too many parameters"); - } else setntparams(p,nparams); + } + else setntparams(p,nparams); } ]? [ action(0) { p->n_flags |= LOCALS; } ]? - ':' productions(&(p->n_rule)) ';' + ':' productions(&rr) ';' + /* + * Do not use p->n_rule now! The nonterms array + * might have been re-allocated. + */ + { nonterms[g_getnont(temp)].n_rule = rr;} ; action(int n;) @@ -173,73 +212,80 @@ productions(p_gram *p;) register p_gram p_alts = alts; int o_lc, n_lc; } : - { n_lc = o_lc = linecount; } + { o_lc = linecount; } simpleproduction(p,&conflres) - [ '|' { n_lc = linecount; } - [ C_DEFAULT { if (haddefault) { - error(linecount, - "multiple %%default"); - } - haddefault = 1; - t |= DEF; - } - ]? - simpleproduction(&prod,&t) + { if (conflres & DEF) haddefault = 1; } + [ + [ '|' { n_lc = linecount; } + simpleproduction(&prod,&t) { if (p_alts - alts >= 97) { error(n_lc,"Too many alternatives"); p_alts = alts; } - *p_alts++ = mkalt(*p,conflres,o_lc); + if (t & DEF) { + if (haddefault) { + error(n_lc, + "More than one %%default in alternation"); + } + haddefault = 1; + } + mkalt(*p,conflres,o_lc,p_alts++); o_lc = n_lc; conflres = t; t = 0; *p = prod; } - ]* { if (conflres & ~DEF) { + ]+ { if (conflres & ~DEF) { error(n_lc, - "Resolver on last alternative"); - } - if (p_alts > alts) { - *p_alts++ = mkalt(*p,conflres,n_lc); - g_settype(p_alts,EORULE); - *p = copyrule(alts,p_alts+1-alts); - if (!haddefault) { - ((p_link) pentry[g_getcont(*p)]) - ->l_flag |= DEF; - } + "Resolver on last alternative not allowed"); } + mkalt(*p,conflres,n_lc,p_alts++); + g_settype(p_alts,EORULE); + *p = copyrule(alts,p_alts+1-alts); } + | + { if (conflres & ~DEF) { + error(o_lc, + "No alternation conflict resolver allowed here"); + } + /* + if (conflres & DEF) { + error(o_lc, + "No %%default allowed here"); + } + */ + } + ] ; { -STATIC t_gram -mkalt(prod,condition,lc) p_gram prod; { +STATIC +mkalt(prod,condition,lc,res) p_gram prod; register p_gram res; { /* - * Create an alternation, initialise it and return - * a grammar element containing it + * Create an alternation and initialise it. */ - register unsigned hulp; + register int hulp; register p_link l; - t_gram r; - g_init(&r); - hulp = get(sizeof(*l)); - l = (p_link) pentry[hulp]; + l = (p_link) new_mem(&link_info); + links = (p_link) link_info.i_ptr; + hulp = l - links; l->l_rule = prod; l->l_flag = condition; - g_setcont(&r,hulp); - g_settype(&r,ALTERNATION); - r.g_lineno = lc; - return r; + g_setcont(res,hulp); + g_settype(res,ALTERNATION); + res->g_lineno = lc; } } simpleproduction(p_gram *p; register int *conflres;) { t_gram rule[100]; + t_gram elem; register p_gram p_rule = rule; - t_reps reps; + int cnt, kind; } : - { r_init(&reps); } + [ C_DEFAULT { *conflres = DEF; } + ]? [ /* * Optional conflict reslover @@ -248,92 +294,91 @@ simpleproduction(p_gram *p; register int *conflres;) | C_PREFER { *conflres |= PREFERING; } | C_AVOID { *conflres |= AVOIDING; } ]? - [ %persistent elem + [ %persistent elem(&elem) { if (p_rule - rule >= 98) { error(linecount,"Production too long"); p_rule = rule; } - r_setkind(&reps,FIXED); - r_setnum(&reps,0); + kind = FIXED; + cnt = 0; } - [ repeats(&reps) + [ repeats(&kind, &cnt) { if (g_gettype(&elem) != TERM) { *p_rule = elem; g_settype(p_rule+1,EORULE); - elem = mkterm(copyrule(p_rule,2), - 0, - &reps, - p_rule->g_lineno); + mkterm(copyrule(p_rule,2), + 0, + p_rule->g_lineno, + &elem); } } ]? { if (g_gettype(&elem) == TERM) { register p_term q; - q = (p_term) pentry[g_getcont(&elem)]; - q->t_reps = reps; - if (q->t_flags & RESOLVER && - (r_getkind(&reps) == PLUS || - r_getkind(&reps) == FIXED)) { + q = &terms[g_getcont(&elem)]; + r_setkind(q,kind); + r_setnum(q,cnt); + if ((q->t_flags & RESOLVER) && + (kind == PLUS || kind == FIXED)) { error(linecount, - "illegal %%while"); + "%%while not allowed in this term"); } - if (q->t_flags & PERSISTENT && - r_getkind(&reps) == FIXED) { + /* + * A persistent fixed term is the same + * as a non-persistent fixed term. + * Should we complain? + if ((q->t_flags & PERSISTENT) && + kind == FIXED) { error(linecount, "illegal %%persistent"); } + */ } *p_rule++ = elem; } ]* { register p_term q; g_settype(p_rule,EORULE); + *p = 0; if (g_gettype(&rule[0]) == TERM && p_rule-rule == 1) { - q=(p_term)pentry[g_getcont(&rule[0])]; - if (r_getkind(&(q->t_reps))==FIXED && - r_getnum(&(q->t_reps)) == 0) { + q = &terms[g_getcont(&rule[0])]; + if (r_getkind(q) == FIXED && + r_getnum(q) == 0) { *p = q->t_rule; } - else *p = copyrule(rule,2); } - else *p = copyrule(rule,p_rule-rule+1); + if (!*p) *p = copyrule(rule,p_rule-rule+1); } ; { -STATIC t_gram -mkterm(prod,flags,reps,lc) p_gram prod; register p_reps reps; { +STATIC +mkterm(prod,flags,lc, result) p_gram prod; register p_gram result; { /* * Create a term, initialise it and return - * a grammar element contianing it + * a grammar element containing it */ register p_term q; - register unsigned hulp; - t_gram r; + unsigned hulp; - g_init(&r); - hulp = get(sizeof(*q)); - q = (p_term) pentry[hulp]; + q = (p_term) new_mem(&term_info); + terms = (p_term) term_info.i_ptr; + hulp = q - terms; q->t_rule = prod; q->t_contains = 0; - /* "*1" = "?" */ - if (r_getnum(reps) == 1 && r_getkind(reps) == STAR) { - r_setkind(reps,OPT); - } - q->t_reps = *reps; q->t_flags = flags; - g_settype(&r,TERM); - g_setcont(&r,hulp); - r.g_lineno = lc; - return r; + g_settype(result,TERM); + g_setcont(result,hulp); + result->g_lineno = lc; } } -elem { register short t = 0; +elem (register p_gram pres;) + { register short t = 0; p_gram p1; - t_reps reps; int ln; + p_gram pe; } : '[' { ln = linecount; } [ C_WHILE expr { t |= RESOLVER; } @@ -341,23 +386,27 @@ elem { register short t = 0; [ C_PERSISTENT { t |= PERSISTENT; } ]? productions(&p1) - ']' { r_init(&reps); - elem = mkterm(p1,t,&reps,ln); + ']' { + mkterm(p1,t,ln,pres); } - | %default - C_IDENT { elem = search(UNKNOWN,lextoken.t_string,BOTH); } - [ params(0) { if (nparams > 6) { + | + C_IDENT { pe = search(UNKNOWN,lextoken.t_string,BOTH); + *pres = *pe; + } + [ params(0) { if (nparams > 14) { error(linecount,"Too many parameters"); - } else g_setnpar(&elem,nparams+1); - if (g_gettype(&elem) == TERMINAL) { + } else g_setnpar(pres,nparams+1); + if (g_gettype(pres) == TERMINAL) { error(linecount, "Terminal with parameters"); } } ]? - | C_LITERAL { elem = search(LITERAL,lextoken.t_string,BOTH); } - | { g_settype(&elem,ACTION); - elem.g_lineno = linecount; + | C_LITERAL { pe = search(LITERAL,lextoken.t_string,BOTH); + *pres = *pe; + } + | { g_settype(pres,ACTION); + pres->g_lineno = linecount; } action(1) ; @@ -371,20 +420,21 @@ expr : '(' { copyact('(',')',1,0); } ')' ; -repeats(t_reps *t;) { int t1 = 0; } : +repeats(int *kind, *cnt;) { int t1 = 0; } : [ - '?' { r_setkind(t,OPT); } - | [ '*' { r_setkind(t,STAR); } - | '+' { r_setkind(t,PLUS); } + '?' { *kind = OPT; } + | [ '*' { *kind = STAR; } + | '+' { *kind = PLUS; } ] number(&t1)? - { if (t1 == 1 && r_getkind(t) == PLUS) { - error(linecount, - "Illegal repetition specifier"); + { if (t1 == 1) { + t1 = 0; + if (*kind == STAR) *kind = OPT; + if (*kind == PLUS) *kind = FIXED; } } | number(&t1) - ] { r_setnum(t,t1); } + ] { *cnt = t1; } ; number(int *t;) @@ -404,12 +454,12 @@ firsts { register string p; } * Store this %first in the list belonging * to this input file */ - t_gram temp; + p_gram temp; register p_first ff; temp = search(NONTERM,lextoken.t_string,BOTH); ff = (p_first) alloc(sizeof(t_first)); - ff->ff_nont = &nonterms[g_getnont(&temp)]; + ff->ff_nont = g_getnont(temp); ff->ff_name = p; ff->ff_next = pfile->f_firsts; pfile->f_firsts = ff; @@ -436,11 +486,9 @@ copyact(ch1,ch2,flag,level) char ch1,ch2; { if (!level) { saved = linecount; nparams = 0; /* count comma's */ - putc('\0',f); - fprintf(f,"# line %d \"%s\"\n", linecount,f_input); - if (flag == 1) putc(ch1,f); + fprintf(f,"%c# line %d \"%s\"\n", '\0', linecount,f_input); } - else putc(ch1,f); + if (level || flag == 1) putc(ch1,f); for (;;) { ch = input(); if (ch == ch2) { @@ -451,7 +499,7 @@ copyact(ch1,ch2,flag,level) char ch1,ch2; { } return; } - if (! isspace(ch)) semicolon = 0; + if (c_class[ch] != ISSPA) semicolon = 0; switch(ch) { case ')': case '}': @@ -474,6 +522,7 @@ copyact(ch1,ch2,flag,level) char ch1,ch2; { skipcomment(1); continue; } + ch = '/'; break; case ';': semicolon = 1; @@ -516,21 +565,8 @@ copyact(ch1,ch2,flag,level) char ch1,ch2; { } } -static int ecount; /* Index in "pentry" array */ -STATIC unsigned -get(size) unsigned size; { - /* - * Get some core, save a pointer to it in "pentry", - * return index in pentry - */ - - if (ecount >= ENTSIZ) fatal(linecount,"Entry table overflow"); - pentry[ecount] = alloc(size); - return ecount++; -} - STATIC p_gram -copyrule(p,length) register p_gram p; register length; { +copyrule(p,length) register p_gram p; { /* * Returns a pointer to a grammar rule that was created in * p. The space pointed to by p can now be reused diff --git a/util/LLgen/src/Makefile b/util/LLgen/src/Makefile index 5e5f7ad91..1c9708a2d 100644 --- a/util/LLgen/src/Makefile +++ b/util/LLgen/src/Makefile @@ -1,11 +1,11 @@ # $Header$ PROF= LLOPT= # -vvv -x -CFLAGS=$(PROF) -O -DNDEBUG # -R +CFLAGS=$(PROF) -O -DNDEBUG LDFLAGS=-i -OBJECTS = main.o gencode.o compute.o LLgen.o tokens.o check.o reach.o global.o name.o sets.o Lpars.o alloc.o machdep.o -CFILES = main.c gencode.c compute.c LLgen.c tokens.c check.c reach.c global.c name.c sets.c Lpars.c alloc.c machdep.c -FILES =types.h tunable.h extern.h io.h sets.h assert.h tokens.g LLgen.g main.c name.c compute.c sets.c gencode.c global.c check.c reach.c alloc.c machdep.c Makefile +OBJECTS = main.o gencode.o compute.o LLgen.o tokens.o check.o reach.o global.o name.o sets.o Lpars.o alloc.o machdep.o cclass.o +CFILES = main.c gencode.c compute.c LLgen.c tokens.c check.c reach.c global.c name.c sets.c Lpars.c alloc.c machdep.c cclass.c +FILES =types.h tunable.h extern.h io.h sets.h assert.h tokens.g LLgen.g main.c name.c compute.c sets.c gencode.c global.c check.c reach.c alloc.c machdep.c Makefile cclass.c GFILES = tokens.g LLgen.g LINT = lint -b -DNDEBUG -DNORCSID @@ -19,7 +19,7 @@ parser: $(GFILES) @touch parser LLgen: $(OBJECTS) - $(CC) $(PROF) $(LDFLAGS) $(OBJECTS) -o LLgen + $(CC) $(PROF) $(LDFLAGS) $(OBJECTS) -o LLgen @size LLgen pr : @@ -39,9 +39,9 @@ distr: # AUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTO LLgen.o: Lpars.h LLgen.o: assert.h +LLgen.o: cclass.h LLgen.o: extern.h LLgen.o: io.h -LLgen.o: tunable.h LLgen.o: types.h Lpars.o: Lpars.h alloc.o: extern.h @@ -50,22 +50,20 @@ check.o: assert.h check.o: extern.h check.o: io.h check.o: sets.h -check.o: tunable.h check.o: types.h compute.o: assert.h compute.o: extern.h compute.o: io.h compute.o: sets.h -compute.o: tunable.h compute.o: types.h gencode.o: assert.h +gencode.o: cclass.h gencode.o: extern.h gencode.o: io.h gencode.o: sets.h -gencode.o: tunable.h gencode.o: types.h +global.o: extern.h global.o: io.h -global.o: tunable.h global.o: types.h machdep.o: ../../../h/em_path.h machdep.o: types.h @@ -77,12 +75,10 @@ main.o: types.h name.o: assert.h name.o: extern.h name.o: io.h -name.o: tunable.h name.o: types.h reach.o: assert.h reach.o: extern.h reach.o: io.h -reach.o: tunable.h reach.o: types.h sets.o: assert.h sets.o: extern.h @@ -90,7 +86,7 @@ sets.o: sets.h sets.o: types.h tokens.o: Lpars.h tokens.o: assert.h +tokens.o: cclass.h tokens.o: extern.h tokens.o: io.h -tokens.o: tunable.h tokens.o: types.h diff --git a/util/LLgen/src/alloc.c b/util/LLgen/src/alloc.c index da86bbb92..2e8f2967b 100644 --- a/util/LLgen/src/alloc.c +++ b/util/LLgen/src/alloc.c @@ -32,15 +32,18 @@ # include "extern.h" # ifndef NORCSID -static string rcsida = "$Header$"; +static string rcsid = "$Header$"; # endif static string e_nomem = "Out of memory"; p_mem alloc(size) unsigned size; { - register p_mem p; - p_mem malloc(); + /* + Allocate "size" bytes. Panic if it fails + */ + p_mem p; + p_mem malloc(); if ((p = malloc(size)) == 0) fatal(linecount,e_nomem); return p; @@ -48,9 +51,41 @@ alloc(size) unsigned size; { p_mem ralloc(p,size) p_mem p; unsigned size; { - register p_mem q; - p_mem realloc(); + /* + Re-allocate the chunk of memory indicated by "p", to + occupy "size" bytes + */ + p_mem realloc(); - if ((q = realloc(p,size)) == 0) fatal(linecount,e_nomem); - return q; + if ((p = realloc(p,size)) == 0) fatal(linecount,e_nomem); + return p; +} + +p_mem +new_mem(p) p_info p; { + /* + This routine implements arrays that can grow. + It must be called every time a new element is added to it. + Also, the array has associated with it a "info_alloc" structure, + which contains info on the element size, total allocated size, + a pointer to the array, a pointer to the first free element, + and a pointer to the top. + If the base of the array is remembered elsewhere, it should + be updated each time this routine is called + */ + p_mem rp; + unsigned sz; + + if (p->i_max >= p->i_top) { /* No more free elements */ + sz = p->i_size; + p->i_size += p->i_incr * p->i_esize; + p->i_ptr = !p->i_ptr ? + alloc(p->i_size) : + ralloc(p->i_ptr, p->i_size); + p->i_max = p->i_ptr + sz; + p->i_top = p->i_ptr + p->i_size; + } + rp = p->i_max; + p->i_max += p->i_esize; + return rp; } diff --git a/util/LLgen/src/check.c b/util/LLgen/src/check.c index 62815095e..5c937c4e0 100644 --- a/util/LLgen/src/check.c +++ b/util/LLgen/src/check.c @@ -30,7 +30,6 @@ # include "types.h" # include "extern.h" -# include "tunable.h" # include "io.h" # include "sets.h" # include "assert.h" @@ -39,21 +38,18 @@ static string rcsid1 = "$Header$"; # endif -# define PTERM(p) fprintf(fout,(p)->h_num < 0400 ? "'%s' " : "%s ",(p)->h_name); static string c_first = "> firstset "; static string c_contains = "> containset "; static string c_follow = "> followset "; p_set setalloc(); -static string ntname; /* nonterminal that is currently checked */ -static p_nont nt; /* pointer to its struct */ static int level; /* In this file are defined : */ extern conflchecks(); -STATIC newline(); +STATIC prline(); STATIC printset(); -STATIC check(); +STATIC int check(); STATIC moreverbose(); STATIC prrule(); STATIC cfcheck(); @@ -70,8 +66,7 @@ conflchecks() { * must be disjunct. */ register p_nont p; - register FILE *f; - register short *s; + register p_order s; p_file x = files; f_input = x->f_name; @@ -79,38 +74,43 @@ conflchecks() { for (p = nonterms; p < maxnt; p++) p->n_flags |= VERBOSE; } if (verbose) { - if ((f = fopen(f_out,"w")) == NULL) fatal(1,e_noopen,f_out); - fout = f; + if ((fout = fopen(f_out,"w")) == NULL) fatal(1,e_noopen,f_out); } /* * Check the rules in the order in which they are declared, * and input file by input file, to give proper error messages */ - for (; x->f_end < maxorder; x++) { + for (; x < maxfiles; x++) { + f_input = x->f_name; + for (s = x->f_list; s; s = s->o_next) { + p = &nonterms[s->o_index]; + if (check(p->n_rule)) p->n_flags |= VERBOSE; + } + } + for (x = files; x < maxfiles; x++) { f_input = x->f_name; - for (s = x->f_start; s <= x->f_end; s++) { - nt = p = &nonterms[*s]; - ntname = (min_nt_ent + *s)->h_name; + for (s = x->f_list; s; s = s->o_next) { + p = &nonterms[s->o_index]; if (p->n_flags & RECURSIVE) { error(p->n_lineno, "Recursion in default for nonterminal %s", - ntname); + p->n_name); } - check(p->n_rule); /* * If a printout is needed for this rule in * LL.output, just do it */ if (verbose && (p->n_flags & VERBOSE)) { - fprintf(f,"\n%s :\n",ntname); + fprintf(fout,"\n%s :\n",p->n_name); printset(p->n_first,c_first); printset(p->n_contains,c_contains); printset(p->n_follow,c_follow); - fputs("> rule\n\t",f); + fprintf(fout,"> rule%s\n\t", + p->n_flags&EMPTY ? "\t(EMPTY producing)" : ""); level = 8; prrule(p->n_rule); level = 0; - newline(); + prline("\n"); } /* * Now, the conflicts may be resolved @@ -118,16 +118,13 @@ conflchecks() { resolve(p->n_rule); } } - if (verbose) fclose(f); + if (verbose) fclose(fout); } STATIC -newline() { - /* - * Newline and "level" spaces indentation - */ - if (level > 0) fprintf(fout,"\n%*c",level,' '); - else putc('\n',fout); +prline(s) char *s; { + fputs(s, fout); + spaces(); } STATIC @@ -135,69 +132,68 @@ printset(p,s) register p_set p; string s; { /* * Print the elements of a set */ - register FILE *f; - register i; - register j; + register int i; + register int j; + register p_token pt; + string name; int k; int hulp; - k = strlen(s)+1; + k = strlen(s) + 2 + level; /* * k contains relative level of indentation */ - f = fout; - fprintf(f,"%s{ ",s); - j = level+1+k; + fprintf(fout,"%s{ ",s); + j = k; /* * j will gather the total length of the line */ - if (p == (p_set) 0) fputs(">non existent (yet?)< ",f); - else { - for (i = 0; i < nterminals; i++) { - if (IN(p,i)) { - hulp = strlen(h_entry[i].h_name)+1; - if (h_entry[i].h_num < 0400) hulp += 2; - if ((j += hulp) >= 78) { - /* - * Line becoming too long - */ - j = level+k+1+hulp; - newline(); - fprintf(f,">%*c",k,' '); - } - PTERM(&h_entry[i]); + for (i = 0, pt = tokens; i < ntokens; i++,pt++) { + if (IN(p,i)) { + hulp = strlen(pt->t_string)+1; + if (pt->t_tokno < 0400) hulp += 2; + if ((j += hulp) >= 78) { + /* + * Line becoming too long + */ + j = k+hulp; + prline("\n"); + fprintf(fout,">%*c",k - level - 1,' '); } + fprintf(fout, pt->t_tokno<0400 ? "'%s' " : "%s ",pt->t_string); } - if (ntprint) for (i = 0; i < nnonterms; i++) { - /* - * Nonterminals in the set must also be printed - */ - if (NTIN(p,i)) { - hulp = strlen((min_nt_ent+i)->h_name)+3; - if ((j += hulp) >= 78) { - j = level + k + 1 + hulp; - newline(); - fprintf(f,">%*c",k,' '); - } - fprintf(f,"<%s> ",(min_nt_ent+i)->h_name); + } + if (ntprint) for (i = 0; i < nnonterms; i++) { + /* + * Nonterminals in the set must also be printed + */ + if (NTIN(p,i)) { + name = nonterms[i].n_name; + hulp = strlen(name) + 3; + if ((j += hulp) >= 78) { + j = k + hulp; + prline("\n"); + fprintf(fout,">%*c",k - level - 1,' '); } + fprintf(fout,"<%s> ",name); } } - putc('}',f); - newline(); + prline("}\n"); } -STATIC +STATIC int check(p) register p_gram p; { /* * Search for conflicts in a grammar rule. */ - p_set temp; + register p_set temp; + register int retval; + retval = 0; for (;;) { switch (g_gettype(p)) { case EORULE : - return; + return retval; case NONTERM : { register p_nont n; @@ -205,28 +201,28 @@ check(p) register p_gram p; { if (g_getnpar(p) != getntparams(n)) { error(p->g_lineno, "Call of %s : parameter count mismatch", - (min_nt_ent+g_getnont(p))->h_name); + n->n_name); } break; } case TERM : { register p_term q; - q = (p_term) pentry[g_getcont(p)]; - check(q->t_rule); - if (r_getkind(&(q->t_reps)) == FIXED) break; + q = &terms[g_getcont(p)]; + retval |= check(q->t_rule); + if (r_getkind(q) == FIXED) break; if (setempty(q->t_first)) { q->t_flags |= EMPTYFIRST; - nt->n_flags |= VERBOSE; + retval = 1; error(p->g_lineno, "No symbols in term"); } if (empty(q->t_rule)) { q->t_flags |= EMPTYTERM; - nt->n_flags |= VERBOSE; + retval = 1; error(p->g_lineno, "Term produces empty"); } - temp = setalloc(setsize); - setunion(temp,q->t_first,setsize); - if (!setintersect(temp,q->t_follow,setsize)) { + temp = setalloc(); + setunion(temp,q->t_first); + if (!setintersect(temp,q->t_follow)) { /* * q->t_first * q->t_follow != EMPTY */ @@ -235,16 +231,17 @@ check(p) register p_gram p; { * No conflict resolver */ error(p->g_lineno, - "Repitition conflict"); - nt->n_flags |= VERBOSE; - if (verbose == 2) moreverbose(temp); + "Repetition conflict"); + retval = 1; + moreverbose(temp); } - } else { + } + else { if (q->t_flags & RESOLVER) { q->t_flags |= NOCONF; error(p->g_lineno, "%%while, no conflict"); - nt->n_flags |= VERBOSE; + retval = 1; } } free((p_mem) temp); @@ -252,29 +249,30 @@ check(p) register p_gram p; { case ALTERNATION : { register p_link l; - l = (p_link) pentry[g_getcont(p)]; - temp = setalloc(setsize); - setunion(temp,l->l_symbs,setsize); - if(!setintersect(temp,l->l_others,setsize)) { + l = &links[g_getcont(p)]; + temp = setalloc(); + setunion(temp,l->l_symbs); + if(!setintersect(temp,l->l_others)) { /* * temp now contains the conflicting * symbols */ if (!(l->l_flag & (COND|PREFERING|AVOIDING))) { error(p->g_lineno, - "Alternation conflict"); - nt->n_flags |= VERBOSE; - if (verbose == 2) moreverbose(temp); +"Alternation conflict"); + retval = 1; + moreverbose(temp); } } else { if (l->l_flag & (COND|PREFERING|AVOIDING)) { l->l_flag |= NOCONF; - error(p->g_lineno,"No conflict"); - nt->n_flags |= VERBOSE; + error(p->g_lineno, +"Conflict resolver without conflict"); + retval = 1; } } free( (p_mem) temp); - check(l->l_rule); + retval |= check(l->l_rule); break; } } p++; @@ -288,10 +286,11 @@ moreverbose(t) register p_set t; { * also containing nonterminals. * Take care that a printout will be prepared for these nonterminals */ - register i; + register int i; + register p_nont p; - if (verbose) for (i = 0; i < nnonterms; i++) { - if (NTIN(t,i)) nonterms[i].n_flags |= VERBOSE; + if (verbose == 2) for (i = 0, p = nonterms; i < nnonterms; i++, p++) { + if (NTIN(t,i)) p->n_flags |= VERBOSE; } } @@ -307,54 +306,45 @@ prrule(p) register p_gram p; { for (;;) { switch (g_gettype(p)) { case EORULE : - putc('\n',f); + fputs("\n",f); return; case TERM : { register p_term q; - register c; + register int c; - q = (p_term) pentry[g_getcont(p)]; - if (present) newline(); - if (r_getkind(&(q->t_reps)) != FIXED || - r_getnum(&(q->t_reps)) != 0) { - fputs("[ ",f); - level += 4; - if (q->t_flags & RESOLVER) { - fputs("%%while (..)",f); - newline(); - } - if (r_getkind(&(q->t_reps)) != FIXED) { - printset(q->t_first, c_first); - printset(q->t_contains, c_contains); - printset(q->t_follow,c_follow); - if (q->t_flags & EMPTYFIRST) { - fputs(">>> empty first",f); - newline(); - } - if (q->t_flags & EMPTYTERM) { - fputs(">>> term produces empty",f); - newline(); - } - cfcheck(q->t_first,q->t_follow, - q->t_flags & RESOLVER); - } - prrule(q->t_rule); - level -= 4; - spaces(); - c = r_getkind(&(q->t_reps)); - putc(']',f); - if (c != FIXED) { - c = (c==STAR)?'*':(c==PLUS)?'+':'?'; - putc(c,f); + q = &terms[g_getcont(p)]; + if (present) prline("\n"); + fputs("[ ",f); + level += 4; + if (q->t_flags & RESOLVER) { + prline("%while (..)\n"); + } + if (q->t_flags & PERSISTENT) { + prline("%persistent\n"); + } + if (r_getkind(q) != FIXED) { + printset(q->t_first, c_first); + printset(q->t_contains, c_contains); + printset(q->t_follow,c_follow); + if (q->t_flags & EMPTYFIRST) { + prline(">>> empty first\n"); } - if (c = r_getnum(&(q->t_reps))) { - fprintf(f,"%d",c); + if (q->t_flags & EMPTYTERM) { + prline(">>> term produces empty\n"); } - newline(); - } else { - prrule(q->t_rule); - spaces(); + cfcheck(q->t_first,q->t_follow, + q->t_flags & RESOLVER); } + prrule(q->t_rule); + level -= 4; + spaces(); + c = r_getkind(q); + fputs(c == STAR ? "]*" : c == PLUS ? "]+" : + c == OPT ? "]?" : "]", f); + if (c = r_getnum(q)) { + fprintf(f,"%d",c); + } + prline("\n"); break; } case ACTION : fputs("{..} ",f); @@ -362,28 +352,44 @@ prrule(p) register p_gram p; { case ALTERNATION : { register p_link l; - l = (p_link) pentry[g_getcont(p)]; - if (l->l_flag & (COND|PREFERING|AVOIDING)) { - printset(l->l_symbs,"> alt with resolver on "); - } else printset(l->l_symbs,"> alternative on "); + l = &links[g_getcont(p)]; + if (g_gettype(p-1) == ALTERNATION) { + prline("|\n"); + } + printset(l->l_symbs,"> alternative on "); cfcheck(l->l_symbs, l->l_others, (int)(l->l_flag&(COND|PREFERING|AVOIDING))); fputs(" ",f); level += 4; + if (l->l_flag & DEF) { + prline("%default\n"); + } + if (l->l_flag & AVOIDING) { + prline("%avoid\n"); + } + if (l->l_flag & PREFERING) { + prline("%prefer\n"); + } + if (l->l_flag & COND) { + prline("%if ( ... )\n"); + } prrule(l->l_rule); level -= 4; - spaces(); if (g_gettype(p+1) == EORULE) { - fputs("> end alternatives\n",f); return; } + spaces(); p++; continue; } - case TERMINAL : - PTERM(&h_entry[g_getcont(p)]); - break; + case LITERAL : + case TERMINAL : { + register p_token pt = &tokens[g_getcont(p)]; + + fprintf(f,pt->t_tokno<0400 ? + "'%s' " : "%s ", pt->t_string); + break; } case NONTERM : - fprintf(f,"%s ",(g_getnont(p)+min_nt_ent)->h_name); + fprintf(f,"%s ",nonterms[g_getnont(p)].n_name); break; } p++; @@ -401,17 +407,16 @@ cfcheck(s1,s2,flag) p_set s1,s2; { */ register p_set temp; - temp = setalloc(setsize); - setunion(temp,s1,setsize); - if (!setintersect(temp,s2,setsize)) { + temp = setalloc(); + setunion(temp,s1); + if (!setintersect(temp,s2)) { if (! flag) { printset(temp,">>> conflict on "); - newline(); + prline("\n"); } } else { if (flag) { - fputs(">>> %if/%while, no conflict",fout); - newline(); + prline(">>> %if/%while, no conflict\n"); } } free((p_mem) temp); @@ -427,18 +432,18 @@ resolve(p) register p_gram p; { case EORULE : return; case TERM : - resolve(((p_term) pentry[g_getcont(p)])->t_rule); + resolve(terms[g_getcont(p)].t_rule); break; case ALTERNATION : { register p_link l; - l = (p_link) pentry[g_getcont(p)]; + l = &links[g_getcont(p)]; if (l->l_flag & AVOIDING) { /* * On conflicting symbols, this rule * is never chosen */ - setminus(l->l_symbs,l->l_others,setsize); + setminus(l->l_symbs,l->l_others); } if (setempty(l->l_symbs)) { /* @@ -461,7 +466,7 @@ propagate(set,p) p_set set; register p_gram p; { * p will not be chosen. */ while (g_gettype(p) != EORULE) { - setminus(((p_link) pentry[g_getcont(p)])->l_symbs,set,setsize); + setminus(links[g_getcont(p)].l_symbs,set); p++; } } diff --git a/util/LLgen/src/compute.c b/util/LLgen/src/compute.c index bfdef713d..00c741926 100644 --- a/util/LLgen/src/compute.c +++ b/util/LLgen/src/compute.c @@ -30,7 +30,6 @@ */ # include "types.h" -# include "tunable.h" # include "extern.h" # include "sets.h" # include "assert.h" @@ -39,31 +38,113 @@ # endif # ifndef NORCSID -static string rcsid2 = "$Header$"; +static string rcsid = "$Header$"; # endif -p_set setalloc(); +p_set get_set(); +typedef struct lngth { + /* Structure used to compute the shortest possible + * length of a terminal production of a rule. + * In case of a tie, the second field is used. + */ + int cnt; + int val; +} t_length, *p_length; /* Defined in this file : */ -extern createsets(); +extern do_compute(); +STATIC createsets(); STATIC walk(); -extern co_empty(); +STATIC co_trans(); +STATIC int nempty(); extern empty(); -extern co_first(); +STATIC int nfirst(); STATIC first(); -extern co_follow(); +STATIC int nfollow(); STATIC follow(); -extern co_symb(); STATIC co_dirsymb(); STATIC co_others(); -STATIC do_checkdefault(); -STATIC checkdefault(); -extern co_contains(); +STATIC int ncomplength(); +STATIC do_lengthcomp(); +STATIC complength(); +STATIC add(); +STATIC int compare(); +STATIC setdefaults(); STATIC do_contains(); STATIC contains(); -extern co_safes(); +STATIC int nsafes(); STATIC int do_safes(); +do_compute() { + /* + * Does all the work, by calling other routines (divide and conquer) + */ + register p_nont p; + register p_start st; + + createsets(); + co_trans(nempty); /* Which nonterminals produce empty? */ + co_trans(nfirst); /* Computes first sets */ + /* + * Compute FOLLOW sets. + * First put EOFILE in the follow set of the start nonterminals. + */ + for (st = start; st; st = st->ff_next) { + p = &nonterms[st->ff_nont]; + PUTIN(p->n_follow,0); + } + co_trans(nfollow); + /* + * Compute the sets which determine which alternative to choose + * in case of a choice + */ + for (p = nonterms; p < maxnt; p++) { + co_dirsymb(p->n_follow,p->n_rule); + } + /* + * Compute the minimum length of productions of nonterminals, + * and then determine the default choices + */ + do_lengthcomp(); + /* + * Compute the contains sets + */ + for (p = nonterms; p < maxnt; p++) do_contains(p); + for (p = nonterms; p < maxnt; p++) contains(p->n_rule, (p_set) 0); + /* + * Compute the safety of each nonterminal and term. + * The safety gives an answer to the question whether a scan is done, + * and how it should be handled. + */ + for (p = nonterms; p < maxnt; p++) { + /* + * Don't know anything yet + */ + setntsafe(p, NOSAFETY); + setntout(p, NOSAFETY); + } + for (st = start; st; st = st->ff_next) { + /* + * But start symbols are called with lookahead done + */ + p = &nonterms[st->ff_nont]; + setntsafe(p,SCANDONE); + } + co_trans(nsafes); +# ifndef NDEBUG + if (debug) { + fputs("Safeties:\n", stderr); + for (p = nonterms; p < maxnt; p++) { + fprintf(stderr, "%s\t%d\t%d\n", + p->n_name, + getntsafe(p), + getntout(p)); + } + } +# endif +} + +STATIC createsets() { /* * Allocate space for the sets @@ -71,8 +152,8 @@ createsets() { register p_nont p; for (p = nonterms; p < maxnt; p++) { - p->n_first = setalloc(setsize); - p->n_follow = setalloc(setsize); + p->n_first = get_set(); + p->n_follow = get_set(); walk(p->n_rule); } } @@ -88,16 +169,17 @@ walk(p) register p_gram p; { case TERM : { register p_term q; - q = (p_term) pentry[g_getcont(p)]; - q->t_first = setalloc(setsize); - q->t_follow = setalloc(setsize); + q = &terms[g_getcont(p)]; + q->t_first = get_set(); + q->t_follow = get_set(); walk(q->t_rule); break; } case ALTERNATION : { register p_link l; - l = (p_link) pentry[g_getcont(p)]; - l->l_symbs = setalloc(setsize); + l = &links[g_getcont(p)]; + l->l_symbs = get_set(); + l->l_others = get_set(); walk(l->l_rule); break; } case EORULE : @@ -107,23 +189,26 @@ walk(p) register p_gram p; { } } -co_empty() { - /* - * Which nonterminals produce the empty string ? - */ - register int change; - register p_nont p; +STATIC +co_trans(fc) int (*fc)(); { + register p_nont p; + register int change; - change = 1; - while (change) { + do { change = 0; - for (p=nonterms; p < maxnt; p++) { - if ((!(p->n_flags & EMPTY)) && empty(p->n_rule)) { - p->n_flags |= EMPTY; - change = 1; - } + for (p = nonterms; p < maxnt; p++) { + if ((*fc)(p)) change = 1; } + } while (change); +} + +STATIC int +nempty(p) register p_nont p; { + if (!(p->n_flags & EMPTY) && empty(p->n_rule)) { + p->n_flags |= EMPTY; + return 1; } + return 0; } empty(p) register p_gram p; { @@ -138,13 +223,13 @@ empty(p) register p_gram p; { case TERM : { register p_term q; - q = (p_term) pentry[g_getcont(p)]; - if (r_getkind(&(q->t_reps)) == STAR - || r_getkind(&(q->t_reps)) == OPT + q = &terms[g_getcont(p)]; + if (r_getkind(q) == STAR + || r_getkind(q) == OPT || empty(q->t_rule) ) break; return 0; } case ALTERNATION : - if (empty(((p_link)pentry[g_getcont(p)])->l_rule)) { + if (empty(links[g_getcont(p)].l_rule)) { return 1; } if (g_gettype(p+1) == EORULE) return 0; @@ -154,6 +239,7 @@ empty(p) register p_gram p; { break; } /* Fall through */ + case LITERAL : case TERMINAL : return 0; } @@ -161,21 +247,9 @@ empty(p) register p_gram p; { } } -co_first() { - /* - * Compute the FIRST set for each nonterminal - */ - - register p_nont p; - register int change; - - change = 1; - while (change) { - change = 0; - for (p = nonterms; p < maxnt; p++) { - if (first(p->n_first,p->n_rule,0)) change = 1; - } - } +STATIC int +nfirst(p) register p_nont p; { + return first(p->n_first, p->n_rule, 0); } STATIC @@ -185,7 +259,7 @@ first(setp,p,flag) p_set setp; register p_gram p; { * If flag = 0, also the first sets for terms and alternations in * the rule p are computed. * The FIRST set is put in setp. - * return 1 if any of the sets changed + * return 1 if the set refered to by "setp" changed */ register s; /* Will gather return value */ int noenter;/* when set, unables entering of elements into @@ -202,21 +276,25 @@ first(setp,p,flag) p_set setp; register p_gram p; { case TERM : { register p_term q; - q = (p_term) pentry[g_getcont(p)]; - if (flag == 0) s |= first(q->t_first,q->t_rule,0); - if (!noenter) s |= setunion(setp,q->t_first,setsize); + q = &terms[g_getcont(p)]; + if (flag == 0) { + if (first(q->t_first,q->t_rule,0))/*nothing*/; + } + if (!noenter) s |= setunion(setp,q->t_first); p++; - if (r_getkind(&(q->t_reps)) == STAR - || r_getkind(&(q->t_reps)) == OPT - || empty(q->t_rule) ) continue; + if (r_getkind(q) == STAR || + r_getkind(q) == OPT || + empty(q->t_rule)) continue; break; } case ALTERNATION : { register p_link l; - l = (p_link) pentry[g_getcont(p)]; - if (flag == 0) s |= first(l->l_symbs,l->l_rule,0); + l = &links[g_getcont(p)]; + if (flag == 0) { + if (first(l->l_symbs,l->l_rule,0))/*nothing*/; + } if (noenter == 0) { - s |= setunion(setp,l->l_symbs,setsize); + s |= setunion(setp,l->l_symbs); } if (g_gettype(p+1) == EORULE) return s; } @@ -224,6 +302,7 @@ first(setp,p,flag) p_set setp; register p_gram p; { case ACTION : p++; continue; + case LITERAL : case TERMINAL : if ((noenter == 0) && !IN(setp,g_getcont(p))) { s = 1; @@ -236,11 +315,8 @@ first(setp,p,flag) p_set setp; register p_gram p; { n = &nonterms[g_getnont(p)]; if (noenter == 0) { - s |= setunion(setp,n->n_first,setsize); - if (ntneeded && ! NTIN(setp,n-nonterms)) { - s = 1; - NTPUTIN(setp,n-nonterms); - } + s |= setunion(setp,n->n_first); + if (ntneeded) NTPUTIN(setp,g_getnont(p)); } p++; if (n->n_flags & EMPTY) continue; @@ -254,39 +330,17 @@ first(setp,p,flag) p_set setp; register p_gram p; { } } -co_follow() { - /* - * Compute the follow set for each nonterminal - */ - - register p_nont p; - register change; - register i; - p_start st; - - /* - * First put EOFILE in the follow set of the start symbols - */ - for (st = start; st; st = st->ff_next) PUTIN(st->ff_nont->n_follow,0); - change = 1; - i = 1; - while (change) { - change = 0; - for (p = nonterms; p < maxnt; p++) { - if (follow(p->n_follow,p->n_rule,i)) change = 1; - } - i = 0; - } +STATIC int +nfollow(p) register p_nont p; { + return follow(p->n_follow, p->n_rule); } STATIC -follow(setp,p,flag) p_set setp; register p_gram p; { +follow(setp,p) p_set setp; register p_gram p; { /* * setp is the follow set for the rule p. * Compute the follow sets in the rule p from this set. - * Return 1 if any set changed - * flag controls the use of "first" in the computation. - * It should be 1 the first time a rule is done, 0 otherwise. + * Return 1 if a follow set of a nonterminal changed. */ register s; /* Will gather return value */ @@ -298,55 +352,54 @@ follow(setp,p,flag) p_set setp; register p_gram p; { case TERM : { register p_term q; - q = (p_term) pentry[g_getcont(p)]; + q = &terms[g_getcont(p)]; if (empty(p+1)) { /* * If what follows the term can be empty, * everything that can follow the whole * rule can also follow the term */ - s |= setunion(q->t_follow,setp,setsize); + s |= setunion(q->t_follow,setp); } /* * Everything that can start the rest of the rule * can follow the term */ - if (flag) s |= first(q->t_follow,p+1,1); - if (r_getkind(&(q->t_reps)) == STAR - || r_getkind(&(q->t_reps)) == PLUS - || r_getnum(&(q->t_reps)) ) { + s |= first(q->t_follow,p+1,1); + if (r_getkind(q) == STAR || + r_getkind(q) == PLUS || + r_getnum(q) ) { /* * If the term involves a repetition * of possibly more than one, * everything that can start the term * can also follow it. */ - s |= follow(q->t_first,q->t_rule,flag); + s |= follow(q->t_first,q->t_rule); } /* * Now propagate the set computed sofar */ - s |= follow(q->t_follow, q->t_rule,flag); + s |= follow(q->t_follow, q->t_rule); break; } case ALTERNATION : /* * Just propagate setp */ - s |= follow(setp,((p_link)pentry[g_getcont(p)])->l_rule, - flag); + s |= follow(setp,links[g_getcont(p)].l_rule); break; case NONTERM : { register p_nont n; n = &nonterms[g_getnont(p)]; - if (flag) s |= first(n->n_follow,p+1,1); + s |= first(n->n_follow,p+1,1); if (empty(p+1)) { /* * If the rest of p can produce empty, * everything that follows p can follow * the nonterminal */ - s |= setunion(n->n_follow,setp,setsize); + s |= setunion(n->n_follow,setp); } break; } } @@ -354,23 +407,6 @@ follow(setp,p,flag) p_set setp; register p_gram p; { } } -co_symb() { - /* - * Compute the sets which determine which alternative to choose - * in case of a choice - * Also check the continuation grammar and see if rules do scan - * ahead. - */ - register p_nont p; - - for (p = nonterms; p < maxnt; p++) { - co_dirsymb(p->n_follow,p->n_rule); - } - for (p = nonterms; p < maxnt; p++) { - do_checkdefault(p); - } -} - STATIC co_dirsymb(setp,p) p_set setp; register p_gram p; { /* @@ -385,7 +421,7 @@ co_dirsymb(setp,p) p_set setp; register p_gram p; { case TERM : { register p_term q; - q = (p_term) pentry[g_getcont(p)]; + q = &terms[g_getcont(p)]; co_dirsymb(q->t_follow,q->t_rule); break; } case ALTERNATION : { @@ -394,15 +430,14 @@ co_dirsymb(setp,p) p_set setp; register p_gram p; { * Save first alternative */ if (!s) s = p; - l = (p_link) pentry[g_getcont(p)]; - l->l_others = setalloc(setsize); + l = &links[g_getcont(p)]; co_dirsymb(setp,l->l_rule); if (empty(l->l_rule)) { /* * If the rule can produce empty, everything * that follows it can also start it */ - setunion(l->l_symbs,setp,setsize); + setunion(l->l_symbs,setp); } if (g_gettype(p+1) == EORULE) { /* @@ -428,10 +463,10 @@ co_others(p) register p_gram p; { */ register p_link l1,l2; - l1 = (p_link) pentry[g_getcont(p)]; + l1 = &links[g_getcont(p)]; p++; - l2 = (p_link) pentry[g_getcont(p)]; - setunion(l1->l_others,l2->l_symbs,setsize); + l2 = &links[g_getcont(p)]; + setunion(l1->l_others,l2->l_symbs); if (g_gettype(p+1) != EORULE) { /* * First compute l2->l_others @@ -440,120 +475,183 @@ co_others(p) register p_gram p; { /* * and then l1->l_others */ - setunion(l1->l_others,l2->l_others,setsize); + setunion(l1->l_others,l2->l_others); } } +static p_length length; +# define INFINITY 32767 + STATIC -do_checkdefault(p) register p_nont p; { +do_lengthcomp() { /* - * check the continuation rule for nonterminal p, unless - * this is already being(!) done + * Compute the minimum length of a terminal production for each + * nonterminal. + * This length consists of two fields: the number of terminals, + * and a number that is composed of + * - the value of the first terminal + * - a crude measure of the number of terms and nonterminals in the + * production of this shortest string. */ - if (p->n_flags & BUSY) { - /* - * Error situation, recursion in continuation grammar - */ - p->n_flags ^= (RECURSIVE|BUSY); - return; + register p_length pl; + register p_nont p; + p_mem alloc(); + + length = (p_length) alloc((unsigned) (nnonterms * sizeof(*length))); + for (pl = &length[nnonterms-1]; pl >= length; pl--) { + pl->val = pl->cnt = INFINITY; } - if (p->n_flags & CONTIN) { - /* - * Was already done - */ - return; + co_trans(ncomplength); + pl = length; + for (p = nonterms; p < maxnt; p++, pl++) { + if (pl->cnt == INFINITY) { + p->n_flags |= RECURSIVE; + } + setdefaults(p->n_rule); } - /* - * Now mark it as busy, and check the rule - */ - p->n_flags |= BUSY; - checkdefault(p->n_rule); - /* - * Now release the busy mark, and mark it as done - */ - p->n_flags ^= (CONTIN|BUSY); - return; + free ((p_mem) length); +} + +STATIC int +ncomplength(p) register p_nont p; { + register p_length l; + + l = &length[p - nonterms]; + if (l->cnt == INFINITY) { + complength(p->n_rule, l); + if (l->cnt != INFINITY) return 1; + } + return 0; } STATIC -checkdefault(p) register p_gram p; { +complength(p,le) register p_gram p; register p_length le; { /* - * Walk grammar rule p, checking the continuation grammar + * Walk grammar rule p, computing minimum lengths */ register p_link l; register p_term q; + t_length i; + le->cnt = 0; + le->val = 0; for (;;) { switch (g_gettype(p)) { case EORULE : return; + case LITERAL : + case TERMINAL : + if (!le->cnt) add(le, 1, g_getcont(p)); + else add(le, 1, 0); + break; case ALTERNATION : - l = (p_link) pentry[g_getcont(p)]; - if (l->l_flag & DEF) { - /* - * This alternative belongs to the - * continuation grammar, so check it - */ - checkdefault(l->l_rule); - return; + + le->cnt = INFINITY; + le->val = INFINITY; + while (g_gettype(p) != EORULE) { + l = &links[g_getcont(p)]; + complength(l->l_rule,&i); + if (l->l_flag & DEF) { + *le = i; + return; + } + if (compare(&i, le) < 0) { + *le = i; + } + p++; } - break; - case TERM : - q = (p_term) pentry[g_getcont(p)]; - /* - * First check the rest of the rule - */ - checkdefault(p+1); - /* - * Now check the term if it belongs to the - * continuation grammar - */ - if (r_getkind(&(q->t_reps))==FIXED || - r_getkind(&(q->t_reps))==PLUS) { - checkdefault(q->t_rule); - return; + return; + case TERM : { + register int rep; + + q = &terms[g_getcont(p)]; + rep = r_getkind(q); + if ((q->t_flags&PERSISTENT) || + rep==FIXED || rep==PLUS) { + complength(q->t_rule,&i); + add(le, i.cnt, i.val); + if (i.cnt == 0) le->val += ntokens; + if (rep == FIXED && r_getnum(q) > 0) { + for (rep = r_getnum(q) - 1; + rep > 0; rep--) { + add(le, i.cnt, i.val); + } + } } - /* - * Here we have OPT or STAR - * Only in the continuation grammar if %persistent - */ - if (q->t_flags & PERSISTENT) { - checkdefault(q->t_rule); + else { + /* Empty producing term on this path */ + le->val += ntokens; } - return; - case NONTERM : - /* - * Check the continuation grammar for this nonterminal. - * Note that the nonterminal we are working on is - * marked as busy, so that direct or indirect recursion - * can be detected - */ - do_checkdefault(&nonterms[g_getnont(p)]); - break; + break; } + case NONTERM : { + register p_length temp; + + temp = &length[g_getnont(p)]; + add(le, temp->cnt, temp->val); + if (temp->cnt == 0) { + /* Empty producing nonterminal */ + le->val += ntokens; + }} } p++; } } -co_contains() { - /* - * Compute the contains sets - */ - register p_nont p; - register p_set dummy; +STATIC +add(a, c, v) register p_length a; { - for (p = nonterms; p < maxnt; p++) do_contains(p); - dummy = setalloc(setsize); -# ifndef NDEBUG - if (debug) fputs("Contains 1 done\n", stderr); -# endif - free(dummy); - for (p = nonterms; p < maxnt; p++) contains(p->n_rule, (p_set) 0); -# ifndef NDEBUG - if (debug) fputs("Contains 2 done\n", stderr); -# endif - dummy = setalloc(setsize); - free(dummy); + if (c == INFINITY) { + a->cnt = INFINITY; + return; + } + if (a->cnt == 0) a->val = v; + a->cnt += c; +} + +STATIC int +compare(a, b) register p_length a, b; { + if (a->cnt != b->cnt) return a->cnt - b->cnt; + return a->val - b->val; +} + +STATIC +setdefaults(p) register p_gram p; { + for (;;) { + switch(g_gettype(p)) { + case EORULE: + return; + case TERM: + setdefaults(terms[g_getcont(p)].t_rule); + break; + case ALTERNATION: { + register p_link l, l1; + int temp = 0, temp1; + t_length count, i; + + count.cnt = INFINITY; + count.val = INFINITY; + l1 = &links[g_getcont(p)]; + do { + l = &links[g_getcont(p)]; + complength(l->l_rule,&i); + if (l->l_flag & DEF) temp = 1; + temp1 = compare(&i, &count); + if (temp1 < 0 || + (temp1 == 0 && l1->l_flag & AVOIDING)) { + l1 = l; + count = i; + } + setdefaults(l->l_rule); + p++; + } while (g_gettype(p) != EORULE); + if (!temp) { + /* No user specified default */ + l1->l_flag |= DEF; + } + return; } + } + p++; + } } STATIC @@ -564,7 +662,7 @@ do_contains(n) register p_nont n; { */ if (n->n_contains == 0) { - n->n_contains = setalloc(setsize); + n->n_contains = get_set(); contains(n->n_rule,n->n_contains); /* * If the rule can produce empty, delete all symbols that @@ -574,13 +672,13 @@ do_contains(n) register p_nont n; { * Otherwise, the generated parser may loop forever */ if (n->n_flags & EMPTY) { - setminus(n->n_contains,n->n_follow,setsize); + setminus(n->n_contains,n->n_follow); } /* * But the symbols that can start the rule are always * eaten */ - setunion(n->n_contains,n->n_first,setsize); + setunion(n->n_contains,n->n_first); } } @@ -596,11 +694,12 @@ contains(p,set) register p_gram p; register p_set set; { return; case TERM : { register p_term q; + int rep; - q = (p_term) pentry[g_getcont(p)]; + q = &terms[g_getcont(p)]; + rep = r_getkind(q); if ((q->t_flags & PERSISTENT) || - r_getkind(&(q->t_reps)) == PLUS || - r_getkind(&(q->t_reps)) == FIXED) { + rep == PLUS || rep == FIXED) { /* * In these cases, the term belongs to the * continuation grammar. @@ -608,44 +707,43 @@ contains(p,set) register p_gram p; register p_set set; { * q->t_first */ if (!q->t_contains) { - q->t_contains = setalloc(setsize); + q->t_contains = get_set(); } contains(q->t_rule,q->t_contains); - if (empty(q->t_rule)) { - /* - * Same trouble as mentioned in the - * routine do_contains - */ - setminus(q->t_contains,q->t_follow, - setsize); + if (rep != FIXED || empty(q->t_rule)) { + setminus(q->t_contains,q->t_follow); } - setunion(q->t_contains,q->t_first,setsize); + setunion(q->t_contains,q->t_first); } else { contains(q->t_rule, (p_set) 0); q->t_contains = q->t_first; } - if (set) setunion(set,q->t_contains,setsize); + if (set) setunion(set,q->t_contains); break; } case NONTERM : { register p_nont n; n = &nonterms[g_getnont(p)]; do_contains(n); - if(set) setunion(set, n->n_contains,setsize); + if (set) { + setunion(set, n->n_contains); + if (ntneeded) NTPUTIN(set, g_getnont(p)); + } break; } case ALTERNATION : { register p_link l; - l = (p_link) pentry[g_getcont(p)]; + l = &links[g_getcont(p)]; contains(l->l_rule, (l->l_flag & DEF) ? set : (p_set) 0); break; } + case LITERAL : case TERMINAL : { register hulp; if (set) { hulp = g_getcont(p); - assert(hulp < nterminals); + assert(hulp < ntokens); PUTIN(set,hulp); }} } @@ -653,74 +751,36 @@ contains(p,set) register p_gram p; register p_set set; { } } -static int change; - -co_safes() { - /* - * Compute the safety of each nonterminal and term. - * The safety gives an answer to the question whether a scan is done, - * and how it should be handled. - */ - - register p_nont p; - register i; - register p_start st; - - for (p = nonterms; p < maxnt; p++) { - /* - * Don't know anything yet - */ - setntsafe(p, NOSAFETY); - setntout(p, NOSAFETY); - } - for (st = start; st; st = st->ff_next) { - /* - * But start symbols are called with lookahead done - */ - p = st->ff_nont; - setntsafe(p,SCANDONE); - } - change = 1; - while (change) { - change = 0; - for (p = nonterms; p < maxnt; p++) { - i = getntsafe(p); - if (i == NOSAFETY) { - continue; - } - i = do_safes(p->n_rule, i); - if (getntout(p) != i) { - change = 1; - setntout(p, i); - } +STATIC int nsafes(p) register p_nont p; { + int ch; + register int i; + + ch = 0; + i = getntsafe(p); + if (i != NOSAFETY) { + i = do_safes(p->n_rule, i, &ch); + if (getntout(p) != i) { + ch = 1; + setntout(p,i); } } -# ifndef NDEBUG - if (debug) { - fputs("Safeties:\n", stderr); - for (p = nonterms; p < maxnt; p++) { - fprintf(stderr, "%s\t%d\t%d\n", - (min_nt_ent + (p - nonterms))->h_name, - getntsafe(p), - getntout(p)); - } - } -# endif + return ch; } STATIC int -do_safes(p,safe) register p_gram p; { +do_safes(p,safe,ch) register p_gram p; register int *ch; { /* * Walk the grammar rule, doing the computation described in the * comment of the procedure above this one. */ - register retval; + int retval; for (;;) { switch (g_gettype(p)) { case ACTION: p++; continue; + case LITERAL: case TERMINAL: safe = NOSCANDONE; break; @@ -728,27 +788,25 @@ do_safes(p,safe) register p_gram p; { register p_term q; int i,rep; - q = (p_term) pentry[g_getcont(p)]; - i = r_getnum(&(q->t_reps)); - rep = r_getkind(&(q->t_reps)); + q = &terms[g_getcont(p)]; + i = r_getnum(q); + rep = r_getkind(q); retval = do_safes(q->t_rule, - t_safety(rep,i,q->t_flags&PERSISTENT,safe)); - if (retval != gettout(q)) { - settout(q, retval); - } - safe = t_after(rep, i, gettout(q)); + t_safety(rep,i,q->t_flags&PERSISTENT,safe),ch); + settout(q, retval); + safe = t_after(rep, i, retval); break; } case ALTERNATION : { register p_link l; - int f, i; + register int i, f; f = 1; while (g_gettype(p) == ALTERNATION) { - l = (p_link) pentry[g_getcont(p)]; + l = &links[g_getcont(p)]; if (safe > SAFE && (l->l_flag & DEF)) { - i = do_safes(l->l_rule,SAFESCANDONE); + i = do_safes(l->l_rule,SAFESCANDONE,ch); } - else i = do_safes(l->l_rule,SAFE); + else i = do_safes(l->l_rule,SAFE,ch); if (f) retval = i; else if (i != retval) { if (i == NOSCANDONE || @@ -763,7 +821,7 @@ do_safes(p,safe) register p_gram p; { return retval; } case NONTERM : { register p_nont n; - int nsafe, osafe; + register int nsafe, osafe; n = &nonterms[g_getnont(p)]; nsafe = getntsafe(n); @@ -772,20 +830,20 @@ do_safes(p,safe) register p_gram p; { if (safe == NOSAFETY) safe = SCANDONE; if (osafe == nsafe) break; if (nsafe == NOSAFETY) { - change = 1; + *ch = 1; setntsafe(n, osafe); break; } if (osafe == NOSCANDONE || nsafe == NOSCANDONE) { if (nsafe != SCANDONE) { - change = 1; + *ch = 1; setntsafe(n, SCANDONE); } break; } if (osafe > nsafe) { setntsafe(n, osafe); - change = 1; + *ch = 1; } break; } case EORULE : @@ -797,29 +855,24 @@ do_safes(p,safe) register p_gram p; { t_safety(rep, count, persistent, safety) { + if (safety == NOSCANDONE) safety = SCANDONE; switch(rep) { default: assert(0); case OPT: - if (!persistent) return SAFE; - if (safety < SAFESCANDONE) return safety; + if (!persistent || safety < SAFESCANDONE) return SAFE; return SAFESCANDONE; case STAR: if (persistent) return SAFESCANDONE; return SAFE; case PLUS: - if (safety == NOSCANDONE) safety = SCANDONE; if (persistent) { if (safety > SAFESCANDONE) return safety; return SAFESCANDONE; } - if (safety > SAFE) return safety; - return SAFE; + return safety; case FIXED: - if (!count) { - if (safety == NOSCANDONE) safety = SCANDONE; - return safety; - } + if (!count) return safety; return SCANDONE; } /* NOTREACHED */ diff --git a/util/LLgen/src/extern.h b/util/LLgen/src/extern.h index c4f28a82f..81563af52 100644 --- a/util/LLgen/src/extern.h +++ b/util/LLgen/src/extern.h @@ -29,10 +29,12 @@ * some variables that are visible in more than one file */ +# define LTEXTSZ 51 /* Size of longest token */ + /* * options for the identifier search routine */ -# define JUSTLOOKING 0 + # define ENTERING 1 # define BOTH 2 @@ -42,27 +44,22 @@ extern char ltext[]; /* input buffer */ extern int nnonterms; /* number of nonterminals */ -extern int nterminals; /* number of terminals */ +extern int ntokens; /* number of terminals */ extern p_start start; /* will contain startsymbols */ extern int linecount; /* line number */ extern int assval; /* to create difference between literals * and other terminals */ -extern t_nont nonterms[]; /* the nonterminal array */ +extern p_nont nonterms; /* the nonterminal array */ extern p_nont maxnt; /* is filled up until here */ -extern int order[]; /* order of nonterminals in the grammar, +extern p_token tokens; /* the token array */ +extern p_token maxt; /* is filled up until here */ +extern struct order *sorder, *porder; + /* order of nonterminals in the grammar, * important because actions are copied to * a temporary file in the order in which they * were read */ -extern int *maxorder; /* will contain &order[nnonterms] */ -extern t_entry h_entry[]; /* terminal and nonterminal entrys, - * first NTERMINAL entrys reserved - * for terminals - */ -extern p_entry max_t_ent; /* will contain &h_entry[nterminals] */ -# define min_nt_ent &h_entry[NTERMINALS] -extern string pentry[]; /* pointers to various allocated things */ extern string e_noopen; /* Error message string used often */ extern int verbose; /* Level of verbosity */ extern string lexical; /* name of lexical analyser */ @@ -78,9 +75,10 @@ extern int debug; extern p_file files,pfile; /* pointers to file structure. * "files" points to the start of the * list */ +extern p_file maxfiles; extern string LLgenid; /* LLgen identification string */ extern t_token lextoken; /* the current token */ extern int nerrors; -extern int fflag; /* Enable compiler to generate jump tables - * for switches? - */ +extern string rec_file, incl_file; +extern p_term terms; +extern p_link links; diff --git a/util/LLgen/src/gencode.c b/util/LLgen/src/gencode.c index c24952a32..528777e53 100644 --- a/util/LLgen/src/gencode.c +++ b/util/LLgen/src/gencode.c @@ -32,10 +32,10 @@ # include "types.h" # include "io.h" -# include "tunable.h" # include "extern.h" # include "sets.h" # include "assert.h" +# include "cclass.h" # ifndef NORCSID static string rcsid3 = "$Header$"; @@ -47,7 +47,6 @@ static string rcsid3 = "$Header$"; static string c_arrend = "0 };\n"; static string c_close = "}\n"; -static string c_LLptrmin = "LLptr++;\n"; static string c_break = "break;\n"; static string c_read = "LLread();\n"; @@ -57,10 +56,7 @@ static string c_read = "LLread();\n"; static int nlabel; /* count for the generation of labels */ static int nvar; /* count for generation of variables */ -static int pushlist[100]; -static int *ppushlist; -static int *lists,*maxlists,*plists; -p_mem ralloc(),alloc(); +static int firsts; /* are there any? */ /* In this file the following routines are defined: */ extern gencode(); @@ -75,30 +71,27 @@ STATIC controlline(); STATIC getparams(); STATIC gettok(); STATIC rulecode(); -STATIC int dopush(); -STATIC pushcode(); +STATIC int * dopush(); STATIC getaction(); +STATIC alternation(); STATIC codeforterm(); -STATIC genifhead(); +STATIC genswhead(); STATIC gencases(); STATIC genpush(); +STATIC genpop(); +STATIC genincrdecr(); -/* Macro to print a terminal */ -# define PTERM(f,p) fprintf(f,(p)->h_num<0400?"'%s'":"%s",(p)->h_name) +# define NOPOP -20000 + +p_mem alloc(); gencode(argc) { register p_file p = files; - /* Generate include file Lpars.h */ - geninclude(); - /* Set up for code generation */ if ((fact = fopen(f_temp,"r")) == NULL) { fatal(0,e_noopen,f_temp); } - lists = (int *) alloc(50 * sizeof(int)); - plists = lists; - maxlists = lists+49; /* For every source file .... */ while (argc--) { @@ -106,7 +99,7 @@ gencode(argc) { f_input = p->f_name; opentemp(f_input); /* generate code ... */ - copyfile(0); + copyfile(incl_file); generate(p); getaction(2); if (ferror(fpars) != 0) { @@ -117,6 +110,7 @@ gencode(argc) { install(genname(p->f_name),p->f_name); p++; } + geninclude(); genrecovery(); } @@ -126,86 +120,98 @@ opentemp(str) string str; { if ((fpars = fopen(f_pars,"w")) == NULL) { fatal(0,e_noopen,f_pars); } - fprintf(fpars,LLgenid,str ? str : "."); + if (!str) str = "."; + fprintf(fpars,LLgenid,str); } STATIC geninclude() { - register p_entry p; - register FILE *f; + register p_token p; opentemp((string) 0); - f = fpars; - for (p = h_entry; p < max_t_ent; p++) { - if (p->h_num >= 0400) { - fprintf(f,"# define %s %d\n", p->h_name,p->h_num); + for (p = tokens; p < maxt; p++) { + if (p->t_tokno >= 0400) { + fprintf(fpars,"# define %s %d\n", + p->t_string, + p->t_tokno); } } - if (ferror(f) != 0) { + if (ferror(fpars) != 0) { fatal(0,"write error on temporary"); } - fclose(f); - install(HFILE, (string) 0); + fclose(fpars); + install(HFILE, "."); } STATIC genrecovery() { register FILE *f; - register p_entry t; - register int i; + register p_token t; register int *q; - int index[256+NTERMINALS]; + register p_nont p; + register p_set *psetl; + int *index; + int i; register p_start st; opentemp((string) 0); f = fpars; - copyfile(0); + copyfile(incl_file); + if (!firsts) fputs("#define LLNOFIRSTS\n", f); + for (st = start; st; st = st->ff_next) { + /* Make sure that every set the parser needs is in the list + * before generating a define of the number of them! + */ + p = &nonterms[st->ff_nont]; + if (g_gettype(p->n_rule) == ALTERNATION) { + findindex(p->n_contains); + } + } + i = maxptr - setptr; + fprintf(fpars, +"#define LL_LEXI %s\n#define LL_SSIZE %d\n#define LL_NSETS %d\n#define LL_NTERMINALS %d\n", + lexical, + nbytes, + i > 0 ? i : 1, + ntokens); /* Now generate the routines that call the startsymbols */ - fputs("#define LLSTSIZ 1024\n",f); - for (i = 1, st = start; st; st = st->ff_next) { - i++; + for (st = start; st; st = st->ff_next) { fputs(st->ff_name, f); - fputs("() {\n\tint LLstack[LLSTSIZ];\n\tLLnewlevel(LLstack);\n\tLLread();\n", f); - if (g_gettype(st->ff_nont->n_rule) == ALTERNATION) { - fprintf(f, "\tLLxx.LLx_p--; *LLxx.LLx_p = %d;\n", - findindex(&(st->ff_nont->n_contains))); + p = &nonterms[st->ff_nont]; + fputs("() {\n\tunsigned int s[LL_NTERMINALS+LL_NSETS+1];\n\tLLnewlevel(s);\n\tLLread();\n", f); + if (g_gettype(p->n_rule) == ALTERNATION) { + genpush(findindex(p->n_contains)); } fprintf(f, "\tL%d_%s();\n", - st->ff_nont-nonterms, - (min_nt_ent+(st->ff_nont-nonterms))->h_name); - if (getntout(st->ff_nont) == NOSCANDONE) { + st->ff_nont, + p->n_name); + if (getntout(p) == NOSCANDONE) { fputs("\tLLscan(EOFILE);\n",f); } else fputs("\tif (LLsymb != EOFILE) LLerror(EOFILE);\n",f); - fputs("\tLLoldlevel();\n}\n",f); + fputs("\tLLoldlevel(s);\n}\n",f); } - fprintf(f,"#define LL_MAX %d\n#define LL_LEXI %s\n", i, lexical); - fputs("static short LLlists[] = {\n", f); - /* Now generate lists */ - q = lists; - while (q < plists) { - fprintf(f,"%d,\n",*q++); - } - fputs(c_arrend, f); /* Now generate the sets */ fputs("char LLsets[] = {\n",f); - for (i = 0; i < maxptr-setptr; i++) prset(setptr[i]); + for (psetl = setptr; psetl < maxptr; psetl++) prset(*psetl); fputs(c_arrend, f); - for (q = index; q <= &index[255 + NTERMINALS];) *q++ = -1; - for (t = h_entry; t < max_t_ent; t++) { - index[t->h_num] = t - h_entry; + index = (int *) alloc((unsigned) (assval * sizeof(int))); + for (q = index; q < &index[assval];) *q++ = -1; + for (t = tokens; t < maxt; t++) { + index[t->t_tokno] = t - tokens; } fputs("short LLindex[] = {\n",f); - for (q = index; q <= &index[assval-1]; q++) { + for (q = index; q < &index[assval]; q++) { fprintf(f, "%d,\n", *q); } fputs(c_arrend, f); - copyfile(1); + free((p_mem) index); + copyfile(rec_file); if (ferror(f) != 0) { fatal(0,"write error on temporary"); } fclose(f); - install(RFILE, (string) 0); + install(RFILE, "."); } STATIC @@ -213,48 +219,49 @@ generate(f) p_file f; { /* * Generates a parsing routine for every nonterminal */ - register short *s; + register p_order s; register p_nont p; - register FILE *fd; int i; - p_first ff; + register p_first ff; int mustpop; /* Generate first sets */ for (ff = f->f_firsts; ff; ff = ff->ff_next) { - macro(ff->ff_name,ff->ff_nont); + macro(ff->ff_name,&nonterms[ff->ff_nont]); } /* For every nonterminal generate a function */ - fd = fpars; - for (s = f->f_start; s <= f->f_end; s++) { - p = &nonterms[*s]; + for (s = f->f_list; s; s = s->o_next) { + p = &nonterms[s->o_index]; /* Generate functions in the order in which the nonterminals * were defined in the grammar. This is important, because * of synchronisation with the action file */ while (p->n_count--) getaction(1); - if (p->n_flags & PARAMS) controlline(); - fprintf(fd,"L%d_%s (",*s,(min_nt_ent + *s)->h_name); - if (p->n_flags & PARAMS) getparams(); - else fputs(") {\n", fd); - fputs("register struct LLxx *LLx = &LLxx;\n#ifdef lint\nLLx=LLx;\n#endif\n", fd); + fprintf(fpars,"L%d_%s (\n",s->o_index,p->n_name); + if (p->n_flags & PARAMS) { + controlline(); + getparams(); + } + else fputs(") {\n", fpars); if (p->n_flags & LOCALS) getaction(1); i = getntsafe(p); - mustpop = 0; - if (g_gettype(p->n_rule) == ALTERNATION) { - mustpop = 1; + mustpop = NOPOP; + if (g_gettype(p->n_rule) == ALTERNATION && + i > SAFESCANDONE) { + mustpop = findindex(p->n_contains); if (i == NOSCANDONE) { - fputs(c_read, fd); + fputs(c_read, fpars); i = SCANDONE; } } nlabel = 1; + nvar = 1; rulecode(p->n_rule, i, getntout(p) != NOSCANDONE, mustpop); - fputs(c_close, fd); + fputs(c_close, fpars); } } @@ -264,7 +271,7 @@ prset(p) p_set p; { register unsigned i; int j; - j = NBYTES(nterminals); + j = nbytes; for (;;) { i = (unsigned) *p++; for (k = 0; k < sizeof(int); k++) { @@ -281,17 +288,17 @@ prset(p) p_set p; { STATIC macro(s,n) string s; p_nont n; { - register FILE *f; int i; - f = fpars; - i = findindex(&(n->n_first)); - fprintf(f,"#define %s(x) ", s); + i = findindex(n->n_first); if (i < 0) { - fprintf(f, "((x) == %d)\n", -i); + fprintf(fpars, "#define %s(x) ((x) == %d)\n", + s, + tokens[-(i+1)].t_tokno); return; } - fprintf(f,"LLfirst((x), %d)\n", i); + firsts = 1; + fprintf(fpars,"#define %s(x) LLfirst((x), %d)\n", s, i); } STATIC @@ -303,8 +310,6 @@ controlline() { f1 = fact; f2 = fpars; l = getc(f1); assert(l == '\0'); - l = getc(f1); putc(l,f2); - assert( l == '#' ) ; do { l = getc(f1); putc(l,f2); @@ -318,12 +323,10 @@ getparams() { */ long off; register int l; - register FILE *f; long ftell(); char first; first = ' '; - f = fpars; /* save offset in file to be able to copy the declaration later */ off = ftell(fact); /* First pass through declaration, find the parameter names */ @@ -333,17 +336,17 @@ getparams() { * The last identifier found before a ';' or a ',' * must be a parameter */ - fprintf(f,"%c%s", first, ltext); + fprintf(fpars,"%c%s", first, ltext); first = ','; } } - fputs(") ",f); + fputs(") ",fpars); /* * Now copy the declarations */ fseek(fact,off,0); getaction(0); - fputs(" {\n",f); + fputs(" {\n",fpars); } STATIC @@ -369,13 +372,13 @@ gettok() { case EOF : return ENDDECL; default : - if (isalpha(ch) || ch == '_') { + if (c_class[ch] == ISLET) { c = ltext; - while (isalnum(ch) || ch == '_') { + do { *c++ = ch; if (c-ltext >= LTEXTSZ) --c; ch = getc(f); - } + } while (c_class[ch] == ISLET || c_class[ch] == ISDIG); if (ch != EOF) ungetc(ch,f); *c = '\0'; return IDENT; @@ -392,13 +395,27 @@ rulecode(p,safety,mustscan,mustpop) register p_gram p; { register int toplevel = 1; register FILE *f; + register int *ppu; + int pushlist[100]; + int *ppushlist; /* * First generate code to push the contains sets of this rule * on a stack */ - ppushlist = pushlist; - if (dopush(p,safety,1) > 0) pushcode(); + ppu = pushlist; + ppushlist = dopush(p,safety,1,ppu); + if (mustpop != NOPOP) for (; ppu < ppushlist; ppu++) { + if (*ppu == mustpop) { + *ppu = mustpop = NOPOP; + break; + } + } + if (g_gettype(p) != ALTERNATION) { + genpop(mustpop); + mustpop = NOPOP; + } + for (ppu = pushlist; ppu < ppushlist; ppu++) genpush(*ppu); f = fpars; for (;;) { switch (g_gettype(p)) { @@ -407,25 +424,31 @@ rulecode(p,safety,mustscan,mustpop) register p_gram p; { fputs(c_read,f); } return; + case LITERAL : case TERMINAL : { - register p_entry t; + register p_token t; + string s; - t = &h_entry[g_getcont(p)]; + t = &tokens[g_getcont(p)]; if (toplevel == 0) { - fputs(c_LLptrmin,f); + fprintf(f,"LLtdecr(%d);\n", g_getcont(p)); } if (safety == SAFE) { fputs("LL_SAFE(",f); } - else if (safety <= SCANDONE) { + else if (safety == SAFESCANDONE) { + fputs("LL_SSCANDONE(",f); + } + else if (safety == SCANDONE) { fputs("LL_SCANDONE(",f); } - else if (safety == NOSCANDONE) { + else /* if (safety == NOSCANDONE) */ { fputs("LL_T_NOSCANDONE(", f); } - PTERM(f,t); - fputs(");\n", f); - if (safety == SAFE && toplevel > 0) { + if (t->t_tokno < 0400) s = "'%s');\n"; + else s = "%s);\n"; + fprintf(f,s,t->t_string); + if (safety <= SAFESCANDONE && toplevel > 0) { safety = NOSCANDONE; toplevel = -1; p++; @@ -434,36 +457,35 @@ rulecode(p,safety,mustscan,mustpop) register p_gram p; { safety = NOSCANDONE; break; } case NONTERM : { - p_entry t; register p_nont n; - int params; n = &nonterms[g_getnont(p)]; - t= min_nt_ent+(n-nonterms); if (safety == NOSCANDONE && getntsafe(n) < NOSCANDONE) fputs(c_read, f); if (toplevel == 0 && - g_gettype(n->n_rule) != ALTERNATION) { - fputs(c_LLptrmin, f); + (g_gettype(n->n_rule) != ALTERNATION || + getntsafe(n) <= SAFESCANDONE)) { + genpop(findindex(n->n_contains)); + } + fprintf(f,"L%d_%s(\n",g_getnont(p), n->n_name); + if (g_getnpar(p)) { + controlline(); + getaction(0); } - params = g_getnpar(p); - if (params) controlline(); - fprintf(f,"L%d_%s(",n-nonterms, t->h_name); - if (params) getaction(0); fputs(");\n",f); safety = getntout(n); break; } case TERM : - safety = codeforterm((p_term) pentry[g_getcont(p)], - safety, - toplevel); + safety = codeforterm(&terms[g_getcont(p)], + safety, + toplevel); break; case ACTION : getaction(1); p++; continue; case ALTERNATION : - alternation(p, safety, mustscan,mustpop, 0); + alternation(p, safety, mustscan, mustpop, 0, 0); return; } p++; @@ -471,11 +493,11 @@ rulecode(p,safety,mustscan,mustpop) register p_gram p; { } } -alternation(p, safety, mustscan, mustpop, lb) register p_gram p; { +STATIC +alternation(p, safety, mustscan, mustpop, lb, var) register p_gram p; { register FILE *f = fpars; register p_link l; int hulp, hulp1,hulp2; - int var; int haddefault = 0; int unsafe = 1; int nsafe; @@ -483,24 +505,23 @@ alternation(p, safety, mustscan, mustpop, lb) register p_gram p; { p_set setalloc(); assert(safety < NOSCANDONE); - l = (p_link) pentry[g_getcont(p)]; + l = &links[g_getcont(p)]; hulp = nlabel++; hulp1 = nlabel++; hulp2 = nlabel++; - var = nvar++; if (!lb) lb = hulp1; if (safety <= SAFESCANDONE) unsafe = 0; - if (!unsafe && mustpop) { - mustpop = 0; - fputs(c_LLptrmin, f); + if (!unsafe) { + genpop(mustpop); + mustpop = NOPOP; } - if (unsafe) { - fprintf(f,"{ int LL_%d = 0;\n", var); - if (hulp1 == lb) fprintf(f, "L_%d: \n", hulp1); + if (unsafe && hulp1 == lb) { + var = ++nvar; + fprintf(f,"{ int LL_%d = 0;\nL_%d: \n", var, hulp1); } fputs("switch(LLcsymb) {\n", f); while (g_gettype(p) != EORULE) { - l = (p_link) pentry[g_getcont(p)]; + l = &links[g_getcont(p)]; if (unsafe && (l->l_flag & DEF)) { haddefault = 1; fprintf(f, @@ -508,19 +529,18 @@ alternation(p, safety, mustscan, mustpop, lb) register p_gram p; { var, var, lb, hulp2); } if (l->l_flag & COND) { - set = setalloc(tsetsize); - setunion(set, l->l_others, tsetsize); - setintersect(set, l->l_symbs, tsetsize); - setminus(l->l_symbs, set, tsetsize); - setminus(l->l_others, set, tsetsize); + set = setalloc(); + setunion(set, l->l_others); + setintersect(set, l->l_symbs); + setminus(l->l_symbs, set); + setminus(l->l_others, set); gencases(set); - free((p_mem) set); controlline(); fputs("if (!",f); getaction(0); fprintf(f,") goto L_%d;\n", hulp); } - if (!unsafe && (l->l_flag & DEF)) { + if (!haddefault && (l->l_flag & DEF)) { fputs("default:\n", f); haddefault = 1; } @@ -532,111 +552,91 @@ alternation(p, safety, mustscan, mustpop, lb) register p_gram p; { } if (safety != SAFE) nsafe = SAFESCANDONE; } - if (mustpop) fputs(c_LLptrmin, f); - rulecode(l->l_rule, nsafe, mustscan, 0); + rulecode(l->l_rule, nsafe, mustscan, mustpop); fputs(c_break,f); if (l->l_flag & COND) { + p++; + fprintf(f,"L_%d : ;\n",hulp); + if (g_gettype(p+1) == EORULE) { + setminus(links[g_getcont(p)].l_symbs, set); + free((p_mem) set); + continue; + } + free((p_mem) set); if (!haddefault) { fputs("default:\n", f); } else { gencases(l->l_others); - safety = SAFE; + safety = SAFE; } - fprintf(f,"L_%d : ;\n",hulp); - p++; - if (!unsafe && g_gettype(p+1) == EORULE) { - if (mustpop) fputs(c_LLptrmin, f); - rulecode(((p_link)pentry[g_getcont(p)])->l_rule, - safety,mustscan,0); + if (safety <= SAFESCANDONE) { + genpop(mustpop); + mustpop = NOPOP; } - else alternation(p,safety,mustscan,mustpop,lb); + alternation(p,safety,mustscan,mustpop,lb,var); break; } p++; } fputs(c_close, f); - if (unsafe) fputs(c_close, f); - return; + if (unsafe && hulp1 == lb) fputs(c_close, f); } -STATIC int -dopush(p,safety,toplevel) register p_gram p; { +STATIC int * +dopush(p,safety,toplevel,pp) register p_gram p; register int *pp; { /* * The safety only matters if toplevel != 0 */ - register int count = 0; for (;;) { switch(g_gettype(p)) { case EORULE : case ALTERNATION : - return count; + return pp; case TERM : { register p_term q; int rep, cnt; - q = (p_term) pentry[g_getcont(p)]; - rep = r_getkind(&(q->t_reps)); - cnt = r_getnum(&(q->t_reps)); - count += dopush(p+1,SCANDONE,0); - if (toplevel > 0 && - (rep == OPT || (rep == FIXED && cnt == 0))) { - if (safety <= SAFESCANDONE) return count; + q = &terms[g_getcont(p)]; + rep = r_getkind(q); + cnt = r_getnum(q); + if (!(toplevel > 0 && safety <= SAFESCANDONE && + (rep == OPT || (rep == FIXED && cnt == 0)))) { + *pp++ = findindex(q->t_contains); } - *ppushlist++ = findindex(&(q->t_contains)); - return count+1; } + break; } + case LITERAL : case TERMINAL : - if (toplevel > 0 && safety == SAFE) { - count += dopush(p+1,NOSCANDONE,-1); - } - else count += dopush(p+1, NOSCANDONE, 0); - if (toplevel != 0) { - return count; + if (toplevel > 0 && safety <= SAFESCANDONE) { + toplevel = -1; + p++; + safety = NOSCANDONE; + continue; } - *ppushlist++ = -h_entry[g_getcont(p)].h_num; - return count + 1; + if (toplevel == 0) *pp++ = -(g_getcont(p)+1); + break; case NONTERM : { register p_nont n; n = &nonterms[g_getnont(p)]; - count += dopush(p+1, SCANDONE, 0); if (toplevel == 0 || - g_gettype(n->n_rule) == ALTERNATION) { - *ppushlist++ = findindex(&n->n_contains); - count++; + (g_gettype(n->n_rule) == ALTERNATION && + getntsafe(n) > SAFESCANDONE)) { + *pp++ = findindex(n->n_contains); } - return count; } + break; } + case ACTION : + p++; + continue; } + toplevel = 0; + safety = NOSCANDONE; p++; } } # define max(a,b) ((a) < (b) ? (b) : (a)) -STATIC -pushcode() { - register int i,j,k; - register int *p = pushlist; - - if ((i = ppushlist - p) == 0) return; - if (i <= 2) { - genpush(p[0]); - if (i == 2) genpush(p[1]); - return; - } - fprintf(fpars,"\LLlpush(%d, %d);\n",plists-lists, i); - if (maxlists - plists < i) { - j = plists - lists; - k = maxlists-lists+max(i+1,50); - lists = (int *) ralloc( (p_mem)lists, - (unsigned)(k+1)*sizeof(int)); - plists = lists+j; - maxlists = lists+k; - } - while (i--) { - *plists++ = *p++; - } -} STATIC getaction(flag) { @@ -674,14 +674,14 @@ getaction(flag) { newline = 0; } match = ch; - putc(ch,f); - while (ch = getc(fact)) { - if (ch == match) break; + for (;;) { + putc(ch,f); + ch = getc(fact); + if (ch == match || !ch) break; if (ch == '\\') { putc(ch,f); ch = getc(fact); } - putc(ch,f); } break; case IDENT : @@ -711,35 +711,44 @@ codeforterm(q,safety,toplevel) register p_term q; { register int rep; int persistent; int ispushed; + int sw = SAFE; f = fpars; - i = r_getnum(&(q->t_reps)); - rep = r_getkind(&(q->t_reps)); - ispushed = !(toplevel > 0 && safety <= SAFESCANDONE && - (rep == OPT || (rep == FIXED && i == 0))); + i = r_getnum(q); + rep = r_getkind(q); persistent = (q->t_flags & PERSISTENT); - if (safety == NOSCANDONE && (rep != FIXED || i == 0)) { + ispushed = NOPOP; + if (!(toplevel > 0 && safety <= SAFESCANDONE && + (rep == OPT || (rep == FIXED && i == 0)))) { + ispushed = findindex(q->t_contains); + } + if (safety == NOSCANDONE && (rep != FIXED || i == 0 || + gettout(q) != NOSCANDONE)) { fputs(c_read, f); safety = SCANDONE; } - if (ispushed) { - if ((safety <= SAFESCANDONE && - (rep == OPT || (rep == FIXED && i == 0))) || - (rep == FIXED && i == 0 && - g_gettype(q->t_rule) != ALTERNATION)) { - fputs(c_LLptrmin, f); - ispushed = 0; + if (rep == PLUS && !persistent) { + int temp; + + temp = findindex(q->t_first); + if (temp != ispushed) { + genpop(ispushed); + ispushed = temp; + genpush(temp); } } if (i) { /* N > 0, so generate fixed forloop */ - fprintf(f,"{\nregister LL_i = %d;\n",i); - fputs("for (;;) {\nif (!LL_i--) {\nLLptr++;\n", f); - fputs("break;\n}\n", f); + fputs("{\nregister LL_i;\n", f); + assert(ispushed != NOPOP); + fprintf(f, "for (LL_i = %d; LL_i >= 0; LL_i--) {\n", i - 1); if (rep == FIXED) { - if (gettout(q) == NOSCANDONE && safety == NOSCANDONE) { - fputs(c_read,f); - safety = SCANDONE; + fputs("if (!LL_i) ", f); + genpop(ispushed); + genpush(ispushed); + if (safety == NOSCANDONE) { + assert(gettout(q) == NOSCANDONE); + fputs(c_read, f); } } } @@ -747,26 +756,36 @@ codeforterm(q,safety,toplevel) register p_term q; { /* '+' or '*', so generate infinite loop */ fputs("for (;;) {\n",f); } + else if (safety <= SAFESCANDONE && rep == OPT) { + genpop(ispushed); + ispushed = NOPOP; + } if (rep == STAR || rep == OPT) { - genifhead(q, rep, safety, ispushed); + sw = genswhead(q, rep, i, safety, ispushed); + } + rulecode(q->t_rule, + t_safety(rep,i,persistent,safety), + gettout(q) != NOSCANDONE, + rep == FIXED ? ispushed : NOPOP); + if (gettout(q) == NOSCANDONE && rep != FIXED) { + fputs(c_read, f); } - rulecode(q->t_rule,t_safety(rep,i,persistent,safety), - rep != FIXED || gettout(q) != NOSCANDONE, - rep == FIXED && i == 0 && ispushed); /* in the case of '+', the if is after the code for the rule */ if (rep == PLUS) { - if (!persistent) { - fprintf(f, "*LLptr = %d;\n", findindex(&(q->t_first))); - /* ??? */ + if (i) { + fputs("if (!LL_i) break;\n", f); } - genifhead(q,rep,safety, ispushed); + sw = genswhead(q, rep, i, safety, ispushed); } if (rep != OPT && rep != FIXED) fputs("continue;\n", f); if (rep != FIXED) { fputs(c_close, f); /* Close switch */ + if (sw > SAFESCANDONE && (q->t_flags & RESOLVER)) { + fputs(c_close, f); + } if (rep != OPT) { - if (ispushed) fputs(c_LLptrmin, f); - fputs("break;\n", f); + genpop(ispushed); + fputs(c_break, f); } } if (rep != OPT && (rep != FIXED || i > 0)) { @@ -779,34 +798,38 @@ codeforterm(q,safety,toplevel) register p_term q; { } STATIC -genifhead(q, rep, safety, ispushed) register p_term q; { +genswhead(q, rep, cnt, safety, ispushed) register p_term q; { /* - * Generate if statement for term q + * Generate switch statement for term q */ register FILE *f; p_set p1; p_set setalloc(); - int hulp1; + int hulp1, hulp2, var; int safeterm; f = fpars; - hulp1 = nlabel++; - if (rep == PLUS) safeterm = gettout(q) <= SAFESCANDONE; - else if (rep == OPT) safeterm = safety <= SAFESCANDONE; - else /* if (rep == STAR) */ { - safeterm = safety <= SAFESCANDONE && gettout(q) <= SAFESCANDONE; + if (rep == PLUS) safeterm = gettout(q); + else if (rep == OPT) safeterm = safety; + else /* if (rep == STAR) */ safeterm = max(safety, gettout(q)); + if ((q->t_flags & RESOLVER) && safeterm > SAFESCANDONE) { + hulp2 = nlabel++; + var = nvar++; + fprintf(f, "{ int LL_%d = 0;\nL_%d : ", var, hulp2); } fputs("switch(LLcsymb) {\n", f); if (q->t_flags & RESOLVER) { - p1 = setalloc(tsetsize); - setunion(p1,q->t_first,tsetsize); - setintersect(p1,q->t_follow,tsetsize); + hulp1 = nlabel++; + p1 = setalloc(); + setunion(p1,q->t_first); + setintersect(p1,q->t_follow); /* * p1 now points to a set containing the conflicting * symbols */ - setminus(q->t_first, p1, tsetsize); - setminus(q->t_follow, p1, tsetsize); + setminus(q->t_first, p1); + setminus(q->t_follow, p1); + setminus(q->t_contains, p1); gencases(p1); free((p_mem) p1); controlline(); @@ -814,18 +837,42 @@ genifhead(q, rep, safety, ispushed) register p_term q; { getaction(0); fprintf(f, ") goto L_%d;\n", hulp1); } - gencases(q->t_follow); - if (ispushed && rep == OPT) fputs(c_LLptrmin, f); - fputs("break;\n", f); - if (!safeterm) { - fputs("default: if (!LLnext()) break;\n", f); - gencases(q->t_first); + if (safeterm <= SAFESCANDONE) fputs("default:\n", f); + else gencases(q->t_follow); + if (rep == OPT) genpop(ispushed); + fputs(c_break, f); + if (safeterm > SAFESCANDONE) { + int i; + + assert(ispushed != NOPOP); + if (ispushed >= 0) i = -ispushed; + else i = tokens[-(ispushed+1)].t_tokno; + fputs("default: if (", f); + if (q->t_flags & RESOLVER) { + fprintf(f, "LL_%d ||", var); + } + fprintf(f, "!LLnext(%d)) {\n", i); + if (rep == OPT) genpop(ispushed); + fputs(c_break, f); + fputs(c_close, f); + if (q->t_flags & RESOLVER) { + fprintf(f,"LL_%d = 1;\ngoto L_%d;\n", var, hulp2); + } + } + if ((q->t_flags & PERSISTENT) && safeterm != SAFE) { + gencases(q->t_contains); } - else fputs("default: ;\n", f); + else gencases(q->t_first); if (q->t_flags & RESOLVER) { fprintf(f, "L_%d : ;\n", hulp1); } - if (ispushed && rep == OPT) fputs(c_LLptrmin, f); + if (rep == OPT) genpop(ispushed); + if (cnt > 0) { + assert(ispushed != NOPOP); + fputs(rep == STAR ? "if (!LL_i) " : "if (LL_i == 1) ", f); + genpop(ispushed); + } + return safeterm; } STATIC @@ -845,27 +892,19 @@ gencases(setp) register p_set setp; { * labeledstatement : labels statement ; * labels : labels label | ; */ - register p_entry p; - register i; + register p_token p; + register int i; - p = h_entry; - for (i=0; i < nterminals; i++) { + for (i = 0, p = tokens; i < ntokens; i++, p++) { if (IN(setp,i)) { fprintf(fpars, - p->h_num<0400 ? "case /* '%s' */ %d : ;\n" + p->t_tokno<0400 ? "case /* '%s' */ %d : ;\n" : "case /* %s */ %d : ;\n", - p->h_name, i); + p->t_string, i); } - p++; } } -STATIC -genpush(d) { - - fprintf(fpars, "LLptr--;\n*LLptr = %d;\n",d); -} - static char namebuf[20]; STATIC string @@ -895,3 +934,23 @@ genname(s) string s; { *d = '\0'; return namebuf; } + +STATIC +genpush(d) { + genincrdecr("incr", d); +} + +STATIC +genincrdecr(s, d) string s; { + if (d == NOPOP) return; + if (d >= 0) { + fprintf(fpars, "LLs%s(%d);\n", s, d / nbytes); + return; + } + fprintf(fpars, "LLt%s(%d);\n", s, -(d + 1)); +} + +STATIC +genpop(d) { + genincrdecr("decr", d); +} diff --git a/util/LLgen/src/global.c b/util/LLgen/src/global.c index 33e8fc8b9..ebd1061d1 100644 --- a/util/LLgen/src/global.c +++ b/util/LLgen/src/global.c @@ -29,30 +29,28 @@ */ # include "types.h" +# include "extern.h" # include "io.h" -# include "tunable.h" # ifndef NORCSID static string rcsid4 = "$Header$"; # endif char ltext[LTEXTSZ]; -t_entry h_entry[NTERMINALS+NNONTERMS+1]; -p_entry max_t_ent; -t_nont nonterms[NNONTERMS+1]; +p_nont nonterms; +p_nont maxnt; int nnonterms; -int nterminals; -int order[NNONTERMS+1]; -int *maxorder; +p_token tokens; +p_token maxt; +int ntokens; +p_order porder, sorder; p_start start; int linecount; int assval; -string pentry[ENTSIZ]; FILE *fout; FILE *fpars; FILE *finput; FILE *fact; -p_nont maxnt; string f_pars = PARSERFILE; string f_out = OUTFILE; string f_temp = ACTFILE; @@ -66,8 +64,11 @@ int ntprint; int debug; # endif not NDEBUG p_file files; +p_file maxfiles; p_file pfile; string LLgenid = "/* LLgen generated code from source %s */\n"; t_token lextoken; int nerrors; -int fflag; +string rec_file, incl_file; +p_link links; +p_term terms; diff --git a/util/LLgen/src/io.h b/util/LLgen/src/io.h index 4c0340706..617eff6e5 100644 --- a/util/LLgen/src/io.h +++ b/util/LLgen/src/io.h @@ -29,7 +29,6 @@ */ # include -# include /* FILES */ diff --git a/util/LLgen/src/main.c b/util/LLgen/src/main.c index 964b08c8b..c7db5a7ae 100644 --- a/util/LLgen/src/main.c +++ b/util/LLgen/src/main.c @@ -38,12 +38,10 @@ static string rcsid6 = "$Header$"; # endif -static string rec_file; -static string incl_file; - /* In this file the following routines are defined: */ extern int main(); STATIC readgrammar(); +STATIC doparse(); extern error(); extern fatal(); extern comfatal(); @@ -56,11 +54,9 @@ extern badassertion(); main(argc,argv) register string argv[]; { register string arg; string libpath(); - int nflag = 0; /* Initialize */ - maxorder = order; assval = 0400; /* read options */ @@ -71,20 +67,11 @@ main(argc,argv) register string argv[]; { case 'V': verbose++; continue; - case 'n': - case 'N': - nflag++; - continue; - case 'f': - case 'F': - fflag++; - continue; # ifndef NDEBUG - case 'a': - case 'A': + case 'd': + case 'D': debug++; continue; -# endif not NDEBUG case 'r': case 'R': if (rec_file) { @@ -101,6 +88,7 @@ main(argc,argv) register string argv[]; { } incl_file = ++arg; break; +# endif not NDEBUG case 'x': case 'X': ntneeded = 1; @@ -110,7 +98,9 @@ main(argc,argv) register string argv[]; { fprintf(stderr,"illegal option : %c\n",*arg); return 1; } +# ifndef NDEBUG break; +# endif } argv++; argc--; @@ -119,46 +109,44 @@ main(argc,argv) register string argv[]; { * Now check wether the sets should include nonterminals */ if (verbose == 2) ntneeded = 1; - else if (! verbose) ntneeded = 0; /* * Initialise */ - if (!rec_file) rec_file = libpath("rec"); - if (!incl_file) incl_file = libpath("incl"); +# ifndef NDEBUG + if (!rec_file) { +# endif + rec_file = libpath("rec"); +# ifndef NDEBUG + } + if (!incl_file) { +# endif + incl_file = libpath("incl"); +# ifndef NDEBUG + } +# endif if ((fact = fopen(f_temp,"w")) == NULL) { fputs("Cannot create temporary\n",stderr); return 1; } - name_init(); + a_init(); readgrammar(argc,argv); - if (nflag) comfatal(); setinit(ntneeded); maxnt = &nonterms[nnonterms]; - max_t_ent = &h_entry[nterminals]; + maxt = &tokens[ntokens]; fclose(fact); /* * Now, the grammar is read. Do some computations */ co_reach(); /* Check for undefined and unreachable */ if (nerrors) comfatal(); - createsets(); - co_empty(); /* Which nonterminals produce empty? */ - co_first(); /* Computes first sets */ - co_follow(); /* Computes follow sets */ - co_symb(); /* Computes choice sets in alternations */ - conflchecks(); /* Checks for conflicts etc, and also - * takes care of LL.output etc - */ + do_compute(); + conflchecks(); if (nerrors) comfatal(); - co_contains(); /* Computes the contains sets */ - co_safes(); /* Computes safe terms and nonterminals. - * Safe means : always called with a terminal - * symbol that is guarantied to be eaten by - * the term - */ if (argc-- == 1) { - fputs("No code generation for input from standard input\n",stderr); - } else gencode(argc); + fputs("No code generation for input from standard input\n", + stderr); + } + else gencode(argc); UNLINK(f_temp); UNLINK(f_pars); return 0; @@ -180,32 +168,19 @@ readgrammar(argc,argv) char *argv[]; { files = p = (p_file) alloc((unsigned) (argc+1) * sizeof(t_file)); if (argc-- == 1) { finput = stdin; - p->f_name = f_input = "standard input"; - p->f_firsts = 0; - p->f_start = maxorder; - pfile = p; - LLparse(); - p->f_end = maxorder - 1; - p++; + f_input = "standard input"; + doparse(p++); } else { while (argc--) { if ((finput = fopen(f_input=argv[1],"r")) == NULL) { fatal(0,e_noopen,f_input); } - linecount = 0; - p->f_name = f_input; - p->f_start = maxorder; - p->f_firsts = 0; - pfile = p; - LLparse(); - p->f_end = maxorder-1; - p++; + doparse(p++); argv++; fclose(finput); } } - p->f_start = maxorder+1; - p->f_end = maxorder; + maxfiles = p; if (! lexical) lexical = "yylex"; /* * There must be a start symbol! @@ -216,19 +191,30 @@ readgrammar(argc,argv) char *argv[]; { if (nerrors) comfatal(); } +STATIC +doparse(p) register p_file p; { + + linecount = 0; + p->f_name = f_input; + p->f_firsts = 0; + pfile = p; + sorder = 0; + porder = 0; + LLparse(); + p->f_list = sorder; +} + /* VARARGS1 */ error(lineno,s,t,u) string s,t,u; { /* * Just an error message */ - register FILE *f; - f = stderr; ++nerrors; - if (lineno) fprintf(f,"\"%s\", line %d : ",f_input,lineno); - else fprintf(f,"\"%s\" : ",f_input); - fprintf(f,s,t,u); - putc('\n',f); + if (!lineno) lineno = 1; + fprintf(stderr,"\"%s\", line %d : ",f_input, lineno); + fprintf(stderr,s,t,u); + fputs("\n",stderr); } /* VARARGS1 */ @@ -253,16 +239,14 @@ comfatal() { exit(1); } -copyfile(n) { +copyfile(file) string file; { /* * Copies a file indicated by the parameter to filedescriptor fpars. - * If n != 0, the error recovery routines are copied, - * otherwise a standard header is. */ - register c; + register int c; register FILE *f; - if ((f = fopen(n?rec_file:incl_file,"r")) == NULL) { + if ((f = fopen(file,"r")) == NULL) { fatal(0,"Cannot open libraryfile, call an expert"); } while ((c = getc(f)) != EOF) putc(c,fpars); @@ -275,12 +259,9 @@ install(target, source) string target, source; { * if allowed (which means that the target must be generated * by LLgen from the source, or that the target is not present */ - register c; - register FILE *f1; - register FILE *f2; - register string s1; - register int i; - char buf[100]; + register int c1, c2; + register FILE *f1, *f2; + int cnt; /* * First open temporary, generated for source @@ -288,41 +269,38 @@ install(target, source) string target, source; { if ((f1 = fopen(f_pars,"r")) == NULL) { fatal(0,e_noopen,f_pars); } - i = 0; /* * Now open target for reading */ if ((f2 = fopen(target,"r")) == NULL) { - i = 1; fclose(f1); + RENAME(f_pars, target); + return; } - else { - /* - * Create string recognised by LLgen. The target must - * start with that! - */ - (int) sprintf(buf,LLgenid,source ? source : "."); - s1 = buf; - while (*s1 != '\0' && *s1++ == getc(f2)) { /* nothing */ } - /* - * Ai,ai, it did not - */ - if (*s1 != '\0') { + /* + * Compute length of LLgen identification string. The target must + * start with that! + */ + cnt = strlen(LLgenid) + strlen(source) - 2; + /* + * Now compare the target with the temporary + */ + do { + c1 = getc(f1); + c2 = getc(f2); + if (cnt >= 0) cnt--; + } while (c1 == c2 && c1 != EOF); + fclose(f1); + fclose(f2); + /* + * Here, if c1 != c2 the target must be recreated + */ + if (c1 != c2) { + if (cnt >= 0) { fatal(0,"%s : not a file generated by LLgen",target); } - rewind(f2); - /* - * Now compare the target with the temporary - */ - while ((c = getc(f1)) != EOF && c == getc(f2)) { /* nothing */} - if (c != EOF || getc(f2) != EOF) i = 1; - fclose(f1); - fclose(f2); + RENAME(f_pars,target); } - /* - * Here, if i != 0 the target must be recreated - */ - if (i) RENAME(f_pars,target); } #ifndef NDEBUG diff --git a/util/LLgen/src/name.c b/util/LLgen/src/name.c index 2537b6930..e9b1cf5cc 100644 --- a/util/LLgen/src/name.c +++ b/util/LLgen/src/name.c @@ -25,11 +25,11 @@ /* * name.c - * Defines the symboltable search routine and an initialising routine + * Defines the symboltable search routine, a name store routine and an + * initialising routine. */ # include "types.h" -# include "tunable.h" # include "extern.h" # include "assert.h" # include "io.h" @@ -39,42 +39,68 @@ static string rcsid7 = "$Header$"; # endif # define HASHSIZE 128 +# define NMSIZ 2048 /* Room for names allocated NMSIZ bytes at a time */ -static char name[NAMESZ]; /* space for names */ -static int iname; /* index in nametable */ +static char *name, *maxname; static p_entry h_root[HASHSIZE]; /* hash table */ static string e_literal = "Illegal literal"; +static p_entry entries, maxentries; +static t_info token_info, nont_info; /* Defined in this file are: */ extern string store(); extern name_init(); STATIC int hash(); -extern t_gram search(); +STATIC p_entry newentry(); +extern p_gram search(); -string -store(s) register string s; { - /* - * Store a string s in the name table - */ - register string t,u; - - u = t = &name[iname]; - do { if (u > &name[NAMESZ-1]) fatal(linecount,"name table overflow"); - else *u++ = *s; - } while (*s++); - iname = u - name; - return t; -} +p_mem alloc(); +p_mem new_mem(); name_init() { + token_info.i_esize = sizeof (t_token); + token_info.i_incr = 25; + nont_info.i_esize = sizeof (t_nont); + nont_info.i_incr = 25; + search(TERMINAL,"EOFILE",ENTERING); +} + +STATIC p_entry +newentry(str, next) string str; p_entry next; { + register p_entry p; + + if ((p = entries) == maxentries) { + p = (p_entry) alloc(50 * sizeof(t_entry)); + maxentries = p + 50; + } + entries = p + 1; + p->h_name = str; + p->h_next = next; + p->h_type.g_lineno = linecount; + return p; +} + +string +store(s) string s; { /* - * Initialise hash-table and enter special terminal EOFILE + * Store a string s in the name table */ - register p_entry *p; - t_gram search(); - - for(p = h_root; p<= &h_root[HASHSIZE-1]; p++) *p = 0; - search(TERMINAL,"EOFILE",ENTERING); + register string s1, t ,u; + + u = name; + t = s; + s1 = u; + do { + if (u >= maxname) { + u = alloc(NMSIZ); + maxname = u + NMSIZ; + t = s; + s1 = u; + } + *u++ = *t; + } while (*t++); + name = u; + return s1; } STATIC int @@ -82,7 +108,7 @@ hash(str) string str; { /* * Compute the hash for string str */ - register i; + register int i; register string l; l = str; @@ -92,151 +118,133 @@ hash(str) string str; { return i % HASHSIZE; } -t_gram +p_gram search(type,str,option) register string str; { /* * Search for object str. * It has type UNKNOWN, LITERAL, TERMINAL or NONTERM. - * option can be ENTERING, JUSTLOOKING or BOTH. + * option can be ENTERING or BOTH (also looking). */ register int val; register p_entry p; - t_gram r; register int i; + int type1; - g_init(&r); - g_setcont(&r,UNDEFINED); - r.g_lineno = linecount; i = hash(str); /* * Walk hash chain */ for (p = h_root[i]; p != (p_entry) 0; p = p->h_next) { if(!strcmp(p->h_name,str)) { - val = p - h_entry; - if (type == LITERAL && - (val >= NTERMINALS || p->h_num >= 0400)) continue; - if (val>=NTERMINALS) { - /* Should be a nonterminal */ - if (type == TERMINAL) { - error(linecount, - "%s : terminal expected", - str); - } - g_settype(&r,NONTERM); - g_setnont(&r,val - NTERMINALS); - } else { - if (type != LITERAL && p->h_num < 0400) { + type1 = g_gettype(&(p->h_type)); + if (type1 != type) { + if (type1 == LITERAL || type == LITERAL) { continue; } - if (type == NONTERM) { + if (type != UNKNOWN) { error(linecount, - "%s : nonterminal expected", + "%s : illegal type", str); continue; } - g_setnont(&r, val); - g_settype(&r, TERMINAL); } if (option==ENTERING) { error(linecount, "%s : already defined",str); } - return r; + p->h_type.g_lineno = linecount; + return &(p->h_type); } } - if (option == JUSTLOOKING) return r; + p = newentry(store(str), h_root[i]); + h_root[i] = p; if (type == TERMINAL || type == LITERAL) { - if (nterminals == NTERMINALS) { - fatal(linecount,"too many terminals"); - } - p = &h_entry[nterminals]; - } else { - /* - * type == NONTERM || type == UNKNOWN - * UNKNOWN and not yet declared means : NONTERM - */ - if (nnonterms == NNONTERMS) { - fatal(linecount,"too many nonterminals"); + register p_token pt; + + pt = (p_token) new_mem(&token_info); + tokens = (p_token) token_info.i_ptr; + pt->t_string = p->h_name; + if (type == LITERAL) { + if (str[0] == '\\') { + /* + * Handle escapes in literals + */ + if (str[2] == '\0') { + switch(str[1]) { + case 'n' : + val = '\n'; + break; + case 'r' : + val = '\r'; + break; + case 'b' : + val = '\b'; + break; + case 'f' : + val = '\f'; + break; + case 't' : + val = '\t'; + break; + case '\'': + val = '\''; + break; + case '\\': + val = '\\'; + break; + default : + error(linecount,e_literal); + } + } else { + /* + * Here, str[2] != '\0' + */ + if (str[1] > '3' || str[1] < '0' || + str[2] > '7' || str[2] < '0' || + str[3] > '7' || str[3] < '0' || + str[4] != '\0') error(linecount,e_literal); + val = 64*str[1] - 73*'0' + + 8*str[2] + str[3]; + } + } else { + /* + * No escape in literal + */ + if (str[1] == '\0') val = str[0]; + else error(linecount,e_literal); + } + pt->t_tokno = val; + g_settype(&(p->h_type), LITERAL); + } else { + /* + * Here, type = TERMINAL + */ + pt->t_tokno = assval++; + g_settype(&(p->h_type), TERMINAL); } - p = &h_entry[NTERMINALS+nnonterms]; + g_setcont(&(p->h_type), ntokens); + ntokens++; + return &(p->h_type); } - p->h_name = store(str); - p->h_next = h_root[i]; - h_root[i] = p; - if (type == NONTERM || type == UNKNOWN) { + /* + * type == NONTERM || type == UNKNOWN + * UNKNOWN and not yet declared means : NONTERM + */ + { register p_nont q; - q = &nonterms[nnonterms]; + q = (p_nont) new_mem(&nont_info); + nonterms = (p_nont) nont_info.i_ptr; + q->n_name = p->h_name; q->n_rule = 0; q->n_lineno = linecount; q->n_string = f_input; q->n_follow = 0; q->n_flags = 0; q->n_contains = 0; - p->h_num = 0; - g_settype(&r, NONTERM); - g_setnont(&r, nnonterms); + g_settype(&(p->h_type), NONTERM); + g_setcont(&(p->h_type), nnonterms); nnonterms++; - return r; - } - if (type == LITERAL) { - if (str[0] == '\\') { - /* - * Handle escapes in literals - */ - if (str[2] == '\0') { - switch(str[1]) { - case 'n' : - val = '\n'; - break; - case 'r' : - val = '\r'; - break; - case 'b' : - val = '\b'; - break; - case 'f' : - val = '\f'; - break; - case 't' : - val = '\t'; - break; - case '\'': - val = '\''; - break; - case '\\': - val = '\\'; - break; - default : - error(linecount,e_literal); - } - } else { - /* - * Here, str[2] != '\0' - */ - if (str[1] > '3' || str[1] < '0' || - str[2] > '7' || str[2] < '0' || - str[3] > '7' || str[3] < '0' || - str[4] != '\0') error(linecount,e_literal); - val = 64*str[1] - 73*'0' + 8*str[2] + str[3]; - } - } else { - /* - * No escape in literal - */ - if (str[1] == '\0') val = str[0]; - else error(linecount,e_literal); - } - p->h_num = val; - } else { - /* - * Here, type = TERMINAL - */ - p->h_num = assval++; + return &(p->h_type); } - g_settype(&r, TERMINAL); - g_setnont(&r, nterminals); - nterminals++; - return r; } diff --git a/util/LLgen/src/reach.c b/util/LLgen/src/reach.c index 233cc3315..e2ccf159a 100644 --- a/util/LLgen/src/reach.c +++ b/util/LLgen/src/reach.c @@ -29,7 +29,6 @@ * are all defined. */ -# include "tunable.h" # include "types.h" # include "extern.h" # include "io.h" @@ -51,15 +50,15 @@ co_reach() { */ register p_nont p; register p_start st; - register p_file x = files; - register int *s; + register p_file x; + register p_order s; /* Check for undefined nonterminals */ for (p = nonterms; p < maxnt; p++) { - if (! p->n_rule) { + if (! p->n_rule) { /* undefined */ f_input = p->n_string; error(p->n_lineno,"nonterminal %s not defined", - (min_nt_ent + (p - nonterms))->h_name); + p->n_name); } } /* @@ -67,17 +66,19 @@ co_reach() { * Mark the nonterminals that are encountered with the flag * REACHABLE, and walk their rules, if not done before */ - for (st = start; st; st = st->ff_next) reachable(st->ff_nont); + for (st = start; st; st = st->ff_next) { + reachable(&nonterms[st->ff_nont]); + } /* * Now check for unreachable nonterminals */ - for (; x->f_end < maxorder; x++) { + for (x = files; x < maxfiles; x++) { f_input = x->f_name; - for (s = x->f_start; s <= x->f_end; s++) { - p = &nonterms[*s]; + for (s = x->f_list; s; s = s->o_next) { + p = &nonterms[s->o_index]; if (! (p->n_flags & REACHABLE)) { error(p->n_lineno,"nonterminal %s unreachable", - (min_nt_ent + (p - nonterms))->h_name); + p->n_name); } } } @@ -107,10 +108,10 @@ reachwalk(p) register p_gram p; { for (;;) { switch(g_gettype(p)) { case ALTERNATION : - reachwalk(((p_link) pentry[g_getcont(p)])->l_rule); + reachwalk(links[g_getcont(p)].l_rule); break; case TERM : - reachwalk(((p_term) pentry[g_getcont(p)])->t_rule); + reachwalk(terms[g_getcont(p)].t_rule); break; case NONTERM : reachable(&nonterms[g_getnont(p)]); diff --git a/util/LLgen/src/sets.c b/util/LLgen/src/sets.c index c37109975..6d563c6e5 100644 --- a/util/LLgen/src/sets.c +++ b/util/LLgen/src/sets.c @@ -25,8 +25,7 @@ /* * sets.c - * Some general setmanipulation routines are defined, - * and also two set allocating routines are defined + * Set manipulation and allocation routines. */ # include "types.h" @@ -41,6 +40,7 @@ static string rcsid9 = "$Header$"; /* In this file the following routines are defined: */ extern setinit(); extern p_set setalloc(); +extern p_set get_set(); extern int setunion(); extern int setintersect(); extern setminus(); @@ -48,11 +48,12 @@ extern int setempty(); extern int findindex(); extern int setcount(); -int tbitset; -int setsize,tsetsize; -p_set *setptr, *maxptr, *topptr; - -static unsigned size,nbytes; +int nbytes; +static int setsize; +int tsetsize; +p_set *setptr, *maxptr; +static t_info set_info; +p_mem alloc(); setinit(ntneeded) { /* @@ -60,84 +61,99 @@ setinit(ntneeded) { */ register int bitset; - nbytes = NBYTES(nterminals); - tbitset = ALIGN(nbytes); - tsetsize = NINTS(tbitset); - bitset = tbitset; + nbytes = NBYTES(ntokens); + bitset = ALIGN(nbytes); + tsetsize = NINTS(bitset); if (ntneeded) { /* nonterminals must be included in the sets */ bitset += NBYTES(nnonterms); } setsize = NINTS(bitset); - tbitset *= 8; + set_info.i_esize = sizeof(p_set); + set_info.i_incr = 20; } p_set -setalloc(size) int size; { +get_set() { /* - * Allocate a set of size "size" ints + * Allocate a set that cannot be freed */ - register p_set t; - register int i; - p_mem alloc(); - - assert(size == tsetsize || size == setsize); - t = (p_set) alloc((unsigned) (size * sizeof(int))); - i = size; - t += i; - for (; i; i--) { - *--t = 0; + register p_set p, q; + static p_set sets, maxsets; + + if ((p = sets) >= maxsets) { + q = p = (p_set) alloc((unsigned) (50*setsize*sizeof(*sets))); + maxsets = p + 50 * setsize; + do { + *q++ = 0; + } while (q < maxsets); } - return t; + sets = p + setsize;; + return p; +} + +p_set +setalloc() { + /* + * Allocate a set which can later be freed. + */ + register p_set p; + register int size = setsize; + + p = (p_set) alloc((unsigned) (size * sizeof(*p))) + size; + do { + *--p = 0; + } while (--size); + return p; } int -setunion(a,b,size) register p_set a,b; int size; { +setunion(a,b) register p_set a,b; { /* * a = a union b. * Return 1 if the set a changed */ - register i; - register j; - int nsub = 0; + register int i; + register int j; + register int nsub = 0; - assert(size == tsetsize || size == setsize); - for (i = size; i; i--) { + i = setsize; + do { *a = (j = *a) | *b++; if (*a++ != j) { nsub = 1; } - } + } while (--i); return nsub; } int -setintersect(a,b,size) register p_set a,b; int size; { +setintersect(a,b) register p_set a,b; { /* * a = a intersect b. - * return 1 if the resut is empty + * return 1 if the result is empty */ - register i; - register nempty; + register int i; + register int nempty; - assert(size == tsetsize || size == setsize); nempty = 1; - for (i = size; i; i--) { + i = setsize; + do { if (*a++ &= *b++) nempty = 0; - } + } while (--i); return nempty; } -setminus(a,b,size) register p_set a,b; int size; { +setminus(a,b) register p_set a,b; { /* * a = a setminus b */ - register i; + register int i; - assert(size == tsetsize || size == setsize); - for (i = size; i; i--) { + i = setsize; + do { *a++ &= ~(*b++); - } + } while (--i); } int @@ -145,26 +161,28 @@ setempty(p) register p_set p; { /* * Return 1 if the set p is empty */ - register i; + register int i; - for (i = tsetsize; i; i--) { + i = tsetsize; + do { if (*p++) return 0; - } + } while (--i); return 1; } int -findindex(set) p_set *set; { +findindex(set) p_set set; { /* * The set "set" will serve as a recovery set. - * Search for it in the table. If not present, enter it + * Search for it in the table. If not present, enter it. + * Here is room for improvement. At the moment, the list of + * sets is examined with linear search. */ register p_set *t; - p_mem alloc(),ralloc(); + p_mem new_mem(); register p_set a; register p_set b; - register i; - register j; + register int i; int saved; /* @@ -172,10 +190,11 @@ findindex(set) p_set *set; { */ for (t = setptr; t < maxptr; t++) { a = *t; - b = *set; - for (i = tsetsize; i; i--) { + b = set; + i = tsetsize; + do { if (*a++ != *b++) break; - } + } while (--i); if (i) continue; /* * Here, the sets are equal. @@ -186,28 +205,14 @@ findindex(set) p_set *set; { * Now check if the set consists of only one element. * It would be a waste to use a set for that */ - if (setcount(*set, &saved) == 1) return -h_entry[saved].h_num; + if (setcount(set, &saved) == 1) return -(saved + 1); /* * If it does, return its number as a negative number. */ - if (maxptr >= topptr) { - /* - * Need new space for the list, in chunks of 50 pointers - */ - if (setptr == 0) { - setptr = (p_set *) alloc(50 * sizeof(p_set)); - size = 50; - maxptr = setptr; - } else { - setptr = (p_set *) ralloc((p_mem) setptr, - (50+size)*sizeof(p_set)); - maxptr = &setptr[size-1]; - size += 50; - } - topptr = &setptr[size-1]; - } - *maxptr = setalloc(tsetsize); - setunion(*maxptr, *set, tsetsize); + maxptr = (p_set *) new_mem(&set_info); + setptr = (p_set *) set_info.i_ptr; + *maxptr = setalloc(); + setunion(*maxptr, set); return nbytes * (maxptr++ - setptr); } @@ -215,7 +220,7 @@ int setcount(set, saved) register p_set set; int *saved; { register int i, j; - for (j = 0, i = 0; i < nterminals; i++) { + for (j = 0, i = 0; i < ntokens; i++) { if (IN(set,i)) { j++; *saved = i; diff --git a/util/LLgen/src/sets.h b/util/LLgen/src/sets.h index bee31fcbe..404a5c208 100644 --- a/util/LLgen/src/sets.h +++ b/util/LLgen/src/sets.h @@ -30,9 +30,9 @@ # define BITS (8 * sizeof (int)) # define IN(a,i) ((a)[(i)/BITS] & (1<<((i) % BITS))) -# define NTIN(a,i) ((a)[((i)+tbitset)/BITS]&(1<<((i)%BITS))) +# define NTIN(a,i) ((a)[(i)/BITS+tsetsize]&(1<<((i)%BITS))) # define PUTIN(a,i) ((a)[(i)/BITS] |=(1<<((i) % BITS))) -# define NTPUTIN(a,i) ((a)[((i)+tbitset)/BITS]|=(1<<((i)%BITS))) +# define NTPUTIN(a,i) ((a)[(i)/BITS+tsetsize]|=(1<<((i)%BITS))) # define NBYTES(n) (((n) + 7) / 8) /* * The next two macros operate on byte counts! @@ -40,6 +40,6 @@ # define NINTS(n) (((n) + (int) (sizeof(int) - 1)) / (int) sizeof(int)) # define ALIGN(n) (NINTS(n) * (int) sizeof (int)) -extern int tbitset; -extern p_set *setptr,*maxptr,*topptr; -extern int tsetsize,setsize; +extern int tsetsize; +extern p_set *setptr, *maxptr; +extern int nbytes; diff --git a/util/LLgen/src/tokens.g b/util/LLgen/src/tokens.g index 4f2f8567f..be7d17e57 100644 --- a/util/LLgen/src/tokens.g +++ b/util/LLgen/src/tokens.g @@ -26,15 +26,15 @@ /* * tokens.g * Defines the tokens for the grammar of LLgen. - * The lexical analyser and LLmes are also included here. + * The lexical analyser and LLmessage are also included here. */ { # include "types.h" # include "io.h" -# include "tunable.h" # include "extern.h" # include "assert.h" +# include "cclass.h" # ifndef NORCSID static string rcsidc = "$Header$"; @@ -46,11 +46,12 @@ extern LLmessage(); extern int input(); extern unput(); extern skipcomment(); +# ifdef LINE_DIRECTIVE STATIC linedirective(); +# endif STATIC string cpy(); STATIC string vallookup(); } - /* Classes */ %token C_IDENT ; /* lextoken.t_string contains the identifier read */ @@ -78,17 +79,17 @@ STATIC string vallookup(); * Structure for a keyword */ -struct keyword { +typedef struct keyword { string w_word; int w_value; -}; +} t_keyw, *p_keyw; /* * The list of keywords, the most often used keywords come first. * Linear search is used, as there are not many keywords */ -static struct keyword resword[] = { +static t_keyw resword[] = { { "token", C_TOKEN }, { "avoid", C_AVOID }, { "prefer", C_PREFER }, @@ -103,108 +104,105 @@ static struct keyword resword[] = { }; static t_token savedtok; /* to save lextoken in case of an insertion */ +# ifdef LINE_DIRECTIVE static int nostartline; /* = 0 if at the start of a line */ +# endif scanner() { /* * Lexical analyser, what else */ - register ch; /* Current char */ - register i; - register reserved = 0; /* reserved word? */ + register int ch; /* Current char */ + register char *p = ltext; + int reserved = 0; /* reserved word? */ int last; /* Char before current char */ + char *max = <ext[LTEXTSZ - 1]; - if (savedtok.t_tokno) { /* - * A token has been inserted. + if (ch = savedtok.t_tokno) { + /* A token has been inserted. * Now deliver the last lextoken again */ lextoken = savedtok; savedtok.t_tokno = 0; - return lextoken.t_tokno; + return ch; } - for (;;) { /* - * First, skip space, comments, line directives, etc - */ - do ch = input(); - while(isspace(ch)); - if (ch == '/') skipcomment(0); - else if (ch == '#' && !nostartline) linedirective(); - else break; - } - /* - * Now we have a first character of a token - */ - switch(ch) { - case EOF : - return EOF; - case '\'': /* - * Literal, put it in ltext - */ - i = 0; - for (;;) { - last = ch; - ch = input(); - if (ch == '\n' || ch == EOF) { - error(linecount,"missing '"); - break; - } - if (ch == '\'' && last != '\\') break; - ltext[i] = ch; - if (i < LTEXTSZ - 1) ++i; - } - ltext[i] = '\0'; - lextoken.t_string = ltext; - return C_LITERAL; - case '%' : /* - * Start of a reserved word - */ - reserved = 1; + for (;;) { ch = input(); - /* Fall through */ - default : - i = 0; - if (isdigit(ch)) { - if (reserved) { - error(linecount," A reserved number ?"); + if (ch == EOF) return ch; +# ifdef LINE_DIRECTIVE + if (ch == '#' && !nostartline) { + linedirective(); + continue; + } +# endif + switch(c_class[ch]) { + case ISLIT : + for (;;) { + last = ch; + ch = input(); + if (ch == '\n' || ch == EOF) { + error(linecount,"missing '"); + break; + } + if (ch == '\'' && last != '\\') break; + *p++ = ch; + if (p > max) p--; } - while (isdigit(ch)) { + *p = '\0'; + lextoken.t_string = ltext; + return C_LITERAL; + case ISCOM : + skipcomment(0); + /* Fall through */ + case ISSPA : + continue; + case ISDIG : { + register i = 0; + do { i = 10 * i + (ch - '0'); ch= input(); - } + } while (c_class[ch] == ISDIG); lextoken.t_num = i; unput(ch); - return C_NUMBER; - } - if (isalpha(ch) || ch == '_') { + return C_NUMBER; } + default: + return ch; + case ISKEY : + reserved = 1; + ch = input(); + /* Fall through */ + case ISLET : do { - if (reserved && isupper(ch)) ch += 'a' - 'A'; - ltext[i] = ch; - if (i < LTEXTSZ - 1) ++i; + if (reserved && ch >= 'A' && ch <= 'Z') { + ch += 'a' - 'A'; + } + *p++ = ch; + if (p > max) p--; ch = input(); - } while (isalnum(ch) || ch == '_'); - } else return ch; - unput(ch); - } - ltext[i] = '\0'; - if (reserved) { /* - * Now search for the keyword - */ - register struct keyword *w; - - w = resword; - while (w->w_word) { - if (! strcmp(ltext,w->w_word)) { - /* - * Found it. Return token number. - */ - return w->w_value; + } while (c_class[ch] == ISDIG || c_class[ch] == ISLET); + unput(ch); + *p = '\0'; + if (reserved) { /* + * Now search for the keyword + */ + register p_keyw w; + + w = resword; + while (w->w_word) { + if (! strcmp(ltext,w->w_word)) { + /* + * Return token number. + */ + return w->w_value; + } + w++; + } + error(linecount,"illegal reserved word"); } - w++; + lextoken.t_string = ltext; + return C_IDENT; } - error(linecount,"illegal reserved word"); } - lextoken.t_string = ltext; - return C_IDENT; } static int backupc; /* for unput() */ @@ -215,21 +213,22 @@ input() { * Low level input routine, used by all other input routines */ register c; - register FILE *f; - if(backupc) { /* - * Last char was "unput()". Deliver it again + if (c = backupc) { + /* Last char was "unput()". Deliver it again */ - c = backupc; backupc = 0; return c; } - f = finput; - if ((c = getc(f)) == EOF) return c; + if ((c = getc(finput)) == EOF) return c; +# ifdef LINE_DIRECTIVE nostartline = 1; +# endif if (!nonline) { linecount++; +# ifdef LINE_DIRECTIVE nostartline = 0; +# endif nonline = 1; } if (c == '\n') nonline = 0; @@ -249,30 +248,29 @@ skipcomment(flag) { * of C-code, so the newlines in it must be copied to enable the * C-compiler to keep a correct line count */ - register ch; + register int ch; int saved; /* line count on which comment starts */ saved = linecount; if (input() != '*') error(linecount,"illegal comment"); - ch = input(); - while (ch != EOF) { - if (flag && ch == '\n') putc(ch,fact); + do { + ch = input(); while (ch == '*') { if ((ch = input()) == '/') return; - if (flag && ch == '\n') putc(ch,fact); } - ch = input(); - } + if (flag && ch == '\n') putc(ch,fact); + } while (ch != EOF); error(saved,"Comment does not terminate"); } +# ifdef LINE_DIRECTIVE STATIC linedirective() { /* * Read a line directive */ - register ch; - register i; + register int ch; + register int i; string s_error = "Illegal line directive"; string store(); register string c; @@ -282,17 +280,16 @@ linedirective() { * Do not skip newlines */ ch = input(); - } while (ch != '\n' && ! isdigit(ch)); + } while (ch != '\n' && c_class[ch] != ISDIG); if (ch == '\n') { error(linecount,s_error); return; } - i = ch - '0'; - ch = input(); - while (isdigit(ch)) { + i = 0; + do { i = i*10 + (ch - '0'); ch = input(); - } + } while (c_class[ch] == ISDIG); while (ch != '\n' && ch != '"') ch = input(); if (ch == '"') { c = ltext; @@ -314,13 +311,14 @@ linedirective() { } linecount = i; } +# endif STATIC string vallookup(s) { /* * Look up the keyword that has token number s */ - register struct keyword *p = resword; + register p_keyw p = resword; while (p->w_value) { if (p->w_value == s) return p->w_word; @@ -330,25 +328,24 @@ vallookup(s) { } STATIC string -cpy(s,p,flag) register s; register string p; { +cpy(s,p,inserted) register string p; { /* * Create a piece of error message for token s and put it at p. - * flag = 0 if the token s was deleted (in which case we have + * inserted = 0 if the token s was deleted (in which case we have * attributes), else it was inserted */ register string t = 0; switch(s) { case C_IDENT : - if (!flag) t = lextoken.t_string; + if (!inserted) t = lextoken.t_string; else t = "identifier"; break; case C_NUMBER : t = "number"; break; case C_LITERAL : - if (!flag) { - *p++ = '"'; + if (!inserted) { *p++ = '\''; t = lextoken.t_string; break; @@ -359,19 +356,15 @@ cpy(s,p,flag) register s; register string p; { t = "endoffile"; break; } - if (!t) { - t = vallookup(s); - if (t) { - *p++ = '%'; - } + if (!t && (t = vallookup(s))) { + *p++ = '%'; } if (t) { /* * We have a string for the token. Copy it */ while (*t) *p++ = *t++; - if (s == C_LITERAL && !flag) { + if (s == C_LITERAL && !inserted) { *p++ = '\''; - *p++ = '"'; } return p; } @@ -380,14 +373,17 @@ cpy(s,p,flag) register s; register string p; { */ *p++ = '\''; if (s >= 040 && s <= 0176) *p++ = s; - else switch(s) { - case '\b' : *p++ = '\\'; *p++ = 'b'; break; - case '\f' : *p++ = '\\'; *p++ = 'f'; break; - case '\n' : *p++ = '\\'; *p++ = 'n'; break; - case '\r' : *p++ = '\\'; *p++ = 'r'; break; - case '\t' : *p++ = '\\'; *p++ = 't'; break; - default : *p++='0'+((s&0377)>>6); *p++='0'+((s>>3)&07); - *p++='0'+(s&07); + else { + *p++ = '\\'; + switch(s) { + case '\b' : *p++ = 'b'; break; + case '\f' : *p++ = 'f'; break; + case '\n' : *p++ = 'n'; break; + case '\r' : *p++ = 'r'; break; + case '\t' : *p++ = 't'; break; + default : *p++='0'+((s&0377)>>6); *p++='0'+((s>>3)&07); + *p++='0'+(s&07); + } } *p++ = '\''; return p; diff --git a/util/LLgen/src/types.h b/util/LLgen/src/types.h index 49181bfcb..6e37f27d2 100644 --- a/util/LLgen/src/types.h +++ b/util/LLgen/src/types.h @@ -43,7 +43,7 @@ typedef struct token { } t_x; # define t_string t_x.t_s # define t_num t_x.t_v -} t_token; +} t_token, *p_token; /* * structure for the grammar elements @@ -74,49 +74,49 @@ typedef struct gram { # define TERMINAL 03 /* A terminal */ # define TERM 04 /* Something between square brackets */ # define ALTERNATION 05 /* Alternation (|) */ +# define LITERAL 06 /* Also a terminal */ /* * Access macros for the x-field of a grammar element */ -# define g_init(p) {(p)->x = 0;} # define g_gettype(p) (((p)->x>>13)&07) # define g_getcont(p) ((p)->x&017777) # define g_getnont(p) ((p)->x&0777) -# define g_getnpar(p) (((p)->x>>9)&07) +# define g_getnpar(p) (((p)->x>>9)&017) # define g_settype(p,s) { assert(((unsigned)(s))<=07);(p)->x=((p)->x&017777)|((s)<<13);} # define g_setcont(p,s) { assert(((unsigned)(s))<=017777);(p)->x=((p)->x&0160000)|(s);} # define g_setnont(p,s) { assert(((unsigned)(s))<=0777);(p)->x=((p)->x&0177000)|(s);} -# define g_setnpar(p,s) { assert(((unsigned)(s))<=07);(p)->x=((p)->x&0170777)|((s)<<9);} +# define g_setnpar(p,s) { assert(((unsigned)(s))<=017);(p)->x=((p)->x&0160777)|((s)<<9);} /* * Some constants to communicate with the symbol table search routine */ -# define LITERAL 01 /* Not equal to NONTERM or TERMINAL */ # define UNKNOWN 00 /* Not equal to NONTERM, TERMINAL or LITERAL */ /* * Some constants for safety */ -# define SAFE 0 -# define SAFESCANDONE 1 -# define SCANDONE 2 -# define NOSCANDONE 3 -# define NOSAFETY 4 +# define SAFE 0 /* Indicates that a scan is done, and that the + * token is correct + */ +# define SAFESCANDONE 1 /* Indicates that a scan is done, and that the + * token will not be skipped + */ +# define SCANDONE 2 /* Indicates that a scan is done */ +# define NOSCANDONE 3 /* Indicates that no scan is done */ +# define NOSAFETY 4 /* Safety not yet computed */ /* * nonterminal structure */ typedef struct { - short n_flags; /* low order three bits are reserved + short n_flags; /* low order four bits are reserved * the parameter count */ -# define getntparams(p) ((p)->n_flags&07) -# define setntparams(p,i) {assert(((unsigned)(i))<=7);(p)->n_flags&=~07;(p)->n_flags|=(i);} -# define RECURSIVE 00100 /* Set if the default rule is recursive */ -# define CONTIN 00400 /* continuation already computed? */ -# define BUSY 01000 /* or are we busy computing it? */ -# define PARAMS 02000 /* tells if a nonterminal has parameters */ -# define PRODUCE 04000 /* tells if a nonterminal produces anything */ +# define getntparams(p) ((p)->n_flags&017) +# define setntparams(p,i) {assert(((unsigned)(i))<=017);(p)->n_flags&=~017;(p)->n_flags|=(i);} +# define RECURSIVE 02000 /* Set if the default rule is recursive */ +# define PARAMS 04000 /* tells if a nonterminal has parameters */ # define EMPTY 010000 /* tells if a nonterminal produces empty */ # define LOCALS 020000 /* local declarations ? */ # define REACHABLE 040000 /* can this nonterminal be reached ? */ @@ -141,6 +141,7 @@ typedef struct { # define n_string n_x.n_s p_set n_follow; /* pointer to the "follow" set */ p_set n_contains; /* pointer to symbols that can be produced */ + string n_name; /* name of nonterminal */ } t_nont, *p_nont; /* @@ -148,7 +149,7 @@ typedef struct { */ typedef struct h_entry { string h_name; /* pointer to name */ - int h_num; /* numbering of terminals */ + t_gram h_type; /* Type and index */ struct h_entry *h_next; /* next in hash chain */ } t_entry, *p_entry; @@ -180,20 +181,19 @@ typedef short t_reps,*p_reps; # define OPT 03 /* 0 or 1 times */ /* - * Access macros for repitition + * Access macros for repitition in term */ -# define r_init(p) {*(p)=0;} -# define r_getkind(p) (*(p)&03) -# define r_getnum(p) ((*(p)>>2)&037777) -# define r_setkind(p,s) { assert(((unsigned)(s))<=03);*(p)=(*(p)&0177774)|(s);} -# define r_setnum(p,s) { assert(((unsigned)(s))<=037777);*(p)=(*(p)&03)|((s)<<2);} +# define r_getkind(q) ((q)->t_reps&03) +# define r_getnum(q) (((q)->t_reps>>2)&037777) +# define r_setkind(q,s) { assert(((unsigned)(s))<=03);(q)->t_reps=((q)->t_reps&0177774)|(s);} +# define r_setnum(q,s) { assert(((unsigned)(s))<=037777);(q)->t_reps=((q)->t_reps&03)|((s)<<2);} /* * header structure for a term */ typedef struct term { t_reps t_reps; /* repeats ? */ - short t_flags; + short t_flags; /* Low order three bits for safety */ # define gettout(q) ((q)->t_flags&07) # define settout(q,i) {assert(((unsigned)(i))<=NOSAFETY);(q)->t_flags&=~07;(q)->t_flags|=i;} # define PERSISTENT 010 /* Set if this term has %persistent */ @@ -205,7 +205,7 @@ typedef struct term { p_gram t_rule; /* pointer to this term */ p_set t_follow; /* set of followers */ p_set t_first; /* set of firsts */ - p_set t_contains; + p_set t_contains; /* contains set */ } t_term, *p_term; /* @@ -213,7 +213,7 @@ typedef struct term { */ typedef struct ff_firsts { string ff_name; /* Name of the macro */ - p_nont ff_nont; /* for this nonterminal */ + int ff_nont; /* for this nonterminal */ struct ff_firsts *ff_next; } t_first, *p_first; @@ -223,6 +223,11 @@ typedef struct ff_firsts { typedef t_first t_start; typedef p_first p_start; +typedef struct order { + int o_index; /* index in nonterminal array */ + struct order *o_next; +} t_order, *p_order; + /* * structure for file names and info */ @@ -232,11 +237,21 @@ typedef struct f_file { * generated in the target file for this * grammar file */ - short *f_start,*f_end;/* pointers in the "order" array, - * Indicating which nonterminals were defined - * in this file - */ + struct order *f_list; /* list of nonterminals in this file */ } t_file, *p_file; + +typedef struct info_alloc { + /* + * Structure used for dynamically growing arrays + */ + unsigned i_size; /* Size of the array */ + unsigned i_esize; /* Size of an element */ + unsigned i_incr; /* When filled, add room for i_incr elements */ + p_mem i_ptr; /* ptr to base of array */ + p_mem i_max; /* ptr to first free */ + p_mem i_top; /* ptr to top of array */ +} t_info, *p_info; + # ifdef NDEBUG # define STATIC static # else not NDEBUG -- 2.34.1