From: ceriel Date: Mon, 8 Oct 1984 14:14:53 +0000 (+0000) Subject: Initial revision X-Git-Tag: release-5-5~6067 X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=a21f9366512bde1694dc74f89fa4b11e189d64ab;p=ack.git Initial revision --- diff --git a/util/LLgen/lib/incl b/util/LLgen/lib/incl new file mode 100644 index 000000000..237ee69ea --- /dev/null +++ b/util/LLgen/lib/incl @@ -0,0 +1,12 @@ +/* $Header$ */ + +#define LLin(x) (LLsets[(x)+LLi]&LLb) + +extern short *LLptr; +extern char LLsets[]; +extern int LLi, LLb; +extern int LLsymb; +extern int LLcsymb; +extern int LLscd; + +# include "Lpars.h" diff --git a/util/LLgen/lib/rec b/util/LLgen/lib/rec new file mode 100644 index 000000000..f64cb2205 --- /dev/null +++ b/util/LLgen/lib/rec @@ -0,0 +1,215 @@ +/* + * Some grammar independent code. + * This file is copied into Lpars.c. + */ + +static char *rcsid = "$Header$"; + +#define LLSTSIZ 1024 +static short LLstack[LLSTSIZ]; /* Recovery stack */ +short * LLptr; /* ptr in it */ +#define LLmax (&LLstack[LLSTSIZ-1]) /* if beyond this, overflow */ +int LLscd; /* lookahead done or not? */ +int LLb,LLi; +int LLsymb; +int LLcsymb; +static int LLlevel; +static short * LLbase; + +static struct LLsaved { + int LLs_i, LLs_b, LLs_s, LLs_c, LLs_t; + short *LLs_p, *LLs_x; +} LLsaved[LL_MAX]; + +/* In this file are defined: */ +extern LLcheck(); +extern LLscan(); +extern LLpush(); +extern LLlpush(); +extern int LLpop(); +extern int LLsskip(); +static LLerror(); +extern LLnewlevel(); +extern LLoldlevel(); + +LLcheck() { + register c; + /* + * The symbol to be checked is on the stack. + */ + if (!LLscd) { + if ((c = LL_LEXI()) <= 0) c = EOFILE; + LLsymb = c; + } + else LLscd = 0; + if (LLsymb == *--LLptr) return; + /* + * If we come here, an error has been detected. 
+ * LLpop will try and recover + */ + LLptr++; + while (LLindex[LLsymb] < 0) { + LLerror(0); + if ((LLsymb = LL_LEXI()) <= 0) LLsymb = EOFILE; + } + LLcsymb = LLindex[LLsymb]; + LLb = LLbyte[LLcsymb]; + LLi = LLcsymb>>3; + LLscd = 1; + if (!LLpop()) LLerror(*LLptr); + LLscd = 0; +} + +LLscan(t) { + /* + * Check if the next symbol is equal to the parameter + */ + if (!LLscd) { + if ((LLsymb = LL_LEXI()) <= 0) LLsymb = EOFILE; + } + else LLscd = 0; + if (LLsymb == t) return; + /* + * If we come here, an error has been detected + */ + LLpush(t); + LLscd = 1; + while (LLindex[LLsymb] < 0) { + LLerror(0); + if ((LLsymb = LL_LEXI()) <= 0) LLsymb = EOFILE; + } + LLcsymb = LLindex[LLsymb]; + LLb = LLbyte[LLcsymb]; + LLi = LLcsymb>>3; + if (!LLpop()) LLerror(t); + LLscd = 0; +} + +LLpush(t) { + if (LLptr == LLmax) { + LLerror(-1); + } + *LLptr++ = t; +} + +LLlpush(d) { + register i; + register short *p; + + p = &LLlists[d]; + i = *p++; + while(i--) { + if (LLptr == LLmax) { + LLerror(-1); + } + *LLptr++ = *p++; + } +} + +LLsskip() { + /* + * Error recovery, and not only that! + * Skip symbols until one is found that is on the stack. + * Return 1 if it is on top of the stack + */ + register short *t; + register i; + + for (;;) { + if (!LLscd) { +lab: + if ((i = LL_LEXI()) <= 0) i = EOFILE; + LLsymb = i; + if ((i = LLindex[i]) < 0) { + LLerror(0); + goto lab; + /* + * Ugly, but we want speed + * on possibly correct symbols !! 
+ * So, no breaks out of "for (;;)" + */ + } + LLcsymb = i; + LLb = LLbyte[i]; + LLi = (i>>3); + LLscd = 1; + } + t = LLptr-1; + i = *t; + if (!((i<=0 && LLsets[LLi-i]&LLb)||i==LLsymb)) { + while (--t >= LLbase) { + /* + * If the element on the stack is negative, + * its opposite is an index in the setarray, + * otherwise it is a terminal symbol + */ + i = *t; + if ((i<=0&&LLsets[LLi-i]&LLb)||i==LLsymb){ + break; + } + } + if (t >= LLbase) break; + LLerror(0); + LLscd = 0; + } + else { + return 1; + } + } + return t == LLptr - 1; +} + +LLpop() { + register i; + + i = LLsskip(); + LLptr--; + return i; +} + +static +LLerror(d) { + + LLmessage(d); + if (d < 0) exit(1); +} + +LLnewlevel() { + register struct LLsaved *p; + + if (!LLlevel++) { + LLptr = LLstack; + LLbase = LLstack; + LLpush(EOFILE); + } + else { + if (LLlevel > LL_MAX) LLerror(-1); + p = &LLsaved[LLlevel - 2]; + p->LLs_p = LLptr; + p->LLs_i = LLi; + p->LLs_b = LLb; + p->LLs_s = LLsymb; + p->LLs_t = LLcsymb; + p->LLs_c = LLscd; + p->LLs_x = LLbase; + LLbase = LLptr; + LLpush(EOFILE); + } +} + +LLoldlevel() { + register struct LLsaved *p; + + LLcheck(); + if (--LLlevel) { + p = &LLsaved[LLlevel-1]; + LLptr = p->LLs_p; + LLi = p->LLs_i; + LLb = p->LLs_b; + LLsymb = p->LLs_s; + LLcsymb = p->LLs_t; + LLbase = p->LLs_x; + LLscd = p->LLs_c; + } +} + diff --git a/util/LLgen/src/alloc.c b/util/LLgen/src/alloc.c new file mode 100644 index 000000000..e40419e88 --- /dev/null +++ b/util/LLgen/src/alloc.c @@ -0,0 +1,54 @@ +/* + * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands. + * + * This product is part of the Amsterdam Compiler Kit. + * + * Permission to use, sell, duplicate or disclose this software must be + * obtained in writing. Requests for such permissions may be sent to + * + * Dr. Andrew S. 
Tanenbaum + * Wiskundig Seminarium + * Vrije Universiteit + * Postbox 7161 + * 1007 MC Amsterdam + * The Netherlands + * + */ + +/* + * L L G E N + * + * An Extended LL(1) Parser Generator + * + * Author : Ceriel J.H. Jacobs + */ + +/* + * alloc.c + * Interface to malloc() and realloc() + */ + +# include "types.h" +# include "extern.h" + +static string rcsid = "$Header$"; + +static string e_nomem = "Out of memory"; + +p_mem +alloc(size) unsigned size; { + register p_mem p; + p_mem malloc(); + + if ((p = malloc(size)) == 0) fatal(linecount,e_nomem); + return p; +} + +p_mem +ralloc(p,size) p_mem p; unsigned size; { + register p_mem q; + p_mem realloc(); + + if ((q = realloc(p,size)) == 0) fatal(linecount,e_nomem); + return q; +} diff --git a/util/LLgen/src/assert.h b/util/LLgen/src/assert.h new file mode 100644 index 000000000..8a50b9ca2 --- /dev/null +++ b/util/LLgen/src/assert.h @@ -0,0 +1,35 @@ +/* + * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands. + * + * This product is part of the Amsterdam Compiler Kit. + * + * Permission to use, sell, duplicate or disclose this software must be + * obtained in writing. Requests for such permissions may be sent to + * + * Dr. Andrew S. Tanenbaum + * Wiskundig Seminarium + * Vrije Universiteit + * Postbox 7161 + * 1007 MC Amsterdam + * The Netherlands + * + */ + +/* + * L L G E N + * + * An Extended LL(1) Parser Generator + * + * Author : Ceriel J.H. Jacobs + */ + +/* + * assert.h $Header$ + * an assertion macro + */ + +#ifndef NDEBUG +#define assert(x) if(!(x)) badassertion("x",__FILE__,__LINE__) +#else +#define assert(x) /* nothing */ +#endif diff --git a/util/LLgen/src/extern.h b/util/LLgen/src/extern.h new file mode 100644 index 000000000..c4f28a82f --- /dev/null +++ b/util/LLgen/src/extern.h @@ -0,0 +1,86 @@ +/* + * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands. + * + * This product is part of the Amsterdam Compiler Kit. 
+ * + * Permission to use, sell, duplicate or disclose this software must be + * obtained in writing. Requests for such permissions may be sent to + * + * Dr. Andrew S. Tanenbaum + * Wiskundig Seminarium + * Vrije Universiteit + * Postbox 7161 + * 1007 MC Amsterdam + * The Netherlands + * + */ + +/* + * L L G E N + * + * An Extended LL(1) Parser Generator + * + * Author : Ceriel J.H. Jacobs + */ + +/* + * extern.h $Header$ + * Miscellaneous constants and + * some variables that are visible in more than one file + */ + +/* + * options for the identifier search routine + */ +# define JUSTLOOKING 0 +# define ENTERING 1 +# define BOTH 2 + +/* + * Now for some declarations + */ + +extern char ltext[]; /* input buffer */ +extern int nnonterms; /* number of nonterminals */ +extern int nterminals; /* number of terminals */ +extern p_start start; /* will contain the start symbols */ +extern int linecount; /* line number */ +extern int assval; /* used to tell literals apart from + * other terminals + */ +extern t_nont nonterms[]; /* the nonterminal array */ +extern p_nont maxnt; /* is filled up until here */ +extern int order[]; /* order of nonterminals in the grammar, + * important because actions are copied to + * a temporary file in the order in which they + * were read + */ +extern int *maxorder; /* will contain &order[nnonterms] */ +extern t_entry h_entry[]; /* terminal and nonterminal entries, + * first NTERMINALS entries reserved + * for terminals + */ +extern p_entry max_t_ent; /* will contain &h_entry[nterminals] */ +# define min_nt_ent &h_entry[NTERMINALS] +extern string pentry[]; /* pointers to various allocated things */ +extern string e_noopen; /* Error message string used often */ +extern int verbose; /* Level of verbosity */ +extern string lexical; /* name of lexical analyser */ +extern int ntneeded; /* ntneeded = 1 if nonterminals are included + * in the sets. 
+ */ +extern int ntprint; /* ntprint = 1 if they must be printed too in + * the LL.output file (-x option) + */ +# ifndef NDEBUG +extern int debug; +# endif not NDEBUG +extern p_file files,pfile; /* pointers to file structure. + * "files" points to the start of the + * list */ +extern string LLgenid; /* LLgen identification string */ +extern t_token lextoken; /* the current token */ +extern int nerrors; +extern int fflag; /* Enable compiler to generate jump tables + * for switches? + */ diff --git a/util/LLgen/src/global.c b/util/LLgen/src/global.c new file mode 100644 index 000000000..9adb8d725 --- /dev/null +++ b/util/LLgen/src/global.c @@ -0,0 +1,71 @@ +/* + * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands. + * + * This product is part of the Amsterdam Compiler Kit. + * + * Permission to use, sell, duplicate or disclose this software must be + * obtained in writing. Requests for such permissions may be sent to + * + * Dr. Andrew S. Tanenbaum + * Wiskundig Seminarium + * Vrije Universiteit + * Postbox 7161 + * 1007 MC Amsterdam + * The Netherlands + * + */ + +/* + * L L G E N + * + * An Extended LL(1) Parser Generator + * + * Author : Ceriel J.H. 
Jacobs + */ + +/* + * global.c + * Contains declarations visible in several other source files + */ + +# include "types.h" +# include "io.h" +# include "tunable.h" + +static string rcsid = "$Header$"; + +char ltext[LTEXTSZ]; +t_entry h_entry[NTERMINALS+NNONTERMS+1]; +p_entry max_t_ent; +t_nont nonterms[NNONTERMS+1]; +int nnonterms; +int nterminals; +int order[NNONTERMS+1]; +int *maxorder; +p_start start; +int linecount; +int assval; +string pentry[ENTSIZ]; +FILE *fout; +FILE *fpars; +FILE *finput; +FILE *fact; +p_nont maxnt; +string f_pars = PARSERFILE; +string f_out = OUTFILE; +string f_temp = ACTFILE; +string f_input; +string e_noopen = "Cannot open %s"; +int verbose; +string lexical; +int ntneeded; +int ntprint; +# ifndef NDEBUG +int debug; +# endif not NDEBUG +p_file files; +p_file pfile; +string LLgenid = "/* LLgen generated code from source %s */\n"; +t_token lextoken; +int nerrors; +int fflag; diff --git a/util/LLgen/src/io.h b/util/LLgen/src/io.h new file mode 100644 index 000000000..4c0340706 --- /dev/null +++ b/util/LLgen/src/io.h @@ -0,0 +1,49 @@ +/* + * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands. + * + * This product is part of the Amsterdam Compiler Kit. + * + * Permission to use, sell, duplicate or disclose this software must be + * obtained in writing. Requests for such permissions may be sent to + * + * Dr. Andrew S. Tanenbaum + * Wiskundig Seminarium + * Vrije Universiteit + * Postbox 7161 + * 1007 MC Amsterdam + * The Netherlands + * + */ + +/* + * L L G E N + * + * An Extended LL(1) Parser Generator + * + * Author : Ceriel J.H. 
Jacobs + */ + +/* + * io.h $Header$ + * Some important file names and variables + */ + +# include +# include + +/* FILES */ + +# define OUTFILE "LL.output" /* -v option */ +# define PARSERFILE "LL.xxx" /* This is what we want */ +# define ACTFILE "LL.temp" /* temporary file to save actions */ +# define HFILE "Lpars.h" /* file for "#define's " */ +# define RFILE "Lpars.c" /* Error recovery */ + +extern FILE *finput; +extern FILE *fpars; +extern FILE *fact; +extern FILE *fout; +extern string f_pars; +extern string f_temp; +extern string f_out; +extern string f_input; diff --git a/util/LLgen/src/machdep.c b/util/LLgen/src/machdep.c new file mode 100644 index 000000000..ef9e8f3bf --- /dev/null +++ b/util/LLgen/src/machdep.c @@ -0,0 +1,58 @@ +/* + * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands. + * + * This product is part of the Amsterdam Compiler Kit. + * + * Permission to use, sell, duplicate or disclose this software must be + * obtained in writing. Requests for such permissions may be sent to + * + * Dr. Andrew S. Tanenbaum + * Wiskundig Seminarium + * Vrije Universiteit + * Postbox 7161 + * 1007 MC Amsterdam + * The Netherlands + * + */ + +/* + * L L G E N + * + * An Extended LL(1) Parser Generator + * + * Author : Ceriel J.H. 
Jacobs + */ + +/* + * machdep.c + * Machine dependent things + */ + + +# include "types.h" + +static string rcsid = "$Header$"; + +/* In this file the following routines are defined: */ +extern UNLINK(); +extern RENAME(); +extern string libpath(); + +UNLINK(x) string x; { + unlink(x); /* system call to remove file */ +} + +RENAME(x,y) string x,y; { /* move x to y: drop any old y, link x to y, then remove x */ + unlink(y); + if(link(x,y)!=0)fatal(1,"Cannot link to %s",y); + unlink(x); +} + +string +libpath(s) char *s; { + static char buf[100]; /* NOTE(review): fixed size and filled without a bounds check; every call reuses it */ + + strcpy(buf,"/usr/local/lib/LLgen/"); + strcat(buf,s); + return buf; +} diff --git a/util/LLgen/src/main.c b/util/LLgen/src/main.c new file mode 100644 index 000000000..b3e90b6c2 --- /dev/null +++ b/util/LLgen/src/main.c @@ -0,0 +1,333 @@ +/* + * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands. + * + * This product is part of the Amsterdam Compiler Kit. + * + * Permission to use, sell, duplicate or disclose this software must be + * obtained in writing. Requests for such permissions may be sent to + * + * Dr. Andrew S. Tanenbaum + * Wiskundig Seminarium + * Vrije Universiteit + * Postbox 7161 + * 1007 MC Amsterdam + * The Netherlands + * + */ + +/* + * L L G E N + * + * An Extended LL(1) Parser Generator + * + * Author : Ceriel J.H. 
Jacobs + */ + +/* + * main.c + * Contains main program, and some error message routines + */ + +# include "types.h" +# include "io.h" +# include "extern.h" +# include "sets.h" +# include "assert.h" + +static string rcsid = "$Header$"; + +static string rec_file; +static string incl_file; + +/* In this file the following routines are defined: */ +extern int main(); +STATIC readgrammar(); +extern error(); +extern fatal(); +extern comfatal(); +extern copyfile(); +extern install(); +# ifndef NDEBUG +extern badassertion(); +# endif not NDEBUG + +main(argc,argv) register string argv[]; { + register string arg; + string libpath(); + int nflag = 0; + + /* Initialize */ + + maxorder = order; + assval = 0400; + /* read options */ + + while (argc >= 2 && (arg = argv[1], *arg == '-')) { + while (*++arg) { + switch(*arg) { + case 'v': + case 'V': + verbose++; + continue; + case 'n': + case 'N': + nflag++; + continue; + case 'f': + case 'F': + fflag++; + continue; +# ifndef NDEBUG + case 'a': + case 'A': + debug++; + continue; +# endif not NDEBUG + case 'r': + case 'R': + if (rec_file) { + fprintf(stderr,"duplicate -r flag\n"); + exit(1); + } + rec_file = ++arg; /* the rest of this argument is the file name */ + break; + case 'i': + case 'I': + if (incl_file) { + fprintf(stderr,"duplicate -i flag\n"); + exit(1); + } + incl_file = ++arg; /* the rest of this argument is the file name */ + break; + case 'x': + case 'X': + ntneeded = 1; + ntprint = 1; + continue; + default: + fprintf(stderr,"illegal option : %c\n",*arg); + return 1; + } + break; /* only -r and -i get here: done with this argument */ + } + argv++; + argc--; + } + /* + * Now check whether the sets should include nonterminals + */ + if (verbose == 2) ntneeded = 1; + else if (! 
verbose) ntneeded = 0; + /* + * Initialise + */ + if (!rec_file) rec_file = libpath("rec"); + if (!incl_file) incl_file = libpath("incl"); + if ((fact = fopen(f_temp,"w")) == NULL) { + fputs("Cannot create temporary\n",stderr); + return 1; + } + name_init(); + readgrammar(argc,argv); + if (nflag) comfatal(); /* -n: only read the grammar; comfatal() removes the temporaries and exits with status 1 */ + setinit(ntneeded); + maxnt = &nonterms[nnonterms]; + max_t_ent = &h_entry[nterminals]; + fclose(fact); /* NOTE(review): fact is not reset here, so a later comfatal() would fclose it again */ + /* + * Now, the grammar is read. Do some computations + */ + co_reach(); /* Check for undefined and unreachable */ + if (nerrors) comfatal(); + createsets(); + co_empty(); /* Which nonterminals produce empty? */ + co_first(); /* Computes first sets */ + co_follow(); /* Computes follow sets */ + co_symb(); /* Computes choice sets in alternations */ + conflchecks(); /* Checks for conflicts etc, and also + * takes care of LL.output etc + */ + if (nerrors) comfatal(); + co_contains(); /* Computes the contains sets */ + co_safes(); /* Computes safe terms and nonterminals. + * Safe means : always called with a terminal + * symbol that is guaranteed to be eaten by + * the term + */ + if (argc-- == 1) { + fputs("No code generation for input from standard input\n",stderr); + } else gencode(argc); + UNLINK(f_temp); + UNLINK(f_pars); + return 0; +} + +STATIC +readgrammar(argc,argv) char *argv[]; { + /* + * Do just what the name suggests : read the grammar + */ + register p_file p; + p_mem alloc(); + + linecount = 0; + f_input = "no filename"; + /* + * Build the file structure + */ + files = p = (p_file) alloc((unsigned) (argc+1) * sizeof(t_file)); + if (argc-- == 1) { + finput = stdin; + p->f_name = f_input = "standard input"; + p->f_firsts = 0; + p->f_start = maxorder; + pfile = p; + LLparse(); + p->f_end = maxorder - 1; + p++; + } else { + while (argc--) { + if ((finput = fopen(f_input=argv[1],"r")) == NULL) { + fatal(0,e_noopen,f_input); + } + linecount = 0; + p->f_name = f_input; + p->f_start = maxorder; + p->f_firsts = 0; + pfile = p; + LLparse(); + p->f_end = 
maxorder-1; + p++; + argv++; + fclose(finput); + } + } + p->f_start = maxorder+1; + p->f_end = maxorder; + if (! lexical) lexical = "yylex"; + /* + * There must be a start symbol! + */ + if (start == 0) { + fatal(linecount,"Missing %%start"); + } + if (nerrors) comfatal(); +} + +/* VARARGS1 */ +error(lineno,s,t,u) string s,t,u; { + /* + * Just an error message + */ + register FILE *f; + + f = stderr; + ++nerrors; + if (lineno) fprintf(f,"\"%s\", line %d : ",f_input,lineno); + else fprintf(f,"\"%s\" : ",f_input); + fprintf(f,s,t,u); + putc('\n',f); +} + +/* VARARGS1 */ +fatal(lineno,s,t,u) string s,t,u; { + /* + * Fatal error + */ + error(lineno,s,t,u); + comfatal(); +} + +comfatal() { + /* + * Some common code for exit on errors + */ + if (fact != NULL) { + fclose(fact); + UNLINK(f_temp); + } + if (fpars != NULL) fclose(fpars); + UNLINK(f_pars); + exit(1); +} + +copyfile(n) { + /* + * Copies a file indicated by the parameter to the stream fpars. + * If n != 0, the error recovery routines are copied, + * otherwise a standard header is. + */ + register c; + register FILE *f; + + if ((f = fopen(n?rec_file:incl_file,"r")) == NULL) { + fatal(0,"Cannot open libraryfile, call an expert"); + } + while ((c = getc(f)) != EOF) putc(c,fpars); /* NOTE(review): f is never closed */ +} + +install(target, source) string target, source; { + /* + * Copy the temporary file generated from source to target + * if allowed (which means that the target must be generated + * by LLgen from the source, or that the target is not present) + */ + register c; + register FILE *f1; + register FILE *f2; + register string s1; + register int i; + char buf[100]; /* NOTE(review): fixed size; filled further down by an unbounded sprintf of LLgenid and source */ + + /* + * First open temporary, generated for source + */ + if ((f1 = fopen(f_pars,"r")) == NULL) { + fatal(0,e_noopen,f_pars); + } + i = 0; + /* + * Now open target for reading + */ + if ((f2 = fopen(target,"r")) == NULL) { + i = 1; + fclose(f1); + } + else { + /* + * Create string recognised by LLgen. The target must + * start with that! 
+ */ + (int) sprintf(buf,LLgenid,source ? source : "."); + s1 = buf; + while (*s1 != '\0' && *s1++ == getc(f2)) { /* nothing */ } + /* + * Ai,ai, it did not + */ + if (*s1 != '\0') { + fatal(0,"%s : not a file generated by LLgen",target); + } + rewind(f2); + /* + * Now compare the target with the temporary + */ + while ((c = getc(f1)) != EOF && c == getc(f2)) { /* nothing */} + if (c != EOF || getc(f2) != EOF) i = 1; + fclose(f1); + fclose(f2); + } + /* + * Here, if i != 0 the target must be recreated + */ + if (i) RENAME(f_pars,target); +} + +#ifndef NDEBUG +badassertion(asstr,file,line) char *asstr, *file; { + + fprintf(stderr,"Assertion \"%s\" failed %s(%d)\n",asstr,file,line); + if (fact != NULL) fclose(fact); + if (fpars != NULL) fclose(fpars); + abort(); +} +#endif diff --git a/util/LLgen/src/name.c b/util/LLgen/src/name.c new file mode 100644 index 000000000..fc5f10063 --- /dev/null +++ b/util/LLgen/src/name.c @@ -0,0 +1,239 @@ +/* + * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands. + * + * This product is part of the Amsterdam Compiler Kit. + * + * Permission to use, sell, duplicate or disclose this software must be + * obtained in writing. Requests for such permissions may be sent to + * + * Dr. Andrew S. Tanenbaum + * Wiskundig Seminarium + * Vrije Universiteit + * Postbox 7161 + * 1007 MC Amsterdam + * The Netherlands + * + */ + +/* + * L L G E N + * + * An Extended LL(1) Parser Generator + * + * Author : Ceriel J.H. 
Jacobs + */ + +/* + * name.c + * Defines the symbol table search routine and an initialising routine + */ + +# include "types.h" +# include "tunable.h" +# include "extern.h" +# include "assert.h" +# include "io.h" + +static string rcsid = "$Header$"; + +# define HASHSIZE 128 + +static char name[NAMESZ]; /* space for names */ +static int iname; /* index of the first free byte in name[] */ +static p_entry h_root[HASHSIZE]; /* hash table */ +static string e_literal = "Illegal literal"; + +/* Defined in this file are: */ +extern string store(); +extern name_init(); +STATIC int hash(); +extern t_gram search(); + +string +store(s) register string s; { + /* + * Store a copy of string s in the name table and return a pointer to it + */ + register string t,u; + + u = t = &name[iname]; + do { if (u > &name[NAMESZ-1]) fatal(linecount,"name table overflow"); + else *u++ = *s; + } while (*s++); + iname = u - name; + return t; +} + +name_init() { + /* + * Initialise hash-table and enter special terminal EOFILE + */ + register p_entry *p; + t_gram search(); + + for(p = h_root; p<= &h_root[HASHSIZE-1]; p++) *p = 0; + search(TERMINAL,"EOFILE",ENTERING); +} + +STATIC int +hash(str) string str; { + /* + * Compute the hash for string str (byte sum plus length, modulo HASHSIZE) + */ + register i; + register string l; + + l = str; + i = 0; + while (*l != '\0') i += *l++ & 0377; + i += l - str; + return i % HASHSIZE; +} + +t_gram +search(type,str,option) register string str; { + /* + * Search for object str. + * It has type UNKNOWN, LITERAL, TERMINAL or NONTERM. + * option can be ENTERING, JUSTLOOKING or BOTH. 
+ */ + register int val; + register p_entry p; + t_gram r; + register int i; + + g_init(&r); + g_setcont(&r,UNDEFINED); + r.g_lineno = linecount; + i = hash(str); + /* + * Walk hash chain + */ + for (p = h_root[i]; p != (p_entry) 0; p = p->h_next) { + if(!strcmp(p->h_name,str)) { + val = p - h_entry; + if (type == LITERAL && + (val >= NTERMINALS || p->h_num >= 0400)) continue; + if (val>=NTERMINALS) { + /* Should be a nonterminal */ + if (type == TERMINAL) { + error(linecount, + "%s : terminal expected", + str); + } + g_settype(&r,NONTERM); + g_setnont(&r,val - NTERMINALS); + } else { + if (type != LITERAL && p->h_num < 0400) { + continue; + } + if (type == NONTERM) { + error(linecount, + "%s : nonterminal expected", + str); + continue; + } + g_setnont(&r, val); + g_settype(&r, TERMINAL); + } + if (option==ENTERING) { + error(linecount, + "%s : already defined",str); + } + return r; + } + } + if (option == JUSTLOOKING) return r; + if (type == TERMINAL || type == LITERAL) { + if (nterminals == NTERMINALS) { + fatal(linecount,"too many terminals"); + } + p = &h_entry[nterminals]; + } else { + /* + * type == NONTERM || type == UNKNOWN + * UNKNOWN and not yet declared means : NONTERM + */ + if (nnonterms == NNONTERMS) { + fatal(linecount,"too many nonterminals"); + } + p = &h_entry[NTERMINALS+nnonterms]; + } + p->h_name = store(str); + p->h_next = h_root[i]; + h_root[i] = p; + if (type == NONTERM || type == UNKNOWN) { + register p_nont q; + + q = &nonterms[nnonterms]; + q->n_rule = 0; + q->n_string = f_input; + q->n_follow = 0; + q->n_flags = 0; + q->n_contains = 0; + p->h_num = 0; + g_settype(&r, NONTERM); + g_setnont(&r, nnonterms); + nnonterms++; + return r; + } + if (type == LITERAL) { + if (str[0] == '\\') { + /* + * Handle escapes in literals + */ + if (str[2] == '\0') { + switch(str[1]) { + case 'n' : + val = '\n'; + break; + case 'r' : + val = '\r'; + break; + case 'b' : + val = '\b'; + break; + case 'f' : + val = '\f'; + break; + case 't' : + val = '\t'; + 
break; + case '\'': + val = '\''; + break; + case '\\': + val = '\\'; + break; + default : + error(linecount,e_literal); + } + } else { + /* + * Here, str[2] != '\0' + */ + if (str[1] > '3' || str[1] < '0' || + str[2] > '7' || str[2] < '0' || + str[3] > '7' || str[3] < '0' || + str[4] != '\0') error(linecount,e_literal); + val = 64*str[1] - 73*'0' + 8*str[2] + str[3]; + } + } else { + /* + * No escape in literal + */ + if (str[1] == '\0') val = str[0]; + else error(linecount,e_literal); + } + p->h_num = val; + } else { + /* + * Here, type = TERMINAL + */ + p->h_num = assval++; + } + g_settype(&r, TERMINAL); + g_setnont(&r, nterminals); + nterminals++; + return r; +} diff --git a/util/LLgen/src/reach.c b/util/LLgen/src/reach.c new file mode 100644 index 000000000..53d5a059a --- /dev/null +++ b/util/LLgen/src/reach.c @@ -0,0 +1,121 @@ +/* + * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands. + * + * This product is part of the Amsterdam Compiler Kit. + * + * Permission to use, sell, duplicate or disclose this software must be + * obtained in writing. Requests for such permissions may be sent to + * + * Dr. Andrew S. Tanenbaum + * Wiskundig Seminarium + * Vrije Universiteit + * Postbox 7161 + * 1007 MC Amsterdam + * The Netherlands + * + */ + +/* + * L L G E N + * + * An Extended LL(1) Parser Generator + * + * Author : Ceriel J.H. Jacobs + */ + +/* + * reach.c + * Determine which nonterminals are reachable, and also check that they + * are all defined. + */ + +# include "tunable.h" +# include "types.h" +# include "extern.h" +# include "io.h" +# include "assert.h" + +static string rcsid = "$Header$"; + +/* In this file the following routines are defined: */ +extern co_reach(); +STATIC reachable(); +STATIC reachwalk(); + +co_reach() { + /* + * Check for undefined or unreachable nonterminals. + * An undefined nonterminal is a fatal error! 
+ */ + register p_nont p; + register p_start st; + register p_file x = files; + register int *s; + + /* Check for undefined nonterminals */ + for (p = nonterms; p < maxnt; p++) { + if (! p->n_rule) { + f_input = p->n_string; + fatal(p->n_lineno,"nonterminal %s not defined", + (min_nt_ent + (p - nonterms))->h_name); + } + } + /* + * Walk the grammar rules, starting with the startsymbols + * Mark the nonterminals that are encountered with the flag + * REACHABLE, and walk their rules, if not done before + */ + for (st = start; st; st = st->ff_next) reachable(st->ff_nont); + /* + * Now check for unreachable nonterminals + */ + for (; x->f_end < maxorder; x++) { + f_input = x->f_name; + for (s = x->f_start; s <= x->f_end; s++) { + p = &nonterms[*s]; + if (! (p->n_flags & REACHABLE)) { + error(p->n_lineno,"nonterminal %s unreachable", + (min_nt_ent + (p - nonterms))->h_name); + } + } + } +} + +STATIC +reachable(p) register p_nont p; { + /* + * Enter the fact that p is reachable, and look for implications + */ + if (! (p->n_flags & REACHABLE)) { + p->n_flags |= REACHABLE; + /* + * Now walk its grammar rule + */ + reachwalk(p->n_rule); + } +} + +STATIC +reachwalk(p) register p_gram p; { + /* + * Walk through rule p, looking for nonterminals. + * The nonterminals found are entered as reachable + */ + + for (;;) { + switch(g_gettype(p)) { + case ALTERNATION : + reachwalk(((p_link) pentry[g_getcont(p)])->l_rule); + break; + case TERM : + reachwalk(((p_term) pentry[g_getcont(p)])->t_rule); + break; + case NONTERM : + reachable(&nonterms[g_getnont(p)]); + break; + case EORULE : + return; + } + p++; + } +} diff --git a/util/LLgen/src/sets.h b/util/LLgen/src/sets.h new file mode 100644 index 000000000..bee31fcbe --- /dev/null +++ b/util/LLgen/src/sets.h @@ -0,0 +1,45 @@ +/* + * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands. + * + * This product is part of the Amsterdam Compiler Kit. 
+ * + * Permission to use, sell, duplicate or disclose this software must be + * obtained in writing. Requests for such permissions may be sent to + * + * Dr. Andrew S. Tanenbaum + * Wiskundig Seminarium + * Vrije Universiteit + * Postbox 7161 + * 1007 MC Amsterdam + * The Netherlands + * + */ + +/* + * L L G E N + * + * An Extended LL(1) Parser Generator + * + * Author : Ceriel J.H. Jacobs + */ + +/* + * sets.h $Header$ + * Some macros that deal with bitsets and their size + */ + +# define BITS (8 * sizeof (int)) +# define IN(a,i) ((a)[(i)/BITS] & (1<<((i) % BITS))) +# define NTIN(a,i) ((a)[((i)+tbitset)/BITS]&(1<<((i)%BITS))) +# define PUTIN(a,i) ((a)[(i)/BITS] |=(1<<((i) % BITS))) +# define NTPUTIN(a,i) ((a)[((i)+tbitset)/BITS]|=(1<<((i)%BITS))) +# define NBYTES(n) (((n) + 7) / 8) +/* + * The next two macros operate on byte counts! + */ +# define NINTS(n) (((n) + (int) (sizeof(int) - 1)) / (int) sizeof(int)) +# define ALIGN(n) (NINTS(n) * (int) sizeof (int)) + +extern int tbitset; +extern p_set *setptr,*maxptr,*topptr; +extern int tsetsize,setsize; diff --git a/util/LLgen/src/tokens.g b/util/LLgen/src/tokens.g new file mode 100644 index 000000000..86fa7792e --- /dev/null +++ b/util/LLgen/src/tokens.g @@ -0,0 +1,435 @@ +/* + * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands. + * + * This product is part of the Amsterdam Compiler Kit. + * + * Permission to use, sell, duplicate or disclose this software must be + * obtained in writing. Requests for such permissions may be sent to + * + * Dr. Andrew S. Tanenbaum + * Wiskundig Seminarium + * Vrije Universiteit + * Postbox 7161 + * 1007 MC Amsterdam + * The Netherlands + * + */ + +/* + * L L G E N + * + * An Extended LL(1) Parser Generator + * + * Author : Ceriel J.H. Jacobs + */ + +/* + * tokens.g + * Defines the tokens for the grammar of LLgen. + * The lexical analyser and LLmes are also included here. 
+ */ + +{ +# include "types.h" +# include "io.h" +# include "tunable.h" +# include "extern.h" +# include "assert.h" + +static string rcsid = "$Header$"; + +/* Here are defined : */ +extern int scanner(); +extern LLmessage(); +extern int input(); +extern unput(); +extern skipcomment(); +STATIC linedirective(); +STATIC string cpy(); +STATIC string vallookup(); +} + +/* Classes */ + +%token C_IDENT ; /* lextoken.t_string contains the identifier read */ +%token C_NUMBER ; /* lextoken.t_num contains the number read */ +%token C_LITERAL ; /* lextoken.t_string contains the literal read */ + +/* Keywords */ + +%token C_TOKEN ; +%token C_START ; +%token C_IF ; +%token C_WHILE ; +%token C_PERSISTENT ; +%token C_FIRST ; +%token C_LEXICAL ; +%token C_AVOID ; +%token C_PREFER ; +%token C_DEFAULT ; + +%lexical scanner ; + +{ + +/* + * Structure for a keyword + */ + +struct keyword { + string w_word; + int w_value; +}; + +/* + * The list of keywords, the most often used keywords come first. + * Linear search is used, as there are not many keywords + */ + +static struct keyword resword[] = { + { "token", C_TOKEN }, + { "avoid", C_AVOID }, + { "prefer", C_PREFER }, + { "persistent", C_PERSISTENT }, + { "default", C_DEFAULT }, + { "if", C_IF }, + { "while", C_WHILE }, + { "first", C_FIRST }, + { "start", C_START }, + { "lexical", C_LEXICAL }, + { 0, 0 } +}; + +static t_token savedtok; /* to save lextoken in case of an insertion */ +static int nostartline; /* = 0 if at the start of a line */ + +scanner() { + /* + * Lexical analyser, what else + */ + register ch; /* Current char */ + register i; + register reserved = 0; /* reserved word? */ + int last; /* Char before current char */ + + if (savedtok.t_tokno) { /* + * A token has been inserted. 
+ * Now deliver the last lextoken again + */ + lextoken = savedtok; + savedtok.t_tokno = 0; + return lextoken.t_tokno; + } + for (;;) { /* + * First, skip space, comments, line directives, etc + */ + do ch = input(); + while(isspace(ch)); + if (ch == '/') skipcomment(0); + else if (ch == '#' && !nostartline) linedirective(); + else break; + } + /* + * Now we have a first character of a token + */ + switch(ch) { + case EOF : + return EOF; + case '\'': /* + * Literal, put it in ltext + */ + i = 0; + for (;;) { + last = ch; + ch = input(); + if (ch == '\n' || ch == EOF) { + error(linecount,"missing '"); + break; + } + if (ch == '\'' && last != '\\') break; + ltext[i] = ch; + if (i < LTEXTSZ - 1) ++i; + } + ltext[i] = '\0'; + lextoken.t_string = ltext; + return C_LITERAL; + case '%' : /* + * Start of a reserved word + */ + reserved = 1; + ch = input(); + /* Fall through */ + default : + i = 0; + if (isdigit(ch)) { + if (reserved) { + error(linecount," A reserved number ?"); + } + while (isdigit(ch)) { + i = 10 * i + (ch - '0'); + ch= input(); + } + lextoken.t_num = i; + unput(ch); + return C_NUMBER; + } + if (isalpha(ch) || ch == '_') { + do { + if (reserved && isupper(ch)) ch += 'a' - 'A'; + ltext[i] = ch; + if (i < LTEXTSZ - 1) ++i; + ch = input(); + } while (isalnum(ch) || ch == '_'); + } else return ch; + unput(ch); + } + ltext[i] = '\0'; + if (reserved) { /* + * Now search for the keyword + */ + register struct keyword *w; + + w = resword; + while (w->w_word) { + if (! strcmp(ltext,w->w_word)) { + /* + * Found it. Return token number. + */ + return w->w_value; + } + w++; + } + error(linecount,"illegal reserved word"); + } + lextoken.t_string = ltext; + return C_IDENT; +} + +static int backupc; /* for unput() */ +static int nonline; /* = 1 if last char read was a newline */ + +input() { + /* + * Low level input routine, used by all other input routines + */ + register c; + register FILE *f; + + if(backupc) { /* + * Last char was "unput()". 
Deliver it again + */ + c = backupc; + backupc = 0; + return c; + } + f = finput; + if ((c = getc(f)) == EOF) return c; + nostartline = 1; + if (!nonline) { + linecount++; + nostartline = 0; + nonline = 1; + } + if (c == ' ' || c == '\t') { /* + * Deliver space, but only once + */ + do c = getc(f); + while (c == ' ' || c == '\t'); + ungetc(c,f); + return ' '; + } + if (c == '\n') nonline = 0; + return c; +} + +unput(c) { + /* + * "unread" c + */ + backupc = c; +} + +skipcomment(flag) { + /* + * Skip comment. If flag != 0, the comment is inside a fragment + * of C-code, so the newlines in it must be copied to enable the + * C-compiler to keep a correct line count + */ + register ch; + int saved; /* line count on which comment starts */ + + saved = linecount; + if (input() != '*') error(linecount,"illegal comment"); + ch = input(); + while (ch != EOF) { + if (flag && ch == '\n') putc(ch,fact); + while (ch == '*') { + if ((ch = input()) == '/') return; + if (flag && ch == '\n') putc(ch,fact); + } + ch = input(); + } + error(saved,"Comment does not terminate"); +} + +STATIC +linedirective() { + /* + * Read a line directive + */ + register ch; + register i; + string s_error = "Illegal line directive"; + string store(); + register string c; + + do { /* + * Skip to next digit + * Do not skip newlines + */ + ch = input(); + } while (ch != '\n' && ! 
isdigit(ch)); + if (ch == '\n') { + error(linecount,s_error); + return; + } + i = ch - '0'; + ch = input(); + while (isdigit(ch)) { + i = i*10 + (ch - '0'); + ch = input(); + } + while (ch != '\n' && ch != '"') ch = input(); + if (ch == '"') { + c = ltext; + do { + *c++ = ch = input(); + } while (ch != '"' && ch != '\n'); + if (ch == '\n') { + error(linecount,s_error); + return; + } + *--c = '\0'; + do { + ch = input(); + } while (ch != '\n'); + /* + * Remember the file name + */ + if (strcmp(f_input,ltext)) f_input = store(ltext); + } + linecount = i; +} + +STATIC string +vallookup(s) { + /* + * Look up the keyword that has token number s + */ + register struct keyword *p = resword; + + while (p->w_value) { + if (p->w_value == s) return p->w_word; + p++; + } + return 0; +} + +STATIC string +cpy(s,p,flag) register s; register string p; { + /* + * Create a piece of error message for token s and put it at p. + * flag = 0 if the token s was deleted (in which case we have + * attributes), else it was inserted + */ + register string t = 0; + + switch(s) { + case C_IDENT : + if (!flag) t = lextoken.t_string; + else t = "identifier"; + break; + case C_NUMBER : + t = "number"; + break; + case C_LITERAL : + if (!flag) { + *p++ = '"'; + *p++ = '\''; + t = lextoken.t_string; + break; + } + t = "literal"; + break; + case EOFILE : + t = "endoffile"; + break; + } + if (!t) { + t = vallookup(s); + if (t) { + *p++ = '%'; + } + } + if (t) { /* + * We have a string for the token. 
Copy it + */ + while (*t) *p++ = *t++; + if (s == C_LITERAL && !flag) { + *p++ = '\''; + *p++ = '"'; + } + return p; + } + /* + * The token is a literal + */ + *p++ = '\''; + if (s >= 040 && s <= 0176) *p++ = s; + else switch(s) { + case '\b' : *p++ = '\\'; *p++ = 'b'; break; + case '\f' : *p++ = '\\'; *p++ = 'f'; break; + case '\n' : *p++ = '\\'; *p++ = 'n'; break; + case '\r' : *p++ = '\\'; *p++ = 'r'; break; + case '\t' : *p++ = '\\'; *p++ = 't'; break; + default : *p++='0'+((s&0377)>>6); *p++='0'+((s>>3)&07); + *p++='0'+(s&07); + } + *p++ = '\''; + return p; +} + +LLmessage(d) { + /* + * d is either 0, in which case the current token has been deleted, + * or non-zero, in which case it represents a token that is inserted + * before the current token + */ + register string s,t; + char buf[128]; + + nerrors++; + s = buf; + if (d == 0) { + s = cpy(LLsymb,s,0); + t = " deleted"; + do *s++ = *t; while (*t++); + } else { + s = cpy(d,s,1); + t = " inserted in front of "; + do *s++ = *t++; while (*t); + s = cpy(LLsymb,s,0); + *s = '\0'; + } + error(linecount,buf); + if (d) { /* + * Save the current token and make up some + * attributes for the inserted token + */ + savedtok = lextoken; + if (d == C_IDENT) lextoken.t_string = "dummy_identifier"; + else if (d == C_LITERAL) lextoken.t_string = "dummy_literal"; + else if (d == C_NUMBER) lextoken.t_num = 1; + } +} +} diff --git a/util/LLgen/src/tunable.h b/util/LLgen/src/tunable.h new file mode 100644 index 000000000..9714f410a --- /dev/null +++ b/util/LLgen/src/tunable.h @@ -0,0 +1,35 @@ +/* + * (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands. + * + * This product is part of the Amsterdam Compiler Kit. + * + * Permission to use, sell, duplicate or disclose this software must be + * obtained in writing. Requests for such permissions may be sent to + * + * Dr. Andrew S. 
Tanenbaum
+ * Wiskundig Seminarium
+ * Vrije Universiteit
+ * Postbox 7161
+ * 1007 MC Amsterdam
+ * The Netherlands
+ *
+ */
+
+/*
+ * L L G E N
+ *
+ * An Extended LL(1) Parser Generator
+ *
+ * Author : Ceriel J.H. Jacobs
+ */
+
+/*
+ * tunable.h $Header$
+ * Compile-time limits of LLgen; change them here to resize its tables.
+ */
+
+#define NNONTERMS	150	/* number of entries in the nonterminal array */
+#define NTERMINALS	150	/* number of entries in the terminal array */
+#define NAMESZ		3000	/* size of the name table */
+#define LTEXTSZ		51	/* size of a token; the scanner keeps at
+				 * most LTEXTSZ - 1 characters of it
+				 */
+#define ENTSIZ		900	/* size of the entry table, at most 8191 */