From 1d37165575359df88a3ac68228da0ba183f141f3 Mon Sep 17 00:00:00 2001 From: eck Date: Mon, 23 Oct 1989 10:35:56 +0000 Subject: [PATCH] Initial revision --- lang/cem/cpp.ansi/LLlex.c | 549 ++++++++++++++++++++++++ lang/cem/cpp.ansi/LLlex.h | 42 ++ lang/cem/cpp.ansi/LLmessage.c | 25 ++ lang/cem/cpp.ansi/Makefile | 256 ++++++++++++ lang/cem/cpp.ansi/Parameters | 74 ++++ lang/cem/cpp.ansi/arith.h | 18 + lang/cem/cpp.ansi/bits.h | 18 + lang/cem/cpp.ansi/ch3bin.c | 80 ++++ lang/cem/cpp.ansi/ch3mon.c | 25 ++ lang/cem/cpp.ansi/char.tab | 67 +++ lang/cem/cpp.ansi/class.h | 48 +++ lang/cem/cpp.ansi/domacro.c | 738 +++++++++++++++++++++++++++++++++ lang/cem/cpp.ansi/error.c | 106 +++++ lang/cem/cpp.ansi/expr.c | 58 +++ lang/cem/cpp.ansi/expression.g | 129 ++++++ lang/cem/cpp.ansi/file_info.h | 18 + lang/cem/cpp.ansi/idf.c | 7 + lang/cem/cpp.ansi/idf.h | 16 + lang/cem/cpp.ansi/init.c | 92 ++++ lang/cem/cpp.ansi/input.c | 61 +++ lang/cem/cpp.ansi/input.h | 15 + lang/cem/cpp.ansi/macro.str | 56 +++ lang/cem/cpp.ansi/main.c | 84 ++++ lang/cem/cpp.ansi/make.allocd | 8 + lang/cem/cpp.ansi/make.hfiles | 35 ++ lang/cem/cpp.ansi/make.next | 3 + lang/cem/cpp.ansi/make.tokcase | 34 ++ lang/cem/cpp.ansi/make.tokfile | 6 + lang/cem/cpp.ansi/nccp.6 | 74 ++++ lang/cem/cpp.ansi/options.c | 133 ++++++ lang/cem/cpp.ansi/preprocess.c | 326 +++++++++++++++ lang/cem/cpp.ansi/replace.c | 703 +++++++++++++++++++++++++++++++ lang/cem/cpp.ansi/replace.str | 48 +++ lang/cem/cpp.ansi/skip.c | 77 ++++ lang/cem/cpp.ansi/tokenname.c | 72 ++++ 35 files changed, 4101 insertions(+) create mode 100644 lang/cem/cpp.ansi/LLlex.c create mode 100644 lang/cem/cpp.ansi/LLlex.h create mode 100644 lang/cem/cpp.ansi/LLmessage.c create mode 100644 lang/cem/cpp.ansi/Makefile create mode 100644 lang/cem/cpp.ansi/Parameters create mode 100644 lang/cem/cpp.ansi/arith.h create mode 100644 lang/cem/cpp.ansi/bits.h create mode 100644 lang/cem/cpp.ansi/ch3bin.c create mode 100644 lang/cem/cpp.ansi/ch3mon.c create mode 100644 
lang/cem/cpp.ansi/char.tab create mode 100644 lang/cem/cpp.ansi/class.h create mode 100644 lang/cem/cpp.ansi/domacro.c create mode 100644 lang/cem/cpp.ansi/error.c create mode 100644 lang/cem/cpp.ansi/expr.c create mode 100644 lang/cem/cpp.ansi/expression.g create mode 100644 lang/cem/cpp.ansi/file_info.h create mode 100644 lang/cem/cpp.ansi/idf.c create mode 100644 lang/cem/cpp.ansi/idf.h create mode 100644 lang/cem/cpp.ansi/init.c create mode 100644 lang/cem/cpp.ansi/input.c create mode 100644 lang/cem/cpp.ansi/input.h create mode 100644 lang/cem/cpp.ansi/macro.str create mode 100644 lang/cem/cpp.ansi/main.c create mode 100755 lang/cem/cpp.ansi/make.allocd create mode 100755 lang/cem/cpp.ansi/make.hfiles create mode 100755 lang/cem/cpp.ansi/make.next create mode 100755 lang/cem/cpp.ansi/make.tokcase create mode 100755 lang/cem/cpp.ansi/make.tokfile create mode 100644 lang/cem/cpp.ansi/nccp.6 create mode 100644 lang/cem/cpp.ansi/options.c create mode 100644 lang/cem/cpp.ansi/preprocess.c create mode 100644 lang/cem/cpp.ansi/replace.c create mode 100644 lang/cem/cpp.ansi/replace.str create mode 100644 lang/cem/cpp.ansi/skip.c create mode 100644 lang/cem/cpp.ansi/tokenname.c diff --git a/lang/cem/cpp.ansi/LLlex.c b/lang/cem/cpp.ansi/LLlex.c new file mode 100644 index 000000000..85ea38215 --- /dev/null +++ b/lang/cem/cpp.ansi/LLlex.c @@ -0,0 +1,549 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ */ +/* $Header$ */ +/* L E X I C A L A N A L Y Z E R */ + +#include "idfsize.h" +#include "numsize.h" +#include "strsize.h" + +#include +#include "input.h" +#include "arith.h" +#include "macro.h" +#include "idf.h" +#include "LLlex.h" +#include "Lpars.h" +#include "class.h" +#include "bits.h" + +#define BUFSIZ 1024 + +struct token dot; + +int ReplaceMacros = 1; /* replacing macros */ +int AccDefined = 0; /* accept "defined(...)" */ +int UnknownIdIsZero = 0; /* interpret unknown id as integer 0 */ +int Unstacked = 0; /* an unstack is done */ +int AccFileSpecifier = 0; /* return filespecifier <...> */ +int LexSave = 0; /* last character read by GetChar */ +extern int InputLevel; /* # of current macro expansions */ + +char *string_token(); +arith char_constant(); +#define FLG_ESEEN 0x01 /* possibly a floating point number */ +#define FLG_DOTSEEN 0x02 /* certainly a floating point number */ + +int +LLlex() +{ + return (DOT != EOF) ? GetToken(&dot) : EOF; +} + + +int +GetToken(ptok) + register struct token *ptok; +{ + /* GetToken() is the actual token recognizer. It calls the + control line interpreter if it encounters a "\n{w}*#" + combination. Macro replacement is also performed if it is + needed. 
+ */ + char buf[BUFSIZ]; + register int ch, nch; + +again: /* rescan the input after an error or replacement */ + ch = GetChar(); +go_on: /* rescan, the following character has been read */ + if ((ch & 0200) && ch != EOI) /* stop on non-ascii character */ + fatal("non-ascii '\\%03o' read", ch & 0377); + /* keep track of the place of the token in the file */ + + switch (class(ch)) { /* detect character class */ + case STNL: /* newline, vertical space or formfeed */ + LineNumber++; + return ptok->tk_symb = EOF; + case STSKIP: /* just skip the skip characters */ + goto again; + case STGARB: /* garbage character */ +garbage: + if (040 < ch && ch < 0177) + error("garbage char %c", ch); + else + error("garbage char \\%03o", ch); + goto again; + case STSIMP: /* a simple character, no part of compound token*/ + return ptok->tk_symb = ch; + case STCOMP: /* maybe the start of a compound token */ + nch = GetChar(); /* character lookahead */ + switch (ch) { + case '!': + if (nch == '=') + return ptok->tk_symb = NOTEQUAL; + UnGetChar(); + return ptok->tk_symb = ch; + case '&': + if (nch == '&') + return ptok->tk_symb = AND; + else if (nch == '=') + return ptok->tk_symb = ANDAB; + UnGetChar(); + return ptok->tk_symb = ch; + case '+': + if (nch == '+') + return ptok->tk_symb = PLUSPLUS; + else if (nch == '=') + return ptok->tk_symb = PLUSAB; + UnGetChar(); + return ptok->tk_symb = ch; + case '-': + if (nch == '-') + return ptok->tk_symb = MINMIN; + else if (nch == '>') + return ptok->tk_symb = ARROW; + else if (nch == '=') + return ptok->tk_symb = MINAB; + UnGetChar(); + return ptok->tk_symb = ch; + case '<': + if (AccFileSpecifier) { + UnGetChar(); /* pushback nch */ + ptok->tk_str = + string_token("file specifier", '>'); + return ptok->tk_symb = FILESPECIFIER; + } else if (nch == '<') { + if ((nch = GetChar()) == '=') + return ptok->tk_symb = LEFTAB; + UnGetChar(); + return ptok->tk_symb = LEFT; + } else if (nch == '=') + return ptok->tk_symb = LESSEQ; + UnGetChar(); + return 
ptok->tk_symb = ch; + case '=': + if (nch == '=') + return ptok->tk_symb = EQUAL; + UnGetChar(); + return ptok->tk_symb = ch; + case '>': + if (nch == '=') + return ptok->tk_symb = GREATEREQ; + else if (nch == '>') { + if ((nch = GetChar()) == '=') + return ptok->tk_symb = RIGHTAB; + UnGetChar(); + return ptok->tk_symb = RIGHT; + } + UnGetChar(); + return ptok->tk_symb = ch; + case '|': + if (nch == '|') + return ptok->tk_symb = OR; + else if (nch == '=') + return ptok->tk_symb = ORAB; + UnGetChar(); + return ptok->tk_symb = ch; + case '%': + if (nch == '=') + return ptok->tk_symb = MODAB; + UnGetChar(); + return ptok->tk_symb = ch; + case '*': + if (nch == '=') + return ptok->tk_symb = TIMESAB; + UnGetChar(); + return ptok->tk_symb = ch; + case '^': + if (nch == '=') + return ptok->tk_symb = XORAB; + UnGetChar(); + return ptok->tk_symb = ch; + case '/': + if (nch == '*' && !InputLevel) { + skipcomment(); + goto again; + } + else if (nch == '=') + return ptok->tk_symb = DIVAB; + UnGetChar(); + return ptok->tk_symb = ch; + default: + crash("bad class for char 0%o", ch); + /* NOTREACHED */ + } + case STCHAR: /* character constant */ + ptok->tk_val = char_constant("character"); + return ptok->tk_symb = INTEGER; + case STSTR: /* string */ + ptok->tk_str = string_token("string", '"'); + return ptok->tk_symb = STRING; + case STELL: /* wide character constant/string prefix */ + nch = GetChar(); + if (nch == '"') { + ptok->tk_str = + string_token("wide character string", '"'); + return ptok->tk_symb = STRING; + } else if (nch == '\'') { + ptok->tk_val = char_constant("wide character"); + return ptok->tk_symb = INTEGER; + } + UnGetChar(); + /* fallthrough */ + case STIDF: + { + extern int idfsize; /* ??? 
*/ + register char *tg = &buf[0]; + register char *maxpos = &buf[idfsize]; + int NoExpandNext = 0; + +#define tstmac(bx) if (!(bits[ch] & bx)) goto nomac +#define cpy *tg++ = ch +#define load (ch = GetChar()); if (!in_idf(ch)) goto endidf + + if (Unstacked) EnableMacros(); /* unstack macro's when allowed. */ + if (ch == NOEXPM) { + NoExpandNext = 1; + ch = GetChar(); + } +#ifdef DOBITS + cpy; tstmac(bit0); load; + cpy; tstmac(bit1); load; + cpy; tstmac(bit2); load; + cpy; tstmac(bit3); load; + cpy; tstmac(bit4); load; + cpy; tstmac(bit5); load; + cpy; tstmac(bit6); load; + cpy; tstmac(bit7); load; +#endif + for(;;) { + if (tg < maxpos) { + cpy; + + } + load; + } + endidf: + /*if (ch != EOI) UnGetChar();*/ + UnGetChar(); + *tg++ = '\0'; /* mark the end of the identifier */ + if (ReplaceMacros) { + register struct idf *idef = findidf(buf); + + if (idef && idef->id_macro && !NoExpandNext) { + if (replace(idef)) + goto again; + } + } + + nomac: /* buf can already be null-terminated. soit */ + ch = GetChar(); + while (in_idf(ch)) { + if (tg < maxpos) *tg++ = ch; + ch = GetChar(); + } + UnGetChar(); + *tg++ = '\0'; /* mark the end of the identifier */ + + NoExpandNext = 0; + if (UnknownIdIsZero) { + ptok->tk_val = (arith)0; + return ptok->tk_symb = INTEGER; + } + ptok->tk_str = Malloc(tg - buf); + strcpy(ptok->tk_str, buf); + return IDENTIFIER; + } + case STNUM: /* a numeric constant */ + { /* it may only be an integer constant */ + register int base = 10, val = 0, vch; + + /* Since the preprocessor only knows integers and has + * nothing to do with ellipsis we just return when the + * pp-number starts with a '.' + */ + if (ch == '.') { + return ptok->tk_symb = ch; + } + if (ch == '0') { + ch = GetChar(); + if (ch == 'x' || ch == 'X') { + base = 16; + ch = GetChar(); + } else { + base = 8; + } + + } + while ((vch = val_in_base(ch, base)) >= 0) { + val = val * base + vch; /* overflow? 
nah */ + ch = GetChar(); + } + while (ch == 'l' || ch == 'L' || ch == 'u' || ch == 'U') + ch = GetChar(); + UnGetChar(); + ptok->tk_val = val; + return ptok->tk_symb = INTEGER; + } + case STEOI: /* end of text on source file */ + return ptok->tk_symb = EOF; + case STMSPEC: + if (!InputLevel) goto garbage; + if (ch == TOKSEP) goto again; + /* fallthrough shouldn't happen */ + default: /* this cannot happen */ + crash("bad class for char 0%o", ch); + } + /*NOTREACHED*/ +} + +skipcomment() +{ + /* The last character read has been the '*' of '/_*'. The + characters, except NL and EOI, between '/_*' and the first + occurring '*_/' are not interpreted. + NL only affects the LineNumber. EOI is not legal. + + Important note: it is not possible to stop skipping comment + beyond the end-of-file of an included file. + EOI is returned by LoadChar only on encountering EOF of the + top-level file... + */ + register int c; + + NoUnstack++; + c = GetChar(); + do { + while (c != '*') { + if (class(c) == STNL) { + ++LineNumber; + } else if (c == EOI) { + NoUnstack--; + return; + } + c = GetChar(); + } /* last Character seen was '*' */ + c = GetChar(); + } while (c != '/'); + NoUnstack--; +} + +arith +char_constant(nm) + char *nm; +{ + register arith val = 0; + register int ch; + int size = 0; + + ch = GetChar(); + if (ch == '\'') + error("%s constant too short", nm); + else + while (ch != '\'') { + if (ch == '\n') { + error("newline in %s constant", nm); + LineNumber++; + break; + } + if (ch == '\\') + ch = quoted(GetChar()); + if (ch >= 128) ch -= 256; + if (size < (int)size) + val |= ch << 8 * size; + size++; + ch = GetChar(); + } + if (size > 1) + strict("%s constant includes more than one character", nm); + if (size > sizeof(arith)) + error("%s constant too long", nm); + return val; +} + +char * +string_token(nm, stop_char) + char *nm; +{ + register int ch; + register int str_size; + register char *str = Malloc((unsigned) (str_size = ISTRSIZE)); + register int pos = 0; + + ch = 
GetChar(); + while (ch != stop_char) { + if (ch == '\n') { + error("newline in %s", nm); + LineNumber++; + break; + } + if (ch == EOI) { + error("end-of-file inside %s", nm); + break; + } + if (ch == '\\' && !AccFileSpecifier) + ch = quoted(GetChar()); + str[pos++] = ch; + if (pos == str_size) + str = Realloc(str, str_size <<= 1); + ch = GetChar(); + } + str[pos++] = '\0'; /* for filenames etc. */ + str = Realloc(str, pos); + return str; +} + +int +quoted(ch) + register int ch; +{ + /* quoted() replaces an escaped character sequence by the + character meant. + */ + /* first char after backslash already in ch */ + if (!is_oct(ch)) { /* a quoted char */ + switch (ch) { + case 'n': + ch = '\n'; + break; + case 't': + ch = '\t'; + break; + case 'b': + ch = '\b'; + break; + case 'r': + ch = '\r'; + break; + case 'f': + ch = '\f'; + break; + case 'a': /* alert */ + ch = '\007'; + break; + case 'v': /* vertical tab */ + ch = '\013'; + break; + case 'x': /* quoted hex */ + { + register int hex = 0; + register int vch; + + for (;;) { + ch = GetChar(); + if (vch = val_in_base(ch, 16), vch == -1) + break; + hex = hex * 16 + vch; + } + UnGetChar(); + ch = hex; + } + } + } + else { /* a quoted octal */ + register int oct = 0, cnt = 0; + + do { + oct = oct*8 + (ch-'0'); + ch = GetChar(); + } while (is_oct(ch) && ++cnt < 3); + UnGetChar(); + ch = oct; + } + return ch&0377; +} + + +int +val_in_base(ch, base) + register int ch; +{ + switch (base) { + case 8: + return (is_dig(ch) && ch < '9') ? ch - '0' : -1; + case 10: + return is_dig(ch) ? ch - '0' : -1; + case 16: + return is_dig(ch) ? ch - '0' + : is_hex(ch) ? (ch - 'a' + 10) & 017 + : -1; + default: + fatal("(val_in_base) illegal base value %d", base); + /* NOTREACHED */ + } +} + + +int +GetChar() +{ + /* The routines GetChar and trigraph parses the trigraph + sequences and removes occurences of \\\n. 
+ */ + register int ch; + +again: + LoadChar(ch); + + /* possible trigraph sequence */ + if (ch == '?') + ch = trigraph(); + + /* \\\n are removed from the input stream */ + if (ch == '\\') { + LoadChar(ch); + if (ch == '\n') { + ++LineNumber; + goto again; + } + PushBack(); + ch = '\\'; + } + return(LexSave = ch); +} + + +int +trigraph() +{ + register int ch; + + LoadChar(ch); + if (ch == '?') { + LoadChar(ch); + switch (ch) { /* its a trigraph */ + case '=': + ch = '#'; + return(ch); + case '(': + ch = '['; + return(ch); + case '/': + ch = '\\'; + return(ch); + case ')': + ch = ']'; + return(ch); + case '\'': + ch = '^'; + return(ch); + case '<': + ch = '{'; + return(ch); + case '!': + ch = '|'; + return(ch); + case '>': + ch = '}'; + return(ch); + case '-': + ch = '~'; + return(ch); + } + PushBack(); + } + PushBack(); + return('?'); +} diff --git a/lang/cem/cpp.ansi/LLlex.h b/lang/cem/cpp.ansi/LLlex.h new file mode 100644 index 000000000..d3bd91574 --- /dev/null +++ b/lang/cem/cpp.ansi/LLlex.h @@ -0,0 +1,42 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +/* D E F I N I T I O N S F O R T H E L E X I C A L A N A L Y Z E R */ + +/* A token from the input stream is represented by an integer, + called a "symbol", but it may have other information associated + to it. 
+*/ + +/* the structure of a token: */ +struct token { + int tok_symb; /* the token itself */ + union { + char *tok_str; + arith tok_val; /* for INTEGER */ + } tok_data; +}; + +#include "file_info.h" + +#define tk_symb tok_symb +#define tk_str tok_data.tok_str +#define tk_val tok_data.tok_val + +extern struct token dot; + +extern int ReplaceMacros; /* "LLlex.c" */ +extern int AccDefined; /* "LLlex.c" */ +extern int Unstacked; /* "LLlex.c" */ +extern int UnknownIdIsZero; /* "LLlex.c" */ +extern int AccFileSpecifier; /* "LLlex.c" */ + +extern int NoUnstack; /* buffer.c */ + +extern int err_occurred; /* "error.c" */ + +#define DOT dot.tk_symb + +#define EOF (-1) diff --git a/lang/cem/cpp.ansi/LLmessage.c b/lang/cem/cpp.ansi/LLmessage.c new file mode 100644 index 000000000..34b0a265d --- /dev/null +++ b/lang/cem/cpp.ansi/LLmessage.c @@ -0,0 +1,25 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +/* PARSER ERROR ADMINISTRATION */ + +#include "arith.h" +#include "LLlex.h" +#include "Lpars.h" + +extern char *symbol2str(); + +LLmessage(tk) { + err_occurred = 1; + if (tk < 0) + error("garbage at end of line"); + else if (tk) { + error("%s missing", symbol2str(tk)); + if (DOT != EOF) SkipToNewLine(0); + DOT = EOF; + } + else + error("%s deleted", symbol2str(DOT)); +} diff --git a/lang/cem/cpp.ansi/Makefile b/lang/cem/cpp.ansi/Makefile new file mode 100644 index 000000000..bc0b941b3 --- /dev/null +++ b/lang/cem/cpp.ansi/Makefile @@ -0,0 +1,256 @@ +# MAKEFILE FOR (STAND_ALONE) CEM PREPROCESSOR + +EMHOME=../../.. 
+MODULES=$(EMHOME)/modules +MODULESLIB=$(MODULES)/lib +BIN=$(EMHOME)/lib +MANDIR=$(EMHOME)/man + +# Some paths + +# Libraries +SYSLIB = $(MODULESLIB)/libsystem.a +STRLIB = $(MODULESLIB)/libstring.a +PRTLIB = $(MODULESLIB)/libprint.a +ALLOCLIB = $(MODULESLIB)/liballoc.a +ASSERTLIB = $(MODULESLIB)/libassert.a +MALLOC = $(MODULESLIB)/malloc.o +LIBS = $(PRTLIB) $(STRLIB) $(ALLOCLIB) $(MALLOC) $(ASSERTLIB) $(SYSLIB) +LIB_INCLUDES = -I$(MODULES)/h -I$(MODULES)/pkg + +# Where to install the preprocessor +CEMPP = $(BIN)/ncpp + +TABGEN = $(EMHOME)/bin/tabgen + +# What C compiler to use and how +CC = fcc +COPTIONS = +LDFLAGS = -i + +# What parser generator to use and how +GEN = $(EMHOME)/bin/LLgen +GENOPTIONS = + +# Special #defines during compilation +CDEFS = $(LIB_INCLUDES) +CFLAGS = $(CDEFS) $(COPTIONS) -O# # we cannot pass the COPTIONS to lint! + +# Grammar files and their objects +LSRC = tokenfile.g expression.g +LCSRC = tokenfile.c expression.c Lpars.c +LOBJ = tokenfile.o expression.o Lpars.o + +# Objects of hand-written C files +CSRC = LLlex.c LLmessage.c ch7bin.c ch7mon.c domacro.c \ + error.c idf.c init.c input.c main.c options.c \ + preprocess.c replace.c skip.c tokenname.c expr.c +COBJ = LLlex.o LLmessage.o ch7bin.o ch7mon.o domacro.o \ + error.o idf.o init.o input.o main.o options.o \ + preprocess.o replace.o skip.o tokenname.o next.o expr.o + +PRFILES = Makefile Parameters \ + make.hfiles make.tokcase make.tokfile LLlex.h bits.h file_info.h \ + idf.h input.h interface.h macro.str replace.str \ + class.h char.tab expression.g $(CSRC) + +# Objects of other generated C files +GOBJ = char.o symbol2str.o + +# generated source files +GSRC = char.c symbol2str.c + +# .h files generated by `make.allocd' +STRSRC = macro.str replace.str +GSTRSRC = macro.h replace.h + +# .h files generated by `make hfiles'; PLEASE KEEP THIS UP-TO-DATE! 
+GHSRC = errout.h idfsize.h ifdepth.h lapbuf.h \ + nparams.h numsize.h obufsize.h argbuf.h \ + parbufsize.h pathlength.h strsize.h textsize.h \ + botch_free.h debug.h inputtype.h dobits.h line_prefix.h + +# Other generated files, for 'make clean' only +GENERATED = tokenfile.g Lpars.h LLfiles LL.output lint.out \ + Xref hfiles cfiles next.c tags Makefile.old + +all: cc + +cc: hfiles LLfiles + make "EMHOME="$(EMHOME) ncpp + +hfiles: Parameters char.c + ./make.hfiles Parameters + @touch hfiles + +.SUFFIXES: .str .h +.str.h: + ./make.allocd <$*.str >$*.h + +char.c: char.tab + $(TABGEN) -fchar.tab > char.c + +next.c: make.next $(STRSRC) + ./make.next $(STRSRC) >next.c + +macro.h: make.allocd +repl.h: make.allocd + +LLfiles: $(LSRC) + $(GEN) $(GENOPTIONS) $(LSRC) + @touch LLfiles + +tokenfile.g: tokenname.c make.tokfile + tokenfile.g + +symbol2str.c: tokenname.c make.tokcase + symbol2str.c + +# Objects needed for 'ncpp' +OBJ = $(COBJ) $(LOBJ) $(GOBJ) +SRC = $(CSRC) $(LCSRC) $(GSRC) + +ncpp: $(OBJ) Makefile + $(CC) $(COPTIONS) $(LDFLAGS) $(OBJ) $(LIBS) -o ncpp + -size ncpp + +cfiles: hfiles LLfiles $(GSRC) $(GSTRSRC) + @touch cfiles + +install: all + rm -f $(CEMPP) + cp ncpp $(CEMPP) + rm -f $(MANDIR)/ncpp.6 + cp ncpp.6 $(MANDIR)/ncpp.6 + +cmp: all + -cmp ncpp $(CEMPP) + -cmp ncpp.6 $(MANDIR)/ncpp.6 + +pr: + @pr $(PRFILES) + +opr: + make pr | opr + +tags: cfiles + ctags $(SRC) + +depend: cfiles + sed '/^#AUTOAUTO/,$$d' Makefile >Makefile.new + echo '#AUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTO' >>Makefile.new + $(EMHOME)/bin/mkdep $(SRC) | \ + sed 's/\.c:/.o:/' >>Makefile.new + mv Makefile Makefile.old + mv Makefile.new Makefile + +xref: + ctags -x `grep "\.[ch]" Files`|sed "s/).*/)/">Xref + +lint: cfiles + lint -bx $(CDEFS) $(SRC) >lint.out + +clean: + rm -f $(LCSRC) $(OBJ) $(GENERATED) $(GSRC) $(GHSRC) $(GSTRSRC) ncpp Out + +#AUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTO +LLlex.o: LLlex.h +LLlex.o: Lpars.h +LLlex.o: arith.h +LLlex.o: bits.h +LLlex.o: class.h +LLlex.o: dobits.h 
+LLlex.o: file_info.h +LLlex.o: idf.h +LLlex.o: idfsize.h +LLlex.o: input.h +LLlex.o: inputtype.h +LLlex.o: macro.h +LLlex.o: numsize.h +LLlex.o: strsize.h +LLmessage.o: LLlex.h +LLmessage.o: Lpars.h +LLmessage.o: file_info.h +ch7bin.o: Lpars.h +ch7bin.o: arith.h +ch7mon.o: Lpars.h +ch7mon.o: arith.h +domacro.o: LLlex.h +domacro.o: Lpars.h +domacro.o: arith.h +domacro.o: bits.h +domacro.o: botch_free.h +domacro.o: class.h +domacro.o: dobits.h +domacro.o: file_info.h +domacro.o: idf.h +domacro.o: idfsize.h +domacro.o: ifdepth.h +domacro.o: input.h +domacro.o: inputtype.h +domacro.o: macro.h +domacro.o: nparams.h +domacro.o: parbufsize.h +domacro.o: textsize.h +error.o: LLlex.h +error.o: errout.h +error.o: file_info.h +idf.o: idf.h +init.o: class.h +init.o: idf.h +init.o: macro.h +input.o: file_info.h +input.o: input.h +input.o: inputtype.h +main.o: arith.h +main.o: file_info.h +main.o: idfsize.h +options.o: class.h +options.o: idf.h +options.o: idfsize.h +options.o: macro.h +preprocess.o: LLlex.h +preprocess.o: bits.h +preprocess.o: class.h +preprocess.o: dobits.h +preprocess.o: file_info.h +preprocess.o: idf.h +preprocess.o: idfsize.h +preprocess.o: input.h +preprocess.o: inputtype.h +preprocess.o: line_prefix.h +preprocess.o: macro.h +preprocess.o: obufsize.h +replace.o: LLlex.h +replace.o: argbuf.h +replace.o: class.h +replace.o: file_info.h +replace.o: idf.h +replace.o: idfsize.h +replace.o: input.h +replace.o: inputtype.h +replace.o: lapbuf.h +replace.o: macro.h +replace.o: nparams.h +replace.o: numsize.h +replace.o: pathlength.h +replace.o: replace.h +replace.o: strsize.h +skip.o: LLlex.h +skip.o: class.h +skip.o: file_info.h +skip.o: input.h +skip.o: inputtype.h +tokenname.o: LLlex.h +tokenname.o: Lpars.h +tokenname.o: file_info.h +tokenname.o: idf.h +expr.o: Lpars.h +tokenfile.o: Lpars.h +expression.o: LLlex.h +expression.o: Lpars.h +expression.o: arith.h +expression.o: file_info.h +Lpars.o: Lpars.h +char.o: class.h +symbol2str.o: Lpars.h diff --git 
a/lang/cem/cpp.ansi/Parameters b/lang/cem/cpp.ansi/Parameters new file mode 100644 index 000000000..ce2c3671c --- /dev/null +++ b/lang/cem/cpp.ansi/Parameters @@ -0,0 +1,74 @@ +!File: pathlength.h +#define PATHLENGTH 1024 /* max. length of path to file */ + + +!File: errout.h +#define ERROUT STDERR /* file pointer for writing messages */ +#define MAXERR_LINE 5 /* maximum number of error messages given + on the same input line. */ + + +!File: idfsize.h +#define IDFSIZE 64 /* maximum significant length of an identifier */ + + +!File: numsize.h +#define NUMSIZE 256 /* maximum length of a numeric constant */ + + +!File: nparams.h +#define NPARAMS 32 /* maximum number of parameters of macros */ +#define STDC_NPARAMS 31 /* ANSI limit on number of parameters */ + + +!File: ifdepth.h +#define IFDEPTH 256 /* maximum number of nested if-constructions */ + + +!File: lapbuf.h +#define LAPBUF 4096 /* size of macro actual parameter buffer */ + + +!File: argbuf.h +#define ARGBUF 2048 /* sizeof of macro actual parameter buffer */ + + +!File: strsize.h +#define ISTRSIZE 16 /* minimum number of bytes allocated for + storing a string */ + + +!File: botch_free.h +#undef BOTCH_FREE 1 /* botch freed memory, as a check */ + + +!File: debug.h +#define DEBUG 1 /* perform various self-tests */ + + +!File: parbufsize.h +#define PARBUFSIZE 1024 + + +!File: textsize.h +#define ITEXTSIZE 16 /* 1st piece of memory for repl. text */ + + +!File: inputtype.h +#undef INP_READ_IN_ONE 1 /* read input file in one. 
*/ + /* If defined, we cannot read from a pipe */ + + +!File: obufsize.h +#define OBUFSIZE 8192 /* output buffer size */ + + +!File: dobits.h +#define DOBITS 1 /* use trick to reduce symboltable accesses */ + + +!File: line_prefix.h +#define LINE_PREFIX "#" /* prefix for generated line directives, + either "#" or "#line" + */ + diff --git a/lang/cem/cpp.ansi/arith.h b/lang/cem/cpp.ansi/arith.h new file mode 100644 index 000000000..b04a532ca --- /dev/null +++ b/lang/cem/cpp.ansi/arith.h @@ -0,0 +1,18 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +/* COMPILER ARITHMETIC */ + +/* Normally the compiler does its internal arithmetics in longs + native to the source machine, which is always good for local + compilations, and generally OK too for cross compilations + downwards and sidewards. For upwards cross compilation and + to save storage on small machines, SPECIAL_ARITHMETICS will + be handy. +*/ + +/* All preprocessor arithmetic should be done in longs. +*/ +#define arith long /* dummy */ diff --git a/lang/cem/cpp.ansi/bits.h b/lang/cem/cpp.ansi/bits.h new file mode 100644 index 000000000..ffcd78b22 --- /dev/null +++ b/lang/cem/cpp.ansi/bits.h @@ -0,0 +1,18 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +#include "dobits.h" +#ifdef DOBITS +#define bit0 0x01 +#define bit1 0x02 +#define bit2 0x04 +#define bit3 0x08 +#define bit4 0x10 +#define bit5 0x20 +#define bit6 0x40 +#define bit7 0x80 + +extern char bits[]; +#endif diff --git a/lang/cem/cpp.ansi/ch3bin.c b/lang/cem/cpp.ansi/ch3bin.c new file mode 100644 index 000000000..b6f9aa019 --- /dev/null +++ b/lang/cem/cpp.ansi/ch3bin.c @@ -0,0 +1,80 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. 
+ * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +/* EVALUATION OF BINARY OPERATORS */ + +#include "Lpars.h" +#include "arith.h" + +ch7bin(pval, oper, val) + register arith *pval, val; + int oper; +{ + switch (oper) { + case '%': + if (val == 0) + error("% by 0"); + else + *pval = *pval % val; + break; + case '/': + if (val == 0) + error("/ by 0"); + else + *pval = *pval / val; + break; + case '*': + *pval = *pval * val; + break; + case '+': + *pval = *pval + val; + break; + case '-': + *pval = *pval - val; + break; + case LEFT: + *pval = *pval << val; + break; + case RIGHT: + *pval = *pval >> val; + break; + case '<': + *pval = (*pval < val); + break; + case '>': + *pval = (*pval > val); + break; + case LESSEQ: + *pval = (*pval <= val); + break; + case GREATEREQ: + *pval = (*pval >= val); + break; + case EQUAL: + *pval = (*pval == val); + break; + case NOTEQUAL: + *pval = (*pval != val); + break; + case '&': + *pval = *pval & val; + break; + case '^': + *pval = *pval ^ val; + break; + case '|': + *pval = *pval | val; + break; + case AND: + *pval = (*pval && val); + break; + case OR: + *pval = (*pval || val); + break; + case ',': + *pval = val; + break; + } +} diff --git a/lang/cem/cpp.ansi/ch3mon.c b/lang/cem/cpp.ansi/ch3mon.c new file mode 100644 index 000000000..5da103326 --- /dev/null +++ b/lang/cem/cpp.ansi/ch3mon.c @@ -0,0 +1,25 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ */ +/* $Header$ */ +/* EVALUATION OF MONADIC OPERATORS */ + +#include "Lpars.h" +#include "arith.h" + +ch7mon(oper, pval) + register arith *pval; +{ + switch (oper) { + case '~': + *pval = ~(*pval); + break; + case '-': + *pval = -(*pval); + break; + case '!': + *pval = !(*pval); + break; + } +} diff --git a/lang/cem/cpp.ansi/char.tab b/lang/cem/cpp.ansi/char.tab new file mode 100644 index 000000000..329178284 --- /dev/null +++ b/lang/cem/cpp.ansi/char.tab @@ -0,0 +1,67 @@ +% +% CHARACTER CLASSES +% +% some general settings: +%S129 +%F %s, +% +% START OF TOKEN +% +%iSTGARB +STSKIP:\r \t\013\f +STNL:\n +STCOMP:-!&+<=>|*%/^ +STSIMP:(),:;?[]{}~ +STCHAR:' +STIDF:a-zA-KM-Z_\003 +STELL:L +STNUM:.0-9 +STSTR:" +STEOI:\200 +STMSPEC:\004 +%T/* character classes */ +%T#include "class.h" +%Tchar tkclass[] = { +%p +%T}; +% +% INIDF +% +%C +1:a-zA-Z_0-9 +%Tchar inidf[] = { +%F %s, +%p +%T}; +% +% ISDIG +% +%C +1:0-9 +%Tchar isdig[] = { +%p +%T}; +% +% ISHEX +% +%C +1:0-9a-fA-F +%Tchar ishex[] = { +%p +%T}; +% +% ISOCT +% +%C +1:0-7 +%Tchar isoct[] = { +%p +%T}; +% +% ISWSP +% +%C +1: \t\n +%Tchar iswsp[] = { +%p +%T}; diff --git a/lang/cem/cpp.ansi/class.h b/lang/cem/cpp.ansi/class.h new file mode 100644 index 000000000..8cc7b3b9c --- /dev/null +++ b/lang/cem/cpp.ansi/class.h @@ -0,0 +1,48 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +/* U S E O F C H A R A C T E R C L A S S E S */ + +/* As a starter, chars are divided into classes, according to which + token they can be the start of. + At present such a class number is supposed to fit in 4 bits. +*/ + +#define class(ch) ((tkclass)[ch]) + +/* Being the start of a token is, fortunately, a mutual exclusive + property, so, as there are less than 16 classes they can be + packed in 4 bits. 
+*/ + +#define STSKIP 0 /* spaces and so on: skipped characters */ +#define STNL 1 /* newline character(s): update linenumber etc. */ +#define STGARB 2 /* garbage ascii character: not allowed */ +#define STSIMP 3 /* this character can occur as token */ +#define STCOMP 4 /* this one can start a compound token */ +#define STELL 5 /* wide character- or string- constant prefix */ +#define STIDF 6 /* being the initial character of an identifier */ +#define STCHAR 7 /* the starter of a character constant */ +#define STSTR 8 /* the starter of a string */ +#define STNUM 9 /* the starter of a numeric constant */ +#define STEOI 10 /* End-Of-Information mark */ +#define STMSPEC 11 /* special class for token expansion */ + +#define NOEXPM '\003' /* don't expand the next macro identifier */ +#define TOKSEP '\004' /* the token separator */ + +/* But occurring inside a token is not, so we need 1 bit for each + class. This is implemented as a collection of tables to speed up + the decision whether a character has a special meaning. +*/ +#define in_idf(ch) (inidf[ch]) +#define is_oct(ch) (isoct[ch]) +#define is_dig(ch) (isdig[ch]) +#define is_hex(ch) (ishex[ch]) +#define is_suf(ch) (issuf[ch]) +#define is_wsp(ch) (iswsp[ch]) + +extern char tkclass[]; +extern char inidf[], isoct[], isdig[], ishex[], issuf[], iswsp[]; diff --git a/lang/cem/cpp.ansi/domacro.c b/lang/cem/cpp.ansi/domacro.c new file mode 100644 index 000000000..1d19ec0d4 --- /dev/null +++ b/lang/cem/cpp.ansi/domacro.c @@ -0,0 +1,738 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ */ +/* $Header$ */ +/* PREPROCESSOR: CONTROLLINE INTERPRETER */ + +#include "arith.h" +#include "LLlex.h" +#include "Lpars.h" +#include "idf.h" +#include "input.h" + +#include "ifdepth.h" +#include "botch_free.h" +#include "nparams.h" +#include "parbufsize.h" +#include "textsize.h" +#include "idfsize.h" +#include +#include +#include "class.h" +#include "macro.h" +#include "bits.h" + +extern char **inctable; /* list of include directories */ +extern char *getwdir(); +char ifstack[IFDEPTH]; /* if-stack: the content of an entry is */ + /* 1 if a corresponding ELSE has been */ + /* encountered. */ + +int nestlevel = -1; +int svnestlevel[30] = {-1}; +int nestcount; + +char * +GetIdentifier(skiponerr) + int skiponerr; /* skip the rest of the line on error */ +{ + /* Returns a pointer to the identifier that is read from the + input stream. When the input does not contain an + identifier, the rest of the line is skipped when skiponerr + is on, and a null-pointer is returned. + The substitution of macros is disabled. + Remember that on end-of-line EOF is returned. + */ + int tmp = UnknownIdIsZero; + int tok; + struct token tk; + + UnknownIdIsZero = ReplaceMacros = 0; + tok = GetToken(&tk); + ReplaceMacros = 1; + UnknownIdIsZero = tmp; + if (tok != IDENTIFIER) { + if (skiponerr && tok != EOF) SkipToNewLine(0); + return (char *)0; + } + return tk.tk_str; +} + +/* domacro() is the control line interpreter. The '#' has already + been read by the lexical analyzer by which domacro() is called. + The token appearing directly after the '#' is obtained by calling + the basic lexical analyzing function GetToken() and is interpreted + to perform the action belonging to that token. + An error message is produced when the token is not recognized, + i.e. it is not one of "define" .. "undef" , integer or newline. + Return 1 if the preprocessing directive is done. This is to leave + pragma's in the input. 
+*/ +int +domacro() +{ + struct token tk; /* the token itself */ + register struct idf *id; + int toknum; + + ReplaceMacros = 0; + toknum = GetToken(&tk); + ReplaceMacros = 1; + switch(toknum) { /* select control line action */ + case IDENTIFIER: /* is it a macro keyword? */ + id = findidf(tk.tk_str); + if (!id) { + error("%s: unknown control", tk.tk_str); + SkipToNewLine(0); + free(tk.tk_str); + break; + } + free(tk.tk_str); + switch (id->id_resmac) { + case K_DEFINE: /* "define" */ + do_define(); + break; + case K_ELIF: /* "elif" */ + do_elif(); + break; + case K_ELSE: /* "else" */ + do_else(); + break; + case K_ENDIF: /* "endif" */ + do_endif(); + break; + case K_IF: /* "if" */ + do_if(); + break; + case K_IFDEF: /* "ifdef" */ + do_ifdef(1); + break; + case K_IFNDEF: /* "ifndef" */ + do_ifdef(0); + break; + case K_INCLUDE: /* "include" */ + do_include(); + break; + case K_LINE: /* "line" */ + /* set LineNumber and FileName according to + the arguments. + */ + if (GetToken(&tk) != INTEGER) { + error("bad #line syntax"); + SkipToNewLine(0); + } + else + do_line((unsigned int)tk.tk_val); + break; + case K_ERROR: /* "error" */ + do_error(); + break; + case K_PRAGMA: /* "pragma" */ + return 0; /* this is for the compiler */ + break; + case K_UNDEF: /* "undef" */ + do_undef(); + break; + default: + /* invalid word seen after the '#' */ + error("%s: unknown control", id->id_text); + SkipToNewLine(0); + } + break; + case INTEGER: /* # []? */ + do_line((unsigned int)tk.tk_val); + break; + case EOF: /* only `#' on this line: do nothing, ignore */ + break; + default: /* invalid token following '#' */ + error("illegal # line"); + SkipToNewLine(0); + } + return 1; +} + +skip_block(to_endif) +int to_endif; +{ + /* skip_block() skips the input from + 1) a false #if, #ifdef, #ifndef or #elif until the + corresponding #elif (resulting in true), #else or + #endif is read. 
+ 2) a #else corresponding to a true #if, #ifdef, + #ifndef or #elif until the corresponding #endif is + seen. + */ + register int ch; + register int skiplevel = nestlevel; /* current nesting level */ + struct token tk; + int toknum; + struct idf *id; + + NoUnstack++; + for (;;) { + ch = GetChar(); /* read first character after newline */ + while (class(ch) == STSKIP) + ch = GetChar(); + if (ch != '#') { + if (ch == EOI) { + NoUnstack--; + return; + } + UnGetChar(); + SkipToNewLine(0); + continue; + } + ReplaceMacros = 0; + toknum = GetToken(&tk); + ReplaceMacros = 1; + if (toknum != IDENTIFIER) { + SkipToNewLine(0); + continue; + } + /* an IDENTIFIER: look for #if, #ifdef and #ifndef + without interpreting them. + Interpret #else, #elif and #endif if they occur + on the same level. + */ + id = findidf(tk.tk_str); + free(tk.tk_str); + switch(id->id_resmac) { + default: + SkipToNewLine(0); + break; + case K_IF: + case K_IFDEF: + case K_IFNDEF: + push_if(); + SkipToNewLine(0); + break; + case K_ELIF: + if (ifstack[nestlevel]) + error("#elif after #else"); + if (!to_endif && nestlevel == skiplevel) { + nestlevel--; + push_if(); + if (ifexpr()) { + NoUnstack--; + return; + } + } + else SkipToNewLine(0); /* otherwise done in ifexpr() */ + break; + case K_ELSE: + if (ifstack[nestlevel]) + error("#else after #else"); + ++(ifstack[nestlevel]); + if (!to_endif && nestlevel == skiplevel) { + if (SkipToNewLine(1)) + strict("garbage following #else"); + NoUnstack--; + return; + } + else SkipToNewLine(0); + break; + case K_ENDIF: + assert(nestlevel > svnestlevel[nestcount]); + if (nestlevel == skiplevel) { + if (SkipToNewLine(1)) + strict("garbage following #endif"); + nestlevel--; + NoUnstack--; + return; + } + else SkipToNewLine(0); + nestlevel--; + break; + } + } +} + + +ifexpr() +{ + /* ifexpr() returns whether the restricted constant + expression following #if or #elif evaluates to true. This + is done by calling the LLgen generated subparser for + constant expressions. 
The result of this expression will + be given in the extern long variable "ifval". + */ + extern arith ifval; + int errors = err_occurred; + + ifval = (arith)0; + AccDefined = 1; + UnknownIdIsZero = 1; + DOT = 0; /* tricky */ + If_expr(); /* invoke constant expression parser */ + AccDefined = 0; + UnknownIdIsZero = 0; + return (errors == err_occurred) && (ifval != (arith)0); +} + +do_include() +{ + /* do_include() performs the inclusion of a file. + */ + char *filenm; + char *result; + int tok; + struct token tk; + + AccFileSpecifier = 1; + if (((tok = GetToken(&tk)) == FILESPECIFIER) || tok == STRING) + filenm = tk.tk_str; + else { + error("bad include syntax"); + filenm = (char *)0; + } + AccFileSpecifier = 0; + SkipToNewLine(0); + inctable[0] = WorkingDir; + if (filenm) { + if (!InsertFile(filenm, &inctable[tok==FILESPECIFIER],&result)){ + error("cannot open include file \"%s\"", filenm); + } + else { + WorkingDir = getwdir(result); + svnestlevel[++nestcount] = nestlevel; + FileName = result; + LineNumber = 1; + } + } +} + +do_define() +{ + /* do_define() interprets a #define control line. + */ + register char *str; /* the #defined identifier's descriptor */ + int nformals = -1; /* keep track of the number of formals */ + char *formals[NPARAMS]; /* pointers to the names of the formals */ + char parbuf[PARBUFSIZE]; /* names of formals */ + char *repl_text; /* start of the replacement text */ + int length; /* length of the replacement text */ + register ch; + char *get_text(); + + /* read the #defined macro's name */ + if (!(str = GetIdentifier(1))) { + error("#define: illegal macro name"); + return; + } + /* there is a formal parameter list if the identifier is + followed immediately by a '('. 
+ */ + ch = GetChar(); + if (ch == '(') { + if ((nformals = getparams(formals, parbuf)) == -1) { + SkipToNewLine(0); + free(str); + return; /* an error occurred */ + } + ch = GetChar(); + } + /* read the replacement text if there is any */ + ch = skipspaces(ch,0); /* find first character of the text */ + assert(ch != EOI); + if (class(ch) == STNL) { + /* Treat `#define something' as `#define something ""' + */ + repl_text = Malloc(1); + *repl_text = '\0'; + length = 0; + } + else { + UnGetChar(); + repl_text = get_text((nformals > 0) ? formals : 0, &length); + } + macro_def(str2idf(str, 0), repl_text, nformals, length, NOFLAG); + LineNumber++; +} + +push_if() +{ + if (nestlevel >= IFDEPTH) + fatal("too many nested #if/#ifdef/#ifndef"); + else + ifstack[++nestlevel] = 0; +} + +do_elif() +{ + if (nestlevel <= svnestlevel[nestcount]) { + error("#elif without corresponding #if"); + SkipToNewLine(0); + } + else { /* restart at this level as if a #if is detected. */ + if (ifstack[nestlevel]) { + error("#elif after #else"); + SkipToNewLine(0); + } + nestlevel--; + push_if(); + skip_block(1); + } +} + +do_else() +{ + if (SkipToNewLine(1)) + strict("garbage following #else"); + if (nestlevel <= svnestlevel[nestcount]) + error("#else without corresponding #if"); + else { /* mark this level as else-d */ + if (ifstack[nestlevel]) { + error("#else after #else"); + } + ++(ifstack[nestlevel]); + skip_block(1); + } +} + +do_endif() +{ + if (SkipToNewLine(1)) + strict("garbage following #endif"); + if (nestlevel <= svnestlevel[nestcount]) { + error("#endif without corresponding #if"); + } + else nestlevel--; +} + +do_if() +{ + push_if(); + if (!ifexpr()) /* a false #if/#elif expression */ + skip_block(0); +} + +do_ifdef(how) +{ + register struct idf *id; + register char *str; + + /* how == 1 : ifdef; how == 0 : ifndef + */ + push_if(); + if (!(str = GetIdentifier(1))) { + error("illegal #ifdef construction"); + id = (struct idf *)0; + } else { + id = findidf(str); + free(str); + } 
+ + /* The next test is a shorthand for: + (how && !id->id_macro) || (!how && id->id_macro) + */ + if (how ^ (id && id->id_macro != 0)) + skip_block(0); + else + SkipToNewLine(0); +} + +do_undef() +{ + register struct idf *id; + register char *str; + + /* Forget a macro definition. */ + if (str = GetIdentifier(1)) { + if ((id = findidf(str)) && id->id_macro) { + if (id->id_macro->mc_flag & NOUNDEF) { + error("it is not allowed to #undef %s", str); + } else { + free(id->id_macro->mc_text); + free_macro(id->id_macro); + id->id_macro = (struct macro *) 0; + } + } /* else: don't complain */ + free(str); + SkipToNewLine(0); + } + else + error("illegal #undef construction"); +} + +do_error() +{ + static char errbuf[512]; + register char *bp = errbuf; + register int ch; + + while ((ch = GetChar()) != '\n') + *bp++ = ch; + *bp = '\0'; + UnGetChar(); + error("user error: %s", errbuf); +} + +int +getparams(buf, parbuf) + char *buf[]; + char parbuf[]; +{ + /* getparams() reads the formal parameter list of a macro + definition. + The number of parameters is returned. + As a formal parameter list is expected when calling this + routine, -1 is returned if an error is detected, for + example: + #define one(1), where 1 is not an identifier. + Note that the '(' has already been eaten. + The names of the formal parameters are stored into parbuf. 
+ */ + register char **pbuf = &buf[0]; + register int c; + register char *ptr = &parbuf[0]; + register char **pbuf2; + + c = GetChar(); + c = skipspaces(c,0); + if (c == ')') { /* no parameters: #define name() */ + *pbuf = (char *) 0; + return 0; + } + for (;;) { /* eat the formal parameter list */ + if (class(c) != STIDF && class(c) != STELL) { + error("#define: bad formal parameter"); + return -1; + } + *pbuf = ptr; /* name of the formal */ + *ptr++ = c; + if (ptr >= &parbuf[PARBUFSIZE]) + fatal("formal parameter buffer overflow"); + do { /* eat the identifier name */ + c = GetChar(); + *ptr++ = c; + if (ptr >= &parbuf[PARBUFSIZE]) + fatal("formal parameter buffer overflow"); + } while (in_idf(c)); + *(ptr - 1) = '\0'; /* mark end of the name */ + + /* Check if this formal parameter is already used. + Usually, macros do not have many parameters, so ... + */ + for (pbuf2 = pbuf - 1; pbuf2 >= &buf[0]; pbuf2--) { + if (!strcmp(*pbuf2, *pbuf)) { + warning("formal parameter \"%s\" already used", + *pbuf); + } + } + + pbuf++; + c = skipspaces(c,0); + if (c == ')') { /* end of the formal parameter list */ + *pbuf = (char *) 0; + return pbuf - buf; + } + if (c != ',') { + error("#define: bad formal parameter list"); + return -1; + } + c = GetChar(); + c = skipspaces(c,0); + } + /*NOTREACHED*/ +} + +macro_def(id, text, nformals, length, flags) + register struct idf *id; + char *text; +{ + register struct macro *newdef = id->id_macro; + + /* macro_def() puts the contents and information of a macro + definition into a structure and stores it into the symbol + table entry belonging to the name of the macro. + An error is given if there was already a definition + */ + if (newdef) { /* is there a redefinition? 
*/ + if (newdef->mc_flag & NOUNDEF) { + error("it is not allowed to redefine %s", id->id_text); + } else if (!macroeq(newdef->mc_text, text)) + error("illegal redefine of \"%s\"", id->id_text); + free(text); + return; + } else { +#ifdef DOBITS + register char *p = id->id_text; +#define setbit(bx) if (!*p) goto go_on; bits[*p++] |= (bx) + setbit(bit0); + setbit(bit1); + setbit(bit2); + setbit(bit3); + setbit(bit4); + setbit(bit5); + setbit(bit6); + setbit(bit7); + + go_on: +#endif + id->id_macro = newdef = new_macro(); + } + newdef->mc_text = text; /* replacement text */ + newdef->mc_nps = nformals; /* nr of formals */ + newdef->mc_length = length; /* length of repl. text */ + newdef->mc_flag = flags; /* special flags */ +} + +int +find_name(nm, index) + char *nm, *index[]; +{ + /* find_name() returns the index of "nm" in the namelist + "index" if it can be found there. 0 is returned if it is + not there. + */ + register char **ip = &index[0]; + + while (*ip) + if (strcmp(nm, *ip++) == 0) + return ip - &index[0]; + /* arrived here, nm is not in the name list. */ + return 0; +} + +char * +get_text(formals, length) + char *formals[]; + int *length; +{ + /* get_text() copies the replacement text of a macro + definition with zero, one or more parameters, thereby + substituting each formal parameter by a special character + (non-ascii: 0200 & (order-number in the formal parameter + list)) in order to substitute this character later by the + actual parameter. The replacement text is copied into + itself because the copied text will contain fewer or the + same amount of characters. The length of the replacement + text is returned. + + Implementation: + finite automaton : we are only interested in + identifiers, because they might be replaced by some actual + parameter. Other tokens will not be seen as such. 
+ */ + register int c; + register unsigned text_size; + char *text = Malloc(text_size = ITEXTSIZE); + register int pos = 0; + + c = GetChar(); + + while ((c != EOI) && (class(c) != STNL)) { + if (c == '\'' || c == '"') { + register int delim = c; + + do { + /* being careful, as ever */ + if (pos+3 >= text_size) + text = Srealloc(text, text_size <<= 1); + text[pos++] = c; + if (c == '\\') + text[pos++] = GetChar(); + c = GetChar(); + } while (c != delim && c != EOI && class(c) != STNL); + text[pos++] = c; + c = GetChar(); + } + else + if (c == '/') { + c = GetChar(); + if (pos+1 >= text_size) + text = Srealloc(text, text_size <<= 1); + if (c == '*') { + skipcomment(); + text[pos++] = ' '; + c = GetChar(); + } + else + text[pos++] = '/'; + } + else + if (formals && (class(c) == STIDF || class(c) == STELL)) { + char id_buf[IDFSIZE + 1]; + register id_size = 0; + register n; + + /* read identifier: it may be a formal parameter */ + id_buf[id_size++] = c; + do { + c = GetChar(); + if (id_size <= IDFSIZE) + id_buf[id_size++] = c; + } while (in_idf(c)); + id_buf[--id_size] = '\0'; + if (n = find_name(id_buf, formals)) { + /* construct the formal parameter mark */ + if (pos+1 >= text_size) + text = Srealloc(text, + text_size <<= 1); + text[pos++] = FORMALP | (char) n; + } + else { + register char *ptr = &id_buf[0]; + + while (pos + id_size >= text_size) + text_size <<= 1; + text = Realloc(text, text_size); + while (text[pos++] = *ptr++) + /* EMPTY */ ; + pos--; + } + } + else { + if (pos+1 >= text_size) + text = Realloc(text, text_size <<= 1); + text[pos++] = c; + c = GetChar(); + } + } + text[pos++] = '\0'; + text = Realloc(text, pos); + *length = pos - 1; + return text; +} + +#define BLANK(ch) ((ch == ' ') || (ch == '\t')) + +/* macroeq() decides whether two macro replacement texts are + identical. This version compares the texts, which occur + as strings, without taking care of the leading and trailing + blanks (spaces and tabs). 
+*/ +macroeq(s, t) + register char *s, *t; +{ + + /* skip leading spaces */ + while (BLANK(*s)) s++; + while (BLANK(*t)) t++; + /* first non-blank encountered in both strings */ + /* The actual comparison loop: */ + while (*s && *s == *t) + s++, t++; + /* two cases are possible when arrived here: */ + if (*s == '\0') { /* *s == '\0' */ + while (BLANK(*t)) t++; + return *t == '\0'; + } + else { /* *s != *t */ + while (BLANK(*s)) s++; + while (BLANK(*t)) t++; + return (*s == '\0') && (*t == '\0'); + } +} + +do_line(l) + unsigned int l; +{ + struct token tk; + + LineNumber = l - 1; /* the number of the next input line */ + if (GetToken(&tk) == STRING) /* is there a filespecifier? */ + FileName = tk.tk_str; + SkipToNewLine(0); +} diff --git a/lang/cem/cpp.ansi/error.c b/lang/cem/cpp.ansi/error.c new file mode 100644 index 000000000..50d1cc98f --- /dev/null +++ b/lang/cem/cpp.ansi/error.c @@ -0,0 +1,106 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +/* E R R O R A N D D I A G N O S T I C R O U T I N E S */ + +#include +#include + +#include "arith.h" +#include "errout.h" +#include "LLlex.h" + +/* This file contains the (non-portable) error-message and diagnostic + functions. Beware, they are called with a variable number of + arguments! 
+*/ + +int err_occurred; + +err_hdr(s) + char *s; +{ + if (FileName) { + fprint(ERROUT, "\"%s\", line %d: %s", FileName, LineNumber, s); + } + else fprint(ERROUT, s); +} + +/*VARARGS1*/ +error(va_alist) + va_dcl +{ + char *fmt; + va_list ap; + + err_hdr(""); + va_start(ap); + fmt = va_arg(ap, char *); + doprnt(ERROUT, fmt, ap); + fprint(ERROUT, "\n"); + va_end(ap); +} + +/*VARARGS1*/ +warning(va_alist) + va_dcl +{ + char *fmt; + va_list ap; + + err_hdr("(warning) "); + va_start(ap); + fmt = va_arg(ap, char *); + doprnt(ERROUT, fmt, ap); + fprint(ERROUT, "\n"); + va_end(ap); +} + +/*VARARGS1*/ +strict(va_alist) + va_dcl +{ + char *fmt; + va_list ap; + + err_hdr("(strict) "); + va_start(ap); + fmt = va_arg(ap, char *); + doprnt(ERROUT, fmt, ap); + fprint(ERROUT, "\n"); + va_end(ap); +} + +/*VARARGS1*/ +crash(va_alist) + va_dcl +{ + char *fmt; + va_list ap; + + err_hdr("CRASH\007 "); + va_start(ap); + fmt = va_arg(ap, char *); + doprnt(ERROUT, fmt, ap); + fprint(ERROUT, "\n"); + va_end(ap); + sys_stop(S_ABORT); +} + +/*VARARGS1*/ +fatal(va_alist) + va_dcl +{ + char *fmt; + va_list ap; + + err_hdr("fatal error -- "); + va_start(ap); + fmt = va_arg(ap, char *); + doprnt(ERROUT, fmt, ap); + fprint(ERROUT, "\n"); + va_end(ap); + sys_stop(S_EXIT); +} diff --git a/lang/cem/cpp.ansi/expr.c b/lang/cem/cpp.ansi/expr.c new file mode 100644 index 000000000..48f4157d3 --- /dev/null +++ b/lang/cem/cpp.ansi/expr.c @@ -0,0 +1,58 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +/* OPERATOR HANDLING */ + +#include "Lpars.h" + +int +rank_of(oper) + int oper; +{ + /* The rank of the operator oper is returned. 
+ */ + switch (oper) { + default: + return 0; + case '(': + return 1; + case '!': + return 2; + case '*': + case '/': + case '%': + return 3; + case '+': + case '-': + return 4; + case LEFT: + case RIGHT: + return 5; + case '<': + case '>': + case LESSEQ: + case GREATEREQ: + return 6; + case EQUAL: + case NOTEQUAL: + return 7; + case '&': + return 8; + case '^': + return 9; + case '|': + return 10; + case AND: + return 11; + case OR: + return 12; + case '?': + case ':': + return 13; + case ',': + return 15; + } + /*NOTREACHED*/ +} diff --git a/lang/cem/cpp.ansi/expression.g b/lang/cem/cpp.ansi/expression.g new file mode 100644 index 000000000..aadc73efb --- /dev/null +++ b/lang/cem/cpp.ansi/expression.g @@ -0,0 +1,129 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* EXPRESSION SYNTAX PARSER */ + +%lexical LLlex; +%start If_expr, if_expression; + +{ +#include "arith.h" +#include "LLlex.h" + +extern arith ifval; +} + +if_expression +: + constant_expression(&ifval) +; + +/* 7.1 */ +primary(arith *pval;) +: + constant(pval) +| + '(' expression(pval) ')' +; + +unary(arith *pval;) + {int oper;} +: + unop(&oper) + unary(pval) + { ch7mon(oper, pval); } +| + primary(pval) +; + +binary_expression(int maxrank; arith *pval;) + {int oper; arith val1;} +: + unary(pval) + [%while (rank_of(DOT) <= maxrank) + binop(&oper) + binary_expression(rank_of(oper)-1, &val1) + { + ch7bin(pval, oper, val1); + } + ]* +; + +/* 7.13 */ +conditional_expression(arith *pval;) + {arith val1 = 0, val2 = 0;} +: + /* allow all binary operators */ + binary_expression(rank_of('?') - 1, pval) + [ '?' + expression(&val1) + ':' + assignment_expression(&val2) + { *pval = (*pval ? val1 : val2); } + ]? 
+; + +/* 7.14 */ +assignment_expression(arith *pval;) +: + conditional_expression(pval) +; + +/* 7.15 */ +expression(arith *pval;) + {arith val1;} +: + assignment_expression(pval) + [ ',' + assignment_expression(&val1) + { + ch7bin(pval, ',', val1); + } + ]* +; + +unop(int *oper;) : + [ '-' | '!' | '~' ] + {*oper = DOT;} +; + +multop: + '*' | '/' | '%' +; + +addop: + '+' | '-' +; + +shiftop: + LEFT | RIGHT +; + +relop: + '<' | '>' | LESSEQ | GREATEREQ +; + +eqop: + EQUAL | NOTEQUAL +; + +arithop: + multop | addop | shiftop +| + '&' | '^' | '|' +; + +binop(int *oper;) : + [ arithop | relop | eqop | AND | OR ] + {*oper = DOT;} +; + +constant(arith *pval;) : + INTEGER + {*pval = dot.tk_val;} +; + +constant_expression (arith *pval;) : + assignment_expression(pval) +; diff --git a/lang/cem/cpp.ansi/file_info.h b/lang/cem/cpp.ansi/file_info.h new file mode 100644 index 000000000..740e27b5c --- /dev/null +++ b/lang/cem/cpp.ansi/file_info.h @@ -0,0 +1,18 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +/* F I L E I N F O R M A T I O N S T R U C T U R E */ + +struct file_info { + unsigned int fil_lino; + char *fil_name; + char *fil_wdir; +}; + +#define LineNumber finfo.fil_lino +#define FileName finfo.fil_name +#define WorkingDir finfo.fil_wdir + +extern struct file_info finfo; /* input.c */ diff --git a/lang/cem/cpp.ansi/idf.c b/lang/cem/cpp.ansi/idf.c new file mode 100644 index 000000000..c23e4ce76 --- /dev/null +++ b/lang/cem/cpp.ansi/idf.c @@ -0,0 +1,7 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ */ +/* $Header$ */ +#include "idf.h" +#include diff --git a/lang/cem/cpp.ansi/idf.h b/lang/cem/cpp.ansi/idf.h new file mode 100644 index 000000000..c2bce70b2 --- /dev/null +++ b/lang/cem/cpp.ansi/idf.h @@ -0,0 +1,16 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +struct id_usr { + struct macro *idu_macro; + int idu_resmac; +}; + +#define IDF_TYPE struct id_usr +#define IDF_HSIZE 6 +#define id_macro id_user.idu_macro +#define id_resmac id_user.idu_resmac + +#include diff --git a/lang/cem/cpp.ansi/init.c b/lang/cem/cpp.ansi/init.c new file mode 100644 index 000000000..1b066f076 --- /dev/null +++ b/lang/cem/cpp.ansi/init.c @@ -0,0 +1,92 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +/* PREPROCESSOR: INITIALIZATION ROUTINES */ + +#include +#include +#include +#include "class.h" +#include "macro.h" +#include "idf.h" + +struct mkey { + char *mk_reserved; + int mk_key; +} mkey[] = { + {"define", K_DEFINE}, + {"elif", K_ELIF}, + {"else", K_ELSE}, + {"endif", K_ENDIF}, + {"error", K_ERROR}, + {"if", K_IF}, + {"ifdef", K_IFDEF}, + {"ifndef", K_IFNDEF}, + {"include", K_INCLUDE}, + {"line", K_LINE}, + {"pragma", K_PRAGMA}, + {"undef", K_UNDEF}, + {0, K_UNKNOWN} +}; + +char *strcpy(); + +init_pp() +{ + static char *months[12] = { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" + }; + long clock, sys_time(); + static char dbuf[30]; + static char tbuf[30]; + struct tm *tp; + + /* Initialise the control line keywords (if, include, define, etc) + Although the lexical analyzer treats them as identifiers, the + control line handler can recognize them as keywords by the + id_resmac field of the identifier. 
+ */ + { + register struct mkey *mk = &mkey[0]; + + while (mk->mk_reserved) { + register struct idf *idf = str2idf(mk->mk_reserved); + + if (idf->id_resmac) + fatal("maximum identifier length insufficient"); + idf->id_resmac = mk->mk_key; + mk++; + } + } + + /* Initialize __LINE__, __FILE__, __DATE__, __TIME__, + and __STDC__ macro definitions. + */ + clock = sys_time(); + tp = localtime(&clock); + + /* __DATE__ */ + sprintf(dbuf, "\"%.3s %.2d %d\"", months[tp->tm_mon], + tp->tm_mday, tp->tm_year+1900); + if (tp->tm_mday < 10) dbuf[5] = ' '; /* hack */ + macro_def(str2idf("__DATE__", 0), dbuf, -1, strlen(dbuf), NOUNDEF); + + /* __TIME__ */ + sprintf(tbuf, "\"%.2d:%.2d:%.2d\"", tp->tm_hour, tp->tm_min, tp->tm_sec); + macro_def(str2idf("__TIME__", 0), tbuf, -1, strlen(tbuf), NOUNDEF); + + /* __LINE__ */ + macro_def(str2idf("__LINE__", 0), "0", -1, 1, NOUNDEF | FUNC); + + /* __FILE__ */ + macro_def(str2idf("__FILE__", 0), "", -1, 1, NOUNDEF | FUNC); + + /* __STDC__ */ + macro_def(str2idf("__STDC__", 0), "1", -1, 1, NOUNDEF); + + /* defined(??) */ + macro_def(str2idf("defined", 0), "", 1, 1, NOUNDEF | FUNC); +} diff --git a/lang/cem/cpp.ansi/input.c b/lang/cem/cpp.ansi/input.c new file mode 100644 index 000000000..62fad6336 --- /dev/null +++ b/lang/cem/cpp.ansi/input.c @@ -0,0 +1,61 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ */ +/* $Header$ */ + +#include "file_info.h" +#include "input.h" + +#define INP_PUSHBACK 3 +#define INP_TYPE struct file_info +#define INP_VAR finfo +struct file_info finfo; +#include + +char * +getwdir(fn) + register char *fn; +{ + register char *p; + char *strrindex(); + + p = strrindex(fn, '/'); + while (p && *(p + 1) == '\0') { /* remove trailing /'s */ + *p = '\0'; + p = strrindex(fn, '/'); + } + + if (fn[0] == '\0' || (fn[0] == '/' && p == &fn[0])) /* absolute path */ + return ""; + if (p) { + *p = '\0'; + fn = Salloc(fn, p - &fn[0] + 1); + *p = '/'; + return fn; + } + return "."; +} + +int NoUnstack; +int InputLevel; + +AtEoIT() +{ + InputLevel--; + /* if (NoUnstack) warning("unexpected EOF"); ??? */ + unstackrepl(); + return 0; +} + +AtEoIF() +{ + extern int nestlevel; + extern int nestcount; + extern int svnestlevel[]; + + if (nestlevel > svnestlevel[nestcount]) warning("missing #endif"); + else if (NoUnstack) warning("unexpected EOF"); + nestlevel = svnestlevel[nestcount--]; + return 0; +} diff --git a/lang/cem/cpp.ansi/input.h b/lang/cem/cpp.ansi/input.h new file mode 100644 index 000000000..660d01d77 --- /dev/null +++ b/lang/cem/cpp.ansi/input.h @@ -0,0 +1,15 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +#define INP_PUSHBACK 3 +#include "inputtype.h" +#include + +/* Note: The following macro only garuantees one PushBack. +*/ +#define UnGetChar() ((LexSave != EOI) ? ChPushBack(LexSave) : 0) + +extern int LexSave; /* last character read by GetChar */ +extern int GetChar(); /* character input, with trigraph parsing */ diff --git a/lang/cem/cpp.ansi/macro.str b/lang/cem/cpp.ansi/macro.str new file mode 100644 index 000000000..8adec26aa --- /dev/null +++ b/lang/cem/cpp.ansi/macro.str @@ -0,0 +1,56 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. 
+ * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +/* PREPROCESSOR: DEFINITION OF MACRO DESCRIPTOR */ + +/* The flags of the mc_flag field of the macro structure. Note that + these flags can be set simultaneously. +*/ +#define NOFLAG 0 /* no special flags */ +#define FUNC 0x1 /* function attached */ +#define NOUNDEF 0x2 /* reserved macro */ +#define NOREPLACE 0x4 /* prevent recursion */ + +#define FORMALP 0200 /* mask for creating macro formal parameter */ + +/* The macro descriptor is very simple, except the fact that the + mc_text, which points to the replacement text, contains the + non-ascii characters \201, \202, etc, indicating the position of a + formal parameter in this text. +*/ +struct macro { + struct macro *next; + char * mc_text; /* the replacement text */ + int mc_nps; /* number of formal parameters */ + int mc_length; /* length of replacement text */ + char mc_flag; /* marking this macro */ +}; + +/* ALLOCDEF "macro" 20 */ + +struct mlist { + struct mlist *next; + struct macro *m_mac; + char *m_repl; + char m_unstack; +}; + +/* ALLOCDEF "mlist" 20 */ + +/* `token' numbers of keywords of command-line processor +*/ +#define K_UNKNOWN 0 +#define K_DEFINE 1 +#define K_ELIF 2 +#define K_ELSE 3 +#define K_ENDIF 4 +#define K_ERROR 5 +#define K_IF 6 +#define K_IFDEF 7 +#define K_IFNDEF 8 +#define K_INCLUDE 9 +#define K_LINE 10 +#define K_PRAGMA 11 +#define K_UNDEF 12 diff --git a/lang/cem/cpp.ansi/main.c b/lang/cem/cpp.ansi/main.c new file mode 100644 index 000000000..c26d015b5 --- /dev/null +++ b/lang/cem/cpp.ansi/main.c @@ -0,0 +1,84 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ */ +/* $Header$ */ +/* MAIN PROGRAM */ + +#include +#include +#include "arith.h" +#include "file_info.h" +#include "idfsize.h" + +extern char *symbol2str(); +extern char *getwdir(); +extern int err_occurred; +int idfsize = IDFSIZE; + +arith ifval; + +char *prog_name; + +extern char **inctable; +extern int inc_max, inc_total; + +main(argc, argv) + char *argv[]; +{ + /* parse and interpret the command line options */ + prog_name = argv[0]; + + init_idf(); + + inctable = (char **) Malloc(10 * sizeof(char *)); + inc_max = 10; + inc_total = 2; + inctable[0] = "."; + inctable[1] = "/usr/include"; + init_pp(); /* initialise the preprocessor macros */ + + /* Note: source file "-" indicates that the source is supplied + as standard input. This is only allowed if INP_READ_IN_ONE is + not defined! + */ + while (argc > 1 && *argv[1] == '-' && argv[1][1] != '\0') { + char *par = &argv[1][1]; + + if (*par == '-') + par++; + do_option(par); + argc--, argv++; + } + compile(argc - 1, &argv[1]); + sys_stop(err_occurred ? S_EXIT : S_END); + /*NOTREACHED*/ +} + +compile(argc, argv) + char *argv[]; +{ + register char *source = 0; + char *dummy; + + switch (argc) { + case 1: + source = argv[0]; + FileName = source; + break; + case 0: + FileName = ""; + WorkingDir = 0; + break; + default: + FileName = argv[0]; + fatal("use: %s [options] [source]", prog_name); + break; + } + + if (!InsertFile(source, (char **) 0, &dummy)) /* read the source file */ + fatal("%s: no source file %s\n", prog_name, + source ? 
source : "stdin"); + if (source) WorkingDir = getwdir(dummy); + preprocess(source); +} diff --git a/lang/cem/cpp.ansi/make.allocd b/lang/cem/cpp.ansi/make.allocd new file mode 100755 index 000000000..08492fad1 --- /dev/null +++ b/lang/cem/cpp.ansi/make.allocd @@ -0,0 +1,8 @@ +sed -e ' +s:^.*[ ]ALLOCDEF[ ].*"\(.*\)"[ ]*\([0-9][0-9]*\).*$:\ +/* allocation definitions of struct \1 */\ +extern char *st_alloc();\ +extern struct \1 *h_\1;\ +#define new_\1() ((struct \1 *) st_alloc((char **)\&h_\1, sizeof(struct \1), \2))\ +#define free_\1(p) st_free(p, \&h_\1, sizeof(struct \1))\ +:' diff --git a/lang/cem/cpp.ansi/make.hfiles b/lang/cem/cpp.ansi/make.hfiles new file mode 100755 index 000000000..2132dd618 --- /dev/null +++ b/lang/cem/cpp.ansi/make.hfiles @@ -0,0 +1,35 @@ +: Update Files from database + +PATH=/bin:/usr/bin + +case $# in +1) ;; +*) echo use: $0 file >&2 + exit 1 +esac + +( +IFCOMMAND="if (<\$FN) 2>/dev/null;\ + then if cmp -s \$FN \$TMP;\ + then rm \$TMP;\ + else mv \$TMP \$FN;\ + echo update \$FN;\ + fi;\ + else mv \$TMP \$FN;\ + echo create \$FN;\ + fi" +echo 'TMP=.uf$$' +echo 'FN=$TMP' +echo 'cat >$TMP <<\!EOF!' +sed -n '/^!File:/,${ +/^$/d +/^!File:[ ]*\(.*\)$/s@@!EOF!\ +'"$IFCOMMAND"'\ +FN=\1\ +cat >$TMP <<\\!EOF!@ +p +}' $1 +echo '!EOF!' 
+echo $IFCOMMAND +) | +sh diff --git a/lang/cem/cpp.ansi/make.next b/lang/cem/cpp.ansi/make.next new file mode 100755 index 000000000..be69d8d69 --- /dev/null +++ b/lang/cem/cpp.ansi/make.next @@ -0,0 +1,3 @@ +sed -n ' +s:^.*ALLOCDEF.*"\(.*\)".*$:struct \1 *h_\1 = 0;:p +' $* diff --git a/lang/cem/cpp.ansi/make.tokcase b/lang/cem/cpp.ansi/make.tokcase new file mode 100755 index 000000000..ef32292f9 --- /dev/null +++ b/lang/cem/cpp.ansi/make.tokcase @@ -0,0 +1,34 @@ +cat <<'--EOT--' +#include "Lpars.h" + +char * +symbol2str(tok) + int tok; +{ + static char buf[2] = { '\0', '\0' }; + + if (040 <= tok && tok < 0177) { + buf[0] = tok; + buf[1] = '\0'; + return buf; + } + switch (tok) { +--EOT-- +sed ' +/{[A-Z]/!d +s/.*{\(.*\),.*\(".*"\).*$/ case \1 :\ + return \2;/ +' +cat <<'--EOT--' + case '\n': + case '\f': + case '\v': + case '\r': + case '\t': + buf[0] = tok; + return buf; + default: + return "bad token"; + } +} +--EOT-- diff --git a/lang/cem/cpp.ansi/make.tokfile b/lang/cem/cpp.ansi/make.tokfile new file mode 100755 index 000000000..494b7e3cc --- /dev/null +++ b/lang/cem/cpp.ansi/make.tokfile @@ -0,0 +1,6 @@ +sed ' +/{[A-Z]/!d +s/.*{// +s/,.*// +s/.*/%token &;/ +' diff --git a/lang/cem/cpp.ansi/nccp.6 b/lang/cem/cpp.ansi/nccp.6 new file mode 100644 index 000000000..d72f72693 --- /dev/null +++ b/lang/cem/cpp.ansi/nccp.6 @@ -0,0 +1,74 @@ +.TH NCPP 6ACK +.ad +.SH NAME +ncpp \- New C Pre-Processor +.SH SYNOPSIS +ncpp [\-options] [ file ] +.SH DESCRIPTION +.I Ncpp +reads a file, expands macros and include +files, and writes an input file for the C compiler. +All output is to standard output. +.br +The following options are supported. +.IP -\fBI\fIdirectory\fR +.br +add this directory to the list of +directories searched for #include "..." and #include <...> +commands. Note that there is no space between the +"-I" and the directory string. More than one -I command +is permitted. 
+.IP -\fBI\fR +end the list of directories to be searched, and also do not look in +default places. +.IP -\fBD\fIname\fR=\fItext\fR +.br +define +.I name +as a macro with +.I text +as its replacement text. +.IP -\fBD\fIname\fR +the same as -\fBD\fIname\fR=1. +.IP +.IP -\fBU\fIname\fR +.br +undefine the macro name +.IR name . +.IP -\fBC\fR +leave comments in. By default, C-comments are deleted. +.IP -\fBP\fR +do not generate line directives. +.IP -\fBM\fIn\fR +set maximum identifier length to +.IR n . +.PP +The following names are always available unless undefined: +.RS +.IP __STDC__ +A decimal constant 1, indicating that this is an ANSI C conforming +implementation. +.IP __FILE__ +The input (or #include) file being compiled +(as a quoted string). +.IP __LINE__ +The line number being compiled. +.IP __DATE__ +The date of translation of the source file. This is a string +literal of the form "\fBMmm dd yyyy\fP". +.IP __TIME__ +The time of translation of the source file. This is a string +literal of the form "\fBhh:mm:ss\fP". +.RE +.SH BUGS +The output may contain extra spaces; this prevents unintended +pasting of tokens. +.SH "SEE ALSO" +L. Rosler, +.I +Draft Proposed Standard - Programming Language C, +.R +ANSI X3J11 Language Subcommittee +.SH AUTHOR +Leendert van Doorn + diff --git a/lang/cem/cpp.ansi/options.c b/lang/cem/cpp.ansi/options.c new file mode 100644 index 000000000..cb83406c6 --- /dev/null +++ b/lang/cem/cpp.ansi/options.c @@ -0,0 +1,133 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright".
+ */ +/* $Header$ */ +/* USER-OPTION HANDLING */ + +#include +#include "idfsize.h" +#include "class.h" +#include "macro.h" +#include "idf.h" + +char options[128]; /* one for every char */ +int inc_pos = 1; /* place where next -I goes */ +int inc_max; +int inc_total; +int debug; +char **inctable; + +extern int idfsize; +int txt2int(); + +do_option(text) + char *text; +{ + switch(*text++) { + case '-': + options[*text] = 1; + break; + default: + error("illegal option: %c", text[-1]); + break; + case 'C' : /* comment output */ + options['C'] = 1; + break; + case 'D' : /* -Dname : predefine name */ + { + register char *cp = text, *name, *mactext; + + if (class(*cp) != STIDF || class(*cp) == STELL) { + error("identifier missing in -D%s", text); + break; + } + name = cp; + while (*cp && in_idf(*cp)) + ++cp; + if (!*cp) /* -Dname */ + mactext = "1"; + else + if (*cp == '=') { /* -Dname=text */ + *cp++ = '\0'; /* end of name */ + mactext = cp; + } + else { /* -Dname?? */ + error("malformed option -D%s", text); + break; + } + macro_def(str2idf(name, 0), mactext, -1, strlen(mactext), NOFLAG); + break; + } + case 'I' : /* -Ipath : insert "path" into include list */ + if (*text) { + register int i; + register char *new = text; + + if (++inc_total > inc_max) { + char **n = (char **) + Malloc((10 + inc_max) * sizeof(char *)); + + for (i = 0; i < inc_max; i++) { + n[i] = inctable[i]; + } + free((char *) inctable); + inctable = n; + inc_max += 10; + } + + i = inc_pos++; + while (new) { + register char *tmp = inctable[i]; + + inctable[i++] = new; + new = tmp; + } + } + else inctable[inc_pos] = 0; + break; + case 'M': /* maximum identifier length */ + idfsize = txt2int(&text); + if (*text) + error("malformed -M option"); + if (idfsize > IDFSIZE) { + warning("maximum identifier length is %d", IDFSIZE); + idfsize = IDFSIZE; + } + if (idfsize < 8) { + warning("minimum identifier length is 8"); + idfsize = 8; + } + break; + case 'P' : /* run preprocessor stand-alone, without #'s */ + 
options['P'] = 1; + break; + case 'U' : /* -Uname : undefine predefined */ + if (*text) { + register struct idf *idef = findidf(text); + + if (idef && idef->id_macro) { + free_macro(idef->id_macro); + idef->id_macro = (struct macro *) 0; + } + } + break; + } +} + +int +txt2int(tp) + char **tp; +{ + /* the integer pointed to by *tp is read, while increasing + *tp; the resulting value is yielded. + */ + register int val = 0; + register int ch; + + while (ch = **tp, ch >= '0' && ch <= '9') { + val = val * 10 + ch - '0'; + (*tp)++; + } + return val; +} diff --git a/lang/cem/cpp.ansi/preprocess.c b/lang/cem/cpp.ansi/preprocess.c new file mode 100644 index 000000000..d2f7110fd --- /dev/null +++ b/lang/cem/cpp.ansi/preprocess.c @@ -0,0 +1,326 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +/* PREPROCESSOR DRIVER */ + +#include +#include "input.h" +#include "obufsize.h" +#include "arith.h" +#include "LLlex.h" +#include "class.h" +#include "macro.h" +#include "idf.h" +#include "idfsize.h" +#include "bits.h" +#include "line_prefix.h" + +char _obuf[OBUFSIZE]; +#ifdef DOBITS +char bits[128]; +#endif +extern int InputLevel; + +Xflush() +{ + sys_write(STDOUT, _obuf, OBUFSIZE); +} + +preprocess(fn) + char *fn; +{ + register int c; + register char *op = _obuf; + register char *ob = &_obuf[OBUFSIZE]; + char Xbuf[256]; + int lineno = 0; + extern char options[]; + +#define flush(X) (sys_write(STDOUT,_obuf,X)) +#define echo(ch) if (op == ob) { Xflush(); op = _obuf; } *op++ = (ch); +#define newline() echo('\n') + + if (!options['P']) { + /* Generate a line directive communicating the + source filename + */ + register char *p = Xbuf; + + sprint(p, "%s 1 \"%s\"\n", + LINE_PREFIX, + FileName); + while (*p) { + echo(*p++); + } + } +#define do_line(lineno, fn) \ + if (lineno != LineNumber || fn != FileName) { \ + fn = FileName; \ + lineno = LineNumber; 
\ + if (! options['P']) { \ + register char *p = Xbuf; \ + \ + sprint(p, "%s %d \"%s\"\n", \ + LINE_PREFIX, \ + LineNumber, \ + FileName); \ + while (*p) { \ + echo(*p++); \ + } \ + } \ + } + + for (;;) { + LineNumber++; + lineno++; + c = GetChar(); + while (class(c) == STSKIP) { + echo(c); + c = GetChar(); + } + + while (c == '#') { + if (!domacro()) { /* pass pragma's to compiler */ + register char *p = "#pragma"; + + do_line(lineno, fn); + + while(*p) { + echo(*p++); + } + while ((c = GetChar()) != EOI) { + if (class(c) == STNL) break; + echo(c); + } + } + lineno++; + newline(); + c = GetChar(); + while (class(c) == STSKIP) { + echo(c); + c = GetChar(); + } + } + do_line(lineno, fn); + for (;;) { + + /* illegal character */ + if (c & 0200) { + if (c == EOI) { + newline(); + flush(op-_obuf); + return; + } + fatal("non-ascii character read"); + } + + /* comments */ + if (c == '/' && !InputLevel) { + c = GetChar(); + if (c == '*') { + NoUnstack++; + if (options['C']) { + echo('/'); + echo('*'); + } + for (;;) { + c = GetChar(); + if (c == '\n') { + ++LineNumber; + ++lineno; + echo(c); + } + else if (c == EOI) { + newline(); + flush(op - _obuf); + return; + } + else if (c == '*') { + if (options['C']) { + echo(c); + } + c = GetChar(); + if (c == '/') { + if (options['C']) { + echo(c); + } + break; + } + else { + UnGetChar(); + } + } + else if (options['C']) { + echo(c); + } + } + NoUnstack--; + c = GetChar(); + continue; + } + echo('/'); + continue; + } + + /* switch on character */ + switch(class(c)) { + case STNL: + echo(c); + break; + case STSTR: + case STCHAR: + { + register int stopc = c; + int escaped; + + do { + + escaped = 0; + echo(c); + c = GetChar(); + if (c == '\n') { + break; + } + else if (c == EOI) { + newline(); + flush(op-_obuf); + return; + } + if (c == '\\') { + echo(c); + c = GetChar(); + if (c == '\n') { + ++LineNumber; + lineno++; + } + else escaped = 1; /* any character after a backslash (\", \', \\) cannot end the literal */ + } + } while (escaped || c != stopc); + echo(c); + if (c == '\n') + break;
/* Don't eat # */ + c = GetChar(); + continue; + } + case STNUM: + echo(c); + if (c == '.') { + c = GetChar(); + if (c == '.') { + if ((c = GetChar()) == '.') { + echo('.'); echo('.'); + continue; + } + UnGetChar(); + c = '.'; + continue; + } else if (!is_dig(c)) { + continue; + } + } + c = GetChar(); + while (in_idf(c) || c == '.') { + echo(c); + if (c == 'e' || c == 'E') { + c = GetChar(); + if (c == '+' || c == '-') { + echo(c); + c = GetChar(); + } + } else c = GetChar(); + } + continue; + case STELL: + if (c == '"' || c == '\'') { + echo(c); + continue; + } + UnGetChar(); + c = 'L'; + case STIDF: { + extern int idfsize; /* ??? */ + char buf[IDFSIZE + 1]; + register char *tg = &buf[0]; + register char *maxpos = &buf[idfsize]; + register struct idf *idef; + int NoExpandNext = 0; + +#define tstmac(bx) if (!(bits[c] & bx)) goto nomac +#define cpy *tg++ = c +#define load c = GetChar(); if (!in_idf(c)) goto endidf + + /* unstack macro's when allowed. */ + if (Unstacked) + EnableMacros(); + if (c == NOEXPM) { + NoExpandNext = 1; + c = GetChar(); + } + +#ifdef DOBITS + cpy; tstmac(bit0); load; + cpy; tstmac(bit1); load; + cpy; tstmac(bit2); load; + cpy; tstmac(bit3); load; + cpy; tstmac(bit4); load; + cpy; tstmac(bit5); load; + cpy; tstmac(bit6); load; + cpy; tstmac(bit7); load; +#endif + + for(;;) { + if (tg < maxpos) { + cpy; + } + load; + } + endidf: + if (c != EOF) UnGetChar(); + *tg = '\0'; /* mark the end of the identifier */ + if ((idef = findidf(buf)) + && idef->id_macro + && ReplaceMacros && !NoExpandNext) { + if (replace(idef)) { + c = GetChar(); + continue; + } + tg = buf; + while (*tg) { + echo(*tg++); + } + c = GetChar(); + if (in_idf(c)) echo(' '); + continue; + } + nomac: + *tg = '\0'; + tg = buf; + while (*tg) { + echo(*tg++); + } + c = GetChar(); + while (in_idf(c)) { + echo(c); + c = GetChar(); + } + continue; + } + case STMSPEC: + if (InputLevel) { + echo(' '); /* seperate tokens */ + c = GetChar(); + continue; + } + /* else fallthrough */ + 
default: + echo(c); + c = GetChar(); + continue; + } + break; + } + } + /*NOTREACHED*/ +} diff --git a/lang/cem/cpp.ansi/replace.c b/lang/cem/cpp.ansi/replace.c new file mode 100644 index 000000000..6177b7fbf --- /dev/null +++ b/lang/cem/cpp.ansi/replace.c @@ -0,0 +1,703 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +/* M A C R O R E P L A C E M E N T */ + +#include "pathlength.h" +#include "strsize.h" +#include "nparams.h" +#include "idfsize.h" +#include "numsize.h" +#include +#include "idf.h" +#include "input.h" +#include "macro.h" +#include "arith.h" +#include "LLlex.h" +#include "class.h" +#include +#include "lapbuf.h" +#include "argbuf.h" +#include "replace.h" + +extern char *GetIdentifier(); +extern int InputLevel; +struct repl *ReplaceList; /* list of currently active macros */ + +int +replace(idf) + register struct idf *idf; +{ + /* replace is called by the lexical analyzer to perform + macro replacement. The routine actualy functions as a + higher interface to the real thing: expand_macro(). 
+ */ + struct repl *repl; + + if (!(idf->id_macro)) return 0; + if (idf->id_macro->mc_flag & NOREPLACE) + return 0; + repl = new_repl(); + repl->r_ptr = repl->r_text; + repl->r_args = new_args(); + repl->r_idf = idf; + if (!expand_macro(repl, idf)) + return 0; + InputLevel++; + InsertText(repl->r_text, repl->r_ptr - repl->r_text); + idf->id_macro->mc_flag |= NOREPLACE; + repl->r_level = InputLevel; + repl->next = ReplaceList; + ReplaceList = repl; + return 1; +} + +unstackrepl() +{ + Unstacked++; +} + +EnableMacros() +{ + register struct repl *r = ReplaceList, *prev = 0; + + assert(Unstacked > 0); + while(r) { + struct repl *nxt = r->next; + + if (r->r_level > InputLevel) { + r->r_idf->id_macro->mc_flag &= ~NOREPLACE; + if (!prev) ReplaceList = nxt; + else prev->next = nxt; + free_args(r->r_args); + free_repl(r); + } + else prev = r; + r = nxt; + } + Unstacked = 0; +} + +expand_macro(repl, idf) + register struct repl *repl; + register struct idf *idf; +{ + /* expand_macro() does the actual macro replacement. + "idf" is a description of the identifier which + caused the replacement. + If the identifier represents a function-like macro + call, the number of actual parameters is checked + against the number of formal parameters. Note that + in ANSI C the parameters are expanded first; + this is done by calling getactuals(). + When the possible parameters are expanded, the replace- + ment list associated with "idf" is expanded. + expand_macro() returns 1 if the replacement succeeded + and 0 if some error occurred. + + A special case is "defined". This acts as a unary operator + on a single, unexpanded identifier, which may be surrounded + by parenthesis. The function expand_defined() handles this. 
+ */ + register struct macro *mac = idf->id_macro; + struct args *args = repl->r_args; + register int ch; + + if (mac->mc_nps != -1) { /* with parameter list */ + if (mac->mc_flag & FUNC) { + /* the following assertion won't compile: + assert(!strcmp("defined", idf->id_text)); + */ + if (!AccDefined) return 0; + expand_defined(repl); + return 1; + } + + ch = GetChar(); + ch = skipspaces(ch,1); + if (ch != '(') { /* no replacement if no () */ + UnGetChar(); + return 0; + } else + getactuals(repl, idf); + + } + + if (mac->mc_flag & FUNC) /* this macro leads to special action */ + macro_func(idf); + + macro2buffer(repl, idf, args); + + /* According to the ANSI definition: + + #define a + + a+b; --> + + b ; + + 'a' must be substituded, but the result should be + three tokens: + + ID. Because this preprocessor is + character based, we have a problem. + For now: just insert a space after all tokens, + until ANSI fixes this flaw. + ^^^^^^^^^^^^^^^^^^^^^^^^^^ tsk tsk tsk + */ + if (*repl->r_ptr != TOKSEP) *repl->r_ptr++ = TOKSEP; + *repl->r_ptr = '\0'; + + return 1; +} + +expand_defined(repl) + register struct repl *repl; +{ + register int ch = GetChar(); + struct idf *id; + char *str; + int parens = 0; + + ch = skipspaces(ch, 0); + + if (ch == '(') { + parens++; + ch = GetChar(); + ch = skipspaces(ch, 0); + } + if ((class(ch) != STIDF) && (class(ch) != STELL)) { + error("identifier missing"); + if (parens && ch != ')') error(") missing"); + if (!parens || ch != ')') UnGetChar(); + *repl->r_ptr++ = '0'; + *repl->r_ptr = '\0'; + return; + } + UnGetChar(); + str = GetIdentifier(0); + if (str) + id = str2idf(str, 0); + else id = 0; + assert(id || class(ch) == STELL); + ch = GetChar(); + ch = skipspaces(ch, 0); + if (parens && ch != ')') error(") missing"); + if (!parens || ch != ')') UnGetChar(); + *repl->r_ptr++ = (id && id->id_macro) ? 
'1' : '0'; + *repl->r_ptr = '\0'; +} + +getactuals(repl, idf) + struct repl *repl; + register struct idf *idf; +{ + /* Get the actual parameters from the input stream. + The hard part is done by actual(), only comma's and + other syntactic trivialities are checked here. + */ + register struct args *args = repl->r_args; + register int nps = idf->id_macro->mc_nps; + register int argcnt; + register int ch; + + argcnt = 0; + args->a_expvec[0] = args->a_expptr = &args->a_expbuf[0]; + args->a_rawvec[0] = args->a_rawptr = &args->a_rawbuf[0]; + if ((ch = GetChar()) != ')') { + UnGetChar(); + while ((ch = actual(repl)) != ')' ) { + if (ch != ',') { + error("illegal macro call"); + return; + } + stash(repl, '\0', 1); + ++argcnt; + args->a_expvec[argcnt] = args->a_expptr; + args->a_rawvec[argcnt] = args->a_rawptr; + if (argcnt == STDC_NPARAMS) + strict("number of parameters exceeds ANSI standard"); + if (argcnt >= NPARAMS) + fatal("argument vector overflow"); + } + stash(repl, '\0', 1); + ++argcnt; + } + if (argcnt < nps) + error("too few macro arguments"); + else if (argcnt > nps) + error("too many macro arguments"); +} + +saveraw(repl) +struct repl *repl; +{ + register struct repl *nrepl = ReplaceList; + register struct args *ap = nrepl->r_args; + struct args *args = repl->r_args; + register char *p; + + /* stash identifier name */ + for (p = nrepl->r_idf->id_text; *p != '\0'; p++) + *args->a_rawptr++ = *p; + + /* The following code deals with expanded function + like macro calls. 
It makes the following code + work: + + #define def(a,b) x(a,b) + #define glue(a,b) a ## b + + glue(abc,def(a,b)) + + Results in: + + abcdef(a,b); + */ + if (ap->a_rawvec[0]) { + /* stash arguments */ + register int i; + + *args->a_rawptr++ = '('; + for (i = 0; ap->a_rawvec[i] != (char *)0; i++) { + for (p = ap->a_rawvec[i]; *p != '\0'; p++) + *args->a_rawptr++ = *p; + *args->a_rawptr++ = ','; + } + *(args->a_rawptr-1) = ')'; /* delete last ',' */ + } +} + +int +actual(repl) + struct repl *repl; +{ + /* This routine deals with the scanning of an actual parameter. + It keeps in account the opening and closing brackets, + preprocessor numbers, strings and character constants. + */ + register int ch; + register int level = 0, nostashraw = 0; + + while (1) { + ch = GetChar(); + + if (Unstacked) { + nostashraw -= Unstacked; + if (nostashraw < 0) nostashraw = 0; + EnableMacros(); + } + if (class(ch) == STIDF || class(ch) == STELL) { + /* Scan a preprocessor identifier token. If the + token is a macro, it is expanded first. + */ + char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1]; + register char *p = buf; + register struct idf *idef; + register int pos = -1; + extern int idfsize; + int NoExpandMacro; + + if (ch == NOEXPM) { + NoExpandMacro= 1; + ch = GetChar(); + } else NoExpandMacro = 0; + + do { + if (++pos < idfsize) { + *p++ = ch; + } + ch = GetChar(); + } while (in_idf(ch)); + *p++ = '\0'; + UnGetChar(); + + /* When the identifier has an associated macro + replacement list, it's expanded. 
+ */ + idef = findidf(buf); + if (!idef || NoExpandMacro || !replace(idef)) { + if (NoExpandMacro + || (idef && idef->id_macro + && (idef->id_macro->mc_flag & NOREPLACE))) + stash(repl, NOEXPM, !nostashraw); + for (p = buf; *p != '\0'; p++) + stash(repl, *p, !nostashraw); + } else { + if (!nostashraw) saveraw(repl); + nostashraw++; + } + } else if (class(ch) == STNUM) { + /* a preprocessing number has the following + regular expression: + [0-9|"."[0-9]]{[0-9"."a-zA-Z_]|{[Ee][+-]}}* + */ + stash(repl, ch, !nostashraw); + if (ch == '.') { + ch = GetChar(); + if (class(ch) != STNUM) { + UnGetChar(); + continue; + } + else stash(repl, ch, !nostashraw); + } + ch = GetChar(); + while (in_idf(ch) || ch == '.') { + stash(repl, ch, !nostashraw); + if ((ch = GetChar()) == 'e' || ch == 'E') { + stash(repl, ch, !nostashraw); + ch = GetChar(); + if (ch == '+' || ch == '-') { + stash(repl, ch, !nostashraw); + ch = GetChar(); + } + } + } + UnGetChar(); + } else if (ch == '(' || ch == '[' || ch == '{') { + /* a comma may occur within these constructions ??? + */ + level++; + stash(repl, ch, !nostashraw); + } else if (ch == ')' || ch == ']' || ch == '}') { + level--; + /* clossing parenthesis of macro call */ + if (ch == ')' && level < 0) + return ')'; + stash(repl, ch, !nostashraw); + } else if (ch == ',') { + if (level <= 0) { /* comma separator for next argument */ + if (level) + error("unbalanced parenthesis"); + if (!nostashraw) + return ','; /* ??? */ + } + stash(repl, ch, !nostashraw); + } else if (ch == '\n') { + /* newlines are accepted as white spaces */ + LineNumber++; + while ((ch = GetChar()), class(ch) == STSKIP) + /* EMPTY */; + + /* This piece of code needs some explanation: + consider the call of a macro defined as: + #define sum(a,b) (a+b) + in the following form: + sum( + #include phone_number + ,2); + in which case the include must be handled + interpreted as such. 
+ */ + if (ch == '#') + domacro(); + else if (ch == EOI) { + error("unterminated macro call"); + return ')'; + } + UnGetChar(); + stash(repl, ' ', !nostashraw); + } else if (ch == '/') { + /* comments are treated as one white space token */ + if ((ch = GetChar()) == '*' && !InputLevel) { + skipcomment(); + stash(repl, ' ', !nostashraw); + } else { + UnGetChar(); + stash(repl, '/', !nostashraw); + } + } else if (ch == '\'' || ch == '"') { + /* Strings are considered as ONE token, thus no + replacement within strings. + */ + register int match = ch; + + stash(repl, ch, !nostashraw); + while ((ch = GetChar()) != EOI) { + if (ch == match) + break; + if (ch == '\\') { + stash(repl, ch, !nostashraw); + ch = GetChar(); + } else if (ch == '\n') { + error("newline in string"); + LineNumber++; + stash(repl, match, !nostashraw); + break; + } + stash(repl, ch, !nostashraw); + } + if (ch != match) { + error("unterminated macro call"); + return ')'; + } + stash(repl, ch, !nostashraw); + } else + stash(repl, ch, !nostashraw); + } +} + +macro_func(idef) + register struct idf *idef; +{ + /* macro_func() performs the special actions needed with some + macros. These macros are __FILE__ and __LINE__ which + replacement texts must be evaluated at the time they are + used. 
+ */ + register struct macro *mac = idef->id_macro; + static char FilNamBuf[PATHLENGTH]; + char *long2str(); + + switch (idef->id_text[2]) { + case 'F': /* __FILE__ */ + FilNamBuf[0] = '"'; + strcpy(&FilNamBuf[1], FileName); + strcat(FilNamBuf, "\""); + mac->mc_text = FilNamBuf; + mac->mc_length = strlen(FilNamBuf); + break; + case 'L': /* __LINE__ */ + mac->mc_text = long2str((long)LineNumber, 10); + mac->mc_length = strlen(mac->mc_text); + break; + default: + crash("(macro_func)"); + /*NOTREACHED*/ + } +} + +macro2buffer(repl, idf, args) + register struct repl *repl; + register struct idf *idf; + register struct args *args; +{ + /* macro2buffer expands the replacement list and places the + result onto the replacement buffer. It deals with the # + and ## operators, and inserts the actual parameters. + The argument buffer contains the raw argument (needed + for the ## operator), and the expanded argument (for + all other parameter substitutions). + + The grammar of the replacement list is: + + repl_list: TOKEN repl_list + | PARAMETER repl_list + | '#' PARAMETER + | TOKEN '##' TOKEN + | PARAMETER '##' TOKEN + | TOKEN '##' PARAMETER + | PARAMETER '##' PARAMETER + ; + + As the grammar indicates, we could make a DFA and + use this finite state machine for the replacement + list parsing (inserting the arguments, etc.). + + Currently we go through the replacement list in a + linear fashion. This is VERY expensive, something + smarter should be done (but even a DFA is O(|s|)). 
+ */ + register char *ptr = idf->id_macro->mc_text; + register char *tmpptr; + int err = 0; + char *stringify(); + + while (*ptr) { + assert(repl->r_ptr < &(repl->r_text[LAPBUF])); + if (*ptr == '\'' || *ptr == '"') { + register int delim = *ptr; + + do { + *repl->r_ptr++ = *ptr; + if (*ptr == '\\') + *repl->r_ptr++ = *++ptr; + if (*ptr == '\0') { + error("unterminated string"); + *repl->r_ptr = '\0'; + return; + } + ptr++; + } while (*ptr != delim || *ptr == '\0'); + *repl->r_ptr++ = *ptr++; + } else if (*ptr == '#') { + if (*++ptr == '#') { + /* ## - paste operator */ + ptr++; + + /* trim the actual replacement list */ + --repl->r_ptr; + while (is_wsp(*repl->r_ptr) + && repl->r_ptr >= repl->r_text) + --repl->r_ptr; + + /* ## occurred at the beginning of the + replacement list. + */ + if (repl->r_ptr == repl->r_text + && is_wsp(*repl->r_text)) { + err = 1; + break; + } + + while(*repl->r_ptr == TOKSEP + && repl->r_ptr >= repl->r_text) + --repl->r_ptr; + + tmpptr = repl->r_ptr; + ++repl->r_ptr; + + /* skip space in macro replacement list */ + while ((*ptr & FORMALP) == 0 && is_wsp(*ptr)) + ptr++; + + /* ## occurred at the end of the replacement list. 
+ */ + if (*ptr & FORMALP) { + register int n = *ptr++ & 0177; + register char *p; + + assert(n > 0); + p = args->a_rawvec[n-1]; + if (p) { /* else macro argument missing */ + while (is_wsp(*p)) + p++; + if (*p == NOEXPM) p++; + while (*p) + *repl->r_ptr++ = *p++; + } + if (in_idf(*tmpptr + 1)) { + while (in_idf(*tmpptr) + && tmpptr >= repl->r_text) + tmpptr--; + if (*tmpptr == NOEXPM) *tmpptr = TOKSEP; + } + } else if (*ptr == '\0') { + err = 1; + break; + } else { + if (in_idf(*ptr)) { + while (in_idf(*tmpptr) + && tmpptr >= repl->r_text) + tmpptr--; + if (*tmpptr == NOEXPM) *tmpptr = TOKSEP; + } + } + } else /* # operator */ + ptr = stringify(repl, ptr, args); + } else if (*ptr & FORMALP) { + /* insert actual parameter */ + register int n = *ptr++ & 0177; + register char *p, *q; + + assert(n > 0); + + /* This is VERY dirty, we look ahead for the + ## operater. If it's found we use the raw + argument buffer instead of the expanded + one. + */ + for (p = ptr; (*p & FORMALP) == 0 && is_wsp(*p); p++) + /* EMPTY */; + if (*p == '#' && p[1] == '#') + q = args->a_rawvec[n-1]; + else + q = args->a_expvec[n-1]; + + p = repl->r_ptr; + if (q) /* else macro argument missing */ + while (*q) + *repl->r_ptr++ = *q++; + + if (*repl->r_ptr != TOKSEP) + *repl->r_ptr++ = TOKSEP; + } else + *repl->r_ptr++ = *ptr++; + } + *repl->r_ptr = '\0'; + if (err) + error("illegal use of ## operator"); +} + +char * +stringify(repl, ptr, args) + register struct repl *repl; + register char *ptr; + register struct args *args; +{ + /* If a parameter is immediately preceded by a # token + both are replaced by a single string literal that + contains the spelling of the token sequence for the + corresponding argument. + Each occurrence of white space between the argument's + tokens become a single space character in the string + literal. White spaces before the first token and after + the last token comprising the argument are deleted. 
+ To retain the original spelling we insert backslashes + as appropriate. We only escape backslashes if they + occure within string tokens. + */ + register int space = 1; /* skip leading spaces */ + register int delim = 0; /* string or character constant delim */ + register int backslash = 0; /* last character was a \ */ + + /* skip spaces macro replacement list */ + while ((*ptr & FORMALP) == 0 && is_wsp(*ptr)) + ptr++; + + if (*ptr & FORMALP) { + register int n = *ptr++ & 0177; + register char *p; + + assert(n != 0); + p = args->a_rawvec[n-1]; + *repl->r_ptr++ = '"'; + while (*p) { + if (is_wsp(*p)) { + if (!space) { + space = 1; + *repl->r_ptr++ = ' '; + } + p++; + continue; + } + space = 0; + + if (!delim && (*p == '"' || *p == '\'')) + delim = *p; + else if (*p == delim && !backslash) + delim = 0; + backslash = *p == '\\'; + if (*p == '"' || (delim && *p == '\\')) + *repl->r_ptr++ = '\\'; + if (*p == TOKSEP || *p == NOEXPM) p++; + else *repl->r_ptr++ = *p++; + } + + /* trim spaces in the replacement list */ + for (--repl->r_ptr; is_wsp(*repl->r_ptr); repl->r_ptr--) + /* EMPTY */; + *++repl->r_ptr = '"'; + ++repl->r_ptr; /* oops, one to far */ + } else + error("illegal use of # operator"); + *repl->r_ptr = '\0'; + return ptr; +} + +stash(repl, ch, stashraw) + struct repl *repl; + register int ch; + int stashraw; +{ + /* Stash characters into the macro expansion buffer. + */ + register struct args *args = repl->r_args; + + if (args->a_expptr >= &(args->a_expbuf[ARGBUF])) + fatal("macro argument buffer overflow"); + *args->a_expptr++ = ch; + + if (stashraw) { + if (args->a_rawptr >= &(args->a_rawbuf[ARGBUF])) + fatal("raw macro argument buffer overflow"); + *args->a_rawptr++ = ch; + } +} diff --git a/lang/cem/cpp.ansi/replace.str b/lang/cem/cpp.ansi/replace.str new file mode 100644 index 000000000..61ed6fc0e --- /dev/null +++ b/lang/cem/cpp.ansi/replace.str @@ -0,0 +1,48 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. 
+ * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +/* DEFINITIONS FOR THE MACRO REPLACEMENT ROUTINES */ + +struct repl { + struct repl *next; + struct idf *r_idf; /* name of the macro */ + struct args *r_args; /* replacement parameters */ + int r_level; /* level of insertion */ + char *r_ptr; /* replacement text pointer */ + char r_text[LAPBUF]; /* replacement text */ +}; + +/* ALLOCDEF "repl" 4 */ + +#define NO_REPL (struct repl *)0 + +/* The implementation of the ## operator is currently very clumsy. + When the the ## operator is used the arguments are taken from + the raw buffer; this buffer contains a precise copy of the + original argument. The fully expanded copy is in the arg buffer. + The two copies are here explicitely because: + + #define ABC f() + #define ABCD 2 + #define g(x, y) x ## y + h(x) + + g(ABC, D); + + In this case we need two copies: one raw copy for the pasting + operator, and an expanded one as argument for h(). +*/ +struct args { + char *a_expptr; /* expanded argument pointer */ + char *a_expvec[NPARAMS]; /* expanded argument vector */ + char a_expbuf[ARGBUF]; /* expanded argument buffer space */ + char *a_rawptr; /* raw argument pointer */ + char *a_rawvec[NPARAMS]; /* raw argument vector */ + char a_rawbuf[ARGBUF]; /* raw argument buffer space */ +}; + +/* ALLOCDEF "args" 2 */ + +#define NO_ARGS (struct args *)0 + diff --git a/lang/cem/cpp.ansi/skip.c b/lang/cem/cpp.ansi/skip.c new file mode 100644 index 000000000..25781bacb --- /dev/null +++ b/lang/cem/cpp.ansi/skip.c @@ -0,0 +1,77 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ */ +/* $Header$ */ +/* PREPROCESSOR: INPUT SKIP FUNCTIONS */ + +#include "arith.h" +#include "LLlex.h" +#include "class.h" +#include "input.h" + +extern int InputLevel; + +int +skipspaces(ch, skipnl) + register int ch; +{ + /* skipspaces() skips any white space and returns the first + non-space character. + */ + register int nlseen = 0; + + for (;;) { + while (class(ch) == STSKIP) + ch = GetChar(); + if (skipnl && class(ch) == STNL) { + ch = GetChar(); + LineNumber++; + nlseen++; + continue; + } + if (ch == TOKSEP && InputLevel) { + ch = GetChar(); + continue; + } + + /* \\\n are handled by trigraph */ + + if (ch == '/') { + ch = GetChar(); + if (ch == '*' && !InputLevel) { + skipcomment(); + ch = GetChar(); + } + else { + UnGetChar(); + return '/'; + } + } + else if (nlseen && ch == '#') { + domacro(); + ch = GetChar(); + } else + return ch; + } +} + +SkipToNewLine(garbage) + int garbage; +{ + register int ch; + register int pstrict = 0; + /* Skip the rest of the line, comments included; yield 1 iff "garbage" is set and a non-blank character was skipped. */ + while ((ch = GetChar()) != '\n') { + if (ch == '/') { + if (GetChar() == '*' && !InputLevel) { + skipcomment(); + continue; + } else UnGetChar(); /* not a comment: push the look-ahead back (it may be the newline) and keep ch == '/' */ + } + if (garbage && !is_wsp(ch)) + pstrict = 1; + } + ++LineNumber; + return pstrict; +} diff --git a/lang/cem/cpp.ansi/tokenname.c b/lang/cem/cpp.ansi/tokenname.c new file mode 100644 index 000000000..2e81b05ce --- /dev/null +++ b/lang/cem/cpp.ansi/tokenname.c @@ -0,0 +1,72 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Header$ */ +/* TOKEN NAME DEFINITIONS */ + +#include "idf.h" +#include "arith.h" +#include "LLlex.h" +#include "Lpars.h" + + +struct tokenname { /* Used for defining the name of a + token as identified by its symbol + */ + int tn_symbol; + char *tn_name; +}; + + +/* To centralize the declaration of %tokens, their presence in this + file is taken as their declaration. The Makefile will produce + a grammar file (tokenfile.g) from this file.
+ Moreover, rather than looking up a symbol in all these lists + to find its printable name, a fast version of symbol2str() is + generated from these tables. + Consequenty some of these tables are not referenced explicitly + in the C text any more. To save space and to avoid lint confusion, + these have been made pseudo-invisible by #ifdefs. +*/ + +#ifdef ____ +struct tokenname tkspec[] = { /* the names of the special tokens */ + {IDENTIFIER, "identifier"}, + {STRING, "string"}, + {FILESPECIFIER, "filespecifier"}, + {INTEGER, "integer"}, + {0, ""} +}; + +struct tokenname tkcomp[] = { /* names of the composite tokens */ + {PLUSAB, "+="}, + {MINAB, "-="}, + {TIMESAB, "*="}, + {DIVAB, "/="}, + {MODAB, "%="}, + {LEFTAB, "<<="}, + {RIGHTAB, ">>="}, + {ANDAB, "&="}, + {XORAB, "^="}, + {ORAB, "|="}, + {NOTEQUAL, "!="}, + {AND, "&&"}, + {PLUSPLUS, "++"}, + {MINMIN, "--"}, + {ARROW, "->"}, + {LEFT, "<<"}, + {LESSEQ, "<="}, + {EQUAL, "=="}, + {GREATEREQ, ">="}, + {RIGHT, ">>"}, + {OR, "||"}, + {ELLIPSIS, "..."}, + {0, ""} +}; + +struct tokenname tkfunny[] = { /* internal keywords */ + {ERRONEOUS, "erroneous"}, + {0, ""} +}; +#endif ____ -- 2.34.1