From bc296e2dcc5593de3d4326032efc5ec783260565 Mon Sep 17 00:00:00 2001 From: erikb Date: Mon, 10 Mar 1986 13:07:55 +0000 Subject: [PATCH] *** empty log message *** --- lang/cem/cemcom/LLlex.c | 563 ++++++++++++++++++ lang/cem/cemcom/LLlex.h | 54 ++ lang/cem/cemcom/LLmessage.c | 50 ++ lang/cem/cemcom/Makefile.erik | 215 +++++++ lang/cem/cemcom/Parameters | 144 +++++ lang/cem/cemcom/align.h | 9 + lang/cem/cemcom/alloc.c | 161 ++++++ lang/cem/cemcom/alloc.h | 16 + lang/cem/cemcom/arith.c | 465 +++++++++++++++ lang/cem/cemcom/arith.h | 23 + lang/cem/cemcom/asm.c | 10 + lang/cem/cemcom/assert.h | 17 + lang/cem/cemcom/atw.h | 6 + lang/cem/cemcom/blocks.c | 88 +++ lang/cem/cemcom/cem.1 | 238 ++++++++ lang/cem/cemcom/cem.c | 744 ++++++++++++++++++++++++ lang/cem/cemcom/cemcom.1 | 94 +++ lang/cem/cemcom/ch7.c | 409 +++++++++++++ lang/cem/cemcom/ch7bin.c | 308 ++++++++++ lang/cem/cemcom/ch7mon.c | 148 +++++ lang/cem/cemcom/char.tab | 58 ++ lang/cem/cemcom/class.h | 37 ++ lang/cem/cemcom/code.c | 491 ++++++++++++++++ lang/cem/cemcom/code.h | 23 + lang/cem/cemcom/code.str | 23 + lang/cem/cemcom/conversion.c | 130 +++++ lang/cem/cemcom/cstoper.c | 230 ++++++++ lang/cem/cemcom/dataflow.c | 34 ++ lang/cem/cemcom/declar.g | 473 +++++++++++++++ lang/cem/cemcom/declar.str | 45 ++ lang/cem/cemcom/declarator.c | 106 ++++ lang/cem/cemcom/declarator.h | 45 ++ lang/cem/cemcom/decspecs.c | 92 +++ lang/cem/cemcom/decspecs.h | 23 + lang/cem/cemcom/decspecs.str | 23 + lang/cem/cemcom/def.h | 37 ++ lang/cem/cemcom/def.str | 37 ++ lang/cem/cemcom/domacro.c | 673 +++++++++++++++++++++ lang/cem/cemcom/dumpidf.c | 367 ++++++++++++ lang/cem/cemcom/em.c | 219 +++++++ lang/cem/cemcom/em.h | 42 ++ lang/cem/cemcom/emcode.def | 123 ++++ lang/cem/cemcom/error.c | 212 +++++++ lang/cem/cemcom/eval.c | 1028 +++++++++++++++++++++++++++++++++ lang/cem/cemcom/expr.c | 408 +++++++++++++ lang/cem/cemcom/expr.h | 102 ++++ lang/cem/cemcom/expr.str | 102 ++++ lang/cem/cemcom/expression.g | 371 ++++++++++++ lang/cem/cemcom/faulty.h | 5 + lang/cem/cemcom/field.c | 199 +++++++ lang/cem/cemcom/field.h | 20 + lang/cem/cemcom/field.str | 20 + lang/cem/cemcom/idf.c | 697 ++++++++++++++++++++++ lang/cem/cemcom/idf.h | 68 +++ lang/cem/cemcom/idf.str | 68 +++ lang/cem/cemcom/init.c | 107 ++++ lang/cem/cemcom/input.c | 458 +++++++++++++++ lang/cem/cemcom/input.h | 13 + lang/cem/cemcom/interface.h | 3 + lang/cem/cemcom/ival.c | 792 +++++++++++++++++++++++++ lang/cem/cemcom/label.c | 88 +++ lang/cem/cemcom/label.h | 11 + lang/cem/cemcom/level.h | 15 + lang/cem/cemcom/macro.h | 52 ++ lang/cem/cemcom/macro.str | 52 ++ lang/cem/cemcom/main.c | 382 ++++++++++++ lang/cem/cemcom/make.emfun | 19 + lang/cem/cemcom/make.emmac | 10 + lang/cem/cemcom/make.hfiles | 35 ++ lang/cem/cemcom/make.next | 3 + lang/cem/cemcom/make.tokcase | 34 ++ lang/cem/cemcom/make.tokfile | 6 + lang/cem/cemcom/mcomm.c | 241 ++++++++ lang/cem/cemcom/mes.h | 4 + lang/cem/cemcom/options | 28 + lang/cem/cemcom/options.c | 252 ++++++++ lang/cem/cemcom/program.g | 190 ++++++ lang/cem/cemcom/replace.c | 158 +++++ lang/cem/cemcom/scan.c | 224 +++++++ lang/cem/cemcom/sizes.h | 8 + lang/cem/cemcom/skip.c | 73 +++ lang/cem/cemcom/specials.h | 14 + lang/cem/cemcom/stack.c | 280 +++++++++ lang/cem/cemcom/stack.h | 46 ++ lang/cem/cemcom/stack.str | 46 ++ lang/cem/cemcom/statement.g | 402 +++++++++++++ lang/cem/cemcom/stb.c | 11 + lang/cem/cemcom/storage.c | 67 +++ lang/cem/cemcom/storage.h | 9 + lang/cem/cemcom/string.c | 275 +++++++++ lang/cem/cemcom/string.h | 13 + lang/cem/cemcom/struct.c | 503 ++++++++++++++++ lang/cem/cemcom/struct.h | 44 ++ lang/cem/cemcom/struct.str | 44 ++ lang/cem/cemcom/switch.c | 184 ++++++ lang/cem/cemcom/switch.h | 40 ++ lang/cem/cemcom/switch.str | 40 ++ lang/cem/cemcom/system.c | 72 +++ lang/cem/cemcom/system.h | 34 ++ lang/cem/cemcom/tab.c | 295 ++++++++++ lang/cem/cemcom/tokenname.c | 143 +++++ lang/cem/cemcom/tokenname.h | 9 + lang/cem/cemcom/type.c | 217 +++++++ lang/cem/cemcom/type.h | 52 ++ lang/cem/cemcom/type.str | 52 ++ 105 files changed, 16543 insertions(+) create mode 100644 lang/cem/cemcom/LLlex.c create mode 100644 lang/cem/cemcom/LLlex.h create mode 100644 lang/cem/cemcom/LLmessage.c create mode 100644 lang/cem/cemcom/Makefile.erik create mode 100644 lang/cem/cemcom/Parameters create mode 100644 lang/cem/cemcom/align.h create mode 100644 lang/cem/cemcom/alloc.c create mode 100644 lang/cem/cemcom/alloc.h create mode 100644 lang/cem/cemcom/arith.c create mode 100644 lang/cem/cemcom/arith.h create mode 100644 lang/cem/cemcom/asm.c create mode 100644 lang/cem/cemcom/assert.h create mode 100644 lang/cem/cemcom/atw.h create mode 100644 lang/cem/cemcom/blocks.c create mode 100644 lang/cem/cemcom/cem.1 create mode 100644 lang/cem/cemcom/cem.c create mode 100644 lang/cem/cemcom/cemcom.1 create mode 100644 lang/cem/cemcom/ch7.c create mode 100644 lang/cem/cemcom/ch7bin.c create mode 100644 lang/cem/cemcom/ch7mon.c create mode 100644 lang/cem/cemcom/char.tab create mode 100644 lang/cem/cemcom/class.h create mode 100644 lang/cem/cemcom/code.c create mode 100644 lang/cem/cemcom/code.h create mode 100644 lang/cem/cemcom/code.str create mode 100644 lang/cem/cemcom/conversion.c create mode 100644 lang/cem/cemcom/cstoper.c create mode 100644 lang/cem/cemcom/dataflow.c create mode 100644 lang/cem/cemcom/declar.g create mode 100644 lang/cem/cemcom/declar.str create mode 100644 lang/cem/cemcom/declarator.c create mode 100644 lang/cem/cemcom/declarator.h create mode 100644 lang/cem/cemcom/decspecs.c create mode 100644 lang/cem/cemcom/decspecs.h create mode 100644 lang/cem/cemcom/decspecs.str create mode 100644 lang/cem/cemcom/def.h create mode 100644 lang/cem/cemcom/def.str create mode 100644 lang/cem/cemcom/domacro.c create mode 100644 lang/cem/cemcom/dumpidf.c create mode 100644 lang/cem/cemcom/em.c create mode 100644 lang/cem/cemcom/em.h create mode 100644 lang/cem/cemcom/emcode.def create mode 100644 lang/cem/cemcom/error.c create mode 100644 lang/cem/cemcom/eval.c create mode 100644 lang/cem/cemcom/expr.c create mode 100644 lang/cem/cemcom/expr.h create mode 100644 lang/cem/cemcom/expr.str create mode 100644 lang/cem/cemcom/expression.g create mode 100644 lang/cem/cemcom/faulty.h create mode 100644 lang/cem/cemcom/field.c create mode 100644 lang/cem/cemcom/field.h create mode 100644 lang/cem/cemcom/field.str create mode 100644 lang/cem/cemcom/idf.c create mode 100644 lang/cem/cemcom/idf.h create mode 100644 lang/cem/cemcom/idf.str create mode 100644 lang/cem/cemcom/init.c create mode 100644 lang/cem/cemcom/input.c create mode 100644 lang/cem/cemcom/input.h create mode 100644 lang/cem/cemcom/interface.h create mode 100644 lang/cem/cemcom/ival.c create mode 100644 lang/cem/cemcom/label.c create mode 100644 lang/cem/cemcom/label.h create mode 100644 lang/cem/cemcom/level.h create mode 100644 lang/cem/cemcom/macro.h create mode 100644 lang/cem/cemcom/macro.str create mode 100644 lang/cem/cemcom/main.c create mode 100755 lang/cem/cemcom/make.emfun create mode 100755 lang/cem/cemcom/make.emmac create mode 100755 lang/cem/cemcom/make.hfiles create mode 100755 lang/cem/cemcom/make.next create mode 100755 lang/cem/cemcom/make.tokcase create mode 100755 lang/cem/cemcom/make.tokfile create mode 100644 lang/cem/cemcom/mcomm.c create mode 100644 lang/cem/cemcom/mes.h create mode 100644 lang/cem/cemcom/options create mode 100644 lang/cem/cemcom/options.c create mode 100644 lang/cem/cemcom/program.g create mode 100644 lang/cem/cemcom/replace.c create mode 100644 lang/cem/cemcom/scan.c create mode 100644 lang/cem/cemcom/sizes.h create mode 100644 lang/cem/cemcom/skip.c create mode 100644 lang/cem/cemcom/specials.h create mode 100644 lang/cem/cemcom/stack.c create mode 100644 lang/cem/cemcom/stack.h create mode 100644 lang/cem/cemcom/stack.str create mode 100644 lang/cem/cemcom/statement.g create mode 100644 lang/cem/cemcom/stb.c create mode 100644 lang/cem/cemcom/storage.c create mode 100644 lang/cem/cemcom/storage.h create mode 100644 lang/cem/cemcom/string.c create mode 100644 lang/cem/cemcom/string.h create mode 100644 lang/cem/cemcom/struct.c create mode 100644 lang/cem/cemcom/struct.h create mode 100644 lang/cem/cemcom/struct.str create mode 100644 lang/cem/cemcom/switch.c create mode 100644 lang/cem/cemcom/switch.h create mode 100644 lang/cem/cemcom/switch.str create mode 100644 lang/cem/cemcom/system.c create mode 100644 lang/cem/cemcom/system.h create mode 100644 lang/cem/cemcom/tab.c create mode 100644 lang/cem/cemcom/tokenname.c create mode 100644 lang/cem/cemcom/tokenname.h create mode 100644 lang/cem/cemcom/type.c create mode 100644 lang/cem/cemcom/type.h create mode 100644 lang/cem/cemcom/type.str diff --git a/lang/cem/cemcom/LLlex.c b/lang/cem/cemcom/LLlex.c new file mode 100644 index 000000000..0c3e9f98f --- /dev/null +++ b/lang/cem/cemcom/LLlex.c @@ -0,0 +1,563 @@ +/* $Header$ */ +/* L E X I C A L A N A L Y Z E R */ + +#include "idfsize.h" +#include "numsize.h" +#include "debug.h" +#include "strsize.h" +#include "nopp.h" + +#include "input.h" +#include "alloc.h" +#include "arith.h" +#include "def.h" +#include "idf.h" +#include "LLlex.h" +#include "Lpars.h" +#include "class.h" +#include "assert.h" +#include "sizes.h" + +/* Data about the token yielded */ +struct token dot, ahead, aside; + +unsigned int LineNumber = 0; /* current LineNumber */ +char *FileName = 0; /* current filename */ + +int ReplaceMacros = 1; /* replacing macros */ +int EoiForNewline = 0; /* return EOI upon encountering newline */ +int PreProcKeys = 0; /* return preprocessor key */ +int AccFileSpecifier = 0; /* return filespecifier <...> */ +int AccDefined = 0; /* accept "defined(...)" */ +int UnknownIdIsZero = 0; /* interpret unknown id as integer 0 */ +int SkipEscNewline = 0; /* how to interpret backslash-newline */ + +#define MAX_LL_DEPTH 2 + +static struct token LexStack[MAX_LL_DEPTH]; +static LexSP = 0; + +/* In PushLex() the actions are taken in order to initialise or + re-initialise the lexical scanner. + E.g. at the invocation of a sub-parser that uses LLlex(), the + state of the current parser should be saved. +*/ +PushLex() +{ + ASSERT(LexSP < 2); + ASSERT(ASIDE == 0); /* ASIDE = 0; */ + GetToken(&ahead); + ahead.tk_line = LineNumber; + ahead.tk_file = FileName; + LexStack[LexSP++] = dot; +} + +PopLex() +{ + ASSERT(LexSP > 0); + dot = LexStack[--LexSP]; +} + +int +LLlex() +{ + /* LLlex() plays the role of Lexical Analyzer for the C parser. + The look-ahead and putting aside of tokens are taken into + account. + */ + if (ASIDE) { /* a token is put aside */ + dot = aside; + ASIDE = 0; + } + else { /* read ahead and return the old one */ + dot = ahead; + /* the following test is performed due to the dual + task of LLlex(): it is also called for parsing the + restricted constant expression following a #if or + #elif. The newline character causes EOF to be + returned in this case to stop the LLgen parsing task. + */ + if (DOT != EOI) + GetToken(&ahead); + else + DOT = EOF; + } + /* keep track of the place of the token in the file */ + ahead.tk_file = FileName; + ahead.tk_line = LineNumber; + return DOT; +} + +char *string_token(); + +int +GetToken(ptok) + register struct token *ptok; +{ + /* GetToken() is the actual token recognizer. It calls the + control line interpreter if it encounters a "\n#" + combination. Macro replacement is also performed if it is + needed. + */ + char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1]; + register int ch, nch; + +again: /* rescan the input after an error or replacement */ + LoadChar(ch); +go_on: /* rescan, the following character has been read */ + /* The following test is made to strip off the nonascii's */ + if ((ch & 0200) && ch != EOI) { + /* this is the only user-error which causes the + process to stop abruptly. + */ + fatal("non-ascii '\\%03o' read", ch & 0377); + } + switch (class(ch)) { /* detect character class */ + case STNL: /* newline, vertical space or formfeed */ + LineNumber++; /* also at vs and ff */ + if (EoiForNewline) /* called in control line */ + /* a newline in a control line indicates the + end-of-information of the line. + */ + return ptok->tk_symb = EOI; + while (LoadChar(ch), ch == '#') /* a control line follows */ + domacro(); + /* We have to loop here, because in + `domacro' the nl, vt or ff is read. The + character following it may again be a `#'. + */ + goto go_on; + case STSKIP: /* just skip the skip characters */ + goto again; + case STGARB: /* garbage character */ +#ifndef NOPP + if (SkipEscNewline && (ch == '\\')) { + /* a '\\' is allowed in #if/#elif expression */ + LoadChar(ch); + if (class(ch) == STNL) { /* vt , ff ? */ + ++LineNumber; + goto again; + } + PushBack(); + ch = '\\'; + } +#endif NOPP + if (040 < ch && ch < 0177) + lexerror("garbage char %c", ch); + else + lexerror("garbage char \\%03o", ch); + goto again; + case STSIMP: /* a simple character, no part of compound token*/ + if (ch == '/') { /* probably the start of comment */ + LoadChar(ch); + if (ch == '*') { + /* start of comment */ + skipcomment(); + goto again; + } + else { + PushBack(); + ch = '/'; /* restore ch */ + } + } + return ptok->tk_symb = ch; + case STCOMP: /* maybe the start of a compound token */ + LoadChar(nch); /* character lookahead */ + switch (ch) { + case '!': + if (nch == '=') + return ptok->tk_symb = NOTEQUAL; + PushBack(); + return ptok->tk_symb = ch; + case '&': + if (nch == '&') + return ptok->tk_symb = AND; + PushBack(); + return ptok->tk_symb = ch; + case '+': + if (nch == '+') + return ptok->tk_symb = PLUSPLUS; + PushBack(); + return ptok->tk_symb = ch; + case '-': + if (nch == '-') + return ptok->tk_symb = MINMIN; + if (nch == '>') + return ptok->tk_symb = ARROW; + PushBack(); + return ptok->tk_symb = ch; + case '<': + if (AccFileSpecifier) { + PushBack(); /* pushback nch */ + ptok->tk_str = + string_token("file specifier", '>'); + return ptok->tk_symb = FILESPECIFIER; + } + if (nch == '<') + return ptok->tk_symb = LEFT; + if (nch == '=') + return ptok->tk_symb = LESSEQ; + PushBack(); + return ptok->tk_symb = ch; + case '=': + if (nch == '=') + return ptok->tk_symb = EQUAL; + /* The following piece of code tries to recognise + old-fashioned assignment operators `=op' + */ + switch (nch) { + case '+': + return ptok->tk_symb = PLUSAB; + case '-': + return ptok->tk_symb = MINAB; + case '*': + return ptok->tk_symb = TIMESAB; + case '/': + return ptok->tk_symb = DIVAB; + case '%': + return ptok->tk_symb = MODAB; + case '>': + case '<': + LoadChar(ch); + if (ch != nch) { + PushBack(); + lexerror("illegal combination '=%c'", + nch); + } + return ptok->tk_symb = + nch == '<' ? LEFTAB : RIGHTAB; + case '&': + return ptok->tk_symb = ANDAB; + case '^': + return ptok->tk_symb = XORAB; + case '|': + return ptok->tk_symb = ORAB; + } + PushBack(); + return ptok->tk_symb = ch; + case '>': + if (nch == '=') + return ptok->tk_symb = GREATEREQ; + if (nch == '>') + return ptok->tk_symb = RIGHT; + PushBack(); + return ptok->tk_symb = ch; + case '|': + if (nch == '|') + return ptok->tk_symb = OR; + PushBack(); + return ptok->tk_symb = ch; + } + case STIDF: + { + register char *tg = &buf[0]; + register int pos = -1; + register int hash; + register struct idf *idef; + extern int idfsize; /* ??? */ + + hash = STARTHASH(); + do { /* read the identifier */ + if (++pos < idfsize) { + *tg++ = ch; + hash = ENHASH(hash, ch, pos); + } + LoadChar(ch); + } while (in_idf(ch)); + hash = STOPHASH(hash); + if (ch != EOI) + PushBack(); + *tg++ = '\0'; /* mark the end of the identifier */ + idef = ptok->tk_idf = idf_hashed(buf, tg - buf, hash); +#ifndef NOPP + if (idef->id_macro && ReplaceMacros) { + /* macro replacement should be performed */ + if (replace(idef)) + goto again; + /* arrived here: something went wrong in + replace, don't substitute in this case + */ + } + else + if (UnknownIdIsZero) { + ptok->tk_ival = (arith)0; + ptok->tk_fund = INT; + return ptok->tk_symb = INTEGER; + } +#endif NOPP + ptok->tk_symb = ( + idef->id_reserved ? + idef->id_reserved : + idef->id_def && idef->id_def->df_sc == TYPEDEF ? + TYPE_IDENTIFIER : + IDENTIFIER + ); + return IDENTIFIER; + } + case STCHAR: /* character constant */ + { + register arith val = 0, size = 0; + + LoadChar(ch); + if (ch == '\'') + lexerror("character constant too short"); + else + while (ch != '\'') { + if (ch == '\n') { + lexerror("newline in character constant"); + LineNumber++; + break; + } + if (ch == '\\') { + LoadChar(ch); + ch = quoted(ch); + } + val = val*256 + ch; + size++; + LoadChar(ch); + } + if (size > int_size) + lexerror("character constant too long"); + ptok->tk_ival = val; + ptok->tk_fund = INT; + return ptok->tk_symb = INTEGER; + } + case STSTR: /* string */ + ptok->tk_str = string_token("string", '"'); + return ptok->tk_symb = STRING; + case STNUM: /* a numeric constant */ + { + /* It should be noted that 099 means 81(decimal) and + 099.5 means 99.5 . This severely limits the tricks + we can use to scan a numeric value. + */ + register char *np = &buf[1]; + register int base = 10; + register int vch; + register arith val = 0; + + if (ch == '.') { /* an embarrassing ambiguity */ + LoadChar(vch); + PushBack(); + if (!is_dig(vch)) /* just a `.' */ + return ptok->tk_symb = ch; + *np++ = '0'; + /* in the rest of the compiler, all floats + have to start with a digit. + */ + } + if (ch == '0') { + *np++ = ch; + LoadChar(ch); + if (ch == 'x' || ch == 'X') { + base = 16; + LoadChar(ch); + } + else + base = 8; + } + while (vch = val_in_base(ch, base), vch >= 0) { + val = val*base + vch; + if (np < &buf[NUMSIZE]) + *np++ = ch; + LoadChar(ch); + } + if (ch == 'l' || ch == 'L') { + ptok->tk_ival = val; + ptok->tk_fund = LONG; + return ptok->tk_symb = INTEGER; + } + if (base == 16 || !(ch == '.' || ch == 'e' || ch == 'E')) { + PushBack(); + ptok->tk_ival = val; + /* The semantic analyser must know if the + integral constant is given in octal/hexa- + decimal form, in which case its type is + UNSIGNED, or in decimal form, in which case + its type is signed, indicated by + the fund INTEGER. + */ + ptok->tk_fund = + (base == 10 || (base == 8 && val == (arith)0)) + ? INTEGER : UNSIGNED; + return ptok->tk_symb = INTEGER; + } + /* where's the test for the length of the integral ??? */ + if (ch == '.'){ + if (np < &buf[NUMSIZE]) + *np++ = ch; + LoadChar(ch); + } + while (is_dig(ch)){ + if (np < &buf[NUMSIZE]) + *np++ = ch; + LoadChar(ch); + } + if (ch == 'e' || ch == 'E') { + if (np < &buf[NUMSIZE]) + *np++ = ch; + LoadChar(ch); + if (ch == '+' || ch == '-') { + if (np < &buf[NUMSIZE]) + *np++ = ch; + LoadChar(ch); + } + if (!is_dig(ch)) { + lexerror("malformed floating constant"); + if (np < &buf[NUMSIZE]) + *np++ = ch; + } + while (is_dig(ch)) { + if (np < &buf[NUMSIZE]) + *np++ = ch; + LoadChar(ch); + } + } + PushBack(); + *np++ = '\0'; + buf[0] = '-'; /* good heavens... */ + if (np == &buf[NUMSIZE+1]) { + lexerror("floating constant too long"); + ptok->tk_fval = Salloc("0.0", 5) + 1; + } + else + ptok->tk_fval = Salloc(buf, np - buf) + 1; + return ptok->tk_symb = FLOATING; + } + case STEOI: /* end of text on source file */ + return ptok->tk_symb = EOI; + default: /* this cannot happen */ + crash("bad class for char 0%o", ch); + } + /*NOTREACHED*/ +} + +skipcomment() +{ + /* The last character read has been the '*' of '/_*'. The + characters, except NL and EOI, between '/_*' and the first + occurring '*_/' are not interpreted. + NL only affects the LineNumber. EOI is not legal. + + Important note: it is not possible to stop skipping comment + beyond the end-of-file of an included file. + EOI is returned by LoadChar only on encountering EOF of the + top-level file... + */ + register int c; + + NoUnstack++; + LoadChar(c); + do { + while (c != '*') { + if (class(c) == STNL) + ++LineNumber; + else + if (c == EOI) { + NoUnstack--; + return; + } + LoadChar(c); + } + /* Last Character seen was '*' */ + LoadChar(c); + } while (c != '/'); + NoUnstack--; +} + +char * +string_token(nm, stop_char) + char *nm; +{ + register int ch; + register int str_size; + register char *str = Malloc(str_size = ISTRSIZE); + register int pos = 0; + + LoadChar(ch); + while (ch != stop_char) { + if (ch == '\n') { + lexerror("newline in %s", nm); + LineNumber++; + break; + } + if (ch == EOI) { + lexerror("end-of-file inside %s", nm); + break; + } + if (ch == '\\') { + register int nch; + + LoadChar(nch); + if (nch == '\n') { + LineNumber++; + LoadChar(ch); + continue; + } + else { + str[pos++] = '\\'; + if (pos == str_size) + str = Srealloc(str, str_size += RSTRSIZE); + ch = nch; + } + } + str[pos++] = ch; + if (pos == str_size) + str = Srealloc(str, str_size += RSTRSIZE); + LoadChar(ch); + } + str[pos++] = '\0'; + return str; +} + +int +quoted(ch) + register int ch; +{ + /* quoted() replaces an escaped character sequence by the + character meant. + */ + /* first char after backslash already in ch */ + if (!is_oct(ch)) { /* a quoted char */ + switch (ch) { + case 'n': + ch = '\n'; + break; + case 't': + ch = '\t'; + break; + case 'b': + ch = '\b'; + break; + case 'r': + ch = '\r'; + break; + case 'f': + ch = '\f'; + break; + } + } + else { /* a quoted octal */ + register int oct = 0, cnt = 0; + + do { + oct = oct*8 + (ch-'0'); + LoadChar(ch); + } while (is_oct(ch) && ++cnt < 3); + PushBack(); + ch = oct; + } + return ch&0377; +} + +/* provisional */ +int +val_in_base(ch, base) + register int ch; +{ + return + is_dig(ch) ? ch - '0' : + base != 16 ? -1 : + is_hex(ch) ? (ch - 'a' + 10) & 017 : + -1; +} diff --git a/lang/cem/cemcom/LLlex.h b/lang/cem/cemcom/LLlex.h new file mode 100644 index 000000000..fbc18ad09 --- /dev/null +++ b/lang/cem/cemcom/LLlex.h @@ -0,0 +1,54 @@ +/* $Header$ */ +/* D E F I N I T I O N S F O R T H E L E X I C A L A N A L Y Z E R */ + +/* A token from the input stream is represented by an integer, + called a "symbol", but it may have other information associated + to it. +*/ + +/* the structure of a token: */ +struct token { + int tok_symb; /* the token itself */ + char *tok_file; /* the file it (probably) comes from */ + unsigned int tok_line; /* the line it (probably) comes from */ + union { + struct idf *tok_idf; /* for IDENTIFIER & TYPE_IDENTIFIER */ + char *tok_str; /* for STRING: text */ + struct { /* for INTEGER */ + int tok_fund; /* INT or LONG */ + arith tok_ival; + } tok_integer; + char *tok_fval; + } tok_data; +}; + +#define tk_symb tok_symb +#define tk_file tok_file +#define tk_line tok_line +#define tk_idf tok_data.tok_idf +#define tk_str tok_data.tok_str +#define tk_fund tok_data.tok_integer.tok_fund +#define tk_ival tok_data.tok_integer.tok_ival +#define tk_fval tok_data.tok_fval + +extern struct token dot, ahead, aside; +extern unsigned int LineNumber; /* "LLlex.c" */ +extern char *FileName; /* "LLlex.c" */ + +extern int ReplaceMacros; /* "LLlex.c" */ +extern int EoiForNewline; /* "LLlex.c" */ +extern int PreProcKeys; /* "LLlex.c" */ +extern int AccFileSpecifier; /* "LLlex.c" */ +extern int AccDefined; /* "LLlex.c" */ +extern int UnknownIdIsZero; /* "LLlex.c" */ +extern int SkipEscNewline; /* "LLlex.c" */ + +extern int NoUnstack; /* buffer.c */ + +extern int err_occurred; /* "error.c" */ + +#define DOT dot.tk_symb +#define AHEAD ahead.tk_symb +#define ASIDE aside.tk_symb + +#define EOF (-1) diff --git a/lang/cem/cemcom/LLmessage.c b/lang/cem/cemcom/LLmessage.c new file mode 100644 index 000000000..acb3b9bce --- /dev/null +++ b/lang/cem/cemcom/LLmessage.c @@ -0,0 +1,50 @@ +/* $Header$ */ +/* PARSER ERROR ADMINISTRATION */ + +#include "idf.h" +#include "alloc.h" +#include "arith.h" +#include "LLlex.h" +#include "Lpars.h" + +extern char *symbol2str(); + +LLmessage(tk) { + err_occurred = 1; + if (tk < 0) + fatal("parser administration overflow"); + if (tk) { + error("%s missing", symbol2str(tk)); + insert_token(tk); + } + else + error("%s deleted", symbol2str(DOT)); +} + +insert_token(tk) + int tk; +{ + aside = dot; + + DOT = tk; + + switch (tk) { + /* The operands need some body */ + case IDENTIFIER: + dot.tk_idf = gen_idf(); + break; + case TYPE_IDENTIFIER: + dot.tk_idf = str2idf("int"); + break; + case STRING: + dot.tk_str = Salloc("", 1); + break; + case INTEGER: + dot.tk_fund = INT; + dot.tk_ival = 1; + break; + case FLOATING: + dot.tk_fval = Salloc("0.0", 4); + break; + } +} diff --git a/lang/cem/cemcom/Makefile.erik b/lang/cem/cemcom/Makefile.erik new file mode 100644 index 000000000..83f229fc9 --- /dev/null +++ b/lang/cem/cemcom/Makefile.erik @@ -0,0 +1,215 @@ +# $Header$ +# M A K E F I L E F O R A C K C - C O M P I L E R + +# Some paths +BIN =/user1/$$USER/bin# # provisional ??? +EM = /usr/em# # where to find the ACK tree +ACK = $(EM)/bin/ack# # old ACK C compiler +EM_INCLUDES =$(EM)/h# # directory containing EM interface definition + +# Where to install the compiler and its driver +CEMCOM = $(BIN)/cemcom +DRIVER = $(BIN)/cem + +# What C compiler to use and how +CC = $(ACK) -.c +CC = CC +CC = /bin/cc +COPTIONS = + +# What parser generator to use and how +GEN = /user0/ceriel/bin/LLgen +GENOPTIONS = -vv + +# Special #defines during compilation +CDEFS = $(MAP) -I$(EM_INCLUDES) +CFLAGS = $(CDEFS) $(COPTIONS) -O# # we cannot pass the COPTIONS to lint! + +# Grammar files and their objects +LSRC = tokenfile.g declar.g statement.g expression.g program.g +LOBJ = tokenfile.o declar.o statement.o expression.o program.o Lpars.o + +# Objects of hand-written C files +COBJ = main.o idf.o declarator.o decspecs.o struct.o \ + expr.o ch7.o ch7bin.o cstoper.o arith.o \ + alloc.o asm.o code.o dumpidf.o error.o field.o\ + tokenname.o LLlex.o LLmessage.o \ + input.o domacro.o replace.o init.o options.o \ + scan.o skip.o stack.o type.o ch7mon.o label.o eval.o \ + switch.o storage.o ival.o conversion.o \ + em.o blocks.o dataflow.o system.o string.o + +# Objects of other generated C files +GOBJ = char.o symbol2str.o next.o writeem.o + +# generated source files +GSRC = char.c symbol2str.c next.c writeem.c \ + writeem.h + +# .h files generated by `make hfiles'; PLEASE KEEP THIS UP-TO-DATE! +GHSRC = botch_free.h dataflow.h debug.h density.h errout.h \ + idepth.h idfsize.h ifdepth.h inputtype.h inumlength.h lapbuf.h \ + maxincl.h myalloc.h nobitfield.h nopp.h \ + nparams.h numsize.h parbufsize.h pathlength.h predefine.h \ + proc_intf.h strsize.h target_sizes.h textsize.h use_tmp.h \ + bufsiz.h str_params.h spec_arith.h + +# Other generated files, for 'make clean' only +GENERATED = tab tokenfile.g Lpars.h LLfiles LL.output lint.out \ + print Xref lxref hfiles cfiles + +# include files containing ALLOCDEF specifications +NEXTFILES = code.h declarator.h decspecs.h def.h expr.h field.h \ + idf.h macro.h stack.h struct.h switch.h type.h + +all: cc + +cc: + make hfiles + make LLfiles + make main + +cem: cem.c string.o + $(CC) -O cem.c string.o -o cem + +lint.cem: cem.c string.c + lint -abx cem.c + +hfiles: Parameters + ./make.hfiles Parameters + @touch hfiles + +LLfiles: $(LSRC) + $(GEN) $(GENOPTIONS) $(LSRC) + @touch LLfiles + +tokenfile.g: tokenname.c make.tokfile + tokenfile.g + +symbol2str.c: tokenname.c make.tokcase + symbol2str.c + +char.c: tab char.tab + tab -fchar.tab >char.c + +next.c: make.next $(NEXTFILES) + ./make.next $(NEXTFILES) >next.c + +writeem.c: make.emfun emcode.def + ./make.emfun emcode.def >writeem.c + +writeem.h: make.emmac emcode.def + ./make.emmac emcode.def >writeem.h + +# Objects needed for 'main' +OBJ = $(COBJ) $(LOBJ) $(GOBJ) + +main: $(OBJ) Makefile + $(CC) $(COPTIONS) $(LFLAGS) $(OBJ) -o main + size main + +cfiles: hfiles LLfiles $(GSRC) + @touch cfiles + +install: main cem + cp main $(CEMCOM) + cp cem $(DRIVER) + +print: files + pr `cat files` > print + +tags: cfiles + ctags `sources $(OBJ)` + +shar: files + shar `cat files` + +listcfiles: + @echo `sources $(OBJ)` + +listobjects: + @echo $(OBJ) + +depend: cfiles + sed '/^#AUTOAUTO/,$$d' Makefile >Makefile.new + echo '#AUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTO' >>Makefile.new + /user1/erikb/bin/mkdep `sources $(OBJ)` | \ + sed 's/\.c:/.o:/' >>Makefile.new + mv Makefile Makefile.old + mv Makefile.new Makefile + +xref: + ctags -x `grep "\.[ch]" files`|sed "s/).*/)/">Xref + +lxref: + lxref $(OBJ) -lc >lxref + +lint: lint.main lint.cem lint.tab + +lint.main: cfiles + lint -DNORCSID -bx $(CDEFS) `sources $(OBJ)` >lint.out + +cchk: + cchk `sources $(COBJ)` + +clean: + rm -f `sources $(LOBJ)` $(OBJ) $(GENERATED) $(GSRC) $(GHSRC) + +tab: + $(CC) tab.c -o tab + +lint.tab: + lint -abx tab.c + +sim: cfiles + $(SIM) $(SIMFLAGS) `sources $(COBJ)` $(GSRC) $(LSRC) + +#AUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTOAUTO +main.o: LLlex.h Lpars.h alloc.h arith.h bufsiz.h debug.h declarator.h idf.h input.h inputtype.h level.h maxincl.h myalloc.h nobitfield.h nopp.h spec_arith.h specials.h system.h target_sizes.h tokenname.h type.h use_tmp.h +idf.o: LLlex.h Lpars.h align.h alloc.h arith.h assert.h botch_free.h debug.h declarator.h decspecs.h def.h idf.h idfsize.h label.h level.h nobitfield.h nopp.h sizes.h spec_arith.h specials.h stack.h storage.h struct.h type.h +declarator.o: Lpars.h alloc.h arith.h botch_free.h declarator.h expr.h idf.h label.h nobitfield.h nopp.h sizes.h spec_arith.h storage.h type.h +decspecs.o: Lpars.h arith.h decspecs.h def.h level.h nobitfield.h spec_arith.h type.h +struct.o: LLlex.h Lpars.h align.h arith.h assert.h botch_free.h debug.h def.h field.h idf.h level.h nobitfield.h nopp.h sizes.h spec_arith.h stack.h storage.h struct.h type.h +expr.o: LLlex.h Lpars.h alloc.h arith.h botch_free.h declarator.h decspecs.h def.h expr.h idf.h label.h nobitfield.h nopp.h sizes.h spec_arith.h storage.h type.h +ch7.o: Lpars.h arith.h assert.h debug.h def.h expr.h idf.h label.h nobitfield.h nopp.h spec_arith.h struct.h type.h +ch7bin.o: Lpars.h arith.h botch_free.h expr.h idf.h label.h nobitfield.h nopp.h spec_arith.h storage.h struct.h type.h +cstoper.o: Lpars.h arith.h expr.h idf.h label.h nobitfield.h nopp.h sizes.h spec_arith.h target_sizes.h type.h +arith.o: Lpars.h alloc.h arith.h botch_free.h expr.h field.h idf.h label.h mes.h nobitfield.h nopp.h spec_arith.h storage.h type.h +alloc.o: alloc.h assert.h debug.h myalloc.h system.h +code.o: LLlex.h Lpars.h alloc.h arith.h assert.h atw.h botch_free.h code.h dataflow.h debug.h declarator.h decspecs.h def.h em.h expr.h idf.h label.h level.h mes.h nobitfield.h nopp.h proc_intf.h sizes.h spec_arith.h specials.h stack.h storage.h type.h use_tmp.h writeem.h +dumpidf.o: Lpars.h arith.h debug.h def.h expr.h field.h idf.h label.h nobitfield.h nopp.h spec_arith.h stack.h struct.h type.h +error.o: LLlex.h arith.h debug.h em.h errout.h expr.h label.h nopp.h proc_intf.h spec_arith.h string.h system.h tokenname.h use_tmp.h writeem.h +field.o: Lpars.h arith.h assert.h code.h debug.h em.h expr.h field.h idf.h label.h nobitfield.h nopp.h proc_intf.h sizes.h spec_arith.h type.h writeem.h +tokenname.o: LLlex.h Lpars.h arith.h idf.h nopp.h spec_arith.h tokenname.h +LLlex.o: LLlex.h Lpars.h alloc.h arith.h assert.h class.h debug.h def.h idf.h idfsize.h input.h nopp.h numsize.h sizes.h spec_arith.h strsize.h +LLmessage.o: LLlex.h Lpars.h alloc.h arith.h idf.h nopp.h spec_arith.h +input.o: LLlex.h alloc.h arith.h assert.h bufsiz.h debug.h idepth.h input.h inputtype.h interface.h nopp.h pathlength.h spec_arith.h system.h +domacro.o: LLlex.h Lpars.h alloc.h arith.h assert.h botch_free.h class.h debug.h idf.h idfsize.h ifdepth.h input.h interface.h macro.h nopp.h nparams.h parbufsize.h spec_arith.h storage.h textsize.h +replace.o: LLlex.h alloc.h arith.h assert.h class.h debug.h idf.h input.h interface.h macro.h nopp.h pathlength.h spec_arith.h string.h strsize.h +init.o: alloc.h class.h idf.h interface.h macro.h nopp.h predefine.h string.h system.h +options.o: align.h arith.h class.h idf.h idfsize.h macro.h maxincl.h nobitfield.h nopp.h sizes.h spec_arith.h storage.h +scan.o: class.h idf.h input.h interface.h lapbuf.h macro.h nopp.h nparams.h +skip.o: LLlex.h arith.h class.h input.h interface.h nopp.h spec_arith.h +stack.o: Lpars.h alloc.h arith.h botch_free.h debug.h def.h em.h idf.h level.h mes.h nobitfield.h nopp.h proc_intf.h spec_arith.h stack.h storage.h struct.h system.h type.h use_tmp.h writeem.h +type.o: Lpars.h align.h alloc.h arith.h def.h idf.h nobitfield.h nopp.h sizes.h spec_arith.h type.h +ch7mon.o: Lpars.h arith.h botch_free.h def.h expr.h idf.h label.h nobitfield.h nopp.h spec_arith.h storage.h type.h +label.o: Lpars.h arith.h def.h idf.h label.h level.h nobitfield.h nopp.h spec_arith.h type.h +eval.o: Lpars.h align.h arith.h assert.h atw.h code.h dataflow.h debug.h def.h em.h expr.h idf.h label.h level.h mes.h nobitfield.h nopp.h proc_intf.h sizes.h spec_arith.h stack.h string.h type.h writeem.h +switch.o: arith.h assert.h botch_free.h code.h debug.h density.h em.h expr.h idf.h label.h nobitfield.h nopp.h proc_intf.h spec_arith.h storage.h switch.h type.h writeem.h +storage.o: alloc.h assert.h botch_free.h debug.h storage.h +ival.o: Lpars.h align.h arith.h assert.h class.h debug.h def.h em.h expr.h field.h idf.h label.h level.h nobitfield.h nopp.h proc_intf.h sizes.h spec_arith.h string.h struct.h type.h writeem.h +conversion.o: Lpars.h arith.h em.h nobitfield.h proc_intf.h sizes.h spec_arith.h type.h writeem.h +em.o: arith.h bufsiz.h em.h label.h proc_intf.h spec_arith.h system.h writeem.h +blocks.o: arith.h atw.h em.h proc_intf.h sizes.h spec_arith.h writeem.h +dataflow.o: dataflow.h +system.o: inputtype.h system.h +string.o: arith.h nopp.h spec_arith.h str_params.h string.h system.h +tokenfile.o: Lpars.h +declar.o: LLlex.h Lpars.h arith.h debug.h declarator.h decspecs.h def.h expr.h field.h idf.h label.h nobitfield.h nopp.h sizes.h spec_arith.h struct.h type.h +statement.o: LLlex.h Lpars.h arith.h botch_free.h code.h debug.h def.h em.h expr.h idf.h label.h nobitfield.h nopp.h proc_intf.h spec_arith.h stack.h storage.h type.h writeem.h +expression.o: LLlex.h Lpars.h arith.h expr.h idf.h label.h nobitfield.h nopp.h spec_arith.h type.h +program.o: LLlex.h Lpars.h alloc.h arith.h code.h declarator.h decspecs.h def.h expr.h idf.h label.h nobitfield.h nopp.h spec_arith.h type.h +Lpars.o: Lpars.h +char.o: class.h +symbol2str.o: Lpars.h +writeem.o: arith.h em.h label.h proc_intf.h spec_arith.h writeem.h diff --git a/lang/cem/cemcom/Parameters b/lang/cem/cemcom/Parameters new file mode 100644 index 000000000..f0757da2c --- /dev/null +++ b/lang/cem/cemcom/Parameters @@ -0,0 +1,144 @@ +!File: myalloc.h +#define OWNALLOC 1 /* use own superfast allocation */ +#define ALLOCSIZ 4096 /* allocate pieces of 4K */ +#define ALIGNSIZE 8 /* needed for alloc.c */ + + +!File: pathlength.h +#define PATHLENGTH 1024 /* max. length of path to file */ + + +!File: idepth.h +#define IDEPTH 20 /* maximum nr of stacked input buffers */ + + +!File: errout.h +#define ERROUT stderr /* file pointer for writing messages */ +#define MAXERR_LINE 5 /* maximum number of error messages given + on the same input line. */ + + +!File: idfsize.h +#define IDFSIZE 30 /* maximum significant length of an identifier */ + + +!File: numsize.h +#define NUMSIZE 256 /* maximum length of a numeric constant */ + + +!File: nparams.h +#define NPARAMS 32 /* maximum number of parameters of macros */ + + +!File: ifdepth.h +#define IFDEPTH 256 /* maximum number of nested if-constructions */ + + +!File: maxincl.h +#define MAXINCL 8 /* maximum number of #include directories */ + + +!File: density.h +#define DENSITY 2 /* see switch.[ch] for an explanation */ + + +!File: predefine.h +#define PREDEFINE "vax,VAX,BSD4_1,bsd4_1" + + +!File: lapbuf.h +#define LAPBUF 4096 /* size of macro actual parameter buffer */ + + +!File: strsize.h +#define ISTRSIZE 32 /* minimum number of bytes allocated for + storing a string */ +#define RSTRSIZE 8 /* step size in enlarging the memory for + the storage of a string */ + + +!File: target_sizes.h +#define MAXSIZE 8 /* the maximum of the SZ_* constants */ + +/* target machine sizes */ +#define SZ_CHAR (arith)1 +#define SZ_SHORT (arith)2 +#define SZ_WORD (arith)4 +#define SZ_INT (arith)4 +#define SZ_LONG (arith)4 +#define SZ_FLOAT (arith)4 +#define SZ_DOUBLE (arith)8 +#define SZ_POINTER (arith)4 + +/* target machine alignment requirements */ +#define AL_CHAR 1 +#define AL_SHORT SZ_SHORT +#define AL_WORD SZ_WORD +#define AL_INT SZ_WORD +#define AL_LONG SZ_WORD +#define AL_FLOAT SZ_WORD +#define AL_DOUBLE SZ_WORD +#define AL_POINTER SZ_WORD +#define AL_STRUCT 1 +#define AL_UNION 1 + + +!File: botch_free.h +#undef BOTCH_FREE 1 /* botch freed memory, as a check */ + + +!File: dataflow.h +#define DATAFLOW 1 /* produce some compile-time xref */ + + +!File: debug.h +#define DEBUG 1 /* perform various self-tests */ + + +!File: proc_intf.h +#define PROC_INTF 1 /* compile with procedural EM interface */ + + +!File: use_tmp.h +#define USE_TMP 1 /* collect exa, exp, ina and inp commands + and let them precede the rest of + the generated compact code */ + + +!File: parbufsize.h +#define PARBUFSIZE 1024 + + +!File: textsize.h +#define ITEXTSIZE 8 /* 1st piece of memory for repl. text */ +#define RTEXTSIZE 8 /* stepsize for enlarging repl.text */ + + +!File: inputtype.h +#undef READ_IN_ONE 1 /* read input file in one */ + + +!File: nopp.h +#undef NOPP 1 /* use built-int preprocessor */ + + +!File: nobitfield.h +#undef NOBITFIELD 1 /* implement bitfields */ + + +!File: str_params.h +/* maximum number of characters in string representation of (unsigned) long +*/ +#define MAXWIDTH 32 + +#define SSIZE 1024 /* string-buffer size for print routines */ + + +!File: bufsiz.h +#define BUFSIZ 1024 /* system block size */ + + +!File: spec_arith.h +/* describes internal compiler arithmetics */ +#undef SPECIAL_ARITHMETICS /* something different from native long */ + diff --git a/lang/cem/cemcom/align.h b/lang/cem/cemcom/align.h new file mode 100644 index 000000000..b0be9d445 --- /dev/null +++ b/lang/cem/cemcom/align.h @@ -0,0 +1,9 @@ +/* $Header$ */ +/* A L I G N M E N T D E F I N I T I O N S */ + +extern int + short_align, word_align, int_align, long_align, + float_align, double_align, pointer_align, + struct_align, union_align; + +extern arith align(); diff --git a/lang/cem/cemcom/alloc.c b/lang/cem/cemcom/alloc.c new file mode 100644 index 000000000..064c2f097 --- /dev/null +++ b/lang/cem/cemcom/alloc.c @@ -0,0 +1,161 @@ +/* $Header$ */ +/* M E M O R Y A L L O C A T I O N R O U T I N E S */ + +/* The allocation of memory in this program, which plays an important + role in reading files, replacing macros and building expression + trees, is not performed by malloc etc. The reason for having own + memory allocation routines (malloc(), realloc() and free()) is + plain: the garbage collection performed by the library functions + malloc(), realloc() and free() costs a lot of time, while in most + cases (on a VAX) the freeing and reallocation of memory is not + necessary. The only reallocation done in this program is at + building strings in memory. This means that the last + (re-)allocated piece of memory can be extended. + + The (basic) memory allocating routines offered by this memory + handling package are: + + char *malloc(n) : allocate n bytes + char *realloc(ptr, n) : reallocate buffer to n bytes + (works only if ptr was last allocated) + free(ptr) : if ptr points to last allocated + memory, this memory is re-allocatable + Salloc(str, sz) : save string in malloc storage +*/ + +#include "myalloc.h" /* UF */ +#include "debug.h" /* UF */ + +#include "alloc.h" +#include "assert.h" +#include "system.h" + +#ifdef OWNALLOC + +#define SBRK_ERROR ((char *) -1) /* errors during allocation */ + +/* the following variables are used for book-keeping */ +static int nfreebytes = 0; /* # free bytes in sys_sbrk-ed space */ +static char *freeb; /* pointer to first free byte */ +static char *lastalloc; /* pointer to last malloced sp */ +static int lastnbytes; /* nr of bytes in last allocated */ + /* space */ +static char *firstfreeb = 0; + +#endif OWNALLOC + +char * +Salloc(str, sz) + register char str[]; + register int sz; +{ + /* Salloc() is not a primitive function: it just allocates a + piece of storage and copies a given string into it. + */ + char *res = Malloc(sz); + register char *m = res; + + while (sz--) + *m++ = *str++; + return res; +} + +#ifdef OWNALLOC + +#define ALIGN(m) (ALIGNSIZE * (((m) - 1) / ALIGNSIZE + 1)) + +char * +malloc(n) + unsigned n; +{ + /* malloc() is a very simple malloc(). + */ + n = ALIGN(n); + if (nfreebytes < n) { + register nbts = (n <= ALLOCSIZ) ? ALLOCSIZ : n; + + if (!nfreebytes) { + if ((freeb = sys_sbrk(nbts)) == SBRK_ERROR) + fatal("out of memory"); + } + else { + if (sys_sbrk(nbts) == SBRK_ERROR) + fatal("out of memory"); + } + nfreebytes += nbts; + } + lastalloc = freeb; + freeb = lastalloc + n; + lastnbytes = n; + nfreebytes -= n; + return lastalloc; +} + +/*ARGSUSED*/ +char * +realloc(ptr, n) + char *ptr; + unsigned n; +{ + /* realloc() is designed to append more bytes to the latest + allocated piece of memory. However reallocation should be + performed, even if the mentioned memory is not the latest + allocated one, this situation will not occur. To do so, + realloc should know how many bytes are allocated the last + time for that piece of memory. ???? + */ + register int nbytes = n; + + ASSERT(ptr == lastalloc); /* security */ + nbytes -= lastnbytes; /* # bytes required */ + if (nbytes == 0) /* no extra bytes */ + return lastalloc; + + /* if nbytes < 0: free last allocated bytes; + if nbytes > 0: allocate more bytes + */ + if (nbytes > 0) + nbytes = ALIGN(nbytes); + if (nfreebytes < nbytes) { + register int nbts = (nbytes < ALLOCSIZ) ? ALLOCSIZ : nbytes; + if (sys_sbrk(nbts) == SBRK_ERROR) + fatal("out of memory"); + nfreebytes += nbts; + } + freeb += nbytes; /* less bytes */ + lastnbytes += nbytes; /* change nr of last all. bytes */ + nfreebytes -= nbytes; /* less or more free bytes */ + return lastalloc; +} + +/* to ensure that the alloc library package will not be loaded: */ +/*ARGSUSED*/ +free(p) + char *p; +{} + +init_mem() +{ + firstfreeb = sys_sbrk(0); + /* align the first memory unit to ALIGNSIZE ??? */ + if ((long) firstfreeb % ALIGNSIZE != 0) { + register char *fb = firstfreeb; + + fb = (char *)ALIGN((long)fb); + firstfreeb = sys_sbrk(fb - firstfreeb); + firstfreeb = fb; + ASSERT((long)firstfreeb % ALIGNSIZE == 0); + } +} + +#ifdef DEBUG +mem_stat() +{ + extern char options[]; + + if (options['m']) + printf("Total nr of bytes allocated: %d\n", + sys_sbrk(0) - firstfreeb); +} +#endif DEBUG +#endif OWNALLOC diff --git a/lang/cem/cemcom/alloc.h b/lang/cem/cemcom/alloc.h new file mode 100644 index 000000000..a6bafae42 --- /dev/null +++ b/lang/cem/cemcom/alloc.h @@ -0,0 +1,16 @@ +/* $Header$ */ +/* PROGRAM'S INTERFACE TO MEMORY ALLOCATION ROUTINES */ + +/* This file serves as the interface between the program and the + memory allocating routines. + There are 3 memory allocation routines: + char *Malloc(n) to allocate n bytes + char *Salloc(str, n) to allocate n bytes + and fill them with string str + char *Realloc(str, n) reallocate the string at str to n bytes +*/ + +extern char *Salloc(), *malloc(), *realloc(); + +#define Malloc(n) malloc((unsigned)(n)) +#define Srealloc(ptr,n) realloc(ptr, (unsigned)(n)) diff --git a/lang/cem/cemcom/arith.c b/lang/cem/cemcom/arith.c new file mode 100644 index 000000000..04f843a21 --- /dev/null +++ b/lang/cem/cemcom/arith.c @@ -0,0 +1,465 @@ +/* $Header$ */ +/* A R I T H M E T I C C O N V E R S I O N S */ + +/* This file contains the routines for the various conversions that + may befall operands in C. It is structurally a mess, but I haven't + decided yet whether I can't find the right structure or the + semantics of C is a mess. +*/ + +#include "botch_free.h" +#include "nobitfield.h" +#include "alloc.h" +#include "idf.h" +#include "arith.h" +#include "type.h" +#include "label.h" +#include "expr.h" +#include "Lpars.h" +#include "storage.h" +#include "field.h" +#include "mes.h" + +extern char *symbol2str(); +extern char options[]; + +int +arithbalance(e1p, oper, e2p) /* RM 6.6 */ + struct expr **e1p, **e2p; +{ + /* The expressions *e1p and *e2p are balanced to be operands + of the arithmetic operator oper. + */ + register int t1, t2, u1, u2; + + t1 = any2arith(e1p, oper); + t2 = any2arith(e2p, oper); + + /* Now t1 and t2 are either INT or LONG or DOUBLE */ + if (t1 == DOUBLE && t2 != DOUBLE) + t2 = int2float(e2p, double_type); + else + if (t2 == DOUBLE && t1 != DOUBLE) + t1 = int2float(e1p, double_type); + else + if (t1 == DOUBLE) + return DOUBLE; + + /* Now they are INT or LONG */ + u1 = (*e1p)->ex_type->tp_unsigned; + u2 = (*e2p)->ex_type->tp_unsigned; + + /* if either is long, the other will be */ + if (t1 == LONG && t2 != LONG) + t2 = int2int(e2p, u2 ? ulong_type : long_type); + else + if (t2 == LONG && t1 != LONG) + t1 = int2int(e1p, u1 ? ulong_type : long_type); + + /* if either is unsigned, the other will be */ + if (u1 && !u2) + t2 = int2int(e2p, (t1 == LONG) ? ulong_type : uint_type); + else + if (!u1 && u2) + t1 = int2int(e1p, (t2 == LONG) ? ulong_type : uint_type); + + return t1; +} + +relbalance(e1p, oper, e2p) + register struct expr **e1p, **e2p; +{ + /* The expressions *e1p and *e2p are balanced to be operands + of the relational operator oper. + */ + if ((*e1p)->ex_type->tp_fund == FUNCTION) + function2pointer(e1p); + if ((*e2p)->ex_type->tp_fund == FUNCTION) + function2pointer(e2p); + if ((*e1p)->ex_type->tp_fund == POINTER) + ch76pointer(e2p, oper, (*e1p)->ex_type); + else + if ((*e2p)->ex_type->tp_fund == POINTER) + ch76pointer(e1p, oper, (*e2p)->ex_type); + else + if ( (*e1p)->ex_type == (*e2p)->ex_type && + (*e1p)->ex_type->tp_fund == ENUM + ) + {} + else + arithbalance(e1p, oper, e2p); +} + +ch76pointer(expp, oper, tp) + register struct expr **expp; + register struct type *tp; +{ + /* Checks whether *expp may be compared to tp using oper, + as described in chapter 7.6 and 7.7. + tp is known to be a pointer. + */ + if ((*expp)->ex_type->tp_fund == POINTER) { + if ((*expp)->ex_type != tp) + ch7cast(expp, oper, tp); + } + else + if ( is_integral_type((*expp)->ex_type) && + ( !options['R'] /* we don't care */ || + (oper == EQUAL || oper == NOTEQUAL || oper == ':') + ) + ) /* ch 7.7 */ + ch7cast(expp, CAST, tp); + else { + if ((*expp)->ex_type != error_type) + error("%s on %s and pointer", + symbol2str(oper), + symbol2str((*expp)->ex_type->tp_fund) + ); + (*expp)->ex_type = error_type; + ch7cast(expp, oper, tp); + } +} + +int +any2arith(expp, oper) + register struct expr **expp; +{ + /* Turns any expression into int_type, long_type or + double_type. + */ + int fund = (*expp)->ex_type->tp_fund; + + switch (fund) { + case CHAR: + case SHORT: + int2int(expp, + (*expp)->ex_type->tp_unsigned ? uint_type : int_type); + break; + case INT: + case LONG: + break; + case ENUM: + if ( is_test_op(oper) || oper == '=' || oper == PARCOMMA || + oper == ',' || oper == ':' || + ( !options['R'] && + (is_arith_op(oper) || is_asgn_op(oper)) + ) + ) + {} + else + warning("%s on enum", symbol2str(oper)); + int2int(expp, int_type); + break; + case FLOAT: + float2float(expp, double_type); + break; + case DOUBLE: + break; +#ifndef NOBITFIELD + case FIELD: + field2arith(expp); + break; +#endif NOBITFIELD + default: + error("operator %s on non-numerical operand (%s)", + symbol2str(oper), symbol2str(fund)); + case ERRONEOUS: + free_expression(*expp); + *expp = intexpr((arith)1, INT); + break; + } + + return (*expp)->ex_type->tp_fund; +} + +struct expr * +arith2arith(tp, oper, expr) + struct type *tp; + int oper; + struct expr *expr; +{ + /* arith2arith constructs a new expression containing a + run-time conversion between some arithmetic types. + */ + register struct expr *new = new_expr(); + + clear((char *)new, sizeof(struct expr)); + new->ex_file = expr->ex_file; + new->ex_line = expr->ex_line; + new->ex_type = tp; + new->ex_class = Type; + return new_oper(tp, new, oper, expr); +} + +int +int2int(expp, tp) + register struct expr **expp; + struct type *tp; +{ + /* The expression *expp, which is of some integral type, is + converted to the integral type tp. + */ + + if (is_cp_cst(*expp)) { + (*expp)->ex_type = tp; + cut_size(*expp); + } + else { + *expp = arith2arith(tp, INT2INT, *expp); + } + return (*expp)->ex_type->tp_fund; +} + +int +int2float(expp, tp) + struct expr **expp; + struct type *tp; +{ + /* The expression *expp, which is of some integral type, is + converted to the floating type tp. + */ + + fp_used = 1; + *expp = arith2arith(tp, INT2FLOAT, *expp); + return (*expp)->ex_type->tp_fund; +} + +float2int(expp, tp) + struct expr **expp; + struct type *tp; +{ + /* The expression *expp, which is of some floating type, is + converted to the integral type tp. + */ + + fp_used = 1; + *expp = arith2arith(tp, FLOAT2INT, *expp); +} + +float2float(expp, tp) + struct expr **expp; + struct type *tp; +{ + /* The expression *expp, which is of some floating type, is + converted to the floating type tp. + There is no need for an explicit conversion operator + if the expression is a constant. + */ + + fp_used = 1; + if ((*expp)->ex_class == Float) { + (*expp)->ex_type = tp; + } + else { + *expp = arith2arith(tp, FLOAT2FLOAT, *expp); + } +} + +array2pointer(expp) + struct expr **expp; +{ + /* The expression, which must be an array, it is converted + to a pointer. + */ + (*expp)->ex_type = + construct_type(POINTER, (*expp)->ex_type->tp_up, (arith)0); +} + +function2pointer(expp) + struct expr **expp; +{ + /* The expression, which must be a function, it is converted + to a pointer to the function. + */ + (*expp)->ex_type = + construct_type(POINTER, (*expp)->ex_type, (arith)0); +} + +opnd2integral(expp, oper) + struct expr **expp; + int oper; +{ + register int fund = (*expp)->ex_type->tp_fund; + + if (fund != INT && fund != LONG) { + if (fund != ERRONEOUS) + error("%s operand to %s", + symbol2str(fund), symbol2str(oper)); + *expp = intexpr((arith)1, INT); + /* fund = INT; */ + } +} + +opnd2logical(expp, oper) + struct expr **expp; + int oper; +{ + register int fund; + + if ((*expp)->ex_type->tp_fund == FUNCTION) + function2pointer(expp); +#ifndef NOBITFIELD + else + if ((*expp)->ex_type->tp_fund == FIELD) + field2arith(expp); +#endif NOBITFIELD + + fund = (*expp)->ex_type->tp_fund; + + switch (fund) { + + case CHAR: + case SHORT: + case INT: + case LONG: + case ENUM: + case POINTER: + case FLOAT: + case DOUBLE: + break; + default: + error("%s operand to %s", + symbol2str(fund), symbol2str(oper)); + case ERRONEOUS: + *expp = intexpr((arith)1, INT); + break; + } +} + +opnd2test(expp, oper) + struct expr **expp; +{ + opnd2logical(expp, oper); + if ((*expp)->ex_class == Oper && is_test_op((*expp)->OP_OPER)) + { /* It is already a test */ } + else + ch7bin(expp, NOTEQUAL, intexpr((arith)0, INT)); +} + +int +is_test_op(oper) +{ + switch (oper) { + case '<': + case '>': + case LESSEQ: + case GREATEREQ: + case EQUAL: + case NOTEQUAL: + case '!': + case AND: + case OR: /* && and || also impose a test */ + return 1; + default: + return 0; + } + /*NOTREACHED*/ +} + +int +is_arith_op(oper) +{ + switch (oper) { + case '*': + case '/': + case '%': + case '+': + case '-': + case LEFT: + case RIGHT: + case '&': + case '^': + case '|': + return 1; + default: + return 0; + } +} + +int +is_asgn_op(oper) +{ + switch (oper) { + case '=': + case PLUSAB: + case MINAB: + case TIMESAB: + case DIVAB: + case MODAB: + case LEFTAB: + case RIGHTAB: + case ANDAB: + case ORAB: + case XORAB: + return 1; + default: + return 0; + } +} + +any2opnd(expp, oper) + struct expr **expp; +{ + if (!*expp) + return; + switch ((*expp)->ex_type->tp_fund) { /* RM 7.1 */ + case CHAR: + case SHORT: + case ENUM: + case FLOAT: + any2arith(expp, oper); + break; + case ARRAY: + array2pointer(expp); + break; +#ifndef NOBITFIELD + case FIELD: + field2arith(expp); + break; +#endif NOBITFIELD + } +} + +#ifndef NOBITFIELD +field2arith(expp) + struct expr **expp; +{ + /* The expression to extract the bitfield value from the + memory word is put in the tree. + */ + register struct type *tp = (*expp)->ex_type->tp_up; + register struct field *fd = (*expp)->ex_type->tp_field; + register struct type *atype = tp->tp_unsigned ? uword_type : word_type; + + (*expp)->ex_type = atype; + + if (atype->tp_unsigned) { /* don't worry about the sign bit */ + ch7bin(expp, RIGHT, intexpr((arith)fd->fd_shift, INT)); + ch7bin(expp, '&', intexpr(fd->fd_mask, INT)); + } + else { /* take care of the sign bit: sign extend if needed */ + register arith bits_in_type = atype->tp_size * 8; + + ch7bin(expp, LEFT, + intexpr(bits_in_type - fd->fd_width - fd->fd_shift, INT) + ); + ch7bin(expp, RIGHT, intexpr(bits_in_type - fd->fd_width, INT)); + } + ch7cast(expp, CAST, tp); /* restore its original type */ +} +#endif NOBITFIELD + +/* switch_sign_fp() negates the given floating constant expression + The lexical analyser has reserved an extra byte of space in front + of the string containing the representation of the floating + constant. This byte contains the '-' character and we have to + take care of the first byte the fl_value pointer points to. +*/ +switch_sign_fp(expr) + struct expr *expr; +{ + if (*(expr->FL_VALUE) == '-') + ++(expr->FL_VALUE); + else + --(expr->FL_VALUE); +} diff --git a/lang/cem/cemcom/arith.h b/lang/cem/cemcom/arith.h new file mode 100644 index 000000000..551f7c90b --- /dev/null +++ b/lang/cem/cemcom/arith.h @@ -0,0 +1,23 @@ +/* $Header$ */ +/* COMPILER ARITHMETIC */ + +/* Normally the compiler does its internal arithmetics in longs + native to the source machine, which is always good for local + compilations, and generally OK too for cross compilations + downwards and sidewards. For upwards cross compilation and + to save storage on small machines, SPECIAL_ARITHMETICS will + be handy. +*/ + +#include "spec_arith.h" + +#ifndef SPECIAL_ARITHMETICS + +#define arith long /* native */ + +#else SPECIAL_ARITHMETICS + +/* not implemented yet */ +#define arith int /* dummy */ + +#endif SPECIAL_ARITHMETICS diff --git a/lang/cem/cemcom/asm.c b/lang/cem/cemcom/asm.c new file mode 100644 index 000000000..21daf1f5d --- /dev/null +++ b/lang/cem/cemcom/asm.c @@ -0,0 +1,10 @@ +/* $Header$ */ +/* A S M */ + +asm_seen(s) + char *s; +{ + /* 'asm' '(' string ')' ';' + */ + warning("\"asm(\"%s\")\" instruction skipped", s); +} diff --git a/lang/cem/cemcom/assert.h b/lang/cem/cemcom/assert.h new file mode 100644 index 000000000..6afd202f3 --- /dev/null +++ b/lang/cem/cemcom/assert.h @@ -0,0 +1,17 @@ +/* $Header$ */ +/* A S S E R T I O N M A C R O D E F I N I T I O N */ + +/* At some points in the program, it must be sure that some condition + holds true, due to further, successful, processing. As long as + there is no reasonable method to prove that a program is 100% + correct, these assertions are needed in some places. +*/ +#include "debug.h" /* UF */ + +#ifdef DEBUG +/* Note: this macro uses parameter substitution inside strings */ +#define ASSERT(exp) (exp || crash("in %s, %u: assertion %s failed", \ + __FILE__, __LINE__, "exp")) +#else +#define ASSERT(exp) +#endif DEBUG diff --git a/lang/cem/cemcom/atw.h b/lang/cem/cemcom/atw.h new file mode 100644 index 000000000..6dc02ee5a --- /dev/null +++ b/lang/cem/cemcom/atw.h @@ -0,0 +1,6 @@ +/* $Header$ */ +/* Align To Word boundary Definition */ + +extern int word_align; /* align of a word */ + +#define ATW(arg) ((((arg) + word_align - 1) / word_align) * word_align) diff --git a/lang/cem/cemcom/blocks.c b/lang/cem/cemcom/blocks.c new file mode 100644 index 000000000..799402e9d --- /dev/null +++ b/lang/cem/cemcom/blocks.c @@ -0,0 +1,88 @@ +/* $Header$ */ +/* B L O C K S T O R I N G A N D L O A D I N G */ + +#include "em.h" +#include "arith.h" +#include "sizes.h" +#include "atw.h" + +/* Because EM does not support the loading and storing of + objects having other sizes than word fragment and multiple, + we need to have a way of transferring these objects, whereby + we simulate "loi" and "sti": the address of the source resp. + destination is located on top of stack and a call is done + to load_block() resp. store_block(). + =============================================================== + # Loadblock() works on the stack as follows: ([ ] indicates the + # position of the stackpointer) + # lower address---> + # 1) | &object + # 2) | ... ATW(sz) bytes ... | sz | &stack_block | &object + # 3) | ... ATW(sz) bytes ... + =============================================================== + Loadblock() pushes ATW(sz) bytes directly onto the stack! + + Store_block() works on the stack as follows: + lower address---> + 1) | ... ATW(sz) bytes ... | &object + 2) | ... ATW(sz) bytes ... | &object | &stack_block | sz + 3) + + If sz is a legal argument for "loi" or "sti", just one EM + instruction is generated. + In the other cases, the notion of alignment is taken into account: + we only push an object of the size accepted by EM onto the stack, + while we need a loop to store the stack block into a memory object. +*/ +store_block(sz, al) + arith sz; + int al; +{ + /* Next condition contains Lots of Irritating Stupid Parentheses + */ + if ( + ((sz == al) && (word_align % al == 0)) || + ( + (sz % word_size == 0 || word_size % sz == 0) && + (al % word_align == 0) + ) + ) + C_sti(sz); + else { + /* address of destination lies on the stack */ + + /* push address of first byte of block on stack onto + the stack by computing it from the current stack + pointer position + */ + C_lor((arith)1); /* push current sp */ + C_adp(pointer_size); /* set & to 1st byte of block */ + C_loc(sz); /* number of bytes to transfer */ + C_cal("__stb"); /* call transfer routine */ + C_asp(pointer_size + pointer_size + int_size + ATW(sz)); + } +} + +load_block(sz, al) + arith sz; + int al; +{ + arith esz = ATW(sz); /* effective size == actual # pushed bytes */ + + if ((sz == al) && (word_align % al == 0)) + C_loi(sz); + else + if (al % word_align == 0) + C_loi(esz); + else { + /* do not try to understand this... */ + C_asp(-(esz - pointer_size)); /* allocate stack block */ + C_lor((arith)1); /* push & of stack block as dst */ + C_dup(pointer_size); /* fetch source address */ + C_adp(esz - pointer_size); + C_loi(pointer_size); + C_loc(sz); /* # bytes to copy */ + C_cal("__stb"); /* library copy routine */ + C_asp(int_size + pointer_size + pointer_size); + } +} diff --git a/lang/cem/cemcom/cem.1 b/lang/cem/cemcom/cem.1 new file mode 100644 index 000000000..b9162e0ba --- /dev/null +++ b/lang/cem/cemcom/cem.1 @@ -0,0 +1,238 @@ +.TH CEM 1 local +.SH NAME +cem \- ACK C compiler +.SH SYNOPSIS +.B cem +[ option ] ... file ... +.SH DESCRIPTION +.I Cem +is a \fIcc\fP(1)-like +C compiler that uses the C front-end compiler \fIcemcom\fP(1) +of the Amsterdam Compiler Kit. +.I Cem +interprets its arguments not starting with a '\-' as +source files, to be compiled by the various parts of the compilation process, +which are listed below. +File arguments whose names end with \fB.\fP\fIcharacter\fP are interpreted as +follows: +.IP .[ao] +object file. +.IP .[ci] +C source code +.IP .e +EM assembler source file. +.IP .k +compact EM file, not yet optimised by the EM peephole optimiser. +.IP .m +compact EM file, already optimised by the peephole optimiser. +.IP .s +assembler file. +.LP +The actions to be taken by +.I cem +are directed by the type of file argument and the various options that are +presented to it. +.PP +The following options, which is a mixture of options interpreted by \fIcc\fP(1) +and \fIack\fP(?), +are interpreted by +.I cem . +(The options not specified here are passed to the front-end +compiler \fIcemcom\fP(1).) +.IP \fB\-B\fP\fIname\fP +Use \fIname\fP as front-end compiler instead of the default \fIcemcom\fP(1). +.br +Same as "\fB\-Rcem=\fP\fIname\fP". +.IP \fB\-C\fP +Run C preprocessor \fI/lib/cpp\fP only and prevent it from eliding comments. +.IP \fB\-D\fP\fIname\fP\fB=\fP\fIdef\fP +Define the \fIname\fP to the preprocessor, as if by "#define". +.IP \fB\-D\fP\fIname\fP +.br +Same as "\fB\-D\fP\fIname\fP\fB=1\fP". +.IP \fB\-E\fP +Run only the macro preprocessor on the named files and send the +result to standard output. +.IP \fB\-I\fP\fIdir\fP +\&"#include" files whose names do not begin with '/' are always +sought first in the directory of the \fIfile\fP argument, then in directories +in \fB\-I\fP options, then in directories on a standard list (which in fact +consists of "/usr/include"). +.IP \fB\-L\fP\fIdir\fP +Use \fIdir\fP as library-containing directory instead of the default. +.IP \fB\-P\fP +Same as \fB\-E\fP, but sending the result of input file \fIfile\fP\fB.[ceis]\fP +to \fIfile\fP\fB.i\fP. +.IP \fB\-R\fP +Passed to \fIcemcom\fP(1) in order to parse the named C programs according +to the C language as described in [K&R] (also called \fIRestricted\fP C). +.IP \fB\-R\fP\fIprog\fP\fB=\fP\fIname\fP +.br +Use \fIname\fP as program for phase \fIprog\fP of the compilation instead of +the default. +\&\fIProg\fP is one of the following names: +.RS +.IP \fBcpp\fP +macro preprocessor (default: /lib/cpp) +.IP \fBcem\fP +front\-end compiler (default: $CEM/bin/cemcom) +.IP \fBopt\fP +EM peephole optimiser (default: $EM/lib/em_opt) +.IP \fBdecode\fP +EM compact to EM assembler translator (default: $EM/lib/em_decode) +.IP \fBencode\fP +EM assembler to EM compact translator (default: $EM/lib/em_encode) +.IP \fBbe\fP +EM compact code to target\-machine assembly code compiler +(default: $EM/lib/vax4/cg) +.IP \fBcg\fP +same as \fBbe\fP +.IP \fBas\fP +assembler (default: /bin/as) +.IP \fBld\fP +linker/loader (default: /bin/ld) +.RE +.IP \fB\-R\fP\fIprog\fP\fB\-\fP\fIoption\fP +.br +Pass \fB\-\fP\fIoption\fP to the compilation phase indicated by \fIprog\fP. +.IP \fB\-S\fP +Same as \fB\-c.s\fP. +.IP \fB\-U\fP\fIname\fP +.br +Remove any initial definition of \fIname\fP. +.IP \fB\-V\fP\fIcm\fP.\fIn\fP,\ \fB\-V\fIcm\fP.\fIncm\fP.\fIn\fP\ ... +.br +Set the size and alignment requirements of the C constructs of the named +C input files. +The letter \fIc\fP indicates the simple type, which is one of +\fBs\fP(short), \fBi\fP(int), \fBl\fP(long), \fBf\fP(float), \fBd\fP(double) or +\fBp\fP(pointer). +The \fIm\fP parameter can be used to specify the length of the type (in bytes) +and the \fIn\fP parameter for the alignment of that type. +Absence of \fIm\fP or \fIn\fP causes the default value to be retained. +To specify that the bitfields should be right adjusted instead of the +default left adjustment, specify \fBr\fP as \fIc\fP parameter +without parameters. +.br +This option is passed directly to \fIcemcom\fP(1). +.IP \fB\-c\fP +Same as \fB\-c.o\fP. +.IP \fB\-c.e\fP +Produce EM assembly code on \fIfile\fP\fB.e\fP for the +named files \fIfile\fP\fB.[cikm]\fP +.IP \fB\-c.k\fP +Compile C source \fIfile\fP\fB.[ci]\fP or +encode EM assembly code from \fIfile\fP\fB.e\fP +into unoptimised compact EM code and write the result on \fIfile\fP\fB.k\fP +.IP \fB\-c.m\fP +Compile C source \fIfile\fP\fB.[ci]\fP, +translate unoptimised EM code from \fIfile\fP\fB.k\fP or +encode EM assembly code from \fIfile\fP\fB.e\fP +into optimised compact EM code and write the result on \fIfile\fP\fB.m\fP +.IP \fB\-c.o\fP +Suppress the loading phase of the compilation, and force an object file to +be produced even if only one program is compiled +.IP \fB\-c.s\fP +Compile the named \fIfile\fP\fB.[ceikm]\fP input files, and leave the +assembly language output on corresponding files suffixed ".s". +.IP \fB\-k\fP +Same as \fB\-c.k\fP. +.IP \fB\-l\fP\fIname\fP +.br +Append the library \fBlib\fP\fIname\fP\fB.a\fP to the list of files that +should be loaded and linked into the final output file. +The library is searched for in the library directory. +.IP \fB\-m\fP +Same as \fB\-c.m\fP. +.IP \fB\-o\fP\ \fIoutput\fP +.br +Name the final output file \fIoutput\fP. +If this option is used, the default "a.out" will be left undisturbed. +.IP \fB\-p\fP +Produce EM profiling code (\fBfil\fP and \fBlin\fP instructions to +enable an interpreter to keep track of the current location in the +source code) +.IP \fB\-t\fP +Keep the intermediate files, produced during the various phases of the +compilation. +The produced files are named \fIfile\fP\fB.\fP\fIcharacter\fP where +\&\fIcharacter\fP indicates the type of the file as listed before. +.IP \fB\-v\fP +Verbose. +Print the commands before they are executed. +.IP \fB\-vn\fP +Do not really execute (for debugging purposes only). +.IP \fB\-vd\fP +Print some additional information (for debugging purposes only). +.IP \fB\-\-\fP\fIanything\f +.br +Equivalent to \fB\-Rcem\-\-\fP\fIanything\fP. +The options +.B \-\-C , +.B \-\-E +and +.B \-\-P +all have the same effect as respectively +.B \-C , +.B \-E +and +.B \-P +except for the fact that the macro preprocessor is taken to be the +built\-in preprocessor of the \fBcem\fP phase. +Most "\-\-" options are used by +.I cemcom (1) +to set some internal debug switches. +.IP loader\ options +.br +The options +.B \-d , +.B \-e , +.B \-F , +.B \-n , +.B \-N , +.B \-r , +.B \-s , +.B \-u , +.B \-x , +.B \-X +and +.B \-z +are directly passed to the loader. +.SH FILES +$CEM/bin/cem: this program +.br +$CEM/src/cem.c: C source of the \fBcem\fP program +.br +$CEM/bin/cemcom: C front end compiler +.br +$CEM/lib: default library-containing directory +.br +$CEM/src/cem.1: this manual page +.br +$CEM/src/cemcom.1: manual page for the C front end compiler +.SH SEE ALSO +cemcom(1), cc(1), ack(?), as(1), ld(1) +.br +.IP [K&R] +B.W. Kernighan and D.M. Ritchie, \fIThe C Programming Language\fP, +Prentice-Hall, 1978. +.SH DIAGNOSTICS +Any failure of one of the phases is reported. +.SH NOTES +.IP \(bu +The names $CEM and $EM refer to the directories containing the CEM compiler +and the ACK distribution tree respectively. +.IP \(bu +This manual page contains references to programs that reside on our site +which is a VAX 11/750 running UNIX BSD4.1. +Setting up \fBcem\fP requires some names to be declared in $CEM/src/cem.c +.SH BUGS +.IP \(bu +All intermediate files are placed in the current working directory which +causes files with the same name as the intermediate files to be overwritten. +.IP \(bu +.B Cem +only accepts a limited number of arguments to be passed to the various phases. +(e.g. 256). +.IP \(bu +Please report suggestions and other bugs to erikb@tjalk.UUCP diff --git a/lang/cem/cemcom/cem.c b/lang/cem/cemcom/cem.c new file mode 100644 index 000000000..5e4c48196 --- /dev/null +++ b/lang/cem/cemcom/cem.c @@ -0,0 +1,744 @@ +/* $Header$ */ +/* + Driver for the CEMCOM compiler: works like /bin/cc and accepts the + options accepted by /bin/cc and /usr/em/bin/ack. + Date written: dec 4, 1985 + Author: Erik Baalbergen +*/ + +#include "string.h" +#include +#include +#include +#include + +#define MAXARGC 256 /* maximum number of arguments allowed in a list */ +#define USTR_SIZE 1024 /* maximum length of string variable */ + +struct arglist { + int al_argc; + char *al_argv[MAXARGC]; +}; + +/* some system-dependent variables */ +char *PP = "/lib/cpp"; +char *CEM = "/user1/erikb/bin/cemcom"; +char *AS_FIX = "/user1/erikb/bin/mcomm"; +char *ENCODE = "/usr/em/lib/em_encode"; +char *DECODE = "/usr/em/lib/em_decode"; +char *OPT = "/usr/em/lib/em_opt"; +char *CG = "/usr/em/lib/vax4/cg"; +char *AS = "/bin/as"; +char *LD = "/bin/ld"; +char *SHELL = "/bin/sh"; + +char *LIBDIR = "/user1/cem/lib"; + +char *V_FLAG = "-Vs2.2w4.4i4.4l4.4f4.4d8.4p4.4"; + +struct arglist LD_HEAD = { + 2, + { + "/usr/em/lib/vax4/head_em", + "/usr/em/lib/vax4/head_cc" + } +}; + +struct arglist LD_TAIL = { + 3, + { + "/user1/cem/lib/stb.o", + "/usr/em/lib/vax4/tail_mon", + "/usr/em/lib/vax4/tail_em" + } +}; + +char *o_FILE = "a.out"; + +#define remove(str) (((t_flag == 0) && unlink(str)), (str)[0] = '\0') +#define cleanup(str) (str && remove(str)) +#define mkname(dst, s1, s2) mkstr(dst, (s1), (s2), 0) +#define init(al) (al)->al_argc = 1 +#define library(nm) \ + mkstr(alloc((unsigned int)strlen(nm) + strlen(LIBDIR) + 7), \ + LIBDIR, "/lib", nm, ".a", 0) + +char *ProgCall = 0; + +struct arglist SRCFILES; +struct arglist LDFILES; +struct arglist GEN_LDFILES; + +struct arglist PP_FLAGS; +struct arglist CEM_FLAGS; + +int debug = 0; +int exec = 1; + +int RET_CODE = 0; + +struct arglist OPT_FLAGS; +struct arglist DECODE_FLAGS; +struct arglist ENCODE_FLAGS; +struct arglist CG_FLAGS; +struct arglist AS_FLAGS; +struct arglist LD_FLAGS; +struct arglist O_FLAGS; +struct arglist DEBUG_FLAGS; + +struct arglist CALL_VEC; + +int e_flag = 0; +int E_flag = 0; +int c_flag = 0; +int k_flag = 0; +int m_flag = 0; +int o_flag = 0; +int S_flag = 0; +int t_flag = 0; +int v_flag = 0; +int P_flag = 0; + +struct prog { + char *p_name; + char **p_task; + struct arglist *p_flags; +} ProgParts[] = { + { "cpp", &PP, &PP_FLAGS }, + { "cem", &CEM, &CEM_FLAGS }, + { "opt", &OPT, &OPT_FLAGS }, + { "decode", &DECODE, &DECODE_FLAGS }, + { "encode", &ENCODE, &ENCODE_FLAGS }, + { "be", &CG, &CG_FLAGS }, + { "cg", &CG, &CG_FLAGS }, + { "as", &AS, &AS_FLAGS }, + { "ld", &LD, &LD_FLAGS }, + { 0, 0, 0 } +}; + +int trap(); +char *mkstr(); +char *alloc(); +long sizeof_file(); + +main(argc, argv) + char *argv[]; +{ + char *str; + char **argvec; + int count; + int ext; + char Nfile[USTR_SIZE]; + char kfile[USTR_SIZE]; + char sfile[USTR_SIZE]; + char mfile[USTR_SIZE]; + char ofile[USTR_SIZE]; + register struct arglist *call = &CALL_VEC; + char BASE[USTR_SIZE]; + char *file; + char *ldfile = 0; + + set_traps(trap); + + ProgCall = *argv++; + + while (--argc > 0) { + if (*(str = *argv++) != '-') { + append(&SRCFILES, str); + continue; + } + + switch (str[1]) { + + case '-': + switch (str[2]) { + case 'C': + case 'E': + case 'P': + E_flag = 1; + append(&PP_FLAGS, str); + PP = CEM; + P_flag = (str[2] == 'P'); + break; + default: + append(&DEBUG_FLAGS, str); + break; + } + break; + + case 'B': + PP = CEM = &str[2]; + break; + case 'C': + case 'E': + case 'P': + E_flag = 1; + append(&PP_FLAGS, str); + P_flag = (str[1] == 'P'); + break; + case 'c': + if (str[2] == '.') { + switch (str[3]) { + + case 's': + S_flag = 1; + break; + case 'k': + k_flag = 1; + break; + case 'o': + c_flag = 1; + break; + case 'm': + m_flag = 1; + break; + case 'e': + e_flag = 1; + break; + default: + bad_option(str); + } + } + else + if (str[2] == '\0') + c_flag = 1; + else + bad_option(str); + break; + case 'D': + case 'I': + case 'U': + append(&PP_FLAGS, str); + break; + case 'k': + k_flag = 1; + break; + case 'l': + if (str[2] == '\0') /* no standard libraries */ + LD_HEAD.al_argc = LD_TAIL.al_argc = 0; + else /* use library from library directory */ + append(&SRCFILES, library(&str[2])); + break; + case 'L': /* change default library directory */ + LIBDIR = &str[2]; + break; + case 'm': + m_flag = 1; + break; + case 'o': + o_flag = 1; + if (argc-- < 0) + bad_option(str); + else + o_FILE = *argv++; + break; + case 'O': + append(&O_FLAGS, "-O"); + break; + case 'p': + append(&CEM_FLAGS, "-p"); + break; + case 'R': + if (str[2] == '\0') + append(&CEM_FLAGS, str); + else + Roption(str); + break; + case 'S': + S_flag = 1; + break; + case 't': + t_flag = 1; + break; + case 'v': /* set debug switches */ + v_flag = 1; + switch (str[2]) { + + case 'd': + debug = 1; + break; + case 'n': /* no execute */ + exec = 0; + break; + } + break; + case 'V': + V_FLAG = str; + break; + case 'e': + case 'F': + case 'd': + case 'n': + case 'N': + case 'r': + case 's': + case 'u': + case 'x': + case 'X': + case 'z': + append(&LD_FLAGS, str); + break; + default: + append(&CEM_FLAGS, str); + } + } + + if (debug) + report("Note: debug output"); + if (exec == 0) + report("Note: no execution"); + + count = SRCFILES.al_argc; + argvec = &(SRCFILES.al_argv[0]); + + Nfile[0] = '\0'; + + while (count-- > 0) { + basename(file = *argvec++, BASE); + + if (E_flag) { + char ifile[USTR_SIZE]; + + init(call); + append(call, PP); + concat(call, &DEBUG_FLAGS); + concat(call, &PP_FLAGS); + append(call, file); + runvec(call, P_flag ? mkname(ifile, BASE, ".i") : 0); + continue; + } + + ext = extension(file); + + /* .c to .k and .N */ + if (ext == 'c' || ext == 'i') { + init(call); + append(call, CEM); + concat(call, &DEBUG_FLAGS); + append(call, V_FLAG); + concat(call, &CEM_FLAGS); + concat(call, &PP_FLAGS); + append(call, file); + append(call, mkname(kfile, BASE, ".k")); + append(call, mkname(Nfile, BASE, ".N")); + + if (runvec(call, (char *)0)) { + file = kfile; + ext = 'k'; + if (sizeof_file(Nfile) <= 0L) + remove(Nfile); + } + else { + remove(kfile); + remove(Nfile); + continue; + } + } + + /* .e to .k */ + if (ext == 'e') { + init(call); + append(call, ENCODE); + concat(call, &ENCODE_FLAGS); + append(call, file); + append(call, mkname(kfile, BASE, ".k")); + if (runvec(call, (char *)0) == 0) + continue; + file = kfile; + ext = 'k'; + } + + if (k_flag) + continue; + + /* decode .k or .m */ + if (e_flag && (ext == 'k' || ext == 'm')) { + char efile[USTR_SIZE]; + + init(call); + append(call, DECODE); + concat(call, &DECODE_FLAGS); + append(call, file); + append(call, mkname(efile, BASE, ".e")); + runvec(call, (char *)0); + cleanup(kfile); + continue; + } + + /* .k to .m */ + if (ext == 'k') { + init(call); + append(call, OPT); + concat(call, &OPT_FLAGS); + append(call, file); + if (runvec(call, mkname(mfile, BASE, ".m")) == 0) + continue; + file = mfile; + ext = 'm'; + cleanup(kfile); + } + + if (m_flag) + continue; + + /* .m to .s */ + if (ext == 'm') { + init(call); + append(call, CG); + concat(call, &CG_FLAGS); + append(call, file); + append(call, mkname(sfile, BASE, ".s")); + if (runvec(call, (char *)0) == 0) + continue; + if (Nfile[0] != '\0') { + init(call); + append(call, AS_FIX); + append(call, Nfile); + append(call, sfile); + runvec(call, (char *)0); + remove(Nfile); + } + cleanup(mfile); + file = sfile; + ext = 's'; + } + + if (S_flag) + continue; + + /* .s to .o */ + if (ext == 's') { + ldfile = c_flag ? + ofile : + alloc((unsigned)strlen(BASE) + 3); + init(call); + append(call, AS); + concat(call, &AS_FLAGS); + append(call, "-o"); + append(call, mkname(ldfile, BASE, ".o")); + append(call, file); + if (runvec(call, (char *)0) == 0) + continue; + file = ldfile; + ext = 'o'; + cleanup(sfile); + } + + if (c_flag) + continue; + + append(&LDFILES, file); + if (ldfile) { + append(&GEN_LDFILES, ldfile); + ldfile = 0; + } + } + + /* *.o to a.out */ + if (RET_CODE == 0 && LDFILES.al_argc > 0) { + init(call); + append(call, LD); + concat(call, &LD_FLAGS); + append(call, "-o"); + append(call, o_FILE); + concat(call, &LD_HEAD); + concat(call, &LDFILES); + append(call, library("c")); + concat(call, &LD_TAIL); + if (runvec(call, (char *)0)) { + register i = GEN_LDFILES.al_argc; + + while (i-- > 0) + remove(GEN_LDFILES.al_argv[i]); + } + } + + exit(RET_CODE); +} + + +char * +alloc(u) + unsigned u; +{ +#define BUFSIZE (USTR_SIZE * MAXARGC) + static char buf[BUFSIZE]; + static char *bufptr = &buf[0]; + register char *p = bufptr; + + if ((bufptr += u) >= &buf[BUFSIZE]) + panic("no space"); + return p; +} + +append(al, arg) + struct arglist *al; + char *arg; +{ + if (al->al_argc >= MAXARGC) + panic("argument list overflow"); + al->al_argv[(al->al_argc)++] = arg; +} + +concat(al1, al2) + struct arglist *al1, *al2; +{ + register i = al2->al_argc; + register char **p = &(al1->al_argv[al1->al_argc]); + register char **q = &(al2->al_argv[0]); + + if ((al1->al_argc += i) >= MAXARGC) + panic("argument list overflow"); + while (i-- > 0) + *p++ = *q++; +} + +/* The next function is a dirty old one, taking a variable number of + arguments. + Take care that the last argument is a null-valued pointer! +*/ +/*VARARGS1*/ +char * +mkstr(dst, arg) + char *dst, *arg; +{ + char **vec = (char **) &arg; + register char *p; + register char *q = dst; + + while (p = *vec++) { + while (*q++ = *p++); + q--; + } + return dst; +} + +Roption(str) + char *str; /* of the form "prog=/-arg" */ +{ + char *eq; + char *prog, *arg; + char bc; + char *cindex(); + + prog = &str[2]; + + if (eq = cindex(prog, '=')) + bc = '='; + else + if (eq = cindex(prog, '-')) + bc = '-'; + else { + bad_option(str); + return; + } + + *eq++ = '\0'; + if (arg = eq) { + char *opt = 0; + struct prog *pp = &ProgParts[0]; + + if (bc == '-') { + opt = mkstr(alloc((unsigned)strlen(arg) + 2), + "-", arg, 0); + } + + while (pp->p_name) { + if (strcmp(prog, pp->p_name) == 0) { + if (opt) + append(pp->p_flags, opt); + else + *(pp->p_task) = arg; + return; + } + pp++; + } + } + bad_option(str); +} + +basename(str, dst) + char *str; + register char *dst; +{ + register char *p1 = str; + register char *p2 = p1; + + while (*p1) + if (*p1++ == '/') + p2 = p1; + p1--; + if (*--p1 == '.') + *p1 = '\0'; + while (*dst++ = *p2++); + *p1 = '.'; +} + +int +extension(fn) + register char *fn; +{ + char c; + + while (*fn++) ; + fn--; + c = *--fn; + return (*--fn == '.') ? c : 0; +} + +long +sizeof_file(nm) + char *nm; +{ + struct stat stbuf; + + if (stat(nm, &stbuf) == 0) + return stbuf.st_size; + return -1; +} + +char * sysmsg[] = { + 0, + "Hangup", + "Interrupt", + "Quit", + "Illegal instruction", + "Trace/BPT trap", + "IOT trap", + "EMT trap", + "Floating exception", + "Killed", + "Bus error", + "Memory fault", + "Bad system call", + "Broken pipe", + "Alarm call", + "Terminated", + "Signal 16" +}; + +runvec(vec, outp) + struct arglist *vec; + char *outp; +{ + int status, fd; + char *task = vec->al_argv[1]; + + vec->al_argv[vec->al_argc] = 0; + if (v_flag) + print_vec(vec); + if (exec == 0) + return 1; + if (fork() == 0) { /* start up the process */ + extern int errno; + + if (outp) { /* redirect standard output */ + if ((fd = creat(outp, 0666)) < 0) + panic("cannot create %s", outp); + if (dup2(fd, 1) == -1) + panic("dup failure"); + close(fd); + } + if (debug) report("exec %s", task); + execv(task, &(vec->al_argv[1])); + + /* not an a.out file, let's try it with the SHELL */ + if (debug) report("try it with %s", SHELL); + if (errno == ENOEXEC) { + vec->al_argv[0] = SHELL; + execv(SHELL, &(vec->al_argv[0])); + } + + /* failed, so ... */ + panic("cannot execute %s", task); + exit(1); + } + else { + int loworder, highorder, sig; + + wait(&status); + loworder = status & 0377; + highorder = (status >> 8) & 0377; + if (loworder == 0) { + if (highorder) + report("%s: exit status %d", task, highorder); + return highorder ? ((RET_CODE = 1), 0) : 1; + } + else { + sig = loworder & 0177; + if (sig == 0177) + report("%s: stopped by ptrace", task); + else + if (sysmsg[sig]) + report("%s: %s%s", task, sysmsg[sig], + (loworder & 0200) + ? " - core dumped" + : ""); + RET_CODE = 1; + return 0; + } + } + /*NOTREACHED*/ +} + +bad_option(str) + char *str; +{ + report("bad option %s", str); +} + +/*VARARGS1*/ +report(fmt, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9) + char *fmt; +{ + fprintf(stderr, "%s: ", ProgCall); + fprintf(stderr, fmt, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9); + fprintf(stderr, "\n"); +} + +/*VARARGS1*/ +panic(fmt, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9) + char *fmt; +{ + fprintf(stderr, "%s: ", ProgCall); + fprintf(stderr, fmt, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9); + fprintf(stderr, "\n"); + exit(1); +} + +set_traps(f) + int (*f)(); +{ + signal(SIGHUP, f); + signal(SIGINT, f); + signal(SIGQUIT, f); + signal(SIGALRM, f); + signal(SIGTERM, f); +} + +/*ARGSUSED*/ +trap(sig) +{ + set_traps(SIG_IGN); + panic("Trapped"); +} + +print_vec(vec) + struct arglist *vec; +{ + register i; + + for (i = 1; i < vec->al_argc; i++) + printf("%s ", vec->al_argv[i]); + printf("\n"); +} + +char * +cindex(s, c) + char *s, c; +{ + while (*s) + if (*s++ == c) + return s - 1; + return (char *) 0; +} diff --git a/lang/cem/cemcom/cemcom.1 b/lang/cem/cemcom/cemcom.1 new file mode 100644 index 000000000..ec84c40fa --- /dev/null +++ b/lang/cem/cemcom/cemcom.1 @@ -0,0 +1,94 @@ +.TH CEMCOM 1 local +.SH NAME +cemcom \- C to EM compiler +.SH SYNOPSIS +\fBcemcom\fP [\fIoptions\fP] \fIsource \fP[\fIdestination \fP[\fInamelist\fP]] +.SH DESCRIPTION +\fICemcom\fP is a compiler that translates C programs +into EM compact code. +The input is taken from \fIsource\fP, while the +EM code is written on \fIdestination\fP. +If either of these two names is "\fB-\fP", standard input or output respectively +is taken. +The file \fInamelist\fP, if supplied, will contain a list of the names +of external, so-called \fBcommon\fP, variables. +When the preprocessor is invoked to run stand-alone, \fIdestination\fP +needs not be specified. +.br +\fIOptions\fP is a, possibly empty, sequence of the following combinations: +.IP \fB\-C\fR +list the sequence of input tokens while maintaining the comments. +.IP \fB\-D\fIname\fR=\fItext\fR +.br +define \fIname\fR as a macro with \fItext\fR as its replacement text. +.IP \fB\-D\fIname\fR +.br +the same as \fB\-D\fIname\fR=1. +.IP \fB\-E\fR +list the sequence of input tokens and delete any comments. +Control lines of the form +.RS +.RS +#\fBline\fR <\fIinteger\fR> "\fIfilename\fR" +.RE +are generated whenever needed. +.RE +.IP \fB\-I\fIdirname\fR +.br +insert \fIdirname\fR in the list of include directories. +.IP \fB\-M\fP\fIn\fP +set maximum identifier length to \fIn\fP. +.IP \fB\-n\fR +do not generate EM register messages. +The user-declared variables are not stored into registers on the target +machine. +.IP \fB\-p\fR +generate the EM \fBfil\fR and \fBlin\fR instructions in order to enable +an interpreter to keep track of the current location in the source code. +.IP \fB\-P\fR +like \fB\-E\fR but without #\fBline\fR control lines. +.IP \fB\-R\fR +interpret the input as restricted C (according to the language as +described in \fIThe C programming language\fR by Kernighan and Ritchie.) +.IP \fB\-U\fIname\fR +.br +get rid of the compiler-predefined macro \fIname\fR. +.IP \fB\-V\fIcm\fR.\fIn\fR,\ \fB\-V\fIcm\fR.\fIncm\fR.\fIn\fR\ ... +.br +set the size and alignment requirements. +The letter \fIc\fR indicates the simple type, which is one of +\fBs\fR(short), \fBi\fR(int), \fBl\fR(long), \fBf\fR(float), \fBd\fR(double) or +\fBp\fR(pointer). +The \fIm\fR parameter can be used to specify the length of the type (in bytes) +and the \fIn\fR parameter for the alignment of that type. +Absence of \fIm\fR or \fIn\fR causes the default value to be retained. +To specify that the bitfields should be right adjusted instead of the +default left adjustment, specify \fBr\fR as \fIc\fR parameter. +.IP \fB\-w\fR +suppress warning messages +.IP \fB\-\-\fItext\fR +.br +where \fItext\fR can be either of the above or +a debug flag of the compiler (which is not useful for the common user.) +This feature can be used in various shell scripts and surrounding programs +to force a certain option to be handed over to \fBcemcom\fR. +.LP +.SH FILES +.IR /user1/cem/bin/cemcom : +binary of the CEM compiler. +.br +.IR /user1/cem/bin/cem : +a \fIcc\fP(1)-like driver for the VAX running 4.1BSD UNIX. +.br +.IR /user1/sjoerd/bin/CC : +a \fIcc\fP(1)-like driver for the 68000 running Amoeba. +.SH DIAGNOSTICS +All warning and error messages are written on standard error output. +.SH BUGS +Debugging and profiling facilities may be present during the development +of \fIcemcom\fP. +.br +Please report all bugs to ..tjalk!cem or ..tjalk!erikb +.SH REFERENCE +Baalbergen, E.H., D. Grune, M. Waage ;"\fIThe CEM compiler\fR", +Informatica Manual IM-4 diff --git a/lang/cem/cemcom/ch7.c b/lang/cem/cemcom/ch7.c new file mode 100644 index 000000000..38fcbc21b --- /dev/null +++ b/lang/cem/cemcom/ch7.c @@ -0,0 +1,409 @@ +/* $Header$ */ +/* S E M A N T I C A N A L Y S I S -- C H A P T E R 7 RM */ + +#include "debug.h" +#include "nobitfield.h" +#include "idf.h" +#include "arith.h" +#include "type.h" +#include "struct.h" +#include "label.h" +#include "expr.h" +#include "def.h" +#include "Lpars.h" +#include "assert.h" + +#define is_zero(ex) \ + ((ex)->ex_class == Value && (ex)->VL_VALUE == (arith)0 && \ + (ex)->VL_IDF == 0) + +extern char options[]; +extern char *symbol2str(); + +/* Most expression-handling routines have a pointer to a + (struct type *) as first parameter. The object under the pointer + gets updated in the process. +*/ + +ch7sel(expp, oper, idf) + register struct expr **expp; + struct idf *idf; +{ + /* The selector idf is applied to *expp; oper may be '.' or + ARROW. + */ + register struct type *tp = (*expp)->ex_type; + register struct sdef *sd; + + if (oper == ARROW) { + if (tp->tp_fund == POINTER) /* normal case */ + tp = tp->tp_up; + else { /* constructions like "12->selector" and + "char c; c->selector" + */ + switch (tp->tp_fund) { + case CHAR: + case SHORT: + case INT: + case LONG: + case ENUM: + /* Allowed by RM 14.1 */ + ch7cast(expp, CAST, pa_type); + sd = idf2sdef(idf, tp); + tp = sd->sd_stype; + break; + default: + error("-> applied to %s", + symbol2str(tp->tp_fund)); + case ERRONEOUS: + (*expp)->ex_type = error_type; + return; + } + } /* tp->tp_fund != POINTER */ + } /* oper == ARROW */ + else { /* oper == '.' */ + /* filter out illegal expressions "non_lvalue.sel" */ + if (!(*expp)->ex_lvalue) { + error("dot requires lvalue"); + (*expp)->ex_type = error_type; + return; + } + } + switch (tp->tp_fund) { + case POINTER: /* for int *p; p->next = ... */ + case STRUCT: + case UNION: + break; + case CHAR: + case SHORT: + case INT: + case LONG: + case ENUM: + /* warning will be given by idf2sdef() */ + break; + default: + if (!is_anon_idf(idf)) + error("selector %s applied to %s", + idf->id_text, symbol2str(tp->tp_fund)); + case ERRONEOUS: + (*expp)->ex_type = error_type; + return; + } + sd = idf2sdef(idf, tp); + if (oper == '.') { + /* there are 3 cases in which the selection can be + performed compile-time: + I: n.sel (n either an identifier or a constant) + II: (e.s1).s2 (transformed into (e.(s1+s2))) + III: (e->s1).s2 (transformed into (e->(s1+s2))) + The code performing these conversions is + extremely obscure. + */ + if ((*expp)->ex_class == Value) { + /* It is an object we know the address of; so + we can calculate the address of the + selected member + */ + (*expp)->VL_VALUE += sd->sd_offset; + (*expp)->ex_type = sd->sd_type; + } + else + if ((*expp)->ex_class == Oper) { + struct oper *op = &((*expp)->ex_object.ex_oper); + + if (op->op_oper == '.' || op->op_oper == ARROW) { + op->op_right->VL_VALUE += sd->sd_offset; + (*expp)->ex_type = sd->sd_type; + } + else + *expp = new_oper(sd->sd_type, *expp, '.', + intexpr(sd->sd_offset, INT)); + } + } + else /* oper == ARROW */ + *expp = new_oper(sd->sd_type, + *expp, oper, intexpr(sd->sd_offset, INT)); + (*expp)->ex_lvalue = sd->sd_type->tp_fund != ARRAY; +} + +ch7incr(expp, oper) + register struct expr **expp; +{ + /* The monadic prefix/postfix incr/decr operator oper is + applied to *expp. + */ + arith addend; + struct expr *expr; + register int fund = (*expp)->ex_type->tp_fund; + + if (!(*expp)->ex_lvalue) { + error("no lvalue with %s", symbol2str(oper)); + return; + } + if (fund == ENUM) { + warning("%s on enum", symbol2str(oper)); + addend = (arith)1; + } + else + if (is_arith_type((*expp)->ex_type)) + addend = (arith)1; + else + if (fund == POINTER) + addend = size_of_type((*expp)->ex_type->tp_up, "object"); +#ifndef NOBITFIELD + else + if (fund == FIELD) + addend = (arith)1; +#endif NOBITFIELD + else { + if ((*expp)->ex_type != error_type) + error("%s on %s", + symbol2str(oper), + symbol2str((*expp)->ex_type->tp_fund) + ); + return; + } + expr = intexpr(addend, INT); + ch7cast(&expr, CAST, (*expp)->ex_type); +#ifndef NOBITFIELD + if (fund == FIELD) + *expp = new_oper((*expp)->ex_type->tp_up, *expp, oper, expr); + else +#endif NOBITFIELD + *expp = new_oper((*expp)->ex_type, *expp, oper, expr); +} + +ch7cast(expp, oper, tp) + register struct expr **expp; + register struct type *tp; +{ + /* The expression *expp is cast to type tp; the cast is + caused by the operator oper. If the cast has + to be passed on to run time, its left operand will be an + expression of class Type. + */ + register struct type *oldtp; + + if ((*expp)->ex_type->tp_fund == FUNCTION) + function2pointer(expp); + if ((*expp)->ex_type->tp_fund == ARRAY) + array2pointer(expp); + oldtp = (*expp)->ex_type; + if (oldtp == tp) + {} /* life is easy */ + else +#ifndef NOBITFIELD + if (oldtp->tp_fund == FIELD) { + field2arith(expp); + ch7cast(expp, oper, tp); + } + else + if (tp->tp_fund == FIELD) + ch7cast(expp, oper, tp->tp_up); + else +#endif NOBITFIELD + if (tp->tp_fund == VOID) /* Easy again */ + (*expp)->ex_type = void_type; + else + if (is_arith_type(oldtp) && is_arith_type(tp)) { + int oldi = is_integral_type(oldtp); + int i = is_integral_type(tp); + + if (oldi && i) { + if ( oldtp->tp_fund == ENUM && + tp->tp_fund == ENUM && + oper != CAST + ) + warning("%s on enums of different types", + symbol2str(oper)); + int2int(expp, tp); + } + else + if (oldi && !i) { + if (oldtp->tp_fund == ENUM && oper != CAST) + warning("conversion of enum to %s\n", + symbol2str(tp->tp_fund)); + int2float(expp, tp); + } + else + if (!oldi && i) + float2int(expp, tp); + else /* !oldi && !i */ + float2float(expp, tp); + } + else + if (oldtp->tp_fund == POINTER && tp->tp_fund == POINTER) { + if (oper != CAST) + warning("incompatible pointers in %s", + symbol2str(oper)); + (*expp)->ex_type = tp; /* free conversion */ + } + else + if (oldtp->tp_fund == POINTER && is_integral_type(tp)) { + /* from pointer to integral */ + if (oper != CAST) + warning("illegal conversion of pointer to %s", + symbol2str(tp->tp_fund)); + if (oldtp->tp_size > tp->tp_size) + warning("conversion of pointer to %s loses accuracy", + symbol2str(tp->tp_fund)); + if (oldtp->tp_size != tp->tp_size) + int2int(expp, tp); + else + (*expp)->ex_type = tp; + } + else + if (tp->tp_fund == POINTER && is_integral_type(oldtp)) { + /* from integral to pointer */ + switch (oper) { + case CAST: + break; + case EQUAL: + case NOTEQUAL: + case '=': + case RETURN: + if (is_zero(*expp)) + break; + default: + warning("illegal conversion of %s to pointer", + symbol2str(oldtp->tp_fund)); + break; + } + if (oldtp->tp_size > tp->tp_size) + warning("conversion of %s to pointer loses accuracy", + symbol2str(oldtp->tp_fund)); + if (oldtp->tp_size != tp->tp_size) + int2int(expp, tp); + else + (*expp)->ex_type = tp; + } + else + if (oldtp->tp_size == tp->tp_size && oper == CAST) { + warning("dubious conversion based on equal size"); + (*expp)->ex_type = tp; /* brute force */ + } + else + { + if (oldtp->tp_fund != ERRONEOUS && tp->tp_fund != ERRONEOUS) + expr_error(*expp, "cannot convert %s to %s", + symbol2str(oldtp->tp_fund), + symbol2str(tp->tp_fund) + ); + (*expp)->ex_type = tp; + } +} + +ch7asgn(expp, oper, expr) + register struct expr **expp; + struct expr *expr; +{ + /* The assignment operators. + */ + int fund = (*expp)->ex_type->tp_fund; + + /* We expect an lvalue */ + if (!(*expp)->ex_lvalue) { + error("no lvalue in lhs of %s", symbol2str(oper)); + (*expp)->ex_depth = 99; /* no direct store/load at EVAL() */ + /* what is 99 ??? DG */ + } + switch (oper) { + case '=': + ch7cast(&expr, oper, (*expp)->ex_type); + break; + case TIMESAB: + case DIVAB: + case MODAB: + if (!is_arith_type((*expp)->ex_type)) + error("%s on %s", symbol2str(oper), symbol2str(fund)); + any2arith(&expr, oper); + ch7cast(&expr, CAST, (*expp)->ex_type); + break; + case PLUSAB: + case MINAB: + any2arith(&expr, oper); + if (fund == POINTER) { + if (!is_integral_type(expr->ex_type)) + error("%s on non-integral type (%s)", + symbol2str(oper), symbol2str(fund)); + ch7bin(&expr, '*', + intexpr( + size_of_type( + (*expp)->ex_type->tp_up, + "object" + ), + pa_type->tp_fund + ) + ); + } + else + if (!is_arith_type((*expp)->ex_type)) + error("%s on %s", symbol2str(oper), symbol2str(fund)); + else + ch7cast(&expr, CAST, (*expp)->ex_type); + break; + case LEFTAB: + case RIGHTAB: + ch7cast(&expr, oper, int_type); + if (!is_integral_type((*expp)->ex_type)) + error("%s on %s", symbol2str(oper), symbol2str(fund)); + break; + case ANDAB: + case XORAB: + case ORAB: + if (!is_integral_type((*expp)->ex_type)) + error("%s on %s", symbol2str(oper), symbol2str(fund)); + ch7cast(&expr, oper, (*expp)->ex_type); + break; + } +#ifndef NOBITFIELD + if (fund == FIELD) + *expp = new_oper((*expp)->ex_type->tp_up, *expp, oper, expr); + else +#endif NOBITFIELD + *expp = new_oper((*expp)->ex_type, *expp, oper, expr); +} + +/* Some interesting (?) questions answered. +*/ +int +is_integral_type(tp) + struct type *tp; +{ + switch (tp->tp_fund) { + case CHAR: + case SHORT: + case INT: + case LONG: + case ENUM: + return 1; +#ifndef NOBITFIELD + case FIELD: + return is_integral_type(tp->tp_up); +#endif NOBITFIELD + default: + return 0; + } +} + +int +is_arith_type(tp) + struct type *tp; +{ + switch (tp->tp_fund) { + case CHAR: + case SHORT: + case INT: + case LONG: + case ENUM: + case FLOAT: + case DOUBLE: + return 1; +#ifndef NOBITFIELD + case FIELD: + return is_arith_type(tp->tp_up); +#endif NOBITFIELD + default: + return 0; + } +} diff --git a/lang/cem/cemcom/ch7bin.c b/lang/cem/cemcom/ch7bin.c new file mode 100644 index 000000000..ee30b03d5 --- /dev/null +++ b/lang/cem/cemcom/ch7bin.c @@ -0,0 +1,308 @@ +/* $Header$ */ +/* SEMANTIC ANALYSIS (CHAPTER 7RM) -- BINARY OPERATORS */ + +#include "botch_free.h" /* UF */ +#include "idf.h" +#include "arith.h" +#include "type.h" +#include "struct.h" +#include "label.h" +#include "expr.h" +#include "Lpars.h" +#include "storage.h" + +extern char options[]; +extern char *symbol2str(); + +/* This chapter asks for the repeated application of code to handle + an operation that may be executed at compile time or at run time, + depending on the constancy of the operands. +*/ + +ch7bin(expp, oper, expr) + register struct expr **expp; + struct expr *expr; +{ + /* apply binary operator oper between *expp and expr. + */ + any2opnd(expp, oper); + any2opnd(&expr, oper); + switch (oper) { + int fund; + case '[': /* RM 7.1 */ + /* RM 14.3 states that indexing follows the commutative laws */ + switch ((*expp)->ex_type->tp_fund) { + case POINTER: + case ARRAY: + break; + case ERRONEOUS: + return; + default: /* unindexable */ + switch (expr->ex_type->tp_fund) { + case POINTER: + case ARRAY: + break; + case ERRONEOUS: + return; + default: + error("indexing an object of type %s", + symbol2str((*expp)->ex_type->tp_fund)); + return; + } + break; + } + ch7bin(expp, '+', expr); + ch7mon('*', expp); + break; + case '(': /* RM 7.1 */ + if ( (*expp)->ex_type->tp_fund == POINTER && + (*expp)->ex_type->tp_up->tp_fund == FUNCTION + ) { + if (options['R']) + warning("function pointer called"); + ch7mon('*', expp); + } + if ((*expp)->ex_type->tp_fund != FUNCTION) { + if ((*expp)->ex_type != error_type) + error("call of non-function (%s)", + symbol2str((*expp)->ex_type->tp_fund)); + /* leave the expression; it may still serve */ + free_expression(expr); /* there go the parameters */ + } + else + *expp = new_oper((*expp)->ex_type->tp_up, + *expp, '(', expr); + break; + case PARCOMMA: /* RM 7.1 */ + if ((*expp)->ex_type->tp_fund == FUNCTION) + function2pointer(expp); + *expp = new_oper(expr->ex_type, *expp, PARCOMMA, expr); + break; + case '%': + fund = arithbalance(expp, oper, &expr); + if (fund == DOUBLE) { + error("floating operand to %%"); + *expp = intexpr((arith)1, INT); + } + else + non_commutative_binop(expp, oper, expr); + break; + case '/': + fund = arithbalance(expp, oper, &expr); + non_commutative_binop(expp, oper, expr); + break; + case '*': + fund = arithbalance(expp, oper, &expr); + commutative_binop(expp, oper, expr); + break; + case '+': + if (expr->ex_type->tp_fund == POINTER) { + /* swap operands */ + struct expr *etmp = expr; + expr = *expp; + *expp = etmp; + } + if ((*expp)->ex_type->tp_fund == POINTER) { + pointer_arithmetic(expp, oper, &expr); + if (expr->ex_type->tp_size != (*expp)->ex_type->tp_size) + ch7cast(&expr, CAST, (*expp)->ex_type); + pointer_binary(expp, oper, expr); + } + else { + fund = arithbalance(expp, oper, &expr); + commutative_binop(expp, oper, expr); + } + break; + case '-': + if ((*expp)->ex_type->tp_fund == POINTER) { + if (expr->ex_type->tp_fund == POINTER) + pntminuspnt(expp, oper, expr); + else { + pointer_arithmetic(expp, oper, &expr); + pointer_binary(expp, oper, expr); + } + } + else { + fund = arithbalance(expp, oper, &expr); + non_commutative_binop(expp, oper, expr); + } + break; + case LEFT: + case RIGHT: + opnd2integral(expp, oper); + opnd2integral(&expr, oper); + ch7cast(&expr, oper, int_type); /* leftop should be int */ + non_commutative_binop(expp, oper, expr); + break; + case '<': + case '>': + case LESSEQ: + case GREATEREQ: + case EQUAL: + case NOTEQUAL: + relbalance(expp, oper, &expr); + non_commutative_binop(expp, oper, expr); + (*expp)->ex_type = int_type; + break; + case '&': + case '^': + case '|': + opnd2integral(expp, oper); + opnd2integral(&expr, oper); + fund = arithbalance(expp, oper, &expr); /* <=== */ + commutative_binop(expp, oper, expr); + break; + case AND: + case OR: + opnd2test(expp, oper); + opnd2test(&expr, oper); + if (is_cp_cst(*expp)) { + struct expr *ex = *expp; + + /* the following condition is a short-hand for + ((oper == AND) && o1) || ((oper == OR) && !o1) + where o1 == (*expp)->VL_VALUE; + and ((oper == AND) || (oper == OR)) + */ + if ((oper == AND) == ((*expp)->VL_VALUE != (arith)0)) + *expp = expr; + else { + free_expression(expr); + *expp = intexpr((arith)((oper == AND) ? 0 : 1), + INT); + } + free_expression(ex); + } + else + if (is_cp_cst(expr)) { + /* Note!!!: the following condition is a short-hand for + ((oper == AND) && o2) || ((oper == OR) && !o2) + where o2 == expr->VL_VALUE + and ((oper == AND) || (oper == OR)) + */ + if ((oper == AND) == (expr->VL_VALUE != (arith)0)) + free_expression(expr); + else { + if (oper == OR) + expr->VL_VALUE = (arith)1; + ch7bin(expp, ',', expr); + } + } + else + *expp = new_oper(int_type, *expp, oper, expr); + (*expp)->ex_flags |= EX_LOGICAL; + break; + case ':': + if ( is_struct_or_union((*expp)->ex_type->tp_fund) + || is_struct_or_union(expr->ex_type->tp_fund) + ) { + if ((*expp)->ex_type != expr->ex_type) { + error("illegal balance"); + (*expp)->ex_type = error_type; + } + } + else { + relbalance(expp, oper, &expr); + } + *expp = new_oper((*expp)->ex_type, *expp, oper, expr); + break; + case '?': + opnd2logical(expp, oper); + if (is_cp_cst(*expp)) + *expp = (*expp)->VL_VALUE ? + expr->OP_LEFT : expr->OP_RIGHT; + else + *expp = new_oper(expr->ex_type, *expp, oper, expr); + break; + case ',': + if (is_cp_cst(*expp)) + *expp = expr; + else + *expp = new_oper(expr->ex_type, *expp, oper, expr); + (*expp)->ex_flags |= EX_COMMA; + break; + } +} + +pntminuspnt(expp, oper, expr) + register struct expr **expp, *expr; +{ + /* Subtracting two pointers is so complicated it merits a + routine of its own. + */ + struct type *up_type = (*expp)->ex_type->tp_up; + + if (up_type != expr->ex_type->tp_up) { + error("subtracting incompatible pointers"); + free_expression(expr); + free_expression(*expp); + *expp = intexpr((arith)0, INT); + return; + } + /* we hope the optimizer will eliminate the load-time + pointer subtraction + */ + *expp = new_oper((*expp)->ex_type, *expp, oper, expr); + ch7cast(expp, CAST, pa_type); /* ptr-ptr: result has pa_type */ + ch7bin(expp, '/', + intexpr(size_of_type(up_type, "object"), pa_type->tp_fund)); + ch7cast(expp, CAST, int_type); /* result will be an integer expr */ +} + +non_commutative_binop(expp, oper, expr) + register struct expr **expp, *expr; +{ + /* Constructs in *expp the operation indicated by the operands. + "oper" is a non-commutative operator + */ + if (is_cp_cst(expr) && is_cp_cst(*expp)) + cstbin(expp, oper, expr); + else + *expp = new_oper((*expp)->ex_type, *expp, oper, expr); +} + +commutative_binop(expp, oper, expr) + register struct expr **expp, *expr; +{ + /* Constructs in *expp the operation indicated by the operands. + "oper" is a commutative operator + */ + if (is_cp_cst(expr) && is_cp_cst(*expp)) + cstbin(expp, oper, expr); + else + if ((*expp)->ex_depth > expr->ex_depth) + *expp = new_oper((*expp)->ex_type, *expp, oper, expr); + else + *expp = new_oper((*expp)->ex_type, expr, oper, *expp); +} + +pointer_arithmetic(expp1, oper, expp2) + register struct expr **expp1, **expp2; +{ + /* prepares the integral expression expp2 in order to + apply it to the pointer expression expp1 + */ + if (any2arith(expp2, oper) == DOUBLE) { + expr_error(*expp2, + "illegal combination of float and pointer"); + free_expression(*expp2); + *expp2 = intexpr((arith)0, INT); + } + ch7bin( expp2, '*', + intexpr(size_of_type((*expp1)->ex_type->tp_up, "object"), + pa_type->tp_fund) + ); +} + +pointer_binary(expp, oper, expr) + register struct expr **expp, *expr; +{ + /* constructs the pointer arithmetic expression out of + a pointer expression, a binary operator and an integral + expression. + */ + if (is_ld_cst(expr) && is_ld_cst(*expp)) + cstbin(expp, oper, expr); + else + *expp = new_oper((*expp)->ex_type, *expp, oper, expr); +} diff --git a/lang/cem/cemcom/ch7mon.c b/lang/cem/cemcom/ch7mon.c new file mode 100644 index 000000000..061db8176 --- /dev/null +++ b/lang/cem/cemcom/ch7mon.c @@ -0,0 +1,148 @@ +/* $Header$ */ +/* SEMANTIC ANALYSIS (CHAPTER 7RM) -- MONADIC OPERATORS */ + +#include "nobitfield.h" +#include "botch_free.h" +#include "Lpars.h" +#include "arith.h" +#include "type.h" +#include "label.h" +#include "expr.h" +#include "storage.h" +#include "idf.h" +#include "def.h" + +extern char options[]; +char *symbol2str(); + +ch7mon(oper, expp) + register struct expr **expp; +{ + /* The monadic prefix operator oper is applied to *expp. + */ + register struct expr *expr; + + switch (oper) { + case '*': /* RM 7.2 */ + /* no FIELD type allowed */ + if ((*expp)->ex_type->tp_fund == ARRAY) + array2pointer(expp); + if ((*expp)->ex_type->tp_fund != POINTER) { + if ((*expp)->ex_type != error_type) + error("* applied to non-pointer (%s)", + symbol2str((*expp)->ex_type->tp_fund)); + (*expp)->ex_type = error_type; + } + else { + expr = *expp; + if (expr->ex_lvalue == 0) + /* dereference in administration only */ + expr->ex_type = expr->ex_type->tp_up; + else /* runtime code */ + *expp = new_oper(expr->ex_type->tp_up, NILEXPR, + '*', expr); + (*expp)->ex_lvalue = ( + (*expp)->ex_type->tp_fund != ARRAY && + (*expp)->ex_type->tp_fund != FUNCTION); + } + break; + case '&': + if ((*expp)->ex_type->tp_fund == ARRAY) { + array2pointer(expp); + } + else + if ((*expp)->ex_type->tp_fund == FUNCTION) { + function2pointer(expp); + } + else +#ifndef NOBITFIELD + if ((*expp)->ex_type->tp_fund == FIELD) { + error("& applied to field variable"); + (*expp)->ex_type = error_type; + } + else +#endif NOBITFIELD + if (!(*expp)->ex_lvalue) { + error("& applied to non-lvalue"); + (*expp)->ex_type = error_type; + } + else { + /* assume that enums are already filtered out */ + if ((*expp)->ex_class == Value && (*expp)->VL_IDF) { + register struct def *def = + (*expp)->VL_IDF->id_def; + + /* & indicates that cannot + be used as register anymore + */ + if (def->df_sc == REGISTER) { + error("'&' on register variable not allowed"); + (*expp)->ex_type = error_type; + break; /* break case '&' */ + } + def->df_register = REG_NONE; + } + (*expp)->ex_type = pointer_to((*expp)->ex_type); + (*expp)->ex_lvalue = 0; + } + break; + case '~': + { + int fund = (*expp)->ex_type->tp_fund; + + if (fund == FLOAT || fund == DOUBLE) { + error("~ not allowed on %s operands", symbol2str(fund)); + *expp = intexpr((arith)1, INT); + break; + } + } + case '-': + any2arith(expp, oper); + if (is_cp_cst(*expp)) { + arith o1 = (*expp)->VL_VALUE; + if (oper == '-') + o1 = -o1; + else + o1 = ~o1; + (*expp)->VL_VALUE = o1; + } + else + if (is_fp_cst(*expp)) + switch_sign_fp(*expp); + else + *expp = new_oper((*expp)->ex_type, NILEXPR, oper, *expp); + break; + case '!': + if ((*expp)->ex_type->tp_fund == FUNCTION) + function2pointer(expp); + if ((*expp)->ex_type->tp_fund != POINTER) + any2arith(expp, oper); + opnd2test(expp, '!'); + if (is_cp_cst(*expp)) { + arith o1 = (*expp)->VL_VALUE; + o1 = !o1; + (*expp)->VL_VALUE = o1; + (*expp)->ex_type = int_type; + } + else + *expp = new_oper(int_type, NILEXPR, oper, *expp); + (*expp)->ex_flags |= EX_LOGICAL; + break; + case PLUSPLUS: + case MINMIN: + ch7incr(expp, oper); + break; + case SIZEOF: + if ( (*expp)->ex_class == Value + && (*expp)->VL_IDF + && (*expp)->VL_IDF->id_def->df_formal_array + ) + warning("sizeof formal array %s is sizeof pointer!", + (*expp)->VL_IDF->id_text); + expr = intexpr(size_of_type((*expp)->ex_type, "object"), INT); + free_expression(*expp); + *expp = expr; + (*expp)->ex_flags |= EX_SIZEOF; + break; + } +} diff --git a/lang/cem/cemcom/char.tab b/lang/cem/cemcom/char.tab new file mode 100644 index 000000000..480bdf1d8 --- /dev/null +++ b/lang/cem/cemcom/char.tab @@ -0,0 +1,58 @@ +% +% CHARACTER CLASSES +% +% some general settings: +%S129 +%F %s, +% +% START OF TOKEN +% +%C +STGARB:\000-\200 +STSKIP:\r \t +STNL:\n\f\013 +STCOMP:!&+-<=>| +STSIMP:%()*,/:;?[]^{}~ +STCHAR:' +STIDF:a-zA-Z_ +STNUM:.0-9 +STSTR:" +STEOI:\200 +%T/* character classes */ +%T#include "class.h" +%Tchar tkclass[] = { +%p +%T}; +% +% INIDF +% +%C +1:a-zA-Z_0-9 +%Tchar inidf[] = { +%F %s, +%p +%T}; +% +% ISDIG +% +%C +1:0-9 +%Tchar isdig[] = { +%p +%T}; +% +% ISHEX +% +%C +1:a-fA-F +%Tchar ishex[] = { +%p +%T}; +% +% ISOCT +% +%C +1:0-7 +%Tchar isoct[] = { +%p +%T}; diff --git a/lang/cem/cemcom/class.h b/lang/cem/cemcom/class.h new file mode 100644 index 000000000..faaff2346 --- /dev/null +++ b/lang/cem/cemcom/class.h @@ -0,0 +1,37 @@ +/* $Header$ */ +/* U S E O F C H A R A C T E R C L A S S E S */ + +/* As a starter, chars are divided into classes, according to which + token they can be the start of. + At present such a class number is supposed to fit in 4 bits. +*/ + +#define class(ch) (tkclass[ch]) + +/* Being the start of a token is, fortunately, a mutual exclusive + property, so, although there are less than 16 classes they can be + packed in 4 bits. +*/ + +#define STSKIP 0 /* spaces and so on: skipped characters */ +#define STNL 1 /* newline character(s): update linenumber etc. */ +#define STGARB 2 /* garbage ascii character: not allowed in C */ +#define STSIMP 3 /* this character can occur as token in C */ +#define STCOMP 4 /* this one can start a compound token in C */ +#define STIDF 5 /* being the initial character of an identifier */ +#define STCHAR 6 /* the starter of a character constant */ +#define STSTR 7 /* the starter of a string */ +#define STNUM 8 /* the starter of a numeric constant */ +#define STEOI 9 /* End-Of-Information mark */ + +/* But occurring inside a token is not, so we need 1 bit for each + class. This is implemented as a collection of tables to speed up + the decision whether a character has a special meaning. +*/ +#define in_idf(ch) (inidf[ch]) +#define is_oct(ch) (isoct[ch]) +#define is_dig(ch) (isdig[ch]) +#define is_hex(ch) (ishex[ch]) + +extern char tkclass[]; +extern char inidf[], isoct[], isdig[], ishex[]; diff --git a/lang/cem/cemcom/code.c b/lang/cem/cemcom/code.c new file mode 100644 index 000000000..fb4e7e055 --- /dev/null +++ b/lang/cem/cemcom/code.c @@ -0,0 +1,491 @@ +/* $Header$ */ +/* C O D E - G E N E R A T I N G R O U T I N E S */ + +#include "dataflow.h" +#include "use_tmp.h" +#include "botch_free.h" + +#include "arith.h" +#include "type.h" +#include "idf.h" +#include "label.h" +#include "code.h" +#include "alloc.h" +#include "def.h" +#include "expr.h" +#include "sizes.h" +#include "stack.h" +#include "em.h" +#include "level.h" +#include "decspecs.h" +#include "declarator.h" +#include "Lpars.h" +#include "mes.h" +#include "LLlex.h" +#include "specials.h" +#include "storage.h" +#include "atw.h" +#include "assert.h" + +static struct stat_block *stat_sp, *stat_head; + +char *symbol2str(); +int fp_used; +label lab_count = 1; +label datlab_count = 1; + +extern char options[]; + +/* init_code() initialises the output file on which the compact + EM code is written +*/ +init_code(dst_file) + char *dst_file; +{ + if (C_open(dst_file) == 0) + fatal("cannot write to %s\n", dst_file); +#ifndef USE_TMP + famous_first_words(); +#endif USE_TMP + stat_sp = stat_head = new_stat_block(); + clear((char *)stat_sp, sizeof(struct stat_block)); +} + +famous_first_words() +{ + C_magic(); + C_ms_emx(word_size, pointer_size); +} + +end_code() +{ + /* end_code() performs the actions to be taken when closing + the output stream. + */ + C_ms_src((arith)(LineNumber - 2), FileName); + C_close(); +} + +#ifdef USE_TMP +prepend_scopes(dst_file) + char *dst_file; +{ + /* prepend_scopes() runs down the list of global idf's + and generates those exa's, exp's, ina's and inp's + that superior hindsight has provided, on the file dst_file. + */ + struct stack_entry *se = local_level->sl_entry; + + if (C_open(dst_file) == 0) + fatal("cannot create file %s", dst_file); + famous_first_words(); + while (se != 0) { + struct idf *idf = se->se_idf; + struct def *def = idf->id_def; + + if (def && + ( def->df_initialized || + def->df_used || + def->df_alloc + ) + ) + code_scope(idf->id_text, def); + se = se->next; + } + C_close(); +} +#endif USE_TMP + +code_scope(text, def) + char *text; + struct def *def; +{ + /* generates code for one name, text, of the storage class + as given by def, if meaningful. + */ + int fund = def->df_type->tp_fund; + + switch (def->df_sc) { + case EXTERN: + case GLOBAL: + case IMPLICIT: + if (fund == FUNCTION) + C_exp(text); + else + C_exa(text); + break; + case STATIC: + if (fund == FUNCTION) + C_inp(text); + else + C_ina(text); + break; + } +} + +static label return_label; +static char return_expr_occurred; +static struct type *func_tp; +static label func_res_label; +static char *last_fn_given = ""; +static label file_name_label; + +/* begin_proc() is called at the entrance of a new function + and performs the necessary code generation: + - a scope indicator (if needed) exp/inp + - the procedure entry pro $name + - reserves some space if the result of the function + does not fit in the return area + - a fil pseudo instruction +*/ +begin_proc(name, def) /* to be called when entering a procedure */ + char *name; + struct def *def; +{ + arith size; + +#ifndef USE_TMP + code_scope(name, def); +#endif USE_TMP +#ifdef DATAFLOW + if (options['d']) + DfaStartFunction(name); +#endif DATAFLOW + + func_tp = def->df_type->tp_up; + size = ATW(func_tp->tp_size); + C_pro_narg(name); + if (is_struct_or_union(func_tp->tp_fund)) { + C_ndlb(func_res_label = data_label()); + C_bss_cst(size, (arith)0, 1); + } + else + func_res_label = 0; + + /* Special arrangements if the function result doesn't fit in + the function return area of the EM machine. The size of + the function return area is implementation dependent. + */ + lab_count = (label) 1; + return_label = text_label(); + return_expr_occurred = 0; + + if (options['p']) { /* profiling */ + if (strcmp(last_fn_given, FileName) != 0) { + /* previous function came from other file */ + C_ndlb(file_name_label = data_label()); + C_con_begin(); + C_co_scon(last_fn_given = FileName, (arith)0); + C_con_end(); + } + /* enable debug trace of EM source */ + C_fil_ndlb(file_name_label, (arith)0); + C_lin((arith)LineNumber); + } +} + +/* end_proc() deals with the code to be generated at the end of + a function, as there is: + - the EM ret instruction: "ret 0" + - loading of the function result in the function result area + if there has been a return in the function body + (see do_return_expr()) + - indication of the use of floating points + - indication of the number of bytes used for formal parameters + - use of special identifiers such as "setjmp" + - "end" + number of bytes used for local variables +*/ +end_proc(fbytes, nbytes) + arith fbytes, nbytes; +{ + static int mes_flt_given = 0; /* once for the whole program */ + +#ifdef DATAFLOW + if (options['d']) + DfaEndFunction(); +#endif DATAFLOW + C_ret((arith)0); + if (return_expr_occurred != 0) { + C_ilb(return_label); + if (func_res_label != 0) { + C_lae_ndlb(func_res_label, (arith)0); + store_block(func_tp->tp_size, func_tp->tp_align); + C_lae_ndlb(func_res_label, (arith)0); + C_ret(pointer_size); + } + else + C_ret(ATW(func_tp->tp_size)); + } + if (fp_used && mes_flt_given == 0) { + /* floating point used */ + C_ms_flt(); + mes_flt_given++; + } + C_ms_par(fbytes); /* # bytes for formals */ + if (sp_occurred[SP_SETJMP]) { /* indicate use of "setjmp" */ + C_ms_gto(); + sp_occurred[SP_SETJMP] = 0; + } + C_end(ATW(nbytes)); +} + +do_return_expr(expr) + struct expr *expr; +{ + /* do_return_expr() generates the expression and the jump for + a return statement with an expression. + */ + ch7cast(&expr, RETURN, func_tp); + code_expr(expr, RVAL, TRUE, NO_LABEL, NO_LABEL); + C_bra(return_label); + return_expr_occurred = 1; +} + +code_declaration(idf, expr, lvl, sc) + struct idf *idf; /* idf to be declared */ + struct expr *expr; /* initialisation; NULL if absent */ + int lvl; /* declaration level */ + int sc; /* storage class, as in the declaration */ +{ + /* code_declaration() does the actual declaration of the + variable indicated by "idf" on declaration level "lvl". + If the variable is initialised, the expression is given + in "expr". + There are some cases to be considered: + - filter out typedefs, they don't correspond to code; + - global variables, coded only if initialized; + - local static variables; + - local automatic variables; + If there is a storage class indication (EXTERN/STATIC), + code_declaration() will generate an exa or ina. + The sc is the actual storage class, as given in the + declaration. This is to allow: + extern int a; + int a = 5; + while at the same time forbidding + extern int a = 5; + */ + char *text = idf->id_text; + struct def *def = idf->id_def; + arith size = def->df_type->tp_size; + int def_sc = def->df_sc; + + if (def_sc == TYPEDEF) /* no code for typedefs */ + return; + if (sc == EXTERN && expr && !is_anon_idf(idf)) + error("%s is extern; cannot initialize", text); + if (lvl == L_GLOBAL) { /* global variable */ + /* is this an allocating declaration? */ + if ( (sc == 0 || sc == STATIC) + && def->df_type->tp_fund != FUNCTION + && size >= 0 + ) + def->df_alloc = ALLOC_SEEN; + if (expr) { /* code only if initialized */ +#ifndef USE_TMP + code_scope(text, def); +#endif USE_TMP + def->df_alloc = ALLOC_DONE; + C_dnam(text); + do_ival(&(def->df_type), expr); + } + } + else + if (lvl >= L_LOCAL) { /* local variable */ + /* they are STATIC, EXTERN, GLOBAL, IMPLICIT, AUTO or + REGISTER + */ + switch (def_sc) { + case STATIC: + /* they are handled on the spot and get an + integer label in EM. + */ + C_ndlb((label)def->df_address); + if (expr) /* there is an initialisation */ + do_ival(&(def->df_type), expr); + else { /* produce blank space */ + if (size <= 0) { + error("size of \"%s\" unknown", text); + size = (arith)0; + } + C_bss_cst(align(size, word_align), (arith)0, 1); + } + break; + case EXTERN: + case GLOBAL: + case IMPLICIT: + /* we are sure there is no expression */ +#ifndef USE_TMP + code_scope(text, def); +#endif USE_TMP + break; + case AUTO: + case REGISTER: + if (expr) + loc_init(expr, idf); + break; + default: + crash("bad local storage class"); + break; + } + } +} + +loc_init(expr, id) + struct expr *expr; + struct idf *id; +{ + /* loc_init() generates code for the assignment of + expression expr to the local variable described by id. + */ + register struct type *tp = id->id_def->df_type; + + /* automatic aggregates cannot be initialised. */ + switch (tp->tp_fund) { + case ARRAY: + case STRUCT: + case UNION: + error("no automatic aggregate initialisation"); + return; + } + + if (ISCOMMA(expr)) { /* embraced: int i = {12}; */ + if (options['R']) { + if (ISCOMMA(expr->OP_LEFT)) /* int i = {{1}} */ + expr_error(expr, "extra braces not allowed"); + else + if (expr->OP_RIGHT != 0) /* int i = {1 , 2} */ + expr_error(expr, "too many initializers"); + } + while (expr) { + loc_init(expr->OP_LEFT, id); + expr = expr->OP_RIGHT; + } + } + else { /* not embraced */ + ch7cast(&expr, '=', tp); + EVAL(expr, RVAL, TRUE, NO_LABEL, NO_LABEL); + store_val(id, tp, (arith) 0); + } +} + +/* bss() allocates bss space for the global idf. +*/ +bss(idf) + struct idf *idf; +{ + register struct def *def = idf->id_def; + arith size = def->df_type->tp_size; + +#ifndef USE_TMP + code_scope(idf->id_text, def); +#endif USE_TMP + /* Since bss() is only called if df_alloc is non-zero, and + since df_alloc is only non-zero if size >= 0, we have: + */ + if (options['R'] && size == 0) + warning("actual array of size 0"); + C_dnam(idf->id_text); + C_bss_cst(align(size, word_align), (arith)0, 1); +} + +formal_cvt(def) + struct def *def; +{ + /* formal_cvt() converts a formal parameter of type char or + short from int to that type. + */ + register struct type* tp = def->df_type; + + if (tp->tp_size != int_size) + if (tp->tp_fund == CHAR || tp->tp_fund == SHORT) { + C_lol(def->df_address); + conversion(int_type, def->df_type); + C_lal(def->df_address); + C_sti(tp->tp_size); + def->df_register = REG_NONE; + } +} + +/* code_expr() is the parser's interface to the expression code + generator. + If line number trace is wanted, it generates a lin instruction. + EVAL() is called directly. +*/ +code_expr(expr, val, code, tlbl, flbl) + struct expr *expr; + label tlbl, flbl; +{ + if (options['p']) /* profiling */ + C_lin((arith)LineNumber); + EVAL(expr, val, code, tlbl, flbl); +} + +/* The FOR/WHILE/DO/SWITCH stacking mechanism: + stat_stack() has to be called at the entrance of a + for, while, do or switch statement to indicate the + EM labels where a subsequent break or continue causes + the program to jump to. +*/ +/* do_break() generates EM code needed at the occurrence of "break": + it generates a branch instruction to the break label of the + innermost statement in which break has a meaning. + As "break" is legal in any of 'while', 'do', 'for' or 'switch', + which are the only ones that are stacked, only the top of + the stack is interesting. + 0 is returned if the break cannot be bound to any enclosing + statement. +*/ +int +do_break() +{ + register struct stat_block *stat_ptr = stat_sp; + + if (stat_ptr) { + C_bra(stat_ptr->st_break); + return 1; + } + return 0; /* break is illegal */ +} + +/* do_continue() generates EM code needed at the occurrence of "continue": + it generates a branch instruction to the continue label of the + innermost statement in which continue has a meaning. + 0 is returned if the continue cannot be bound to any enclosing + statement. +*/ +int +do_continue() +{ + register struct stat_block *stat_ptr = stat_sp; + + while (stat_ptr) { + if (stat_ptr->st_continue) { + C_bra(stat_ptr->st_continue); + return 1; + } + stat_ptr = stat_ptr->next; + } + return 0; +} + +stat_stack(break_label, cont_label) + label break_label, cont_label; +{ + register struct stat_block *newb = new_stat_block(); + + newb->next = stat_sp; + newb->st_break = break_label; + newb->st_continue = cont_label; + stat_sp = newb; +} + +/* stat_unstack() unstacks the data of a statement + which may contain break or continue +*/ +stat_unstack() +{ + register struct stat_block *sbp = stat_sp; + stat_sp = stat_sp->next; + free_stat_block(sbp); +} diff --git a/lang/cem/cemcom/code.h b/lang/cem/cemcom/code.h new file mode 100644 index 000000000..3399b3e02 --- /dev/null +++ b/lang/cem/cemcom/code.h @@ -0,0 +1,23 @@ +/* $Header$ */ +/* C O D E - G E N E R A T O R D E F I N I T I O N S */ + +struct stat_block { + struct stat_block *next; + label st_break; + label st_continue; +}; + + +/* allocation definitions of struct stat_block */ +/* ALLOCDEF "stat_block" */ +extern char *st_alloc(); +extern struct stat_block *h_stat_block; +#define new_stat_block() ((struct stat_block *) \ + st_alloc((char **)&h_stat_block, sizeof(struct stat_block))) +#define free_stat_block(p) st_free(p, h_stat_block, sizeof(struct stat_block)) + + +#define LVAL 0 +#define RVAL 1 +#define FALSE 0 +#define TRUE 1 diff --git a/lang/cem/cemcom/code.str b/lang/cem/cemcom/code.str new file mode 100644 index 000000000..3399b3e02 --- /dev/null +++ b/lang/cem/cemcom/code.str @@ -0,0 +1,23 @@ +/* $Header$ */ +/* C O D E - G E N E R A T O R D E F I N I T I O N S */ + +struct stat_block { + struct stat_block *next; + label st_break; + label st_continue; +}; + + +/* allocation definitions of struct stat_block */ +/* ALLOCDEF "stat_block" */ +extern char *st_alloc(); +extern struct stat_block *h_stat_block; +#define new_stat_block() ((struct stat_block *) \ + st_alloc((char **)&h_stat_block, sizeof(struct stat_block))) +#define free_stat_block(p) st_free(p, h_stat_block, sizeof(struct stat_block)) + + +#define LVAL 0 +#define RVAL 1 +#define FALSE 0 +#define TRUE 1 diff --git a/lang/cem/cemcom/conversion.c b/lang/cem/cemcom/conversion.c new file mode 100644 index 000000000..07188d6c9 --- /dev/null +++ b/lang/cem/cemcom/conversion.c @@ -0,0 +1,130 @@ +/* $Header$ */ +/* C O N V E R S I O N - C O D E G E N E R A T O R */ + +#include "arith.h" +#include "type.h" +#include "em.h" +#include "sizes.h" +#include "Lpars.h" + +#define T_SIGNED 1 +#define T_UNSIGNED 2 +#define T_FLOATING 3 + +/* conversion() generates the EM code for a conversion between + the types char, short, int, long, float, double and pointer. + In case of integral type, the notion signed / unsigned is + taken into account. + The EM code to obtain this conversion looks like: + LOC sizeof(from_type) + LOC sizeof(to_type) + C?? +*/ + +conversion(from_type, to_type) + struct type *from_type, *to_type; +{ + arith from_size; + arith to_size; + + if (from_type == to_type) { /* a little optimisation */ + return; + } + + from_size = from_type->tp_size; + to_size = to_type->tp_size; + + switch (fundamental(from_type)) { + + case T_SIGNED: + switch (fundamental(to_type)) { + + case T_SIGNED: + C_loc(from_size); + C_loc(to_size < word_size ? word_size : to_size); + C_cii(); + break; + + case T_UNSIGNED: + C_loc(from_size < word_size ? word_size : from_size); + C_loc(to_size < word_size ? word_size : to_size); + C_ciu(); + break; + + case T_FLOATING: + C_loc(from_size < word_size ? word_size : from_size); + C_loc(to_size < word_size ? word_size : to_size); + C_cif(); + break; + } + break; + + case T_UNSIGNED: + C_loc(from_size < word_size ? word_size : from_size); + C_loc(to_size < word_size ? word_size : to_size); + + switch (fundamental(to_type)) { + + case T_SIGNED: + C_cui(); + break; + + case T_UNSIGNED: + C_cuu(); + break; + + case T_FLOATING: + C_cuf(); + break; + } + break; + + case T_FLOATING: + C_loc(from_size < word_size ? word_size : from_size); + C_loc(to_size < word_size ? word_size : to_size); + + switch (fundamental(to_type)) { + + case T_SIGNED: + C_cfi(); + break; + + case T_UNSIGNED: + C_cfu(); + break; + + case T_FLOATING: + C_cff(); + break; + } + break; + default: + crash("(conversion) illegal type conversion"); + } +} + +/* fundamental() returns in which category a given type falls: + signed, unsigned or floating +*/ +int +fundamental(tp) + struct type *tp; +{ + switch (tp->tp_fund) { + + case CHAR: + case SHORT: + case INT: + case LONG: + case ENUM: + return tp->tp_unsigned ? T_UNSIGNED : T_SIGNED; + + case FLOAT: + case DOUBLE: + return T_FLOATING; + + case POINTER: /* pointer : signed / unsigned ??? */ + return T_SIGNED; + } + return 0; +} diff --git a/lang/cem/cemcom/cstoper.c b/lang/cem/cemcom/cstoper.c new file mode 100644 index 000000000..414e18d67 --- /dev/null +++ b/lang/cem/cemcom/cstoper.c @@ -0,0 +1,230 @@ +/* $Header$ */ +/* C O N S T A N T E X P R E S S I O N H A N D L I N G */ + +#include "target_sizes.h" /* UF */ + +#include "idf.h" +#include "arith.h" +#include "type.h" +#include "label.h" +#include "expr.h" +#include "sizes.h" +#include "Lpars.h" + +long mach_long_sign; /* sign bit of the machine long */ +int mach_long_size; /* size of long on this machine == sizeof(long) */ +long full_mask[MAXSIZE];/* full_mask[1] == 0XFF, full_mask[2] == 0XFFFF, .. */ +arith max_int; /* maximum integer on target machine */ +arith max_unsigned; /* maximum unsigned on target machine */ + +cstbin(expp, oper, expr) + struct expr **expp, *expr; +{ + /* The operation oper is performed on the constant + expressions *expp and expr, and the result restored in + *expp. + */ + arith o1 = (*expp)->VL_VALUE; + arith o2 = expr->VL_VALUE; + int uns = (*expp)->ex_type->tp_unsigned; + + switch (oper) { + case '*': + o1 *= o2; + break; + case '/': + if (o2 == 0) { + error("division by 0"); + break; + } + if (uns) { + /* this is more of a problem than you might + think on C compilers which do not have + unsigned long. + */ + if (o2 & mach_long_sign) {/* o2 > max_long */ + o1 = ! (o1 >= 0 || o1 < o2); + /* this is the unsigned test + o1 < o2 for o2 > max_long + */ + } + else { /* o2 <= max_long */ + long half, bit, hdiv, hrem, rem; + + half = (o1 >> 1) & ~mach_long_sign; + bit = o1 & 01; + /* now o1 == 2 * half + bit + and half <= max_long + and bit <= max_long + */ + hdiv = half / o2; + hrem = half % o2; + rem = 2 * hrem + bit; + o1 = 2 * hdiv + (rem < 0 || rem >= o2); + /* that is the unsigned compare + rem >= o2 for o2 <= max_long + */ + } + } + else + o1 /= o2; + break; + case '%': + if (o2 == 0) { + error("modulo by 0"); + break; + } + if (uns) { + if (o2 & mach_long_sign) {/* o2 > max_long */ + o1 = (o1 >= 0 || o1 < o2) ? o1 : o1 - o2; + /* this is the unsigned test + o1 < o2 for o2 > max_long + */ + } + else { /* o2 <= max_long */ + long half, bit, hrem, rem; + + half = (o1 >> 1) & ~mach_long_sign; + bit = o1 & 01; + /* now o1 == 2 * half + bit + and half <= max_long + and bit <= max_long + */ + hrem = half % o2; + rem = 2 * hrem + bit; + o1 = (rem < 0 || rem >= o2) ? rem - o2 : rem; + } + } + else + o1 %= o2; + break; + case '+': + o1 += o2; + break; + case '-': + o1 -= o2; + break; + case LEFT: + o1 <<= o2; + break; + case RIGHT: + if (o2 == 0) + break; + if (uns) { + o1 >>= 1; + o1 & = ~mach_long_sign; + o1 >>= (o2-1); + } + else + o1 >>= o2; + break; + case '<': + if (uns) { + o1 = (o1 & mach_long_sign ? + (o2 & mach_long_sign ? o1 < o2 : 0) : + (o2 & mach_long_sign ? 1 : o1 < o2) + ); + } + else + o1 = o1 < o2; + break; + case '>': + if (uns) { + o1 = (o1 & mach_long_sign ? + (o2 & mach_long_sign ? o1 > o2 : 1) : + (o2 & mach_long_sign ? 0 : o1 > o2) + ); + } + else + o1 = o1 > o2; + break; + case LESSEQ: + if (uns) { + o1 = (o1 & mach_long_sign ? + (o2 & mach_long_sign ? o1 <= o2 : 0) : + (o2 & mach_long_sign ? 1 : o1 <= o2) + ); + } + else + o1 = o1 <= o2; + break; + case GREATEREQ: + if (uns) { + o1 = (o1 & mach_long_sign ? + (o2 & mach_long_sign ? o1 >= o2 : 1) : + (o2 & mach_long_sign ? 0 : o1 >= o2) + ); + } + else + o1 = o1 >= o2; + break; + case EQUAL: + o1 = o1 == o2; + break; + case NOTEQUAL: + o1 = o1 != o2; + break; + case '&': + o1 &= o2; + break; + case '|': + o1 |= o2; + break; + case '^': + o1 ^= o2; + break; + } + (*expp)->VL_VALUE = o1; + cut_size(*expp); + (*expp)->ex_flags |= expr->ex_flags; + (*expp)->ex_flags &= ~EX_PARENS; +} + +cut_size(expr) + struct expr *expr; +{ + /* The constant value of the expression expr is made to + conform to the size of the type of the expression. + */ + arith o1 = expr->VL_VALUE; + int uns = expr->ex_type->tp_unsigned; + int size = (int) expr->ex_type->tp_size; + + if (uns) { + if (o1 & ~full_mask[size]) + expr_warning(expr, + "overflow in unsigned constant expression"); + o1 &= full_mask[size]; + } + else { + int nbits = (int) (mach_long_size - size) * 8; + long remainder = o1 & ~full_mask[size]; + + if (remainder != 0 && remainder != ~full_mask[size]) + expr_warning(expr, "overflow in constant expression"); + o1 <<= nbits; /* ??? */ + o1 >>= nbits; + } + expr->VL_VALUE = o1; +} + +init_cst() +{ + int i = 0; + arith bt = (arith)0; + + while (!(bt < 0)) { + bt = (bt << 8) + 0377, i++; + if (i == MAXSIZE) + fatal("array full_mask too small for this machine"); + full_mask[i] = bt; + } + mach_long_size = i; + mach_long_sign = 1 << (mach_long_size * 8 - 1); + if (long_size < mach_long_size) + fatal("sizeof (long) insufficient on this machine"); + + + max_int = full_mask[int_size] & ~(1 << (int_size * 8 - 1)); + max_unsigned = full_mask[int_size]; +} diff --git a/lang/cem/cemcom/dataflow.c b/lang/cem/cemcom/dataflow.c new file mode 100644 index 000000000..7d1d88364 --- /dev/null +++ b/lang/cem/cemcom/dataflow.c @@ -0,0 +1,34 @@ +/* $Header$ */ +/* DATAFLOW ANALYSIS ON C PROGRAMS */ + +/* Compile the C compiler with flag DATAFLOW. + Use the compiler option --d. +*/ + +#include "dataflow.h" /* UF */ + +#ifdef DATAFLOW +char *CurrentFunction = 0; +int NumberOfCalls; + +DfaStartFunction(nm) + char *nm; +{ + CurrentFunction = nm; + NumberOfCalls = 0; +} + +DfaEndFunction() +{ + if (NumberOfCalls == 0) { + printf("DFA: %s: --none--\n", CurrentFunction); + } +} + +DfaCallFunction(s) + char *s; +{ + printf("DFA: %s: %s\n", CurrentFunction, s); + ++NumberOfCalls; +} +#endif DATAFLOW diff --git a/lang/cem/cemcom/declar.g b/lang/cem/cemcom/declar.g new file mode 100644 index 000000000..a758193b9 --- /dev/null +++ b/lang/cem/cemcom/declar.g @@ -0,0 +1,473 @@ +/* $Header$ */ +/* DECLARATION SYNTAX PARSER */ + +{ +#include "nobitfield.h" +#include "debug.h" +#include "arith.h" +#include "LLlex.h" +#include "idf.h" +#include "type.h" +#include "struct.h" +#include "field.h" +#include "decspecs.h" +#include "def.h" +#include "declarator.h" +#include "label.h" +#include "expr.h" +#include "sizes.h" + +extern char options[]; +} + +/* 8 */ +declaration + {struct decspecs Ds;} +: + {Ds = null_decspecs;} + decl_specifiers(&Ds) + init_declarator_list(&Ds)? + ';' +; + +/* A `decl_specifiers' describes a sequence of a storage_class_specifier, + an unsigned_specifier, a size_specifier and a simple type_specifier, + which may occur in arbitrary order and each of which may be absent; + at least one of them must be present, however, since the totally + empty case has already be dealt with in `external_definition'. + This means that something like: + unsigned extern int short xx; + is perfectly good C. + + On top of that, multiple occurrences of storage_class_specifiers, + unsigned_specifiers and size_specifiers are errors, but a second + type_specifier should end the decl_specifiers and be treated as + the name to be declared (see the thin ice in RM11.1). + Such a language is not easily expressed in a grammar; enumeration + of the permutations is unattractive. We solve the problem by + having a regular grammar for the "soft" items, handling the single + occurrence of the type_specifier in the grammar (we have no choice), + collecting all data in a `struct decspecs' and turning that data + structure into what we want. + + The existence of declarations like + short typedef yepp; + makes all hope of writing a specific grammar for typedefs illusory. +*/ + +decl_specifiers /* non-empty */ (struct decspecs *ds;) + /* Reads a non-empty decl_specifiers and fills the struct + decspecs *ds. + */ +: +[ + other_specifier(ds)+ + [%prefer /* the thin ice in R.M. 11.1 */ + single_type_specifier(ds) other_specifier(ds)* + | + empty + ] +| + single_type_specifier(ds) other_specifier(ds)* +] + {do_decspecs(ds);} +; + +/* 8.1 */ +other_specifier(struct decspecs *ds;): +[ + [ AUTO | STATIC | EXTERN | TYPEDEF | REGISTER ] + { if (ds->ds_sc_given) + error("repeated storage class specifier"); + else { + ds->ds_sc_given = 1; + ds->ds_sc = DOT; + } + } +| + [ SHORT | LONG ] + { if (ds->ds_size) + error("repeated size specifier"); + else ds->ds_size = DOT; + } +| + UNSIGNED + { if (ds->ds_unsigned) + error("unsigned specified twice"); + else ds->ds_unsigned = 1; + } +] +; + +/* 8.2 */ +type_specifier(struct type **tpp;) + /* Used in struct/union declarations and in casts; only the + type is relevant. + */ + {struct decspecs Ds; Ds = null_decspecs;} +: + decl_specifiers(&Ds) + { + if (Ds.ds_sc_given) + error("storage class ignored"); + if (Ds.ds_sc == REGISTER) + error("register ignored"); + } + {*tpp = Ds.ds_type;} +; + +single_type_specifier(struct decspecs *ds;): +[ + TYPE_IDENTIFIER /* this includes INT, CHAR, etc. */ + {idf2type(dot.tk_idf, &ds->ds_type);} +| + struct_or_union_specifier(&ds->ds_type) +| + enum_specifier(&ds->ds_type) +] +; + +/* 8.3 */ +init_declarator_list(struct decspecs *ds;): + init_declarator(ds) + [ ',' init_declarator(ds) ]* +; + +init_declarator(struct decspecs *ds;) + { + struct declarator Dc; + struct expr *expr = (struct expr *) 0; + } +: + { + Dc = null_declarator; + } +[ + declarator(&Dc) + { + reject_params(&Dc); + declare_idf(ds, &Dc, level); + } + initializer(Dc.dc_idf, &expr)? + { + code_declaration(Dc.dc_idf, expr, level, ds->ds_sc); + free_expression(expr); + } +] + {remove_declarator(&Dc);} +; + +/* + Functions yielding pointers to functions must be declared as, e.g., + int (*hehe(par1, par2))() char *par1, *par2; {} + Since the function heading is read as a normal declarator, + we just include the (formal) parameter list in the declarator + description list dc. +*/ +declarator(struct declarator *dc;) + { + arith count; + struct idstack_item *is = 0; + } +: +[ + primary_declarator(dc) + [%while(1) /* int i (M + 2) / 4; + is a function, not an + old-fashioned initialization. + */ + '(' + formal_list(&is) ? /* semantic check later... */ + ')' + { + add_decl_unary(dc, FUNCTION, (arith)0, is); + is = 0; + } + | + arrayer(&count) + {add_decl_unary(dc, ARRAY, count, NO_PARAMS);} + ]* +| + '*' declarator(dc) + {add_decl_unary(dc, POINTER, (arith)0, NO_PARAMS);} +] +; + +primary_declarator(struct declarator *dc;) : +[ + identifier(&dc->dc_idf) +| + '(' declarator(dc) ')' +] +; + +arrayer(arith *sizep;) + { struct expr *expr; } +: + '[' + [ + constant_expression(&expr) + { + array_subscript(expr); + *sizep = expr->VL_VALUE; + free_expression(expr); + } + | + empty + { *sizep = (arith)-1; } + ] + ']' +; + +formal_list (struct idstack_item **is;) +: + formal(is) [ ',' formal(is) ]* +; + +formal(struct idstack_item **is;) + {struct idf *idf; } +: + identifier(&idf) + { + struct idstack_item *new = new_idstack_item(); + + new->is_idf = idf; + new->next = *is; + *is = new; + } +; + +/* Change 2 */ +enum_specifier(struct type **tpp;) + { + struct idf *idf; + arith l = (arith)0; + } +: + ENUM + [ + {declare_struct(ENUM, (struct idf *) 0, tpp);} + enumerator_pack(*tpp, &l) + | + identifier(&idf) + [ + {declare_struct(ENUM, idf, tpp);} + enumerator_pack(*tpp, &l) + | + {apply_struct(ENUM, idf, tpp);} + empty + ] + ] +; + +enumerator_pack(struct type *tp; arith *lp;) : + '{' + enumerator(tp, lp) + [%while(AHEAD != '}') /* >>> conflict on ',' */ + ',' + enumerator(tp, lp) + ]* + ','? /* optional trailing comma */ + '}' + {tp->tp_size = int_size;} + /* fancy implementations that put small enums in 1 byte + or so should start here. + */ +; + +enumerator(struct type *tp; arith *lp;) + { + struct idf *idf; + struct expr *expr; + } +: + identifier(&idf) + [ + '=' + constant_expression(&expr) + { + *lp = expr->VL_VALUE; + free_expression(expr); + } + ]? + {declare_enum(tp, idf, (*lp)++);} +; + +/* 8.5 */ +struct_or_union_specifier(struct type **tpp;) + { + int fund; + struct idf *idf; + } +: + [ STRUCT | UNION ] + {fund = DOT;} + [ + { + declare_struct(fund, (struct idf *)0, tpp); + } + struct_declaration_pack(*tpp) + | + identifier(&idf) + [ + { + declare_struct(fund, idf, tpp); + (idf->id_struct->tg_busy)++; + } + struct_declaration_pack(*tpp) + { + (idf->id_struct->tg_busy)--; + } + | + {apply_struct(fund, idf, tpp);} + empty + ] + ] +; + +struct_declaration_pack(struct type *stp;) + { + struct sdef **sdefp = &stp->tp_sdef; + arith size = (arith)0; + } +: + /* The size is only filled in after the whole struct has + been read, to prevent recursive definitions. + */ + '{' + struct_declaration(stp, &sdefp, &size)+ + '}' + {stp->tp_size = align(size, stp->tp_align);} +; + +struct_declaration(struct type *stp; struct sdef ***sdefpp; arith *szp;) + {struct type *tp;} +: + type_specifier(&tp) + struct_declarator_list(tp, stp, sdefpp, szp) + [ /* in some standard UNIX compilers the semicolon + is optional, would you believe! + */ + ';' + | + empty + {warning("no semicolon after declarator");} + ] +; + +struct_declarator_list(struct type *tp, *stp; + struct sdef ***sdefpp; arith *szp;) +: + struct_declarator(tp, stp, sdefpp, szp) + [ ',' struct_declarator(tp, stp, sdefpp, szp) ]* +; + +struct_declarator(struct type *tp; struct type *stp; + struct sdef ***sdefpp; arith *szp;) + { + struct declarator Dc; + struct field *fd = 0; + } +: + { + Dc = null_declarator; + } +[ + declarator(&Dc) + {reject_params(&Dc);} + bit_expression(&fd)? +| + {Dc.dc_idf = gen_idf();} + bit_expression(&fd) +] + {add_sel(stp, declare_type(tp, &Dc), Dc.dc_idf, sdefpp, szp, fd);} + {remove_declarator(&Dc);} +; + +bit_expression(struct field **fd;) + { struct expr *expr; } +: + { + *fd = new_field(); + } + ':' + constant_expression(&expr) + { + (*fd)->fd_width = expr->VL_VALUE; + free_expression(expr); +#ifdef NOBITFIELD + error("bitfields are not implemented"); +#endif NOBITFIELD + } +; + +/* 8.6 */ +initializer(struct idf *idf; struct expr **expp;) : + [ + '=' + | + empty + {warning("old-fashioned initialization, insert =");} + /* This causes trouble at declarator and at + external_definition, q.v. + */ + ] + initial_value(expp) + { + if (idf->id_def->df_type->tp_fund == FUNCTION) { + error("illegal initialization of function"); + free_expression(*expp); + *expp = 0; + } + init_idf(idf); +#ifdef DEBUG + print_expr("initializer-expression", *expp); +#endif DEBUG + } +; + +/* 8.7 */ +cast(struct type **tpp;) {struct declarator Dc;} : + {Dc = null_declarator;} + '(' + type_specifier(tpp) + abstract_declarator(&Dc) + ')' + {*tpp = declare_type(*tpp, &Dc);} + {remove_declarator(&Dc);} +; + +/* This code is an abject copy of that of 'declarator', for lack of + a two-level grammar. +*/ +abstract_declarator(struct declarator *dc;) + {arith count;} +: +[ + primary_abstract_declarator(dc) + [ + '(' ')' + {add_decl_unary(dc, FUNCTION, (arith)0, NO_PARAMS);} + | + arrayer(&count) + {add_decl_unary(dc, ARRAY, count, NO_PARAMS);} + ]* +| + '*' abstract_declarator(dc) + {add_decl_unary(dc, POINTER, (arith)0, NO_PARAMS);} +] +; + +primary_abstract_declarator(struct declarator *dc;) : +[%if (AHEAD == ')') + empty +| + '(' abstract_declarator(dc) ')' +] +; + +empty: +; + +/* 8.8 */ +/* included in the IDENTIFIER/TYPE_IDENTIFIER mechanism */ diff --git a/lang/cem/cemcom/declar.str b/lang/cem/cemcom/declar.str new file mode 100644 index 000000000..5ecbb70c2 --- /dev/null +++ b/lang/cem/cemcom/declar.str @@ -0,0 +1,45 @@ +/* $Header$ */ +/* DEFINITION OF DECLARATOR DESCRIPTORS */ + +/* A 'declarator' consists of an idf and a linked list of + language-defined unary operations: *, [] and (), called + decl_unary's. +*/ + +struct declarator { + struct declarator *next; + struct idf *dc_idf; + struct decl_unary *dc_decl_unary; + struct idstack_item *dc_fparams; /* params for function */ +}; + + +/* allocation definitions of struct declarator */ +/* ALLOCDEF "declarator" */ +extern char *st_alloc(); +extern struct declarator *h_declarator; +#define new_declarator() ((struct declarator *) \ + st_alloc((char **)&h_declarator, sizeof(struct declarator))) +#define free_declarator(p) st_free(p, h_declarator, sizeof(struct declarator)) + + +#define NO_PARAMS ((struct idstack_item *) 0) + +struct decl_unary { + struct decl_unary *next; + int du_fund; /* POINTER, ARRAY or FUNCTION */ + arith du_count; /* for ARRAYs only */ +}; + + +/* allocation definitions of struct decl_unary */ +/* ALLOCDEF "decl_unary" */ +extern char *st_alloc(); +extern struct decl_unary *h_decl_unary; +#define new_decl_unary() ((struct decl_unary *) \ + st_alloc((char **)&h_decl_unary, sizeof(struct decl_unary))) +#define free_decl_unary(p) st_free(p, h_decl_unary, sizeof(struct decl_unary)) + + +extern struct type *declare_type(); +extern struct declarator null_declarator; diff --git a/lang/cem/cemcom/declarator.c b/lang/cem/cemcom/declarator.c new file mode 100644 index 000000000..c23cfcdc9 --- /dev/null +++ b/lang/cem/cemcom/declarator.c @@ -0,0 +1,106 @@ +/* $Header$ */ +/* D E C L A R A T O R M A N I P U L A T I O N */ + +#include "botch_free.h" /* UF */ +#include "alloc.h" +#include "arith.h" +#include "type.h" +#include "Lpars.h" +#include "declarator.h" +#include "storage.h" +#include "idf.h" +#include "label.h" +#include "expr.h" +#include "sizes.h" + +struct declarator null_declarator; + +struct type * +declare_type(tp, dc) + struct type *tp; + struct declarator *dc; +{ + /* Applies the decl_unary list starting at dc->dc_decl_unary + to the type tp and returns the result. + */ + register struct decl_unary *du = dc->dc_decl_unary; + + while (du) { + tp = construct_type(du->du_fund, tp, du->du_count); + du = du->next; + } + return tp; +} + +add_decl_unary(dc, fund, count, is) + struct declarator *dc; + arith count; + struct idstack_item *is; +{ + /* A decl_unary describing a constructor with fundamental + type fund and with size count is inserted in front of the + declarator dc. + */ + register struct decl_unary *new = new_decl_unary(); + + clear((char *)new, sizeof(struct decl_unary)); + new->next = dc->dc_decl_unary; + new->du_fund = fund; + new->du_count = count; + if (is) { + if (dc->dc_decl_unary) { + /* paramlist only allowed at first decl_unary */ + error("formal parameter list discarded"); + } + else { + /* register the parameters */ + dc->dc_fparams = is; + } + } + dc->dc_decl_unary = new; +} + +remove_declarator(dc) + struct declarator *dc; +{ + /* The decl_unary list starting at dc->dc_decl_unary is + removed. + */ + register struct decl_unary *du = dc->dc_decl_unary; + + while (du) { + struct decl_unary *old_du = du; + + du = du->next; + free_decl_unary(old_du); + } +} + +reject_params(dc) + struct declarator *dc; +{ + /* The declarator is checked to have no parameters, if it + is a function. + */ + if (dc->dc_fparams) { + error("non_empty formal parameter pack"); + del_idfstack(dc->dc_fparams); + dc->dc_fparams = 0; + } +} + +array_subscript(expr) + struct expr *expr; +{ + arith size = expr->VL_VALUE; + + if (size < 0) { + error("negative number of array elements"); + expr->VL_VALUE = (arith)1; + } + else + if (size & ~max_unsigned) { /* absolute ridiculous */ + expr_error(expr, "overflow in array size"); + expr->VL_VALUE = (arith)1; + } +} diff --git a/lang/cem/cemcom/declarator.h b/lang/cem/cemcom/declarator.h new file mode 100644 index 000000000..5ecbb70c2 --- /dev/null +++ b/lang/cem/cemcom/declarator.h @@ -0,0 +1,45 @@ +/* $Header$ */ +/* DEFINITION OF DECLARATOR DESCRIPTORS */ + +/* A 'declarator' consists of an idf and a linked list of + language-defined unary operations: *, [] and (), called + decl_unary's. +*/ + +struct declarator { + struct declarator *next; + struct idf *dc_idf; + struct decl_unary *dc_decl_unary; + struct idstack_item *dc_fparams; /* params for function */ +}; + + +/* allocation definitions of struct declarator */ +/* ALLOCDEF "declarator" */ +extern char *st_alloc(); +extern struct declarator *h_declarator; +#define new_declarator() ((struct declarator *) \ + st_alloc((char **)&h_declarator, sizeof(struct declarator))) +#define free_declarator(p) st_free(p, h_declarator, sizeof(struct declarator)) + + +#define NO_PARAMS ((struct idstack_item *) 0) + +struct decl_unary { + struct decl_unary *next; + int du_fund; /* POINTER, ARRAY or FUNCTION */ + arith du_count; /* for ARRAYs only */ +}; + + +/* allocation definitions of struct decl_unary */ +/* ALLOCDEF "decl_unary" */ +extern char *st_alloc(); +extern struct decl_unary *h_decl_unary; +#define new_decl_unary() ((struct decl_unary *) \ + st_alloc((char **)&h_decl_unary, sizeof(struct decl_unary))) +#define free_decl_unary(p) st_free(p, h_decl_unary, sizeof(struct decl_unary)) + + +extern struct type *declare_type(); +extern struct declarator null_declarator; diff --git a/lang/cem/cemcom/decspecs.c b/lang/cem/cemcom/decspecs.c new file mode 100644 index 000000000..7cc5a2116 --- /dev/null +++ b/lang/cem/cemcom/decspecs.c @@ -0,0 +1,92 @@ +/* $Header$ */ +/* D E C L A R A T I O N S P E C I F I E R C H E C K I N G */ + +#include "Lpars.h" +#include "decspecs.h" +#include "arith.h" +#include "type.h" +#include "level.h" +#include "def.h" + +extern char options[]; +extern int level; +extern char *symbol2str(); + +struct decspecs null_decspecs; + +do_decspecs(ds) + struct decspecs *ds; +{ + /* The provisional decspecs ds as obtained from the program + is turned into a legal consistent decspecs. + */ + struct type *tp = ds->ds_type; + + if (level == L_FORMAL1) + crash("do_decspecs"); + + if ( level == L_GLOBAL && + (ds->ds_sc == AUTO || ds->ds_sc == REGISTER) + ) { + warning("no global %s variable allowed", + symbol2str(ds->ds_sc)); + ds->ds_sc = GLOBAL; + } + + if (level == L_FORMAL2) { + if (ds->ds_sc_given && ds->ds_sc != AUTO && + ds->ds_sc != REGISTER){ + extern char *symbol2str(); + error("%s formal illegal", symbol2str(ds->ds_sc)); + } + ds->ds_sc = FORMAL; + } + /* The tests concerning types require a full knowledge of the + type and will have to be postponed to declare_idf. + */ + + /* some adjustments as described in RM 8.2 */ + if (tp == 0) + tp = int_type; + switch (ds->ds_size) { + case SHORT: + if (tp == int_type) + tp = short_type; + else error("short with illegal type"); + break; + case LONG: + if (tp == int_type) + tp = long_type; + else + if (tp == float_type) + tp = double_type; + else error("long with illegal type"); + break; + } + if (ds->ds_unsigned) { + switch (tp->tp_fund) { + case CHAR: + if (options['R']) + warning("unsigned char not allowed"); + tp = uchar_type; + break; + case SHORT: + if (options['R']) + warning("unsigned short not allowed"); + tp = ushort_type; + break; + case INT: + tp = uint_type; + break; + case LONG: + if (options['R']) + warning("unsigned long not allowed"); + tp = ulong_type; + break; + default: + error("unsigned with illegal type"); + break; + } + } + ds->ds_type = tp; +} diff --git a/lang/cem/cemcom/decspecs.h b/lang/cem/cemcom/decspecs.h new file mode 100644 index 000000000..0b1598c0e --- /dev/null +++ b/lang/cem/cemcom/decspecs.h @@ -0,0 +1,23 @@ +/* $Header$ */ +/* DECLARATION SPECIFIER DEFINITION */ + +struct decspecs { + struct decspecs *next; + struct type *ds_type; /* single type */ + int ds_sc_given; /* 1 if the st. class is explicitly given */ + int ds_sc; /* storage class, given or implied */ + int ds_size; /* LONG, SHORT or 0 */ + int ds_unsigned; /* 0 or 1 */ +}; + + +/* allocation definitions of struct decspecs */ +/* ALLOCDEF "decspecs" */ +extern char *st_alloc(); +extern struct decspecs *h_decspecs; +#define new_decspecs() ((struct decspecs *) \ + st_alloc((char **)&h_decspecs, sizeof(struct decspecs))) +#define free_decspecs(p) st_free(p, h_decspecs, sizeof(struct decspecs)) + + +extern struct decspecs null_decspecs; diff --git a/lang/cem/cemcom/decspecs.str b/lang/cem/cemcom/decspecs.str new file mode 100644 index 000000000..0b1598c0e --- /dev/null +++ b/lang/cem/cemcom/decspecs.str @@ -0,0 +1,23 @@ +/* $Header$ */ +/* DECLARATION SPECIFIER DEFINITION */ + +struct decspecs { + struct decspecs *next; + struct type *ds_type; /* single type */ + int ds_sc_given; /* 1 if the st. class is explicitly given */ + int ds_sc; /* storage class, given or implied */ + int ds_size; /* LONG, SHORT or 0 */ + int ds_unsigned; /* 0 or 1 */ +}; + + +/* allocation definitions of struct decspecs */ +/* ALLOCDEF "decspecs" */ +extern char *st_alloc(); +extern struct decspecs *h_decspecs; +#define new_decspecs() ((struct decspecs *) \ + st_alloc((char **)&h_decspecs, sizeof(struct decspecs))) +#define free_decspecs(p) st_free(p, h_decspecs, sizeof(struct decspecs)) + + +extern struct decspecs null_decspecs; diff --git a/lang/cem/cemcom/def.h b/lang/cem/cemcom/def.h new file mode 100644 index 000000000..abb281559 --- /dev/null +++ b/lang/cem/cemcom/def.h @@ -0,0 +1,37 @@ +/* $Header$ */ +/* IDENTIFIER DEFINITION DESCRIPTOR */ + +struct def { /* for ordinary tags */ + struct def *next; + int df_level; + struct type *df_type; + int df_sc; /* may be: + GLOBAL, STATIC, EXTERN, IMPLICIT, + TYPEDEF, + FORMAL, AUTO, + ENUM, LABEL + */ + int df_register; /* REG_NONE, REG_DEFAULT or REG_BONUS */ + char df_initialized; /* an initialization has been generated */ + char df_alloc; /* 0, ALLOC_SEEN or ALLOC_DONE */ + char df_used; /* set if idf is used */ + char df_formal_array; /* to warn if sizeof is taken */ + arith df_address; +}; + +#define ALLOC_SEEN 1 /* an allocating declaration has been seen */ +#define ALLOC_DONE 2 /* the allocating declaration has been done */ + +#define REG_NONE 0 /* no register candidate */ +#define REG_DEFAULT 1 /* register candidate, not declared as such */ +#define REG_BONUS 10 /* register candidate, declared as such */ + + +/* allocation definitions of struct def */ +/* ALLOCDEF "def" */ +extern char *st_alloc(); +extern struct def *h_def; +#define new_def() ((struct def *) \ + st_alloc((char **)&h_def, sizeof(struct def))) +#define free_def(p) st_free(p, h_def, sizeof(struct def)) + diff --git a/lang/cem/cemcom/def.str b/lang/cem/cemcom/def.str new file mode 100644 index 000000000..abb281559 --- /dev/null +++ b/lang/cem/cemcom/def.str @@ -0,0 +1,37 @@ +/* $Header$ */ +/* IDENTIFIER DEFINITION DESCRIPTOR */ + +struct def { /* for ordinary tags */ + struct def *next; + int df_level; + struct type *df_type; + int df_sc; /* may be: + GLOBAL, STATIC, EXTERN, IMPLICIT, + TYPEDEF, + FORMAL, AUTO, + ENUM, LABEL + */ + int df_register; /* REG_NONE, REG_DEFAULT or REG_BONUS */ + char df_initialized; /* an initialization has been generated */ + char df_alloc; /* 0, ALLOC_SEEN or ALLOC_DONE */ + char df_used; /* set if idf is used */ + char df_formal_array; /* to warn if sizeof is taken */ + arith df_address; +}; + +#define ALLOC_SEEN 1 /* an allocating declaration has been seen */ +#define ALLOC_DONE 2 /* the allocating declaration has been done */ + +#define REG_NONE 0 /* no register candidate */ +#define REG_DEFAULT 1 /* register candidate, not declared as such */ +#define REG_BONUS 10 /* register candidate, declared as such */ + + +/* allocation definitions of struct def */ +/* ALLOCDEF "def" */ +extern char *st_alloc(); +extern struct def *h_def; +#define new_def() ((struct def *) \ + st_alloc((char **)&h_def, sizeof(struct def))) +#define free_def(p) st_free(p, h_def, sizeof(struct def)) + diff --git a/lang/cem/cemcom/domacro.c b/lang/cem/cemcom/domacro.c new file mode 100644 index 000000000..5407591a6 --- /dev/null +++ b/lang/cem/cemcom/domacro.c @@ -0,0 +1,673 @@ +/* $Header$ */ +/* PREPROCESSOR: CONTROLLINE INTERPRETER */ + +#include "interface.h" +#include "arith.h" +#include "LLlex.h" +#include "Lpars.h" +#include "debug.h" +#include "idf.h" +#include "input.h" +#include "nopp.h" + +#ifndef NOPP +#include "ifdepth.h" +#include "botch_free.h" +#include "nparams.h" +#include "parbufsize.h" +#include "textsize.h" +#include "idfsize.h" + +#include "assert.h" +#include "alloc.h" +#include "class.h" +#include "macro.h" +#include "storage.h" + +IMPORT char *inctable[]; /* list of include directories */ +PRIVATE char ifstack[IFDEPTH]; /* if-stack: the content of an entry is */ + /* 1 if a corresponding ELSE has been */ + /* encountered. */ +PRIVATE int nestlevel = -1; /* initially no nesting level. */ + +PRIVATE struct idf * +GetIdentifier() +{ + /* returns a pointer to the descriptor of the identifier that is + read from the input stream. A null-pointer is returned if + the input does not contain an identifier. + The substitution of macros is disabled. + */ + int tok; + struct token tk; + + ReplaceMacros = 0; + tok = GetToken(&tk); + ReplaceMacros = 1; + return tok == IDENTIFIER ? tk.tk_idf : (struct idf *)0; +} + +/* domacro() is the control line interpreter. The '#' has already + been read by the lexical analyzer by which domacro() is called. + The token appearing directly after the '#' is obtained by calling + the basic lexical analyzing function GetToken() and is interpreted + to perform the action belonging to that token. + An error message is produced when the token is not recognized, + i.e. it is not one of "define" .. "undef" , integer or newline. +*/ +EXPORT +domacro() +{ + struct token tk; /* the token itself */ + + EoiForNewline = 1; + SkipEscNewline = 1; + switch(GetToken(&tk)) { /* select control line action */ + case IDENTIFIER: /* is it a macro keyword? */ + switch (tk.tk_idf->id_resmac) { + case K_DEFINE: /* "define" */ + do_define(); + break; + case K_ELIF: /* "elif" */ + do_elif(); + break; + case K_ELSE: /* "else" */ + do_else(); + break; + case K_ENDIF: /* "endif" */ + do_endif(); + break; + case K_IF: /* "if" */ + do_if(); + break; + case K_IFDEF: /* "ifdef" */ + do_ifdef(1); + break; + case K_IFNDEF: /* "ifndef" */ + do_ifdef(0); + break; + case K_INCLUDE: /* "include" */ + do_include(); + break; + case K_LINE: /* "line" */ + /* set LineNumber and FileName according to + the arguments. + */ + if (GetToken(&tk) != INTEGER) { + lexerror("#line without linenumber"); + SkipRestOfLine(); + } + else + do_line((unsigned int)tk.tk_ival); + break; + case K_UNDEF: /* "undef" */ + do_undef(); + break; + default: + /* invalid word seen after the '#' */ + lexerror("%s: unknown control", tk.tk_idf->id_text); + SkipRestOfLine(); + } + break; + case INTEGER: /* # []? */ + do_line((unsigned int)tk.tk_ival); + break; + case EOI: /* only `#' on this line: do nothing, ignore */ + break; + default: /* invalid token following '#' */ + lexerror("illegal # line"); + SkipRestOfLine(); + } + EoiForNewline = 0; + SkipEscNewline = 0; +} + +PRIVATE +skip_block() +{ + /* skip_block() skips the input from + 1) a false #if, #ifdef, #ifndef or #elif until the + corresponding #elif (resulting in true), #else or + #endif is read. + 2) a #else corresponding to a true #if, #ifdef, + #ifndef or #elif until the corresponding #endif is + seen. + */ + register int ch; + register skiplevel = nestlevel; /* current nesting level */ + struct token tk; + + NoUnstack++; + for (;;) { + LoadChar(ch); /* read first character after newline */ + if (ch != '#') { + if (ch == EOI) { + NoUnstack--; + return; + } + SkipRestOfLine(); + continue; + } + if (GetToken(&tk) != IDENTIFIER) { + SkipRestOfLine(); + continue; + } + /* an IDENTIFIER: look for #if, #ifdef and #ifndef + without interpreting them. + Interpret #else, #elif and #endif if they occur + on the same level. + */ + switch(tk.tk_idf->id_resmac) { + case K_IF: + case K_IFDEF: + case K_IFNDEF: + push_if(); + break; + case K_ELIF: + if (nestlevel == skiplevel) { + nestlevel--; + push_if(); + if (ifexpr()) { + NoUnstack--; + return; + } + } + break; + case K_ELSE: + ++(ifstack[nestlevel]); + if (nestlevel == skiplevel) { + SkipRestOfLine(); + NoUnstack--; + return; + } + break; + case K_ENDIF: + ASSERT(nestlevel >= 0); + if (nestlevel == skiplevel) { + SkipRestOfLine(); + nestlevel--; + NoUnstack--; + return; + } + nestlevel--; + break; + } + } +} + +PRIVATE +ifexpr() +{ + /* ifexpr() returns whether the restricted constant + expression following #if or #elif evaluates to true. This + is done by calling the LLgen generated subparser for + constant expressions. The result of this expression will + be given in the extern long variable "ifval". + */ + IMPORT arith ifval; + int errors = err_occurred; + + ifval = (arith)0; + AccDefined = 1; + UnknownIdIsZero = 1; + PushLex(); /* NEW parser */ + If_expr(); /* invoke constant expression parser */ + PopLex(); /* OLD parser */ + AccDefined = 0; + UnknownIdIsZero = 0; + return (errors == err_occurred) && (ifval != (arith)0); +} + +PRIVATE +do_include() +{ + /* do_include() performs the inclusion of a file. + */ + char *filenm; + int tok; + struct token tk; + + AccFileSpecifier = 1; + if (((tok = GetToken(&tk)) == FILESPECIFIER) || tok == STRING) + filenm = tk.tk_str; + else { + lexerror("bad include syntax"); + filenm = (char *)0; + } + AccFileSpecifier = 0; + SkipRestOfLine(); + if (filenm && !InsertFile(filenm, &inctable[tok == FILESPECIFIER])) + lexerror("cannot find include file \"%s\"", filenm); +} + +PRIVATE +do_define() +{ + /* do_define() interprets a #define control line. + */ + struct idf *id; /* the #defined identifier's descriptor */ + int nformals = -1; /* keep track of the number of formals */ + char *formals[NPARAMS]; /* pointers to the names of the formals */ + char parbuf[PARBUFSIZE]; /* names of formals */ + char *repl_text; /* start of the replacement text */ + int length; /* length of the replacement text */ + register ch; + char *get_text(); + + /* read the #defined macro's name */ + if (!(id = GetIdentifier())) { + lexerror("#define: illegal macro name"); + SkipRestOfLine(); + return; + } + /* there is a formal parameter list if the identifier is + followed immediately by a '('. + */ + LoadChar(ch); + if (ch == '(') { + if ((nformals = getparams(formals, parbuf)) == -1) { + SkipRestOfLine(); + return; /* an error occurred */ + } + LoadChar(ch); + } + /* read the replacement text if there is any */ + ch = skipspaces(ch); /* find first character of the text */ + ASSERT(ch != EOI); + if (class(ch) == STNL) { + /* Treat `#define something' as `#define something ""' + */ + repl_text = ""; + length = 0; + } + else { + PushBack(); + repl_text = get_text((nformals > 0) ? formals : 0, &length); + } + macro_def(id, repl_text, nformals, length, NOFLAG); + LineNumber++; +} + +PRIVATE +push_if() +{ + if (nestlevel >= IFDEPTH) + fatal("too many nested #if/#ifdef/#ifndef"); + else + ifstack[++nestlevel] = 0; +} + +PRIVATE +do_elif() +{ + if (nestlevel < 0 || (ifstack[nestlevel])) { + /* invalid elif encountered.. */ + lexerror("#elif without corresponding #if"); + SkipRestOfLine(); + } + else { + /* restart at this level as if a #if + is detected. + */ + nestlevel--; + push_if(); + skip_block(); + } +} + +PRIVATE +do_else() +{ + SkipRestOfLine(); + if (nestlevel < 0 || (ifstack[nestlevel])) + lexerror("#else without corresponding #if"); + else { /* mark this level as else-d */ + ++(ifstack[nestlevel]); + skip_block(); + } +} + +PRIVATE +do_endif() +{ + SkipRestOfLine(); + if (nestlevel-- < 0) + lexerror("#endif without corresponding #if"); +} + +PRIVATE +do_if() +{ + push_if(); + if (!ifexpr()) /* a false #if/#elif expression */ + skip_block(); +} + +PRIVATE +do_ifdef(how) +{ + struct idf *id; + + /* how == 1 : ifdef; how == 0 : ifndef + */ + push_if(); + if (id = GetIdentifier()) { + if ((how && !(id && id->id_macro)) || + (!how && id && id->id_macro)) + { /* this id is not defined */ + skip_block(); + } + else + SkipRestOfLine(); + } + else { + lexerror("illegal #ifdef construction"); + SkipRestOfLine(); + } +} + +PRIVATE +do_undef() +{ + struct idf *id; + + /* Forget a macro definition. */ + if (id = GetIdentifier()) { + if (id && id->id_macro) { /* forget the macro */ + free_macro(id->id_macro); + id->id_macro = (struct macro *) 0; + } + /* else: don't complain */ + } + else + lexerror("illegal #undef construction"); + SkipRestOfLine(); +} + +PRIVATE +do_line(l) + unsigned int l; +{ + struct token tk; + + LineNumber = l; + /* is there a filespecifier? */ + if (GetToken(&tk) == STRING) + FileName = tk.tk_str; + SkipRestOfLine(); +} + +PRIVATE int +getparams(buf, parbuf) + char *buf[]; + char parbuf[]; +{ + /* getparams() reads the formal parameter list of a macro + definition. + The number of parameters is returned. + As a formal parameter list is expected when calling this + routine, -1 is returned if an error is detected, for + example: + #define one(1), where 1 is not an identifier. + Note that the '(' has already been eaten. + The names of the formal parameters are stored into parbuf. + */ + register count = 0; + register c; + register char *ptr = &parbuf[0]; + + LoadChar(c); + c = skipspaces(c); + if (c == ')') { /* no parameters: #define name() */ + buf[0] = (char *) 0; + return 0; + } + for (;;) { /* eat the formal parameter list */ + if (class(c) != STIDF) { /* not an identifier */ + lexerror("#define: bad formal parameter"); + return -1; + } + buf[count++] = ptr; /* name of the formal */ + *ptr++ = c; + if (ptr >= &parbuf[PARBUFSIZE]) + fatal("formal parameter buffer overflow"); + do { /* eat the identifier name */ + LoadChar(c); + *ptr++ = c; + if (ptr >= &parbuf[PARBUFSIZE]) + fatal("formal parameter buffer overflow"); + } while (in_idf(c)); + *(ptr - 1) = '\0'; /* mark end of the name */ + c = skipspaces(c); + if (c == ')') { /* end of the formal parameter list */ + buf[count] = (char *) 0; + return count; + } + if (c != ',') { + lexerror("#define: bad formal parameter list"); + return -1; + } + LoadChar(c); + c = skipspaces(c); + } +} + +EXPORT +macro_def(id, text, nformals, length, flags) + struct idf *id; + char *text; +{ + register struct macro *newdef = id->id_macro; + + /* macro_def() puts the contents and information of a macro + definition into a structure and stores it into the symbol + table entry belonging to the name of the macro. + A warning is given if the definition overwrites another + (unless predefined!) + */ + if (newdef) { /* is there a redefinition? */ + if ((newdef->mc_flag & PREDEF) == 0) { + if (macroeq(newdef->mc_text, text)) + return; + lexwarning("redefine \"%s\"", id->id_text); + } + /* else: overwrite pre-definition */ + } + else + id->id_macro = newdef = new_macro(); + newdef->mc_text = text; /* replacement text */ + newdef->mc_nps = nformals; /* nr of formals */ + newdef->mc_length = length; /* length of repl. text */ + newdef->mc_flag = flags; /* special flags */ +} + +PRIVATE int +find_name(nm, index) + char *nm, *index[]; +{ + /* find_name() returns the index of "nm" in the namelist + "index" if it can be found there. 0 is returned if it is + not there. + */ + register char **ip = &index[0]; + + while (*ip) + if (strcmp(nm, *ip++) == 0) + return ip - &index[0]; + /* arrived here, nm is not in the name list. */ + return 0; +} + +PRIVATE char * +get_text(formals, length) + char *formals[]; + int *length; +{ + /* get_text() copies the replacement text of a macro + definition with zero, one or more parameters, thereby + substituting each formal parameter by a special character + (non-ascii: 0200 & (order-number in the formal parameter + list)) in order to substitute this character later by the + actual parameter. The replacement text is copied into + itself because the copied text will contain fewer or the + same amount of characters. The length of the replacement + text is returned. + + Implementation: + finite automaton : we are only interested in + identifiers, because they might be replaced by some actual + parameter. Other tokens will not be seen as such. + */ + register c; + register text_size; + char *text = Malloc(text_size = ITEXTSIZE); + register pos = 0; + + LoadChar(c); + + while ((c != EOI) && (class(c) != STNL)) { + if (c == '\\') { /* check for "\\\n" */ + LoadChar(c); + if (c == '\n') { + /* more than one line is used for the + replacement text. Replace "\\\n" by " ". + */ + text[pos++] = ' '; + ++LineNumber; + LoadChar(c); + } + else + text[pos++] = '\\'; + if (pos == text_size) + text = Srealloc(text, text_size += RTEXTSIZE); + } + else + if ( c == '/') { + LoadChar(c); + if (c == '*') { + skipcomment(); + text[pos++] = ' '; + LoadChar(c); + } + else + text[pos++] = '/'; + if (pos == text_size) + text = Srealloc(text, text_size += RTEXTSIZE); + } + else + if (formals && class(c) == STIDF) { + char id_buf[IDFSIZE + 1]; + register id_size = 0; + register n; + + /* read identifier: it may be a formal parameter */ + id_buf[id_size++] = c; + do { + LoadChar(c); + if (id_size <= IDFSIZE) + id_buf[id_size++] = c; + } while (in_idf(c)); + id_buf[--id_size] = '\0'; + if (n = find_name(id_buf, formals)) { + /* construct the formal parameter mark */ + text[pos++] = FORMALP | (char) n; + if (pos == text_size) + text = Srealloc(text, + text_size += RTEXTSIZE); + } + else { + register char *ptr = &id_buf[0]; + + while (pos + id_size >= text_size) + text = Srealloc(text, + text_size += RTEXTSIZE); + while (text[pos++] = *ptr++) ; + pos--; + } + } + else { + text[pos++] = c; + if (pos == text_size) + text = Srealloc(text, text_size += RTEXTSIZE); + LoadChar(c); + } + } + text[pos++] = '\0'; + *length = pos - 1; + return text; +} + +#define BLANK(ch) ((ch == ' ') || (ch == '\t')) + +/* macroeq() decides whether two macro replacement texts are + identical. This version compares the texts, which occur + as strings, without taking care of the leading and trailing + blanks (spaces and tabs). +*/ +PRIVATE +macroeq(s, t) + register char *s, *t; +{ + + /* skip leading spaces */ + while (BLANK(*s)) s++; + while (BLANK(*t)) t++; + /* first non-blank encountered in both strings */ + /* The actual comparison loop: */ + while (*s && *s == *t) + s++, t++; + /* two cases are possible when arrived here: */ + if (*s == '\0') { /* *s == '\0' */ + while (BLANK(*t)) t++; + return *t == '\0'; + } + else { /* *s != *t */ + while (BLANK(*s)) s++; + while (BLANK(*t)) t++; + return (*s == '\0') && (*t == '\0'); + } +} +#else NOPP +EXPORT +domacro() +{ + int tok; + struct token tk; + + EoiForNewline = 1; + SkipEscNewline = 1; + if ((tok = GetToken(&tk)) == IDENTIFIER) { + if (strcmp(tk.tk_idf->id_text, "line") != 0) { + error("illegal # line"); + SkipRestOfLine(); + return; + } + tok = GetToken(&tk); + } + if (tok != INTEGER) { + error("illegal # line"); + SkipRestOfLine(); + return; + } + LineNumber = tk.tk_ival; + if ((tok = GetToken(&tk)) == STRING) + FileName = tk.tk_str; + else + if (tok != EOI) { + error("illegal # line"); + SkipRestOfLine(); + } + EoiForNewline = 0; + SkipEscNewline = 0; +} +#endif NOPP + +PRIVATE +SkipRestOfLine() +{ + /* we do a PushBack because we don't want to skip the next line + if the last character was a newline + */ + PushBack(); + skipline(); +} diff --git a/lang/cem/cemcom/dumpidf.c b/lang/cem/cemcom/dumpidf.c new file mode 100644 index 000000000..e370512b8 --- /dev/null +++ b/lang/cem/cemcom/dumpidf.c @@ -0,0 +1,367 @@ +/* $Header$ */ +/* DUMP ROUTINES */ + +#include "debug.h" + +#ifdef DEBUG +#include "nopp.h" +#include "nobitfield.h" +#include "arith.h" +#include "stack.h" +#include "idf.h" +#include "def.h" +#include "type.h" +#include "struct.h" +#include "field.h" +#include "Lpars.h" +#include "label.h" +#include "expr.h" + +/* Some routines (symbol2str, token2str, type2str) which should have + * yielded strings are written to yield a pointer to a transient piece + * of memory, containing the string, since this is the only reasonable + * thing to do in C. `Transient' means that the result may soon + * disappear, which is generally not a problem, since normally it is + * consumed immediately. Sometimes we need more than one of them, and + * MAXTRANS is the maximum number we will need simultaneously. + */ +#define MAXTRANS 6 + +extern char options[]; + +extern char *sprintf(); + +extern struct idf *idf_hashtable[]; +extern char *symbol2str(), *type2str(), *next_transient(); + +enum sdef_kind {selector, field}; /* parameter for dumpsdefs */ + +static int dumplevel; + +static +newline() { + int dl = dumplevel; + + printf("\n"); + while (dl >= 2) { + printf("\t"); + dl -= 2; + } + if (dl) + printf(" "); +} + +dumpidftab(msg, opt) + char msg[]; +{ + /* Dumps the identifier table in readable form (but in + arbitrary order). + Unless opt & 1, macros are not dumped. + Unless opt & 2, reserved identifiers are not dumped. + Unless opt & 4, universal identifiers are not dumped. + */ + int i; + + printf(">>> DUMPIDF, %s (start)", msg); + dumpstack(); + for (i = 0; i < HASHSIZE; i++) { + struct idf *notch = idf_hashtable[i]; + + while (notch) { + dumpidf(notch, opt); + notch = notch->next; + } + } + newline(); + printf(">>> DUMPIDF, %s (end)\n", msg); +} + +dumpstack() { + /* Dumps the identifier stack, starting at the top. + */ + struct stack_level *stl = local_level; + + while (stl) { + struct stack_entry *se = stl->sl_entry; + + newline(); + printf("%3d: ", stl->sl_level); + while (se) { + printf("%s ", se->se_idf->id_text); + se = se->next; + } + stl = stl->sl_previous; + } + printf("\n"); +} + +dumpidf(idf, opt) + struct idf *idf; +{ + /* All information about the identifier idf is divulged in a + hopefully readable format. + */ + int started = 0; + + if (!idf) + return; +#ifndef NOPP + if ((opt&1) && idf->id_macro) { + if (!started++) { + newline(); + printf("%s:", idf->id_text); + } + printf(" macro"); + } +#endif NOPP + if ((opt&2) && idf->id_reserved) { + if (!started++) { + newline(); + printf("%s:", idf->id_text); + } + printf(" reserved: %d;", idf->id_reserved); + } + if (idf->id_def && ((opt&4) || idf->id_def->df_level)) { + if (!started++) { + newline(); + printf("%s:", idf->id_text); + } + dumpdefs(idf->id_def, opt); + } + if (idf->id_sdef) { + if (!started++) { + newline(); + printf("%s:", idf->id_text); + } + dumpsdefs(idf->id_sdef, selector); + } + if (idf->id_struct) { + if (!started++) { + newline(); + printf("%s:", idf->id_text); + } + dumptags(idf->id_struct); + } + if (idf->id_enum) { + if (!started++) { + newline(); + printf("%s:", idf->id_text); + } + dumptags(idf->id_enum); + } +} + +dumpdefs(def, opt) + register struct def *def; +{ + dumplevel++; + while (def && ((opt&4) || def->df_level)) { + newline(); + printf("L%d: %s %s%s%s%s%s %lo;", + def->df_level, + symbol2str(def->df_sc), + (def->df_register != REG_NONE) ? "reg " : "", + def->df_initialized ? "init'd " : "", + def->df_used ? "used " : "", + type2str(def->df_type), + def->df_sc == ENUM ? ", =" : " at", + def->df_address + ); + def = def->next; + } + dumplevel--; +} + +dumptags(tag) + struct tag *tag; +{ + dumplevel++; + while (tag) { + register struct type *tp = tag->tg_type; + register int fund = tp->tp_fund; + + newline(); + printf("L%d: %s %s", + tag->tg_level, + fund == STRUCT ? "struct" : + fund == UNION ? "union" : + fund == ENUM ? "enum" : "", + tp->tp_idf->id_text + ); + if (is_struct_or_union(fund)) { + printf(" {"); + dumpsdefs(tp->tp_sdef, field); + newline(); + printf("}"); + } + printf(";"); + tag = tag->next; + } + dumplevel--; +} + +dumpsdefs(sdef, sdk) + struct sdef *sdef; + enum sdef_kind sdk; +{ + /* Since sdef's are members of two chains, there are actually + two dumpsdefs's, one following the chain of all selectors + belonging to the same idf, starting at idf->id_sdef; + and the other following the chain of all selectors belonging + to the same struct, starting at stp->tp_sdef. + */ + + dumplevel++; + while (sdef) { + newline(); + printf("L%d: ", sdef->sd_level); +#ifndef NOBITFIELD + if (sdk == selector) +#endif NOBITFIELD + printf("selector %s at offset %lu in %s;", + type2str(sdef->sd_type), + sdef->sd_offset, type2str(sdef->sd_stype) + ); +#ifndef NOBITFIELD + else printf("field %s at offset %lu;", + type2str(sdef->sd_type), sdef->sd_offset + ); +#endif NOBITFIELD + sdef = (sdk == selector ? sdef->next : sdef->sd_sdef); + } + dumplevel--; +} + +char * +type2str(tp) + struct type *tp; +{ + /* Yields a pointer to a one-line description of the type tp. + */ + char *buf = next_transient(); + int ops = 1; + + buf[0] = '\0'; + if (!tp) { + sprintf(buf, ""); + return buf; + } + sprintf(buf, "(@%lx, #%ld, &%d) ", tp, (long)tp->tp_size, tp->tp_align); + while (ops) { + switch (tp->tp_fund) { + case POINTER: + sprintf(buf, "%spointer to ", buf); + break; + case ARRAY: + sprintf(buf, "%sarray [%ld] of ", buf, tp->tp_size); + break; + case FUNCTION: + sprintf(buf, "%sfunction yielding ", buf); + break; + default: + sprintf(buf, "%s%s%s", buf, + tp->tp_unsigned ? "unsigned " : "", + symbol2str(tp->tp_fund) + ); + if (tp->tp_idf) + sprintf(buf, "%s %s", buf, + tp->tp_idf->id_text); +#ifndef NOBITFIELD + if (tp->tp_field) { + struct field *fd = tp->tp_field; + + sprintf(buf, "%s [s=%ld,w=%ld]", buf, + fd->fd_shift, fd->fd_width); + } +#endif NOBITFIELD + ops = 0; + break; + } + tp = tp->tp_up; + } + return buf; +} + +char * /* the ultimate transient buffer supplier */ +next_transient() { + static int bnum; + static char buf[MAXTRANS][300]; + + if (++bnum == MAXTRANS) + bnum = 0; + return buf[bnum]; +} + +print_expr(msg, expr) + char msg[]; + struct expr *expr; +{ + /* Provisional routine to print an expression preceded by a + message msg. + */ + if (options['x']) { + printf("\n%s: ", msg); + printf("(L=line, T=type, r/lV=r/lvalue, F=flags, D=depth)\n"); + p1_expr(0, expr); + } +} + +p1_expr(lvl, expr) + struct expr *expr; +{ + extern char *type2str(), *symbol2str(); + + p1_indent(lvl); + if (!expr) { + printf("NILEXPR\n"); + return; + } + printf("expr: L=%u, T=%s, %cV, F=%02o, D=%d, %s: ", + expr->ex_line, + type2str(expr->ex_type), + expr->ex_lvalue ? 'l' : 'r', + expr->ex_flags, + expr->ex_depth, + expr->ex_class == Value ? "Value" : + expr->ex_class == String ? "String" : + expr->ex_class == Float ? "Float" : + expr->ex_class == Oper ? "Oper" : + expr->ex_class == Type ? "Type" : "UNKNOWN CLASS" + ); + switch (expr->ex_class) { + struct value *v; + struct oper *o; + case Value: + v = &expr->ex_object.ex_value; + if (v->vl_idf) + printf("%s + ", v->vl_idf->id_text); + printf(expr->ex_type->tp_unsigned ? "%lu\n" : "%ld\n", + v->vl_value); + break; + case String: + printf("%s\n", expr->SG_VALUE); + break; + case Float: + printf("%s\n", expr->FL_VALUE); + break; + case Oper: + o = &expr->ex_object.ex_oper; + printf("\n"); + p1_expr(lvl+1, o->op_left); + p1_indent(lvl); printf("%s\n", symbol2str(o->op_oper)); + p1_expr(lvl+1, o->op_right); + break; + case Type: + printf("\n"); + break; + default: + printf("UNKNOWN CLASS\n"); + break; + } +} + +p1_indent(lvl) { + while (lvl--) + printf(" "); +} +#endif DEBUG diff --git a/lang/cem/cemcom/em.c b/lang/cem/cemcom/em.c new file mode 100644 index 000000000..62c6024d9 --- /dev/null +++ b/lang/cem/cemcom/em.c @@ -0,0 +1,219 @@ +/* $Header$ */ +/* EM CODE OUTPUT ROUTINES */ + +#define CMODE 0644 +#define MAX_ARG_CNT 32 + +#include "em.h" +#include "system.h" +#include "bufsiz.h" +#include "arith.h" +#include "label.h" + +/* + putbyte(), C_open() and C_close() are the basic routines for + respectively write on, open and close the output file. + The put_*() functions serve as formatting functions of the + various EM language constructs. + See "Description of a Machine Architecture for use with + Block Structured Languages" par. 11.2 for the meaning of these + names. +*/ + +/* supply a kind of buffered output */ +#define flush(x) sys_write(ofd, &obuf[0], x); + +static char obuf[BUFSIZ]; +static char *opp = &obuf[0]; +int ofd = -1; + +putbyte(b) /* shouldn't putbyte() be a macro ??? (EB) */ + int b; +{ + if (opp >= &obuf[BUFSIZ]) { /* flush if buffer overflows */ + flush(BUFSIZ); + opp = &obuf[0]; + } + *opp++ = (char) b; +} + +C_open(nm) /* open file for compact code output */ + char *nm; +{ + if (nm == 0) + ofd = 1; /* standard output */ + else + if ((ofd = sys_creat(nm, CMODE)) < 0) + return 0; + return 1; +} + +C_close() +{ + flush(opp - &obuf[0]); + opp = obuf; /* reset opp */ + sys_close(ofd); + ofd = -1; +} + +C_busy() +{ + return ofd >= 0; /* true if code is being generated */ +} + +/*** front end for generating long CON/ROM lists ***/ +static arg_count; +static arg_rom; + +DC_start(rom){ + arg_count = 0; + arg_rom = rom; +} + +DC_check(){ + if (arg_count++ >= MAX_ARG_CNT) { + switch (arg_rom) { + case ps_con: + C_con_end(); + C_con_begin(); + break; + case ps_rom: + C_rom_end(); + C_rom_begin(); + break; + } + } +} + +/*** the compact code generating routines ***/ +#define fit16i(x) ((x) >= (long)0xFFFF8000 && (x) <= (long)0x00007FFF) +#define fit8u(x) ((x) <= 0xFF) /* x is already unsigned */ + +put_ilb(l) + label l; +{ + if (fit8u(l)) { + put8(sp_ilb1); + put8((int)l); + } + else { + put8(sp_ilb2); + put16(l); + } +} + +put_dlb(l) + label l; +{ + if (fit8u(l)) { + put8(sp_dlb1); + put8((int)l); + } + else { + put8(sp_dlb2); + put16(l); + } +} + +put_cst(l) + arith l; +{ + if (l >= (arith) -sp_zcst0 && l < (arith) (sp_ncst0 - sp_zcst0)) { + /* we can convert 'l' to an int because its value + can be stored in a byte. + */ + put8((int) l + (sp_zcst0 + sp_fcst0)); + } + else + if (fit16i(l)) { /* the cast from long to int causes no trouble here */ + put8(sp_cst2); + put16((int) l); + } + else { + put8(sp_cst4); + put32(l); + } +} + +put_doff(l, v) + label l; + arith v; +{ + if (v == 0) + put_dlb(l); + else { + put8(sp_doff); + put_dlb(l); + put_cst(v); + } +} + +put_noff(s, v) + char *s; + arith v; +{ + if (v == 0) + put_dnam(s); + else { + put8(sp_doff); + put_dnam(s); + put_cst(v); + } +} + +put_dnam(s) + char *s; +{ + put8(sp_dnam); + put_str(s); +} + +put_pnam(s) + char *s; +{ + put8(sp_pnam); + put_str(s); +} + +#ifdef ____ +put_fcon(s, sz) + char *s; + arith sz; +{ + put8(sp_fcon); + put_cst(sz); + put_str(s); +} +#endif ____ + +put_wcon(sp, v, sz) /* sp_icon, sp_ucon or sp_fcon with int repr */ + int sp; + char *v; + arith sz; +{ + /* how 'bout signextension int --> long ??? */ + put8(sp); + put_cst(sz); + put_str(v); +} + +put_str(s) + char *s; +{ + register int len; + + put_cst((arith) (len = strlen(s))); + while (--len >= 0) + put8(*s++); +} + +put_cstr(s) + char *s; +{ + register int len = prepare_string(s); + + put8(sp_scon); + put_cst((arith) len); + while (--len >= 0) + put8(*s++); +} diff --git a/lang/cem/cemcom/em.h b/lang/cem/cemcom/em.h new file mode 100644 index 000000000..7d9de7899 --- /dev/null +++ b/lang/cem/cemcom/em.h @@ -0,0 +1,42 @@ +/* $Header$ */ +/* DESCRIPTION OF INTERFACE TO EM CODE GENERATING ROUTINES */ + +#include "proc_intf.h" /* use macros or functions */ + +/* include the EM description files */ +#include +#include +#include +#include +#include + +/* macros used in the definitions of the interface functions C_* */ +#define OP(x) put_op(x) +#define CST(x) put_cst(x) +#define DCST(x) put_cst(x) +#define CSTR(x) put_cstr(x) +#define PS(x) put_ps(x) +#define DLB(x) put_dlb(x) +#define ILB(x) put_ilb(x) +#define NOFF(x,y) put_noff((x), (y)) +#define DOFF(x,y) put_doff((x), (y)) +#define PNAM(x) put_pnam(x) +#define DNAM(x) put_dnam(x) +#define CEND() put_cend() +#define WCON(x,y,z) put_wcon((x), (y), (z)) +#define FCON(x,y) put_fcon((x), (y)) + +/* variants of primitive "putbyte" */ +#define put8(x) putbyte(x) /* defined in "em.c" */ +#define put16(x) (put8((int) x), put8((int) (x >> 8))) +#define put32(x) (put16((int) x), put16((int) (x >> 16))) +#define put_cend() put8(sp_cend) +#define put_op(x) put8(x) +#define put_ps(x) put8(x) + +/* user interface */ +#define C_magic() put16(sp_magic) /* EM magic word */ + +#ifndef PROC_INTF +#include "writeem.h" +#endif PROC_INTF diff --git a/lang/cem/cemcom/emcode.def b/lang/cem/cemcom/emcode.def new file mode 100644 index 000000000..cf2530a23 --- /dev/null +++ b/lang/cem/cemcom/emcode.def @@ -0,0 +1,123 @@ +% emcode definitions for the CEM compiler -- intermediate code +C_adf(p) | arith p; | OP(op_adf), CST(p) +C_adi(p) | arith p; | OP(op_adi), CST(p) +C_adp(p) | arith p; | OP(op_adp), CST(p) +C_ads(p) | arith p; | OP(op_ads), CST(p) +C_adu(p) | arith p; | OP(op_adu), CST(p) +C_and(p) | arith p; | OP(op_and), CST(p) +C_asp(p) | arith p; | OP(op_asp), CST(p) +C_bra(l) | label l; | OP(op_bra), CST((arith)l) +C_cai() | | OP(op_cai) +C_cal(p) | char *p; | OP(op_cal), PNAM(p) +C_cff() | | OP(op_cff) +C_cfi() | | OP(op_cfi) +C_cfu() | | OP(op_cfu) +C_cif() | | OP(op_cif) +C_cii() | | OP(op_cii) +C_ciu() | | OP(op_ciu) +C_cmf(p) | arith p; | OP(op_cmf), CST(p) +C_cmi(p) | arith p; | OP(op_cmi), CST(p) +C_cmp() | | OP(op_cmp) +C_cmu(p) | arith p; | OP(op_cmu), CST(p) +C_com(p) | arith p; | OP(op_com), CST(p) +C_csa(p) | arith p; | OP(op_csa), CST(p) +C_csb(p) | arith p; | OP(op_csb), CST(p) +C_cuf() | | OP(op_cuf) +C_cui() | | OP(op_cui) +C_cuu() | | OP(op_cuu) +C_dup(p) | arith p; | OP(op_dup), CST(p) +C_dvf(p) | arith p; | OP(op_dvf), CST(p) +C_dvi(p) | arith p; | OP(op_dvi), CST(p) +C_dvu(p) | arith p; | OP(op_dvu), CST(p) +C_fil_ndlb(l, o) | label l; arith o; | OP(op_fil), DOFF(l, o) +C_ior(p) | arith p; | OP(op_ior), CST(p) +C_lae_dnam(p, o) | char *p; arith o; | OP(op_lae), NOFF(p, o) +C_lae_ndlb(l, o) | label l; arith o; | OP(op_lae), DOFF(l, o) +C_lal(p) | arith p; | OP(op_lal), CST(p) +C_ldc(p) | arith p; | OP(op_ldc), DCST(p) +C_lde_dnam(p, o) | char *p; arith o; | OP(op_lde), NOFF(p, o) +C_lde_ndlb(l, o) | label l; arith o; | OP(op_lde), DOFF(l, o) +C_ldl(p) | arith p; | OP(op_ldl), CST(p) +C_lfr(p) | arith p; | OP(op_lfr), CST(p) +C_lin(p) | arith p; | OP(op_lin), CST(p) +C_loc(p) | arith p; | OP(op_loc), CST(p) +C_loe_dnam(p, o) | char *p; arith o; | OP(op_loe), NOFF(p, o) +C_loe_ndlb(l, o) | label l; arith o; | OP(op_loe), DOFF(l, o) +C_loi(p) | arith p; | OP(op_loi), CST(p) +C_lol(p) | arith p; | OP(op_lol), CST(p) +C_lor(p) | arith p; | OP(op_lor), CST(p) +C_lpi(p) | char *p; | OP(op_lpi), PNAM(p) +C_mlf(p) | arith p; | OP(op_mlf), CST(p) +C_mli(p) | arith p; | OP(op_mli), CST(p) +C_mlu(p) | arith p; | OP(op_mlu), CST(p) +C_ngf(p) | arith p; | OP(op_ngf), CST(p) +C_ngi(p) | arith p; | OP(op_ngi), CST(p) +C_ret(p) | arith p; | OP(op_ret), CST(p) +C_rmi(p) | arith p; | OP(op_rmi), CST(p) +C_rmu(p) | arith p; | OP(op_rmu), CST(p) +C_sbf(p) | arith p; | OP(op_sbf), CST(p) +C_sbi(p) | arith p; | OP(op_sbi), CST(p) +C_sbs(p) | arith p; | OP(op_sbs), CST(p) +C_sbu(p) | arith p; | OP(op_sbu), CST(p) +C_sde_dnam(p, o) | char *p; arith o; | OP(op_sde), NOFF(p, o) +C_sde_ndlb(l, o) | label l; arith o; | OP(op_sde), DOFF(l, o) +C_sdl(p) | arith p; | OP(op_sdl), CST(p) +C_sli(p) | arith p; | OP(op_sli), CST(p) +C_slu(p) | arith p; | OP(op_slu), CST(p) +C_sri(p) | arith p; | OP(op_sri), CST(p) +C_sru(p) | arith p; | OP(op_sru), CST(p) +C_ste_dnam(p, o) | char *p; arith o; | OP(op_ste), NOFF(p, o) +C_ste_ndlb(l, o) | label l; arith o; | OP(op_ste), DOFF(l, o) +C_sti(p) | arith p; | OP(op_sti), CST(p) +C_stl(p) | arith p; | OP(op_stl), CST(p) +C_xor(p) | arith p; | OP(op_xor), CST(p) +C_zeq(l) | label l; | OP(op_zeq), CST((arith)l) +C_zge(l) | label l; | OP(op_zge), CST((arith)l) +C_zgt(l) | label l; | OP(op_zgt), CST((arith)l) +C_zle(l) | label l; | OP(op_zle), CST((arith)l) +C_zlt(l) | label l; | OP(op_zlt), CST((arith)l) +C_zne(l) | label l; | OP(op_zne), CST((arith)l) +% +C_ndlb(l) | label l; | DLB(l) +C_dnam(s) | char *s; | DNAM(s) +C_ilb(l) | label l; | ILB(l) +% +C_bss_cst(n, w, i) | arith n, w; int i; | + PS(ps_bss), DCST(n), CST(w), CST((arith)i) +% +C_con_begin() | | DC_start(ps_con), PS(ps_con) +C_con_end() | | CEND() +C_rom_begin() | | DC_start(ps_rom), PS(ps_rom) +C_rom_end() | | CEND() +C_co_cst(l) | arith l; | DC_check(), CST(l) +C_co_icon(val, siz) | char *val; arith siz; | + DC_check(), WCON(sp_icon, val, siz) +C_co_ucon(val, siz) | char *val; arith siz; | + DC_check(), WCON(sp_ucon, val, siz) +C_co_fcon(val, siz) | char *val; arith siz; | + DC_check(), WCON(sp_fcon, val, siz) +C_co_scon(str, siz) | char *str; arith siz; | DC_check(), CSTR(str) +C_co_dnam(str, val) | char *str; arith val; | DC_check(), NOFF(str, val) +C_co_ndlb(l, val) | label l; arith val; | DC_check(), DOFF(l, val) +C_co_pnam(str) | char *str; | DC_check(), PNAM(str) +C_co_ilb(l) | label l; | DC_check(), ILB(l) +% +C_pro_narg(p1) | char *p1; | PS(ps_pro), PNAM(p1), CEND() +C_end(l) | arith l; | PS(ps_end), CST(l) +% +C_exa(s) | char *s; | PS(ps_exa), DNAM(s) +C_exp(s) | char *s; | PS(ps_exp), PNAM(s) +C_ina_pt(l) | label l; | PS(ps_ina), DLB(l) +C_ina(s) | char *s; | PS(ps_ina), DNAM(s) +C_inp(s) | char *s; | PS(ps_inp), PNAM(s) +% +C_ms_err() | | PS(ps_mes), CST((arith)ms_err), CEND() +C_ms_emx(p1, p2) | arith p1, p2; | + PS(ps_mes), CST((arith)ms_emx), CST(p1), CST(p2), CEND() +C_ms_reg(a, b, c, d) | arith a, b; int c, d; | + PS(ps_mes), CST((arith)ms_reg), CST(a), CST(b), CST((arith)c), CST((arith)d), CEND() +C_ms_src(l, s) | arith l; char *s; | + PS(ps_mes), CST((arith)ms_src), CST(l), CSTR(s), CEND() +C_ms_flt() | | PS(ps_mes), CST((arith)ms_flt), CEND() +C_ms_par(l) | arith l; | PS(ps_mes), CST((arith)ms_par), CST(l), CEND() +C_ms_gto() | | PS(ps_mes), CST((arith)ms_gto), CEND() diff --git a/lang/cem/cemcom/error.c b/lang/cem/cemcom/error.c new file mode 100644 index 000000000..51d06b69d --- /dev/null +++ b/lang/cem/cemcom/error.c @@ -0,0 +1,212 @@ +/* $Header$ */ +/* E R R O R A N D D I A G N O S T I C R O U T I N E S */ + +#include "nopp.h" +#include "use_tmp.h" +#include "errout.h" +#include "debug.h" +#include "system.h" +#include "string.h" + +#include "tokenname.h" +#include "arith.h" +#include "label.h" +#include "expr.h" +#include "LLlex.h" +#include "em.h" + +/* This file contains the (non-portable) error-message and diagnostic + functions. Beware, they are called with a variable number of + arguments! +*/ + +/* error classes */ +#define ERROR 1 +#define WARNING 2 +#define LEXERROR 3 +#define LEXWARNING 4 +#define CRASH 5 +#define FATAL 6 + +int err_occurred; + +extern char *symbol2str(); +extern char options[]; + +/* There are three general error-message functions: + lexerror() lexical and pre-processor error messages + error() syntactic and semantic error messages + expr_error() errors in expressions + The difference lies in the place where the file name and line + number come from. + Lexical errors report from the global variables LineNumber and + FileName, expression errors get their information from the + expression, whereas other errors use the information in the token. +*/ + +/*VARARGS1*/ +error(fmt, args) + char *fmt; +{ + _error(ERROR, NILEXPR, fmt, &args); +} + +/*VARARGS2*/ +expr_error(expr, fmt, args) + struct expr *expr; + char *fmt; +{ + _error(ERROR, expr, fmt, &args); +} + +/*VARARGS1*/ +warning(fmt, args) + char *fmt; +{ + _error(WARNING, NILEXPR, fmt, &args); +} + +/*VARARGS2*/ +expr_warning(expr, fmt, args) + struct expr *expr; + char *fmt; +{ + _error(WARNING, expr, fmt, &args); +} + +/*VARARGS1*/ +lexerror(fmt, args) + char *fmt; +{ + _error(LEXERROR, NILEXPR, fmt, &args); +} + +#ifndef NOPP +/*VARARGS1*/ +lexwarning(fmt, args) char *fmt; { + _error(LEXWARNING, NILEXPR, fmt, &args); +} +#endif NOPP + +/*VARARGS1*/ +crash(fmt, args) + char *fmt; + int args; +{ + _error(CRASH, NILEXPR, fmt, &args); + C_close(); +#ifdef DEBUG + sys_stop(S_ABORT, 0); +#else DEBUG + sys_stop(S_EXIT, 1); +#endif DEBUG +} + +/*VARARGS1*/ +fatal(fmt, args) + char *fmt; + int args; +{ +#ifdef USE_TMP + extern char *tmpfile; /* main.c */ + + if (tmpfile) + sys_remove(tmpfile); /* may not successful! */ +#endif USE_TMP + + _error(FATAL, NILEXPR, fmt, &args); + sys_stop(S_EXIT, 1); +} + +_error(class, expr, fmt, argv) + int class; + struct expr *expr; + char *fmt; + int argv[]; +{ + /* _error attempts to limit the number of error messages + for a given line to MAXERR_LINE. + */ + static char *last_fn = 0; + static unsigned int last_ln = 0; + static int e_seen = 0; + char *fn = 0; + unsigned int ln = 0; + char *remark = 0; + + /* Since name and number are gathered from different places + depending on the class, we first collect the relevant + values and then decide what to print. + */ + /* preliminaries */ + switch (class) { + case ERROR: + case LEXERROR: + case CRASH: + case FATAL: + if (C_busy()) + C_ms_err(); + err_occurred = 1; + break; + + case WARNING: + case LEXWARNING: + if (options['w']) + return; + break; + } + + /* the remark */ + switch (class) { + case WARNING: + case LEXWARNING: + remark = "(warning)"; + break; + case CRASH: + remark = "CRASH\007"; + break; + case FATAL: + remark = "fatal error --"; + break; + } + + /* the place */ + switch (class) { + case WARNING: + case ERROR: + fn = expr ? expr->ex_file : dot.tk_file; + ln = expr ? expr->ex_line : dot.tk_line; + break; + case LEXWARNING: + case LEXERROR: + case CRASH: + case FATAL: + fn = FileName; + ln = LineNumber; + break; + } + + if (ln == last_ln && fn && last_fn && strcmp(fn, last_fn) == 0) { + /* we've seen this place before */ + e_seen++; + if (e_seen == MAXERR_LINE) + fmt = "etc ..."; + else + if (e_seen > MAXERR_LINE) + /* and too often, I'd say ! */ + return; + } + else { + /* brand new place */ + last_fn = fn; + last_ln = ln; + e_seen = 0; + } + + if (fn) + fprintf(ERROUT, "\"%s\", line %u: ", fn, ln); + if (remark) + fprintf(ERROUT, "%s ", remark); + doprnt(ERROUT, fmt, argv); /* contents of error */ + fprintf(ERROUT, "\n"); +} diff --git a/lang/cem/cemcom/eval.c b/lang/cem/cemcom/eval.c new file mode 100644 index 000000000..79b62a32d --- /dev/null +++ b/lang/cem/cemcom/eval.c @@ -0,0 +1,1028 @@ +/* $Header$ */ +/* EXPRESSION-CODE GENERATOR */ + +/* main functions : + EVAL() -- expression tree evaluator + tmp_pointer_var() -- deliver temporary pointer variable + free_tmp_var() -- return the pointer var + store_val() -- store primary expression + load_val() -- load primary expression + auxiliary functions: + assop() + compare() +*/ + +#include "debug.h" +#include "nobitfield.h" + +#include "string.h" +#include "dataflow.h" +#include "arith.h" +#include "type.h" +#include "idf.h" +#include "label.h" +#include "code.h" +#include "assert.h" +#include "def.h" +#include "expr.h" +#include "sizes.h" +#include "Lpars.h" +#include "level.h" +#include "stack.h" +#include "align.h" +#include "mes.h" +#include "atw.h" +#include "em.h" + +#define CRASH() crash("EVAL: CRASH at line %u", __LINE__) +#define roundup(n) ((n) < word_size ? word_size : (n)) + +char *symbol2str(); +arith tmp_pointer_var(); + +/* EVAL() serves as the main expression tree evaluator, which turns + any legal expression tree into legal EM code. + The parameters describe how EVAL should treat the expression tree: + + struct expr *expr: pointer to root of the expression tree to + be evaluated + + int val: indicates whether the resulting expression + is to be dereferenced (if val == RVAL and + expr->ex_lvalue == 1) or not (val == LVAL). + The latter case indicates that the resulting + expression is an lvalue expression which should + not be dereferenced by EVAL + + int code: indicates whether the expression tree must be + turned into EM code or not. E.g. the expression + statement "12;" delivers the expression "12" to + EVAL while this should not result in any EM code + + label false_label: + label true_label: if the expression is a logical or relational + expression and if the loop of the program + depends on the resulting value then EVAL + generates jumps to the specified program labels, + in case they are specified (i.e. are non-zero) +*/ + +EVAL(expr, val, code, true_label, false_label) + struct expr *expr; /* the expression tree itself */ + int val; /* either RVAL or LVAL */ + int code; /* generate explicit code or not */ + label true_label; + label false_label; /* labels to jump to in logical expr's */ +{ + register gencode = (code == TRUE); + + switch (expr->ex_class) { + + case Value: /* just a simple value */ + if (gencode) + load_val(expr, val); + break; + + case String: /* a string constant */ + if (gencode) { + label datlab = data_label(); + + C_ndlb(datlab); + C_con_begin(); + C_co_scon(expr->SG_VALUE, (arith)0); + C_con_end(); + C_lae_ndlb(datlab, (arith)0); + } + break; + + case Float: /* a floating constant */ + if (gencode) { + label datlab = data_label(); + + C_ndlb(datlab); + C_rom_begin(); + C_co_fcon(expr->FL_VALUE, expr->ex_type->tp_size); + C_rom_end(); + C_lae_ndlb(datlab, (arith)0); + C_loi(expr->ex_type->tp_size); + } + break; + + case Oper: /* compound expression */ + { + register int oper = expr->OP_OPER; + register struct expr *leftop = expr->OP_LEFT; + register struct expr *rightop = expr->OP_RIGHT; + register struct type *tp = expr->OP_TYPE; + + if (tp->tp_fund == ERRONEOUS) /* stop immediately */ + break; + + switch (oper) { + case '+': + /* We have the following possibilities : + int + int, pointer + int, pointer + long, + long + long, double + double + */ + EVAL(leftop, RVAL, code, NO_LABEL, NO_LABEL); + EVAL(rightop, RVAL, code, NO_LABEL, NO_LABEL); + + if (gencode) { + switch (tp->tp_fund) { + case INT: + case LONG: + if (tp->tp_unsigned) + C_adu(tp->tp_size); + else + C_adi(tp->tp_size); + break; + case POINTER: + C_ads(rightop->ex_type->tp_size); + break; + case DOUBLE: + C_adf(tp->tp_size); + break; + default: + crash("bad type +"); + } + } + break; + case '-': + if (leftop == 0) { /* unary */ + EVAL(rightop, RVAL, code, NO_LABEL, NO_LABEL); + if (gencode) { + switch (tp->tp_fund) { + case DOUBLE: + C_ngf(tp->tp_size); + break; + case INT: + case LONG: + case POINTER: + C_ngi(tp->tp_size); + break; + default: + CRASH(); + } + } + break; + } + /* Binary: we have the following flavours: + int - int, pointer - int, pointer - long, + pointer - pointer, long - long, double - double + */ + EVAL(leftop, RVAL, code, NO_LABEL, NO_LABEL); + EVAL(rightop, RVAL, code, NO_LABEL, NO_LABEL); + + if (!gencode) + break; + switch (tp->tp_fund) { + case INT: + case LONG: + if (tp->tp_unsigned) + C_sbu(tp->tp_size); + else + C_sbi(tp->tp_size); + break; + case POINTER: + if (EXPRTYPE(rightop) == POINTER) + C_sbs(pointer_size); + else { + C_ngi(rightop->ex_type->tp_size); + C_ads(rightop->ex_type->tp_size); + } + break; + case DOUBLE: + C_sbf(tp->tp_size); + break; + default: + crash("bad type -"); + } + break; + case '*': + if (leftop == 0) /* unary */ + EVAL(rightop, RVAL, code, NO_LABEL, NO_LABEL); + else { /* binary */ + EVAL(leftop, RVAL, code, NO_LABEL, NO_LABEL); + EVAL(rightop, RVAL, code, NO_LABEL, NO_LABEL); + if (gencode) + switch (tp->tp_fund) { + case INT: + case LONG: + case POINTER: + if (tp->tp_unsigned) + C_mlu(tp->tp_size); + else + C_mli(tp->tp_size); + break; + case DOUBLE: + C_mlf(double_size); + break; + default: + crash("bad type *"); + } + } + break; + case '/': + EVAL(leftop, RVAL, code, NO_LABEL, NO_LABEL); + EVAL(rightop, RVAL, code, NO_LABEL, NO_LABEL); + if (gencode) + switch (tp->tp_fund) { + case INT: + case LONG: + case POINTER: + if (tp->tp_unsigned) + C_dvu(tp->tp_size); + else + C_dvi(tp->tp_size); + break; + case DOUBLE: + C_dvf(double_size); + break; + default: + crash("bad type /"); + } + break; + case '%': + EVAL(leftop, RVAL, code, NO_LABEL, NO_LABEL); + EVAL(rightop, RVAL, code, NO_LABEL, NO_LABEL); + if (gencode) + if (tp->tp_fund == INT || tp->tp_fund == LONG) { + if (tp->tp_unsigned) + C_rmu(tp->tp_size); + else + C_rmi(tp->tp_size); + } + else + crash("bad type %%"); + break; + case LEFT: + EVAL(leftop, RVAL, code, NO_LABEL, NO_LABEL); + EVAL(rightop, RVAL, code, NO_LABEL, NO_LABEL); + if (gencode) + if (tp->tp_unsigned) + C_slu(tp->tp_size); + else + C_sli(tp->tp_size); + break; + case RIGHT: + EVAL(leftop, RVAL, code, NO_LABEL, NO_LABEL); + EVAL(rightop, RVAL, code, NO_LABEL, NO_LABEL); + if (gencode) + if (tp->tp_unsigned) + C_sru(tp->tp_size); + else + C_sri(tp->tp_size); + break; + case '<': + case LESSEQ: + case '>': + case GREATEREQ: + case EQUAL: + case NOTEQUAL: + EVAL(leftop, RVAL, code, NO_LABEL, NO_LABEL); + EVAL(rightop, RVAL, code, NO_LABEL, NO_LABEL); + if (gencode) { + /* The operands have the same type */ + switch (tp->tp_fund) { + case INT: + case LONG: + if (leftop->ex_type->tp_unsigned) + C_cmu(leftop->ex_type->tp_size); + else + C_cmi(leftop->ex_type->tp_size); + break; + case FLOAT: + case DOUBLE: + C_cmf(leftop->ex_type->tp_size); + break; + case POINTER: + C_cmp(); + break; + case ENUM: + C_cmi(leftop->ex_type->tp_size); + break; + default: + CRASH(); + } + if (true_label != 0) { + compare(oper, true_label); + C_bra(false_label); + } + else { + label l_true = text_label(); + label l_end = text_label(); + + compare(oper, l_true); + C_loc((arith)0); + C_bra(l_end); + C_ilb(l_true); + C_loc((arith)1); + C_ilb(l_end); + } + } + break; + case '&': + case '|': + case '^': + /* both operands should have type int */ + EVAL(leftop, RVAL, code, NO_LABEL, NO_LABEL); + EVAL(rightop, RVAL, code, NO_LABEL, NO_LABEL); + if (gencode) { + arith size = tp->tp_size; + + if (size < word_size) + size = word_size; + switch (oper) { + case '&': + C_and(size); + break; + case '|': + C_ior(size); + break; + case '^': + C_xor(size); + break; + } + } + break; + case '=': +#ifndef NOBITFIELD + if (leftop->ex_type->tp_fund == FIELD) { + /* assignment to bitfield variable + */ + eval_field(expr, code); + break; + } +#endif NOBITFIELD + EVAL(rightop, RVAL, TRUE, NO_LABEL, NO_LABEL); + if (gencode) + C_dup(ATW(tp->tp_size)); + + if (leftop->ex_class != Value) { + EVAL(leftop, LVAL, TRUE, NO_LABEL, NO_LABEL); + store_block(tp->tp_size, tp->tp_align); + } + else + store_val(leftop->VL_IDF, leftop->ex_type, + leftop->VL_VALUE); + break; + case PLUSAB: + case MINAB: + case TIMESAB: + case DIVAB: + case MODAB: + case LEFTAB: + case RIGHTAB: + case ANDAB: + case XORAB: + case ORAB: +#ifndef NOBITFIELD + if (leftop->ex_type->tp_fund == FIELD) { + eval_field(expr, code); + break; + } +#endif NOBITFIELD + if (leftop->ex_class != Value) { + arith old_offset; + arith tmpvar = tmp_pointer_var(&old_offset); + + EVAL(leftop, LVAL, TRUE, NO_LABEL, NO_LABEL); + C_lal(tmpvar); + C_sti(pointer_size); + C_lal(tmpvar); + C_loi(pointer_size); + C_loi(tp->tp_size); + EVAL(rightop, RVAL, TRUE, NO_LABEL, NO_LABEL); + assop(tp, oper); + if (gencode) + C_dup(roundup(tp->tp_size)); + C_lal(tmpvar); + C_loi(pointer_size); + C_sti(tp->tp_size); + free_tmp_var(old_offset); + } + else { + load_val(leftop, RVAL); + EVAL(rightop, RVAL, TRUE, NO_LABEL, NO_LABEL); + assop(tp, oper); + if (gencode) + C_dup(roundup(tp->tp_size)); + store_val(leftop->VL_IDF, leftop->ex_type, + leftop->VL_VALUE); + } + break; + case '(': + { + register struct expr *expr; + arith ParSize = (arith)0; + + if (expr = rightop) { + /* function call with parameters*/ + while ( expr->ex_class == Oper && + expr->OP_OPER == PARCOMMA + ) { + EVAL(expr->OP_RIGHT, RVAL, TRUE, + NO_LABEL, NO_LABEL); + ParSize += + ATW(expr->ex_type->tp_size); + expr = expr->OP_LEFT; + } + EVAL(expr, RVAL, TRUE, NO_LABEL, NO_LABEL); + ParSize += ATW(expr->ex_type->tp_size); + } + if (leftop->ex_class == Value && leftop->VL_IDF != 0) { + /* just an example: + main() { (*((int (*)())0))(); } + */ + C_cal(leftop->VL_IDF->id_text); +#ifdef DATAFLOW + { extern char options[]; + if (options['d']) + DfaCallFunction( + leftop->VL_IDF->id_text + ); + } +#endif DATAFLOW + } + else { + EVAL(leftop, LVAL, TRUE, NO_LABEL, NO_LABEL); + C_cai(); + } + /* remove parameters from stack */ + if (ParSize > (arith)0) + C_asp(ParSize); + if (!gencode) + break; + if (is_struct_or_union(tp->tp_fund)) { + C_lfr(pointer_size); + load_block(tp->tp_size, tp->tp_align); + } + else + C_lfr(ATW(tp->tp_size)); + break; + } + case '.': + EVAL(leftop, LVAL, code, NO_LABEL, NO_LABEL); + if (gencode) + C_adp(rightop->VL_VALUE); + break; + case ARROW: + EVAL(leftop, RVAL, code, NO_LABEL, NO_LABEL); + if (gencode) + C_adp(rightop->VL_VALUE); + break; + case ',': + EVAL(leftop, RVAL, FALSE, NO_LABEL, NO_LABEL); + EVAL(rightop, RVAL, code, NO_LABEL, NO_LABEL); + break; + case '~': + EVAL(rightop, RVAL, code, NO_LABEL, NO_LABEL); + if (gencode) + C_com(tp->tp_size); + break; + case POSTINCR: + case POSTDECR: + case PLUSPLUS: + case MINMIN: + { + arith old_offset, tmp; + arith esize = tp->tp_size; +#ifndef NOBITFIELD + if (leftop->ex_type->tp_fund == FIELD) { + eval_field(expr, code); + break; + } +#endif NOBITFIELD + if (leftop->ex_class != Value) { + tmp = tmp_pointer_var(&old_offset); + EVAL(leftop, LVAL, TRUE, NO_LABEL, NO_LABEL); + C_dup(pointer_size); + C_lal(tmp); + C_sti(pointer_size); + C_loi(tp->tp_size); + } + else + load_val(leftop, RVAL); + + /* We made the choice to put this stuff here + and not to put the conversion in the expression + tree because this conversion is EM dependent + and not described in C + */ + if (esize < word_size) { + conversion(tp, word_type); + esize = word_size; + } + + if (gencode && (oper == POSTINCR || oper == POSTDECR)) + C_dup(esize); + EVAL(rightop, RVAL, TRUE, NO_LABEL, NO_LABEL); + assop(tp, oper); + if (gencode && (oper == PLUSPLUS || oper == MINMIN)) + C_dup(esize); + if (tp->tp_size < word_size) + conversion(word_type, tp); + if (leftop->ex_class != Value) { + C_lal(tmp); /* always init'd */ + C_loi(pointer_size); + C_sti(tp->tp_size); + free_tmp_var(old_offset); + } + else + store_val(leftop->VL_IDF, leftop->ex_type, + leftop->VL_VALUE); + break; + } + case '?': /* must be followed by ':' */ + { + label l_true = text_label(); + label l_false = text_label(); + label l_end = text_label(); + + EVAL(leftop, RVAL, TRUE, l_true, l_false); + C_ilb(l_true); + EVAL(rightop->OP_LEFT, RVAL, code, NO_LABEL, NO_LABEL); + C_bra(l_end); + C_ilb(l_false); + EVAL(rightop->OP_RIGHT, RVAL, code, NO_LABEL, NO_LABEL); + C_ilb(l_end); + break; + } + case AND: + if (true_label == 0) { + label l_true = text_label(); + label l_false = text_label(); + label l_maybe = text_label(); + label l_end = text_label(); + + EVAL(leftop, RVAL, TRUE, l_maybe, l_false); + C_ilb(l_maybe); + if (gencode) { + EVAL(rightop, RVAL, TRUE, + l_true, l_false); + C_ilb(l_true); + C_loc((arith)1); + C_bra(l_end); + C_ilb(l_false); + C_loc((arith)0); + C_ilb(l_end); + } + else { + EVAL(rightop, RVAL, FALSE, l_false, + l_false); + C_ilb(l_false); + } + } + else { + label l_maybe = text_label(); + + EVAL(leftop, RVAL, TRUE, l_maybe, false_label); + C_ilb(l_maybe); + EVAL(rightop, RVAL, code, true_label, + false_label); + } + break; + case OR: + if (true_label == 0) { + label l_true = text_label(); + label l_false = text_label(); + label l_maybe = text_label(); + label l_end = text_label(); + + EVAL(leftop, RVAL, TRUE, l_true, l_maybe); + C_ilb(l_maybe); + if (gencode) { + EVAL(rightop, RVAL, TRUE, + l_true, l_false); + C_ilb(l_false); + C_loc((arith)0); + C_bra(l_end); + C_ilb(l_true); + C_loc((arith)1); + C_ilb(l_end); + } + else { + EVAL(rightop, RVAL, FALSE, l_true, + l_true); + C_ilb(l_true); + } + } + else { + label l_maybe = text_label(); + + EVAL(leftop, RVAL, TRUE, true_label, l_maybe); + C_ilb(l_maybe); + EVAL(rightop, RVAL, code, true_label, + false_label); + } + break; + case '!': + if (true_label == 0) { + if (gencode) { + label l_true = text_label(); + label l_false = text_label(); + label l_end = text_label(); + + EVAL(rightop, RVAL, TRUE, + l_false, l_true); + C_ilb(l_false); + C_loc((arith)0); + C_bra(l_end); + C_ilb(l_true); + C_loc((arith)1); + C_ilb(l_end); + } + else + EVAL(rightop, RVAL, FALSE, + NO_LABEL, NO_LABEL); + } + else + EVAL(rightop, RVAL, code, false_label, + true_label); + break; + case INT2INT: + case INT2FLOAT: + case FLOAT2INT: + case FLOAT2FLOAT: + EVAL(rightop, RVAL, code, NO_LABEL, NO_LABEL); + if (gencode) + conversion(rightop->ex_type, leftop->ex_type); + break; + default: + crash("(EVAL) Bad operator %s\n", symbol2str(oper)); + } + + /* If the rvalue of the expression is required but + only its lvalue is evaluated, its rvalue is + loaded by the following statements: + */ + if (gencode && val == RVAL && expr->ex_lvalue == 1) + load_block(expr->ex_type->tp_size, + expr->ex_type->tp_align); + break; + } + case Type: + default: + crash("(EVAL) bad expression class"); + } +} + +/* compare() serves as an auxiliary function of EVAL */ +compare(relop, lbl) + int relop; + label lbl; +{ + switch (relop) { + case '<': + C_zlt(lbl); + break; + case LESSEQ: + C_zle(lbl); + break; + case '>': + C_zgt(lbl); + break; + case GREATEREQ: + C_zge(lbl); + break; + case EQUAL: + C_zeq(lbl); + break; + case NOTEQUAL: + C_zne(lbl); + break; + default: + CRASH(); + } +} + +/* assop() generates the opcode of an assignment operators op= */ +assop(type, oper) + struct type *type; + int oper; +{ + register arith size = type->tp_size; + register uns = type->tp_unsigned; + + if (size < word_size) + size = word_size; + switch (type->tp_fund) { + case CHAR: + case SHORT: + case INT: + case LONG: + case ENUM: + switch (oper) { + case PLUSAB: + case PLUSPLUS: + case POSTINCR: + if (uns) + C_adu(size); + else + C_adi(size); + break; + case MINAB: + case MINMIN: + case POSTDECR: + if (uns) + C_sbu(size); + else + C_sbi(size); + break; + case TIMESAB: + if (uns) + C_mlu(size); + else + C_mli(size); + break; + case DIVAB: + if (uns) + C_dvu(size); + else + C_dvi(size); + break; + case MODAB: + if (uns) + C_rmu(size); + else + C_rmi(size); + break; + case LEFTAB: + if (uns) + C_slu(size); + else + C_sli(size); + break; + case RIGHTAB: + if (uns) + C_sru(size); + else + C_sri(size); + break; + case ANDAB: + C_and(size); + break; + case XORAB: + C_xor(size); + break; + case ORAB: + C_ior(size); + break; + } + break; + case FLOAT: + case DOUBLE: + switch (oper) { + case PLUSAB: + case PLUSPLUS: + case POSTINCR: + C_adf(size); + break; + case MINAB: + case MINMIN: + case POSTDECR: + C_sbf(size); + break; + case TIMESAB: + C_mlf(size); + break; + case DIVAB: + C_dvf(size); + break; + } + break; + case POINTER: + if (oper == MINAB || oper == MINMIN || oper == POSTDECR) + C_ngi(size); + C_ads(size); + break; + case ERRONEOUS: + break; + default: + crash("(assop) bad type %s\n", symbol2str(type->tp_fund)); + } +} + +/* tmp_pointer_var() returns the EM address of a new temporary + pointer variable needed at increment, decrement and assignment + operations to store the address of some variable or lvalue-expression. +*/ +arith +tmp_pointer_var(oldoffset) + arith *oldoffset; /* previous allocated address */ +{ + struct stack_level *stl = local_level; + + *oldoffset = stl->sl_local_offset; + stl->sl_local_offset = + - align(-stl->sl_local_offset + pointer_size, pointer_align); + if (stl->sl_local_offset < stl->sl_max_block) + stl->sl_max_block = stl->sl_local_offset; + return stl->sl_local_offset; +} + +/* free_tmp_var() returns the address allocated by tmp_pointer_var() + and resets the last allocated address. +*/ +free_tmp_var(oldoffset) + arith oldoffset; +{ + local_level->sl_local_offset = oldoffset; +} + +/* store_val() generates code for a store operation. + There are four ways of storing data: + - into a global variable + - into an automatic local variable + - into a local static variable + - absolute addressing + When the destination is described by an (lvalue) expression, the call + is "store_val(ex->VL_IDF, ex->ex_type, ex->VL_VALUE)" +*/ +store_val(id, tp, offs) + register struct idf *id; + struct type *tp; + arith offs; +{ + arith size = tp->tp_size; + int tpalign = tp->tp_align; + + if (id) { + register struct def *df = id->id_def; + int al_on_word = (tpalign % word_align == 0); + register inword = (size == word_size && al_on_word); + register indword = (size == dword_size && al_on_word); + + if (df->df_level == L_GLOBAL) { + if (inword) + C_ste_dnam(id->id_text, offs); + else + if (indword) + C_sde_dnam(id->id_text, offs); + else { + C_lae_dnam(id->id_text, offs); + store_block(size, tpalign); + } + } + else + if (df->df_sc == STATIC) { + if (inword) + C_ste_ndlb((label)df->df_address, offs); + else + if (indword) + C_sde_ndlb((label)df->df_address, offs); + else { + C_lae_ndlb((label)df->df_address, offs); + store_block(size, tpalign); + } + } + else { + if (inword) + C_stl(df->df_address + offs); + else + if (indword) + C_sdl(df->df_address + offs); + else { + C_lal(df->df_address + offs); + store_block(size, tpalign); + df->df_register = REG_NONE; + } + } + } + else { /* absolute addressing */ + load_cst(offs, pointer_size); + store_block(size, tpalign); + } +} + + +/* load_val() generates code for stacking a certain value (from ex), + which can be obtained in one of the following ways: + - value from absolute addressed memory + - constant value + - function result + - global variable + - static variable + - local variable +*/ +load_val(expr, val) + struct expr *expr; /* expression containing the value */ + int val; /* generate either LVAL or RVAL */ +{ + register struct idf *id; + register struct type *tp = expr->ex_type; + register struct def *df; + register rvalue = (val == RVAL && expr->ex_lvalue != 0); + register arith exval = expr->VL_VALUE; + register arith size = tp->tp_size; + register tpalign = tp->tp_align; + register al_on_word = (tpalign % word_align == 0); + + if ((id = expr->VL_IDF) == 0) { + /* Note: enum constants are also dealt with here */ + if (rvalue) { + /* absolute addressing + */ + load_cst(exval, pointer_size); + load_block(size, tpalign); + } + else /* integer, unsigned, long, enum etc */ + load_cst(exval, size); + } + else + if ((df = id->id_def)->df_type->tp_fund == FUNCTION) + /* the previous statement tried to catch a function + identifier, which may be cast to a pointer to a + function. + ASSERT(!(rvalue)); ??? + */ + C_lpi(id->id_text); + else + if (df->df_level == L_GLOBAL) { + if (rvalue) { + if (size == word_size && al_on_word) + C_loe_dnam(id->id_text, exval); + else + if (size == dword_size && al_on_word) + C_lde_dnam(id->id_text, exval); + else { + C_lae_dnam(id->id_text, exval); + load_block(size, tpalign); + } + + } + else { + C_lae_dnam(id->id_text, (arith)0); + C_adp(exval); + } + } + else + if (df->df_sc == STATIC) { + if (rvalue) { + if (size == word_size && al_on_word) + C_loe_ndlb((label)df->df_address, exval); + else + if (size == dword_size && al_on_word) + C_lde_ndlb((label)df->df_address, exval); + else { + C_lae_ndlb((label)df->df_address, exval); + load_block(size, tpalign); + } + + } + else { + C_lae_ndlb((label)df->df_address, (arith)0); + C_adp(exval); + } + } + else { + if (rvalue) { + if (size == word_size && al_on_word) + C_lol(df->df_address + exval); + else + if (size == dword_size && al_on_word) + C_ldl(df->df_address + exval); + else { + C_lal(df->df_address + exval); + load_block(size, tpalign); + df->df_register = REG_NONE; + } + } + else { + /* following code may be used when + comparing addresses as in the following + example: + f() { + int a[10], *i; + for (i = &a[0]; i < &a[10]; i++) ...; + } + We don't accept the contents of a[10] to + be legitimate, so the RVAL of it may + contain a big mess. + */ + C_lal(df->df_address); + C_adp(exval); + df->df_register = REG_NONE; + } + } +} + +load_cst(val, siz) + arith val, siz; +{ + if (siz <= word_size) + C_loc(val); + else + if (siz == dword_size) + C_ldc(val); + else { + label datlab; + + C_ndlb(datlab = data_label()); + C_rom_begin(); + C_co_icon(itos(val), siz); + C_rom_end(); + C_lae_ndlb(datlab, (arith)0); + C_loi(siz); + } +} diff --git a/lang/cem/cemcom/expr.c b/lang/cem/cemcom/expr.c new file mode 100644 index 000000000..67d39b299 --- /dev/null +++ b/lang/cem/cemcom/expr.c @@ -0,0 +1,408 @@ +/* $Header$ */ +/* EXPRESSION TREE HANDLING */ + +#include "botch_free.h" /* UF */ +#include "alloc.h" +#include "idf.h" +#include "arith.h" +#include "def.h" +#include "type.h" +#include "label.h" +#include "expr.h" +#include "LLlex.h" +#include "Lpars.h" +#include "decspecs.h" +#include "declarator.h" +#include "storage.h" +#include "sizes.h" + +extern char *symbol2str(); +extern char options[]; + +int +rank_of(oper) + int oper; +{ + /* The rank of the operator oper is returned. + */ + switch (oper) { + default: + return 0; /* INT2INT etc. */ + case '[': + case '(': + case '.': + case ARROW: + case PARCOMMA: + return 1; + case '!': + case PLUSPLUS: + case MINMIN: + case CAST: + case SIZEOF: + return 2; /* monadic */ + case '*': + case '/': + case '%': + return 3; + case '+': + case '-': + return 4; + case LEFT: + case RIGHT: + return 5; + case '<': + case '>': + case LESSEQ: + case GREATEREQ: + return 6; + case EQUAL: + case NOTEQUAL: + return 7; + case '&': + return 8; + case '^': + return 9; + case '|': + return 10; + case AND: + return 11; + case OR: + return 12; + case '?': + case ':': + return 13; + case '=': + case PLUSAB: + case MINAB: + case TIMESAB: + case DIVAB: + case MODAB: + case RIGHTAB: + case LEFTAB: + case ANDAB: + case XORAB: + case ORAB: + return 14; + case ',': + return 15; + } + /*NOTREACHED*/ +} + +int +rank_of_expression(expr) + struct expr *expr; +{ + /* Returns the rank of the top node in the expression. + */ + if (!expr || (expr->ex_flags & EX_PARENS) || expr->ex_class != Oper) + return 0; + return rank_of(expr->OP_OPER); +} + +check_conditional(expr, oper, pos_descr) + struct expr *expr; + char *pos_descr; +{ + /* Warn if restricted C is in effect and the expression expr, + which occurs at the position pos_descr, is not lighter than + the operator oper. + */ + if (options['R'] && rank_of_expression(expr) >= rank_of(oper)) + warning("%s %s is ungrammatical", + symbol2str(expr->OP_OPER), pos_descr); +} + +dot2expr(expp) + struct expr **expp; +{ + /* The token in dot is converted into an expression, a + pointer to which is stored in *expp. + */ + *expp = new_expr(); + clear((char *)*expp, sizeof(struct expr)); + (*expp)->ex_file = dot.tk_file; + (*expp)->ex_line = dot.tk_line; + switch (DOT) { + case IDENTIFIER: + idf2expr(*expp); + break; + case STRING: + string2expr(*expp); + break; + case INTEGER: + *expp = intexpr(dot.tk_ival, dot.tk_fund); + break; + case FLOATING: + float2expr(*expp); + break; + default: + crash("bad conversion to expression"); + break; + } +} + +idf2expr(expr) + struct expr *expr; +{ + /* Dot contains an identifier which is turned into an + expression. + Note that this constitutes an applied occurrence of + the identifier. + */ + register struct idf *idf = dot.tk_idf; /* != 0*/ + register struct def *def = idf->id_def; + + if (def == 0) { + if (AHEAD == '(') { + /* Function call, so declare the name IMPLICITly. */ + /* See RM 13. */ + add_def(idf, IMPLICIT, funint_type, level); + } + else { + if (!is_anon_idf(idf)) + error("%s undefined", idf->id_text); + /* Declare the idf anyway */ + add_def(idf, 0, error_type, level); + } + def = idf->id_def; + } + /* now def != 0 */ + if (def->df_type->tp_fund == LABEL) { + error("illegal use of label %s", idf->id_text); + expr->ex_type = error_type; + } + else { + def->df_used = 1; + expr->ex_type = def->df_type; + } + expr->ex_lvalue = + ( def->df_type->tp_fund == FUNCTION || + def->df_type->tp_fund == ARRAY || + def->df_sc == ENUM + ) ? 0 : 1; + expr->ex_class = Value; + if (def->df_sc == ENUM) { + expr->VL_IDF = 0; + expr->VL_VALUE = def->df_address; + } + else { + expr->VL_IDF = idf; + expr->VL_VALUE = (arith)0; + } +} + +string2expr(expr) + struct expr *expr; +{ + /* Dot contains a string which is turned into an expression. + */ + expr->ex_type = string_type; + expr->ex_lvalue = 0; + expr->ex_class = String; + expr->SG_VALUE = dot.tk_str; + expr->SG_DATLAB = 0; +} + +struct expr* +intexpr(ivalue, fund) + arith ivalue; +{ + /* The value ivalue is turned into an integer expression of + the size indicated by fund. + */ + struct expr *expr = new_expr(); + + clear((char *)expr, sizeof(struct expr)); + expr->ex_file = dot.tk_file; + expr->ex_line = dot.tk_line; + + switch (fund) { + + case INT: + expr->ex_type = int_type; + break; + + case LONG: + expr->ex_type = long_type; + break; + + case UNSIGNED: + /* We cannot make a test like "ivalue <= max_unsigned" + because, if sizeof(long) == int_size holds, max_unsigned + may be a negative long in which case the comparison + results in an unexpected answer. We assume that + the type "unsigned long" is not part of portable C ! + */ + expr->ex_type = + (ivalue & ~max_unsigned) ? long_type : uint_type; + break; + + case INTEGER: + expr->ex_type = (ivalue <= max_int) ? int_type : long_type; + break; + + default: + crash("(intexpr) bad fund %s\n", symbol2str(fund)); + } + expr->ex_class = Value; + expr->VL_VALUE = ivalue; + + cut_size(expr); + return expr; +} + +float2expr(expr) + struct expr *expr; +{ + /* Dot contains a floating point constant which is turned + into an expression. + */ + expr->ex_type = double_type; + expr->ex_class = Float; + expr->FL_VALUE = dot.tk_fval; + expr->FL_DATLAB = 0; +} + +struct expr * +new_oper(tp, e1, oper, e2) + struct type *tp; + struct expr *e1, *e2; +{ + /* A new expression is constructed which consists of the + operator oper which has e1 and e2 as operands; for a + monadic operator e1 == NILEXPR. + During the construction of the right recursive initialisation + tree it is possible for e2 to be NILEXPR. + */ + struct expr *expr = new_expr(); + struct oper *op; + + clear((char *)expr, sizeof(struct expr)); + if (!e1 || !e2) { + expr->ex_file = dot.tk_file; + expr->ex_line = dot.tk_line; + } + else { + expr->ex_file = e2->ex_file; + expr->ex_line = e2->ex_line; + } + expr->ex_type = tp; + expr->ex_class = Oper; + /* combine depths and flags of both expressions */ + if (e2) { + int e1_depth = e1 ? e1->ex_depth : 0; + int e1_flags = e1 ? e1->ex_flags : 0; + + expr->ex_depth = + (e1_depth > e2->ex_depth ? e1_depth : e2->ex_depth) + + 1; + expr->ex_flags = (e1_flags | e2->ex_flags) & ~EX_PARENS; + } + op = &expr->ex_object.ex_oper; + op->op_type = tp; + op->op_oper = oper; + op->op_left = e1; + op->op_right = e2; + + return expr; +} + +chk_cst_expr(expp) + register struct expr **expp; +{ + /* The expression expr is checked for constancy. + + There are 6 places where constant expressions occur in C: + 1. after #if + 2. in a global initialization + 3. as size in an array declaration + 4. as value in an enum declaration + 5. as width in a bit field + 6. as case value in a switch + + The constant expression in a global initialization is + handled separately (by IVAL()). + + There are various disparate restrictions on each of + the others in the various C compilers. I have tried some + hypotheses to unify them, but all have failed. + + This routine will give a warning for those operators + not allowed by K&R, under the R-option only. The anomalies + are cast, logical operators and the expression comma. + Special problems (of which there is only one, sizeof in + Preprocessor #if) have to be dealt with locally + + Note that according to K&R the negation ! is illegal in + constant expressions and is indeed rejected by the + Ritchie compiler. + */ + register struct expr *expr = *expp; + register int fund = expr->ex_type->tp_fund; + register int flags = expr->ex_flags; + register int err = 0; + +#ifdef DEBUG + print_expr("constant_expression", expr); +#endif DEBUG + if ( fund != CHAR && fund != SHORT && fund != INT && + fund != ENUM && fund != LONG + ) { + expr_error(expr, "non-numerical constant expression"), err++; + } + else + if (!is_ld_cst(expr)) + expr_error(expr, "expression is not constant"), err++; + + if (options['R']) { + if (flags & EX_CAST) + expr_warning(expr, + "cast in constant expression"); + if (flags & EX_LOGICAL) + expr_warning(expr, + "logical operator in constant expression"); + if (flags & EX_COMMA) + expr_warning(expr, + "expression comma in constant expression"); + } + + if (err) { + free_expression(expr); + *expp = intexpr((arith)1, INT); + (*expp)->ex_type = error_type; + } +} + +init_expression(eppp, expr) + struct expr ***eppp, *expr; +{ + /* The expression expr is added to the tree designated + indirectly by **eppp. + The natural form of a tree representing an + initial_value_list is right-recursive, ie. with the + left-most comma as main operator. The iterative grammar in + expression.g, however, tends to produce a left-recursive + tree, ie. one with the right-most comma as its main + operator. + To produce a right-recursive tree from the iterative + grammar, we keep track of the address of the pointer where + the next expression must be hooked in. + */ + **eppp = new_oper(void_type, expr, INITCOMMA, NILEXPR); + *eppp = &(**eppp)->OP_RIGHT; +} + +free_expression(expr) + struct expr *expr; +{ + /* The expression expr is freed recursively. + */ + if (!expr) + return; + if (expr->ex_class == Oper) { + free_expression(expr->OP_LEFT); + free_expression(expr->OP_RIGHT); + } + free_expr(expr); +} diff --git a/lang/cem/cemcom/expr.h b/lang/cem/cemcom/expr.h new file mode 100644 index 000000000..46e658a05 --- /dev/null +++ b/lang/cem/cemcom/expr.h @@ -0,0 +1,102 @@ +/* $Header$ */ +/* EXPRESSION DESCRIPTOR */ + +/* What we want to define is the struct expr, but since it contains + a union of various goodies, we define them first; so be patient. +*/ + +struct value { + struct idf *vl_idf; /* idf of an external name or 0 */ + arith vl_value; /* constant, or offset if idf != 0 */ +}; + +struct string { + char *sg_value; /* string of characters repr. the constant */ + label sg_datlab; /* global data-label */ +}; + +struct floating { + char *fl_value; /* pointer to string repr. the fp const. */ + label fl_datlab; /* global data_label */ +}; + +struct oper { + struct type *op_type; /* resulting type of the operation */ + struct expr *op_left; + int op_oper; /* the symbol of the operator */ + struct expr *op_right; +}; + +/* The following constants indicate the class of the expression: */ +#define Value 0 /* it is a value known at load time */ +#define String 1 /* it is a string constant */ +#define Float 2 /* it is a floating point constant */ +#define Oper 3 /* it is a run-time expression */ +#define Type 4 /* only its type is relevant */ + +struct expr { + struct expr *next; + char *ex_file; /* the file it (probably) comes from */ + unsigned int ex_line; /* the line it (probably) comes from */ + struct type *ex_type; + char ex_lvalue; + char ex_flags; + int ex_class; + int ex_depth; + union { + struct value ex_value; + struct string ex_string; + struct floating ex_float; + struct oper ex_oper; + } ex_object; +}; + +/* some abbreviated selections */ +#define VL_VALUE ex_object.ex_value.vl_value +#define VL_IDF ex_object.ex_value.vl_idf +#define SG_VALUE ex_object.ex_string.sg_value +#define SG_DATLAB ex_object.ex_string.sg_datlab +#define FL_VALUE ex_object.ex_float.fl_value +#define FL_DATLAB ex_object.ex_float.fl_datlab +#define OP_TYPE ex_object.ex_oper.op_type +#define OP_LEFT ex_object.ex_oper.op_left +#define OP_OPER ex_object.ex_oper.op_oper +#define OP_RIGHT ex_object.ex_oper.op_right + +#define EXPRTYPE(e) ((e)->ex_type->tp_fund) + +/* An expression is a `load-time constant' if it is of the form + +/- or ; + it is a `compile-time constant' if it is an . +*/ +#define is_ld_cst(e) ((e)->ex_lvalue == 0 && (e)->ex_class == Value) +#define is_cp_cst(e) (is_ld_cst(e) && (e)->VL_IDF == 0) + +/* a floating constant expression ? +*/ +#define is_fp_cst(e) ((e)->ex_class == Float) + +/* some bits for the ex_flag field, to keep track of various + interesting properties of an expression. +*/ +#define EX_SIZEOF 001 /* contains sizeof operator */ +#define EX_CAST 002 /* contains cast */ +#define EX_LOGICAL 004 /* contains logical operator */ +#define EX_COMMA 010 /* contains expression comma */ +#define EX_PARENS 020 /* the top level is parenthesized */ + +#define NILEXPR ((struct expr *)0) + +extern struct expr *intexpr(), *new_oper(); + + +/* allocation definitions of struct expr */ +/* ALLOCDEF "expr" */ +extern char *st_alloc(); +extern struct expr *h_expr; +#define new_expr() ((struct expr *) \ + st_alloc((char **)&h_expr, sizeof(struct expr))) +#define free_expr(p) st_free(p, h_expr, sizeof(struct expr)) + + +#define ISCOMMA(e) ((e)->ex_class == Oper && (e)->OP_OPER == INITCOMMA) diff --git a/lang/cem/cemcom/expr.str b/lang/cem/cemcom/expr.str new file mode 100644 index 000000000..46e658a05 --- /dev/null +++ b/lang/cem/cemcom/expr.str @@ -0,0 +1,102 @@ +/* $Header$ */ +/* EXPRESSION DESCRIPTOR */ + +/* What we want to define is the struct expr, but since it contains + a union of various goodies, we define them first; so be patient. +*/ + +struct value { + struct idf *vl_idf; /* idf of an external name or 0 */ + arith vl_value; /* constant, or offset if idf != 0 */ +}; + +struct string { + char *sg_value; /* string of characters repr. the constant */ + label sg_datlab; /* global data-label */ +}; + +struct floating { + char *fl_value; /* pointer to string repr. the fp const. */ + label fl_datlab; /* global data_label */ +}; + +struct oper { + struct type *op_type; /* resulting type of the operation */ + struct expr *op_left; + int op_oper; /* the symbol of the operator */ + struct expr *op_right; +}; + +/* The following constants indicate the class of the expression: */ +#define Value 0 /* it is a value known at load time */ +#define String 1 /* it is a string constant */ +#define Float 2 /* it is a floating point constant */ +#define Oper 3 /* it is a run-time expression */ +#define Type 4 /* only its type is relevant */ + +struct expr { + struct expr *next; + char *ex_file; /* the file it (probably) comes from */ + unsigned int ex_line; /* the line it (probably) comes from */ + struct type *ex_type; + char ex_lvalue; + char ex_flags; + int ex_class; + int ex_depth; + union { + struct value ex_value; + struct string ex_string; + struct floating ex_float; + struct oper ex_oper; + } ex_object; +}; + +/* some abbreviated selections */ +#define VL_VALUE ex_object.ex_value.vl_value +#define VL_IDF ex_object.ex_value.vl_idf +#define SG_VALUE ex_object.ex_string.sg_value +#define SG_DATLAB ex_object.ex_string.sg_datlab +#define FL_VALUE ex_object.ex_float.fl_value +#define FL_DATLAB ex_object.ex_float.fl_datlab +#define OP_TYPE ex_object.ex_oper.op_type +#define OP_LEFT ex_object.ex_oper.op_left +#define OP_OPER ex_object.ex_oper.op_oper +#define OP_RIGHT ex_object.ex_oper.op_right + +#define EXPRTYPE(e) ((e)->ex_type->tp_fund) + +/* An expression is a `load-time constant' if it is of the form + +/- or ; + it is a `compile-time constant' if it is an . +*/ +#define is_ld_cst(e) ((e)->ex_lvalue == 0 && (e)->ex_class == Value) +#define is_cp_cst(e) (is_ld_cst(e) && (e)->VL_IDF == 0) + +/* a floating constant expression ? +*/ +#define is_fp_cst(e) ((e)->ex_class == Float) + +/* some bits for the ex_flag field, to keep track of various + interesting properties of an expression. +*/ +#define EX_SIZEOF 001 /* contains sizeof operator */ +#define EX_CAST 002 /* contains cast */ +#define EX_LOGICAL 004 /* contains logical operator */ +#define EX_COMMA 010 /* contains expression comma */ +#define EX_PARENS 020 /* the top level is parenthesized */ + +#define NILEXPR ((struct expr *)0) + +extern struct expr *intexpr(), *new_oper(); + + +/* allocation definitions of struct expr */ +/* ALLOCDEF "expr" */ +extern char *st_alloc(); +extern struct expr *h_expr; +#define new_expr() ((struct expr *) \ + st_alloc((char **)&h_expr, sizeof(struct expr))) +#define free_expr(p) st_free(p, h_expr, sizeof(struct expr)) + + +#define ISCOMMA(e) ((e)->ex_class == Oper && (e)->OP_OPER == INITCOMMA) diff --git a/lang/cem/cemcom/expression.g b/lang/cem/cemcom/expression.g new file mode 100644 index 000000000..94976c645 --- /dev/null +++ b/lang/cem/cemcom/expression.g @@ -0,0 +1,371 @@ +/* $Header$ */ +/* EXPRESSION SYNTAX PARSER */ + +{ +#include "arith.h" +#include "LLlex.h" +#include "type.h" +#include "idf.h" +#include "label.h" +#include "expr.h" + +extern char options[]; +extern struct expr *intexpr(); +} + +/* 7 */ +initial_value(struct expr **expp;) : +[ + assignment_expression(expp) + { + if ((*expp)->ex_type->tp_fund == ARRAY) + array2pointer(expp); + } +| + initial_value_pack(expp) +] +; + +initial_value_pack(struct expr **expp;) : + '{' + initial_value_list(expp) + '}' +; + +initial_value_list(struct expr **expp;) + {struct expr *e1;} +: + {*expp = NILEXPR;} + initial_value(&e1) + {init_expression(&expp, e1);} + [%while (AHEAD != '}') /* >>> conflict on ',' */ + ',' + initial_value(&e1) + {init_expression(&expp, e1);} + ]* + ','? /* optional trailing comma */ +; + + +/* 7.1 */ +primary(struct expr **expp;) : +[ + IDENTIFIER + {dot2expr(expp);} +| + constant(expp) +| + STRING + {dot2expr(expp);} +| + '(' expression(expp) ')' + {(*expp)->ex_flags |= EX_PARENS;} +] +; + +secundary(struct expr **expp;) : + primary(expp) + [ + index_pack(expp) + | + parameter_pack(expp) + | + selection(expp) + ]* +; + +index_pack(struct expr **expp;) + {struct expr *e1;} +: + '[' expression(&e1) ']' + {ch7bin(expp, '[', e1);} +; + +parameter_pack(struct expr **expp;) + {struct expr *e1 = 0;} +: + '(' parameter_list(&e1)? ')' + {ch7bin(expp, '(', e1);} +; + +selection(struct expr **expp;) + {int oper; struct idf *idf;} +: + [ '.' | ARROW ] + {oper = DOT;} + identifier(&idf) + {ch7sel(expp, oper, idf);} +; + +parameter_list(struct expr **expp;) + {struct expr *e1 = 0;} +: + assignment_expression(expp) + {any2opnd(expp, PARCOMMA);} + [ ',' + assignment_expression(&e1) + {any2opnd(&e1, PARCOMMA);} + {ch7bin(expp, PARCOMMA, e1);} + ]* +; + +/* 7.2 */ +postfixed(struct expr **expp;) + {int oper;} +: + secundary(expp) + [ + postop(&oper) + {ch7incr(expp, oper);} + | + empty + ] +; + +%first first_of_type_specifier, type_specifier; + +unary(struct expr **expp;) + {struct type *tp; int oper;} +: +[%if (first_of_type_specifier(AHEAD)) + cast(&tp) unary(expp) + { ch7cast(expp, CAST, tp); + (*expp)->ex_flags |= EX_CAST; + } +| + postfixed(expp) +| + unop(&oper) unary(expp) + {ch7mon(oper, expp);} +| + size_of(expp) +] +; + +size_of(struct expr **expp;) + {struct type *tp;} +: + SIZEOF + [%if (first_of_type_specifier(AHEAD)) + cast(&tp) + { + *expp = intexpr(size_of_type(tp, "type"), INT); + (*expp)->ex_flags |= EX_SIZEOF; + } + | + unary(expp) + {ch7mon(SIZEOF, expp);} + ] +; + +/* 7.3-7.12 */ +/* The set of operators in C is stratified in 15 levels, with level + N being treated in RM 7.N. In principle each operator is + assigned a rank, ranging from 1 to 15. Such an expression can + be parsed by a construct like: + binary_expression(int maxrank;) + {int oper;} + : + binary_expression(maxrank - 1) + [%if (rank_of(DOT) <= maxrank) + binop(&oper) + binary_expression(rank_of(oper)-1) + ]? + ; + except that some call of 'unary' is necessary, depending on the + grammar. + + This simple view is marred by three complications: + 1. Level 15 (comma operator) is not allowed in many + contexts and is different. + 2. Level 13 (conditional operator) is a ternary operator, + which does not fit this scheme at all. + 3. Level 14 (assignment operators) group right-to-left, as + opposed to 2-12, which group left-to-right (or are + immaterial). + 4. The operators in level 14 start with operators in levels + 2-13 (RM 7.14: The two parts of a compound assignment + operator are separate tokens.) This causes LL1 problems. + This forces us to have four rules: + binary_expression for level 2-12 + conditional_expression for level 13 + assignment_expression for level 14 and + expression for the most general expression +*/ + +binary_expression(int maxrank; struct expr **expp;) + {int oper; struct expr *e1;} +: + unary(expp) + [%while (rank_of(DOT) <= maxrank && AHEAD != '=') + /* '?', '=', and ',' are no binops, and the test + for AHEAD != '=' keeps the other assignment + operators out + */ + binop(&oper) + binary_expression(rank_of(oper)-1, &e1) + { + ch7bin(expp, oper, e1); + } + ]* +; + +/* 7.13 */ +conditional_expression(struct expr **expp;) +/* There is some unfortunate disagreement about what is allowed + between the '?' and the ':' of a conditional_expression. + Although the Ritchie compiler does not even allow + conditional_expressions there, some other compilers (e.g., VAX) + accept a full assignment_expression there, and programs + (like, e.g., emacs) rely on it. So we have little choice. +*/ + {struct expr *e1 = 0, *e2 = 0;} +: + /* allow all binary operators */ + binary_expression(rank_of('?') - 1, expp) + [ '?' + expression(&e1) + {check_conditional(e1, '?', "between ? and :");} + ':' + assignment_expression(&e2) + {check_conditional(e2, '=', "after :");} + { + ch7bin(&e1, ':', e2); + opnd2test(expp, NOTEQUAL); + ch7bin(expp, '?', e1); + } + ]? +; + +/* 7.14 */ +assignment_expression(struct expr **expp;) + { + int oper; + struct expr *e1 = 0; + } +: + conditional_expression(expp) + [%prefer /* (rank_of(DOT) <= maxrank) for any asgnop */ + asgnop(&oper) + assignment_expression(&e1) + {ch7asgn(expp, oper, e1);} + | + empty /* LLgen artefact ??? */ + ] +; + +/* 7.15 */ +expression(struct expr **expp;) + {struct expr *e1;} +: + assignment_expression(expp) + [ ',' + assignment_expression(&e1) + { + ch7bin(expp, ',', e1); + } + ]* +; + +unop(int *oper;) : + ['*' | '&' | '-' | '!' | '~' | PLUSPLUS | MINMIN] + {*oper = DOT;} +; + +postop(int *oper;): +[ + PLUSPLUS {*oper = POSTINCR;} +| + MINMIN {*oper = POSTDECR;} +] +; + +multop: + '*' | '/' | '%' +; + +addop: + '+' | '-' +; + +shiftop: + LEFT | RIGHT +; + +relop: + '<' | '>' | LESSEQ | GREATEREQ +; + +eqop: + EQUAL | NOTEQUAL +; + +arithop: + multop | addop | shiftop +| + '&' | '^' | '|' +; + +binop(int *oper;) : + [ arithop | relop | eqop | AND | OR ] + {*oper = DOT;} +; + +asgnop(int *oper;): +[ + '=' {*oper = DOT;} +| + '+' '=' {*oper = PLUSAB;} +| + '-' '=' {*oper = MINAB;} +| + '*' '=' {*oper = TIMESAB;} +| + '/' '=' {*oper = DIVAB;} +| + '%' '=' {*oper = MODAB;} +| + LEFT '=' {*oper = LEFTAB;} +| + RIGHT '=' {*oper = RIGHTAB;} +| + '&' '=' {*oper = ANDAB;} +| + '^' '=' {*oper = XORAB;} +| + '|' '=' {*oper = ORAB;} +| + [ PLUSAB | MINAB | TIMESAB | DIVAB | MODAB | + LEFTAB | RIGHTAB | ANDAB | XORAB | ORAB ] + { + char *symbol2str(); + + warning("old-fashioned assignment operator, use %s", + symbol2str(DOT)); + *oper = DOT; + } +] +; + +constant(struct expr **expp;) : +[ + INTEGER +| + FLOATING +] {dot2expr(expp);} +; + +/* 15 */ +constant_expression (struct expr **expp;) : + assignment_expression(expp) + {chk_cst_expr(expp);} +; + +identifier(struct idf **idfp;) : +[ + IDENTIFIER +| + TYPE_IDENTIFIER +] + {*idfp = dot.tk_idf;} +; diff --git a/lang/cem/cemcom/faulty.h b/lang/cem/cemcom/faulty.h new file mode 100644 index 000000000..8b1a754a0 --- /dev/null +++ b/lang/cem/cemcom/faulty.h @@ -0,0 +1,5 @@ +/* $Header$ */ +/* FAULTY DEFINITIONS */ + +#define faulty(tp) ((tp)_faulty(__FILE__, __LINE__)) +#define fault() (_faulty(__FILE__, __LINE__)) diff --git a/lang/cem/cemcom/field.c b/lang/cem/cemcom/field.c new file mode 100644 index 000000000..d9cc1e29b --- /dev/null +++ b/lang/cem/cemcom/field.c @@ -0,0 +1,199 @@ +/* $Header$ */ +/* BITFIELD EXPRESSION EVALUATOR */ + +#include "nobitfield.h" + +#ifndef NOBITFIELD +#include "debug.h" + +#include "arith.h" +#include "type.h" +#include "idf.h" +#include "label.h" +#include "code.h" +#include "assert.h" +#include "expr.h" +#include "sizes.h" +#include "Lpars.h" +#include "field.h" +#include "em.h" + +arith tmp_pointer_var(); /* eval.c */ +char *symbol2str(); /* symbol2str.c */ + +/* Eval_field() evaluates expressions involving bit fields. + The various instructions are not yet optimised in the expression + tree and are therefore dealt with in this function. + The actions taken at any operation are described clearly by the + code for this actions. + Note: the bitfields are packed in target machine integers! +*/ +eval_field(expr, code) + struct expr *expr; + int code; +{ + int op = expr->OP_OPER; + struct expr *leftop = expr->OP_LEFT; + struct expr *rightop = expr->OP_RIGHT; + struct field *fd = leftop->ex_type->tp_field; + struct type *tp = leftop->ex_type->tp_up; + arith old_offset, tmpvar; + + /* The type in which the bitfield arithmetic is done: + */ + struct type *atype = tp->tp_unsigned ? uword_type : word_type; + arith asize = atype->tp_size; + + ASSERT(leftop->ex_type->tp_fund == FIELD); + ASSERT(asize == word_size); /* make sure that C_loc() is legal */ + + leftop->ex_type = atype; /* this is cheating but it works... */ + + /* Note that op is either an assignment operator or an increment/ + decrement operator + */ + if (op == '=') { + /* F = E: f = ((E & mask)<fd_mask); + C_and(asize); + if (code == TRUE) { + C_dup(asize); + } + C_loc((arith)fd->fd_shift); + + if (atype->tp_unsigned) + C_slu(asize); + else + C_sli(asize); + + C_loc(~((fd->fd_mask << fd->fd_shift) | (~0 << (8 * asize)))); + + if (leftop->ex_depth == 0) { /* simple case */ + load_val(leftop, RVAL); + C_and(asize); + C_ior(asize); + store_val( + leftop->VL_IDF, + leftop->ex_type, + leftop->VL_VALUE + ); + } + else { /* complex case */ + tmpvar = tmp_pointer_var(&old_offset); + EVAL(leftop, LVAL, TRUE, NO_LABEL, NO_LABEL); + C_dup(pointer_size); + C_lal(tmpvar); + C_sti(pointer_size); + C_loi(asize); + C_and(asize); + C_ior(asize); + C_lal(tmpvar); + C_loi(pointer_size); + C_sti(asize); + free_tmp_var(old_offset); + } + } + else { /* treat ++F as F += 1 and --F as F -= 1 */ + + /* F op= e: f = (((((f>>shift)&mask) op e)&mask)<ex_depth == 0) { /* simple case */ + load_val(leftop, RVAL); + } + else { /* complex case */ + tmpvar = tmp_pointer_var(&old_offset); + EVAL(leftop, LVAL, TRUE, NO_LABEL, NO_LABEL); + C_dup(pointer_size); + C_lal(tmpvar); + C_sti(pointer_size); + C_loi(asize); + } + + C_loc((arith)fd->fd_shift); + + if (atype->tp_unsigned) + C_sru(asize); + else + C_sri(asize); + + C_loc(fd->fd_mask); + C_and(asize); + + if (code == TRUE && (op == POSTINCR || op == POSTDECR)) { + C_dup(asize); + } + + EVAL(rightop, RVAL, TRUE, NO_LABEL, NO_LABEL); + conversion(tp, atype); + + /* generate the code for the operator + */ + if (op == PLUSPLUS || op == POSTINCR) + assop(atype, PLUSAB); + else + if (op == MINMIN || op == POSTDECR) + assop(atype, MINAB); + else + assop(atype, op); + + C_loc(fd->fd_mask); + C_and(asize); + + if (code == TRUE && op != POSTINCR && op != POSTDECR) { + C_dup(asize); + } + + C_loc((arith)fd->fd_shift); + + if (atype->tp_unsigned) + C_slu(asize); + else + C_sli(asize); + + C_loc(~((fd->fd_mask << fd->fd_shift) | (~0 << (8 * asize)))); + + if (leftop->ex_depth == 0) { + load_val(leftop, RVAL); + C_and(asize); + C_ior(asize); + store_val( + leftop->VL_IDF, + leftop->ex_type, + leftop->VL_VALUE + ); + } + else { + C_lal(tmpvar); + C_loi(pointer_size); + C_loi(asize); + C_and(asize); + C_ior(asize); + C_lal(tmpvar); + C_loi(pointer_size); + C_sti(asize); + free_tmp_var(old_offset); + } + } + + if (code == TRUE) { + /* Take care that the effective value stored in + the bit field (i.e. the value that is got on + retrieval) is on top of stack. + */ + if (atype->tp_unsigned == 0) { /* sign extension */ + register arith shift = asize * 8 - fd->fd_width; + + C_loc(shift); + C_sli(asize); + C_loc(shift); + C_sri(asize); + } + + conversion(atype, tp); + } +} +#endif NOBITFIELD diff --git a/lang/cem/cemcom/field.h b/lang/cem/cemcom/field.h new file mode 100644 index 000000000..794920b66 --- /dev/null +++ b/lang/cem/cemcom/field.h @@ -0,0 +1,20 @@ +/* $Header$ */ +/* FIELD DESCRIPTOR */ + +struct field { /* for field specifiers */ + struct field *next; + arith fd_mask; + int fd_shift; + int fd_width; + struct sdef *fd_sdef; /* upward pointer */ +}; + + +/* allocation definitions of struct field */ +/* ALLOCDEF "field" */ +extern char *st_alloc(); +extern struct field *h_field; +#define new_field() ((struct field *) \ + st_alloc((char **)&h_field, sizeof(struct field))) +#define free_field(p) st_free(p, h_field, sizeof(struct field)) + diff --git a/lang/cem/cemcom/field.str b/lang/cem/cemcom/field.str new file mode 100644 index 000000000..794920b66 --- /dev/null +++ b/lang/cem/cemcom/field.str @@ -0,0 +1,20 @@ +/* $Header$ */ +/* FIELD DESCRIPTOR */ + +struct field { /* for field specifiers */ + struct field *next; + arith fd_mask; + int fd_shift; + int fd_width; + struct sdef *fd_sdef; /* upward pointer */ +}; + + +/* allocation definitions of struct field */ +/* ALLOCDEF "field" */ +extern char *st_alloc(); +extern struct field *h_field; +#define new_field() ((struct field *) \ + st_alloc((char **)&h_field, sizeof(struct field))) +#define free_field(p) st_free(p, h_field, sizeof(struct field)) + diff --git a/lang/cem/cemcom/idf.c b/lang/cem/cemcom/idf.c new file mode 100644 index 000000000..f29f43ba5 --- /dev/null +++ b/lang/cem/cemcom/idf.c @@ -0,0 +1,697 @@ +/* $Header$ */ +/* IDENTIFIER FIDDLING & SYMBOL TABLE HANDLING */ + +#include "debug.h" +#include "idfsize.h" +#include "botch_free.h" +#include "nopp.h" +#include "alloc.h" +#include "arith.h" +#include "align.h" +#include "LLlex.h" +#include "level.h" +#include "stack.h" +#include "idf.h" +#include "label.h" +#include "def.h" +#include "type.h" +#include "struct.h" +#include "declarator.h" +#include "decspecs.h" +#include "sizes.h" +#include "Lpars.h" +#include "assert.h" +#include "specials.h" /* registration of special identifiers */ +#include "storage.h" + +int idfsize = IDFSIZE; +extern char options[]; + +char sp_occurred[SP_TOTAL]; /* indicate occurrence of special id */ + +struct idf *idf_hashtable[HASHSIZE]; + /* All identifiers can in principle be reached through + idf_hashtable; idf_hashtable[hc] is the start of a chain of + idf's whose tags all hash to hc. Each idf is the start of + a chain of def's for that idf, sorted according to level, + with the most recent one on top. + Any identifier occurring on a level is entered into this + list, regardless of the nature of its declaration + (variable, selector, structure tag, etc.). + */ + +struct idf * +idf_hashed(tg, size, hc) + char *tg; + int size; /* includes the '\0' character */ + int hc; +{ + /* The tag tg with length size and known hash value hc is + looked up in the identifier table; if not found, it is + entered. A pointer to it is returned. + The identifier has already been truncated to idfsize + characters. + */ + register struct idf **hook = &idf_hashtable[hc], *notch; + + while ((notch = *hook)) { + register cmp = strcmp(tg, notch->id_text); + + if (cmp < 0) + break; + else + if (cmp == 0) { + /* suppose that special identifiers, as + "setjmp", are already inserted + */ + sp_occurred[notch->id_special] = 1; + return notch; + } + else + hook = ¬ch->next; + } + /* a new struct idf must be inserted at the hook */ + notch = new_idf(); + clear((char *)notch, sizeof(struct idf)); + notch->next = *hook; + *hook = notch; /* hooked in */ + notch->id_text = Salloc(tg, size); +#ifndef NOPP + notch->id_resmac = 0; +#endif NOPP + return notch; +} + +#ifdef DEBUG +hash_stat() +{ + if (options['h']) { + int i; + + printf("Hash table tally:\n"); + for (i = 0; i < HASHSIZE; i++) { + struct idf *notch = idf_hashtable[i]; + int cnt = 0; + + while (notch) { + cnt++; + notch = notch->next; + } + printf("%d %d\n", i, cnt); + } + printf("End hash table tally\n"); + } +} +#endif DEBUG + +struct idf * +str2idf(tg) + char tg[]; +{ + /* str2idf() returns an entry in the symbol table for the + identifier tg. If necessary, an entry is created. + It is used where the text of the identifier is available + but its hash value is not; otherwise idf_hashed() is to + be used. + */ + register char *cp = tg; + register int hash; + register int pos = -1; + register int ch; + char ntg[IDFSIZE + 1]; + register char *ncp = ntg; + + hash = STARTHASH(); + while (++pos < idfsize && (ch = *cp++)) { + *ncp++ = ch; + hash = ENHASH(hash, ch, pos); + } + hash = STOPHASH(hash); + *ncp++ = '\0'; + return idf_hashed(ntg, ncp - ntg, hash); +} + +struct idf * +gen_idf() +{ + /* A new idf is created out of nowhere, to serve as an + anonymous name. + */ + static int name_cnt; + char buff[100]; + char *sprintf(); + + sprintf(buff, "#%d in %s, line %u", + ++name_cnt, dot.tk_file, dot.tk_line); + return str2idf(buff); +} + +int +is_anon_idf(idf) + struct idf *idf; +{ + return idf->id_text[0] == '#'; +} + +declare_idf(ds, dc, lvl) + struct decspecs *ds; + struct declarator *dc; +{ + /* The identifier inside dc is declared on the level lvl, with + properties deduced from the decspecs ds and the declarator + dc. + The level is given explicitly to be able to insert, e.g., + labels on the outermost level inside the function. + This routine implements the rich semantics of C + declarations. + */ + register struct idf *idf = dc->dc_idf; + register int sc = ds->ds_sc; + /* This local copy is essential: + char b(), c; + makes b GLOBAL and c AUTO. + */ + register struct def *def = idf->id_def; /* may be NULL */ + register struct type *type; + struct stack_level *stl = stack_level_of(lvl); + char formal_array = 0; + + /* determine the present type */ + if (ds->ds_type == 0) { + /* at the L_FORMAL1 level there is no type specified yet + */ + ASSERT(lvl == L_FORMAL1); + type = 0; + } + else { + /* combine the decspecs and the declarator into one type */ + type = declare_type(ds->ds_type, dc); + if (type->tp_size == (arith)-1) { + /* the type is not yet known */ + if (actual_declaration(sc, type)) { + /* but it has to be: */ + extern char *symbol2str(); + error("unknown %s-type", + symbol2str(type->tp_fund)); + } + } + } + + /* some additional work for formal definitions */ + if (lvl == L_FORMAL2) { + switch (type->tp_fund) { + + case FUNCTION: + warning("%s is a function; cannot be formal", + idf->id_text); + type = construct_type(POINTER, type, (arith)0); + break; + case ARRAY: /* RM 10.1 */ + type = construct_type(POINTER, type->tp_up, (arith)0); + formal_array = 1; + break; + case FLOAT: /* RM 10.1 */ + type = double_type; + break; + case CHAR: + case SHORT: + /* The RM is not clear about this: we must + convert the parameter from int (they have + been pushed as ints) to the specified type. + The conversion to type int or uint is not + allowed. + */ + break; + } + } + + /* The tests on types, postponed from do_decspecs(), can now + be performed. + */ + /* update the storage class */ + if (type && type->tp_fund == FUNCTION) { + if (sc == 0 || (ds->ds_sc_given && sc == AUTO)) /* RM 8.1 */ + sc = GLOBAL; + else + if (sc == REGISTER) { + error("function has illegal storage class"); + ds->ds_sc = sc = GLOBAL; + } + } + else { /* non-FUNCTION */ + if (sc == 0) + sc = + lvl == L_GLOBAL ? + GLOBAL : + lvl == L_FORMAL1 || lvl == L_FORMAL2 ? + FORMAL : + AUTO; + } + + if (options['R']) { + /* some special K & R tests */ + + /* is it also an enum? */ + if (idf->id_enum && idf->id_enum->tg_level == level) + warning("%s is also an enum tag", idf->id_text); + + /* is it a universal typedef? */ + if (def && def->df_level == L_UNIVERSAL) + warning("redeclaring reserved word %s", idf->id_text); + } + if (def && def->df_level >= lvl) { + /* There is already a declaration for idf on this + level, or even more inside. + The rules differ for different levels. + */ + switch (lvl) { + case L_GLOBAL: + global_redecl(idf, sc, type); + break; + case L_FORMAL1: /* formal declaration */ + error("formal %s redeclared", idf->id_text); + break; + case L_FORMAL2: /* formal definition */ + default: /* local */ + error("%s redeclared", idf->id_text); + break; + } + } + else /* the idf is unknown on this level */ + if (lvl == L_FORMAL2 && sc != ENUM && good_formal(def, idf)) { + /* formal declaration, update only */ + def->df_type = type; + def->df_formal_array = formal_array; + def->df_sc = sc; + if (def->df_sc != FORMAL) + crash("non-formal formal"); + def->df_register = (sc == REGISTER) ? REG_BONUS : REG_DEFAULT; + } + else + if ( lvl >= L_LOCAL && + (type->tp_fund == FUNCTION || sc == EXTERN) + ) { + /* extern declaration inside function is treated the + same way as global extern declaration + */ + if ( options['R'] && + (sc == STATIC && type->tp_fund == FUNCTION) + ) { + if (!is_anon_idf(idf)) + warning("non-global static function %s", + idf->id_text); + } + declare_idf(ds, dc, L_GLOBAL); + } + else { + /* fill in the def block */ + register struct def *newdef = new_def(); + + clear((char *)newdef, sizeof(struct def)); + newdef->next = def; + newdef->df_level = lvl; + newdef->df_type = type; + newdef->df_sc = sc; + + /* link it into the name list in the proper place */ + idf->id_def = newdef; + update_ahead(idf); + stack_idf(idf, stl); + + /* We now calculate the address. + Globals have names and don't get addresses, they + get numbers instead (through data_label()). + Formals are handled by declare_formals(). + So here we hand out local addresses only. + */ + + if (lvl >= L_LOCAL) { + switch (sc) { + case 0: + crash("local sc == 0"); + break; + case REGISTER: + case AUTO: + if (type->tp_size == (arith)-1) { + error("size of local \"%s\" unknown", + idf->id_text); + type = idf->id_def->df_type = int_type; + } + idf->id_def->df_register = + (sc == REGISTER) + ? REG_BONUS : REG_DEFAULT; + idf->id_def->df_address = + stl->sl_max_block = + stl->sl_local_offset = + -align(-stl->sl_local_offset + + type->tp_size, type->tp_align); + break; + case STATIC: + idf->id_def->df_address = (arith) data_label(); + break; + } + } + } +} + +actual_declaration(sc, tp) + struct type *tp; +{ + /* An actual_declaration needs space, right here and now. + */ + register int fund = tp->tp_fund; + + /* virtual declarations */ + if (sc == ENUM || sc == TYPEDEF) + return 0; + /* allocation solved in other ways */ + if (fund == FUNCTION || fund == ARRAY) + return 0; + /* to be allocated */ + return 1; +} + +global_redecl(idf, new_sc, tp) + struct idf *idf; + struct type *tp; +{ + /* A global identifier may be declared several times, + provided the declarations do not conflict; they might + conflict in type (or supplement each other in the case of + an array) or they might conflict or supplement each other + in storage class. + */ + register struct def *def = idf->id_def; + + if (tp != def->df_type) { + struct type *otp = def->df_type; + + if ( tp->tp_fund != ARRAY || otp->tp_fund != ARRAY || + tp->tp_up != otp->tp_up + ) { + error("redeclaration of %s with different type", + idf->id_text); + return; + } + /* Multiple array declaration; this may be interesting */ + if (tp->tp_size < 0) { /* new decl has [] */ + /* nothing new */ + } + else + if (otp->tp_size < 0) { /* old decl has [] */ + def->df_type = tp; + } + else + if (tp->tp_size != otp->tp_size) + error("inconsistent size in redeclaration of array %s", + idf->id_text); + } + + /* Now we may be able to update the storage class. */ + /* Clean out this mess as soon as we know all the possibilities + for new_sc. + For now we have: + EXTERN: we have seen the word "extern" + GLOBAL: the item was declared on the outer + level, without either "extern" or + "static". + STATIC: we have seen the word "static" + IMPLICIT: function declaration inferred from + call + */ + if (new_sc == IMPLICIT) + return; /* no new information */ + + switch (def->df_sc) { /* the old storage class */ + + case EXTERN: + switch (new_sc) { /* the new storage class */ + + case EXTERN: + case GLOBAL: + break; + case STATIC: + if (def->df_initialized) { + error("cannot redeclare %s to static", + idf->id_text); + } + else { + warning("%s redeclared to static", + idf->id_text); + def->df_sc = STATIC; + } + def->df_sc = new_sc; + break; + default: + crash("bad storage class"); + break; + } + break; + + case GLOBAL: + switch (new_sc) { /* the new storage class */ + + case EXTERN: + def->df_sc = EXTERN; + break; + case GLOBAL: + break; + case STATIC: + if (def->df_initialized) { + error("cannot redeclare %s to static", + idf->id_text); + } + else { + if (options['R']) + warning("%s redeclared to static", + idf->id_text); + def->df_sc = STATIC; + } + break; + default: + crash("bad storage class"); + break; + } + break; + + case STATIC: + switch (new_sc) { /* the new storage class */ + + case EXTERN: + if (def->df_initialized) { + error("cannot redeclare %s to extern", + idf->id_text); + } + else { + warning("%s redeclared to extern", + idf->id_text); + def->df_sc = EXTERN; + } + break; + case GLOBAL: + case STATIC: + if (def->df_type->tp_fund != FUNCTION) + warning("%s was already static", + idf->id_text); + break; + default: + crash("bad storage class"); + break; + } + break; + + case IMPLICIT: + switch (new_sc) { /* the new storage class */ + + case EXTERN: + case GLOBAL: + def->df_sc = new_sc; + break; + case STATIC: + if (options['R']) + warning("%s was implicitly declared as extern", + idf->id_text); + def->df_sc = new_sc; + break; + default: + crash("bad storage class"); + break; + } + break; + + case ENUM: + case TYPEDEF: + error("illegal redeclaration of %s", idf->id_text); + break; + default: + crash("bad storage class"); + break; + } +} + +int +good_formal(def, idf) + register struct def *def; + struct idf *idf; +{ + /* Succeeds if def is a proper L_FORMAL1 definition and + gives an error message otherwise. + */ + if (!def || def->df_level != L_FORMAL1) { + /* not in parameter list */ + if (!is_anon_idf(idf)) + error("%s not in parameter list", + idf->id_text); + return 0; + } + return 1; +} + +declare_params(dc) + struct declarator *dc; +{ + /* Declares the formal parameters if they exist. + */ + register struct idstack_item *is = dc->dc_fparams; + + while (is) { + declare_parameter(is->is_idf); + is = is->next; + } + del_idfstack(dc->dc_fparams); + dc->dc_fparams = 0; +} + +init_idf(idf) + struct idf *idf; +{ + /* The topmost definition of idf is set to initialized. + */ + register struct def *def = idf->id_def; /* the topmost */ + + if (def->df_initialized) + error("multiple initialization of %s", idf->id_text); + if (def->df_sc == TYPEDEF) { + warning("typedef cannot be initialized"); + def->df_sc == EXTERN; /* ??? *//* What else ? */ + } + def->df_initialized = 1; +} + +declare_parameter(idf) + struct idf *idf; +{ + /* idf is declared as a formal. + */ + add_def(idf, FORMAL, (struct type *)0, level); +} + +declare_enum(tp, idf, l) + struct type *tp; + struct idf *idf; + arith l; +{ + /* idf is declared as an enum constant with value l. + */ + add_def(idf, ENUM, tp, level); + idf->id_def->df_address = l; +} + +declare_formals(fp) + arith *fp; +{ + /* Declares those formals as int that haven't been declared + by the user. + An address is assigned to each formal parameter. + The total size of the formals is returned in *fp; + */ + struct stack_entry *se = stack_level_of(L_FORMAL1)->sl_entry; + arith f_offset = (arith)0; + +#ifdef DEBUG + if (options['t']) + dumpidftab("start declare_formals", 0); +#endif DEBUG + while (se) { + struct idf *idf = se->se_idf; + struct def *def = idf->id_def; + + if (def->df_type == 0) + def->df_type = int_type; /* default type */ + def->df_address = f_offset; + + /* the alignment convention for parameters is: align on + word boundaries, i.e. take care that the following + parameter starts on a new word boundary. + */ + f_offset = align(f_offset + def->df_type->tp_size, + word_align); + + /* the following is absurd: any char or short formal + must be converted from integer to that type + */ + formal_cvt(def); + se = se->next; + } + *fp = f_offset; +} + +add_def(idf, sc, tp, lvl) + struct idf *idf; + struct type *tp; + int lvl; + int sc; +{ + /* The identifier idf is declared on level lvl with storage + class sc and type tp, through a faked C declaration. + This is probably the wrong way to structure the problem, + but it will have to do for the time being. + */ + struct decspecs Ds; struct declarator Dc; + + Ds = null_decspecs; + Ds.ds_type = tp; + Ds.ds_sc = sc; + Dc = null_declarator; + Dc.dc_idf = idf; + declare_idf(&Ds, &Dc, lvl); +} + +update_ahead(idf) + register struct idf *idf; +{ + /* The tk_symb of the token ahead is updated in the light of new + information about the identifier idf. + */ + register int tk_symb = AHEAD; + + if ( (tk_symb == IDENTIFIER || tk_symb == TYPE_IDENTIFIER) && + ahead.tk_idf == idf + ) + AHEAD = idf->id_def && idf->id_def->df_sc == TYPEDEF ? + TYPE_IDENTIFIER : IDENTIFIER; +} + +del_idfstack(is) + struct idstack_item *is; +{ + while (is) { + register struct idstack_item *tmp = is->next; + free_idstack_item(is); + is = tmp; + } +} + +char hmask[IDFSIZE]; + +init_hmask() { + /* A simple congruence random number generator, as + described in Knuth, vol 2. + */ + int h, rnd = HASH_X; + + for (h = 0; h < IDFSIZE; h++) { + hmask[h] = rnd; + rnd = (HASH_A * rnd + HASH_C) & HASHMASK; + } +} diff --git a/lang/cem/cemcom/idf.h b/lang/cem/cemcom/idf.h new file mode 100644 index 000000000..12496de18 --- /dev/null +++ b/lang/cem/cemcom/idf.h @@ -0,0 +1,68 @@ +/* $Header$ */ +/* IDENTIFIER DESCRIPTOR */ + +#include "nopp.h" + +/* Since the % operation in the calculation of the hash function + turns out to be expensive, it is replaced by the cheaper XOR (^). + Each character of the identifier is xored with an 8-bit mask which + depends on the position of the character; the sum of these results + is the hash value. The random masks are obtained from a + congruence generator in idf.c. +*/ + +#define HASHSIZE 256 /* must be a power of 2 */ +#define HASH_X 0253 /* Knuth's X */ +#define HASH_A 77 /* Knuth's a */ +#define HASH_C 153 /* Knuth's c */ + +extern char hmask[]; /* the random masks */ +#define HASHMASK (HASHSIZE-1) /* since it is a power of 2 */ +#define STARTHASH() (0) +#define ENHASH(hs,ch,ps) (hs + (ch ^ hmask[ps])) +#define STOPHASH(hs) (hs & HASHMASK) + +struct idstack_item { /* stack of identifiers */ + struct idstack_item *next; + struct idf *is_idf; +}; + + +/* allocation definitions of struct idstack_item */ +/* ALLOCDEF "idstack_item" */ +extern char *st_alloc(); +extern struct idstack_item *h_idstack_item; +#define new_idstack_item() ((struct idstack_item *) \ + st_alloc((char **)&h_idstack_item, sizeof(struct idstack_item))) +#define free_idstack_item(p) st_free(p, h_idstack_item, sizeof(struct idstack_item)) + + +struct idf { + struct idf *next; + char *id_text; +#ifndef NOPP + struct macro *id_macro; + int id_resmac; /* if nonzero: keyword of macroproc. */ +#endif NOPP + int id_reserved; /* non-zero for reserved words */ + struct def *id_def; /* variables, typedefs, enum-constants */ + struct sdef *id_sdef; /* selector tags */ + struct tag *id_struct; /* struct and union tags */ + struct tag *id_enum; /* enum tags */ + int id_special; /* special action needed at occurrence */ +}; + + +/* allocation definitions of struct idf */ +/* ALLOCDEF "idf" */ +extern char *st_alloc(); +extern struct idf *h_idf; +#define new_idf() ((struct idf *) \ + st_alloc((char **)&h_idf, sizeof(struct idf))) +#define free_idf(p) st_free(p, h_idf, sizeof(struct idf)) + + +extern struct idf *str2idf(), *idf_hashed(); + +extern int level; +extern struct idf *gen_idf(); diff --git a/lang/cem/cemcom/idf.str b/lang/cem/cemcom/idf.str new file mode 100644 index 000000000..12496de18 --- /dev/null +++ b/lang/cem/cemcom/idf.str @@ -0,0 +1,68 @@ +/* $Header$ */ +/* IDENTIFIER DESCRIPTOR */ + +#include "nopp.h" + +/* Since the % operation in the calculation of the hash function + turns out to be expensive, it is replaced by the cheaper XOR (^). + Each character of the identifier is xored with an 8-bit mask which + depends on the position of the character; the sum of these results + is the hash value. The random masks are obtained from a + congruence generator in idf.c. +*/ + +#define HASHSIZE 256 /* must be a power of 2 */ +#define HASH_X 0253 /* Knuth's X */ +#define HASH_A 77 /* Knuth's a */ +#define HASH_C 153 /* Knuth's c */ + +extern char hmask[]; /* the random masks */ +#define HASHMASK (HASHSIZE-1) /* since it is a power of 2 */ +#define STARTHASH() (0) +#define ENHASH(hs,ch,ps) (hs + (ch ^ hmask[ps])) +#define STOPHASH(hs) (hs & HASHMASK) + +struct idstack_item { /* stack of identifiers */ + struct idstack_item *next; + struct idf *is_idf; +}; + + +/* allocation definitions of struct idstack_item */ +/* ALLOCDEF "idstack_item" */ +extern char *st_alloc(); +extern struct idstack_item *h_idstack_item; +#define new_idstack_item() ((struct idstack_item *) \ + st_alloc((char **)&h_idstack_item, sizeof(struct idstack_item))) +#define free_idstack_item(p) st_free(p, h_idstack_item, sizeof(struct idstack_item)) + + +struct idf { + struct idf *next; + char *id_text; +#ifndef NOPP + struct macro *id_macro; + int id_resmac; /* if nonzero: keyword of macroproc. */ +#endif NOPP + int id_reserved; /* non-zero for reserved words */ + struct def *id_def; /* variables, typedefs, enum-constants */ + struct sdef *id_sdef; /* selector tags */ + struct tag *id_struct; /* struct and union tags */ + struct tag *id_enum; /* enum tags */ + int id_special; /* special action needed at occurrence */ +}; + + +/* allocation definitions of struct idf */ +/* ALLOCDEF "idf" */ +extern char *st_alloc(); +extern struct idf *h_idf; +#define new_idf() ((struct idf *) \ + st_alloc((char **)&h_idf, sizeof(struct idf))) +#define free_idf(p) st_free(p, h_idf, sizeof(struct idf)) + + +extern struct idf *str2idf(), *idf_hashed(); + +extern int level; +extern struct idf *gen_idf(); diff --git a/lang/cem/cemcom/init.c b/lang/cem/cemcom/init.c new file mode 100644 index 000000000..dbb5dec25 --- /dev/null +++ b/lang/cem/cemcom/init.c @@ -0,0 +1,107 @@ +/* $Header$ */ +/* PREPROCESSOR: INITIALIZATION ROUTINES */ + +#include "nopp.h" + +#ifndef NOPP +#include "predefine.h" /* UF */ +#include "alloc.h" +#include "class.h" +#include "macro.h" +#include "idf.h" +#include "interface.h" +#include "system.h" +#include "string.h" + +PRIVATE struct mkey { + char *mk_reserved; + int mk_key; +} mkey[] = { + {"define", K_DEFINE}, + {"elif", K_ELIF}, + {"else", K_ELSE}, + {"endif", K_ENDIF}, + {"if", K_IF}, + {"ifdef", K_IFDEF}, + {"ifndef", K_IFNDEF}, + {"include", K_INCLUDE}, + {"line", K_LINE}, + {"undef", K_UNDEF}, + {0, K_UNKNOWN} +}; + +EXPORT +init_pp() +{ + time_type clock; + static char date[30]; + char *ctime(); + + /* Initialise the control line keywords (if, include, define, etc) + Although the lexical analyzer treats them as identifiers, the + control line handler can recognize them as keywords by the + id_resmac field of the identifier. + */ + { + register struct mkey *mk = &mkey[0]; + + while (mk->mk_reserved) { + struct idf *idf = str2idf(mk->mk_reserved); + + if (idf->id_resmac) + fatal("maximum identifier length insufficient"); + idf->id_resmac = mk->mk_key; + mk++; + } + } + + /* Initialize __DATE__, __FILE__ and __LINE__ macro + definitions. The compile-time specified predefined macros + are also predefined: if this file is compiled with + -DPREDEFINE="vax,pdp", the macro definitions "vax" and + "pdp" are predefined macros. + */ + /* __DATE__ */ + clock = sys_time((time_type *) 0); + strcpy(&date[1], ctime(&clock)); + date[26] = '\0'; /* zap nl */ + date[0] = date[25] = '"'; + macro_def(str2idf("__DATE__"), date, -1, 26, NOFLAG); + + /* __LINE__ */ + macro_def(str2idf("__LINE__"), "0", -1, 1, FUNC); + + /* __FILE__ */ + macro_def(str2idf("__FILE__"), "", -1, 1, FUNC); + +#ifdef PREDEFINE + { + /* PREDEFINE is a compile-time defined string + containing a number of identifiers to be + predefined at the host machine (for example + -DPREDEFINE="vax,unix,pmds"). + Note that PREDEF causes the identifier not + to be substituted. + */ + register char *s = PREDEFINE; + register char *id; + char c; + + for (;;) { + while (*s && class(*s++) != STIDF); + if (*s) { + /* gobble identifier */ + id = s - 1; + while (in_idf(*s++)); + c = *--s; + *s = '\0'; + macro_def(str2idf(id), "", -1, 0, PREDEF); + *s = c; + } + else + break; + } + } +#endif PREDEFINE +} +#endif NOPP diff --git a/lang/cem/cemcom/input.c b/lang/cem/cemcom/input.c new file mode 100644 index 000000000..e3015cdd7 --- /dev/null +++ b/lang/cem/cemcom/input.c @@ -0,0 +1,458 @@ +/* $Header$ */ +/* INPUT AND BUFFER HANDLING MODULE */ + +/* + [input.c input.h] + Input buffering module: this module contains the routines that + offers an input buffering mechanism to the user. + + This module exports the following objects: + InsertFile() : suspend input from current buffer and obtain the + next input characters from the specified file + InsertText() : suspend input from current buffer and take the + specified text as stream of input characters + LoadChar() : (defined in input.h) read next character from + the input ; LoadChar() invokes loadbuf() on + encounting a ASCII NUL character + NoUnstack : if set to non-zero: + loadbuf() reports "unexpected EOF" on encounting + the end-of-file or end-of-stacked-text. + + Imported objects are: + IDEPTH, DEBUG, READ_IN_ONE, PATHLENGTH: compile-time parameters + Malloc(), Salloc(): memory allocation routines + fatal(), lexerror(): exception handling + FileName, LineNumber, WorkingDir: input trace for lexical analyser + + READ_IN_ONE DEFINED: every input file is read into memory completely + and made an input buffer + READ_IN_ONE NOT DEFINED: the input from files is buffered in + a fixed length input buffer +*/ + +#include "nopp.h" +#include "inputtype.h" /* UF */ +#include "interface.h" +#include "arith.h" +#include "LLlex.h" +#include "input.h" +#include "alloc.h" +#include "system.h" +#include "bufsiz.h" + +#ifndef NOPP +#include "idepth.h" /* UF */ +#include "debug.h" /* UF */ +#include "pathlength.h" /* UF */ +#include "assert.h" +#endif NOPP + +EXPORT char *ipp = 0; /* input pointer */ +EXPORT int NoUnstack = 0; /* if 1: report EOF */ + +#ifndef READ_IN_ONE +PRIVATE int FilDes = -1; /* current input medium */ +#endif READ_IN_ONE + +#ifndef NOPP +struct buffer_header { + char *bh_name; /* file name where the text comes from */ + unsigned int bh_lineno; + /* current lineno in file */ + long bh_size; /* = strlen (text), should be unsigned */ + char *bh_text; /* pointer to buffer containing text */ + char *bh_ipp; /* current read pointer (= stacked ipp) */ + char *bh_wdir; /* directory of current file */ + int bh_fd; /* >= 0 (fd if !READ_IN_ONE) in case of file */ +}; + +PRIVATE struct buffer_header instack[IDEPTH]; /* stack of input media */ +PRIVATE struct buffer_header *head = 0; /* current input buffer */ + +IMPORT char **WorkingDir; /* name of current working directory */ +#else NOPP +long isize; +char ibuf[BUFSIZ]; +#endif NOPP + +#ifdef READ_IN_ONE +/* readfile() creates a buffer in which the text of the file + is situated. A pointer to the start of this text is + returned. *size is initialized with the buffer length. + Note that the file input buffer is prepared for the + preprocessor by inserting a '\n' in the beginning of the + text and appending a '\n' at the end of the text. The + file text start at position 1 of the input buffer. This is + done to allow pushback. +*/ + +PRIVATE char * +readfile(filename, size) + char *filename; + long *size; +{ + int fd; /* filedescriptor for `filename' */ + char *cbuf; /* pointer to buffer to be returned */ + register tmp; + + if ((fd = sys_open(filename, OP_RDONLY)) < 0) /* can't open this file */ + return (char *) 0; + + if ((*size = sys_fsize(fd)) < 0) + fatal("(readfile) cannot get size of file"); + + /* allocate enough space to store contents of the file */ + cbuf = Malloc(*size + 2); + + tmp = sys_read(fd, cbuf + 1, (int) *size); /* read the file */ + if (tmp != *size) + fatal("(readfile) bad read count"); + + (*size)++; /* keep book of the size! */ + sys_close(fd); /* filedes no longer needed */ + cbuf[0] = '\0'; /* allow pushback of first char */ + cbuf[*size] = '\0'; /* invoke loadbuf() at end */ + return cbuf; +} +#endif READ_IN_ONE + +#ifndef NOPP +#ifndef READ_IN_ONE +/* Input buffer supplying routines: pushbuf() and popbuf() +*/ +PRIVATE char *bufstack[IDEPTH] = 0; +PRIVATE bufstptr = 0; + +PRIVATE char * +pushbuf() +{ + if (bufstptr >= IDEPTH) + fatal("ran out of input buffers"); + if (bufstack[bufstptr] == 0) { + bufstack[bufstptr] = Malloc(BUFSIZ + 4); + } + return bufstack[bufstptr++]; +} + +PRIVATE +popbuf() +{ + bufstptr--; + ASSERT(bufstptr >= 0); +} +#endif READ_IN_ONE +#endif NOPP + +#ifndef NOPP +/* Input buffer administration: push_bh() and pop_bh() +*/ +PRIVATE struct buffer_header * +push_bh() +{ + if (head) { + if (head >= &instack[IDEPTH - 1]) + fatal("too many nested input texts"); + head->bh_ipp = ipp; + head->bh_lineno = LineNumber; + head++; + } + else + head = &instack[0]; + + return head; +} +#endif NOPP + +#ifndef NOPP +/* pop_bh() uncovers the previous inputbuffer on the stack + of headers. 0 is returned if there are no more + inputbuffers on the stack, 1 is returned in the other case. +*/ +PRIVATE int +pop_bh() +{ + int pfd = head->bh_fd; + + if (NoUnstack) { + lexerror("unexpected EOF"); + } + + if (head <= &instack[0]) { /* no more entries */ + head = (struct buffer_header *) 0; + return 0; + } + + ipp = (--head)->bh_ipp; /* restore the previous input pointer */ + + if (pfd >= 0) { /* unstack a file */ +#ifndef READ_IN_ONE + closefile(pfd); + popbuf(); /* free last buffer */ +#endif READ_IN_ONE + LineNumber = head->bh_lineno; + FileName = head->bh_name; + *WorkingDir = head->bh_wdir; + } + +#ifndef READ_IN_ONE + FilDes = head->bh_fd; +#endif READ_IN_ONE + + return 1; +} +#endif NOPP + +#ifndef READ_IN_ONE +/* low level IO routines: openfile(), readblock() and closefile() +*/ + +PRIVATE int +openfile(filename) + char *filename; +{ + int fd; /* filedescriptor for `filename' */ + + if ((fd = sys_open(filename, OP_RDONLY)) < 0 && sys_errno == EMFILE) + fatal("too many files open"); + return fd; +} + +PRIVATE +closefile(fd) +{ + sys_close(fd); +} + +PRIVATE int +readblock(fd, buf) + char buf[]; +{ + register n; + + if ((n = sys_read(fd, &buf[1], BUFSIZ)) < 0) { + fatal("(readblock) bad read from file"); + } + buf[0] = buf[n + 1] = '\0'; + return n; +} +#endif READ_IN_ONE + +/* Interface routines : InsertFile(), InsertText() and loadbuf() +*/ + +EXPORT int +InsertFile(filnam, table) + char *filnam; + char *table[]; +{ + char *mk_filename(), *newfn; + char *strcpy(); + +#ifdef READ_IN_ONE + char *readfile(), *text; + long size; +#else READ_IN_ONE + int fd = -1; +#endif READ_IN_ONE + + if (!filnam) + return 0; + +#ifndef NOPP + if (table == 0 || filnam[0] == '/') { /* don't look in the table! */ +#endif NOPP +#ifdef READ_IN_ONE + text = readfile(filnam, &size); +#else READ_IN_ONE + fd = openfile(filnam); +#endif READ_IN_ONE +#ifndef NOPP + } + else { + while (*table) { /* look in the directory table */ + newfn = mk_filename(*table++, filnam); +#ifdef READ_IN_ONE + if (text = readfile(newfn, &size)) +#else READ_IN_ONE + if ((fd = openfile(newfn)) >= 0) +#endif READ_IN_ONE + { + /* free filnam ??? */ + filnam = Salloc(newfn, strlen(newfn) + 1); + break; + } + } + } +#endif NOPP + +#ifdef READ_IN_ONE + if (text) +#else READ_IN_ONE + if (fd >= 0) +#endif READ_IN_ONE +#ifndef NOPP + { + struct buffer_header *push_bh(); + register struct buffer_header *bh = push_bh(); + + setwdir(WorkingDir, filnam); + bh->bh_lineno = LineNumber = 0; + bh->bh_name = FileName = filnam; + bh->bh_wdir = *WorkingDir; +#ifdef READ_IN_ONE + bh->bh_size = size; + bh->bh_fd = 0; /* this is a file */ + ipp = bh->bh_text = text; +#else READ_IN_ONE + bh->bh_size = readblock(fd, ipp = bh->bh_text = pushbuf()) + 1; + FilDes = bh->bh_fd = fd; +#endif READ_IN_ONE + bh->bh_text[0] = '\n'; /* wake up pp if '#' comes first */ + return 1; + } +#else NOPP + { +#ifdef READ_IN_ONE + isize = size; + ipp = text; +#else READ_IN_ONE + isize = readblock(FilDes = fd, ipp = &ibuf[0]) + 1; +#endif READ_IN_ONE + ibuf[0] = '\n'; + return 1; + } +#endif NOPP + return 0; +} + +#ifndef NOPP +EXPORT +InsertText(text, length) + char *text; +{ + struct buffer_header *push_bh(); + register struct buffer_header *bh = push_bh(); + + bh->bh_name = FileName; + bh->bh_lineno = LineNumber; + bh->bh_size = (long) length; + bh->bh_text = text; + bh->bh_wdir = *WorkingDir; + bh->bh_fd = -1; /* this is no file ! */ + ipp = text + 1; +#ifndef READ_IN_ONE + FilDes = -1; +#endif READ_IN_ONE +} +#endif NOPP + +/* loadbuf() is called if LoadChar meets a '\0' character + which may be the end-of-buffer mark of the current input + buffer. The '\0' could be genuine although not likely. + Note: this routine is exported due to its occurence in the definition + of LoadChar [input.h], that is defined as a macro. +*/ +EXPORT int +loadbuf() +{ +#ifndef NOPP + if (!head) { + /* stack exhausted, EOF on sourcefile */ + return EOI; + } +#endif NOPP + +#ifndef NOPP + if (ipp < &(head->bh_text[head->bh_size])) +#else NOPP + if (ipp < &ibuf[isize]) +#endif NOPP + { + /* a genuine '\0' character has been seen */ + return '\0'; + } + +#ifndef READ_IN_ONE +#ifndef NOPP + if (FilDes >= 0 && (head->bh_size = readblock(FilDes, head->bh_text)) > 0) + return ipp = &(head->bh_text[1]), *ipp++; +#else NOPP + if (FilDes >= 0 && (isize = readblock(FilDes, &ibuf[0])) > 0) + return ipp = &ibuf[1], *ipp++; +#endif NOPP + +#endif READ_IN_ONE + +#ifdef NOPP + if (NoUnstack) + lexerror("unexpected EOF"); +#ifndef READ_IN_ONE + closefile(FilDes); +#endif READ_IN_ONE +#endif NOPP + + return +#ifndef NOPP + pop_bh() ? (*ipp ? *ipp++ : loadbuf()) : +#endif NOPP + (ipp = &"\0\0"[1], EOI); +} + +/* Some miscellaneous routines : setwdir() and mk_filename() +*/ + +#ifndef NOPP +/* setwdir() updates *wdir according to the old working + directory (*wdir) and the filename fn, which may contain + some path name. The algorithm used here is: + setwdir(DIR, FILE): + if (FILE == "/***") + *DIR = "/" + else + if (contains(FILE, '/')) + *DIR = directory(FILE) + else + *DIR remains unchanged +*/ +PRIVATE +setwdir(wdir, fn) + char *fn, **wdir; +{ + register char *p; + char *rindex(); + + p = rindex(fn, '/'); + while (p && *(p + 1) == '\0') { /* remove trailing /'s */ + *p = '\0'; + p = rindex(fn, '/'); + } + + if (fn[0] == '\0' || (fn[0] == '/' && p == &fn[0])) /* absolute path */ + *wdir = "/"; + else + if (p) { + *p = '\0'; + *wdir = Salloc(fn, p - &fn[0] + 1); + *p = '/'; + } +} +#endif NOPP + +#ifndef NOPP +/* mk_filename() concatenates a dir and filename. +*/ +PRIVATE char * +mk_filename(dir, file) + register char *dir, *file; +{ + static char newfn[PATHLENGTH]; + register char *dst = &newfn[0]; + + if (!(dir[0] == '.' && dir[1] == '\0')) { + while (*dst++ = *dir++); + *(dst - 1) = '/'; + } + while (*dst++ = *file++); + return &newfn[0]; +} +#endif NOPP diff --git a/lang/cem/cemcom/input.h b/lang/cem/cemcom/input.h new file mode 100644 index 000000000..111856226 --- /dev/null +++ b/lang/cem/cemcom/input.h @@ -0,0 +1,13 @@ +/* $Header$ */ +/* INPUT PRIMITIVES */ + +#define LoadChar(dest) ((dest = *ipp++) || (dest = loadbuf())) +#define PushBack() (ipp--) + +/* EOF may be defined as -1 in most programs but the character -1 may + be expanded to the int -1 which causes troubles at the indexing in + the class or boolean arrays. +*/ +#define EOI (0200) + +extern char *ipp; diff --git a/lang/cem/cemcom/interface.h b/lang/cem/cemcom/interface.h new file mode 100644 index 000000000..d4a8c6518 --- /dev/null +++ b/lang/cem/cemcom/interface.h @@ -0,0 +1,3 @@ +#define PRIVATE +#define IMPORT extern +#define EXPORT diff --git a/lang/cem/cemcom/ival.c b/lang/cem/cemcom/ival.c new file mode 100644 index 000000000..3a54e02aa --- /dev/null +++ b/lang/cem/cemcom/ival.c @@ -0,0 +1,792 @@ +/* $Header$ */ +/* CODE FOR THE INITIALISATION OF GLOBAL VARIABLES */ + +#include "debug.h" +#include "nobitfield.h" + +#include "string.h" +#include "em.h" +#include "arith.h" +#include "align.h" +#include "label.h" +#include "expr.h" +#include "type.h" +#include "struct.h" +#include "field.h" +#include "assert.h" +#include "Lpars.h" +#include "class.h" +#include "sizes.h" +#include "idf.h" +#include "level.h" +#include "def.h" + +extern char *symbol2str(); + +#define con_byte(c) C_co_ucon(itos((long)(c) & 0xFF), (arith)1) + +struct expr *do_array(), *do_struct(), *IVAL(); +struct expr *strings = 0; /* list of string constants within initialiser */ +static ConStarted; /* indicates the generation of a 'con' pseudo */ + +/* do_ival() performs the initialisation of a global variable + of type tp with the initialisation expression expr by calling IVAL(). + Guided by type tp, the expression is evaluated. +*/ +do_ival(tpp, expr) + struct type **tpp; + struct expr *expr; +{ + ConStarted = 0; + if (IVAL(tpp, expr) != 0) + too_many_initialisers(expr); + /* The following loop declares the string constants + used in the initialisation. + The code for these string constants may not appear in + the code of the initialisation because a data label + in EM causes the current initialisation to be completed. + E.g. char *s[] = {"hello", "world"}; + */ + C_con_end(); + while (strings != 0) { + C_ndlb(strings->SG_DATLAB); + C_con_begin(); + C_co_scon(strings->SG_VALUE, (arith)0); + C_con_end(); + strings = strings->next; + } +} + + +/* store_string() collects the string constants appearing in an + initialisation. +*/ +store_string(expr) + struct expr *expr; +{ + expr->next = strings; + strings = expr; +} + + +/* IVAL() recursively guides the initialisation expression through the + different routines for the different types of initialisation: + - array initialisation + - struct initialisation + - fundamental type initialisation + Upto now, the initialisation of a union is not allowed! + An initialisation expression tree consists of normal expressions + which can be joined together by ',' nodes, which operator acts + like the lisp function "cons" to build lists. + IVAL() returns a pointer to the remaining expression tree. +*/ +struct expr * +IVAL(tpp, expr) + struct type **tpp; /* type of global variable */ + struct expr *expr; /* initialiser expression */ +{ + register struct type *tp = *tpp; + + switch (tp->tp_fund) { + case ARRAY: /* array initialisation */ + if (valid_type(tp->tp_up, "array element") == 0) + return 0; + if (ISCOMMA(expr)) { + /* list of initialisation expressions */ + return do_array(expr, tpp); + } + /* There might be an initialisation of a string + like char s[] = "I am a string" + */ + if (tp->tp_up->tp_fund == CHAR && expr->ex_class == String) + init_string(tpp, expr); + else /* " int i[24] = 12;" */ + check_and_pad(expr, tpp); + return 0; /* nothing left */ + case STRUCT: /* struct initialisation */ + if (valid_type(tp, "struct") == 0) + return 0; + if (ISCOMMA(expr)) { + /* list of initialisation expressions */ + return do_struct(expr, tp); + } + /* "struct foo f = 12;" */ + check_and_pad(expr, tpp); + return 0; + case UNION: /* sorry, but .... */ + error("union initialisation not allowed"); + return 0; + case ERRONEOUS: + return 0; + default: /* fundamental type */ + if (ISCOMMA(expr)) { /* " int i = {12};" */ + if (IVAL(tpp, expr->OP_LEFT) != 0) + too_many_initialisers(expr); + /* return remainings of the list for the + other members of the aggregate, if this + item belongs to an aggregate. + */ + return expr->OP_RIGHT; + } + else { /* "int i = 12;" */ + check_ival(expr, tp); + return 0; + } + } + /* NOTREACHED */ +} + +/* do_array() initialises the members of an array described + by type tp with the expressions in expr. + Two important cases: + - the number of members is known + - the number of members is not known + In the latter case, do_array() digests the whole expression + tree it is given. + In the former case, do_array() eats as many members from + the expression tree as are needed for the array. + If there are not sufficient members for the array, the remaining + members are padded with zeroes +*/ +struct expr * +do_array(expr, tpp) + struct expr *expr; + struct type **tpp; +{ + /* it is certain that ISCOMMA(expr) and tp->tp_fund == ARRAY */ + register struct type *tp = *tpp; + register arith elem_count; + + ASSERT(tp->tp_fund == ARRAY); + /* the following test catches initialisations like + char c[] = {"just a string"}; + or + char d[] = {{"just another string"}} + The use of the brackets causes this problem. + Note: although the implementation of such initialisations + is completely foolish, we did it!! (no applause, thank you) + */ + if (tp->tp_up->tp_fund == CHAR) { + register struct expr *f = expr->OP_LEFT; + register struct expr *g = 0; + + while (ISCOMMA(f)) { /* eat the brackets!!! */ + g = f; + f = f->OP_LEFT; + } + if (f->ex_class == String) { /* hallelujah, it's a string! */ + init_string(tpp, f); + return g ? g->OP_RIGHT : expr->OP_RIGHT; + } + /* else: just go on with the next part of this function */ + if (g != 0) + expr = g; + } + if (tp->tp_size == (arith)-1) { + /* declared with unknown size: [] */ + for (elem_count = 0; expr; elem_count++) { + /* eat whole initialisation expression */ + if (ISCOMMA(expr->OP_LEFT)) { + /* the member expression is embraced */ + if (IVAL(&(tp->tp_up), expr->OP_LEFT) != 0) + too_many_initialisers(expr); + expr = expr->OP_RIGHT; + } + else { + if (aggregate_type(tp->tp_up)) + expr = IVAL(&(tp->tp_up), expr); + else { + check_ival(expr->OP_LEFT, tp->tp_up); + expr = expr->OP_RIGHT; + } + } + } + /* set the proper size */ + *tpp = construct_type(ARRAY, tp->tp_up, elem_count); + } + else { /* the number of members is already known */ + arith dim = tp->tp_size / tp->tp_up->tp_size; + + for (elem_count = 0; elem_count < dim && expr; elem_count++) { + if (ISCOMMA(expr->OP_LEFT)) { + /* embraced member initialisation */ + if (IVAL(&(tp->tp_up), expr->OP_LEFT) != 0) + too_many_initialisers(expr); + expr = expr->OP_RIGHT; + } + else { + if (aggregate_type(tp->tp_up)) + /* the member is an aggregate */ + expr = IVAL(&(tp->tp_up), expr); + else { + check_ival(expr->OP_LEFT, tp->tp_up); + expr = expr->OP_RIGHT; + } + } + } + if (expr && elem_count == dim) + /* all the members are initialised but there + remains a part of the expression tree which + is returned + */ + return expr; + if ((expr == 0) && elem_count < dim) { + /* the expression tree is completely absorbed + but there are still members which must be + initialised with zeroes + */ + do + pad(tp->tp_up); + while (++elem_count < dim); + } + } + return 0; +} + + +/* do_struct() initialises a struct of type tp with the expression expr. + The main loop is just controlled by the definition of the selectors + during which alignment is taken care of. +*/ +struct expr * +do_struct(expr, tp) + struct expr *expr; + struct type *tp; +{ + /* tp is a STRUCT and expr->OP_OPER == INITCOMMA */ + + struct sdef *sd = tp->tp_sdef; + arith bytes_upto_here = (arith)0; + arith last_offset = (arith)-1; + + /* as long as there are selectors and there is an initialiser.. */ + while (sd && expr) { + if (ISCOMMA(expr->OP_LEFT)) { /* embraced expression */ + if (IVAL(&(sd->sd_type), expr->OP_LEFT) != 0) + too_many_initialisers(expr); + expr = expr->OP_RIGHT; + } + else { + if (aggregate_type(sd->sd_type)) + /* selector is an aggregate itself */ + expr = IVAL(&(sd->sd_type), expr); + else { +#ifdef NOBITFIELD + /* fundamental type, not embraced */ + check_ival(expr->OP_LEFT, sd->sd_type); + expr = expr->OP_RIGHT; +#else + if (is_anon_idf(sd->sd_idf)) + /* a hole in the struct due to + the use of ";:n;" in a struct + definition. + */ + put_bf(sd->sd_type, (arith)0); + else { + /* fundamental type, not embraced */ + check_ival(expr->OP_LEFT, + sd->sd_type); + expr = expr->OP_RIGHT; + } +#endif NOBITFIELD + } + } + /* align upto the next selector boundary */ + if (sd->sd_sdef) + bytes_upto_here += zero_bytes(sd); + if (last_offset != sd->sd_offset) { + /* don't take the field-width more than once */ + bytes_upto_here += size_of_type(sd->sd_type, "selector"); + last_offset = sd->sd_offset; + } + sd = sd->sd_sdef; + } + /* perfect fit if (expr && (sd == 0)) holds */ + if ((expr == 0) && (sd != 0)) { + /* there are selectors left which must be padded with + zeroes + */ + do { + pad(sd->sd_type); + /* take care of the alignment restrictions */ + if (sd->sd_sdef) + bytes_upto_here += zero_bytes(sd); + /* no field thrown-outs here */ + bytes_upto_here += size_of_type(sd->sd_type, "selector"); + } while (sd = sd->sd_sdef); + } + /* keep on aligning... */ + while (bytes_upto_here++ < tp->tp_size) + con_byte(0); + return expr; +} + +/* check_and_pad() is given a simple initialisation expression + where the type can be either a simple or an aggregate type. + In the latter case, only the first member is initialised and + the rest is zeroed. +*/ +check_and_pad(expr, tpp) + struct expr *expr; + struct type **tpp; +{ + /* expr is of a fundamental type */ + struct type *tp = *tpp; + + if (tp->tp_fund == ARRAY) { + if (valid_type(tp->tp_up, "array element") == 0) + return; + check_and_pad(expr, &(tp->tp_up)); /* first member */ + if (tp->tp_size == (arith)-1) + /* no size specified upto here: just + set it to the size of one member. + */ + tp = *tpp = + construct_type(ARRAY, tp->tp_up, (arith)1); + else { + register dim = tp->tp_size / tp->tp_up->tp_size; + /* pad remaining members with zeroes */ + while (--dim > 0) + pad(tp->tp_up); + } + } + else + if (tp->tp_fund == STRUCT) { + register struct sdef *sd = tp->tp_sdef; + + if (valid_type(tp, "struct") == 0) + return; + check_and_pad(expr, &(sd->sd_type)); + /* Next selector is aligned by adding extra zeroes */ + if (sd->sd_sdef) + zero_bytes(sd); + while (sd = sd->sd_sdef) { /* pad remaining selectors */ + pad(sd->sd_type); + if (sd->sd_sdef) + zero_bytes(sd); + } + } + else /* simple type */ + check_ival(expr, tp); +} + +/* pad() fills an element of type tp with zeroes. + If the element is an aggregate, pad() is called recursively. +*/ +pad(tp) + struct type *tp; +{ + if (ConStarted == 0) { + C_con_begin(); + ConStarted = 1; + } + switch (tp->tp_fund) { + case ARRAY: + { + register long dim; + + if (valid_type(tp->tp_up, "array element") == 0) + return; + + dim = tp->tp_size / tp->tp_up->tp_size; + + /* Assume the dimension is known */ + while (dim-- > 0) + pad(tp->tp_up); + break; + } + case STRUCT: + { + register struct sdef *sdef = tp->tp_sdef; + + if (valid_type(tp, "struct") == 0) + return; + + do { + pad(sdef->sd_type); + if (sdef->sd_sdef) + zero_bytes(sdef); + } while (sdef = sdef->sd_sdef); + break; + } +#ifndef NOBITFIELD + case FIELD: + put_bf(tp, (arith)0); + break; +#endif NOBITFIELD + case INT: + case SHORT: + case LONG: + case CHAR: + case ENUM: + case POINTER: + C_co_ucon("0", tp->tp_size); + break; + case FLOAT: + case DOUBLE: + C_co_fcon("0", tp->tp_size); + break; + case UNION: + error("initialisation of unions not allowed"); + break; + case ERRONEOUS: + break; + default: + crash("(generate) bad fundamental type %s\n", + symbol2str(tp->tp_fund)); + } +} + +/* check_ival() checks whether the initialisation of an element + of a fundamental type is legal and, if so, performs the initialisation + by directly generating the necessary code. + No further comment is needed to explain the internal structure + of this straightforward function. +*/ +check_ival(expr, type) + struct expr *expr; + struct type *type; +{ + /* The philosophy here is that ch7cast puts an explicit + conversion node in front of the expression if the types + are not compatible. In this case, the initialisation is + not legal. ??? + */ + + switch (type->tp_fund) { + case CHAR: + case SHORT: + case INT: + case LONG: + if (expr->ex_class == Oper || expr->VL_IDF != 0) { + illegal_init_cst(expr); + break; + } + ch7cast(&expr, '=', type); + if (ConStarted == 0) { + C_con_begin(); + ConStarted = 1; + } + con_int(expr); + break; +#ifndef NOBITFIELD + case FIELD: + if (expr->ex_class == Oper || expr->VL_IDF != 0) { + illegal_init_cst(expr); + break; + } + ch7cast(&expr, '=', type->tp_up); + put_bf(type, expr->VL_VALUE); + break; +#endif NOBITFIELD + case ENUM: + if (expr->ex_class == Oper) { + illegal_init_cst(expr); + break; + } + ch7cast(&expr, '=', type); + if (ConStarted == 0) { + C_con_begin(); + ConStarted = 1; + } + con_int(expr); + break; + case FLOAT: + case DOUBLE: + ch7cast(&expr, '=', type); + if (ConStarted == 0) { + C_con_begin(); + ConStarted = 1; + } + if (expr->ex_class == Float) + C_co_fcon(expr->FL_VALUE, expr->ex_type->tp_size); + else + if (expr->ex_class == Oper && expr->OP_OPER == INT2FLOAT) { + expr = expr->OP_RIGHT; + if (expr->ex_class == Value && expr->VL_IDF == 0) + C_co_fcon(itos(expr->VL_VALUE), type->tp_size); + else + illegal_init_cst(expr); + } + else + illegal_init_cst(expr); + break; + case POINTER: + ch7cast(&expr, '=', type); + switch (expr->ex_class) { + case Oper: + illegal_init_cst(expr); + break; + case String: /* char *s = "...." */ + { + label datlab = data_label(); + + if (ConStarted) + C_con_end(); + else + ConStarted = 1; /* ??? */ + C_ina_pt(datlab); + C_con_begin(); + C_co_ndlb(datlab, (arith)0); + expr->SG_DATLAB = datlab; + store_string(expr); + break; + } + case Value: + { + struct value *vl = &(expr->ex_object.ex_value); + struct idf *idf = vl->vl_idf; + + ASSERT(expr->ex_type->tp_fund == POINTER); + if (ConStarted == 0) { + C_con_begin(); + ConStarted = 1; + } + if (expr->ex_type->tp_up->tp_fund == FUNCTION) { + if (idf) + C_co_pnam(idf->id_text); + else /* int (*func)() = 0 */ + con_int(expr); + } + else + if (idf) { + register struct def *def = idf->id_def; + + if (def->df_level >= L_LOCAL) { + if (def->df_sc != STATIC) + /* Eg. int a; + static int *p = &a; + */ + expr_error(expr, + "illegal initialisation"); + else + C_co_ndlb((label)def->df_address, + vl->vl_value); + } + else + C_co_dnam(idf->id_text, vl->vl_value); + } + else + con_int(expr); + break; + } + default: + crash("(check_ival) illegal initialisation expression"); + } + break; + case ERRONEOUS: + break; + default: + crash("(check_ival) bad fundamental type %s", + symbol2str(type->tp_fund)); + } +} + +/* init_string() initialises an array of characters by specifying + a string constant. + Escaped characters should be converted into its corresponding + ASCII character value. E.g. '\000' -> (char) 0. + Alignment is taken care of. +*/ +init_string(tpp, expr) + struct type **tpp; /* type tp = array of characters */ + struct expr *expr; +{ + register struct type *tp = *tpp; + register arith length; + char *s = expr->SG_VALUE; + arith ntopad; + + length = prepare_string(s); + if (tp->tp_size == (arith)-1) { + /* set the dimension */ + tp = *tpp = construct_type(ARRAY, tp->tp_up, length); + ntopad = align(tp->tp_size, word_align) - tp->tp_size; + } + else { + arith dim = tp->tp_size / tp->tp_up->tp_size; + + ntopad = align(dim, word_align) - length; + if (length > dim) + expr_error(expr, + "too many characters in initialiser string"); + } + if (ConStarted == 0) { + C_con_begin(); + ConStarted = 1; + } + /* throw out the characters of the already prepared string */ + do + con_byte(*s++); + while (--length > 0); + /* pad the allocated memory (the alignment has been calculated) */ + while (ntopad-- > 0) + con_byte(0); +} + +/* prepare_string() strips the escaped characters of a + string and replaces them by the ascii characters they stand for. + The ascii length of the resulting string is returned, including the + terminating null-character. +*/ +int +prepare_string(str) + register char *str; +{ + register char *t = str; + register count = 1; /* there's always a null at the end ! */ + + while (*str) { + count++; + if (*str == '\\') { + switch (*++str) { + case 'b': + *t++ = '\b'; + str++; + break; + case 'f': + *t++ = '\f'; + str++; + break; + case 'n': + *t++ = '\n'; + str++; + break; + case 'r': + *t++ = '\r'; + str++; + break; + case 't': + *t++ = '\t'; + str++; + break; + + /* octal value of: */ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + { + register cnt = 0, oct = 0; + + do + oct = oct * 8 + *str - '0'; + while (is_oct(*++str) && ++cnt < 3); + *t++ = (char) oct; + break; + } + default: + *t++ = *str++; + break; + } + } + else + *t++ = *str++; + } + *t = '\0'; /* don't forget this one !!! */ + return count; +} + +#ifndef NOBITFIELD +/* put_bf() takes care of the initialisation of (bit-)field + selectors of a struct: each time such an initialisation takes place, + put_bf() is called instead of the normal code generating routines. + Put_bf() stores the given integral value into "field" and + "throws" the result of "field" out if the current selector + is the last of this number of fields stored at the same address. +*/ +put_bf(tp, val) + struct type *tp; + arith val; +{ + static long field = (arith)0; + static arith offset = (arith)-1; + register struct field *fd = tp->tp_field; + register struct sdef *sd = fd->fd_sdef; + static struct expr expr; + + ASSERT(sd); + if (offset == (arith)-1) { + /* first bitfield in this field */ + offset = sd->sd_offset; + expr.ex_type = tp->tp_up; + expr.ex_class = Value; + } + if (val != 0) /* insert the value into "field" */ + field |= (val & fd->fd_mask) << fd->fd_shift; + if (sd->sd_sdef == 0 || sd->sd_sdef->sd_offset != offset) { + /* the selector was the last stored at this address */ + expr.VL_VALUE = field; + if (ConStarted == 0) { + C_con_begin(); + ConStarted = 1; + } + con_int(&expr); + field = (arith)0; + offset = (arith)-1; + } +} +#endif NOBITFIELD + +int +zero_bytes(sd) + struct sdef *sd; +{ + /* fills the space between a selector of a struct + and the next selector of that struct with zero-bytes. + */ + register int n = + sd->sd_sdef->sd_offset - sd->sd_offset - + size_of_type(sd->sd_type, "struct member"); + register count = n; + + while (n-- > 0) + con_byte((arith)0); + return count; +} + +int +valid_type(tp, str) + struct type *tp; + char *str; +{ + if (tp->tp_size < 0) { + error("size of %s unknown", str); + return 0; + } + return 1; +} + +con_int(expr) + register struct expr *expr; +{ + register struct type *tp = expr->ex_type; + + if (tp->tp_unsigned) + C_co_ucon(itos(expr->VL_VALUE), tp->tp_size); + else + C_co_icon(itos(expr->VL_VALUE), tp->tp_size); +} + +illegal_init_cst(expr) + struct expr *expr; +{ + if (expr->ex_type->tp_fund != ERRONEOUS) + expr_error(expr, "illegal initialisation constant"); +} + +too_many_initialisers(expr) + struct expr *expr; +{ + expr_error(expr, "too many initialisers"); +} + +aggregate_type(tp) + struct type *tp; +{ + return tp->tp_fund == ARRAY || tp->tp_fund == STRUCT; +} diff --git a/lang/cem/cemcom/label.c b/lang/cem/cemcom/label.c new file mode 100644 index 000000000..0ced30b0d --- /dev/null +++ b/lang/cem/cemcom/label.c @@ -0,0 +1,88 @@ +/* $Header$ */ +/* L A B E L H A N D L I N G */ + +#include "Lpars.h" +#include "level.h" +#include "idf.h" +#include "label.h" +#include "arith.h" +#include "def.h" +#include "type.h" + +extern char options[]; + +define_label(idf) + struct idf *idf; +{ + /* The identifier idf is defined as a label. If it is new, + it is entered into the idf list with the largest possible + scope, i.e., on the lowest possible level. + */ + enter_label(idf, 1); +} + +apply_label(idf) + struct idf *idf; +{ + /* The identifier idf is applied as a label. It may or may + not be there, and if it is there, it may be from a + declaration or another application. + */ + enter_label(idf, 0); +} + +enter_label(idf, defining) + struct idf *idf; +{ + /* The identifier idf is entered as a label. If it is new, + it is entered into the idf list with the largest possible + scope, i.e., on the lowest possible level. + If defining, the label comes from a label statement. + */ + if (idf->id_def) { + struct def *def = idf->id_def; + + if (def->df_sc == LABEL) { + if (defining && def->df_initialized) + error("redeclaration of label %s", + idf->id_text); + } + else { /* there may still be room for it */ + int deflevel = def->df_level; + + if (options['R'] && def->df_sc == TYPEDEF) + warning("label %s is also a typedef", + idf->id_text); + + if (deflevel == level) /* but alas, no */ + error("%s is not a label", idf->id_text); + else { + int lvl; + + if (options['R'] && deflevel > L_LOCAL) + warning("label %s is not function-wide", + idf->id_text); + lvl = deflevel + 1; + if (lvl < L_LOCAL) + lvl = L_LOCAL; + add_def(idf, LABEL, label_type, lvl); + } + } + } + else { + add_def(idf, LABEL, label_type, L_LOCAL); + } + if (idf->id_def->df_address == 0) + idf->id_def->df_address = (arith) text_label(); + if (defining) + idf->id_def->df_initialized = 1; +} + +unstack_label(idf) + struct idf *idf; +{ + /* The scope in which the label idf occurred is left. + */ + if (!idf->id_def->df_initialized && !is_anon_idf(idf)) + error("label %s not defined", idf->id_text); +} diff --git a/lang/cem/cemcom/label.h b/lang/cem/cemcom/label.h new file mode 100644 index 000000000..dc93d5c3d --- /dev/null +++ b/lang/cem/cemcom/label.h @@ -0,0 +1,11 @@ +/* $Header$ */ +/* L A B E L D E F I N I T I O N */ + +#define label unsigned int +#define NO_LABEL (label) 0 + +extern label lab_count; +#define text_label() (lab_count++) /* returns a new text label */ + +extern label datlab_count; +#define data_label() (datlab_count++) /* returns a new data label */ diff --git a/lang/cem/cemcom/level.h b/lang/cem/cemcom/level.h new file mode 100644 index 000000000..f4ee61661 --- /dev/null +++ b/lang/cem/cemcom/level.h @@ -0,0 +1,15 @@ +/* $Header$ */ +/* LEVEL DEFINITIONS */ + +/* The level of the top-most stack_level is kept in a global variable + with the obvious name 'level'. Although this variable is consulted + by a variety of routines, it turns out that its actual value is of + importance in only a very few files. Therefore the names of the + values are put in a separate include-file. +*/ + +#define L_UNIVERSAL 0 +#define L_GLOBAL 1 +#define L_FORMAL1 2 /* formal declaration */ +#define L_FORMAL2 3 /* formal definition */ +#define L_LOCAL 4 /* and up */ diff --git a/lang/cem/cemcom/macro.h b/lang/cem/cemcom/macro.h new file mode 100644 index 000000000..cdd023f89 --- /dev/null +++ b/lang/cem/cemcom/macro.h @@ -0,0 +1,52 @@ +/* $Header$ */ +/* PREPROCESSOR: DEFINITION OF MACRO DESCRIPTOR */ + +#include "nopp.h" + +#ifndef NOPP +/* The flags of the mc_flag field of the macro structure. Note that + these flags can be set simultaneously. +*/ +#define NOFLAG 0 /* no special flags */ +#define FUNC 01 /* function attached */ +#define PREDEF 02 /* predefined macro */ + +#define FORMALP 0200 /* mask for creating macro formal parameter */ + +/* The macro descriptor is very simple, except the fact that the + mc_text, which points to the replacement text, contains the + non-ascii characters \201, \202, etc, indicating the position of a + formal parameter in this text. +*/ +struct macro { + struct macro *next; + char * mc_text; /* the replacement text */ + int mc_nps; /* number of formal parameters */ + int mc_length; /* length of replacement text */ + char mc_flag; /* marking this macro */ +}; + + +/* allocation definitions of struct macro */ +/* ALLOCDEF "macro" */ +extern char *st_alloc(); +extern struct macro *h_macro; +#define new_macro() ((struct macro *) \ + st_alloc((char **)&h_macro, sizeof(struct macro))) +#define free_macro(p) st_free(p, h_macro, sizeof(struct macro)) + + +/* `token' numbers of keywords of command-line processor +*/ +#define K_UNKNOWN 0 +#define K_DEFINE 1 +#define K_ELIF 2 +#define K_ELSE 3 +#define K_ENDIF 4 +#define K_IF 5 +#define K_IFDEF 6 +#define K_IFNDEF 7 +#define K_INCLUDE 8 +#define K_LINE 9 +#define K_UNDEF 10 +#endif NOPP diff --git a/lang/cem/cemcom/macro.str b/lang/cem/cemcom/macro.str new file mode 100644 index 000000000..cdd023f89 --- /dev/null +++ b/lang/cem/cemcom/macro.str @@ -0,0 +1,52 @@ +/* $Header$ */ +/* PREPROCESSOR: DEFINITION OF MACRO DESCRIPTOR */ + +#include "nopp.h" + +#ifndef NOPP +/* The flags of the mc_flag field of the macro structure. Note that + these flags can be set simultaneously. +*/ +#define NOFLAG 0 /* no special flags */ +#define FUNC 01 /* function attached */ +#define PREDEF 02 /* predefined macro */ + +#define FORMALP 0200 /* mask for creating macro formal parameter */ + +/* The macro descriptor is very simple, except the fact that the + mc_text, which points to the replacement text, contains the + non-ascii characters \201, \202, etc, indicating the position of a + formal parameter in this text. +*/ +struct macro { + struct macro *next; + char * mc_text; /* the replacement text */ + int mc_nps; /* number of formal parameters */ + int mc_length; /* length of replacement text */ + char mc_flag; /* marking this macro */ +}; + + +/* allocation definitions of struct macro */ +/* ALLOCDEF "macro" */ +extern char *st_alloc(); +extern struct macro *h_macro; +#define new_macro() ((struct macro *) \ + st_alloc((char **)&h_macro, sizeof(struct macro))) +#define free_macro(p) st_free(p, h_macro, sizeof(struct macro)) + + +/* `token' numbers of keywords of command-line processor +*/ +#define K_UNKNOWN 0 +#define K_DEFINE 1 +#define K_ELIF 2 +#define K_ELSE 3 +#define K_ENDIF 4 +#define K_IF 5 +#define K_IFDEF 6 +#define K_IFNDEF 7 +#define K_INCLUDE 8 +#define K_LINE 9 +#define K_UNDEF 10 +#endif NOPP diff --git a/lang/cem/cemcom/main.c b/lang/cem/cemcom/main.c new file mode 100644 index 000000000..ce3a88ac2 --- /dev/null +++ b/lang/cem/cemcom/main.c @@ -0,0 +1,382 @@ +/* $Header$ */ +/* MAIN PROGRAM */ + +#include "nopp.h" +#include "target_sizes.h" +#include "debug.h" +#include "myalloc.h" +#include "use_tmp.h" +#include "maxincl.h" +#include "system.h" +#include "inputtype.h" +#include "bufsiz.h" + +#include "input.h" +#include "level.h" +#include "idf.h" +#include "arith.h" +#include "type.h" +#include "declarator.h" +#include "tokenname.h" +#include "Lpars.h" +#include "LLlex.h" +#include "alloc.h" +#include "specials.h" + +extern struct tokenname tkidf[], tkother[]; +extern char *symbol2str(); +char options[128]; /* one for every char */ + +#ifndef NOPP +int inc_pos = 1; /* place where next -I goes */ +char *inctable[MAXINCL] = { /* list for includes */ + ".", + "/usr/include", + 0 +}; + +char **WorkingDir = &inctable[0]; +#endif NOPP + +struct sp_id special_ids[] = { + {"setjmp", SP_SETJMP}, /* non-local goto's are registered */ + {0, 0} +}; + +arith + short_size = SZ_SHORT, + word_size = SZ_WORD, + dword_size = (2 * SZ_WORD), + int_size = SZ_INT, + long_size = SZ_LONG, + float_size = SZ_FLOAT, + double_size = SZ_DOUBLE, + pointer_size = SZ_POINTER; + +int + short_align = AL_SHORT, + word_align = AL_WORD, + int_align = AL_INT, + long_align = AL_LONG, + float_align = AL_FLOAT, + double_align = AL_DOUBLE, + pointer_align = AL_POINTER, + struct_align = AL_STRUCT, + union_align = AL_UNION; + +#ifndef NOPP +arith ifval; /* ifval will contain the result of the #if expression */ +#endif NOPP + +char *prog_name; + +main(argc, argv) + char *argv[]; +{ + /* parse and interpret the command line options */ + prog_name = argv[0]; + +#ifdef OWNALLOC + init_mem(); +#endif OWNALLOC + + init_hmask(); +#ifndef NOPP + init_pp(); /* initialise the preprocessor macros */ +#endif NOPP + + /* Note: source file "-" indicates that the source is supplied + as standard input. This is only allowed if READ_IN_ONE is + not defined! + */ +#ifdef READ_IN_ONE + while (argc > 1 && *argv[1] == '-') { +#else READ_IN_ONE + while (argc > 1 && *argv[1] == '-' && argv[1][1] != '\0') { +#endif READ_IN_ONE + char *par = &argv[1][1]; + + if (*par == '-') + par++; + do_option(par); + argc--, argv++; + } + compile(argc - 1, &argv[1]); + +#ifdef OWNALLOC +#ifdef DEBUG + mem_stat(); +#endif DEBUG +#endif OWNALLOC + +#ifdef DEBUG + hash_stat(); +#endif DEBUG + + return err_occurred; +} + +char *source = 0; +char *destination = 0; + +char *nmlist = 0; + +#ifdef USE_TMP +extern char *mktemp(); /* library routine */ +static char tmpname[] = "/tmp/Cem.XXXXXX"; +char *tmpfile = 0; +#endif USE_TMP + +compile(argc, argv) + char *argv[]; +{ +#ifndef NOPP + int pp_only = options['E'] || options['P']; +#endif NOPP + + source = argv[0]; + + switch (argc) { + + case 1: +#ifndef NOPP + if (!pp_only) +#endif NOPP + fatal("%s: destination file not specified", prog_name); + break; + case 2: + destination = argv[1]; + break; + + case 3: + nmlist = argv[2]; + destination = argv[1]; + break; + default: + fatal("use: %s source destination [namelist]", prog_name); + break; + } + +#ifdef USE_TMP + tmpfile = mktemp(tmpname); +#endif USE_TMP + + if (!InsertFile(source, (char **) 0)) { + /* read the source file */ + fatal("%s: no source file %s\n", prog_name, source); + } + init(); + + /* needed ??? */ + FileName = source; + PushLex(); + +#ifndef NOPP + if (pp_only) { + /* run the preprocessor as if it is stand-alone */ + preprocess(); + } + else { +#endif NOPP + +#ifdef USE_TMP + init_code(tmpfile); +#else USE_TMP + init_code(destination); +#endif USE_TMP + + /* compile the source text */ + C_program(); + end_code(); + +#ifdef USE_TMP + prepend_scopes(destination); + AppendFile(tmpfile, destination); + sys_remove(tmpfile); +#endif USE_TMP + +#ifdef DEBUG + if (options['u']) /* unstack L_UNIVERSAL */ + unstack_level(); + if (options['f'] || options['t']) + dumpidftab("end of main", options['f'] ? 0 : 0); +#endif DEBUG +#ifndef NOPP + } +#endif NOPP + PopLex(); +} + +init() +{ + init_cst(); /* initialize variables of "cstoper.c" */ + reserve(tkidf); /* mark the C reserved words as such */ + init_specials(special_ids); /* mark special ids as such */ + + if (options['R']) + reserve(tkother); + + char_type = standard_type(CHAR, 0, 1, (arith)1); + uchar_type = standard_type(CHAR, UNSIGNED, 1, (arith)1); + + short_type = standard_type(SHORT, 0, short_align, short_size); + ushort_type = standard_type(SHORT, UNSIGNED, short_align, short_size); + + /* Treat type `word' as `int', having its own size and + alignment requirements. + This type is transparent to the user. + */ + word_type = standard_type(INT, 0, word_align, word_size); + uword_type = standard_type(INT, UNSIGNED, word_align, word_size); + + int_type = standard_type(INT, 0, int_align, int_size); + uint_type = standard_type(INT, UNSIGNED, int_align, int_size); + + long_type = standard_type(LONG, 0, long_align, long_size); + ulong_type = standard_type(LONG, UNSIGNED, long_align, long_size); + + float_type = standard_type(FLOAT, 0, float_align, float_size); + double_type = standard_type(DOUBLE, 0, double_align, double_size); + void_type = standard_type(VOID, 0, 0, (arith)0); + label_type = standard_type(LABEL, 0, 0, (arith)0); + error_type = standard_type(ERRONEOUS, 0, 1, (arith)1); + + /* Pointer Arithmetic type: all arithmetics concerning + pointers is supposed to be performed in the + pointer arithmetic type which is equal to either + int_type or long_type, depending on the pointer_size + */ + if (pointer_size == word_size) + pa_type = word_type; + else + if (pointer_size == short_size) + pa_type = short_type; + else + if (pointer_size == int_size) + pa_type = int_type; + else + if (pointer_size == long_size) + pa_type = long_type; + else + fatal("pointer size incompatible with any integral size"); + if (short_size > int_size || int_size > long_size) + fatal("sizes of short/int/long decreasing"); + + /* Build a type for function returning int, RM 13 */ + funint_type = construct_type(FUNCTION, int_type, (arith)0); + string_type = construct_type(POINTER, char_type, (arith)0); + + /* Define the standard type identifiers. */ + add_def(str2idf("char"), TYPEDEF, char_type, L_UNIVERSAL); + add_def(str2idf("int"), TYPEDEF, int_type, L_UNIVERSAL); + add_def(str2idf("float"), TYPEDEF, float_type, L_UNIVERSAL); + add_def(str2idf("double"), TYPEDEF, double_type, L_UNIVERSAL); + add_def(str2idf("void"), TYPEDEF, void_type, L_UNIVERSAL); + stack_level(); +} + +init_specials(si) + struct sp_id *si; +{ + while (si->si_identifier) { + struct idf *idf = str2idf(si->si_identifier); + + if (idf->id_special) + fatal("maximum identifier length insufficient"); + idf->id_special = si->si_flag; + si++; + } +} + +#ifndef NOPP +preprocess() +{ + /* preprocess() is the "stand-alone" preprocessor which + consecutively calls the lexical analyzer LLlex() to get + the tokens and prints them in a suitable way. + */ + static unsigned int lastlineno = 0; + static char *lastfilenm = ""; + + while (LLlex() != EOI) { + if (lastlineno != dot.tk_line) { + if (strcmp(lastfilenm, dot.tk_file) == 0) { + if (dot.tk_line - lastlineno <= 1) { + lastlineno++; + printf("\n"); + } + else { + lastlineno = dot.tk_line; + if (!options['P']) + printf("\n#line %ld \"%s\"\n", + lastlineno, lastfilenm); + } + } + else { + lastfilenm = dot.tk_file; + lastlineno = dot.tk_line; + if (!options['P']) + printf("\n#line %ld \"%s\"\n", + lastlineno, lastfilenm); + } + } + else + if (strcmp(lastfilenm, dot.tk_file) != 0) { + lastfilenm = dot.tk_file; + if (!options['P']) + printf("\n#line %ld \"%s\"\n", + lastlineno, lastfilenm); + } + + switch (DOT) { + + case IDENTIFIER: + case TYPE_IDENTIFIER: + printf(dot.tk_idf->id_text); + printf(" "); + break; + + case STRING: + printf("\"%s\" ", dot.tk_str); + break; + + case INTEGER: + printf("%ld ", dot.tk_ival); + break; + + case FLOATING: + printf("%s ", dot.tk_fval); + break; + + case EOI: + case EOF: + return; + + default: /* very expensive... */ + printf("%s ", symbol2str(DOT)); + } + } +} +#endif NOPP + +#ifdef USE_TMP +AppendFile(src, dst) + char *src, *dst; +{ + int fd_src, fd_dst; + char buf[BUFSIZ]; + int n; + + if ((fd_src = sys_open(src, OP_RDONLY)) < 0) { + fatal("cannot read %s", src); + } + if ((fd_dst = sys_open(dst, OP_APPEND)) < 0) { + fatal("cannot write to %s", src); + } + while ((n = sys_read(fd_src, buf, BUFSIZ)) > 0) { + sys_write(fd_dst, buf, n); + } + sys_close(fd_src); + sys_close(fd_dst); +} +#endif USE_TMP diff --git a/lang/cem/cemcom/make.emfun b/lang/cem/cemcom/make.emfun new file mode 100755 index 000000000..d3fe92f60 --- /dev/null +++ b/lang/cem/cemcom/make.emfun @@ -0,0 +1,19 @@ +ed - $1 <<'--EOI--' +g/^%/d +g/^ /.-1,.j +1,$s/^\([^|]*\)|\([^|]*\)|\(.*\)$/\ +\1 \2 {\ +\3;\ +}/ +1i +/* EM COMPACT CODE -- PROCEDURAL INTERFACE (generated from emcode.def) */ +#include "em.h" +#ifdef PROC_INTF +#include "label.h" +#include "arith.h" +. +$a +#endif PROC_INTF +. +1,$p +--EOI-- diff --git a/lang/cem/cemcom/make.emmac b/lang/cem/cemcom/make.emmac new file mode 100755 index 000000000..5337f40f8 --- /dev/null +++ b/lang/cem/cemcom/make.emmac @@ -0,0 +1,10 @@ +ed - $1 <<'--EOI--' +g/^%/d +g/^ /.-1,.j +1,$s/^\([^|]*\)|[^|]*|\(.*\)$/\ +#define \1 (\2)/ +1i +/* EM COMPACT CODE -- MACRO DEFINITIONS (generated from emcode.def) */ +. +1,$p +--EOI-- diff --git a/lang/cem/cemcom/make.hfiles b/lang/cem/cemcom/make.hfiles new file mode 100755 index 000000000..2132dd618 --- /dev/null +++ b/lang/cem/cemcom/make.hfiles @@ -0,0 +1,35 @@ +: Update Files from database + +PATH=/bin:/usr/bin + +case $# in +1) ;; +*) echo use: $0 file >&2 + exit 1 +esac + +( +IFCOMMAND="if (<\$FN) 2>/dev/null;\ + then if cmp -s \$FN \$TMP;\ + then rm \$TMP;\ + else mv \$TMP \$FN;\ + echo update \$FN;\ + fi;\ + else mv \$TMP \$FN;\ + echo create \$FN;\ + fi" +echo 'TMP=.uf$$' +echo 'FN=$TMP' +echo 'cat >$TMP <<\!EOF!' +sed -n '/^!File:/,${ +/^$/d +/^!File:[ ]*\(.*\)$/s@@!EOF!\ +'"$IFCOMMAND"'\ +FN=\1\ +cat >$TMP <<\\!EOF!@ +p +}' $1 +echo '!EOF!' +echo $IFCOMMAND +) | +sh diff --git a/lang/cem/cemcom/make.next b/lang/cem/cemcom/make.next new file mode 100755 index 000000000..be69d8d69 --- /dev/null +++ b/lang/cem/cemcom/make.next @@ -0,0 +1,3 @@ +sed -n ' +s:^.*ALLOCDEF.*"\(.*\)".*$:struct \1 *h_\1 = 0;:p +' $* diff --git a/lang/cem/cemcom/make.tokcase b/lang/cem/cemcom/make.tokcase new file mode 100755 index 000000000..ef32292f9 --- /dev/null +++ b/lang/cem/cemcom/make.tokcase @@ -0,0 +1,34 @@ +cat <<'--EOT--' +#include "Lpars.h" + +char * +symbol2str(tok) + int tok; +{ + static char buf[2] = { '\0', '\0' }; + + if (040 <= tok && tok < 0177) { + buf[0] = tok; + buf[1] = '\0'; + return buf; + } + switch (tok) { +--EOT-- +sed ' +/{[A-Z]/!d +s/.*{\(.*\),.*\(".*"\).*$/ case \1 :\ + return \2;/ +' +cat <<'--EOT--' + case '\n': + case '\f': + case '\v': + case '\r': + case '\t': + buf[0] = tok; + return buf; + default: + return "bad token"; + } +} +--EOT-- diff --git a/lang/cem/cemcom/make.tokfile b/lang/cem/cemcom/make.tokfile new file mode 100755 index 000000000..494b7e3cc --- /dev/null +++ b/lang/cem/cemcom/make.tokfile @@ -0,0 +1,6 @@ +sed ' +/{[A-Z]/!d +s/.*{// +s/,.*// +s/.*/%token &;/ +' diff --git a/lang/cem/cemcom/mcomm.c b/lang/cem/cemcom/mcomm.c new file mode 100644 index 000000000..ea133d531 --- /dev/null +++ b/lang/cem/cemcom/mcomm.c @@ -0,0 +1,241 @@ +/* mcomm.c -- change ".lcomm name" into ".comm name" where "name" + is specified in a list. +*/ +#include + +#define IDFSIZE 4096 + +char *readfile(); + +struct node { + char *name; + struct node *left, *right; +}; + +char * +Malloc(n) + unsigned n; +{ + char *space; + char *malloc(); + + if ((space = malloc(n)) == 0) { + fprintf(stderr, "out of memory\n"); + exit(1); + } + return space; +} + +struct node *make_tree(); + +#define new_node() ((struct node *) Malloc(sizeof (struct node))) + +main(argc, argv) + char *argv[]; +{ + char *nl_file, *as_file; + char *nl_text, *as_text; + struct node *nl_tree = 0; + int nl_siz, as_siz; + + if (argc != 3) { + fprintf(stderr, "use: %s namelist assembler_file\n", argv[0]); + exit(1); + } + nl_file = argv[1]; + as_file = argv[2]; + + if ((nl_text = readfile(nl_file, &nl_siz)) == 0) { + fprintf(stderr, "%s: cannot read namelist %s\n", + argv[0], nl_file); + exit(1); + } + + if ((as_text = readfile(as_file, &as_siz)) == 0) { + fprintf(stderr, "%s: cannot read assembler file %s\n", + argv[0], as_file); + exit(1); + } + + nl_tree = make_tree(nl_text); + edit(as_text, nl_tree); + + if (writefile(as_file, as_text, as_siz) == 0) { + fprintf(stderr, "%s: cannot write to %s\n", argv[0], as_file); + exit(1); + } + return 0; +} + +#include +#include + +char * +readfile(filename, psiz) + char *filename; + int *psiz; +{ + struct stat stbuf; /* for `stat' to get filesize */ + register int fd; /* filedescriptor for `filename' */ + register char *cbuf; /* pointer to buffer to be returned */ + + if (((fd = open(filename, 0)) < 0) || (fstat(fd, &stbuf) != 0)) + return 0; + cbuf = Malloc(stbuf.st_size + 1); + if (read(fd, cbuf, stbuf.st_size) != stbuf.st_size) + return 0; + cbuf[stbuf.st_size] = '\0'; + close(fd); /* filedes no longer needed */ + *psiz = stbuf.st_size; + return cbuf; +} + +int +writefile(filename, text, size) + char *filename, *text; +{ + register fd; + + if ((fd = open(filename, 1)) < 0) + return 0; + if (write(fd, text, size) != size) + return 0; + close(fd); + return 1; +} + +struct node * +make_tree(nl) + char *nl; +{ + char *id = nl; + struct node *tree = 0; + + while (*nl) { + if (*nl == '\n') { + *nl = '\0'; + insert(&tree, id); + id = ++nl; + } + else { + ++nl; + } + } + return tree; +} + +insert(ptree, id) + struct node **ptree; + char *id; +{ + register cmp; + + if (*ptree == 0) { + register struct node *nnode = new_node(); + + nnode->name = id; + nnode->left = nnode->right = 0; + *ptree = nnode; + } + else + if ((cmp = strcmp((*ptree)->name, id)) < 0) + insert(&((*ptree)->right), id); + else + if (cmp > 0) + insert(&((*ptree)->left), id); +} + +struct node * +find(tree, id) + struct node *tree; + char *id; +{ + register cmp; + + if (tree == 0) + return 0; + if ((cmp = strcmp(tree->name, id)) < 0) + return find(tree->right, id); + if (cmp > 0) + return find(tree->left, id); + return tree; +} + +edit(text, tree) + char *text; + struct node *tree; +{ + register char *ptr = text; + char idbuf[IDFSIZE]; + register char *id; + register char *save_ptr; + + while (*ptr) { + if ( + *ptr == '.' && + *++ptr == 'l' && + *++ptr == 'c' && + *++ptr == 'o' && + *++ptr == 'm' && + *++ptr == 'm' && + (*++ptr == ' ' || *ptr == '\t') + ) + { + save_ptr = ptr - 6; + while (*++ptr == ' ' || *ptr == '\t') + ; + if (*ptr == '_') + ++ptr; + if (InId(*ptr)) { + id = &idbuf[0]; + *id++ = *ptr++; + while (InId(*ptr)) + *id++ = *ptr++; + *id = '\0'; + if (find(tree, idbuf) != 0) { + *save_ptr++ = ' '; + *save_ptr++ = '.'; + } + } + } + while (*ptr && *ptr++ != '\n') + ; + } +} + +InId(c) +{ + switch (c) { + + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': + case '_': + case '.': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return 1; + + default: + return 0; + } +} + +puttree(nd) + struct node *nd; +{ + if (nd) { + puttree(nd->left); + printf("%s\n", nd->name); + puttree(nd->right); + } +} diff --git a/lang/cem/cemcom/mes.h b/lang/cem/cemcom/mes.h new file mode 100644 index 000000000..f5e3c4031 --- /dev/null +++ b/lang/cem/cemcom/mes.h @@ -0,0 +1,4 @@ +/* $Header$ */ +/* MESSAGE ADMINISTRATION */ + +extern int fp_used; /* code.c */ diff --git a/lang/cem/cemcom/options b/lang/cem/cemcom/options new file mode 100644 index 000000000..378a540b4 --- /dev/null +++ b/lang/cem/cemcom/options @@ -0,0 +1,28 @@ +User options: + +C while running preprocessor, copy comment +D see identifier following as a macro +E run preprocessor only +I expand include table with directory name following +M set identifier length +n don't generate register messages +p generate linenumbers and filename indications + while generating compact EM code +P in running the preprocessor do not output '# line' lines +R restricted C +U undefine predefined name +V set objectsize and alignment requirements +w suppress warning diagnostics + + +Debug options: + +d perform a small dataflow analysis +f dump whole identifier table, including macros and reserved words +h supply hash table statistics +i print name of include files +m supply memory allocation statistics +r right-adjust bitfield +t dump table of identifiers +u unstack L_UNIVERSAL +x dump expressions diff --git a/lang/cem/cemcom/options.c b/lang/cem/cemcom/options.c new file mode 100644 index 000000000..a21456ea5 --- /dev/null +++ b/lang/cem/cemcom/options.c @@ -0,0 +1,252 @@ +/* $Header$ */ +/* U S E R O P T I O N - H A N D L I N G */ + +#include "nopp.h" +#include "idfsize.h" +#include "maxincl.h" +#include "nobitfield.h" +#include "class.h" +#include "macro.h" +#include "idf.h" +#include "arith.h" +#include "sizes.h" +#include "align.h" +#include "storage.h" + +#ifndef NOPP +extern char *inctable[MAXINCL]; +extern int inc_pos; +#endif NOPP + +extern char options[]; +extern int idfsize; + +int txt2int(); + +do_option(text) + char *text; +{ + switch(*text++) { + + default: + options[text[-1]] = 1; /* flags, debug options etc. */ + break; + + case 'C' : /* E option + comment output */ +#ifndef NOPP + options['E'] = 1; + warning("-C: comment is not output"); +#else NOPP + warning("-C option ignored"); +#endif NOPP + break; + + case 'D' : { /* -Dname : predefine name */ +#ifndef NOPP + register char *cp = text, *name, *mactext; + + if (class(*cp) != STIDF) { + error("identifier missing in -D%s", text); + break; + } + + name = cp; + + while (*cp && in_idf(*cp)) { + ++cp; + } + + if (!*cp) { /* -Dname */ + mactext = "1"; + } + else + if (*cp == '=') { /* -Dname=text */ + *cp++ = '\0'; /* end of name */ + mactext = cp; + } + else { /* -Dname?? */ + error("malformed option -D%s", text); + break; + } + + macro_def(str2idf(name), mactext, -1, strlen(mactext), + NOFLAG); +#else NOPP + warning("-D option ignored"); +#endif NOPP + break; + } + + case 'E' : /* run preprocessor only, with # */ +#ifndef NOPP + options['E'] = 1; +#else NOPP + warning("-E option ignored"); +#endif NOPP + break; + + case 'I' : /* -Ipath : insert "path" into include list */ +#ifndef NOPP + if (*text) { + register int i = inc_pos++; + register char *new = text; + + while (new) { + register char *tmp = inctable[i]; + + inctable[i++] = new; + if (i == MAXINCL) + fatal("too many -I options"); + new = tmp; + } + } +#else NOPP + warning("-I option ignored"); +#endif NOPP + break; + + case 'L' : + warning("-L: default no EM profiling; use -p for EM profiling"); + break; + + case 'M': /* maximum identifier length */ + idfsize = txt2int(&text); + if (*text || idfsize <= 0) + fatal("malformed -M option"); + if (idfsize > IDFSIZE) + fatal("maximum identifier length is %d", IDFSIZE); + break; + + case 'p' : /* generate profiling code (fil/lin) */ + options['p'] = 1; + break; + + case 'P' : /* run preprocessor stand-alone, without #'s */ +#ifndef NOPP + options['E'] = 1; + options['P'] = 1; +#else NOPP + warning("-P option ignored"); +#endif NOPP + break; + + case 'U' : { /* -Uname : undefine predefined */ +#ifndef NOPP + struct idf *idef; + + if (*text) { + if ((idef = str2idf(text))->id_macro) { + free_macro(idef->id_macro); + idef->id_macro = (struct macro *) 0; + } + } +#else NOPP + warning("-U option ignored"); +#endif NOPP + break; + } + + case 'V' : /* set object sizes and alignment requirements */ + { + arith size, align; + char c; + + while (c = *text++) { + size = txt2int(&text); + align = 0; + if (*text == '.') { + text++; + align = txt2int(&text); + } + switch (c) { + + case 's': /* short */ + if (size != (arith)0) + short_size = size; + if (align != 0) + short_align = align; + break; + case 'w': /* word */ + if (size != (arith)0) + dword_size = (word_size = size) << 1; + if (align != 0) + word_align = align; + break; + case 'i': /* int */ + if (size != (arith)0) + int_size = size; + if (align != 0) + int_align = align; + break; + case 'l': /* long */ + if (size != (arith)0) + long_size = size; + if (align != 0) + long_align = align; + break; + case 'f': /* float */ + if (size != (arith)0) + float_size = size; + if (align != 0) + float_align = align; + break; + case 'd': /* double */ + if (size != (arith)0) + double_size = size; + if (align != 0) + double_align = align; + break; + case 'p': /* pointer */ + if (size != (arith)0) + pointer_size = size; + if (align != 0) + pointer_align = align; + break; + case 'r': /* adjust bitfields right */ +#ifndef NOBITFIELD + options['r'] = 1; +#else NOBITFIELD + warning("bitfields are not implemented"); +#endif NOBITFIELD + break; + case 'S': /* initial struct alignment */ + if (size != (arith)0) + struct_align = size; + break; + case 'U': /* initial union alignment */ + if (size != (arith)0) + union_align = size; + break; + default: + error("-V: bad type indicator %c\n", c); + } + } + break; + } + + case 'n': + options['n'] = 1; /* use no registers */ + break; + + case 'w': + options['w'] = 1; /* no warnings will be given */ + break; + } +} + +int +txt2int(tp) + char **tp; +{ + /* the integer pointed to by *tp is read, while increasing + *tp; the resulting value is yielded. + */ + register int val = 0; + register int ch; + + while (ch = **tp, ch >= '0' && ch <= '9') { + val = val * 10 + ch - '0'; + (*tp)++; + } + return val; +} diff --git a/lang/cem/cemcom/program.g b/lang/cem/cemcom/program.g new file mode 100644 index 000000000..761e19db9 --- /dev/null +++ b/lang/cem/cemcom/program.g @@ -0,0 +1,190 @@ +/* $Header$ */ +/* PROGRAM PARSER */ + +/* The presence of typedef declarations renders it impossible to + make a context-free grammar of C. Consequently we need + context-sensitive parsing techniques, the simplest one being + a subtle cooperation between the parser and the lexical scanner. + The lexical scanner has to know whether to return IDENTIFIER + or TYPE_IDENTIFIER for a given tag, and it obtains this information + from the definition list, as constructed by the parser. + The present grammar is essentially LL(2), and is processed by + a parser generator which accepts LL(1) with tie breaking rules + in C, of the form %if(cond) and %while(cond). To solve the LL(1) + ambiguities, the lexical scanner does a one symbol look-ahead. + This symbol, however, cannot always be correctly assessed, since + the present symbol may cause a change in the definition list + which causes the identification of the look-ahead symbol to be + invalidated. + The lexical scanner relies on the parser (or its routines) to + detect this situation and then update the look-ahead symbol. + An alternative approach would be to reassess the look-ahead symbol + in the lexical scanner when it is promoted to dot symbol. This + would be more beautiful but less correct, since then for a short + while there would be a discrepancy between the look-ahead symbol + and the definition list; I think it would nevertheless work in + correct programs. + A third solution would be to enter the identifier as soon as it + is found; its storage class is then known, although its full type + isn't. We would have to fill that in afterwards. + + At block exit the situation is even worse. Upon reading the + closing brace, the names declared inside the function are cleared + from the name list. This action may expose a type identifier that + is the same as the identifier in the look-ahead symbol. This + situation certainly invalidates the third solution, and casts + doubts upon the second. +*/ + +%lexical LLlex; +%start C_program, program; +%start If_expr, control_if_expression; + +{ +#include "nopp.h" +#include "alloc.h" +#include "arith.h" +#include "LLlex.h" +#include "idf.h" +#include "label.h" +#include "type.h" +#include "declarator.h" +#include "decspecs.h" +#include "code.h" +#include "expr.h" +#include "def.h" + +#ifndef NOPP +extern arith ifval; +#endif NOPP + +/*VARARGS*/ +extern error(); +} + +control_if_expression + { + struct expr *expr; + } +: + constant_expression(&expr) + { +#ifndef NOPP + if (expr->ex_flags & EX_SIZEOF) + error("sizeof not allowed in preprocessor"); + ifval = expr->VL_VALUE; + free_expression(expr); +#endif NOPP + } +; + +/* 10 */ +program: + [%persistent external_definition]* + {unstack_world();} +; + +/* A C identifier definition is remarkable in that it formulates + the declaration in a way different from most other languages: + e.g., rather than defining x as a pointer-to-integer, it defines + *x as an integer and lets the compiler deduce that x is actually + pointer-to-integer. This has profound consequences, but for the + structure of an identifier definition and for the compiler. + + A definition starts with a decl_specifiers, which contains things + like + typedef int + which is implicitly repeated for every definition in the list, and + then for each identifier a declarator is given, of the form + *a() + or so. The decl_specifiers is kept in a struct decspecs, to be + used again and again, while the declarator is stored in a struct + declarator, only to be passed to declare_idf together with the + struct decspecs. +*/ + +external_definition + { + struct decspecs Ds; + struct declarator Dc; + } +: + { + Ds = null_decspecs; + Dc = null_declarator; + } +[ + ext_decl_specifiers(&Ds) + [ + declarator(&Dc) + {declare_idf(&Ds, &Dc, level);} + [%if (Dc.dc_idf->id_def->df_type->tp_fund == FUNCTION) + /* int i (1) {2, 3} + is a function, not an old-fashioned + initialization. + */ + function(&Dc) + | + non_function(&Ds, &Dc) + ] + | + ';' + ] + {remove_declarator(&Dc);} +| + asm_statement /* top level, would you believe */ +] +; + +ext_decl_specifiers(struct decspecs *ds;) : +[%prefer /* the thin ice in R.M. 11.1 */ + decl_specifiers(ds) +| + empty + {do_decspecs(ds);} +] +; + +non_function(struct decspecs *ds; struct declarator *dc;) + { + struct expr *expr = (struct expr *) 0; + } +: + {reject_params(dc);} + initializer(dc->dc_idf, &expr)? + { + code_declaration(dc->dc_idf, expr, level, ds->ds_sc); + free_expression(expr); + } + [ + ',' + init_declarator(ds) + ]* + ';' +; + +/* 10.1 */ +function(struct declarator *dc;) + { + arith fbytes, nbytes; + } +: + { struct idf *idf = dc->dc_idf; + + init_idf(idf); + stack_level(); /* L_FORMAL1 declarations */ + declare_params(dc); + begin_proc(idf->id_text, idf->id_def); + stack_level(); /* L_FORMAL2 declarations */ + } + declaration* + { + declare_formals(&fbytes); + } + compound_statement(&nbytes) + { + unstack_level(); /* L_FORMAL2 declarations */ + unstack_level(); /* L_FORMAL1 declarations */ + end_proc(fbytes, nbytes); + } +; diff --git a/lang/cem/cemcom/replace.c b/lang/cem/cemcom/replace.c new file mode 100644 index 000000000..24c93586b --- /dev/null +++ b/lang/cem/cemcom/replace.c @@ -0,0 +1,158 @@ +/* $Header$ */ +/* PREPROCESSOR: MACRO-TEXT REPLACEMENT ROUTINES */ + +#include "nopp.h" + +#ifndef NOPP +#include "debug.h" /* UF */ +#include "pathlength.h" /* UF */ +#include "strsize.h" /* UF */ + +#include "string.h" +#include "alloc.h" +#include "idf.h" +#include "input.h" +#include "macro.h" +#include "arith.h" +#include "LLlex.h" +#include "class.h" +#include "assert.h" +#include "interface.h" + +EXPORT int +replace(idef) + struct idf *idef; +{ + /* replace() is called by the lexical analyzer to perform + macro replacement. "idef" is the description of the + identifier which leads to the replacement. If the + optional actual parameters of the macro are OK, the text + of the macro is prepared to serve as an input buffer, + which is pushed onto the input stack. + replace() returns 1 if the replacement succeeded and 0 if + some error has occurred. + */ + register char c; + register char flags = idef->id_macro->mc_flag; + char **actpars, **getactuals(); + char *reptext, *macro2buffer(); + int size; + + if (idef->id_macro->mc_nps != -1) { /* with parameter list */ + LoadChar(c); + c = skipspaces(c); + + if (c != '(') { /* no replacement if no () */ + lexerror("(warning) macro %s needs arguments", + idef->id_text); + PushBack(); + return 0; + } + + actpars = getactuals(idef); /* get act.param. list */ + } + + if (flags & PREDEF) { /* don't replace this one... */ + return 0; + } + + if (flags & FUNC) { /* this macro leads to special action */ + macro_func(idef); + } + + /* create and input buffer */ + reptext = macro2buffer(idef, actpars, &size); + InsertText(reptext, size); + + return 1; +} + +PRIVATE +macro_func(idef) + struct idf *idef; +{ + /* macro_func() performs the special actions needed with some + macros. These macros are __FILE__ and __LINE__ which + replacement texts must be evaluated at the time they are + used. + */ + static char FilNamBuf[PATHLENGTH]; + + /* This switch is very blunt... */ + switch (idef->id_text[2]) { + + case 'F' : /* __FILE__ */ + FilNamBuf[0] = '"'; + strcpy(&FilNamBuf[1], FileName); + strcat(FilNamBuf, "\""); + idef->id_macro->mc_text = FilNamBuf; + idef->id_macro->mc_length = strlen(FilNamBuf); + break; + + case 'L' : /* __LINE__ */ + idef->id_macro->mc_text = itos(LineNumber); + idef->id_macro->mc_length = 1; + break; + + default : + crash("(macro_func) illegal macro %s\n", idef->id_text); + + } +} + +PRIVATE char * +macro2buffer(idef, actpars, siztext) + struct idf *idef; + char **actpars; + int *siztext; +{ + /* Macro2buffer() turns the macro replacement text, as it is + stored, into an input buffer, while each occurrence of the + non-ascii formal parameter mark is replaced by its + corresponding actual parameter specified in the actual + parameter list actpars. A pointer to the beginning of the + constructed text is returned, while *siztext is filled + with its length. + + If there are no parameters, this function behaves + the same as strcpy(). + */ + register int size = 8; + register char *text = Malloc(size); + register pos = 0; + register char *ptr = idef->id_macro->mc_text; + + text[pos++] = '\0'; /* allow pushback */ + + while (*ptr) { + if (*ptr & FORMALP) { /* non-asc formal param. mark */ + register int n = *ptr++ & 0177; + register char *p; + + ASSERT(n != 0); + + /* copy the text of the actual parameter + into the replacement text + */ + for (p = actpars[n - 1]; *p; p++) { + text[pos++] = *p; + + if (pos == size) { + text = Srealloc(text, size += RSTRSIZE); + } + } + } + else { + text[pos++] = *ptr++; + + if (pos == size) { + text = Srealloc(text, size += RSTRSIZE); + } + } + } + + text[pos] = '\0'; + *siztext = pos; + return text; +} +#endif NOPP diff --git a/lang/cem/cemcom/scan.c b/lang/cem/cemcom/scan.c new file mode 100644 index 000000000..c34edf4bb --- /dev/null +++ b/lang/cem/cemcom/scan.c @@ -0,0 +1,224 @@ +/* $Header$ */ +/* PREPROCESSOR: SCANNER FOR THE ACTUAL PARAMETERS OF MACROS */ + +#include "nopp.h" + +#ifndef NOPP +/* This file contains the function getactuals() which scans an actual + parameter list and splits it up into a list of strings, each one + representing an actual parameter. +*/ + +#include "lapbuf.h" /* UF */ +#include "nparams.h" /* UF */ + +#include "input.h" +#include "class.h" +#include "idf.h" +#include "macro.h" +#include "interface.h" + +#define EOS '\0' +#define overflow() (fatal("actual parameter buffer overflow")) + +PRIVATE char apbuf[LAPBUF]; /* temporary storage for actual parameters */ +PRIVATE char *actparams[NPARAMS]; /* pointers to the text of the actuals */ +PRIVATE char *aptr; /* pointer to last inserted character in apbuf */ + +#define copy(ch) ((aptr < &apbuf[LAPBUF]) ? (*aptr++ = ch) : overflow()) + +PRIVATE int nr_of_params; /* number of actuals read until now */ + +PRIVATE char ** +getactuals(idef) + struct idf *idef; +{ + /* getactuals() collects the actual parameters and turns them + into a list of strings, a pointer to which is returned. + */ + register acnt = idef->id_macro->mc_nps; + + nr_of_params = 0; + actparams[0] = aptr = &apbuf[0]; + copyact('(', ')', 0); /* read the actual parameters */ + copy(EOS); /* mark the end of it all */ + + if (!nr_of_params++) { /* 0 or 1 parameter */ + /* there could be a ( ) + */ + register char *p = actparams[0]; + + while ((class(*p) == STSKIP) || (*p == '\n')) { + ++p; + } + + if (!*p) { /* the case () : 0 parameters */ + nr_of_params--; + } + } + + if (nr_of_params != acnt) { + /* argument mismatch: too many or too few + actual parameters. + */ + lexerror("argument mismatch, %s", idef->id_text); + + while (++nr_of_params < acnt) { + /* too few paraeters: remaining actuals are "" + */ + actparams[nr_of_params] = (char *) 0; + } + } + + return actparams; +} + +PRIVATE +copyact(ch1, ch2, level) + char ch1, ch2; + int level; +{ + /* copyact() is taken from Ceriel Jacobs' LLgen, with + permission. Its task is to build a list of actuals + parameters, which list is surrounded by '(' and ')' and in + which the parameters are separated by ',' if there are + more than 1. The balancing of '(',')' and '[',']' and + '{','}' is taken care of by calling this function + recursively. At each level, copyact() reads the input, + upto the corresponding closing bracket. + + Opening bracket is ch1, closing bracket is ch2. If + level != 0, copy opening and closing parameters too. + */ + register int ch; /* Current char */ + register int match; /* used to read strings */ + + if (level) { + copy(ch1); + } + + for (;;) { + LoadChar(ch); + + if (ch == ch2) { + if (level) { + copy(ch); + } + return; + } + + switch(ch) { + + case ')': + case '}': + case ']': + lexerror("unbalanced parenthesis"); + break; + + case '(': + copyact('(', ')', level+1); + break; + + case '{': + /* example: + #define declare(v, t) t v + declare(v, union{int i, j; float r;}); + */ + copyact('{', '}', level+1); + break; + + case '[': + copyact('[', ']', level+1); + break; + + case '\n': + while (LoadChar(ch), ch == '#') { + /* This piece of code needs some + explanation: consider the call of + the macro defined as: + #define sum(b,c) (b + c) + in the following form: + sum( + #include my_phone_number + ,2) + in which case the include must be + interpreted as such. + */ + domacro(); /* has read nl, vt or ff */ + /* Loop, for another control line */ + } + + PushBack(); + copy('\n'); + break; + + case '/': + LoadChar(ch); + + if (ch == '*') { /* skip comment */ + skipcomment(); + continue; + } + + PushBack(); + copy('/'); + break; + + case ',': + if (!level) { /* next parameter encountered */ + copy(EOS); + + if (++nr_of_params >= NPARAMS) { + fatal("(getact) too many actuals"); + } + + actparams[nr_of_params] = aptr; + } + else { + copy(ch); + } + break; + + case '\'': + case '"' : + /* watch out for brackets in strings, they do + not count ! + */ + match = ch; + copy(ch); + while (LoadChar(ch), ch != EOI) { + if (ch == match) { + break; + } + + if (ch == '\\') { + copy(ch); + LoadChar(ch); + } + else + if (ch == '\n') { + lexerror("newline in string"); + copy(match); + break; + } + + copy(ch); + } + + if (ch == match) { + copy(ch); + break; + } + /* Fall through */ + + case EOI : + lexerror("unterminated macro call"); + return; + + default: + copy(ch); + break; + } + } +} +#endif NOPP diff --git a/lang/cem/cemcom/sizes.h b/lang/cem/cemcom/sizes.h new file mode 100644 index 000000000..d0ae01e24 --- /dev/null +++ b/lang/cem/cemcom/sizes.h @@ -0,0 +1,8 @@ +/* $Header$ */ +/* VARIOUS TARGET MACHINE SIZE DESCRIPTORS */ + +extern arith + short_size, word_size, dword_size, int_size, long_size, + float_size, double_size, pointer_size; + +extern arith max_int, max_unsigned; /* cstoper.c */ diff --git a/lang/cem/cemcom/skip.c b/lang/cem/cemcom/skip.c new file mode 100644 index 000000000..64b8e137f --- /dev/null +++ b/lang/cem/cemcom/skip.c @@ -0,0 +1,73 @@ +/* $Header$ */ +/* PREPROCESSOR: INPUT SKIP FUNCTIONS */ + +#include "nopp.h" +#include "arith.h" +#include "LLlex.h" +#include "class.h" +#include "input.h" +#include "interface.h" + +#ifndef NOPP +PRIVATE int +skipspaces(ch) + register int ch; +{ + /* skipspaces() skips any white space and returns the first + non-space character. + */ + for (;;) { + while (class(ch) == STSKIP) + LoadChar(ch); + + /* How about "\\\n"????????? */ + + if (ch == '/') { + LoadChar(ch); + if (ch == '*') { + skipcomment(); + LoadChar(ch); + } + else { + PushBack(); + return '/'; + } + } + else + return ch; + } +} +#endif NOPP + +PRIVATE +skipline() +{ + /* skipline() skips all characters until a newline character + is seen, not escaped by a '\\'. + Any comment is skipped. + */ + register int c; + + LoadChar(c); + while (class(c) != STNL && c != EOI) { + if (c == '\\') { + LoadChar(c); + if (class(c) == STNL) + ++LineNumber; + } + if (c == '/') { + LoadChar(c); + if (c == '*') + skipcomment(); + else + continue; + } + LoadChar(c); + } + ++LineNumber; + + if (c == EOI) { /* garbage input... */ + lexerror("unexpected EOF while skipping text"); + PushBack(); + } +} diff --git a/lang/cem/cemcom/specials.h b/lang/cem/cemcom/specials.h new file mode 100644 index 000000000..33896b9aa --- /dev/null +++ b/lang/cem/cemcom/specials.h @@ -0,0 +1,14 @@ +/* $Header$ */ +/* OCCURANCES OF SPECIAL IDENTIFIERS */ + +#define SP_SETJMP 1 + +#define SP_TOTAL 1 + +struct sp_id { + char *si_identifier; /* its name */ + int si_flag; /* index into sp_occurred array */ +}; + +extern char sp_occurred[]; /* idf.c */ +extern struct sp_id special_ids[]; /* main.c */ diff --git a/lang/cem/cemcom/stack.c b/lang/cem/cemcom/stack.c new file mode 100644 index 000000000..060d793c3 --- /dev/null +++ b/lang/cem/cemcom/stack.c @@ -0,0 +1,280 @@ +/* DERIVED FROM $Header$ */ +/* S T A C K / U N S T A C K R O U T I N E S */ + +#include "debug.h" +#include "use_tmp.h" +#include "botch_free.h" + +#include "system.h" +#include "alloc.h" +#include "Lpars.h" +#include "arith.h" +#include "stack.h" +#include "type.h" +#include "idf.h" +#include "def.h" +#include "struct.h" +#include "storage.h" +#include "level.h" +#include "mes.h" +#include "em.h" + +/* #include */ + +extern char options[]; + +static struct stack_level UniversalLevel; +struct stack_level *local_level = &UniversalLevel; +/* The main reason for having this secondary stacking + mechanism besides the linked lists pointed to by the idf's + is efficiency. + To remove the idf's of a given level, one could scan the + hash table and chase down the idf chains; with a hash + table size of 100 this is feasible, but with a size of say + 100000 this becomes painful. Therefore all idf's are also + kept in a stack of sets, one set for each level. +*/ + +int level; /* Always equal to local_level->sl_level. */ + +stack_level() { + /* A new level is added on top of the identifier stack. + */ + struct stack_level *stl = new_stack_level(); + + clear((char *)stl, sizeof(struct stack_level)); + local_level->sl_next = stl; + stl->sl_previous = local_level; + stl->sl_level = ++level; + stl->sl_local_offset = stl->sl_max_block = local_level->sl_local_offset; + local_level = stl; +} + +stack_idf(idf, stl) + struct idf *idf; + struct stack_level *stl; +{ + /* The identifier idf is inserted in the stack on level stl. + */ + register struct stack_entry *se = new_stack_entry(); + + clear((char *)se, sizeof(struct stack_entry)); + /* link it into the stack level */ + se->next = stl->sl_entry; + se->se_idf = idf; + stl->sl_entry = se; +} + +struct stack_level * +stack_level_of(lvl) +{ + /* The stack_level corresponding to level lvl is returned. + The stack should probably be an array, to be extended with + realloc where needed. + */ + if (lvl == level) + return local_level; + else { + register struct stack_level *stl = &UniversalLevel; + + while (stl->sl_level != lvl) + stl = stl->sl_next; + return stl; + } + /*NOTREACHED*/ +} + +unstack_level() +{ + /* The top level of the identifier stack is removed. + */ + struct stack_level *lastlvl; + +#ifdef DEBUG + if (options['t']) + dumpidftab("before unstackidfs", 0); +#endif DEBUG + /* The implementation below is more careful than strictly + necessary. Optimists may optimize it afterwards. + */ + while (local_level->sl_entry) { + register struct stack_entry *se = local_level->sl_entry; + register struct idf *idf = se->se_idf; + register struct def *def; + register struct sdef *sdef; + register struct tag *tag; + + /* unlink it from the local stack level */ + local_level->sl_entry = se->next; + free_stack_entry(se); + + while ((def = idf->id_def) && def->df_level >= level) { + /* unlink it from the def list under the idf block */ + if (def->df_sc == LABEL) + unstack_label(idf); + else + if (level == L_LOCAL || level == L_FORMAL1) { + if ( def->df_register != REG_NONE && + def->df_sc != STATIC && + options['n'] == 0 + ) { + int reg; + + switch (def->df_type->tp_fund) { + + case POINTER: + reg = reg_pointer; + break; + case FLOAT: + case DOUBLE: + reg = reg_float; + break; + default: + reg = reg_any; + break; + } + C_ms_reg(def->df_address, + def->df_type->tp_size, + reg, def->df_register + ); + } + } + idf->id_def = def->next; + free_def(def); + update_ahead(idf); + } + while ((sdef = idf->id_sdef) && sdef->sd_level >= level) { + /* unlink it from the sdef list under the idf block */ + idf->id_sdef = sdef->next; + free_sdef(sdef); + } + while ((tag = idf->id_struct) && tag->tg_level >= level) { + /* unlink it from the struct list under the idf block */ + idf->id_struct = tag->next; + free_tag(tag); + } + while ((tag = idf->id_enum) && tag->tg_level >= level) { + /* unlink it from the enum list under the idf block */ + idf->id_enum = tag->next; + free_tag(tag); + } + } + /* Unlink the local stack level from the stack. + */ + lastlvl = local_level; + local_level = local_level->sl_previous; + if (level > L_LOCAL && lastlvl->sl_max_block < local_level->sl_max_block) + local_level->sl_max_block = lastlvl->sl_max_block; + free_stack_level(lastlvl); + local_level->sl_next = (struct stack_level *) 0; + level = local_level->sl_level; + +#ifdef DEBUG + if (options['t']) + dumpidftab("after unstackidfs", 0); +#endif DEBUG +} + +unstack_world() +{ + /* The global level of identifiers is scanned, and final + decisions are taken about such issues as + extern/static/global and un/initialized. + Effects on the code generator: initialised variables + have already been encoded while the uninitialised ones + are not and have to be encoded at this moment. + */ + struct stack_entry *se = local_level->sl_entry; + + open_name_list(); + + while (se) { + register struct idf *idf = se->se_idf; + register struct def *def = idf->id_def; + + if (!def) { + /* global selectors, etc. */ + se = se->next; + continue; + } + +#ifdef DEBUG + if (options['a']) { + printf("\"%s\", %s, %s, %s\n", + idf->id_text, + (def->df_alloc == 0) ? "no alloc" : + (def->df_alloc == ALLOC_SEEN) ? "alloc seen" : + (def->df_alloc == ALLOC_DONE) ? "alloc done" : + "illegal alloc info", + def->df_initialized ? "init" : "no init", + def->df_used ? "used" : "not used"); + } +#endif DEBUG + /* find final storage class */ + if (def->df_sc == GLOBAL || def->df_sc == IMPLICIT) { + /* even now we still don't know */ + def->df_sc = EXTERN; + } + + if ( def->df_sc == STATIC + && def->df_type->tp_fund == FUNCTION + && !def->df_initialized + ) { + /* orphaned static function */ + if (options['R']) + warning("static function %s never defined, %s", + idf->id_text, + "changed to extern" + ); + def->df_sc = EXTERN; + } + + if ( def->df_alloc == ALLOC_SEEN && + !def->df_initialized + ) { + /* space must be allocated */ + bss(idf); + namelist(idf->id_text); /* may be common */ + def->df_alloc = ALLOC_DONE; + /* df_alloc must be set to ALLOC_DONE because + the idf entry may occur several times in + the list. + The reason is that the same name may be used + for different purposes on the same level, e.g + struct s {int s;} s; + is a legal definition and contains 3 defining + occurrences of s. Each definition has been + entered into the idfstack. Although only + one of them concerns a variable, we meet the + s 3 times when scanning the idfstack. + */ + } + se = se->next; + } +} + +/* A list of potential common names is kept, to be fed to + an understanding loader. The list is written to a file + the name of which is nmlist. If nmlist == NULL, no name + list is generated. +*/ +extern char *nmlist; /* BAH! -- main.c */ +static int nfd; + +open_name_list() +{ + if (nmlist) { + if ((nfd = sys_creat(nmlist, 0644)) < 0) { + fatal("cannot create namelist %s", nmlist); + } + } +} + +namelist(nm) + char *nm; +{ + if (nmlist) { + sys_write(nfd, nm, strlen(nm)); + sys_write(nfd, "\n", 1); + } +} diff --git a/lang/cem/cemcom/stack.h b/lang/cem/cemcom/stack.h new file mode 100644 index 000000000..27a7f312f --- /dev/null +++ b/lang/cem/cemcom/stack.h @@ -0,0 +1,46 @@ +/* $Header$ */ +/* IDENTIFIER STACK DEFINITIONS */ + +/* The identifier stack is implemented as a stack of sets. + The stack is implemented by a doubly linked list, + the sets by singly linked lists. +*/ + +struct stack_level { + struct stack_level *next; + struct stack_level *sl_next; /* upward link */ + struct stack_level *sl_previous; /* downward link */ + struct stack_entry *sl_entry; /* sideward link */ + arith sl_local_offset; /* @ for first coming object */ + arith sl_max_block; /* maximum size of sub-block */ + int sl_level; +}; + + +/* allocation definitions of struct stack_level */ +/* ALLOCDEF "stack_level" */ +extern char *st_alloc(); +extern struct stack_level *h_stack_level; +#define new_stack_level() ((struct stack_level *) \ + st_alloc((char **)&h_stack_level, sizeof(struct stack_level))) +#define free_stack_level(p) st_free(p, h_stack_level, sizeof(struct stack_level)) + + +struct stack_entry { + struct stack_entry *next; + struct idf *se_idf; +}; + + +/* allocation definitions of struct stack_entry */ +/* ALLOCDEF "stack_entry" */ +extern char *st_alloc(); +extern struct stack_entry *h_stack_entry; +#define new_stack_entry() ((struct stack_entry *) \ + st_alloc((char **)&h_stack_entry, sizeof(struct stack_entry))) +#define free_stack_entry(p) st_free(p, h_stack_entry, sizeof(struct stack_entry)) + + +extern struct stack_level *local_level; +extern struct stack_level *stack_level_of(); +extern int level; diff --git a/lang/cem/cemcom/stack.str b/lang/cem/cemcom/stack.str new file mode 100644 index 000000000..27a7f312f --- /dev/null +++ b/lang/cem/cemcom/stack.str @@ -0,0 +1,46 @@ +/* $Header$ */ +/* IDENTIFIER STACK DEFINITIONS */ + +/* The identifier stack is implemented as a stack of sets. + The stack is implemented by a doubly linked list, + the sets by singly linked lists. +*/ + +struct stack_level { + struct stack_level *next; + struct stack_level *sl_next; /* upward link */ + struct stack_level *sl_previous; /* downward link */ + struct stack_entry *sl_entry; /* sideward link */ + arith sl_local_offset; /* @ for first coming object */ + arith sl_max_block; /* maximum size of sub-block */ + int sl_level; +}; + + +/* allocation definitions of struct stack_level */ +/* ALLOCDEF "stack_level" */ +extern char *st_alloc(); +extern struct stack_level *h_stack_level; +#define new_stack_level() ((struct stack_level *) \ + st_alloc((char **)&h_stack_level, sizeof(struct stack_level))) +#define free_stack_level(p) st_free(p, h_stack_level, sizeof(struct stack_level)) + + +struct stack_entry { + struct stack_entry *next; + struct idf *se_idf; +}; + + +/* allocation definitions of struct stack_entry */ +/* ALLOCDEF "stack_entry" */ +extern char *st_alloc(); +extern struct stack_entry *h_stack_entry; +#define new_stack_entry() ((struct stack_entry *) \ + st_alloc((char **)&h_stack_entry, sizeof(struct stack_entry))) +#define free_stack_entry(p) st_free(p, h_stack_entry, sizeof(struct stack_entry)) + + +extern struct stack_level *local_level; +extern struct stack_level *stack_level_of(); +extern int level; diff --git a/lang/cem/cemcom/statement.g b/lang/cem/cemcom/statement.g new file mode 100644 index 000000000..ea5cbfb32 --- /dev/null +++ b/lang/cem/cemcom/statement.g @@ -0,0 +1,402 @@ +/* $Header$ */ +/* STATEMENT SYNTAX PARSER */ + +{ +#include "debug.h" +#include "botch_free.h" + +#include "arith.h" +#include "LLlex.h" +#include "type.h" +#include "idf.h" +#include "label.h" +#include "expr.h" +#include "code.h" +#include "storage.h" +#include "em.h" +#include "stack.h" +#include "def.h" + +extern int level; +} + +/* Each statement construction is stacked in order to trace a + statement to such a construction. Example: a case statement should + be recognized as a piece of the most enclosing switch statement. +*/ + +/* 9 */ +statement +: +[%if (AHEAD != ':') + expression_statement +| + label ':' statement +| + compound_statement((arith *)0) +| + if_statement +| + while_statement +| + do_statement +| + for_statement +| + switch_statement +| + case_statement +| + default_statement +| + break_statement +| + continue_statement +| + return_statement +| + jump +| + ';' +| + asm_statement +] +; + +expression_statement + { struct expr *expr; + } +: + expression(&expr) + ';' + { +#ifdef DEBUG + print_expr("Full expression", expr); +#endif DEBUG + code_expr(expr, RVAL, FALSE, NO_LABEL, NO_LABEL); + free_expression(expr); + } +; + +label + { struct idf *idf; + } +: + identifier(&idf) + { + /* This allows the following absurd case: + + typedef int grz; + main() { + grz: printf("A labelled statement\n"); + } + */ + define_label(idf); + C_ilb((label)idf->id_def->df_address); + } +; + +if_statement + { + struct expr *expr; + label l_true = text_label(); + label l_false = text_label(); + label l_end = text_label(); + } +: + IF + '(' + expression(&expr) + { + opnd2test(&expr, NOTEQUAL); + if (expr->ex_class != Value) { + /* What's happening here? If the + expression consisted of a constant + expression, the comparison has + been optimized to a 0 or 1. + */ + code_expr(expr, RVAL, TRUE, l_true, l_false); + C_ilb(l_true); + } + else { + if (expr->VL_VALUE == (arith)0) { + C_bra(l_false); + } + } + free_expression(expr); + } + ')' + statement + [%prefer + ELSE + { + C_bra(l_end); + C_ilb(l_false); + } + statement + { C_ilb(l_end); + } + | + empty + { C_ilb(l_false); + } + ] +; + +while_statement + { + struct expr *expr; + label l_break = text_label(); + label l_continue = text_label(); + label l_body = text_label(); + } +: + WHILE + { + stat_stack(l_break, l_continue); + C_ilb(l_continue); + } + '(' + expression(&expr) + { + opnd2test(&expr, NOTEQUAL); + if (expr->ex_class != Value) { + code_expr(expr, RVAL, TRUE, l_body, l_break); + C_ilb(l_body); + } + else { + if (expr->VL_VALUE == (arith)0) { + C_bra(l_break); + } + } + } + ')' + statement + { + C_bra(l_continue); + C_ilb(l_break); + stat_unstack(); + free_expression(expr); + } +; + +do_statement + { struct expr *expr; + label l_break = text_label(); + label l_continue = text_label(); + label l_body = text_label(); + } +: + DO + { C_ilb(l_body); + stat_stack(l_break, l_continue); + } + statement + WHILE + '(' + { C_ilb(l_continue); + } + expression(&expr) + { + opnd2test(&expr, NOTEQUAL); + if (expr->ex_class != Value) { + code_expr(expr, RVAL, TRUE, l_body, l_break); + } + else { + if (expr->VL_VALUE == (arith)1) { + C_bra(l_body); + } + } + C_ilb(l_break); + } + ')' + ';' + { + stat_unstack(); + free_expression(expr); + } +; + +for_statement + { struct expr *e_init = 0, *e_test = 0, *e_incr = 0; + label l_break = text_label(); + label l_continue = text_label(); + label l_body = text_label(); + label l_test = text_label(); + } +: + FOR + { stat_stack(l_break, l_continue); + } + '(' + [ + expression(&e_init) + { code_expr(e_init, RVAL, FALSE, NO_LABEL, NO_LABEL); + } + ]? + ';' + { C_ilb(l_test); + } + [ + expression(&e_test) + { + opnd2test(&e_test, NOTEQUAL); + if (e_test->ex_class != Value) { + code_expr(e_test, RVAL, TRUE, l_body, l_break); + C_ilb(l_body); + } + else { + if (e_test->VL_VALUE == (arith)0) { + C_bra(l_break); + } + } + } + ]? + ';' + expression(&e_incr)? + ')' + statement + { + C_ilb(l_continue); + if (e_incr) + code_expr(e_incr, RVAL, FALSE, NO_LABEL, NO_LABEL); + C_bra(l_test); + C_ilb(l_break); + stat_unstack(); + free_expression(e_init); + free_expression(e_test); + free_expression(e_incr); + } +; + +switch_statement + { + struct expr *expr; + } +: + SWITCH + '(' + expression(&expr) /* this must be an integer expression! */ + { + ch7cast(&expr, CAST, int_type); + code_startswitch(expr); + } + ')' + statement + { + code_endswitch(); + free_expression(expr); + } +; + +case_statement + { + struct expr *expr; + } +: + CASE + constant_expression(&expr) + { + code_case(expr->VL_VALUE); + free_expression(expr); + } + ':' + statement +; + +default_statement +: + DEFAULT + { + code_default(); + } + ':' + statement +; + +break_statement +: + BREAK + { + if (!do_break()) + error("invalid break"); + } + ';' +; + +continue_statement +: + CONTINUE + { + if (!do_continue()) + error("invalid continue"); + } + ';' +; + +return_statement + { struct expr *expr = 0; + } +: + RETURN + [ + expression(&expr) + { + do_return_expr(expr); + free_expression(expr); + } + | + empty + { + C_ret((arith)0); + } + ] + ';' +; + +jump + { struct idf *idf; + } +: + GOTO + identifier(&idf) + ';' + { + apply_label(idf); + C_bra((label)idf->id_def->df_address); + } +; + +compound_statement(arith *nbytes;): + '{' + { + stack_level(); + } + [%while (AHEAD != ':') /* >>> conflict on TYPE_IDENTIFIER */ + declaration + ]* + [%persistent + statement + ]* + '}' + { + if (nbytes) + *nbytes = (- local_level->sl_max_block); + unstack_level(); + } +; + +asm_statement + { char *asm_string; + } +: + ASM + '(' + STRING + { asm_string = dot.tk_str; + } + ')' + ';' + { asm_seen(asm_string); + } +; diff --git a/lang/cem/cemcom/stb.c b/lang/cem/cemcom/stb.c new file mode 100644 index 000000000..23ba9d945 --- /dev/null +++ b/lang/cem/cemcom/stb.c @@ -0,0 +1,11 @@ +/* $Header$ */ +/* library routine for copying structs */ + +__stb(n, f, t) + register char *f, *t; register n; +{ + if (n > 0) + do + *t++ = *f++; + while (--n); +} diff --git a/lang/cem/cemcom/storage.c b/lang/cem/cemcom/storage.c new file mode 100644 index 000000000..3bae164ba --- /dev/null +++ b/lang/cem/cemcom/storage.c @@ -0,0 +1,67 @@ +/* $Header$ */ +/* S T R U C T U R E - S T O R A G E M A N A G E M E N T */ + +/* Assume that each structure contains a field "next", of pointer + type, as first tagfield. + struct xxx serves as a general structure: it just declares the + tagfield "next" as first field of a structure. + Please don't worry about any warnings when compiling this file + because some dirty tricks are performed to obtain the necessary + actions. +*/ + +#include "debug.h" /* UF */ +#include "botch_free.h" /* UF */ +#include "assert.h" +#include "alloc.h" +#include "storage.h" + +struct xxx { + char *next; +}; + +char * +st_alloc(phead, size) + char **phead; + int size; +{ + struct xxx *tmp; + + if (*phead == 0) { + return Malloc(size); + } + tmp = (struct xxx *) (*phead); + *phead = (char *) tmp->next; + return (char *) tmp; +} + +/* instead of Calloc: */ +clear(ptr, n) + char *ptr; + int n; +{ + ASSERT((long)ptr % sizeof (long) == 0); + while (n >= sizeof (long)) { /* high-speed clear loop */ + *(long *)ptr = 0L; + ptr += sizeof (long), n -= sizeof (long); + } + while (n--) + *ptr++ = '\0'; +} + +#ifdef BOTCH_FREE +botch(ptr, n) + char *ptr; + int n; +{ /* Writes garbage over n chars starting from ptr. + Used to check if freed memory is used inappropriately. + */ + ASSERT((long)ptr % sizeof (long) == 0); + while (n >= sizeof (long)) { /* high-speed botch loop */ + *(long *)ptr = 025252525252L; + ptr += sizeof (long), n -= sizeof (long); + } + while (n--) + *ptr++ = '\252'; +} +#endif BOTCH_FREE diff --git a/lang/cem/cemcom/storage.h b/lang/cem/cemcom/storage.h new file mode 100644 index 000000000..73b024b4d --- /dev/null +++ b/lang/cem/cemcom/storage.h @@ -0,0 +1,9 @@ +/* $Header$ */ +/* S T R U C T U R E - S T O R A G E D E F I N I T I O N S */ + +#ifndef BOTCH_FREE +#define st_free(ptr, head, size) {ptr->next = head; head = ptr;} +#else def BOTCH_FREE +#define st_free(ptr, head, size) {botch((char *)(ptr), size); \ + ptr->next = head; head = ptr;} +#endif BOTCH_FREE diff --git a/lang/cem/cemcom/string.c b/lang/cem/cemcom/string.c new file mode 100644 index 000000000..bb7ab4869 --- /dev/null +++ b/lang/cem/cemcom/string.c @@ -0,0 +1,275 @@ +/* $Header$ */ +/* STRING MANIPULATION AND PRINT ROUTINES */ + +#include "string.h" +#include "nopp.h" +#include "str_params.h" +#include "arith.h" +#include "system.h" + +doprnt(fd, fmt, argp) + char *fmt; + int argp[]; +{ + char buf[SSIZE]; + + sys_write(fd, buf, format(buf, fmt, (char *)argp)); +} + +/*VARARGS1*/ +printf(fmt, args) + char *fmt; + char args; +{ + char buf[SSIZE]; + + sys_write(1, buf, format(buf, fmt, &args)); +} + +/*VARARGS1*/ +fprintf(fd, fmt, args) + char *fmt; + char args; +{ + char buf[SSIZE]; + + sys_write(fd, buf, format(buf, fmt, &args)); +} + +/*VARARGS1*/ +char * +sprintf(buf, fmt, args) + char *buf, *fmt; + char args; +{ + buf[format(buf, fmt, &args)] = '\0'; + return buf; +} + +int +format(buf, fmt, argp) + char *buf, *fmt; + char *argp; +{ + register char *pf = fmt, *pa = argp; + register char *pb = buf; + + while (*pf) { + if (*pf == '%') { + register width, base, pad, npad; + char *arg; + char cbuf[2]; + char *badformat = ""; + + /* get padder */ + if (*++pf == '0') { + pad = '0'; + ++pf; + } + else + pad = ' '; + + /* get width */ + width = 0; + while (*pf >= '0' && *pf <= '9') + width = 10 * width + *pf++ - '0'; + + /* get text and move pa */ + if (*pf == 's') { + arg = *(char **)pa; + pa += sizeof(char *); + } + else + if (*pf == 'c') { + cbuf[0] = * (char *) pa; + cbuf[1] = '\0'; + pa += sizeof(int); + arg = &cbuf[0]; + } + else + if (*pf == 'l') { + /* alignment ??? */ + if (base = integral(*++pf)) { + arg = int_str(*(long *)pa, base); + pa += sizeof(long); + } + else { + pf--; + arg = badformat; + } + } + else + if (base = integral(*pf)) { + arg = int_str((long)*(int *)pa, base); + pa += sizeof(int); + } + else + if (*pf == '%') + arg = "%"; + else + arg = badformat; + + npad = width - strlen(arg); + + while (npad-- > 0) + *pb++ = pad; + + while (*pb++ = *arg++); + pb--; + pf++; + } + else + *pb++ = *pf++; + } + return pb - buf; +} + +integral(c) +{ + switch (c) { + case 'b': + return -2; + case 'd': + return 10; + case 'o': + return -8; + case 'u': + return -10; + case 'x': + return -16; + } + return 0; +} + +/* Integer to String translator +*/ +char * +int_str(val, base) + register long val; + register base; +{ + /* int_str() is a very simple integer to string converter. + base < 0 : unsigned. + base must be an element of [-16,-2] V [2,16]. + */ + static char numbuf[MAXWIDTH]; + static char vec[] = "0123456789ABCDEF"; + register char *p = &numbuf[MAXWIDTH]; + int sign = (base > 0); + + *--p = '\0'; /* null-terminate string */ + if (val) { + if (base > 0) { + if (val < (arith)0) { + if ((val = -val) < (arith)0) + goto overflow; + } + else + sign = 0; + } + else + if (base < 0) { /* unsigned */ + base = -base; + if (val < (arith)0) { + register mod, i; + + overflow: + /* this takes a rainy Sunday afternoon to explain */ + /* ??? */ + mod = 0; + for (i = 0; i < 8 * sizeof val; i++) { + mod <<= 1; + if (val < 0) + mod++; + val <<= 1; + if (mod >= base) { + mod -= base; + val++; + } + } + *--p = vec[mod]; + } + } + + do { + *--p = vec[(int) (val % base)]; + val /= base; + } while (val != (arith)0); + + if (sign) + *--p = '-'; /* don't forget it !! */ + } + else + *--p = '0'; /* just a simple 0 */ + + return p; +} + +/* return negative, zero or positive value if + resp. s < t, s == t or s > t +*/ +int +strcmp(s, t) + register char *s, *t; +{ + while (*s == *t++) + if (*s++ == '\0') + return 0; + return *s - *--t; +} + +/* return length of s +*/ +int +strlen(s) + char *s; +{ + register char *b = s; + + while (*b++) + ; + return b - s - 1; +} + +#ifndef NOPP +/* append t to s +*/ +char * +strcat(s, t) + register char *s, *t; +{ + register char *b = s; + + while (*s++) + ; + s--; + while (*s++ = *t++) + ; + return b; +} + +/* Copy t into s +*/ +char * +strcpy(s, t) + register char *s, *t; +{ + register char *b = s; + + while (*s++ = *t++) + ; + return b; +} + +char * +rindex(str, chr) + register char *str, chr; +{ + register char *retptr = 0; + + while (*str) + if (*str++ == chr) + retptr = &str[-1]; + return retptr; +} +#endif NOPP diff --git a/lang/cem/cemcom/string.h b/lang/cem/cemcom/string.h new file mode 100644 index 000000000..ffeeb6bba --- /dev/null +++ b/lang/cem/cemcom/string.h @@ -0,0 +1,13 @@ +/* $Header$ */ +/* STRING-ROUTINE DEFINITIONS */ + +#define stdin 0 +#define stdout 1 +#define stderr 2 + +#define itos(n) int_str((long)(n), 10) + +char *sprintf(); /* string.h */ +char *int_str(); /* string.h */ + +char *strcpy(), *strcat(), *rindex(); diff --git a/lang/cem/cemcom/struct.c b/lang/cem/cemcom/struct.c new file mode 100644 index 000000000..752bcdf87 --- /dev/null +++ b/lang/cem/cemcom/struct.c @@ -0,0 +1,503 @@ +/* $Header$ */ +/* ADMINISTRATION OF STRUCT AND UNION DECLARATIONS */ + +#include "nobitfield.h" +#include "debug.h" +#include "botch_free.h" +#include "arith.h" +#include "stack.h" +#include "idf.h" +#include "def.h" +#include "type.h" +#include "struct.h" +#include "field.h" +#include "LLlex.h" +#include "Lpars.h" +#include "align.h" +#include "level.h" +#include "storage.h" +#include "assert.h" +#include "sizes.h" + +/* Type of previous selector declared with a field width specified, + if any. If a selector is declared with no field with it is set to 0. +*/ +static field_busy = 0; + +extern char options[]; +int lcm(); + +/* The semantics of the identification of structure/union tags is + obscure. Some highly regarded compilers are found out to accept, + e.g.: + f(xp) struct aap *xp; { + struct aap {char *za;}; + xp->za; + } + Equally highly regarded software uses this feature, so we shall + humbly oblige. + The rules we use are: + 1. A structure definition applies at the level where it is + found, unless there is a structure declaration without a + definition on an outer level, in which case the definition + is applied at that level. + 2. A selector is applied on the same level as on which its + structure is being defined. + + If below struct is mentioned, union is implied (and sometimes enum + as well). +*/ + +add_sel(stp, tp, idf, sdefpp, szp, fd) /* this is horrible */ + struct type *stp; /* type of the structure */ + struct type *tp; /* type of the selector */ + struct idf *idf; /* idf of the selector */ + struct sdef ***sdefpp; /* address of hook to selector definition */ + arith *szp; /* pointer to struct size upto here */ + struct field *fd; +{ + /* The selector idf with type tp is added to two chains: the + selector identification chain starting at idf->id_sdef, + and to the end of the member list starting at stp->tp_sdef. + The address of the hook in the latest member (sdef) is + given in sdefpp; the hook itself must still be empty. + */ + arith offset; +#ifndef NOBITFIELD + extern arith add_field(); +#endif NOBITFIELD + + register struct tag *tg = stp->tp_idf->id_struct; /* or union */ + register struct sdef *sdef = idf->id_sdef; + register struct sdef *newsdef; + int lvl = tg->tg_level; + +/* + * char *type2str(); + * printf("add_sel: \n stp = %s\n tp = %s\n name = %s\n *szp = %ld\n", + * type2str(stp), type2str(tp), idf->id_text, *szp); + * ASSERT(**sdefpp == 0); + * ASSERT(tg->tg_type == stp); + */ + + if (options['R'] && !is_anon_idf(idf)) { + /* a K & R test */ + if (idf->id_struct && idf->id_struct->tg_level == level + ) { + warning("%s is also a struct/union tag", + idf->id_text); + } + } + + if (stp->tp_fund == STRUCT) { +#ifndef NOBITFIELD + if (fd == 0) { /* no field width specified */ +#endif NOBITFIELD + offset = align(*szp, tp->tp_align); + field_busy = 0; +#ifndef NOBITFIELD + } + else { + /* if something is wrong, the type of the + specified selector remains unchanged; its + bitfield specifier, however, is thrown away. + */ + offset = add_field(szp, fd, &tp, idf, stp); + } +#endif NOBITFIELD + } + else { /* (stp->tp_fund == UNION) */ + if (fd) { + error("fields not allowed in unions"); + free_field(fd); + fd = 0; + } + offset = (arith)0; + } + + check_selector(idf, stp); + if (options['R']) { + if ( sdef && sdef->sd_level == lvl && + sdef->sd_offset != offset + ) /* RM 8.7 */ + warning("selector %s redeclared", idf->id_text); + } + + newsdef = new_sdef(); + newsdef->sd_sdef = (struct sdef *) 0; + + /* link into selector descriptor list of this id + */ + newsdef->next = sdef; + idf->id_sdef = newsdef; + + newsdef->sd_level = lvl; + newsdef->sd_idf = idf; + newsdef->sd_stype = stp; + newsdef->sd_type = tp; + newsdef->sd_offset = offset; + +#ifndef NOBITFIELD + if (tp->tp_fund == FIELD) { + tp->tp_field->fd_sdef = newsdef; + } +#endif NOBITFIELD + + stack_idf(idf, stack_level_of(lvl)); + + /* link into selector definition list of the struct/union + */ + **sdefpp = newsdef; + *sdefpp = &newsdef->sd_sdef; + + /* update the size of the struct/union upward */ + if (stp->tp_fund == STRUCT && fd == 0) { + /* Note: the case that a bitfield is declared is + handled by add_field() ! + */ + *szp = offset + size_of_type(tp, "member"); + stp->tp_align = lcm(stp->tp_align, tp->tp_align); + } + else + if (stp->tp_fund == UNION) { + arith sel_size = size_of_type(tp, "member"); + + if (*szp < sel_size) { + *szp = sel_size; + } + stp->tp_align = lcm(stp->tp_align, tp->tp_align); + } +} + +check_selector(idf, stp) + struct idf *idf; + struct type *stp; /* the type of the struct */ +{ + /* checks if idf occurs already as a selector in + struct or union *stp. + */ + struct sdef *sdef = stp->tp_sdef; + + while (sdef) { + if (sdef->sd_idf == idf) + error("multiple selector %s", idf->id_text); + sdef = sdef->sd_sdef; + } +} + +declare_struct(fund, idf, tpp) + struct idf *idf; + struct type **tpp; +{ + /* A struct, union or enum (depending on fund) with tag (!) + idf is declared, and its type (incomplete as it may be) is + returned in *tpp. + The idf may be missing (i.e. idf == 0), in which case an + anonymous struct etc. is defined. + */ + extern char *symbol2str(); + register struct tag **tgp; + register struct tag *tg; + + if (!idf) + idf = gen_idf(); + tgp = (fund == ENUM ? &idf->id_enum : &idf->id_struct); + + if (options['R'] && !is_anon_idf(idf)) { + /* a K & R test */ + if ( fund != ENUM && + idf->id_sdef && idf->id_sdef->sd_level == level + ) { + warning("%s is also a selector", idf->id_text); + } + if ( fund == ENUM && + idf->id_def && idf->id_def->df_level == level + ) { + warning("%s is also a variable", idf->id_text); + } + } + + tg = *tgp; + if (tg && tg->tg_type->tp_size < 0 && tg->tg_type->tp_fund == fund) { + /* An unfinished declaration has preceded it, possibly on + an earlier level. We just fill in the answer. + */ + if (tg->tg_busy) { + error("recursive declaration of struct/union %s", + idf->id_text); + declare_struct(fund, gen_idf(), tpp); + } + else { + if (options['R'] && tg->tg_level != level) + warning("%s declares %s in different range", + idf->id_text, symbol2str(fund)); + *tpp = tg->tg_type; + } + } + else + if (tg && tg->tg_level == level) { + /* There is an already defined struct/union of this name + on our level! + */ + error("redeclaration of struct/union %s", idf->id_text); + declare_struct(fund, gen_idf(), tpp); + /* to allow a second struct_declaration_pack */ + } + else { + /* The struct is new. */ + /* Hook in a new struct tag */ + tg = new_tag(); + tg->next = *tgp; + *tgp = tg; + tg->tg_level = level; + /* and supply room for a type */ + tg->tg_type = create_type(fund); + tg->tg_type->tp_align = + fund == ENUM ? int_align : + fund == STRUCT ? struct_align : + /* fund == UNION */ union_align; + tg->tg_type->tp_idf = idf; + *tpp = tg->tg_type; + stack_idf(idf, local_level); + } +} + +apply_struct(fund, idf, tpp) + struct idf *idf; + struct type **tpp; +{ + /* The occurrence of a struct, union or enum (depending on + fund) with tag idf is noted. It may or may not have been + declared before. Its type (complete or incomplete) is + returned in *tpp. + */ + register struct tag **tgp; + + tgp = (is_struct_or_union(fund) ? &idf->id_struct : &idf->id_enum); + + if (*tgp) + *tpp = (*tgp)->tg_type; + else + declare_struct(fund, idf, tpp); +} + +struct sdef * +idf2sdef(idf, tp) + struct idf *idf; + struct type *tp; +{ + /* The identifier idf is identified as a selector, preferably + in the struct tp, but we will settle for any unique + identification. + If the attempt fails, a selector of type error_type is + created. + */ + struct sdef **sdefp = &idf->id_sdef, *sdef; + + /* Follow chain from idf, to meet tp. */ + while ((sdef = *sdefp)) { + if (sdef->sd_stype == tp) + return sdef; + sdefp = &(*sdefp)->next; + } + + /* Tp not met; any unique identification will do. */ + if (sdef = idf->id_sdef) { + /* There is an identification */ + if (uniq_selector(sdef)) { + /* and it is unique, so we accept */ + warning("selector %s applied to alien type", + idf->id_text); + } + else { + /* it is ambiguous */ + error("ambiguous use of selector %s", idf->id_text); + } + return sdef; + } + + /* No luck; create an error entry. */ + if (!is_anon_idf(idf)) + error("unknown selector %s", idf->id_text); + *sdefp = sdef = new_sdef(); + clear((char *)sdef, sizeof(struct sdef)); + sdef->sd_idf = idf; + sdef->sd_type = error_type; + return sdef; +} + +int +uniq_selector(idf_sdef) + struct sdef *idf_sdef; +{ + /* Returns true if idf_sdef (which is guaranteed to exist) + is unique for this level, i.e there is no other selector + on this level with the same name or the other selectors + with the same name have the same offset. + See /usr/src/cmd/sed/sed.h for an example of this absurd + case! + */ + + struct sdef *sdef = idf_sdef->next; + + while (sdef && sdef->sd_level == idf_sdef->sd_level) { + if ( sdef->sd_type != idf_sdef->sd_type + || sdef->sd_offset != idf_sdef->sd_offset + ) { + return 0; /* ambiguity found */ + } + sdef = sdef->next; + } + return 1; +} + +#ifndef NOBITFIELD +arith +add_field(szp, fd, pfd_type, idf, stp) + arith *szp; /* size of struct upto here */ + struct field *fd; /* bitfield, containing width */ + struct type **pfd_type; /* type of selector */ + struct idf *idf; /* name of selector */ + struct type *stp; /* current struct descriptor */ +{ + /* The address where this selector is put is returned. If the + selector with specified width does not fit in the word, or + an explicit alignment is given, a new address is needed. + Note that the fields are packed into machine words (according + to the RM.) + */ + long bits_in_type = word_size * 8; + static int field_offset = (arith)0; + static struct type *current_struct = 0; + static long bits_declared; /* nr of bits used in *field_offset */ + + if (current_struct != stp) { + /* This struct differs from the last one + */ + field_busy = 0; + current_struct = stp; + } + + if ( fd->fd_width < 0 || + (fd->fd_width == 0 && !is_anon_idf(idf)) || + fd->fd_width > bits_in_type + ) { + error("illegal field-width specified"); + *pfd_type = error_type; + return field_offset; + } + + switch ((*pfd_type)->tp_fund) { + + case CHAR: + case SHORT: + case INT: + case ENUM: + case LONG: + /* right type; size OK? */ + if ((*pfd_type)->tp_size > word_size) { + error("bit field type %s doesn't fit in word", + symbol2str((*pfd_type)->tp_fund)); + *pfd_type = error_type; + return field_offset; + } + break; + + default: + /* wrong type altogether */ + error("illegal field type (%s)", + symbol2str((*pfd_type)->tp_fund)); + *pfd_type = error_type; + return field_offset; + } + + if (field_busy == 0) { + /* align this selector on the next boundary : + the previous selector wasn't a bitfield. + */ + field_offset = align(*szp, word_align); + *szp = field_offset + word_size; + stp->tp_align = lcm(stp->tp_align, word_align); + bits_declared = (arith)0; + field_busy = 1; + } + + if (fd->fd_width > bits_in_type - bits_declared) { + /* field overflow: fetch next memory unit + */ + field_offset = align(*szp, word_align); + *szp = field_offset + word_size; + stp->tp_align = lcm(stp->tp_align, word_align); + bits_declared = fd->fd_width; + } + else + if (fd->fd_width == 0) { + /* next field should be aligned on the next boundary. + This will take care that no field will fit in the + space allocated upto here. + */ + bits_declared = bits_in_type + 1; + } + else { /* the bitfield fits in the current field */ + bits_declared += fd->fd_width; + } + + /* Arrived here, the place where the selector is stored in the + struct is computed. + Now we need a mask to use its value in expressions. + */ + + *pfd_type = construct_type(FIELD, *pfd_type, (arith)0); + (*pfd_type)->tp_field = fd; + + /* Set the mask right shifted. This solution avoids the + problem of having sign extension when using the mask for + extracting the value from the field-int. + Sign extension could occur on some machines when shifting + the mask to the left. + */ + fd->fd_mask = (1 << fd->fd_width) - 1; + + if (options['r']) { /* adjust the field at the right */ + fd->fd_shift = bits_declared - fd->fd_width; + } + else { /* adjust the field at the left */ + fd->fd_shift = bits_in_type - bits_declared; + } + + return field_offset; +} +#endif NOBITFIELD + +/* some utilities */ +int +is_struct_or_union(fund) + register int fund; +{ + return fund == STRUCT || fund == UNION; +} + +/* Greatest Common Divisor + */ +int +gcd(m, n) + register int m, n; +{ + register int r; + + while (n) { + r = m % n; + m = n; + n = r; + } + return m; +} + +/* Least Common Multiple + */ +int +lcm(m, n) + register int m, n; +{ + return m * (n / gcd(m, n)); +} diff --git a/lang/cem/cemcom/struct.h b/lang/cem/cemcom/struct.h new file mode 100644 index 000000000..8caab6786 --- /dev/null +++ b/lang/cem/cemcom/struct.h @@ -0,0 +1,44 @@ +/* $Header$ */ +/* SELECTOR DESCRIPTOR */ + +struct sdef { /* for selectors */ + struct sdef *next; + int sd_level; + struct idf *sd_idf; /* its name */ + struct sdef *sd_sdef; /* the next selector */ + struct type *sd_stype; /* the struct it belongs to */ + struct type *sd_type; /* its type */ + arith sd_offset; +}; + +extern char *st_alloc(); + + +/* allocation definitions of struct sdef */ +/* ALLOCDEF "sdef" */ +extern char *st_alloc(); +extern struct sdef *h_sdef; +#define new_sdef() ((struct sdef *) \ + st_alloc((char **)&h_sdef, sizeof(struct sdef))) +#define free_sdef(p) st_free(p, h_sdef, sizeof(struct sdef)) + + +struct tag { /* for struct-, union- and enum tags */ + struct tag *next; + int tg_level; + int tg_busy; /* non-zero during declaration of struct/union pack */ + struct type *tg_type; +}; + + + +/* allocation definitions of struct tag */ +/* ALLOCDEF "tag" */ +extern char *st_alloc(); +extern struct tag *h_tag; +#define new_tag() ((struct tag *) \ + st_alloc((char **)&h_tag, sizeof(struct tag))) +#define free_tag(p) st_free(p, h_tag, sizeof(struct tag)) + + +struct sdef *idf2sdef(); diff --git a/lang/cem/cemcom/struct.str b/lang/cem/cemcom/struct.str new file mode 100644 index 000000000..8caab6786 --- /dev/null +++ b/lang/cem/cemcom/struct.str @@ -0,0 +1,44 @@ +/* $Header$ */ +/* SELECTOR DESCRIPTOR */ + +struct sdef { /* for selectors */ + struct sdef *next; + int sd_level; + struct idf *sd_idf; /* its name */ + struct sdef *sd_sdef; /* the next selector */ + struct type *sd_stype; /* the struct it belongs to */ + struct type *sd_type; /* its type */ + arith sd_offset; +}; + +extern char *st_alloc(); + + +/* allocation definitions of struct sdef */ +/* ALLOCDEF "sdef" */ +extern char *st_alloc(); +extern struct sdef *h_sdef; +#define new_sdef() ((struct sdef *) \ + st_alloc((char **)&h_sdef, sizeof(struct sdef))) +#define free_sdef(p) st_free(p, h_sdef, sizeof(struct sdef)) + + +struct tag { /* for struct-, union- and enum tags */ + struct tag *next; + int tg_level; + int tg_busy; /* non-zero during declaration of struct/union pack */ + struct type *tg_type; +}; + + + +/* allocation definitions of struct tag */ +/* ALLOCDEF "tag" */ +extern char *st_alloc(); +extern struct tag *h_tag; +#define new_tag() ((struct tag *) \ + st_alloc((char **)&h_tag, sizeof(struct tag))) +#define free_tag(p) st_free(p, h_tag, sizeof(struct tag)) + + +struct sdef *idf2sdef(); diff --git a/lang/cem/cemcom/switch.c b/lang/cem/cemcom/switch.c new file mode 100644 index 000000000..4ce1c1814 --- /dev/null +++ b/lang/cem/cemcom/switch.c @@ -0,0 +1,184 @@ +/* $Header$ */ +/* S W I T C H - S T A T E M E N T A D M I N I S T R A T I O N */ + +#include "debug.h" +#include "botch_free.h" +#include "density.h" + +#include "idf.h" +#include "label.h" +#include "arith.h" +#include "switch.h" +#include "code.h" +#include "storage.h" +#include "assert.h" +#include "expr.h" +#include "type.h" +#include "em.h" + +#define compact(nr, low, up) (nr != 0 && (up - low) / nr <= (DENSITY - 1)) + +static struct switch_hdr *switch_stack = 0; + +code_startswitch(expr) + struct expr *expr; +{ + /* stack a new case header and fill in the necessary fields. + */ + register label l_table = text_label(); + register label l_break = text_label(); + register struct switch_hdr *sh = new_switch_hdr(); + + stat_stack(l_break, NO_LABEL); + sh->sh_break = l_break; + sh->sh_default = 0; + sh->sh_table = l_table; + sh->sh_nrofentries = 0; + sh->sh_type = expr->ex_type; /* the expression switched */ + sh->sh_lowerbd = sh->sh_upperbd = (arith)0; /* ??? */ + sh->sh_entries = (struct case_entry *) 0; /* case-entry list */ + sh->next = switch_stack; /* push onto switch-stack */ + switch_stack = sh; + code_expr(expr, RVAL, TRUE, NO_LABEL, NO_LABEL); + /* evaluate the switch expr. */ + C_bra(l_table); /* goto start of switch_table */ +} + +code_endswitch() +{ + register struct switch_hdr *sh = switch_stack; + register label tablabel; + register struct case_entry *ce, *tmp; + + if (sh->sh_default == 0) /* no default occurred yet */ + sh->sh_default = sh->sh_break; + C_bra(sh->sh_break); /* skip the switch table now */ + C_ilb(sh->sh_table); /* switch table entry */ + tablabel = data_label(); /* the rom must have a label */ + C_ndlb(tablabel); + C_rom_begin(); + C_co_ilb(sh->sh_default); + if (compact(sh->sh_nrofentries, sh->sh_lowerbd, sh->sh_upperbd)) { + /* CSA */ + register arith val; + + C_co_cst(sh->sh_lowerbd); + C_co_cst(sh->sh_upperbd - sh->sh_lowerbd); + ce = sh->sh_entries; + for (val = sh->sh_lowerbd; val <= sh->sh_upperbd; val++) { + ASSERT(ce); + if (val == ce->ce_value) { + C_co_ilb(ce->ce_label); + ce = ce->next; + } + else + C_co_ilb(sh->sh_default); + } + C_rom_end(); + C_lae_ndlb(tablabel, (arith)0); /* perform the switch */ + C_csa(sh->sh_type->tp_size); + } + else { /* CSB */ + C_co_cst((arith)sh->sh_nrofentries); + for (ce = sh->sh_entries; ce; ce = ce->next) { + /* generate the entries: value + prog.label */ + C_co_cst(ce->ce_value); + C_co_ilb(ce->ce_label); + } + C_rom_end(); + C_lae_ndlb(tablabel, (arith)0); /* perform the switch */ + C_csb(sh->sh_type->tp_size); + } + C_ilb(sh->sh_break); + switch_stack = sh->next; /* unstack the switch descriptor */ + /* free the allocated switch structure */ + for (ce = sh->sh_entries; ce; ce = tmp) { + tmp = ce->next; + free_case_entry(ce); + } + free_switch_hdr(sh); + stat_unstack(); +} + +code_case(val) + arith val; +{ + register struct case_entry *ce; + register struct switch_hdr *sh = switch_stack; + + if (sh == 0) { + error("case statement not in switch"); + return; + } + ce = new_case_entry(); + C_ilb(ce->ce_label = text_label()); + ce->ce_value = val; + if (sh->sh_entries == 0) { + /* first case entry */ + ce->next = (struct case_entry *) 0; + sh->sh_entries = ce; + sh->sh_lowerbd = sh->sh_upperbd = ce->ce_value; + sh->sh_nrofentries = 1; + } + else { + /* second etc. case entry */ + /* find the proper place to put ce into the list */ + register struct case_entry *c1 = sh->sh_entries, *c2 = 0; + + if (val < sh->sh_lowerbd) + sh->sh_lowerbd = val; + else + if (val > sh->sh_upperbd) + sh->sh_upperbd = val; + while (c1 && c1->ce_value < ce->ce_value) { + c2 = c1; + c1 = c1->next; + } + /* At this point three cases are possible: + 1: c1 != 0 && c2 != 0: + insert ce somewhere in the middle + 2: c1 != 0 && c2 == 0: + insert ce right after the head + 3: c1 == 0 && c2 != 0: + append ce to last element + The case c1 == 0 && c2 == 0 cannot occur! + */ + if (c1) { + if (c1->ce_value == ce->ce_value) { + error("multiple case entry for value %ld", + ce->ce_value); + free_case_entry(ce); + return; + } + if (c2) { + ce->next = c2->next; + c2->next = ce; + } + else { + ce->next = sh->sh_entries; + sh->sh_entries = ce; + } + } + else { + ASSERT(c2); + ce->next = (struct case_entry *) 0; + c2->next = ce; + } + (sh->sh_nrofentries)++; + } +} + +code_default() +{ + register struct switch_hdr *sh = switch_stack; + + if (sh == 0) { + error("default not in switch"); + return; + } + if (sh->sh_default != 0) { + error("multiple entry for default in switch"); + return; + } + C_ilb(sh->sh_default = text_label()); +} diff --git a/lang/cem/cemcom/switch.h b/lang/cem/cemcom/switch.h new file mode 100644 index 000000000..07998b9e7 --- /dev/null +++ b/lang/cem/cemcom/switch.h @@ -0,0 +1,40 @@ +/* $Header$ */ +/* S W I T C H - T A B L E - S T R U C T U R E */ + +struct switch_hdr { + struct switch_hdr *next; + label sh_break; + label sh_default; + label sh_table; + int sh_nrofentries; + struct type *sh_type; + arith sh_lowerbd; + arith sh_upperbd; + struct case_entry *sh_entries; +}; + + +/* allocation definitions of struct switch_hdr */ +/* ALLOCDEF "switch_hdr" */ +extern char *st_alloc(); +extern struct switch_hdr *h_switch_hdr; +#define new_switch_hdr() ((struct switch_hdr *) \ + st_alloc((char **)&h_switch_hdr, sizeof(struct switch_hdr))) +#define free_switch_hdr(p) st_free(p, h_switch_hdr, sizeof(struct switch_hdr)) + + +struct case_entry { + struct case_entry *next; + label ce_label; + arith ce_value; +}; + + +/* allocation definitions of struct case_entry */ +/* ALLOCDEF "case_entry" */ +extern char *st_alloc(); +extern struct case_entry *h_case_entry; +#define new_case_entry() ((struct case_entry *) \ + st_alloc((char **)&h_case_entry, sizeof(struct case_entry))) +#define free_case_entry(p) st_free(p, h_case_entry, sizeof(struct case_entry)) + diff --git a/lang/cem/cemcom/switch.str b/lang/cem/cemcom/switch.str new file mode 100644 index 000000000..07998b9e7 --- /dev/null +++ b/lang/cem/cemcom/switch.str @@ -0,0 +1,40 @@ +/* $Header$ */ +/* S W I T C H - T A B L E - S T R U C T U R E */ + +struct switch_hdr { + struct switch_hdr *next; + label sh_break; + label sh_default; + label sh_table; + int sh_nrofentries; + struct type *sh_type; + arith sh_lowerbd; + arith sh_upperbd; + struct case_entry *sh_entries; +}; + + +/* allocation definitions of struct switch_hdr */ +/* ALLOCDEF "switch_hdr" */ +extern char *st_alloc(); +extern struct switch_hdr *h_switch_hdr; +#define new_switch_hdr() ((struct switch_hdr *) \ + st_alloc((char **)&h_switch_hdr, sizeof(struct switch_hdr))) +#define free_switch_hdr(p) st_free(p, h_switch_hdr, sizeof(struct switch_hdr)) + + +struct case_entry { + struct case_entry *next; + label ce_label; + arith ce_value; +}; + + +/* allocation definitions of struct case_entry */ +/* ALLOCDEF "case_entry" */ +extern char *st_alloc(); +extern struct case_entry *h_case_entry; +#define new_case_entry() ((struct case_entry *) \ + st_alloc((char **)&h_case_entry, sizeof(struct case_entry))) +#define free_case_entry(p) st_free(p, h_case_entry, sizeof(struct case_entry)) + diff --git a/lang/cem/cemcom/system.c b/lang/cem/cemcom/system.c new file mode 100644 index 000000000..dd80863b1 --- /dev/null +++ b/lang/cem/cemcom/system.c @@ -0,0 +1,72 @@ +/* $Header$ */ +/* SYSTEM DEPENDENT ROUTINES */ + +#include "system.h" +#include "inputtype.h" +#include + +extern long lseek(); + +int +xopen(name, flag, mode) + char *name; +{ + if (name[0] == '-' && name[1] == '\0') + return (flag == OP_RDONLY) ? 0 : 1; + + switch (flag) { + + case OP_RDONLY: + return open(name, 0); + case OP_WRONLY: + return open(name, 1); + case OP_CREAT: + return creat(name, mode); + case OP_APPEND: + { + register fd; + + if ((fd = open(name, 1)) < 0) + return -1; + lseek(fd, 0L, 2); + return fd; + } + } + /*NOTREACHED*/ +} + +int +xclose(fildes) +{ + if (fildes != 0 && fildes != 1) + return close(fildes); + return -1; +} + +#ifdef READ_IN_ONE +long +xfsize(fildes) +{ + struct stat stbuf; + + if (fstat(fildes, &stbuf) != 0) + return -1; + return stbuf.st_size; +} +#endif READ_IN_ONE + +exit(n) +{ + _exit(n); +} + +xstop(how, stat) +{ + switch (how) { + case S_ABORT: + abort(); + case S_EXIT: + exit(stat); + } + /*NOTREACHED*/ +} diff --git a/lang/cem/cemcom/system.h b/lang/cem/cemcom/system.h new file mode 100644 index 000000000..ae69ff8cb --- /dev/null +++ b/lang/cem/cemcom/system.h @@ -0,0 +1,34 @@ +/* $Header$ */ +/* SYSTEM DEPENDANT DEFINITIONS */ + +#include +#include + +#define OP_RDONLY 0 /* open for read */ +#define OP_WRONLY 1 /* open for write */ +#define OP_CREAT 2 /* create and open for write */ +#define OP_APPEND 3 /* open for write at end */ + +#define sys_open(name, flag) xopen(name, flag, 0) +#define sys_close(fildes) xclose(fildes) +#define sys_read(fildes, buffer, nbytes) read(fildes, buffer, nbytes) +#define sys_write(fildes, buffer, nbytes) write(fildes, buffer, nbytes) +#define sys_creat(name, mode) xopen(name, OP_CREAT, mode) +#define sys_remove(name) unlink(name) +#define sys_fsize(fd) xfsize(fd) +#define sys_sbrk(incr) sbrk(incr) +#define sys_stop(how, stat) xstop(how, stat) + +#define S_ABORT 1 +#define S_EXIT 2 + +char *sbrk(); +long xfsize(); + +extern int errno; + +#define sys_errno errno + +#define time_type time_t +#define sys_time(tloc) time(tloc) +time_type time(); diff --git a/lang/cem/cemcom/tab.c b/lang/cem/cemcom/tab.c new file mode 100644 index 000000000..8e39d7af6 --- /dev/null +++ b/lang/cem/cemcom/tab.c @@ -0,0 +1,295 @@ +/* $Header$ */ +/* @cc tab.c -o $INSTALLDIR/tab@ + tab - table generator + + Author: Erik Baalbergen (..tjalk!erikb) +*/ + +#include + +#define MAXTAB 10000 +#define MAXBUF 10000 +#define COMCOM '-' +#define FILECOM '%' + +int InputForm = 'c'; +char OutputForm[MAXBUF] = "%s,\n"; +int TabSize = 257; +char *Table[MAXTAB]; +char *ProgCall; + +main(argc, argv) + char *argv[]; +{ + ProgCall = *argv++; + argc--; + while (argc-- > 0) { + if (**argv == COMCOM) { + option(*argv++); + } + else { + process(*argv++, InputForm); + } + } +} + +char * +Salloc(s) + char *s; +{ + extern char *malloc(), *strcpy(); + char *ns = malloc((unsigned int)strlen(s) + 1); + + if (ns) { + strcpy(ns, s); + } + return ns; +} + +option(str) + char *str; +{ + /* note that *str indicates the source of the option: + either COMCOM (from command line) or FILECOM (from a file). + */ + extern char *sprintf(); + + switch (*++str) { + + case ' ': /* command */ + case '\t': + case '\0': + break; + case 'I': + InputForm = *++str; + break; + case 'f': + if (*++str == '\0') { + fprintf(stderr, "%s: -f: name expected\n", ProgCall); + exit(1); + } + DoFile(str); + break; + case 'F': + sprintf(OutputForm, "%s\n", ++str); + break; + case 'T': + printf("%s\n", ++str); + break; + case 'p': + PrintTable(); + break; + case 'C': + ClearTable(); + break; + case 'S': + { + register i = stoi(++str); + + if (i <= 0 || i > MAXTAB) { + fprintf(stderr, "%s: size would exceed maximum\n", + ProgCall); + } + else { + TabSize = i; + } + break; + } + default: + fprintf(stderr, "%s: bad option -%s\n", ProgCall, str); + } +} + +ClearTable() +{ + register i; + + for (i = 0; i < MAXTAB; i++) { + Table[i] = 0; + } +} + +PrintTable() +{ + register i; + + for (i = 0; i < TabSize; i++) { + if (Table[i]) { + printf(OutputForm, Table[i]); + } + else { + printf(OutputForm, "0"); + } + } +} + +process(str, format) + char *str; +{ + char *cstr = str; + char *Name = cstr; /* overwrite original string! */ + + /* strip of the entry name + */ + while (*str && *str != ':') { + if (*str == '\\') { + ++str; + } + *cstr++ = *str++; + } + + if (*str != ':') { + fprintf(stderr, "%s: bad specification: \"%s\", ignored\n", + ProgCall, Name); + return 0; + } + *cstr = '\0'; + str++; + + switch (format) { + + case 'c': + return c_proc(str, Name); + default: + fprintf(stderr, "%s: bad input format\n", ProgCall); + } + return 0; +} + +c_proc(str, Name) + char *str; + char *Name; +{ + int ch, ch2; + int quoted(); + + while (*str) { + if (*str == '\\') { + ch = quoted(&str); + } + else { + ch = *str++; + } + if (*str == '-') { + if (*++str == '\\') { + ch2 = quoted(&str); + } + else { + if (ch2 = *str++); + else str--; + } + if (ch > ch2) { + fprintf(stderr, "%s: bad range\n", ProgCall); + return 0; + } + if (ch >= 0 && ch2 <= 255) + while (ch <= ch2) + Table[ch++] = Salloc(Name); + } + else { + if (ch >= 0 && ch <= 255) + Table[ch] = Salloc(Name); + } + } + return 1; +} + +int +quoted(pstr) + char **pstr; +{ + register int ch; + register int i; + register char *str = *pstr; + + if ((*++str >= '0') && (*str <= '9')) { + ch = 0; + for (i = 0; i < 3; i++) { + ch = 8 * ch + *str - '0'; + if (*++str < '0' || *str > '9') + break; + } + } + else { + switch (*str++) { + + case 'n': + ch = '\n'; + break; + case 't': + ch = '\t'; + break; + case 'b': + ch = '\b'; + break; + case 'r': + ch = '\r'; + break; + case 'f': + ch = '\f'; + break; + default : + ch = *str; + } + } + *pstr = str; + return ch & 0377; +} + +int +stoi(str) + char *str; +{ + register i = 0; + + while (*str >= '0' && *str <= '9') { + i = i * 10 + *str++ - '0'; + } + return i; +} + +char * +getline(s, n, fp) + char *s; + FILE *fp; +{ + register c = getc(fp); + char *str = s; + + while (n--) { + if (c == EOF) { + return NULL; + } + else + if (c == '\n') { + *str++ = '\0'; + return s; + } + *str++ = c; + c = getc(fp); + } + s[n - 1] = '\0'; + return s; +} + +#define BUFSIZE 1024 + +DoFile(name) + char *name; +{ + char text[BUFSIZE]; + FILE *fp; + + if ((fp = fopen(name, "r")) == NULL) { + fprintf(stderr, "%s: cannot read file %s\n", ProgCall, name); + exit(1); + } + while (getline(text, BUFSIZE, fp) != NULL) { + if (text[0] == FILECOM) { + option(text); + } + else { + process(text, InputForm); + } + } +} diff --git a/lang/cem/cemcom/tokenname.c b/lang/cem/cemcom/tokenname.c new file mode 100644 index 000000000..d66ff723e --- /dev/null +++ b/lang/cem/cemcom/tokenname.c @@ -0,0 +1,143 @@ +/* $Header$ */ +/* TOKEN NAME DEFINITIONS */ + +#include "idf.h" +#include "arith.h" +#include "LLlex.h" +#include "tokenname.h" +#include "Lpars.h" + +/* To centralize the declaration of %tokens, their presence in this + file is taken as their declaration. The Makefile will produce + a grammar file (tokenfile.g) from this file. + Moreover, rather than looking up a symbol in all these lists + to find its printable name, a fast version of symbol2str() is + generated from these tables. + Consequenty some of these tables are not referenced explicitly + in the C text any more. To save space and to avoid lint confusion, + these have been made pseudo-invisible by #ifdefs. +*/ + +#ifdef ____ +struct tokenname tkspec[] = { /* the names of the special tokens */ + {IDENTIFIER, "identifier"}, + {TYPE_IDENTIFIER, "type_identifier"}, + {STRING, "string"}, + {FILESPECIFIER, "filespecifier"}, + {INTEGER, "integer"}, + {FLOATING, "floating"}, + {0, ""} +}; +#endif ____ + +#ifdef ____ +struct tokenname tkcomp[] = { /* names of the composite tokens */ + {NOTEQUAL, "!="}, + {AND, "&&"}, + {PLUSPLUS, "++"}, + {MINMIN, "--"}, + {ARROW, "->"}, + {LEFT, "<<"}, + {LESSEQ, "<="}, + {EQUAL, "=="}, + {GREATEREQ, ">="}, + {RIGHT, ">>"}, + {OR, "||"}, + {0, ""} +}; +#endif ____ + +struct tokenname tkidf[] = { /* names of the identifier tokens */ + {ASM, "asm"}, + {AUTO, "auto"}, + {BREAK, "break"}, + {CASE, "case"}, + {CONTINUE, "continue"}, + {DEFAULT, "default"}, + {DO, "do"}, + {ELSE, "else"}, + {ENUM, "enum"}, + {EXTERN, "extern"}, + {FOR, "for"}, + {GOTO, "goto"}, + {IF, "if"}, + {LONG, "long"}, + {REGISTER, "register"}, + {RETURN, "return"}, + {SHORT, "short"}, + {SIZEOF, "sizeof"}, + {STATIC, "static"}, + {STRUCT, "struct"}, + {SWITCH, "switch"}, + {TYPEDEF, "typedef"}, + {UNION, "union"}, + {UNSIGNED, "unsigned"}, + {WHILE, "while"}, + {0, ""} +}; + +struct tokenname tkother[] = { /* additional keywords from the RM */ + {ENTRY, "entry"}, + {FORTRAN, "fortran"}, + {0, ""} +}; + +#ifdef ____ +struct tokenname tkfunny[] = { /* internal keywords */ + {CHAR, "char"}, + {INT, "int"}, + {FLOAT, "float"}, + {DOUBLE, "double"}, + {VOID, "void"}, + + {ARRAY, "array"}, + {FUNCTION, "function"}, + {POINTER, "pointer"}, + {FIELD, "field"}, + {NEWLINE, "newline"}, + + {GLOBAL, "global"}, + {IMPLICIT, "implicit"}, + {FORMAL, "formal"}, + {LABEL, "label"}, + {ERRONEOUS, "erroneous"}, + + {PARCOMMA, "parcomma"}, + {INITCOMMA, "initcomma"}, + {CAST, "cast"}, + {POSTINCR, "postfix ++"}, + {POSTDECR, "postfix --"}, + {PLUSAB, "+="}, + {MINAB, "-="}, + {TIMESAB, "*="}, + {DIVAB, "/="}, + {MODAB, "%="}, + {LEFTAB, "<<="}, + {RIGHTAB, ">>="}, + {ANDAB, "&="}, + {XORAB, "^="}, + {ORAB, "|="}, + + {INT2INT, "int2int"}, + {INT2FLOAT, "int2float"}, + {FLOAT2INT, "float2int"}, + {FLOAT2FLOAT, "float2float"}, + {0, ""} +}; +#endif ____ + +reserve(resv) + register struct tokenname resv[]; +{ + /* The names of the tokens described in resv are entered + as reserved words. + */ + while (resv->tn_symbol) { + struct idf *idf = str2idf(resv->tn_name); + + if (idf->id_reserved) + fatal("maximum identifier length insufficient"); + idf->id_reserved = resv->tn_symbol; + resv++; + } +} diff --git a/lang/cem/cemcom/tokenname.h b/lang/cem/cemcom/tokenname.h new file mode 100644 index 000000000..7e5ea386d --- /dev/null +++ b/lang/cem/cemcom/tokenname.h @@ -0,0 +1,9 @@ +/* $Header$ */ +/* TOKENNAME DEFINITION */ + +struct tokenname { /* Used for defining the name of a + token as identified by its symbol + */ + int tn_symbol; + char *tn_name; +}; diff --git a/lang/cem/cemcom/type.c b/lang/cem/cemcom/type.c new file mode 100644 index 000000000..7ca933996 --- /dev/null +++ b/lang/cem/cemcom/type.c @@ -0,0 +1,217 @@ +/* $Header$ */ +/* T Y P E D E F I N I T I O N M E C H A N I S M */ + +#include "nobitfield.h" +#include "alloc.h" +#include "Lpars.h" +#include "arith.h" +#include "type.h" +#include "idf.h" +#include "def.h" +#include "sizes.h" +#include "align.h" + +struct type *function_of(), *array_of(); +#ifndef NOBITFIELD +struct type *field_of(); +#endif NOBITFIELD + +/* To be created dynamically in main() from defaults or from command + line parameters. +*/ +struct type + *char_type, *uchar_type, + *short_type, *ushort_type, + *word_type, *uword_type, + *int_type, *uint_type, + *long_type, *ulong_type, + *float_type, *double_type, + *void_type, *label_type, + *string_type, *funint_type, *error_type; + +struct type *pa_type; /* Pointer-Arithmetic type */ + +struct type * +create_type(fund) + register int fund; +{ + /* A brand new struct type is created, and its tp_fund set + to fund. + */ + register struct type *ntp = new_type(); + + clear((char *)ntp, sizeof(struct type)); + ntp->tp_fund = fund; + ntp->tp_size = (arith)-1; + + return ntp; +} + +struct type * +construct_type(fund, tp, count) + struct type *tp; + arith count; /* for fund == ARRAY only */ +{ + /* fund must be a type constructor: FIELD, FUNCTION, POINTER or + ARRAY. The pointer to the constructed type is returned. + */ + struct type *dtp; + + switch (fund) { +#ifndef NOBITFIELD + case FIELD: + dtp = field_of(tp); + break; +#endif NOBITFIELD + + case FUNCTION: + if (tp->tp_fund == FUNCTION) { + error("function cannot yield function"); + return error_type; + } + if (tp->tp_fund == ARRAY) { + error("function cannot yield array"); + return error_type; + } + + dtp = function_of(tp); + break; + case POINTER: + dtp = pointer_to(tp); + break; + case ARRAY: + if (tp->tp_size < 0) { + error("cannot construct array of unknown type"); + count = (arith)-1; + } + if (count >= (arith)0) + count *= tp->tp_size; + dtp = array_of(tp, count); + break; + } + return dtp; +} + +struct type * +function_of(tp) + struct type *tp; +{ + struct type *dtp = tp->tp_function; + + if (!dtp) { + tp->tp_function = dtp = create_type(FUNCTION); + dtp->tp_up = tp; + dtp->tp_size = pointer_size; + dtp->tp_align = pointer_align; + } + return dtp; +} + +struct type * +pointer_to(tp) + struct type *tp; +{ + struct type *dtp = tp->tp_pointer; + + if (!dtp) { + tp->tp_pointer = dtp = create_type(POINTER); + dtp->tp_unsigned = 1; + dtp->tp_up = tp; + dtp->tp_size = pointer_size; + dtp->tp_align = pointer_align; + } + return dtp; +} + +struct type * +array_of(tp, count) + struct type *tp; + arith count; +{ + struct type *dtp = tp->tp_array; + + /* look for a type with the right size */ + while (dtp && dtp->tp_size != count) + dtp = dtp->next; + + if (!dtp) { + dtp = create_type(ARRAY); + dtp->tp_up = tp; + dtp->tp_size = count; + dtp->tp_align = tp->tp_align; + dtp->next = tp->tp_array; + tp->tp_array = dtp; + } + return dtp; +} + +#ifndef NOBITFIELD +struct type * +field_of(tp) + struct type *tp; +{ + struct type *dtp = create_type(FIELD); + + dtp->tp_up = tp; + dtp->tp_align = tp->tp_align; + dtp->tp_size = tp->tp_size; + return dtp; +} +#endif NOBITFIELD + +arith +size_of_type(tp, nm) + struct type *tp; + char nm[]; +{ + arith sz = tp->tp_size; + + if (sz < 0) { + error("size of %s unknown", nm); + return (arith)1; + } + return sz; +} + +idf2type(idf, tpp) + struct idf *idf; + struct type **tpp; +{ + /* Decoding a typedef-ed identifier: if the size is yet + unknown we have to make copy of the type descriptor to + prevent garbage at the initialisation of arrays with + unknown size. + */ + if ( idf->id_def->df_type->tp_size < (arith)0 && + idf->id_def->df_type->tp_fund == ARRAY + ) { + struct type *ntp = new_type(); + *ntp = *(idf->id_def->df_type); + /* this is really a structure assignment, AAGH!!! */ + *tpp = ntp; + } + else { + *tpp = idf->id_def->df_type; + } +} + +arith +align(pos, al) + arith pos; + int al; +{ + return ((pos + al - 1) / al) * al; +} + +struct type * +standard_type(fund, sign, align, size) + int align; arith size; +{ + register struct type *tp = create_type(fund); + + tp->tp_unsigned = sign; + tp->tp_align = align; + tp->tp_size = size; + + return tp; +} diff --git a/lang/cem/cemcom/type.h b/lang/cem/cemcom/type.h new file mode 100644 index 000000000..1937a569e --- /dev/null +++ b/lang/cem/cemcom/type.h @@ -0,0 +1,52 @@ +/* $Header$ */ +/* TYPE DESCRIPTOR */ + +#include "nobitfield.h" + +struct type { + struct type *next; /* used only with ARRAY */ + short tp_fund; /* fundamental type */ + char tp_unsigned; + int tp_align; + arith tp_size; /* -1 if declared but not defined */ + struct idf *tp_idf; /* name of STRUCT, UNION or ENUM */ + struct sdef *tp_sdef; /* to first selector */ + struct type *tp_up; /* from FIELD, POINTER, ARRAY + or FUNCTION to fund. */ + struct field *tp_field; /* field descriptor if fund == FIELD */ + struct type *tp_pointer;/* to POINTER */ + struct type *tp_array; /* to ARRAY */ + struct type *tp_function;/* to FUNCTION */ +}; + +extern struct type + *create_type(), *standard_type(), *construct_type(), *pointer_to(), + *array_of(), *function_of(); + +#ifndef NOBITFIELD +extern struct type *field_of(); +#endif NOBITFIELD + +extern struct type + *char_type, *uchar_type, + *short_type, *ushort_type, + *word_type, *uword_type, + *int_type, *uint_type, + *long_type, *ulong_type, + *float_type, *double_type, + *void_type, *label_type, + *string_type, *funint_type, *error_type; + +extern struct type *pa_type; /* type.c */ + +extern arith size_of_type(), align(); + + +/* allocation definitions of struct type */ +/* ALLOCDEF "type" */ +extern char *st_alloc(); +extern struct type *h_type; +#define new_type() ((struct type *) \ + st_alloc((char **)&h_type, sizeof(struct type))) +#define free_type(p) st_free(p, h_type, sizeof(struct type)) + diff --git a/lang/cem/cemcom/type.str b/lang/cem/cemcom/type.str new file mode 100644 index 000000000..1937a569e --- /dev/null +++ b/lang/cem/cemcom/type.str @@ -0,0 +1,52 @@ +/* $Header$ */ +/* TYPE DESCRIPTOR */ + +#include "nobitfield.h" + +struct type { + struct type *next; /* used only with ARRAY */ + short tp_fund; /* fundamental type */ + char tp_unsigned; + int tp_align; + arith tp_size; /* -1 if declared but not defined */ + struct idf *tp_idf; /* name of STRUCT, UNION or ENUM */ + struct sdef *tp_sdef; /* to first selector */ + struct type *tp_up; /* from FIELD, POINTER, ARRAY + or FUNCTION to fund. */ + struct field *tp_field; /* field descriptor if fund == FIELD */ + struct type *tp_pointer;/* to POINTER */ + struct type *tp_array; /* to ARRAY */ + struct type *tp_function;/* to FUNCTION */ +}; + +extern struct type + *create_type(), *standard_type(), *construct_type(), *pointer_to(), + *array_of(), *function_of(); + +#ifndef NOBITFIELD +extern struct type *field_of(); +#endif NOBITFIELD + +extern struct type + *char_type, *uchar_type, + *short_type, *ushort_type, + *word_type, *uword_type, + *int_type, *uint_type, + *long_type, *ulong_type, + *float_type, *double_type, + *void_type, *label_type, + *string_type, *funint_type, *error_type; + +extern struct type *pa_type; /* type.c */ + +extern arith size_of_type(), align(); + + +/* allocation definitions of struct type */ +/* ALLOCDEF "type" */ +extern char *st_alloc(); +extern struct type *h_type; +#define new_type() ((struct type *) \ + st_alloc((char **)&h_type, sizeof(struct type))) +#define free_type(p) st_free(p, h_type, sizeof(struct type)) + -- 2.34.1