From: Alan Cox Date: Mon, 8 Jun 2015 21:30:51 +0000 (+0100) Subject: cpp: CPP from bcc X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=53b9cca9644a6310477ca50e7bf377552bc621f1;p=FUZIX.git cpp: CPP from bcc 48K rather than 32K target. Some thought might be needed for a 32K cpp however! --- diff --git a/Applications/cpp/Makefile b/Applications/cpp/Makefile new file mode 100644 index 00000000..5579d044 --- /dev/null +++ b/Applications/cpp/Makefile @@ -0,0 +1,43 @@ +PLATFORM = 6809 +CC = m6809-unknown-gcc +# These are wrappers for lwasm and lwar +ASM = m6809-unknown-as +AR = m6809-unknown-ar +LINKER = lwlink +CFLAGS = -I../../Library/include -I../../Library/include/6502 -Wall -pedantic -fno-strict-aliasing +COPT = -Os +LINKER_OPT = --format=raw -L../../Library/libs -lc6809 +LIBGCCDIR = $(dir $(shell $(CC) -print-libgcc-file-name)) +LINKER_OPT += -L$(LIBGCCDIR) -lgcc -m cpp.map +LINKER_OPT += --script=../util/$(TARGET).link +ASM_OPT = -o +CRT0 = ../../Library/libs/crt0_6809.o + +OBJS = cpp.o hash.o main.o token1.o token2.o + +all: cpp + +cpp: main.o cpp.o hash.o token1.o token2.o + $(LINKER) -o $@ $(LINKER_OPT) $(CRT0) $(OBJS) + +clean realclean: + rm -f cpp main.o cpp.o hash.o token1.o token2.o tmp.h *~ + +maintclean: realclean + rm -f token1.h token2.h + +main.o: cc.h +cpp.o: cc.h +hash.o: cc.h +tree.o: cc.h + +token1.o: token1.h +token2.o: token2.h + +token1.h: token1.tok + gperf -aptTc -N is_ctok -H hash1 token1.tok > tmp.h + mv tmp.h token1.h + +token2.h: token2.tok + gperf -aptTc -k1,3 -N is_ckey -H hash2 token2.tok > tmp.h + mv tmp.h token2.h diff --git a/Applications/cpp/cc.h b/Applications/cpp/cc.h new file mode 100644 index 00000000..9c298e71 --- /dev/null +++ b/Applications/cpp/cc.h @@ -0,0 +1,115 @@ + +#ifndef P +#if __STDC__ +#define P(x) x +#else +#define P(x) () +#endif +#endif + +extern void cfatal P((char*)); +extern void cerror P((char*)); +extern void cwarn P((char*)); +extern FILE * open_include P((char*, char*, int)); + +extern FILE * curfile; +extern char curword[]; +extern char * c_fname; +extern int c_lineno; +extern int alltok; +extern int dialect; + +#define DI_KNR 1 +#define DI_ANSI 2 + +extern int gettok P((void)); + +struct token_trans { char * name; int token; }; +struct token_trans * is_ctok P((const char *str, unsigned int len)); +struct token_trans * is_ckey P((const char *str, unsigned int len)); + +#define WORDSIZE 128 +#define TK_WSPACE 256 +#define TK_WORD 257 +#define TK_NUM 258 +#define TK_FLT 259 +#define TK_QUOT 260 +#define TK_STR 261 +#define TK_FILE 262 +#define TK_LINE 263 +#define TK_COPY 264 + +#define TKS_CTOK 0x200 +#define TKS_CKEY 0x300 + +#define TK_NE_OP (TKS_CTOK+ 0) +#define TK_MOD_ASSIGN (TKS_CTOK+ 1) +#define TK_AND_OP (TKS_CTOK+ 2) +#define TK_AND_ASSIGN (TKS_CTOK+ 3) +#define TK_MUL_ASSIGN (TKS_CTOK+ 4) +#define TK_INC_OP (TKS_CTOK+ 5) +#define TK_ADD_ASSIGN (TKS_CTOK+ 6) +#define TK_DEC_OP (TKS_CTOK+ 7) +#define TK_SUB_ASSIGN (TKS_CTOK+ 8) +#define TK_PTR_OP (TKS_CTOK+ 9) +#define TK_ELLIPSIS (TKS_CTOK+10) +#define TK_DIV_ASSIGN (TKS_CTOK+11) +#define TK_LEFT_OP (TKS_CTOK+12) +#define TK_LEFT_ASSIGN (TKS_CTOK+13) +#define TK_LE_OP (TKS_CTOK+14) +#define TK_EQ_OP (TKS_CTOK+15) +#define TK_GE_OP (TKS_CTOK+16) +#define TK_RIGHT_OP (TKS_CTOK+17) +#define TK_RIGHT_ASSIGN (TKS_CTOK+18) +#define TK_XOR_ASSIGN (TKS_CTOK+19) +#define TK_OR_ASSIGN (TKS_CTOK+20) +#define TK_OR_OP (TKS_CTOK+21) + +#define TK_AUTO (TKS_CKEY+ 0) +#define TK_BREAK (TKS_CKEY+ 1) +#define TK_CASE (TKS_CKEY+ 2) +#define TK_CHAR (TKS_CKEY+ 3) +#define TK_CONST (TKS_CKEY+ 4) +#define TK_CONTINUE (TKS_CKEY+ 5) +#define TK_DEFAULT (TKS_CKEY+ 6) +#define TK_DO (TKS_CKEY+ 7) +#define TK_DOUBLE (TKS_CKEY+ 8) +#define TK_ELSE (TKS_CKEY+ 9) +#define TK_ENUM (TKS_CKEY+10) +#define TK_EXTERN (TKS_CKEY+11) +#define TK_FLOAT (TKS_CKEY+12) +#define TK_FOR (TKS_CKEY+13) +#define TK_GOTO (TKS_CKEY+14) +#define TK_IF (TKS_CKEY+15) +#define TK_INT (TKS_CKEY+16) +#define TK_LONG (TKS_CKEY+17) +#define TK_REGISTER (TKS_CKEY+18) +#define TK_RETURN (TKS_CKEY+19) +#define TK_SHORT (TKS_CKEY+20) +#define TK_SIGNED (TKS_CKEY+21) +#define TK_SIZEOF (TKS_CKEY+22) +#define TK_STATIC (TKS_CKEY+23) +#define TK_STRUCT (TKS_CKEY+24) +#define TK_SWITCH (TKS_CKEY+25) +#define TK_TYPEDEF (TKS_CKEY+26) +#define TK_UNION (TKS_CKEY+27) +#define TK_UNSIGNED (TKS_CKEY+28) +#define TK_VOID (TKS_CKEY+29) +#define TK_VOLATILE (TKS_CKEY+30) +#define TK_WHILE (TKS_CKEY+31) + +#define MAX_INCLUDE 64 /* Nested includes */ +#define MAX_DEFINE 64 /* Nested defines */ + +extern char * set_entry P((int,char*,void*)); +extern void * read_entry P((int,char*)); + +struct define_item +{ + struct define_arg * next; + char * name; + int arg_count; /* -1 = none; >=0 = brackets with N args */ + int in_use; /* Skip this one for looking up #defines */ + int varargs; /* No warning if unexpected arguments. */ + char value[1]; /* [arg,]*value */ +}; diff --git a/Applications/cpp/cpp.c b/Applications/cpp/cpp.c new file mode 100644 index 00000000..a6e73377 --- /dev/null +++ b/Applications/cpp/cpp.c @@ -0,0 +1,1514 @@ + +#include +#include +#ifdef __STDC__ +#include +#else +#include +#endif +#include "cc.h" + +#define CPP_DEBUG 0 /* LOTS of junk to stderr. */ + +/* + * This file comprises the 'guts' of a C preprocessor. + * + * Functions exported from this file: + * gettok() Returns the next token from the source + * curword contains the text of the token + * + * Variables + * curword Contains the text of the last token parsed. + * curfile Currently open primary file + * c_fname Name of file being parsed + * c_lineno Current line number in file being parsed. + * + * alltok Control flag for the kind of tokens you want (C or generic) + * dialect Control flag to change the preprocessor for Ansi C. + * + * TODO: + * #asm -> asm("...") translation. + * ?: in #if expressions + * Complete #line directive. + * \n in "\n" in a stringized argument. + * Comments in stringized arguments should be deleted. + * + * Poss: Seperate current directory for #include from errors (#line). + * (For editors that hunt down source files) + * Poss: C99 Variable macro args. + */ + +#define KEEP_SPACE 0 +#define SKIP_SPACE 1 + +#define EOT 4 +#define SYN 22 + +char curword[WORDSIZE]; +int alltok = 0; +int dialect = 0; + +FILE * curfile; +char * c_fname; +int c_lineno = 0; + +#ifdef __BCC__ +typedef long int_type; /* Used for preprocessor expressions */ +#else +typedef int int_type; /* Used for preprocessor expressions */ +#endif +static int curtok = 0; /* Used for preprocessor expressions */ + +static int fi_count = 0; +static FILE * saved_files[MAX_INCLUDE]; +static char * saved_fname[MAX_INCLUDE]; +static int saved_lines[MAX_INCLUDE]; + +static char * def_ptr = 0; +static char * def_start = 0; +static struct define_item * def_ref = 0; + +static int def_count =0; +static char * saved_def[MAX_DEFINE]; +static char * saved_start[MAX_DEFINE]; +static long saved_unputc[MAX_DEFINE]; +static struct define_item * saved_ref[MAX_DEFINE]; + +static long unputc = 0; + +static int last_char = '\n'; +static int in_preproc = 0; +static int dont_subst = 0; +static int quoted_str = 0; + +static int if_count = 0; +static int if_false = 0; +static int if_has_else = 0; +static int if_hidden = 0; +static unsigned int if_stack = 0; + +struct arg_store { + char * name; + char * value; + int in_define; +}; + +static int chget P((void)); +static int chget_raw P((void)); +static void unchget P((int)); +static int gettok_nosub P((void)); +static int get_onetok P((int)); +static int pgetc P((void)); +static int do_preproc P((void)); +static int do_proc_copy_hashline P((void)); +static int do_proc_if P((int)); +static void do_proc_include P((void)); +static void do_proc_define P((void)); +static void do_proc_undef P((void)); +static void do_proc_else P((void)); +static void do_proc_endif P((void)); +static void do_proc_tail P((void)); +static int get_if_expression P((void)); +static int_type get_expression P((int)); +static int_type get_exp_value P((void)); +static void gen_substrings P((char *, char *, int, int)); +static char * insert_substrings P((char *, struct arg_store *, int)); + +int +gettok() +{ + int ch; + + for(;;) + { + /* Tokenised C-Preprocessing */ + if (!quoted_str) + { + if (alltok) + ch = get_onetok(KEEP_SPACE); + else + ch = get_onetok(SKIP_SPACE); + + if( ch == '"' || ch == '\'' ) + quoted_str = ch; + + if( ch == TK_WORD ) + { + struct token_trans *p = is_ckey(curword, strlen(curword)) ; + if( p ) + return p->token; + } + + if (ch == '\n') continue; + return ch; + } + + /* Special for quoted strings */ + *curword = '\0'; + ch = chget(); + if( ch == EOF ) return ch; + + *curword = ch; + curword[1] = '\0'; + + if( ch == quoted_str ) { + if( ch == '"' ) + { + if (dialect == DI_ANSI) { + /* Found a terminator '"' check for ansi continuation */ + while( (ch = pgetc()) <= ' ' && ch != EOF) ; + if( ch == '"' ) continue; + unchget(ch); + *curword = '"'; + curword[1] = '\0'; + } + + quoted_str = 0; + return '"'; + } else { + quoted_str = 0; + return ch; + } + } + if( ch == '\n' ) { + quoted_str = 0; + unchget(ch); /* Make sure error line is right */ + return ch; + } + if( ch == '\\' ) { + unchget(ch); + ch = get_onetok(KEEP_SPACE); + return ch; + } + return TK_STR; + } +} + +static int +gettok_nosub() +{ int rv; dont_subst++; rv=get_onetok(SKIP_SPACE); dont_subst--; return rv; } + +static int +get_onetok(keep) +int keep; +{ + char * p; + int state; + int ch, cc; + +Try_again: + *(p=curword) = '\0'; + state=cc=ch=0; + + /* First skip whitespace, if the arg says so then we need to keep it */ + while( (ch = pgetc()) == ' ' || ch == '\t' ) + { + if (keep == KEEP_SPACE) { + if( p < curword + WORDSIZE-1 ) { + *p++ = ch; /* Clip to WORDSIZE */ + *p = '\0'; + } + } + } + + if( ch > 0xFF ) return ch; + if( p != curword ) { unchget(ch); return TK_WSPACE; } + if( ch == '\n') return ch; + if( ch == EOF ) return ch; + if( ch >= 0 && ch < ' ' ) goto Try_again; + + for(;;) + { + switch(state) + { + case 0: if( (ch >= 'A' && ch <= 'Z') + || (ch >= 'a' && ch <= 'z') + || ch == '_' || ch == '$' ) + state = 1; + else if(ch == '0') + state = 2; + else if(ch >= '1' && ch <= '9') + state = 5; + else + goto break_break; + break; + case 1: if( (ch >= '0' && ch <= '9') + || (ch >= 'A' && ch <= 'Z') + || (ch >= 'a' && ch <= 'z') + || ch == '_' || ch == '$' ) + break; + else + goto break_break; + case 2: if( ch >= '0' && ch <= '7') + state = 3; + else if( ch == 'x' || ch == 'X' ) + state = 4; + else + goto break_break; + break; + case 3: if( ch >= '0' && ch <= '7') + break; + else + goto break_break; + case 4: if( (ch >= '0' && ch <= '9') + || (ch >= 'A' && ch <= 'F') + || (ch >= 'a' && ch <= 'f') ) + break; + else + goto break_break; + case 5: + case 6: if( ch >= '0' && ch <= '9') + ; + else if( ch == '.' && state != 6 ) + state = 6; + else if( ch == 'e' || ch == 'E' ) + state = 7; + else + goto break_break; + break; + case 7: if( ch == '+' || ch == '-' ) + break; + state = 8; + /* FALLTHROUGH */ + case 8: if( ch >= '0' && ch <= '9') + break; + else + goto break_break; + } + if( cc < WORDSIZE-1 ) *p++ = ch; /* Clip to WORDSIZE */ + *p = '\0'; cc++; + ch = chget(); + if (ch == SYN) ch = chget(); + } +break_break: + /* Numbers */ + if( state >= 2 ) + { + if( state < 6 ) + { + if( ch == 'u' || ch == 'U' ) + { + if( cc < WORDSIZE-1 ) *p++ = ch; /* Clip to WORDSIZE */ + *p = '\0'; cc++; + ch = chget(); + } + if( ch == 'l' || ch == 'L' ) + { + if( cc < WORDSIZE-1 ) *p++ = ch; /* Clip to WORDSIZE */ + *p = '\0'; cc++; + } + else unchget(ch); + return TK_NUM; + } + unchget(ch); + return TK_FLT; + } + + /* Words */ + if( state == 1 ) + { + struct define_item * ptr; + unchget(ch); + if( !dont_subst + && (ptr = read_entry(0, curword)) != 0 + && !ptr->in_use + ) + { + if ( def_count >= MAX_DEFINE ) { + cwarn("Preprocessor recursion overflow"); + return TK_WORD; + } else if( ptr->arg_count >= 0 ) + { + /* An open bracket must follow the word */ + int ch1 = 0; + while ((ch = chget()) == ' ' || ch == '\t' ) ch1 = ch; + if (ch != '(') { + unchget(ch); + if (ch1) unchget(ch1); + return TK_WORD; + } + + /* We have arguments to process so lets do so. */ + gen_substrings(ptr->name, ptr->value, ptr->arg_count, ptr->varargs); + + /* Don't mark macros with arguments as in use, it's very + * difficult to say what the correct result would be so + * I'm letting the error happen. Also if I do block + * recursion then it'll also block 'pseudo' recursion + * where the arguments have a call to this macro. + * + def_ref = ptr; + ptr->in_use = 1; + */ + } + else if (ptr->value[0]) + { + /* Simple direct substitution; note the shortcut (above) for + * macros that are defined as null */ + saved_ref[def_count] = def_ref; + saved_def[def_count] = def_ptr; + saved_start[def_count] = def_start; + saved_unputc[def_count] = unputc; + def_count++; + unputc = 0; + def_ref = ptr; + def_ptr = ptr->value; + def_start = 0; + ptr->in_use = 1; + } + goto Try_again; + } + return TK_WORD; + } + + /* Quoted char for preprocessor expressions */ + if(in_preproc && ch == '\'' ) + { + *p++ = ch; ch = chget(); + for(;;) + { + if( cc < WORDSIZE-1 ) *p++ = ch; /* Clip to WORDSIZE */ + *p = '\0'; cc++; + if( ch == '\'' || ch == '\n' ) break; + + if( ch == '\\' ) + { + ch = chget(); + if( cc < WORDSIZE-1 ) *p++ = ch; /* Clip to WORDSIZE */ + *p = '\0'; cc++; + } + ch = chget(); + } + ch = TK_QUOT; + } + + /* Collect and translate \xyx strings, (should probably translate these + * all to some standard form (eg \ooo plus \N ) + * + * ___________________________________________________________________ + * | new-line NL (LF) \n| audible alert BEL \a | + * | horizontal tab HT \t| question mark ? \? | + * | vertical tab VT \v| double quote " \" | + * | backspace BS \b| octal escape ooo \ooo| + * | carriage return CR \r| hexadecimal escape hh \xhh| + * | formfeed FF \f| backslash \ \\ | + * | single quote ' \'| | + * |_______________________________|_________________________________| + */ + + if( ch == '\\' ) + { + int i; + + *p++ = ch; ch = chget(); + if (ch >= '0' && ch <= '7' ) { + for(i=0; i<3; i++) { + if (ch >= '0' && ch <= '7' ) { + *p++ = ch; ch = chget(); + } + } + unchget(ch); + } else if (ch == 'x' || ch == 'X') { + *p++ = ch; ch = chget(); + for(i=0; i<2; i++) { + if ( (ch >= '0' && ch <= '9' ) || + (ch >= 'A' && ch <= 'F' ) || + (ch >= 'a' && ch <= 'f' ) ) { + *p++ = ch; ch = chget(); + } + } + unchget(ch); + } else if (ch == '?') { + p[-1] = '?'; + } else if (ch != '\n' && ch != EOF) { + *p++ = ch; + } else + unchget(ch); + *p = '\0'; + return TK_STR; + } + + /* Possible composite tokens */ + if( ch > ' ' && ch <= '~' ) + { + struct token_trans *tt; + *curword = cc = ch; + + for(state=1; ; state++) + { + curword[state] = ch = chget(); + if( !(tt=is_ctok(curword, state+1)) ) + { + unchget(ch); + curword[state] = '\0'; + return cc; + } + cc=tt->token; + } + } + return ch; +} + +static int +pgetc() +{ + int ch, ch1; + + for(;;) + { + if ((ch = chget()) == EOF) return ch; + + if( !in_preproc && last_char == '\n' && ch == '#' ) + { + in_preproc = 1; + ch = do_preproc(); + in_preproc = 0; + if(if_false || ch == 0) continue; + + last_char = '\n'; + return ch; + } + if( last_char != '\n' || (ch != ' ' && ch != '\t') ) + last_char = ch; + + /* Remove comments ... */ + if( ch != '/' ) + { if(if_false && !in_preproc) continue; return ch; } + ch1 = chget(); /* Allow "/\\\n*" as comment start too!? */ + + if( ch1 == '/' ) /* Double slash style comments */ + { + do { ch = chget(); } while(ch != '\n' && ch != EOF); + return ch; /* Keep the return. */ + } + + if( ch1 != '*' ) + { + unchget(ch1); + if(if_false && !in_preproc) continue; + return ch; + } + + for(;;) + { + if( ch == '*' ) + { + ch = chget(); + if( ch == EOF ) return EOF; + if( ch == '/' ) break; + } + else ch = chget(); + } + if (dialect == DI_ANSI) + return ' '; /* If comments become " " */ + else return SYN; /* Comments become nulls, but we need a + * marker so I can do token concat properly. */ + } +} + +/* This function handles the first and second translation phases of Ansi-C */ +static int +chget() +{ + int ch, ch1; + for(;;) { + ch = chget_raw(); + if (ch == '\\') { + ch1 = chget_raw(); + if (ch1 == '\n') continue; + unchget(ch1); + } + + /* Ansi trigraphs -- Ewww, it needs lots of 'unchget' space too. */ + if (dialect == DI_ANSI && ch == '?') { + ch1 = chget_raw(); + if (ch1 != '?') + unchget(ch1); + else { + static char trig1[] = "()<>/!'-="; + static char trig2[] = "[]{}\\|^~#"; + char * s; + ch1 = chget_raw(); + s = strchr(trig1, ch1); + if (s) { + unchget(trig2[s-trig1]); /* Unchget so that ??/ can be used as */ + continue; /* a real backslash at EOL. */ + } else { + unchget(ch1); + unchget('?'); + } + } + } + + return ch; + } +} + +static void +unchget(ch) +{ +#if CPP_DEBUG + fprintf(stderr, "\b", ch); +#endif + if(ch == 0) return; /* Hummm */ + if(ch == EOF) ch=EOT; /* EOF is pushed back as a normal character. */ + ch &= 0xFF; + + if(unputc&0xFF000000) + cerror("Internal character pushback stack overflow"); + else unputc = (unputc<<8) + (ch); + if( ch == '\n' ) c_lineno--; +} + +static int +chget_raw() +#if CPP_DEBUG +{ + int ch; +static int last_def = 0; +static int last_fi = 0; + if (last_fi != fi_count) fprintf(stderr, "", fi_count); + if (last_def != def_count) fprintf(stderr, "", def_count); + last_def = def_count; last_fi = fi_count; + + ch = realchget(); + if (ch == EOF) fprintf(stderr, ""); else fprintf(stderr, "%c", ch); + + if (last_def != def_count) fprintf(stderr, "", def_count); + if (last_fi != fi_count) fprintf(stderr, "", fi_count); + last_def = def_count; last_fi = fi_count; + + return ch; +} + +static int +realchget() +#endif +{ + int ch; + for(;;) + { + if( unputc ) + { + if((unputc&0xFF)==EOT && in_preproc) return '\n'; + ch=(unputc&0xFF); unputc>>=8; + if( ch == EOT ) ch = EOF; + if( ch == '\n' ) c_lineno++; + return ch; + } + + if( def_ptr ) + { + ch = *def_ptr++; if(ch) return (unsigned char)ch; + if( def_start ) free(def_start); + if( def_ref ) def_ref->in_use = 0; + + def_count--; + def_ref = saved_ref[def_count]; + def_ptr = saved_def[def_count]; + def_start = saved_start[def_count]; + unputc = saved_unputc[def_count]; + continue; + } + + ch = getc(curfile); + if( ch == EOF && fi_count != 0) + { + fclose(curfile); + fi_count--; + curfile = saved_files[fi_count]; + if(c_fname) free(c_fname); + c_fname = saved_fname[fi_count]; + c_lineno = saved_lines[fi_count]; + ch = '\n'; /* Ensure end of line on end of file */ + } + else if( ch == '\n' ) c_lineno++; + + /* Treat all control characters, except the standard whitespace + * characters of TAB and NL as completely invisible. + */ + if( ch >= 0 && ch < ' ' && ch!='\n' && ch!='\t' && ch!=EOF ) continue; + + if( ch == EOF ) { unchget(ch); return '\n'; } /* Ensure EOL before EOF */ + return (unsigned char)ch; + } +} + +static int +do_preproc() +{ + int val, no_match=0; + + if( (val=get_onetok(SKIP_SPACE)) == TK_WORD ) + { + if( strcmp(curword, "ifdef") == 0 ) + do_proc_if(0); + else if( strcmp(curword, "ifndef") == 0 ) + do_proc_if(1); + else if( strcmp(curword, "if") == 0 ) + do_proc_if(2); + else if( strcmp(curword, "elif") == 0 ) + do_proc_if(3); + else if( strcmp(curword, "else") == 0 ) + do_proc_else(); + else if( strcmp(curword, "endif") == 0 ) + do_proc_endif(); + else if(if_false) + no_match=1; + else + { + if( strcmp(curword, "include") == 0 ) + do_proc_include(); + else if( strcmp(curword, "define") == 0 ) + do_proc_define(); + else if( strcmp(curword, "undef") == 0 ) + do_proc_undef(); + else if( strcmp(curword, "error") == 0 ) { + strcpy(curword, "#error"); + do_proc_copy_hashline(); pgetc(); + cerror(curword); + } else if( strcmp(curword, "warning") == 0 ) { + strcpy(curword, "#warning"); + do_proc_copy_hashline(); pgetc(); + cwarn(curword); + } else if( strcmp(curword, "pragma") == 0 ) { + do_proc_copy_hashline(); pgetc(); + /* Ignore #pragma ? */ + } else if( strcmp(curword, "line") == 0 ) { + do_proc_copy_hashline(); pgetc(); + /* Ignore #line for now. */ + } else if( strcmp(curword, "asm") == 0 ) { + alltok |= 0x100; + return do_proc_copy_hashline(); + } else if( strcmp(curword, "endasm") == 0 ) { + alltok &= ~0x100; + return do_proc_copy_hashline(); + } else + no_match=1; + } + } else if (!val) { + /* Empty directives used to denote that a file is to be run through + * the preprocessor in K&R. Do not complain if we got no token. */ + no_match=1; + } + + if( no_match ) + { + if(!if_false) cerror("Unknown preprocessor directive"); + while( val != '\n' ) val = pgetc(); + } + + *curword = 0; /* Just in case */ + return 0; +} + +static int +do_proc_copy_hashline() +{ + int off, ch; + + off = strlen(curword); + + while( (ch=pgetc()) != '\n' ) + { + if( off < WORDSIZE ) curword[off++] = ch; + } + if( off == WORDSIZE ) + { + cerror("Preprocessor directive too long"); + curword[WORDSIZE-1] = '\0'; + } + else + curword[off] = '\0'; + + unchget('\n'); + return TK_COPY; +} + +static void +do_proc_include() +{ + int ch, ch1; + char * p; + FILE * fd; + + ch = get_onetok(SKIP_SPACE); + if( ch == '<' || ch == '"' ) + { + if( ch == '"' ) ch1 = ch; else ch1 = '>'; + p = curword; + while(p< curword+WORDSIZE-1) + { + ch = pgetc(); + if( ch == '\n' ) break; + if( ch == ch1 ) + { + *p = '\0'; + p = strdup(curword); + + do { ch1 = pgetc(); } while(ch1 == ' ' || ch1 == '\t'); + unchget(ch1); + do_proc_tail(); + + saved_files[fi_count] = curfile; + saved_fname[fi_count] = c_fname; + saved_lines[fi_count] = c_lineno; + + fd = open_include(p, "r", (ch=='"')); + if( fd ) { + fi_count++; + curfile = fd; + } else + cerror("Cannot open include file"); + + return; + } + *p++ = ch; + } + } + cerror("Bad #include command"); + while(ch != '\n') ch = pgetc(); + return; +} + +static void +do_proc_define() +{ + int ch, ch1; + struct define_item * ptr, * old_value = 0; + int cc, len; + char name[WORDSIZE]; + + if( (ch=gettok_nosub()) == TK_WORD ) + { + strcpy(name, curword); + ptr = read_entry(0, name); + if(ptr) + { + set_entry(0, name, (void*)0); /* Unset var */ + if (ptr->in_use) + /* Eeeek! This shouldn't happen; so just let it leak. */ + cwarn("macro redefined while it was in use!?"); + else + old_value = ptr; + } + + /* Skip blanks */ + for(ch=ch1=pgetc(); ch == ' ' || ch == '\t' ; ch=pgetc()) ; + + len = WORDSIZE; + ptr = malloc(sizeof(struct define_item) + WORDSIZE); + if(ptr==0) cfatal("Preprocessor out of memory"); + ptr->value[cc=0] = '\0'; + + /* Add in arguments */ + if( ch1 == '(' ) + { + ptr->arg_count=0; + for(;;) + { + ch=gettok_nosub(); + if( ptr->arg_count==0 && ch == ')' ) break; + if( ch == TK_WORD ) + { + if( cc+strlen(curword)+4 >= len) + { + len = cc + WORDSIZE; + ptr = (struct define_item *) realloc(ptr, sizeof(struct define_item) + len); + if(ptr==0) cfatal("Preprocessor out of memory"); + } + if( cc+strlen(curword) < len) + { + strcpy(ptr->value+cc, curword); + cc+=strlen(curword); + strcpy(ptr->value+cc, ","); + cc++; + ptr->arg_count++; + ch=gettok_nosub(); + if( ch == TK_ELLIPSIS ) { + ptr->varargs = 1; + ch=gettok_nosub(); + if (ch == ',') ch = '*'; /* Force error if not ')' */ + } + if( ch == ')' ) break; + if( ch == ',' ) continue; + } + } + cerror("Bad #define command"); + free(ptr); + while(ch != '\n') ch = pgetc(); + set_entry(0, name, (void*)old_value); /* Return var to old. */ + return; + } + while((ch=pgetc())==' ' || ch=='\t'); + } + else ptr->arg_count = -1; + + /* And the substitution string */ + while(ch != '\n') + { + if( cc+4 > len ) + { + len = cc + WORDSIZE; + ptr = (struct define_item *) realloc(ptr, sizeof(struct define_item) + len); + if(ptr==0) cfatal("Preprocessor out of memory"); + } + ptr->value[cc++] = ch; + ch = pgetc(); + } + if (cc) + ptr->value[cc++] = ' ';/* Byte of lookahead for recursive macros */ + ptr->value[cc++] = '\0'; + +#if CPP_DEBUG + if (cc == 1) + fprintf(stderr, "\n### Define '%s' as null\n", name); + else if (ptr->arg_count<0) + fprintf(stderr, "\n### Define '%s' as '%s'\n", + name, ptr->value); + else + fprintf(stderr, "\n### Define '%s' as %d args '%s'\n", + name, ptr->arg_count, ptr->value); +#endif + + /* Clip to correct size and save */ + ptr = (struct define_item *) realloc(ptr, sizeof(struct define_item) + cc); + ptr->name = set_entry(0, name, ptr); + ptr->in_use = 0; + ptr->next = 0; + + if (old_value) { + if (strcmp(old_value->value, ptr->value) != 0) + cwarn("#define redefined macro"); + free(old_value); + } + } + else cerror("Bad #define command"); + while(ch != '\n') ch = pgetc(); +} + +static void +do_proc_undef() +{ + int ch; + struct define_item * ptr; + if( (ch=gettok_nosub()) == TK_WORD ) + { + ptr = read_entry(0, curword); + if(ptr) + { + set_entry(0, curword, (void*)0); /* Unset var */ + if (ptr->in_use) + /* Eeeek! This shouldn't happen; so just let it leak. */ + cwarn("macro undefined while it was in use!?"); + else + free(ptr); + } + do_proc_tail(); + } + else + { + cerror("Bad #undef command"); + while(ch != '\n') ch = pgetc(); + } +} + +static int +do_proc_if(type) +int type; +{ + int ch = 0; + if(if_false && if_hidden) + { + if( type != 3 ) if_hidden++; + do_proc_tail(); + return 0; + } + + if( type == 3 ) + { + if( if_count == 0 ) + cerror("#elif without matching #if"); + else + { + if( if_has_else ) + cerror("#elif following #else for one #if"); + if( if_has_else || if_false != 1 ) + { + if_false=2; + while(ch != '\n') ch = pgetc(); + return 0; + } + if_false=0; + } + if_has_else = 0; + } + if(if_false) + { + if( type != 3 ) if_hidden++; + do_proc_tail(); + } + else + { + if( type != 3 ) + { + if_count++; + if_stack <<= 1; + if_stack |= if_has_else; + if_has_else = 0; + } + if(type > 1) + { + ch = get_if_expression(); + if_false=!ch; + } + else + { + ch = gettok_nosub(); + if( ch == TK_WORD ) + { + do_proc_tail(); + if_false = (read_entry(0, curword) == 0); + if(type == 1) if_false = !if_false; + } + else + { + cerror("Bad #if command"); + if_false = 0; + while(ch != '\n') ch = pgetc(); + } + } + } + return 0; +} + +static void +do_proc_else() +{ + if( if_hidden == 0 ) + { + if( if_count == 0 ) + cerror("#else without matching #if"); + else + if_false = (if_false^1); + if( if_has_else ) + cerror("Multiple #else's for one #if"); + if_has_else = 1; + } + do_proc_tail(); +} + +static void +do_proc_endif() +{ + if( if_hidden ) + if_hidden--; + else + { + if( if_count == 0 ) + cerror("Unmatched #endif"); + else + { + if_count--; + if_false=0; + if_has_else = (if_stack&1); + if_stack >>=1; + } + } + do_proc_tail(); +} + +static void +do_proc_tail() +{ + int ch, flg=1; + while((ch = pgetc()) != '\n') if(ch > ' ') + { + if (!if_false && flg) + cwarn("Unexpected text following preprocessor command"); + flg=0; + } +} + +static int +get_if_expression() +{ + int value = get_expression(0); + + if (curtok != '\n') + do_proc_tail(); + + return value; +} + +static int_type +get_expression(prio) +int prio; +{ + int_type lvalue; + int_type rvalue; + int no_op = 0; + + curtok = get_onetok(SKIP_SPACE); + lvalue = get_exp_value(); + + do + { + switch(curtok) + { + case '*': case '/': case '%': + if (prio >= 10) return lvalue; + break; + case '+': case '-': + if (prio >= 9) return lvalue; + break; + case TK_RIGHT_OP: case TK_LEFT_OP: + if (prio >= 8) return lvalue; + break; + case '<': case '>': case TK_LE_OP: case TK_GE_OP: + if (prio >= 7) return lvalue; + break; + case TK_EQ_OP: case TK_NE_OP: + if (prio >= 6) return lvalue; + break; + case '&': + if (prio >= 5) return lvalue; + break; + case '^': + if (prio >= 4) return lvalue; + break; + case '|': + if (prio >= 3) return lvalue; + break; + case TK_AND_OP: + if (prio >= 2) return lvalue; + break; + case TK_OR_OP: + if (prio >= 1) return lvalue; + break; + } + switch(curtok) + { + case '*': + rvalue = get_expression(10); + lvalue *= rvalue; + break; + case '/': + rvalue = get_expression(10); + if (rvalue) + lvalue /= rvalue; + break; + case '%': + rvalue = get_expression(10); + if (rvalue) + lvalue %= rvalue; + break; + case '+': + rvalue = get_expression(9); + lvalue += rvalue; + break; + case '-': + rvalue = get_expression(9); + lvalue -= rvalue; + break; + case TK_RIGHT_OP: + rvalue = get_expression(8); + lvalue >>= rvalue; + break; + case TK_LEFT_OP: + rvalue = get_expression(8); + lvalue <<= rvalue; + break; + case '<': + rvalue = get_expression(7); + lvalue = (lvalue < rvalue); + break; + case '>': + rvalue = get_expression(7); + lvalue = (lvalue > rvalue); + break; + case TK_LE_OP: + rvalue = get_expression(7); + lvalue = (lvalue <= rvalue); + break; + case TK_GE_OP: + rvalue = get_expression(7); + lvalue = (lvalue >= rvalue); + break; + case TK_EQ_OP: + rvalue = get_expression(6); + lvalue = (lvalue == rvalue); + break; + case TK_NE_OP: + rvalue = get_expression(6); + lvalue = (lvalue != rvalue); + break; + case '&': + rvalue = get_expression(5); + lvalue = (lvalue & rvalue); + break; + case '^': + rvalue = get_expression(4); + lvalue = (lvalue ^ rvalue); + break; + case '|': + rvalue = get_expression(3); + lvalue = (lvalue | rvalue); + break; + case TK_AND_OP: + rvalue = get_expression(2); + lvalue = (lvalue && rvalue); + break; + case TK_OR_OP: + rvalue = get_expression(1); + lvalue = (lvalue || rvalue); + break; + + case '?': /* XXX: To add */ + + default: + no_op = 1; + } + } + while(prio == 0 && !no_op); + + return lvalue; +} + +static int_type +get_exp_value() +{ + int_type value = 0; + int sign = 1; + + if (curtok == '!') { + curtok = get_onetok(SKIP_SPACE); + return !get_exp_value(); + } + if (curtok == '~') { + curtok = get_onetok(SKIP_SPACE); + return ~get_exp_value(); + } + + while (curtok == '+' || curtok == '-') { + if (curtok == '-') sign = -sign; + curtok = get_onetok(SKIP_SPACE); + } + + if (curtok == TK_NUM) { + value = strtoul(curword, (void*)0, 0); + curtok = get_onetok(SKIP_SPACE); + } else if (curtok == TK_QUOT) { + value = curword[1]; + if (value == '\\') { + if (curword[2] >= '0' && curword[2] <= '7') { + value = curword[2] - '0'; + if (curword[3] >= '0' && curword[3] <= '7') { + value = (value<<3) + curword[3] - '0'; + if (curword[4] >= '0' && curword[4] <= '7') { + value = (value<<3) + curword[4] - '0'; + } + } + } else switch(curword[2]) { + case 'n': value = '\n'; break; + case 'f': value = '\f'; break; + case 't': value = '\t'; break; + default: value = curword[2]; break; + } + } +#ifdef NATIVE_CPP + value = (char) value; /* Fix range */ +#elif SIGNED_CHAR + value = (signed char) value; +#else + value = (unsigned char) value; +#endif + curtok = get_onetok(SKIP_SPACE); + } else if (curtok == TK_WORD) { + value = 0; + if (strcmp("defined", curword) == 0) { + curtok = gettok_nosub(); + if (curtok == '(' && gettok_nosub() != TK_WORD) + cerror("'defined' keyword requires argument"); + else { + value = (read_entry(0, curword) != 0); + if (curtok == '(' && gettok_nosub() != ')') + cerror("'defined' keyword requires closing ')'"); + else + curtok = get_onetok(SKIP_SPACE); + } + } + else + curtok = get_onetok(SKIP_SPACE); + + } else if (curtok == '(') { + value = get_expression(0); + if (curtok == ')') + curtok = get_onetok(SKIP_SPACE); + else { + curtok = '$'; + cerror("Expected ')'"); + } + } + + return sign<0 ? -value: value; +} + +void +gen_substrings(macname, data_str, arg_count, is_vararg) +char * macname; +char * data_str; +int arg_count; +int is_vararg; +{ + char * mac_text = 0; + struct arg_store *arg_list; + int ac, ch, cc, len; + + int paren_count = 0; + int in_quote = 0; + int quote_char = 0; + int commas_found = 0; + int args_found = 0; + + arg_list = malloc(sizeof(struct arg_store) * arg_count); + memset(arg_list, 0, sizeof(struct arg_store) * arg_count); + + for(ac=0; *data_str && ac < arg_count; data_str++) { + if( *data_str == ',' ) { ac++; continue; } + + if (arg_list[ac].name == 0) cc = len = 0; + + if (cc+2 >= len) { + len += 20; + arg_list[ac].name = realloc(arg_list[ac].name, len); + } + arg_list[ac].name[cc++] = *data_str; + arg_list[ac].name[cc] = '\0'; + } + + for(;;) { + if ((ch = chget()) == EOF) break; + if(in_quote == 2) { + in_quote = 1; + } else if (in_quote) { + if ( ch == quote_char ) in_quote = 0; + if ( ch == '\\') in_quote = 2; + } else { + if ( ch == '(' ) paren_count++; + if ( ch == '"' || ch == '\'' ) { in_quote = 1; quote_char = ch; } + if (paren_count == 0 && ch == ',' ) { + commas_found++; + if (commas_found < arg_count) + continue; + } + if ( ch == ')' ) { + if (paren_count == 0) break; + paren_count--; + } + } + args_found = 1; + /* Too many args, deal with, or ignore, the rest. */ + if (commas_found >= arg_count) { + if(arg_count == 0) continue; + ac = arg_count-1; + } else + ac = commas_found; + + if (arg_list[ac].value == 0) { + cc = len = 0; + arg_list[ac].in_define = def_count; + } + + if (cc+2 >= len) { + len += 20; + arg_list[ac].value = realloc(arg_list[ac].value, len); + } + +#if 0 + if (ch == '\n' && cc>0 && arg_list[ac].value[cc-1] == '\n' ) { + ... ? + } +#endif + + arg_list[ac].value[cc++] = ch; + arg_list[ac].value[cc] = '\0'; + } + + if (commas_found || args_found) args_found = commas_found+1; + + if( arg_count == 0 && args_found != 0 ) + cerror("Arguments given to macro without them."); + else if( !is_vararg && arg_count != args_found ) + cwarn("Incorrect number of macro arguments"); + + mac_text = insert_substrings(data_str, arg_list, arg_count); + + /* + * At this point 'mac_text' contains the full expansion of the macro. + * + * So we could scan this for calls to this macro and if we find one + * that _exactly_ matches this call (including arguments) then we mark + * this call's in_use flag. + * + * OTOH, it would probably be best to throw away this expansion and + * pretend we never noticed this macro expansion in the first place. + * + * Still this is mostly academic as the error trapping works and + * recursive macros _with_arguments_ are both rare and unpredictable. + */ + + if (arg_list) { + for (ac=0; ac\n", def_count, mac_text); +#endif +} + +static char * +insert_substrings(data_str, arg_list, arg_count) +char * data_str; +struct arg_store *arg_list; +int arg_count; +{ + int ac, ch; + char * p, * s; + char * rv = 0; + int len = 0; + int cc = 0; + int in_quote = 0; + int quote_char = 0; + int ansi_stringize = 0; + +#if CPP_DEBUG + fprintf(stderr, "\n### Macro substitution in '%s'\n", data_str); + for (ac=0; ac= '0' && ch <= '9') + || (ch >= 'A' && ch <= 'Z') + || (ch >= 'a' && ch <= 'z') + || ch == '_' || ch == '$' ) + *p++ = *data_str++; + else + break; + } + + if (p == curword) { + /* Ansi Stringize and concat */ + if (*data_str == '#' && dialect != DI_KNR) { + if (data_str[1] == '#') { + while(cc>0 && (rv[cc-1] == ' ' || rv[cc-1] == '\t')) + cc--; + data_str+=2; + while(*data_str == ' ' || *data_str == '\t') + data_str++; + if (*data_str == '\0') { /* Hummm */ + data_str--; + cerror("'##' operator at end of macro"); + } + continue; + } + data_str++; + ansi_stringize = 1; + continue; + } + + if (ansi_stringize) { + ansi_stringize = 0; + cerror("'#' operator should be followed by a macro argument name"); + } + + /* Other characters ... */ + if (cc+2 > len) { len += 20; rv = realloc(rv, len); } + rv[cc++] = *data_str++; + continue; + } + *p = '\0'; s = curword; + for (ac=0; acarg_count == -1) { + s = ptr->value; + } + } + + rv[cc++] = '"'; + while(*s == ' ' || *s == '\t') s++; + while (*s) { + if (cc+4 > len) { len += 20; rv = realloc(rv, len); } + if (*s == '"') rv[cc++] = '\\'; + rv[cc++] = *s++; + } + while(cc>0 && (rv[cc-1] == ' ' || rv[cc-1] == '\t')) + cc--; + rv[cc++] = '"'; + rv[cc++] = '\0'; + ansi_stringize = 0; + s = ""; + break; + } + + break; + } + } + + if (ansi_stringize) { + ansi_stringize = 0; + cerror("'#' operator should be followed by a macro argument name"); + } + + if (cc+2+strlen(s) > len) { len += strlen(s)+20; rv = realloc(rv, len); } + strcpy(rv+cc, s); + cc = strlen(rv); + } + + rv[cc] = '\0'; + return rv; +} diff --git a/Applications/cpp/hash.c b/Applications/cpp/hash.c new file mode 100644 index 00000000..6013c54b --- /dev/null +++ b/Applications/cpp/hash.c @@ -0,0 +1,119 @@ + +#include +#ifdef __STDC__ +#include +#include +#else +#include +#endif +#include "cc.h" + +/* + * Two functions: + * char * set_entry(int namespace, char * name, void * value); + * returns a pointer to the copy of the name; + * + * void * read_entry(int namespace, char * name); + * returns the value; + */ + +struct hashentry +{ + struct hashentry * next; + void * value; + int namespace; + char word[1]; +}; + +struct hashentry ** hashtable; +int hashsize = 0xFF; /* 2^X -1 */ +int hashcount = 0; +static int hashvalue P((int namespace, char * word)); + +void * +read_entry(namespace, word) +int namespace; +char * word; +{ + int hash_val; + struct hashentry * hashline; + if( hashtable == 0 ) return 0; + hash_val = hashvalue(namespace, word); + + hashline = hashtable[hash_val]; + + for(; hashline; hashline = hashline->next) + { + if(namespace != hashline->namespace) continue; + if(word[0] != hashline->word[0]) continue; + if(strcmp(word, hashline->word) ) continue; + return hashline->value; + } + return 0; +} + +char * +set_entry(namespace, word, value) +int namespace; +char * word; +void * value; +{ + int hash_val, i; + struct hashentry * hashline, *prev; + hash_val = hashvalue(namespace, word); + + if( hashtable ) + { + hashline = hashtable[hash_val]; + + for(prev=0; hashline; prev=hashline, hashline = hashline->next) + { + if(namespace != hashline->namespace) continue; + if(word[0] != hashline->word[0]) continue; + if(strcmp(word, hashline->word) ) continue; + if( value ) hashline->value = value; + else + { + if( prev == 0 ) hashtable[hash_val] = hashline->next; + else prev->next = hashline->next; + free(hashline); + return 0; + } + return hashline->word; + } + } + if( value == 0 ) return 0; + if( hashtable == 0 ) + { + hashtable = malloc((hashsize+1)*sizeof(char*)); + if( hashtable == 0 ) cfatal("Out of memory"); + for(i=0; i<=hashsize; i++) hashtable[i] = 0; + } + /* Add record */ + hashline = malloc(sizeof(struct hashentry)+strlen(word)); + if( hashline == 0 ) cfatal("Out of memory"); + else + { + hashline->next = hashtable[hash_val]; + hashline->namespace = namespace; + hashline->value = value; + strcpy(hashline->word, word); + hashtable[hash_val] = hashline; + } + return hashline->word; +} + +static int hashvalue(namespace, word) +int namespace; +char * word; +{ + int val = namespace; + char *p = word; + + while(*p) + { + val = ((val<<4)^((val>>12)&0xF)^((*p++)&0xFF)); + } + val &= hashsize; + return val; +} diff --git a/Applications/cpp/main.c b/Applications/cpp/main.c new file mode 100644 index 00000000..78db160c --- /dev/null +++ b/Applications/cpp/main.c @@ -0,0 +1,526 @@ + +#include +#if __STDC__ +#include +#include +#else +#include +#endif +#include +#include +#include + +#include "cc.h" + +#define MAXINCPATH 5 + +int main P((int argc, char ** argv)); +void undefine_macro P((char * name)); +void define_macro P((char * name)); +void print_toks_cpp P((void)); +void print_toks_raw P((void)); +void define_macro P((char *)); +void undefine_macro P((char *)); +void cmsg P((char * mtype, char * str)); +char * token_txn P((int)); +void pr_indent P((int)); +void hash_line P((void)); + +char * include_paths[MAXINCPATH]; + +char last_name[512] = ""; +int last_line = -1; +int debug_mode = 0; +int p_flag = 0; +int exit_code = 0; + +char * outfile = 0; +FILE * ofd = 0; + +int +main(argc, argv) +int argc; +char ** argv; +{ + int ar, i; + char * p; +static char Usage[] = "Usage: cpp -E -0 -Dxxx -Uxxx -Ixxx infile -o outfile"; + +#ifdef LC_CTYPE + setlocale(LC_CTYPE, ""); +#endif + + alltok = 1; /* Get all tokens from the cpp. */ + + for(ar=1; ar=argc) cfatal(Usage); + p = argv[ar]; + } + for(i=0; i=MAXINCPATH) + cfatal("Too many items in include path for CPP"); + break; + case 'D': + if (argv[ar][2]) p=argv[ar]+2; + else { + ar++; + if (ar>=argc) cfatal(Usage); + p = argv[ar]; + } + define_macro(p); + break; + case 'U': + if (argv[ar][2]) p=argv[ar]+2; + else { + ar++; + if (ar>=argc) cfatal(Usage); + p = argv[ar]; + } + undefine_macro(p); + break; + case 'o': + if (argv[ar][2]) p=argv[ar]+2; + else { + ar++; + if (ar>=argc) cfatal(Usage); + p = argv[ar]; + } + if (outfile) cfatal(Usage); + outfile = p; + break; + default: + fprintf(stderr, "CPP Unknown option %s\n", argv[ar]); + cfatal(Usage); + } else if (!curfile) { + /* Input file */ + curfile = fopen(argv[ar], "r"); + c_fname = argv[ar]; c_lineno = 1; + if (!curfile) + cfatal("Cannot open input file"); + } else + cfatal(Usage); + + if (!curfile) + cfatal(Usage); + + /* Define date and time macros. */ + if (dialect != DI_KNR) { + time_t now; + char * timep; + char buf[128]; + time(&now); + timep = ctime(&now); + + /* Yes, well */ + sprintf(buf, "__TIME__=\"%.8s\"", timep + 11); + define_macro(buf); + /* US order; Seems to be mandated by standard. */ + sprintf(buf, "__DATE__=\"%.3s %.2s %.4s\"", timep + 4, timep + 8, timep + 20); + define_macro(buf); + } + + if (outfile) ofd = fopen(outfile, "w"); + else ofd = stdout; + if (!ofd) + cfatal("Cannot open output file"); + + if (debug_mode) + print_toks_raw(); + else + print_toks_cpp(); + + if (outfile) fclose(ofd); + exit(exit_code); +} + +void +undefine_macro(name) +char * name; +{ + struct define_item * ptr; + + ptr = read_entry(0, name); + if (ptr) { + set_entry(0, name, (void*)0); + if (!ptr->in_use) free(ptr); + } +} + +void +define_macro(name) +char * name; +{ + char * p; + char * value; + struct define_item * ptr; + + if ((p=strchr(name, '=')) != 0) { + *p = 0; + value = p+1; + } else + value = "1"; + + undefine_macro(name); + + ptr = malloc(sizeof(struct define_item) + strlen(value)); + ptr->name = set_entry(0, name, ptr); + strcpy(ptr->value, value); + ptr->arg_count = -1; + ptr->in_use = 0; + ptr->next = 0; +} + +FILE * +open_include(fname, mode, checkrel) +char * fname; +char * mode; +int checkrel; +{ + FILE * fd = 0; + int i; + char buf[256], *p; + + if( checkrel ) + { + strcpy(buf, c_fname); + p = strrchr(buf, '/'); + if (p) *++p = 0; else *(p=buf) = 0; + strcpy(p, fname); + + fd=fopen(buf, mode); + } + if (!fd) { + for(i=0; i10) count=10; + while(count>0) {fprintf(ofd, "\t"); count--; } +} + +void +hash_line() +{ + if( strcmp(last_name, c_fname) != 0 ) last_line = -1; + if( c_lineno != last_line || last_line <= 0 ) + { + if( outpos != 0 ) { + fputc('\n', ofd); outpos=0; + if (last_line > 0) last_line++; + } + while( c_lineno > last_line && + (p_flag || c_lineno < last_line+4) && + last_line > 0 && + !debug_mode ) + { + fputc('\n', ofd); last_line++; + } + + if( !p_flag && (c_lineno != last_line || last_line <= 0 )) + { + fprintf(ofd, "# %d", c_lineno); + if( last_line <= 0 ) fprintf(ofd, " \"%s\"", c_fname); + fprintf(ofd, "\n"); + } + + strcpy(last_name, c_fname); + last_line = c_lineno; + } +} + +void +print_toks_cpp() +{ + int i; + int indent=0; + int paren=0; + + hash_line(); + while( (i=gettok()) != EOF ) + { + hash_line(); + switch(i) + { + case '\n': + cwarn("newline received from tokeniser!"); + break; + + case TK_STR: + outpos += fprintf(ofd, "%s", curword); + break; + + case TK_COPY: + if( outpos ) { fputc('\n', ofd); last_line++; } + outpos = 0; last_line++; + fprintf(ofd, "#%s\n", curword); + break; + + case TK_FILE: sprintf(curword, "\"%s\"", c_fname); if(0) { + case TK_LINE: sprintf(curword, "%d", c_lineno); + } + /*FALLTHROUGH*/ + default: + if (!alltok) { + if(i == '}' || i == TK_CASE || i == TK_DEFAULT ) indent--; + if(i ==')') paren--; + + if(outpos) { fputc(' ', ofd); outpos++; } + else pr_indent(indent+(paren!=0)); + + if(i == '{' || i == TK_CASE || i == TK_DEFAULT ) indent++; + if(i ==';') paren=0; + if(i =='(') paren++; + } + + outpos += fprintf(ofd, "%s", curword); + + if ( i == '"' || i == '\'' ) + { + while((i=gettok()) == TK_STR) { + outpos += fprintf(ofd, "%s", curword); + } + if (i != '\n') + outpos += fprintf(ofd, "%s", curword); + } + break; + } + } + if( outpos ) fputc('\n', ofd); + outpos = 0; +} + +void +print_toks_raw() +{ + int i; + long val; + + hash_line(); + while( (i=gettok()) != EOF ) + { + hash_line(); + switch(i) + { + case '"': case '\'': + if (debug_mode < 2) { + fprintf(ofd, "%-16s: %s", "Quoted string", curword); + while((i=gettok()) == TK_STR) + outpos+= fprintf(ofd, "%s", curword); + if ( i == '\n' ) fprintf(ofd, " --> EOL!!\n"); + else outpos+= fprintf(ofd, "%s\n", curword); + break; + } + /*FALLTHROUGH*/ + default: fprintf(ofd, "%-16s: '", token_txn(i)); + { + char *p; + for(p=curword; *p; p++) + if(isprint(*p) && *p != '\'' && *p != '\\') + fputc(*p, ofd); + else if (*p == '\n') fprintf(ofd, "\\n"); + else if (*p == '\t') fprintf(ofd, "\\t"); + else if (*p == '\v') fprintf(ofd, "\\v"); + else if (*p == '\b') fprintf(ofd, "\\b"); + else if (*p == '\r') fprintf(ofd, "\\r"); + else if (*p == '\f') fprintf(ofd, "\\f"); + else if (*p == '\a') fprintf(ofd, "\\a"); + else + fprintf(ofd, "\\x%02x", (unsigned char)*p); + } + fprintf(ofd, "'\n"); + break; + case TK_NUM: + val = strtoul(curword, (void*)0, 0); + fprintf(ofd, "%-16s: ", token_txn(i)); + fprintf(ofd, "%s => %ld\n", curword, val); + break; + case TK_COPY: + fprintf(ofd, "%-16s: ", token_txn(i)); + fprintf(ofd, "#%s\n", curword); + break; + case '\n': + fprintf(ofd, "%-16s:\n", "Newline char"); + break; + } + } +} + +char * +token_txn(token) +int token; +{ + char * s = "UNKNOWN"; + static char buf[17]; + + if (token> ' ' && token <= '~') + { + sprintf(buf, "TK_CHAR('%c')", token); + return buf; + } + if (token >= 0 && token < 0x100) + { + sprintf(buf, "TK_CHAR(%d)", token); + return buf; + } + + switch(token) + { + case TK_WSPACE : s="TK_WSPACE"; break; + case TK_WORD : s="TK_WORD"; break; + case TK_NUM : s="TK_NUM"; break; + case TK_FLT : s="TK_FLT"; break; + case TK_QUOT : s="TK_QUOT"; break; + case TK_STR : s="TK_STR"; break; + case TK_FILE : s="TK_FILE"; break; + case TK_LINE : s="TK_LINE"; break; + case TK_COPY : s="TK_COPY"; break; + case TK_NE_OP : s="TK_NE_OP"; break; + case TK_MOD_ASSIGN : s="TK_MOD_ASSIGN"; break; + case TK_AND_OP : s="TK_AND_OP"; break; + case TK_AND_ASSIGN : s="TK_AND_ASSIGN"; break; + case TK_MUL_ASSIGN : s="TK_MUL_ASSIGN"; break; + case TK_INC_OP : s="TK_INC_OP"; break; + case TK_ADD_ASSIGN : s="TK_ADD_ASSIGN"; break; + case TK_DEC_OP : s="TK_DEC_OP"; break; + case TK_SUB_ASSIGN : s="TK_SUB_ASSIGN"; break; + case TK_PTR_OP : s="TK_PTR_OP"; break; + case TK_ELLIPSIS : s="TK_ELLIPSIS"; break; + case TK_DIV_ASSIGN : s="TK_DIV_ASSIGN"; break; + case TK_LEFT_OP : s="TK_LEFT_OP"; break; + case TK_LEFT_ASSIGN : s="TK_LEFT_ASSIGN"; break; + case TK_LE_OP : s="TK_LE_OP"; break; + case TK_EQ_OP : s="TK_EQ_OP"; break; + case TK_GE_OP : s="TK_GE_OP"; break; + case TK_RIGHT_OP : s="TK_RIGHT_OP"; break; + case TK_RIGHT_ASSIGN : s="TK_RIGHT_ASSIGN"; break; + case TK_XOR_ASSIGN : s="TK_XOR_ASSIGN"; break; + case TK_OR_ASSIGN : s="TK_OR_ASSIGN"; break; + case TK_OR_OP : s="TK_OR_OP"; break; + case TK_AUTO : s="TK_AUTO"; break; + case TK_BREAK : s="TK_BREAK"; break; + case TK_CASE : s="TK_CASE"; break; + case TK_CHAR : s="TK_CHAR"; break; + case TK_CONST : s="TK_CONST"; break; + case TK_CONTINUE : s="TK_CONTINUE"; break; + case TK_DEFAULT : s="TK_DEFAULT"; break; + case TK_DO : s="TK_DO"; break; + case TK_DOUBLE : s="TK_DOUBLE"; break; + case TK_ELSE : s="TK_ELSE"; break; + case TK_ENUM : s="TK_ENUM"; break; + case TK_EXTERN : s="TK_EXTERN"; break; + case TK_FLOAT : s="TK_FLOAT"; break; + case TK_FOR : s="TK_FOR"; break; + case TK_GOTO : s="TK_GOTO"; break; + case TK_IF : s="TK_IF"; break; + case TK_INT : s="TK_INT"; break; + case TK_LONG : s="TK_LONG"; break; + case TK_REGISTER : s="TK_REGISTER"; break; + case TK_RETURN : s="TK_RETURN"; break; + case TK_SHORT : s="TK_SHORT"; break; + case TK_SIGNED : s="TK_SIGNED"; break; + case TK_SIZEOF : s="TK_SIZEOF"; break; + case TK_STATIC : s="TK_STATIC"; break; + case TK_STRUCT : s="TK_STRUCT"; break; + case TK_SWITCH : s="TK_SWITCH"; break; + case TK_TYPEDEF : s="TK_TYPEDEF"; break; + case TK_UNION : s="TK_UNION"; break; + case TK_UNSIGNED : s="TK_UNSIGNED"; break; + case TK_VOID : s="TK_VOID"; break; + case TK_VOLATILE : s="TK_VOLATILE"; break; + case TK_WHILE : s="TK_WHILE"; break; + } + return s; +} diff --git a/Applications/cpp/token1.c b/Applications/cpp/token1.c new file mode 100644 index 00000000..f3aa420d --- /dev/null +++ b/Applications/cpp/token1.c @@ -0,0 +1,11 @@ + +#include +#include +#include "cc.h" + +#ifdef __GNUC__ +__inline +#endif +static unsigned int hash1 P((register const char *, register unsigned int)); + +#include "token1.h" diff --git a/Applications/cpp/token1.h b/Applications/cpp/token1.h new file mode 100644 index 00000000..2e2b9eba --- /dev/null +++ b/Applications/cpp/token1.h @@ -0,0 +1,117 @@ +/* C code produced by gperf version 2.7.1 (19981006 egcs) */ +/* Command-line: gperf -aptTc -N is_ctok -H hash1 token1.tok */ + +#define TOTAL_KEYWORDS 23 +#define MIN_WORD_LENGTH 2 +#define MAX_WORD_LENGTH 3 +#define MIN_HASH_VALUE 2 +#define MAX_HASH_VALUE 63 +/* maximum key range = 62, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#endif +static unsigned int +hash1 (str, len) + register const char *str; + register unsigned int len; +{ + static unsigned char asso_values[] = + { + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 1, 64, 64, 64, 3, 25, 64, + 64, 64, 13, 18, 64, 8, 30, 15, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 5, 0, 20, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 30, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 23, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64 + }; + return len + asso_values[(unsigned char)str[len - 1]] + asso_values[(unsigned char)str[0]]; +} + +#ifdef __GNUC__ +__inline +#endif +struct token_trans * +is_ctok (str, len) + register const char *str; + register unsigned int len; +{ + static struct token_trans wordlist[] = + { + {""}, {""}, + {"==", TK_EQ_OP}, + {"!=", TK_NE_OP}, + {""}, + {"%=", TK_MOD_ASSIGN}, + {""}, + {"<=", TK_LE_OP}, + {"<<=", TK_LEFT_ASSIGN}, + {""}, + {"-=", TK_SUB_ASSIGN}, + {""}, + {"<<", TK_LEFT_OP}, + {""}, {""}, + {"*=", TK_MUL_ASSIGN}, + {""}, + {"/=", TK_DIV_ASSIGN}, + {"--", TK_DEC_OP}, + {""}, + {"+=", TK_ADD_ASSIGN}, + {""}, + {">=", TK_GE_OP}, + {">>=", TK_RIGHT_ASSIGN}, + {""}, + {"|=", TK_OR_ASSIGN}, + {""}, + {"&=", TK_AND_ASSIGN}, + {""}, {""}, + {"->", TK_PTR_OP}, + {""}, + {"^=", TK_XOR_ASSIGN}, + {""}, {""}, {""}, {""}, {""}, + {"++", TK_INC_OP}, + {""}, {""}, {""}, + {">>", TK_RIGHT_OP}, + {""}, {""}, {""}, {""}, {""}, + {"||", TK_OR_OP}, + {""}, {""}, {""}, + {"&&", TK_AND_OP}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"..", TK_WORD}, + {"...", TK_ELLIPSIS} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash1 (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strncmp (str + 1, s + 1, len - 1)) + return &wordlist[key]; + } + } + return 0; +} diff --git a/Applications/cpp/token1.tok b/Applications/cpp/token1.tok new file mode 100644 index 00000000..cd668cef --- /dev/null +++ b/Applications/cpp/token1.tok @@ -0,0 +1,25 @@ +struct token_trans { char * name; int token; }; +%% +..., TK_ELLIPSIS +>>=, TK_RIGHT_ASSIGN +<<=, TK_LEFT_ASSIGN ++=, TK_ADD_ASSIGN +-=, TK_SUB_ASSIGN +*=, TK_MUL_ASSIGN +/=, TK_DIV_ASSIGN +%=, TK_MOD_ASSIGN +&=, TK_AND_ASSIGN +^=, TK_XOR_ASSIGN +|=, TK_OR_ASSIGN +>>, TK_RIGHT_OP +<<, TK_LEFT_OP +++, TK_INC_OP +--, TK_DEC_OP +->, TK_PTR_OP +&&, TK_AND_OP +||, TK_OR_OP +<=, TK_LE_OP +>=, TK_GE_OP +==, TK_EQ_OP +!=, TK_NE_OP +.., TK_WORD diff --git a/Applications/cpp/token2.c b/Applications/cpp/token2.c new file mode 100644 index 00000000..b4d22b1f --- /dev/null +++ b/Applications/cpp/token2.c @@ -0,0 +1,11 @@ + +#include +#include +#include "cc.h" + +#ifdef __GNUC__ +__inline +#endif +static unsigned int hash2 P((register const char *, register unsigned int)); + +#include "token2.h" diff --git a/Applications/cpp/token2.h b/Applications/cpp/token2.h new file mode 100644 index 00000000..62e69b1c --- /dev/null +++ b/Applications/cpp/token2.h @@ -0,0 +1,139 @@ +/* C code produced by gperf version 2.7.1 (19981006 egcs) */ +/* Command-line: gperf -aptTc -k1,3 -N is_ckey -H hash2 token2.tok */ + +#define TOTAL_KEYWORDS 34 +#define MIN_WORD_LENGTH 2 +#define MAX_WORD_LENGTH 8 +#define MIN_HASH_VALUE 2 +#define MAX_HASH_VALUE 69 +/* maximum key range = 68, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#endif +static unsigned int +hash2 (str, len) + register const char *str; + register unsigned int len; +{ + static unsigned char asso_values[] = + { + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 5, 70, 70, 70, 70, 70, 0, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 0, 70, 5, 5, 10, + 10, 20, 20, 25, 70, 0, 70, 70, 50, 70, + 0, 15, 0, 70, 15, 0, 40, 20, 0, 0, + 70, 70, 10, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70 + }; + register int hval = len; + + switch (hval) + { + default: + case 3: + hval += asso_values[(unsigned char)str[2]]; + case 2: + case 1: + hval += asso_values[(unsigned char)str[0]]; + break; + } + return hval; +} + +#ifdef __GNUC__ +__inline +#endif +struct token_trans * +is_ckey (str, len) + register const char *str; + register unsigned int len; +{ + static struct token_trans wordlist[] = + { + {""}, {""}, + {"if", TK_IF}, + {""}, + {"void", TK_VOID}, + {"while", TK_WHILE}, + {"switch", TK_SWITCH}, + {""}, + {"__LINE__", TK_LINE}, + {""}, {""}, + {"static", TK_STATIC}, + {"do", TK_DO}, + {"__FILE__", TK_FILE}, + {"case", TK_CASE}, + {"const", TK_CONST}, + {"sizeof", TK_SIZEOF}, + {""}, + {"continue", TK_CONTINUE}, + {"char", TK_CHAR}, + {"short", TK_SHORT}, + {"struct", TK_STRUCT}, + {""}, {""}, + {"else", TK_ELSE}, + {"union", TK_UNION}, + {""}, {""}, + {"unsigned", TK_UNSIGNED}, + {""}, + {"break", TK_BREAK}, + {"signed", TK_SIGNED}, + {""}, {""}, {""}, {""}, + {"double", TK_DOUBLE}, + {"default", TK_DEFAULT}, + {"for", TK_FOR}, + {""}, + {"float", TK_FLOAT}, + {""}, {""}, + {"int", TK_INT}, + {"enum", TK_ENUM}, + {""}, {""}, + {"typedef", TK_TYPEDEF}, + {"register", TK_REGISTER}, + {"auto", TK_AUTO}, + {""}, {""}, {""}, {""}, + {"long", TK_LONG}, + {""}, {""}, {""}, + {"volatile", TK_VOLATILE}, + {""}, {""}, + {"return", TK_RETURN}, + {""}, {""}, {""}, {""}, + {"extern", TK_EXTERN}, + {""}, {""}, + {"goto", TK_GOTO} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash2 (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strncmp (str + 1, s + 1, len - 1)) + return &wordlist[key]; + } + } + return 0; +} diff --git a/Applications/cpp/token2.tok b/Applications/cpp/token2.tok new file mode 100644 index 00000000..d04e6e89 --- /dev/null +++ b/Applications/cpp/token2.tok @@ -0,0 +1,36 @@ +struct token_trans { char * name; int token; }; +%% +auto, TK_AUTO +break, TK_BREAK +case, TK_CASE +char, TK_CHAR +const, TK_CONST +continue, TK_CONTINUE +default, TK_DEFAULT +do, TK_DO +double, TK_DOUBLE +else, TK_ELSE +enum, TK_ENUM +extern, TK_EXTERN +float, TK_FLOAT +for, TK_FOR +goto, TK_GOTO +if, TK_IF +int, TK_INT +long, TK_LONG +register, TK_REGISTER +return, TK_RETURN +short, TK_SHORT +signed, TK_SIGNED +sizeof, TK_SIZEOF +static, TK_STATIC +struct, TK_STRUCT +switch, TK_SWITCH +typedef, TK_TYPEDEF +union, TK_UNION +unsigned, TK_UNSIGNED +void, TK_VOID +volatile, TK_VOLATILE +while, TK_WHILE +__FILE__, TK_FILE +__LINE__, TK_LINE diff --git a/Applications/cpp/torture.c b/Applications/cpp/torture.c new file mode 100644 index 00000000..5bd13ce8 --- /dev/null +++ b/Applications/cpp/torture.c @@ -0,0 +1,18 @@ +/\ +* \ This _evil_ little file is compilable Ansi C. +* / There are NO extensions ... Waddya think ? +\/ + +// ***/ func() { printf("Hello /* world */ %d ???/?=\n" ??/ +, 1? +'\\ +007': +'??/"'/*"*/ + );} + + +main() +{ + func(); + +}