From: ceriel Date: Fri, 21 Feb 1997 11:27:57 +0000 (+0000) Subject: Added non-correcting error recovery stuff X-Git-Tag: release-5-5~32 X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=c2607fdf0f1d4b9d2490865d466e8abc786d3bf4;p=ack.git Added non-correcting error recovery stuff --- diff --git a/util/LLgen/lib/incl b/util/LLgen/lib/incl index f13bdc56b..5de3ab623 100644 --- a/util/LLgen/lib/incl +++ b/util/LLgen/lib/incl @@ -21,6 +21,10 @@ extern unsigned int LLscnt[]; extern unsigned int LLtcnt[]; extern int LLcsymb; +#if LL_NON_CORR +extern int LLstartsymb; +#endif + #define LLsdecr(d) {LL_assert(LLscnt[d] > 0); LLscnt[d]--;} #define LLtdecr(d) {LL_assert(LLtcnt[d] > 0); LLtcnt[d]--;} #define LLsincr(d) LLscnt[d]++ diff --git a/util/LLgen/lib/nc_incl b/util/LLgen/lib/nc_incl new file mode 100644 index 000000000..7a4c9106f --- /dev/null +++ b/util/LLgen/lib/nc_incl @@ -0,0 +1,70 @@ +#define LLALT 9999 /* Alternative is following */ + +#define LLTERMINAL 1 /* Symbol is a terminal */ +#define LLNONTERMINAL 2 /* Symbol is a nonterminal */ +#define LLEORULE 0 /* No more alternatives */ + + +struct lhs { /* LHS of a rule */ + int nr; /* Nr of the nonterminal */ + struct symbol *rhs; /* Pointer to RHS */ + char first[LLSETSIZE]; /* First set */ + char follow[LLSETSIZE]; /* Follow set */ + char empty; /* Set if nonterminal produces empty */ +}; + +struct symbol { /* Symbol in the RHS of a rule */ + int x; /* LLTERMINAL or LLNONTERMINAL */ + int nr; /* Nr of the symbol */ + struct symbol *link; /* Ptr to next rule with this symbol */ + struct symbol *next; /* Ptr to next symbol in this rule */ + struct lhs *lhs; /* Ptr to LHS */ +}; + +struct terminal { /* Array with links to terminals in a */ + struct symbol *link; /* rule */ +}; + +struct nonterminal { /* Array with links to nt's in a rule */ + struct symbol *link; /* and pointer to LHS's */ + struct lhs *rule; +}; + +struct stack_elt { /* Stack element */ + int flags; /* Some flags */ + int nr; /* 
Nr of symbol */ + int ref_count; /* Nr of predecessors */ + int hyp_ref_count; /* Temporary nr of predecessors */ + int matched; /* Nr of LHS trying to match */ + int nr_nexts; /* Nr of successors */ + struct edge *edges; /* Array of edges to other stack elt's*/ +}; + +/* Possible flags in a stack element */ +#define LLHEAD 1 /* Stack element is a head */ +#define LLDUMMY 2 /* Stack element is substituted */ +#define LLGEN_SEARCH 8 /* Set by 'generate_heads()' */ + + +struct edge { /* Edges of a stack element */ + char flags; /* Some flags */ + struct stack_elt *ptr; /* Array with pointers to stack elt's */ +}; + +/* Possible flags in an edge */ +#define LLLOOP 1 /* Belongs to a loop */ +#define LLLOOP_SEARCH 2 /* Used by 'loop()' */ +#define LLHYP_SEARCH 4 /* Used by 'hyp_run()' */ +#define PRINT_SEARCH 8 /* DEBUG */ +#define LLMARK_SEARCH 16 /* Used by 'mark_loop()' */ +#define LLYES 32 +#define LLNO 64 + +#define LLEOSTACK -1 /* Indicates last element of a stack */ +#define LLHEADS_BUF_INCR 10 /* Nr of elements the buffer will be */ +#define LLCLEANUP_BUF_INCR 25 /* increased by */ +#define LL_VIS_INCR 200 + +/* Macros to manipulate bit sets */ +#define LLIN(a, i) ((a)[(i)/8] & (1 << ((i) % 8))) +#define LLPUTIN(a, i) ((a)[(i)/8] |= (1 << ((i) % 8))) diff --git a/util/LLgen/lib/nc_rec b/util/LLgen/lib/nc_rec new file mode 100644 index 000000000..9fd3a23df --- /dev/null +++ b/util/LLgen/lib/nc_rec @@ -0,0 +1,1791 @@ +#include +#include +#include +#include +#include "Lpars.h" + +/* + compile with -DNOFIRST to disable firstset optimization + -DFOLLOW_OPT to enable followset optimization + NOTE: Followset optimization is not + supported when using -s option of LLgen + -DDEBUG to print debug information +*/ + +extern int LLsymb; +extern int LLstartsymb; + +#if LL_ANSI_C +void LLmessage(int); +#endif + + +struct stacks { + + /* Access to the stacks is through a 'dynamic array' of pointers + * to the heads.
We implemented it this way to save on the number + * of Malloc() calls. nr_heads is the number of heads; heads_buf_size + * is the current size of heads_buf. + */ + + int nr_heads; + struct stack_elt **heads_buf; + int heads_buf_size; + + /* r_rec contains nonterminals already tried to build a new + * stack with, to prevent right-recursive rules making the + * process loop forever + */ + + char r_rec[(LLNNONTERMINALS + 7)/8]; + + /* join_array contains pointers to already substituted stack + * elements, so that if the same nonterminal turns up again + * we can make a link + */ + + struct stack_elt *join_array[LLNNONTERMINALS]; + + /* cleanup_buf contains pointers to elements that can possibly + * be deleted. Again this is implemented as a `growing array'. + * Although it's not so clean to do it this way, it DOES save + * a lot of time, mainly because much less pointer manipulation + * is required, and because it's not necessary to deallocate + * the buffer after each turn. Just set nr_cleanups to 0... + */ + + int nr_cleanups; + int cleanup_buf_size; + struct stack_elt **cleanup_buf; + + /* visited_buf contains pointers to elements whose flags + * need to be cleared + */ + + int nr_visited; + int visited_buf_size; + struct stack_elt **visited_buf; + + + /* start_seen indicates if the last prediction phase + * has matched the start symbol + */ + + int start_seen; + + /* exp_terminals will contain the terminals that are `on top' + * of the prediction graph after a prediction phase + */ + + char exp_terminals[LLSETSIZE]; + + /* check_run_ok indicates whether a stack element can be deleted + * or not + */ + + int check_run_ok; +}; + + +#ifdef DEBUG +static int allocates = 0; +static int deallocates = 0; +static int max_in_use = 0; +static int edge_allocates = 0; +static int edge_deallocates = 0; +static int edge_max_in_use = 0; +#endif + +static int grammar_index = 0; + +/* The grammar should only be built the first time we enter the + * recovery routine.
grammar_read == 0 indicates this has not + * been done yet + */ + +static int grammar_read = 0; + +/* 'terminals' is an array indexed by the number of a terminal and links + * all rules containing this terminal in the RHS + */ + +static struct terminal *terminals; + +/* 'nonterminals' is an array indexed by the number of a nonterminal + * and contains all rules with this nonterminal in the LHS and links all + * rules containing this nonterminal in the RHS + */ + +static struct nonterminal *nonterminals; + + +/* These functions must be called instead of the original functions in + * 'malloc.h'. They offer a checking allocation mechanism. +*/ +#if LL_ANSI_C +static char *Malloc(unsigned); +static char *Realloc(char*, unsigned); + +#else + +static char *Malloc(); +static char *Realloc(); +#endif + + +/* These functions build the grammar */ +#if LL_ANSI_C +static void init_grammar(void); +static void build_grammar(void); +static struct lhs *build_rule(void); +static struct symbol *build_rhs(struct lhs*); +static struct symbol *make_link(struct symbol*); + +#else + +static init_grammar(); +static build_grammar(); +static struct lhs *build_rule(); +static struct symbol *build_rhs(); +static struct symbol *make_link(); +#endif + + +/* These functions operate on the stacks */ +#if LL_ANSI_C +static int optimize(struct stacks*, struct symbol*, int); +static void read_token(void); +static void start_stack(struct stacks*, int, int); +static void continuation(struct stacks*, int, int); +static struct stack_elt *push_rule(struct stack_elt*, struct symbol*); +static void new_head(struct stacks*, struct stack_elt*); +static void to_delete(struct stacks*, struct stack_elt*); +static void substitute(struct stacks*, struct stack_elt*, int); +static int join(struct stacks*, struct stack_elt*, int); +static int path(struct stack_elt*, struct stack_elt*); +static int part_of_loop(struct stack_elt*); +static void generate_heads(struct stacks*, struct stack_elt*, int); +static void 
delete(struct stacks*, struct stack_elt*); +static void hyp_run(struct stack_elt*); +static void check_run(struct stacks*, struct stack_elt*); +static struct stack_elt *split(struct stack_elt*); +static void test(struct stacks*); +static void dump_stack(struct stack_elt*, int); +static void clear_flags(struct stack_elt*, char); +static void clear_gen_flags(struct stacks*); +static void match_heads(struct stacks*, int); +static void cleanup(struct stacks*); +static void initialize(struct stacks*); +static void calculate(struct stacks*, int); +static void kill_stack(struct stacks *stack); +void LLnc_recover(void); + +#else + +static int optimize(); +static read_token(); +static start_stack(); +static continuation(); +static struct stack_elt *push_rule(); +static new_head(); +static to_delete(); +static substitute(); +static int join(); +static int path(); +static int part_of_loop(); +static generate_heads(); +static delete(); +static hyp_run(); +static check_run(); +static struct stack_elt *split(); +static test(); +static dump_stack(); +static clear_flags(); +static clear_gen_flags(); +static match_heads(); +static cleanup(); +static initialize(); +static calculate(); +static kill_stack(); +LLnc_recover(); +#endif + + +#if LL_ANSI_C +static char *Malloc(unsigned size) +#else +static char *Malloc(size) +unsigned size; +#endif +{ + char *p; + + if ((p = malloc(size)) == (char *)0) { + fprintf(stderr, "fatal error: out of memory\n"); + exit(1); + } + return p; +} + + +#if LL_ANSI_C +static char *Realloc(char *ptr, unsigned size) +#else +static char *Realloc(ptr, size) +char *ptr; +unsigned size; +#endif +{ + char *p; + + if ((p = realloc(ptr, size)) == (char *)0) { + fprintf(stderr, "fatal error: out of memory\n"); + exit(1); + } + return p; +} + + +#if LL_ANSI_C +static void init_grammar(void) +#else +static init_grammar() +#endif +{ +/* Allocate and initialize an array for terminals and nonterminals */ + + int i; + + terminals = (struct terminal *) + 
Malloc((unsigned) LLFIRST_NT * sizeof(struct terminal)); + for (i = 0; i < LLFIRST_NT; i++) { + (terminals + i)->link = (struct symbol *)0; + } + + nonterminals = (struct nonterminal *) + Malloc((unsigned)LLNNONTERMINALS * sizeof(struct nonterminal)); + for (i = 0; i < LLNNONTERMINALS; i++) { + (nonterminals + i)->rule = (struct lhs *)0; + (nonterminals + i)->link = (struct symbol *)0; + } +} + + +#if LL_ANSI_C +static void build_grammar(void) +#else +static build_grammar() +#endif +{ +/* Build a rule for every nonterminal. The LHS must be saved first because + * build_rule() advances grammar_index and C does not specify the order in + * which the two sides of an assignment are evaluated + */ + int nt, j; + + init_grammar(); + for (j = 0; j < LLNNONTERMINALS; j++) { + nt = LLgrammar[grammar_index]; + (nonterminals + nt - LLFIRST_NT)->rule = build_rule(); + } +} + + +#if LL_ANSI_C +static struct lhs *build_rule(void) +#else +static struct lhs *build_rule() +#endif +{ +/* Build LHS and call a function to create RHS */ + + struct lhs *l; + int j; + + l = (struct lhs *)Malloc(sizeof(struct lhs)); + l->nr = LLgrammar[grammar_index++]; + + /* Build first set */ + for (j = 0; j < LLSETSIZE; j++) { + l->first[j] = LLgrammar[grammar_index++]; + } + + /* Build follow set */ + for (j = 0; j < LLSETSIZE; j++) { + l->follow[j] = LLgrammar[grammar_index++]; + } + + l->empty = LLgrammar[grammar_index++]; /* Can NT produce empty? */ + + l->rhs = build_rhs(l); + + return l; +} + + +#if LL_ANSI_C +static struct symbol *build_rhs(struct lhs *l) +#else +static struct symbol *build_rhs(l) +struct lhs *l; +#endif +{ +/* Build RHS by creating structs for all symbols including ALT and + * EORULE. Also call a function for linking same terminals and + * nonterminals.
+ */ + + struct symbol *r; + + r = (struct symbol *)Malloc(sizeof(struct symbol)); + if (LLgrammar[grammar_index] == LLALT) { + grammar_index++; + r->x = LLALT; + r->nr = -1; /* Not applicable */ + r->link = (struct symbol *)0; /* Not applicable */ + r->next = build_rhs(l); + r->lhs = l; + } + else if (LLgrammar[grammar_index] == LLEORULE) { + grammar_index++; + r->x = LLEORULE; + r->nr = -1; /* Not applicable */ + r->link = (struct symbol *)0; /* Not applicable */ + r->next = (struct symbol *)0; /* Not applicable */ + r->lhs = l; + } + else if (LLgrammar[grammar_index] < LLFIRST_NT) { + r->x = LLTERMINAL; + r->nr = LLgrammar[grammar_index++]; + r->link = make_link(r); + r->next = build_rhs(l); + r->lhs = l; + } + else { + r->x = LLNONTERMINAL; + r->nr = LLgrammar[grammar_index++]; + r->link = make_link(r); + r->next = build_rhs(l); + r->lhs = l; + } + return r; +} + + +#if LL_ANSI_C +static struct symbol *make_link(struct symbol *r) +#else +static struct symbol *make_link(r) +struct symbol *r; +#endif +{ +/* Link same terminals and nonterminals. Every new symbol is appended + * in front of the corresponding list for efficiency. + */ + + struct symbol *tmp; + + if (r->nr < LLFIRST_NT) { + /* Terminal array */ + tmp = (terminals + r->nr)->link; + (terminals + r->nr)->link = r; + } + else { /* Nonterminal array */ + tmp = (nonterminals + (r->nr - LLFIRST_NT))->link; + (nonterminals + (r->nr - LLFIRST_NT))->link = r; + } + return tmp; +} + + +/*****************************************************************************/ + + +#if LL_ANSI_C +static int optimize(struct stacks* stack, struct symbol *symb_ptr, int l_ahead) +#else +static int optimize(stack, symb_ptr, l_ahead) +struct stacks *stack; +struct symbol *symb_ptr; +int l_ahead; +#endif + +/* Return 1 if rule symb_ptr can start with terminal l_ahead, else return 0. + * The array with expected terminals will also be filled in. 
+ */ +{ + struct lhs *l; + int i; + +#ifdef NOFIRST + return(1); +#else + + if ((l_ahead <= 0) || (l_ahead == EOFILE)) return 1; + + switch(symb_ptr->x) { + case LLTERMINAL: + LLPUTIN(stack->exp_terminals, LLindex[symb_ptr->nr]); + if (symb_ptr->nr != l_ahead) return 0; + else return 1;/*???*/ + + case LLNONTERMINAL: + l = (nonterminals + symb_ptr->nr - LLFIRST_NT)->rule; + if (LLIN(l->first, LLindex[l_ahead])) return 1; + else if (l->empty) { + /* Try next symbol */ + return optimize(stack, symb_ptr->next, l_ahead); + } + else { + for (i = 0; i < LLSETSIZE; i++) { + stack->exp_terminals[i] |= (l->first)[i]; + } + return 0; + } + + default: + +#ifndef FOLLOW_OPT + return(1); +#else + + l = (nonterminals + symb_ptr->lhs->nr - LLFIRST_NT)->rule; + + if (LLIN(l->follow, LLindex[l_ahead])) return 1; + else { + for (i = 0; i < LLSETSIZE; i++) { + stack->exp_terminals[i] |= (l->follow)[i]; + } + return 0; + } +#endif /*FOLLOW_OPT */ + } +#endif /* NOFIRST */ +} + + +#if LL_ANSI_C +static void read_token(void) +#else +static read_token() +#endif + +/* Read token and put it in global variable LLsymb, skipping + * invalid tokens + */ +{ + LLsymb = LL_LEXI(); + while (LLindex[LLsymb] < 0) { + /* Skip garbage tokens */ + LLmessage(0); + LLsymb = LL_LEXI(); + } +} + + +#if LL_ANSI_C +static void start_stack(struct stacks *stack, int base, int l_ahead) +#else +static start_stack(stack, base, l_ahead) +struct stacks *stack; +int base, l_ahead; +#endif + +/* Start stack on base symbol base with lookahead l_ahead */ + +{ + struct stack_elt *bottom, *top; + struct symbol *symb_ptr; + int i; + + /* Find first applicable rule */ + symb_ptr = (terminals + base)->link; + + /* Now try all applicable rules */ + while (symb_ptr != (struct symbol *)0) { + + /* If the current rule cannot start with l_ahead, + * try the next one + */ + if (!optimize(stack, symb_ptr->next, l_ahead)) { + symb_ptr = symb_ptr->link; + continue; + } + + if ( (symb_ptr->next->x == LLTERMINAL) + || 
(symb_ptr->next->x == LLNONTERMINAL) + ) { + /* Allocate an end-of-stack */ +#ifdef DEBUG + allocates++; + if (allocates - deallocates > max_in_use) { + max_in_use = allocates - deallocates; + } +#endif + bottom = (struct stack_elt *) + Malloc(sizeof(struct stack_elt)); + bottom->edges = (struct edge *)0; + bottom->nr = LLEOSTACK; + bottom->flags = 0; + bottom->nr_nexts = 0; + bottom->ref_count = 0; + bottom->hyp_ref_count = -1; + + /* And use the rule to build a stack on it */ + top = push_rule(bottom, symb_ptr->next); + + /* Remember that we're now trying to match the LHS + * of the used rule + */ + bottom->matched = symb_ptr->lhs->nr; + + if (top->nr >= LLFIRST_NT) { + substitute(stack, top, l_ahead); + } + else { + new_head(stack, top); + } + + /* Perhaps this only has produced an empty stack, in + * that case bottom can be deallocated. + */ + if (bottom->ref_count == 0) { + to_delete(stack, bottom); + } + } + else { + /* base was the last element of the rule, so we + * figure we have matched the LHS of this rule. + */ + if (symb_ptr->lhs->nr == LLstartsymb) { + stack->start_seen = 1; + } + + continuation(stack, symb_ptr->lhs->nr, l_ahead); + } + symb_ptr = symb_ptr->link; + } + + + /* Reinitialize some arrays */ + for (i = 0; i < (LLNNONTERMINALS + 7)/8; i++) { + stack->r_rec[i] = (char) 0; + } + + for (i = 0; i < LLNNONTERMINALS; i++) { + stack->join_array[i] = (struct stack_elt *)0; + } + + /* Delete all HEAD flags */ + for (i = 0; i < stack->nr_heads; i++) { + (*(stack->heads_buf + i))->flags &= ~LLHEAD; + } + + /* Delete flags turned on by 'generate_heads()' */ + clear_gen_flags(stack); + /* Try to delete elements on cleanup_buf */ + cleanup(stack); +} + + +#if LL_ANSI_C +static void continuation(struct stacks *stack, int nt, int l_ahead) +#else +static continuation(stack, nt, l_ahead) +struct stacks *stack; +int nt, l_ahead; +#endif + +/* We have 'eaten' a whole stack, and think we recognized nt. 
Now +look for rules that we can proceed with, i.e., containing nt in the RHS. +Each rule found will be developed until a terminal is at the top +of the stack.*/ +{ + + struct symbol *symb_ptr; + struct stack_elt *bottom, *top; + + /* If we've already tried this nt, don't do it again. + * Otherwise we may loop forever on right-recursive rules + */ + if (LLIN(stack->r_rec, nt - LLFIRST_NT)) return; + + /* Mark that we have looked for a continuation for nt */ + LLPUTIN(stack->r_rec, nt - LLFIRST_NT); + + /* Find first applicable rule */ + symb_ptr = (nonterminals + nt - LLFIRST_NT)->link; + + /* Try all applicable rules */ + while (symb_ptr != (struct symbol *)0) { + + /* If the current rule cannot start with l_ahead, + * try the next one + */ + if (!optimize(stack, symb_ptr->next, l_ahead)) { + symb_ptr = symb_ptr->link; + continue; + } + + if ( (symb_ptr->next->x == LLTERMINAL) + || (symb_ptr->next->x == LLNONTERMINAL) + ) { +#ifdef DEBUG + allocates++; + if (allocates - deallocates > max_in_use) { + max_in_use = allocates - deallocates; + } +#endif + bottom = (struct stack_elt *) + Malloc(sizeof(struct stack_elt)); + bottom->edges = (struct edge *)0; + bottom->nr = LLEOSTACK; + bottom->flags = 0; + bottom->nr_nexts = 0; + bottom->ref_count = 0; + bottom->hyp_ref_count = -1; + + /* Use the rule to build a stack on bottom */ + top = push_rule(bottom, symb_ptr->next); + + /* Remember that we're now trying to match the LHS + * of the used rule + */ + bottom->matched = symb_ptr->lhs->nr; + + if (top->nr >= LLFIRST_NT) { + substitute(stack, top, l_ahead); + } + else { + new_head(stack, top); + } + + /* Perhaps this only has produced an empty stack, in + * that case bottom can be deallocated.
+ */ + if (bottom->ref_count == 0) { + delete(stack, bottom); + } + } + else { + /* Stack is still empty */ + if (symb_ptr->lhs->nr == LLstartsymb) { + stack->start_seen = 1; + } + + continuation(stack, symb_ptr->lhs->nr, l_ahead); + } + + symb_ptr = symb_ptr->link; + } +} + + +#if LL_ANSI_C +static struct stack_elt *push_rule(struct stack_elt *element, + struct symbol *symb_ptr) +#else +static struct stack_elt *push_rule(element, symb_ptr) +struct stack_elt *element; +struct symbol *symb_ptr; +#endif + +/* Append the rule symb_ptr to stack element 'element'. Return a + * pointer to the new top of the stack + */ +{ + struct stack_elt *se, *top; + + if ( (symb_ptr->next->x == LLTERMINAL) + || (symb_ptr->next->x == LLNONTERMINAL) + ) { + top = push_rule(element, symb_ptr->next); + } + else { + top = element; + } + +#ifdef DEBUG + allocates++; + if (allocates - deallocates > max_in_use) { + max_in_use = allocates - deallocates; + } +#endif + + se = (struct stack_elt *)Malloc(sizeof(struct stack_elt)); + se->flags = 0; + se->nr = symb_ptr->nr; + se->ref_count = 0; + se->hyp_ref_count = -1; + se->matched = -1; + se->nr_nexts = 1; + +#ifdef DEBUG + edge_allocates++; + if (edge_allocates - edge_deallocates > edge_max_in_use) { + edge_max_in_use = edge_allocates - edge_deallocates; + } +#endif + + se->edges = (struct edge *)Malloc(sizeof(struct edge)); + se->edges->ptr = top; + se->edges->flags = 0; + + top->ref_count++; + return se; +} + + +#if LL_ANSI_C +static void new_head(struct stacks *stack, struct stack_elt *ptr) +#else +static new_head(stack, ptr) +struct stacks *stack; +struct stack_elt *ptr; +#endif + +/* Make ptr a head of stack */ +{ + + /* Is this already a head?*/ + if (ptr->flags & LLHEAD) return; + + if (stack->heads_buf_size == 0) { + stack->heads_buf_size = LLHEADS_BUF_INCR; + stack->heads_buf = (struct stack_elt **) + Malloc(LLHEADS_BUF_INCR * sizeof(struct stack_elt *)); + } + else if (stack->nr_heads == stack->heads_buf_size) { + /* buffer full? 
*/ + stack->heads_buf_size += LLHEADS_BUF_INCR; + stack->heads_buf = (struct stack_elt **) + Realloc((char *) + stack->heads_buf, (unsigned) + stack->heads_buf_size * + sizeof(struct stack_elt *) + ); + } + + *(stack->heads_buf + stack->nr_heads) = ptr; /* Add at the tail */ + stack->nr_heads++; /* Increase number of heads */ + ptr->flags |= LLHEAD; /* Mark it as a head */ + ptr->ref_count++; /* Increase reference count */ + LLPUTIN(stack->exp_terminals, LLindex[ptr->nr]); +} + + +#if LL_ANSI_C +static void to_delete(struct stacks *stack, struct stack_elt *ptr) +#else +static to_delete(stack, ptr) +struct stacks *stack; +struct stack_elt *ptr; +#endif + +/* Remember that ptr has to be deleted */ +{ + + int i; + +#ifdef NOCLEAN + return; +#endif + + + + for (i = 0; i < stack->nr_cleanups; i++) { + /* Check if already in buffer */ + if (*(stack->cleanup_buf + i) == ptr) return; + } + + if (stack->cleanup_buf_size == 0) { + stack->cleanup_buf_size = LLCLEANUP_BUF_INCR; + stack->cleanup_buf = (struct stack_elt **) + Malloc(LLCLEANUP_BUF_INCR * sizeof(struct stack_elt *)); + } + else if (stack->nr_cleanups == stack->cleanup_buf_size) { + stack->cleanup_buf_size += LLCLEANUP_BUF_INCR; + stack->cleanup_buf = (struct stack_elt **) + Realloc((char *) stack->cleanup_buf, + (unsigned) stack->cleanup_buf_size * + sizeof(struct stack_elt *)); + } + *(stack->cleanup_buf + stack->nr_cleanups) = ptr; + stack->nr_cleanups++; +} + + +#if LL_ANSI_C +static void substitute(struct stacks *stack, struct stack_elt *top, int l_ahead) +#else +static substitute(stack, top, l_ahead) +struct stacks *stack; +struct stack_elt *top; +int l_ahead; +#endif + +/* This function substitutes the NT pointed to by 'top'. 
'top' should be a top + * of a stack + */ +{ + struct symbol *symb_ptr; + struct stack_elt *new_top; + + /* Try to join top NT */ + if (join(stack, top, l_ahead)) return; + + /* Find RHS of the rule of nonterminal 'top->nr' */ + symb_ptr = (nonterminals + top->nr - LLFIRST_NT)->rule->rhs; + + /* Mark top as dummy */ + top->flags |= LLDUMMY; + + while (1) { + /* If this is an empty production, search down the stack for + * terminals + */ + if ((symb_ptr->x == LLALT) || (symb_ptr->x == LLEORULE)) { + generate_heads(stack, top, l_ahead); + } + + /* Skip other empty productions, they have no effect. */ + while (symb_ptr->x == LLALT) { + symb_ptr = symb_ptr->next; + } + + if (symb_ptr->x == LLEORULE) { + /* If there are only empty productions, the NT on top + * can be deleted + */ + if (top->ref_count == 0) { + to_delete(stack, top); + } + return; + } + + /* If this rule can produce 'l_ahead' on the top of the stack + * substitute the nonterminal + */ + if (optimize(stack, symb_ptr, l_ahead)) { + new_top = push_rule(top, symb_ptr); + + /* If the new element on top is a nonterminal + * substitute it, else make it a head + */ + if (new_top->nr >= LLFIRST_NT) { + substitute(stack, new_top, l_ahead); + } + else { + new_head(stack, new_top); + } + } + + /* Skip to the next alternative */ + while ( (symb_ptr->x == LLTERMINAL) + || (symb_ptr->x == LLNONTERMINAL) + ) { + symb_ptr = symb_ptr->next; + } + + if (symb_ptr->x == LLEORULE) { + if (top->ref_count == 0) { + to_delete(stack, top); + } + return; + } + else { + symb_ptr = symb_ptr->next; + } + } + +} + + +#if LL_ANSI_C +static int join(struct stacks *stack, struct stack_elt *top, int l_ahead) +#else +static int join(stack, top, l_ahead) +struct stacks *stack; +struct stack_elt *top; +int l_ahead; +#endif + +/* This function tries to connect a NT on top of a stack with another stack, + * which has already substituted this NT + */ +{ + struct stack_elt *se; + int size; + + if ( (se = stack->join_array[top->nr - LLFIRST_NT]) == +
(struct stack_elt *)0 + ) { + stack->join_array[top->nr - LLFIRST_NT] = top; + return 0; /* Join not possible */ + } + else { + se->nr_nexts++; /* Increase number of descendants */ + +#ifdef DEBUG + edge_allocates++; + if (edge_allocates - edge_deallocates > edge_max_in_use) { + edge_max_in_use = edge_allocates - edge_deallocates; + } +#endif + + /* Allocate one more pointer to descendants */ + size = se->nr_nexts * sizeof(struct edge); + se->edges = (struct edge *)Realloc((char *) se->edges, + (unsigned) size); + + /* Link it */ + (se->edges + se->nr_nexts - 1)->ptr = top->edges->ptr; + (se->edges + se->nr_nexts - 1)->flags = 0; + + /* The successor of 'top' gets an extra predecessor. + * 'top' always has only one successor because the stacks are + * constructed in 'depth first' order + */ + top->edges->ptr->ref_count++; + + +#ifndef NOLOOPS + /* If we have made a new loop find all stack elements of this + * loop and mark them + */ + if (path(top->edges->ptr, se)) { + (se->edges + se->nr_nexts - 1)->flags |= LLLOOP; + (se->edges + se->nr_nexts - 1)->flags |= LLYES; + } + clear_flags(top->edges->ptr, (LLNO | LLYES)); +#endif + + + /* Check if joined NT produces empty */ + if ((nonterminals + se->nr - LLFIRST_NT)->rule->empty) { + generate_heads(stack, top, l_ahead); + } + + /* Deallocate top symbol */ + if (top->ref_count == 0) { + to_delete(stack, top); + } + + return 1; + } +} + + +#ifndef NOLOOPS + +#if LL_ANSI_C +static int path(struct stack_elt *se1, struct stack_elt *se2) +#else +static int path(se1, se2) +struct stack_elt *se1, *se2; +#endif /* LL_ANSI_C */ + +/* If there is a path from se1 to se2 it returns 1 and marks all the paths + * between these two points, otherwise it returns 0. The flags LLYES and + * LLNO are used for optimization.
*/ +{ + int i, result = 0; + + if (se1 == se2) return 1; + + for (i = 0; i < se1->nr_nexts; i++) { + if ( (!((se1->edges + i)->flags & LLNO)) + && (!((se1->edges + i)->flags & LLLOOP_SEARCH)) + ) { + (se1->edges + i)->flags |= LLLOOP_SEARCH; + + if (path((se1->edges + i)->ptr, se2)) { + (se1->edges + i)->flags |= LLLOOP; + result = 1; + } + else { + (se1->edges + i)->flags |= LLNO; + } + + (se1->edges + i)->flags &= ~LLLOOP_SEARCH; + } + (se1->edges + i)->flags |= LLYES; + } + return result; +} + + +#if LL_ANSI_C +static int part_of_loop(struct stack_elt *se) +#else +static int part_of_loop(se) +struct stack_elt *se; +#endif /* LL_ANSI_C */ + +/* Checks if 'se' belongs to a loop */ +{ + int i; + + for (i = 0; i < se->nr_nexts; i++) { + if ((se->edges + i)->flags & LLLOOP) return 1; + } + return 0; +} + +#endif /* NOLOOPS */ + + +#if LL_ANSI_C +static void generate_heads(struct stacks *stack, struct stack_elt *se, + int l_ahead) +#else +static generate_heads(stack, se, l_ahead) +struct stacks *stack; +struct stack_elt *se; +int l_ahead; +#endif + +/* This function finds all heads starting at 'se'.
*/ +{ + int i; + struct stack_elt *next_se; + + + for (i = 0; i < se->nr_nexts; i++) { + + if (!((se->edges + i)->ptr->flags & LLGEN_SEARCH)) { + + (se->edges + i)->ptr->flags |= LLGEN_SEARCH; + + next_se = (se->edges + i)->ptr; + + /* Remember a flag has to be cleared later */ + + if (stack->visited_buf_size == 0) { + stack->visited_buf_size = LL_VIS_INCR; + stack->visited_buf = (struct stack_elt **) + Malloc(LL_VIS_INCR * sizeof(struct stack_elt *)); + } + else if (stack->nr_visited == stack->visited_buf_size) { + stack->visited_buf_size += LL_VIS_INCR; + stack->visited_buf = (struct stack_elt **) + Realloc((char *) stack->visited_buf, + (unsigned) stack->visited_buf_size * + sizeof(struct stack_elt *)); + } + *(stack->visited_buf + stack->nr_visited) = next_se; + stack->nr_visited++; + + if (next_se->flags & LLDUMMY) { + generate_heads(stack, next_se, l_ahead); + } + else if (next_se->nr == LLEOSTACK) { + /* We have matched a nt */ + if (next_se->matched == LLstartsymb) { + stack->start_seen = 1; + } + + continuation(stack, next_se->matched, l_ahead); + if (next_se->ref_count == 0) { + to_delete(stack, next_se); + } + } + else if (next_se->nr < LLFIRST_NT) { + /* terminal */ + new_head(stack, next_se); + } + else { + if (next_se->ref_count > 0) { + next_se = split(next_se); + } + substitute(stack, next_se, l_ahead); + } + } + } +} + + +#if LL_ANSI_C +static void delete(struct stacks *stack, struct stack_elt *se) +#else +static delete(stack, se) +struct stacks *stack; +struct stack_elt *se; +#endif + +/* This function runs down the stack(s) deleting every element which cannot be + * reached anymore. 
*/ +{ + int i; + +#ifdef NOCLEAN + return; +#endif + + if (se->ref_count == 0) { + + /* Decrease reference counts of all successors */ + for (i = 0; i < se->nr_nexts; i++) { + if ((se->edges + i)->ptr->ref_count != 0) { + (se->edges + i)->ptr->ref_count--; + + /* Try to delete next element */ + delete(stack, (se->edges + i)->ptr); + } + } + + /* If this element is saved in the join_array clear it */ + if (se->nr >= LLFIRST_NT) { + if (stack->join_array[se->nr - LLFIRST_NT] == se) { + stack->join_array[se->nr - LLFIRST_NT] = + (struct stack_elt *)0; + } + } +#ifdef DEBUG + deallocates++; + edge_deallocates += se->nr_nexts; +#endif + free((char *) se->edges); + free((char *) se); + } + +#ifndef NOLOOPS + /* If this element belongs to a loop try to delete it */ + else if (part_of_loop(se)) { + + /* Do a temporary delete */ + hyp_run(se); + + /* Check it */ + stack->check_run_ok = 1; + check_run(stack, se); + + /* If it can be deleted delete it */ + if (stack->check_run_ok) { + se->ref_count = 0; + delete(stack, se); + } + + } +#endif +} + + +#ifndef NOLOOPS + +#if LL_ANSI_C +static void hyp_run(struct stack_elt *se) +#else +static hyp_run(se) +struct stack_elt *se; +#endif /* LL_ANSI_C */ + +/* This function sets the 'hyp_ref_counts' of all elements of the loop that + * 'se' belongs to to the value that 'ref_count' will get when 'se' is + * deleted + */ +{ + int i; + struct stack_elt *next_se; + + for (i = 0; i < se->nr_nexts; i++) { + next_se = (se->edges + i)->ptr; + + if ( (!((se->edges + i)->flags & LLHYP_SEARCH)) + && ((se->edges + i)->flags & LLLOOP) + ) { + (se->edges + i)->flags |= LLHYP_SEARCH; + + /* If this element is not yet visited initialize + * 'hyp_ref_count' else decrease it by one + */ + if (next_se->hyp_ref_count == -1) { + next_se->hyp_ref_count = next_se->ref_count - 1; + } + else { + next_se->hyp_ref_count--; + } + + /* Continue searching */ + hyp_run(next_se); + } + } +} + + +#if LL_ANSI_C +static void check_run(struct stacks *stack, struct 
stack_elt *se) +#else +static check_run(stack, se) +struct stacks *stack; +struct stack_elt *se; +#endif /* LL_ANSI_C */ + +/* This function checks all 'hyp_ref_counts' that 'hyp_run()' has set. + * If one of them is greater than 0, 'check_run_ok' will be set to 0 indicating + * that 'se' cannot be deleted. 'check_run()' also resets all 'hyp_ref_counts' + */ +{ + int i; + + if (se->hyp_ref_count > 0) { + stack->check_run_ok = 0; + } + + /* Reset 'hyp_ref_count' */ + se->hyp_ref_count = -1; + for (i = 0; i < se->nr_nexts; i++) { + if ((se->edges + i)->flags & LLHYP_SEARCH) { + (se->edges + i)->flags &= ~LLHYP_SEARCH; + check_run(stack, (se->edges + i)->ptr); + } + } +} + +#endif /* NOLOOPS */ + + +#if LL_ANSI_C +static struct stack_elt *split(struct stack_elt *se) +#else +static struct stack_elt *split(se) +struct stack_elt *se; +#endif + +/* This function splits off a NT in the stack, and returns a pointer to it */ +{ + struct stack_elt *new_stack; + int i; + +#ifdef DEBUG + allocates++; + if (allocates - deallocates > max_in_use) { + max_in_use = allocates - deallocates; + } +#endif + + new_stack = (struct stack_elt *)Malloc(sizeof(struct stack_elt)); + new_stack->flags = 0; /* Used by 'clear_gen_flags()' */ + new_stack->nr = se->nr; + new_stack->ref_count = 0; /* Copy is new top */ + new_stack->hyp_ref_count = -1; + new_stack->matched = -1; + new_stack->nr_nexts = se->nr_nexts; + +#ifdef DEBUG + edge_allocates++; + if (edge_allocates - edge_deallocates > edge_max_in_use) { + edge_max_in_use = edge_allocates - edge_deallocates; + } +#endif + + new_stack->edges = (struct edge *) + Malloc((unsigned)se->nr_nexts * sizeof(struct edge)); + + /* Copy gets the same successors as the original */ + memcpy((char *) new_stack->edges, (char *) se->edges, + se->nr_nexts * sizeof(struct edge)); + + /* Each successor gets a new predecessor */ + for (i = 0; i < new_stack->nr_nexts; i++) { + (new_stack->edges + i)->ptr->ref_count++; + (new_stack->edges + i)->flags = 0; + } + + return
new_stack; +} + + +#ifdef DEBUG +#if LL_ANSI_C +static void test(struct stacks *stack) +#else +static test(stack) +struct stacks *stack; +#endif +{ + struct stack_elt *se; + int i; + + printf("STACKS:\n"); + for (i = 0; i < stack->nr_heads; i++) { + printf("%2d: ", i + 1); + if (*(stack->heads_buf + i) == (struct stack_elt *)0) { + printf("NIL\n"); + continue; + } + se = *(stack->heads_buf + i); + dump_stack(se, 1); + clear_flags(se, PRINT_SEARCH); + } +} + + +#if LL_ANSI_C +static void dump_stack(struct stack_elt *se, int level) +#else +static dump_stack(se, level) +struct stack_elt *se; +int level; +#endif +{ + int i, j; + + while (se->nr != LLEOSTACK) { + if ((se->flags & LLDUMMY) && (se->nr_nexts > 1)) { + printf("[%d] <%d,%d,%d>\n", + se->nr, se->ref_count, + se->hyp_ref_count, + se->flags + ); + for (j = 0; j < se->nr_nexts; j++) { + for (i = 1; i <= level; i++) { + printf(" "); + } + printf("%d: ", j + 1); + if (!((se->edges + j)->flags & PRINT_SEARCH)) { + printf(" (%d) ", (se->edges + j)->flags); + (se->edges + j)->flags |= PRINT_SEARCH; + dump_stack((se->edges+j)->ptr,level+1); + /*clear_flags((se->edges+j)->ptr,PRINT_SEARCH);*/ + } + else { + printf("LOOP\n"); + } + } + return; + } + else { + if (se->flags & LLDUMMY) { + printf("[%d] <%d,%d,%d> ", + se->nr,se->ref_count, + se->hyp_ref_count, + se->flags + ); + } + else { + printf("%d <%d,%d,%d> ", + se->nr, se->ref_count, + se->hyp_ref_count, + se->flags + ); + } + if (!(se->edges->flags & PRINT_SEARCH)) { + printf(" (%d) ", se->edges->flags); + se->edges->flags |= PRINT_SEARCH; + se = se->edges->ptr; + } + else { + printf("LOOP\n"); + return; + } + } + } + printf("\n"); +} +#endif + + +#if LL_ANSI_C +static void clear_flags(struct stack_elt *se, char flag) +#else +static clear_flags(se, flag) +struct stack_elt *se; +char flag; +#endif + +/* Clears edge flag 'flag' */ +{ + int i; + + for (i = 0; i < se->nr_nexts; i++) { + if ((se->edges + i)->flags & flag) { + (se->edges + i)->flags &= ~flag; /* clear 
flag */ + clear_flags((se->edges + i)->ptr, flag); + } + } +} + +#if LL_ANSI_C +static void clear_gen_flags(struct stacks *stack) +#else +static clear_gen_flags(stack) +struct stacks *stack; +#endif + +{ + int i; + + for (i = 0; i < stack->nr_visited; i++) { + (*(stack->visited_buf + i))->flags &= ~(LLGEN_SEARCH); + } + + stack->nr_visited = 0; +} + + +#if LL_ANSI_C +static void match_heads(struct stacks *stack, int symb) +#else +static match_heads(stack, symb) +struct stacks *stack; +int symb; +#endif + +/* Match heads_buf against symb, leaving only matching heads, + * whilst deallocating the non-matching stacks + */ +{ + int i; + + int old_nr_heads; + struct stack_elt **old_heads_buf; + + + /* Copy the 'old' heads */ + old_nr_heads = stack->nr_heads; + old_heads_buf = stack->heads_buf; + + + /* Set heads in stack to 0 */ + stack->nr_heads = 0; + stack->heads_buf_size = 0; + stack->heads_buf = (struct stack_elt **) 0; + + + for (i = 0; i < old_nr_heads; i++) { + if ((*(old_heads_buf + i))->nr != symb) { + /* Does not match? 
*/ + (*(old_heads_buf + i))->ref_count--; + (*(old_heads_buf + i))->flags &= ~LLHEAD; + delete(stack, *(old_heads_buf + i)); + } + else { /* Matches */ + if (stack->heads_buf_size == 0) { + stack->heads_buf_size = LLHEADS_BUF_INCR; + stack->heads_buf = (struct stack_elt **) + Malloc((unsigned)stack->heads_buf_size * + sizeof(struct stack_elt *)); + } + else if (stack->nr_heads == stack->heads_buf_size) { + stack->heads_buf_size += LLHEADS_BUF_INCR; + stack->heads_buf = (struct stack_elt **) + Realloc((char *) stack->heads_buf, + (unsigned) stack->heads_buf_size * + sizeof(struct stack_elt *)); + } + *(stack->heads_buf + stack->nr_heads) = + *(old_heads_buf + i); + stack->nr_heads++; + } + } + free((char *) old_heads_buf); +} + + +#if LL_ANSI_C +static void cleanup(struct stacks *stack) +#else +static cleanup(stack) +struct stacks *stack; +#endif + +/* Deletes all elements in 'cleanup_buf()' */ +{ + int i; + + for (i = 0; i < stack->nr_cleanups; i++) { + delete(stack, *(stack->cleanup_buf + i)); + } + + stack->nr_cleanups = 0; + +} + + +#if LL_ANSI_C +static void initialize(struct stacks *stack) +#else +static initialize(stack) +struct stacks *stack; +#endif + +/* Initializes some variables and arrays */ +{ + int j; + + stack->nr_heads = 0; + stack->heads_buf_size = 0; + stack->heads_buf = (struct stack_elt **)0; + + stack->nr_cleanups = 0; + stack->cleanup_buf_size = 0; + stack->cleanup_buf = (struct stack_elt **)0; + + stack->nr_visited = 0; + stack->visited_buf_size = 0; + stack->visited_buf = (struct stack_elt **)0; + + for (j = 0; j < (LLNNONTERMINALS + 7)/8; j++) { + stack->r_rec[j] = (char) 0; + } + + for (j = 0; j < LLNNONTERMINALS; j++) { + stack->join_array[j] = (struct stack_elt *)0; + } + + for (j = 0; j < LLSETSIZE; j++) { + stack->exp_terminals[j] = 0; + } + + stack->start_seen = 0; +} + + +#if LL_ANSI_C +static void calculate(struct stacks *stack, int l_ahead) +#else +static calculate(stack, l_ahead) +struct stacks *stack; +int l_ahead; +#endif + +/* 
This function finds all new heads and deletes the old heads */ +{ + int i; + int old_nr_heads; + struct stack_elt **old_heads_buf; + + /* Make a copy of the heads */ + old_nr_heads = stack->nr_heads; + old_heads_buf = stack->heads_buf; + + stack->nr_heads = 0; + stack->heads_buf = (struct stack_elt **) 0; + stack->heads_buf_size = 0; + + for (i = 0; i < old_nr_heads; i++) { + /* Find all new heads */ + generate_heads(stack, *(old_heads_buf + i), l_ahead); + clear_gen_flags(stack); + + /* Old head can be deleted now */ + (*(old_heads_buf + i))->ref_count--; + delete(stack, *(old_heads_buf + i)); + } + + + cleanup(stack); + free((char *) old_heads_buf); + + /* Reinitialize some things */ + for (i = 0; i < (LLNNONTERMINALS + 7)/8; i++) { + stack->r_rec[i] = (char) 0; + } + + for (i = 0; i < LLNNONTERMINALS; i++) { + stack->join_array[i] = (struct stack_elt *)0; + } + + /* Delete all HEAD flags */ + for (i = 0; i < stack->nr_heads; i++) { + (*(stack->heads_buf + i))->flags &= ~LLHEAD; + } +} + +#if LL_ANSI_C +static void kill_stack(struct stacks *stack) +#else +static kill_stack(stack) +struct stacks *stack; +#endif +{ + int i; + + for (i = 0; i < stack->nr_heads; i++) { + (*(stack->heads_buf + i))->ref_count--; + delete(stack, *(stack->heads_buf + i)); + } +} + + + +#if LL_ANSI_C +void LLnc_recover(void) +#else +LLnc_recover() +#endif + +/* This function contains the main loop for non correcting syntax error + * recovery + */ +{ + int j; + int base_symb; + struct stacks stack; + int max_nr_heads; + int max_nr_good_heads; + + initialize(&stack); + max_nr_heads = 0; + max_nr_good_heads = 0; + + /* Grammar has to be read only once */ + if (!grammar_read) { + build_grammar(); + grammar_read = 1; + } + + /* Read first token */ + read_token(); + base_symb = LLsymb; + + /* Check on end of file */ + if ((base_symb <= 0) || (base_symb == EOFILE)) { + + if ((nonterminals + LLstartsymb - LLFIRST_NT)->rule->empty != 1 + ) { + LLsymb = EOFILE; + LLmessage(0); + } + + 
kill_stack(&stack); + return; + } + + /* Read look ahead token */ + read_token(); + + /* Now search applicable rules and starts the ball rolling */ + start_stack(&stack, base_symb, LLsymb); + + if (stack.nr_heads > max_nr_heads) { + max_nr_heads = stack.nr_heads; + } + + + /* Only matching heads are needed */ + match_heads(&stack, LLsymb); + + if (stack.nr_heads > max_nr_good_heads) { + max_nr_good_heads = stack.nr_heads; + } + + +#ifdef DEBUG + test(&stack); +#endif + + /* Loop untill end of inputfile */ + while ((LLsymb > 0) && (LLsymb != EOFILE)) { + /* When entering the loop LLsymb always contains the + * symbol that was used as look_ahead to construct the stacks, + * or, if optimization is OFF, it contains the symbol with + * which the current heads have been matched + */ + + if (stack.nr_heads == 0) { + /* No more heads left */ + LLmessage(0); + + /* Restart the whole thing */ + initialize(&stack); + + /* The look-ahead caused the empty stack, don't + * use it to start a new one ! 
+ */ + + read_token(); + base_symb = LLsymb; + + /* Check on end of file */ + if ((base_symb <= 0) || (base_symb == EOFILE)) { + if ((nonterminals + LLstartsymb - LLFIRST_NT)->rule->empty != 1) { + LLsymb = EOFILE; + LLmessage(0); + } + kill_stack(&stack); + return; + } + + read_token(); + + start_stack(&stack, base_symb, LLsymb); + + if (stack.nr_heads > max_nr_heads) { + max_nr_heads = stack.nr_heads; + } + + + match_heads(&stack, LLsymb); + + if (stack.nr_heads > max_nr_good_heads) { + max_nr_good_heads = stack.nr_heads; + } + + continue; + } + + + /* Normal case starts here */ + stack.start_seen = 0; + + for (j = 0; j < LLSETSIZE; j++) { + stack.exp_terminals[j] = 0; + } + + /* Read next symbol */ + read_token(); + + /* Generate all new heads and delete old ones */ + calculate(&stack, LLsymb); + + /* Leave out not wanted heads */ + + if (stack.nr_heads > max_nr_heads) { + max_nr_heads = stack.nr_heads; + } + + match_heads(&stack, LLsymb); + + if (stack.nr_heads > max_nr_good_heads) { + max_nr_good_heads = stack.nr_heads; + } + + + +#ifdef DEBUG + test(&stack); +#endif + + } + + /* End of file reached: complain unless a start symbol has been seen, */ + /* then release the remaining heads in either case (no early return) */ + if (stack.start_seen != 1) { + LLsymb = EOFILE; + LLmessage(0); + } + + kill_stack(&stack); + +#ifdef DEBUG + printf("Maximum number of heads: %d\n", max_nr_heads); + printf("Maximum number of good heads: %d\n", max_nr_good_heads); + printf("Number of node allocates: %d\n", allocates); + printf("Number of node deallocates: %d\n", deallocates); + printf("Maximum number of nodes in use: %8d\n", max_in_use); + printf("Sizeof(struct stack_elt) = %8d\n", (int) sizeof(struct stack_elt)); + printf(" --------x\n"); + printf(" %8d\n", (int) (max_in_use * sizeof( + struct stack_elt))); + printf("Number of edge allocates: %d\n", edge_allocates); + printf("Number of edge deallocates: %d\n", edge_deallocates); + printf("Maximum number of edges in use: %8d\n", edge_max_in_use); + printf("Sizeof(struct edge) = %8d\n", (int) sizeof(struct 
edge)); + printf(" --------x\n"); + printf(" %8d\n", (int) (edge_max_in_use * sizeof(struct edge))); + +#endif +} + diff --git a/util/LLgen/lib/rec b/util/LLgen/lib/rec index d27740c41..0c4a1bb8b 100644 --- a/util/LLgen/lib/rec +++ b/util/LLgen/lib/rec @@ -12,6 +12,11 @@ unsigned int LLscnt[LL_NSETS]; int LLcsymb, LLsymb; static int LLlevel; +#if LL_NON_CORR +int LLstartsymb; +static int fake_eof = 0; +#endif + #if LL_ANSI_C #define LL_VOIDCST (void) void LLmessage(int); @@ -39,7 +44,38 @@ LLscan(t) /* * Check if the next symbol is equal to the parameter */ + +#if LL_NON_CORR + /* See if the error recovery has eaten an eof */ + if (fake_eof) { + LLsymb = EOFILE; + fake_eof = 0; + } + else { + LLsymb = LL_LEXI(); + } + + if (LLsymb == t) { +#else if ((LLsymb = LL_LEXI()) == t) { +#endif + +#if LL_NON_CORR + /* Check if a previous parser has 'crashed', in that + * case continue with non-correcting parser + */ + if (err_seen && !nc_done) { + LLnc_recover(); + nc_done = 1; + /* Remember that the error recovery has eaten an eof */ + fake_eof = 1; + if (t != LLsymb) { + LLerror(t); + } + else + return; + } +#endif return; } /* @@ -54,6 +90,31 @@ void LLread(void) { #else LLread() { #endif + +#if LL_NON_CORR + /* Again, check if another parser has crashed, + * in that case intercept and go to the + * non-correcting parser + */ + + if (err_seen && !nc_done) { + LLnc_recover(); + nc_done = 1; + /* Pretend we read end of file */ + LLsymb = EOFILE; + LLcsymb = LLindex[EOFILE]; + fake_eof = 0; + return; + } + + if (fake_eof) { + LLsymb = EOFILE; + LLcsymb = LLindex[EOFILE]; + fake_eof = 0; + return; + } +#endif + for (;;) { if ((LLcsymb = LLindex[(LLsymb = LL_LEXI())]) >= 0) return; LLmessage(0); @@ -85,6 +146,16 @@ LLerror(t) return; } #endif + +#if LL_NON_CORR + if ((!nc_done) && (LLsymb > 0) && (LLsymb != EOFILE)) { + LLmessage(0); + LLnc_recover(); + nc_done = 1; + LLsymb = EOFILE; + } +#endif + if ((LLcsymb = LLindex[LLsymb]) < 0) { LLmessage(0); LLread(); @@ -97,7 +168,25 @@ 
LLerror(t) LL_VOIDCST LLskip(); #endif LLtcnt[i]--; - if (LLsymb != t) LLmessage(t); + if (LLsymb != t) { +#if LL_NON_CORR + /* A little kludge here; when using non-correcting recovery + * it can happen that a program is correct but incomplete. + * Here, we test this, and make sure the appropriate + * message is generated + */ + if (! nc_done) { + int oldLLsymb; + oldLLsymb = LLsymb; + LLsymb = EOFILE; + LLmessage(0); + nc_done = 1; + /* Not really, but to prevent more than 1 error message */ + LLsymb = oldLLsymb; + } +#endif + LLmessage(t); + } } #if LL_ANSI_C @@ -121,6 +210,14 @@ LLsafeerror(t) } return; } +#endif +#if LL_NON_CORR + if ((!nc_done) && (LLsymb > 0) && (LLsymb != EOFILE)) { + LLmessage(0); + LLnc_recover(); + nc_done = 1; + LLsymb = EOFILE; + } #endif LLmessage(t); } @@ -265,7 +362,19 @@ static int LLdoskip(e) continue; } #endif /* LL_USERHOOK */ +#if LL_NON_CORR + if ((!nc_done) && (LLsymb > 0)) { + LLmessage(0); + LLnc_recover(); + nc_done = 1; + fake_eof = 1; + } + else { + LLmessage(0); + } +#else LLmessage(0); +#endif retval = 1; LLread(); } diff --git a/util/LLgen/src/LLgen.g b/util/LLgen/src/LLgen.g index e71e72a71..16b15f3e3 100644 --- a/util/LLgen/src/LLgen.g +++ b/util/LLgen/src/LLgen.g @@ -32,9 +32,7 @@ string store(); p_gram search(); long ftell(); -static int nparams; /* parameter count for nonterminals */ static int acount; /* count #of global actions */ -static int order; static p_term t_list; static int t_cnt; static p_gram alt_table; @@ -48,9 +46,8 @@ static int max_rules; #define RULEINCR 32 /* Here are defined : */ -STATIC newnorder(); -STATIC newtorder(); -STATIC copyact(); +STATIC newnorder(); +STATIC newtorder(); STATIC mkalt(); STATIC mkterm(); STATIC p_gram copyrule(); @@ -169,13 +166,20 @@ def { register string p; } } ';' | C_ONERROR C_IDENT - { if (! onerror) { + { +#ifdef NON_CORRECTING + if (non_corr) { + warning(linecount, "%%onerror conflicts with -n option"); + } + else +#endif + if (! 
onerror) { onerror = store(lextoken.t_string); } else error(linecount,"Duplicate %%onerror"); } ';' - | action(0) { acount++; } + | C_ACTION { acount++; } /* * A global C-declaration */ @@ -216,18 +220,20 @@ rule { register p_nont p; p->n_lineno = linecount; p->n_off = ftell(fact); } - [ params(1) { if (nparams > 0) { + [ C_PARAMS { if (lextoken.t_num > 0) { p->n_flags |= PARAMS; - if (nparams > 15) { + if (lextoken.t_num > 15) { error(linecount,"Too many parameters"); } - else setntparams(p,nparams); + else setntparams(p,lextoken.t_num); } } ]? - [ action(0) { p->n_flags |= LOCALS; } + [ C_ACTION { p->n_flags |= LOCALS; } ]? - ':' productions(&rr) ';' + ':' { in_production = 1; } + productions(&rr) ';' + { in_production = 0; } /* * Do not use p->n_rule now! The nonterms array * might have been re-allocated. @@ -235,15 +241,6 @@ rule { register p_nont p; { nonterms[g_getcont(temp)].n_rule = rr;} ; -action(int n;) - /* - * The parameter n is non-zero when the opening and closing - * bracket must be copied along with the action - */ - : '{' { copyact('{','}',n,0); } - '}' - ; - productions(p_gram *p;) /* * One or more alternatives @@ -280,7 +277,7 @@ productions(p_gram *p;) t = 0; *p = prod; } - ]+ { if (conflres & ~DEF) { + ]+ { if (conflres & (COND|PREFERING|AVOIDING)) { error(n_lc, "Resolver on last alternative not allowed"); } @@ -290,7 +287,7 @@ productions(p_gram *p;) *p = copyrule(&alt_table[n_alts-altcnt],altcnt+1); } | - { if (conflres & ~DEF) { + { if (conflres & (COND|PREFERING|AVOIDING)) { error(o_lc, "No alternation conflict resolver allowed here"); } @@ -336,16 +333,32 @@ simpleproduction(p_gram *p; register int *conflres;) int cnt, kind; int termdeleted = 0; } : - [ C_DEFAULT { *conflres = DEF; } + [ C_DEFAULT { *conflres |= DEF; } ]? [ /* * Optional conflict reslover */ - C_IF expr { *conflres |= COND; } + C_IF C_EXPR { *conflres |= COND; } | C_PREFER { *conflres |= PREFERING; } | C_AVOID { *conflres |= AVOIDING; } ]? 
+ [ C_ILLEGAL { +#ifdef NON_CORRECTING + if (n_rules >= max_rules-2) { + rule_table = (p_gram) ralloc( + (p_mem) rule_table, + (unsigned)(max_rules+=RULEINCR)*sizeof(t_gram)); + } + elmcnt++; + rule_table[n_rules++] = + *search(TERMINAL, "LLILLEGAL", BOTH); + if (*conflres & DEF) { + error(linecount, "%%illegal not allowed in %%default rule"); + } +#endif + } + ]? [ %persistent elem(&elem) { if (n_rules >= max_rules-2) { rule_table = (p_gram) ralloc( @@ -467,9 +480,12 @@ elem (register p_gram pres;) p_gram p1; int ln; p_gram pe; +#ifdef NON_CORRECTING + int erroneous = 0; +#endif } : '[' { ln = linecount; } - [ C_WHILE expr { t |= RESOLVER; } + [ C_WHILE C_EXPR { t |= RESOLVER; } ]? [ C_PERSISTENT { t |= PERSISTENT; } ]? @@ -478,12 +494,32 @@ elem (register p_gram pres;) mkterm(p1,t,ln,pres); } | + [ C_ERRONEOUS { +#ifdef NON_CORRECTING + erroneous = 1; +#endif + } + ]? + + [ C_IDENT { pe = search(UNKNOWN,lextoken.t_string,BOTH); *pres = *pe; +#ifdef NON_CORRECTING + if (erroneous) { + if (g_gettype(pres) != TERMINAL){ + warning(linecount, + "Erroneous only allowed on terminal"); + erroneous = 0; + } + else + pres->g_erroneous = 1; + } +#endif + } - [ params(0) { if (nparams > 15) { + [ C_PARAMS { if (lextoken.t_num > 15) { error(linecount,"Too many parameters"); - } else g_setnpar(pres,nparams); + } else g_setnpar(pres,lextoken.t_num); if (g_gettype(pres) == TERMINAL) { error(linecount, "Terminal with parameters"); @@ -492,27 +528,73 @@ elem (register p_gram pres;) ]? | C_LITERAL { pe = search(LITERAL,lextoken.t_string,BOTH); *pres = *pe; +#ifdef NON_CORRECTING + if (erroneous) + pres->g_erroneous = 1; +#endif } + ] | { g_settype(pres,ACTION); pres->g_lineno = linecount; +#ifdef NON_CORRECTING + g_setsubparse(pres, (p_start) 0); +#endif } - action(1) - ; -params(int formal) -{ - long off = ftell(fact); -} - : '(' { copyact('(', ')', formal ? 
2 : 0, 0); } - ')' - { if (nparams == 0) { - fseek(fact, off, 0); + [ C_SUBSTART + + { +#ifdef NON_CORRECTING + nsubstarts++; +#endif + } + + C_IDENT + { +#ifdef NON_CORRECTING + register p_gram temp; + register p_start subp; + + temp = search(NONTERM,lextoken.t_string,BOTH); + subp = (p_start) alloc (sizeof(t_start)); + + subp->ff_nont = g_getcont(temp); + subp->ff_name = (string) 0; + subp->ff_next = (p_start) 0; + g_setsubparse(pres, subp); +#endif + } + + [ ',' C_IDENT + { +#ifdef NON_CORRECTING + register p_gram temp; + register p_start ff; + + temp = search(NONTERM,lextoken.t_string,BOTH); + + ff = g_getsubparse(pres); + while (ff) { + if (ff->ff_nont == g_getcont(temp)) { + warning(linecount, "\"%s\" used twice in %%substart", lextoken.t_string); + break; + } + ff = ff->ff_next; + } + + ff = (p_start) alloc(sizeof(t_start)); + ff->ff_nont = g_getcont(temp); + ff->ff_name = (string) 0; + ff->ff_next = g_getsubparse(pres); + g_setsubparse(pres, ff); +#endif } - ; -expr : '(' { copyact('(',')',1,0); } - ')' + ]* ';' + ]? + + C_ACTION ; repeats(int *kind; int *cnt;) { int t1 = 0; } : @@ -562,119 +644,6 @@ firsts { register string p; } ; { -STATIC -copyact(ch1,ch2,flag,level) char ch1,ch2; { - /* - * Copy an action to file f. Opening bracket is ch1, closing bracket - * is ch2. - * If flag & 1, copy opening and closing parameters too. - * If flag & 2, don't allow ','. 
- */ - static int text_seen = 0; - register FILE *f; - register ch; /* Current char */ - register match; /* used to read strings */ - int saved; /* save linecount */ - int sav_strip = strip_grammar; - - f = fact; - if (ch1 == '{' || flag != 1) strip_grammar = 0; - if (!level) { - saved = linecount; - text_seen = 0; - nparams = 0; /* count comma's */ - putc('\0',f); - fprintf(f,"# line %d \"%s\"\n", linecount,f_input); - } - if (level || (flag & 1)) putc(ch1,f); - for (;;) { - ch = input(); - if (ch == ch2) { - if (!level) { - unput(ch); - if (text_seen) nparams++; - } - if (level || (flag & 1)) putc(ch,f); - if (strip_grammar != sav_strip) { - if (ch1 == '{' || flag != 1) putchar(ch); - } - strip_grammar = sav_strip; - return; - } - switch(ch) { - case ')': - case '}': - case ']': - error(linecount,"Parentheses mismatch"); - break; - case '(': - text_seen = 1; - copyact('(',')',flag,level+1); - continue; - case '{': - text_seen = 1; - copyact('{','}',flag,level+1); - continue; - case '[': - text_seen = 1; - copyact('[',']',flag,level+1); - continue; - case '/': - ch = input(); - unput(ch); - if (ch == '*') { - putc('/', f); - skipcomment(1); - continue; - } - ch = '/'; - text_seen = 1; - break; - case ';': - case ',': - if (! level && text_seen) { - text_seen = 0; - nparams++; - if (ch == ',' && (flag & 2)) { - warning(linecount, "Parameters may not be separated with a ','"); - ch = ';'; - } - } - break; - case '\'': - case '"' : - /* - * watch out for brackets in strings, they do not - * count ! 
- */ - text_seen = 1; - match = ch; - putc(ch,f); - while((ch = input())) { - if (ch == match) break; - if (ch == '\\') { - putc(ch,f); - ch = input(); - } - if (ch == '\n') { - error(linecount,"Newline in string"); - unput(match); - } - putc(ch,f); - } - if (ch == match) break; - /* Fall through */ - case EOF : - if (!level) error(saved,"Action does not terminate"); - strip_grammar = sav_strip; - return; - default: - if (c_class[ch] != ISSPA) text_seen = 1; - } - putc(ch,f); - } -} - STATIC p_gram copyrule(p,length) register p_gram p; { /* diff --git a/util/LLgen/src/alloc.c b/util/LLgen/src/alloc.c index da418ae35..0580b4a9c 100644 --- a/util/LLgen/src/alloc.c +++ b/util/LLgen/src/alloc.c @@ -80,7 +80,7 @@ new_mem(p) register p_info p; { */ p->i_size += p->i_incr * p->i_esize; } - p->i_ptr = !p->i_ptr ? + p->i_ptr = !p->i_ptr ? alloc(p->i_size) : ralloc(p->i_ptr, p->i_size); p->i_max = p->i_ptr + sz; diff --git a/util/LLgen/src/cclass.c b/util/LLgen/src/cclass.c index 7591f2bb9..dfd7c7d53 100644 --- a/util/LLgen/src/cclass.c +++ b/util/LLgen/src/cclass.c @@ -47,8 +47,8 @@ char c_class[] = { ISKEY, /* '%' */ 0, /* '&' */ ISLIT, /* ''' */ - ISTOK, /* '(' */ - ISTOK, /* ')' */ + ISACT, /* '(' */ + 0, /* ')' */ ISTOK, /* '*' */ ISTOK, /* '+' */ ISTOK, /* ',' */ @@ -130,9 +130,9 @@ char c_class[] = { ISLET, /* 'x' */ ISLET, /* 'y' */ ISLET, /* 'z' */ - ISTOK, /* '{' */ + ISACT, /* '{' */ ISTOK, /* '|' */ - ISTOK, /* '}' */ + 0, /* '}' */ 0, /* '~' */ 0 /* 0177 */ }; diff --git a/util/LLgen/src/cclass.h b/util/LLgen/src/cclass.h index ca60e15f6..61477b01e 100644 --- a/util/LLgen/src/cclass.h +++ b/util/LLgen/src/cclass.h @@ -14,3 +14,4 @@ extern char c_class[]; #define ISTOK 5 #define ISCOM 6 #define ISLIT 7 +#define ISACT 8 diff --git a/util/LLgen/src/check.c b/util/LLgen/src/check.c index 43f3667cc..2c974aecd 100644 --- a/util/LLgen/src/check.c +++ b/util/LLgen/src/check.c @@ -26,7 +26,6 @@ static string rcsid1 = "$Id$"; # endif - static string c_first = "> firstset 
"; static string c_contains = "> containset "; static string c_follow = "> followset "; @@ -72,7 +71,7 @@ conflchecks() { f_input = x->f_name; for (s = x->f_nonterminals; s != -1; s = p->n_next) { p = &nonterms[s]; - if (check(p->n_rule)) p->n_flags |= VERBOSE; + if (check(p->n_rule)) p->n_flags |= VERBOSE; } } for (x = files; x < maxfiles; x++) { @@ -188,7 +187,7 @@ check(p) register p_gram p; { n = &nonterms[g_getcont(p)]; if (g_getnpar(p) != getntparams(n)) { error(p->g_lineno, - "Call of %s: parameter count mismatch", + "Call of %s: parameter count mismatch", n->n_name); } break; } @@ -211,13 +210,13 @@ check(p) register p_gram p; { temp = setalloc(); setunion(temp,q->t_first); if (!setintersect(temp,q->t_follow)) { - /* - * q->t_first * q->t_follow != EMPTY - */ + /* + * q->t_first * q->t_follow != EMPTY + */ if (!(q->t_flags & RESOLVER)) { - /* - * No conflict resolver - */ + /* + * No conflict resolver + */ error(p->g_lineno, "Repetition conflict"); retval = 1; @@ -249,7 +248,7 @@ check(p) register p_gram p; { "Alternation conflict"); retval = 1; moreverbose(temp); - } + } } else { if (l->l_flag & (COND|PREFERING|AVOIDING)) { l->l_flag |= NOCONF; @@ -257,8 +256,8 @@ check(p) register p_gram p; { "Conflict resolver without conflict"); } } - if (l->l_flag & PREFERING) propagate(l->l_symbs,p+1); free( (p_mem) temp); + if (l->l_flag & PREFERING) propagate(l->l_symbs,p+1); retval |= check(l->l_rule); break; } } @@ -378,7 +377,7 @@ prrule(p) register p_gram p; { spaces(); p++; continue; } case LITERAL : - case TERMINAL : { + case TERMINAL : { register p_token pt = &tokens[g_getcont(p)]; fprintf(f,pt->t_tokno<0400 ? 
@@ -463,7 +462,7 @@ propagate(set,p) p_set set; register p_gram p; { while (g_gettype(p) != EORULE) { setminus(g_getlink(p)->l_symbs,set); p++; - } + } } STATIC diff --git a/util/LLgen/src/compute.c b/util/LLgen/src/compute.c index 0546fd004..ef6b28499 100644 --- a/util/LLgen/src/compute.c +++ b/util/LLgen/src/compute.c @@ -59,6 +59,12 @@ STATIC do_contains(); STATIC contains(); STATIC int nsafes(); STATIC int do_safes(); +#ifdef NON_CORRECTING +STATIC int nc_nfirst(); +STATIC nc_first(); +STATIC int nc_nfollow(); +STATIC nc_follow(); +#endif do_compute() { /* @@ -116,6 +122,31 @@ do_compute() { setntsafe(p,SCANDONE); } co_trans(nsafes); + +#ifdef NON_CORRECTING + if (subpars_sim) { + int s; + + /* compute the union of the first sets of all start symbols + Used to compute the nc-first-sets when -s option is given */ + start_firsts = get_set(); + for (st = start; st; st = st->ff_next) { + s = setunion(start_firsts, (&nonterms[st->ff_nont])->n_first); + } + } + + if (non_corr) { + /* compute the non_corr first sets for all nonterminals and terms */ + + co_trans(nc_nfirst); + for (st = start; st; st = st->ff_next) { + p = &nonterms[st->ff_nont]; + PUTIN(p->n_nc_follow,0); + } + co_trans(nc_nfollow); + } +#endif + # ifndef NDEBUG if (debug) { fputs("Safeties:\n", stderr); @@ -151,6 +182,10 @@ createsets() { p = &nonterms[i]; p->n_flags |= GENSTATIC; p->n_first = get_set(); +#ifdef NON_CORRECTING + p->n_nc_first = get_set(); + p->n_nc_follow = get_set(); +#endif p->n_follow = get_set(); walk(f->f_used, p->n_rule); } @@ -185,6 +220,10 @@ walk(u, p) p_set u; register p_gram p; { q = g_getterm(p); q->t_first = get_set(); +#ifdef NON_CORRECTING + q->t_nc_first = get_set(); + q->t_nc_follow = get_set(); +#endif q->t_follow = get_set(); walk(u, q->t_rule); break; } @@ -193,6 +232,9 @@ walk(u, p) p_set u; register p_gram p; { l = g_getlink(p); l->l_symbs = get_set(); +#ifdef NON_CORRECTING + l->l_nc_symbs = get_set(); +#endif l->l_others = get_set(); walk(u, l->l_rule); break; 
} @@ -237,7 +279,7 @@ empty(p) register p_gram p; { for (;;) { switch (g_gettype(p)) { - case EORULE : + case EORULE : return 1; case TERM : { register p_term q; @@ -271,6 +313,12 @@ nfirst(p) register p_nont p; { return first(p->n_first, p->n_rule, 0); } +#ifdef NON_CORRECTING +STATIC int nc_nfirst(p) register p_nont p; { + return nc_first(p->n_nc_first, p->n_rule, 0); +} +#endif + STATIC first(setp,p,flag) p_set setp; register p_gram p; { /* @@ -282,8 +330,8 @@ first(setp,p,flag) p_set setp; register p_gram p; { */ register s; /* Will gather return value */ int noenter;/* when set, unables entering of elements into - * setp. This is necessary to walk through the - * rest of rule p. + * setp. This is necessary to walk through the + * rest of rule p. */ s = 0; @@ -349,6 +397,108 @@ first(setp,p,flag) p_set setp; register p_gram p; { } } +#ifdef NON_CORRECTING +STATIC +nc_first(setp,p,flag) p_set setp; register p_gram p; { + /* + * Compute the non_corr FIRST set of rule p. + * If flag = 0, also the non_corr first sets for terms and + * alternations in the rule p are computed. + * The non_corr FIRST set is put in setp. + * return 1 if the set refered to by "setp" changed + * If the -s flag was given, the union of the first-sets of all + * start symbols is used whenever an action occurs. Else, only the + * first-sets of startsynbols in the %substart are used + */ + + register s; /* Will gather return value */ + int noenter;/* when set, unables entering of elements into + * setp. This is necessary to walk through the + * rest of rule p. 
+ */ + + s = 0; + noenter = 0; + for (;;) { + switch (g_gettype(p)) { + case EORULE : + return s; + case TERM : { + register p_term q; + + q = g_getterm(p); + if (flag == 0) { + if (nc_first(q->t_nc_first,q->t_rule,0))/*nothing*/; + } + if (!noenter) s |= setunion(setp,q->t_nc_first); + p++; + if (r_getkind(q) == STAR || + r_getkind(q) == OPT || + empty(q->t_rule)) continue; + break; } + case ALTERNATION : { + register p_link l; + + l = g_getlink(p); + if (flag == 0) { + if (nc_first(l->l_nc_symbs,l->l_rule,0))/*nothing*/; + } + if (noenter == 0) { + s |= setunion(setp,l->l_nc_symbs); + } + if (g_gettype(p+1) == EORULE) return s; + } + p++; + continue; + case ACTION : { + register p_start subp; + + if (!noenter) + if (subpars_sim) + s |= setunion(setp, start_firsts); + else { + for (subp = g_getsubparse(p); subp; + subp = subp->ff_next) + s |= setunion(setp, (&nonterms[subp->ff_nont])->n_nc_first); + + } + p++; + continue; + } + case LITERAL : + case TERMINAL : + if (g_getcont(p) == g_getcont(illegal_gram)) { + /* Ignore for this set. */ + p++; + continue; + } + if ((noenter == 0) && !IN(setp,g_getcont(p))) { + s = 1; + PUTIN(setp,g_getcont(p)); + } + p++; + break; + case NONTERM : { + register p_nont n; + + n = &nonterms[g_getcont(p)]; + if (noenter == 0) { + s |= setunion(setp,n->n_nc_first); + if (ntneeded) NTPUTIN(setp,g_getcont(p)); + } + p++; + if (n->n_flags & EMPTY) continue; + break; } + } + if (flag == 0) { + noenter = 1; + continue; + } + return s; + } +} +#endif + STATIC int nfollow(p) register p_nont p; { return follow(p->n_follow, p->n_rule); @@ -426,6 +576,87 @@ follow(setp,p) p_set setp; register p_gram p; { } } +#ifdef NON_CORRECTING + +STATIC int +nc_nfollow(p) register p_nont p; { + return nc_follow(p->n_nc_follow, p->n_rule); /* nc_follow() is what fills the t_nc_follow/n_nc_follow sets */ +} + +STATIC +nc_follow(setp,p) p_set setp; register p_gram p; { + /* + * setp is the follow set for the rule p. + * Compute the follow sets in the rule p from this set. + * Return 1 if a follow set of a nonterminal changed. 
+ */ + register s; /* Will gather return value */ + + s = 0; + for (;;) { + switch (g_gettype(p)) { + case EORULE : + return s; + case TERM : { + register p_term q; + + q = g_getterm(p); + if (empty(p+1)) { + /* + * If what follows the term can be empty, + * everything that can follow the whole + * rule can also follow the term + */ + s |= setunion(q->t_nc_follow,setp); + } + /* + * Everything that can start the rest of the rule + * can follow the term + */ + s |= nc_first(q->t_nc_follow,p+1,1); + if (r_getkind(q) == STAR || + r_getkind(q) == PLUS || + r_getnum(q) ) { + /* + * If the term involves a repetition + * of possibly more than one, + * everything that can start the term + * can also follow it. + */ + s |= nc_follow(q->t_nc_first,q->t_rule); + } + /* + * Now propagate the set computed sofar + */ + s |= nc_follow(q->t_nc_follow, q->t_rule); + break; } + case ALTERNATION : + /* + * Just propagate setp + */ + s |= nc_follow(setp,g_getlink(p)->l_rule); + break; + case NONTERM : { + register p_nont n; + + n = &nonterms[g_getcont(p)]; + s |= nc_first(n->n_nc_follow,p+1,1); + if (empty(p+1)) { + /* + * If the rest of p can produce empty, + * everything that follows p can follow + * the nonterminal + */ + s |= setunion(n->n_nc_follow,setp); + } + break; } + } + p++; + } +} + +#endif + STATIC co_dirsymb(setp,p) p_set setp; register p_gram p; { /* @@ -519,7 +750,7 @@ do_lengthcomp() { * Compute the minimum length of a terminal production for each * nonterminal. * This length consists of two fields: the number of terminals, - * and a number that is composed of + * and a number that is composed of * - the number of this alternative * - a crude measure of the number of terms and nonterminals in the * production of this shortest string. 
@@ -562,6 +793,12 @@ complength(p,le) register p_gram p; p_length le; { switch (g_gettype(p)) { case LITERAL : case TERMINAL : +#ifdef NON_CORRECTING + if (g_getcont(p) == g_getcont(illegal_gram)) { + add(&X, INFINITY, 0); + break; + } +#endif add(&X, 1, 0); break; case ALTERNATION : @@ -571,6 +808,7 @@ complength(p,le) register p_gram p; p_length le; { while (g_gettype(p) != EORULE) { cnt++; l = g_getlink(p); + p++; complength(l->l_rule,&i); i.val += cnt; if (l->l_flag & DEF) { @@ -580,7 +818,6 @@ complength(p,le) register p_gram p; p_length le; { if (compare(&i, &X) < 0) { X = i; } - p++; } /* Fall through */ case EORULE : @@ -593,7 +830,7 @@ complength(p,le) register p_gram p; p_length le; { q = g_getterm(p); rep = r_getkind(q); X.val += 1; - if ((q->t_flags&PERSISTENT) || + if ((q->t_flags&PERSISTENT) || rep==FIXED || rep==PLUS) { complength(q->t_rule,&i); add(&X, i.cnt, i.val); @@ -661,6 +898,7 @@ setdefaults(p) register p_gram p; { do { cnt++; l = g_getlink(p); + p++; complength(l->l_rule,&i); i.val += cnt; if (l->l_flag & DEF) temp = 1; @@ -671,7 +909,6 @@ setdefaults(p) register p_gram p; { count = i; } setdefaults(l->l_rule); - p++; } while (g_gettype(p) != EORULE); if (!temp) { /* No user specified default */ @@ -687,7 +924,7 @@ STATIC do_contains(n) register p_nont n; { /* * Compute the total set of symbols that nonterminal n can - * produce + * produce */ if (n->n_contains == 0) { @@ -811,7 +1048,7 @@ do_safes(p,safe,ch) register p_gram p; register int *ch; { for (;;) { switch (g_gettype(p)) { case ACTION: - p++; + p++; continue; case LITERAL: case TERMINAL: @@ -830,12 +1067,13 @@ do_safes(p,safe,ch) register p_gram p; register int *ch; { safe = t_after(rep, i, retval); break; } case ALTERNATION : { - register p_link l; + register p_link l; register int i; retval = -1; while (g_gettype(p) == ALTERNATION) { l = g_getlink(p); + p++; if (safe > SAFE && (l->l_flag & DEF)) { i = do_safes(l->l_rule,SAFESCANDONE,ch); } @@ -848,7 +1086,6 @@ do_safes(p,safe,ch) 
register p_gram p; register int *ch; { } else if (i > retval) retval = i; } - p++; } return retval; } case NONTERM : { diff --git a/util/LLgen/src/extern.h b/util/LLgen/src/extern.h index ade136f8a..fca911761 100644 --- a/util/LLgen/src/extern.h +++ b/util/LLgen/src/extern.h @@ -36,6 +36,11 @@ extern int ntokens; /* number of terminals */ extern int nterms; /* number of terms */ extern int nalts; /* number of alternatives */ extern p_start start; /* will contain startsymbols */ +#ifdef NON_CORRECTING +extern int nsubstarts; /* number of subparserstarts */ +extern p_set start_firsts; /* Will contain the union of first sets of + startsymbols when -n -s option is on */ +#endif extern int linecount; /* line number */ extern int assval; /* to create difference between literals * and other terminals @@ -73,8 +78,17 @@ extern string LLgenid; /* LLgen identification string */ extern t_token lextoken; /* the current token */ extern int nerrors; extern string rec_file, incl_file; +#ifdef NON_CORRECTING +extern string nc_rec_file, nc_incl_file; +#endif extern int low_percentage, high_percentage; extern int min_cases_for_jmptable; extern int jmptable_option; extern int ansi_c; +#ifdef NON_CORRECTING +extern int non_corr; +extern int subpars_sim; +extern p_gram illegal_gram; +#endif extern int strip_grammar; +extern int in_production; diff --git a/util/LLgen/src/gencode.c b/util/LLgen/src/gencode.c index 7d4ba26fe..0a18fd786 100644 --- a/util/LLgen/src/gencode.c +++ b/util/LLgen/src/gencode.c @@ -51,6 +51,9 @@ extern gencode(); STATIC opentemp(); STATIC geninclude(); STATIC genrecovery(); +#ifdef NON_CORRECTING +STATIC genncrecovery(); +#endif STATIC string genname(); STATIC generate(); STATIC prset(); @@ -109,18 +112,31 @@ genhdr() else { fputs("#if __STDC__ || __cplusplus\n#define LL_ANSI_C 1\n#endif\n", fpars); } +#ifdef NON_CORRECTING + if (non_corr) fputs("#define LL_NON_CORR 1\n", fpars); +#endif fprintf(fpars, "#define LL_LEXI %s\n", lexical); copyfile(incl_file); } 
gencode(argc) { register p_file p = files; - + /* Set up for code generation */ if ((fact = fopen(f_temp,"r")) == NULL) { fatal(0,e_noopen,f_temp); } +#ifdef NON_CORRECTING + /* The non-correcting error recovery must be generated BEFORE + parser code is generated!!!! In case of conflict resolvers, + the code-generation process will delete conflicting symbols + from first and followsets, making them UNUSABLE for the + non-correcting error recovery code. + */ + if (non_corr) + genncrecovery(); +#endif /* For every source file .... */ while (argc--) { /* Open temporary */ @@ -138,6 +154,7 @@ gencode(argc) { } geninclude(); genrecovery(); + fclose(fact); } @@ -167,13 +184,18 @@ geninclude() { } fprintf(fpars, "#define %s_MAXTOKNO %d\n", prefix ? prefix : "LL", maxno); +#ifdef NON_CORRECTING + if (non_corr) { + fprintf(fpars, "#define %sNONCORR\n", prefix ? prefix : "LL"); + } +#endif doclose(fpars); install(f_include, "."); } STATIC genrecovery() { - register FILE *f; + register FILE *f; register p_token t; register int *q; register p_nont p; @@ -202,6 +224,12 @@ genrecovery() { i > 0 ? 
i : 1, ntokens); if (onerror) fprintf(f,"#define LL_USERHOOK %s\n", onerror); +#ifdef NON_CORRECTING + if (non_corr) { + fputs("static int nc_done = 0;\n", f); + fputs("static int err_seen = 0;\n", f); + } +#endif /* Now generate the routines that call the startsymbols */ fputs("#if LL_ANSI_C\n", f); for (st = start; st; st = st->ff_next) { @@ -214,7 +242,18 @@ genrecovery() { for (st = start; st; st = st->ff_next) { fprintf(f, "#if LL_ANSI_C\nvoid %s(void)\n#else\n%s()\n#endif\n", st->ff_name, st->ff_name); p = &nonterms[st->ff_nont]; - fputs(" {\n\tunsigned int s[LL_NTERMINALS+LL_NSETS+2];\n\tLLnewlevel(s);\n\tLLread();\n", f); + fputs(" {\n\tunsigned int s[LL_NTERMINALS+LL_NSETS+2];", f); +#ifdef NON_CORRECTING + if (non_corr) { + fputs(" \n\tint oldstartsymb;", f); + fputs(" \n\tint oldncflag;", f); + fputs(" \n\toldstartsymb = LLstartsymb;", f); + fputs(" \n\toldncflag = nc_done;", f); + fputs(" \n\tnc_done = 0;", f); + fprintf(f, "\n\tLLstartsymb = %d;", st->ff_nont + assval); + } +#endif + fputs("\n\tLLnewlevel(s);\n\tLLread();\n", f); if (g_gettype(p->n_rule) == ALTERNATION) { genpush(findindex(p->n_contains)); } @@ -224,7 +263,18 @@ genrecovery() { fputs("\tLL_NOSCANDONE(EOFILE);\n",f); } else fputs("\tLL_SCANDONE(EOFILE);\n",f); - fputs("\tLLoldlevel(s);\n}\n",f); + fputs("\tLLoldlevel(s);\n",f); +#ifdef NON_CORRECTING + if (non_corr) { + fputs("\tLLstartsymb = oldstartsymb;\n", f); + fputs("\tif (nc_done == 1) { \n", f); + fputs("\t\terr_seen = 1;\n", f); + fputs("\tnc_done = oldncflag;\n", f); + fputs("\t}\n", f); + } +#endif + fputs("}\n", f); + } /* Now generate the sets */ fputs("static char LLsets[] = {\n",f); @@ -254,6 +304,46 @@ genrecovery() { install(f_rec, "."); } +#ifdef NON_CORRECTING +STATIC +genncrecovery() { + register FILE *f; + register p_token t; + register int *q; + int *index; + + /* Generate the non-correcting error recovery file */ + + opentemp((string) 0); + f = fpars; + + genhdr(); + correct_prefix(); + save_grammar(f); + + 
fprintf(f, "#define LLFIRST_NT %d\n", assval); + fprintf(f, "#define LLSETSIZE %d\n", nbytes); + + index = (int *) alloc((unsigned) (assval * sizeof(int))); + for (q = index; q < &index[assval];) *q++ = -1; + for (t = tokens; t < maxt; t++) { + index[t->t_tokno] = t - tokens; + } + fputs("#define LLindex (LL_index+1)\nstatic short LL_index[] = {0,0,\n",f); + for (q = index+1; q < &index[assval]; q++) { + fprintf(f, "%d,\n", *q); + } + fputs(c_arrend, f); + free((p_mem) index); + + copyfile(nc_incl_file); + copyfile(nc_rec_file); + + doclose(f); + install(f_nc, "."); +} +#endif + STATIC generate(f) p_file f; { /* @@ -272,7 +362,7 @@ generate(f) p_file f; { for (ff = f->f_firsts; ff; ff = ff->ff_next) { macro(ff->ff_name,&nonterms[ff->ff_nont]); } - + /* For every nonterminal generate a function */ for (s = f->f_nonterminals; s != -1; s = p->n_next) { p = &nonterms[s]; @@ -378,7 +468,7 @@ STATIC getparams() { /* getparams is called if a nonterminal has parameters. The names * of the parameters have to be found, and they should be declared - */ + */ long off; register int l; long ftell(); @@ -407,7 +497,7 @@ getparams() { } fputs(") ",fpars); /* - * Now copy the declarations + * Now copy the declarations */ l = getc(fact); /* patch: some implementations of fseek do not work properly after "ungetc" @@ -469,7 +559,7 @@ getansiparams(mkdef) { /* getansiparams is called if a nonterminal has parameters * and an ANSI C function definition/declaration has to be produced. * If a definition has to be produced, "mkdef" is set to 1. 
- */ + */ register int l; int delayed = 0; @@ -911,7 +1001,7 @@ codeforterm(q,safety,toplevel) register p_term q; { int term_is_persistent = (q->t_flags & PERSISTENT); int ispushed = NOPOP; - if (!(toplevel > 0 && + if (!(toplevel > 0 && (safety == 0 || (!onerror && safety <= SAFESCANDONE)) && (rep_kind == OPT || (rep_kind == FIXED && rep_count == 0)))) { ispushed = findindex(q->t_contains); @@ -1091,21 +1181,21 @@ genswhead(q, rep_kind, rep_count, safety, ispushed) register p_term q; { STATIC gencases(tokenlist, caseno, compacted) - int *tokenlist; + int *tokenlist; { /* * setp points to a bitset indicating which cases must * be generated. - * YECH, the PCC compiler does not accept many cases without statements - * inbetween, so after every case label an empty statement is - * generated. + * YECH, the PCC compiler does not accept many cases without + * statements in between, so after every case label an empty + * statement is generated. * The C-grammar used by PCC is really stupid on this point : * it contains the rule - * statement : label statement + * statement : label statement * which is right-recursive, and as is well known, LALR parsers don't - * handle these things very good. + * handle these things very well. * The grammar should have been written : - * labeledstatement : labels statement ; + * labeledstatement : labels statement ; * labels : labels label | ; */ register p_token p; @@ -1119,7 +1209,7 @@ gencases(tokenlist, caseno, compacted) (p->t_tokno < 0400 ? "/* case '%s' */\n" : "/* case %s */\n") : p->t_tokno<0400 ? "case /* '%s' */ %d : ;\n" - : "case /* %s */ %d : ;\n", + : "case /* %s */ %d : ;\n", p->t_string, i); } } @@ -1220,10 +1310,10 @@ out_list(tokenlist, listno, casecnt) register int i; register FILE *f = fpars; - fprintf(f, "static %s LL%d_tklist[] = {", + fprintf(f, "static %s LL%d_tklist[] = {", casecnt <= 127 ? "char" : "short", listno); - + for (i = 0; i < ntokens; i++) { fprintf(f, "%c%d,", i % 10 == 0 ? 
'\n': ' ', tokenlist[i]); } @@ -1260,6 +1350,10 @@ correct_prefix() fprintf(f, "#define LLnewlevel %snewlevel\n", s); fprintf(f, "#define LLoldlevel %soldlevel\n", s); fprintf(f, "#define LLmessage %smessage\n", s); +#ifdef NON_CORRECTING + fprintf(f, "#define LLnc_recovery %sncrecovery\n", s); + fprintf(f, "#define LLstartsymb %sstartsymb\n", s); +#endif } fprintf(f, "#include \"%s\"\n", f_include); } diff --git a/util/LLgen/src/global.c b/util/LLgen/src/global.c index a8128a0dc..8c54e7bd4 100644 --- a/util/LLgen/src/global.c +++ b/util/LLgen/src/global.c @@ -33,6 +33,10 @@ p_token maxt; int ntokens; int nterms, nalts; int norder, torder; +#ifdef NON_CORRECTING +int nsubstarts; +p_set start_firsts; +#endif p_start start; int linecount; int assval; @@ -42,6 +46,9 @@ FILE *finput; FILE *fact; char f_pars[] = PARSERFILE; char f_temp[] = ACTFILE; +#ifdef NON_CORRECTING +char f_nc[20]; +#endif char f_out[20]; string f_input; char f_include[20]; @@ -64,8 +71,19 @@ string LLgenid = "/* LLgen generated code from source %s */\n"; t_token lextoken; int nerrors; string rec_file, incl_file; +#ifdef NON_CORRECTING +string nc_rec_file, nc_incl_file; +#endif int low_percentage = 10, high_percentage = 30; int min_cases_for_jmptable = 8; int jmptable_option; int ansi_c = 0; +#ifdef NON_CORRECTING +int non_corr = 0; +int subpars_sim = 0; +p_gram illegal_gram; +#endif int strip_grammar = 0; +int in_production; /* set when the parser is reading a production + rule. 
+ */ diff --git a/util/LLgen/src/io.h b/util/LLgen/src/io.h index 8997587e8..d81f96446 100644 --- a/util/LLgen/src/io.h +++ b/util/LLgen/src/io.h @@ -25,7 +25,9 @@ # define ACTFILE "tempXXXXXX" /* temporary file to save actions */ # define HFILE "%spars.h" /* file for "#define's " */ # define RFILE "%spars.c" /* Error recovery */ - +#ifdef NON_CORRECTING +# define NCFILE "%sncor.c" /* Non-correcting error recovery */ +#endif extern FILE *finput; extern FILE *fpars; extern FILE *fact; @@ -36,3 +38,6 @@ extern char f_out[]; extern string f_input; extern char f_include[]; extern char f_rec[]; +#ifdef NON_CORRECTING +extern char f_nc[]; +#endif diff --git a/util/LLgen/src/main.c b/util/LLgen/src/main.c index a7cd8109f..f7cf1a9a7 100644 --- a/util/LLgen/src/main.c +++ b/util/LLgen/src/main.c @@ -41,13 +41,13 @@ extern char *sbrk(); main(argc,argv) register string argv[]; { register string arg; string libpath(); - char *beg_sbrk; + char *beg_sbrk = 0; /* Initialize */ assval = 0400; /* read options */ - + while (argc >= 2 && (arg = argv[1], *arg == '-')) { while (*++arg) { switch(*arg) { @@ -84,7 +84,7 @@ main(argc,argv) register string argv[]; { fprintf(stderr,"duplicate -r flag\n"); exit(1); } - rec_file = ++arg; + rec_file = ++arg; break; case 'i': case 'I': @@ -92,7 +92,7 @@ main(argc,argv) register string argv[]; { fprintf(stderr,"duplicate -i flag\n"); exit(1); } - incl_file = ++arg; + incl_file = ++arg; break; #endif /* not NDEBUG */ case 'x': @@ -104,8 +104,18 @@ main(argc,argv) register string argv[]; { case 'A': ansi_c = 1; continue; +#ifdef NON_CORRECTING + case 'n': + case 'N': + non_corr = 1; + continue; case 's': case 'S': + subpars_sim = 1; + continue; +#endif + case 'g': + case 'G': strip_grammar = 1; continue; default: @@ -120,6 +130,13 @@ main(argc,argv) register string argv[]; { if (verbose) beg_sbrk = sbrk(0); +#ifdef NON_CORRECTING + if ((subpars_sim) && (!non_corr)) { + fprintf(stderr,"option -s illegal without -n, turned off\n"); + subpars_sim = 0; 
+ } +#endif + /* * Now check wether the sets should include nonterminals */ @@ -139,6 +156,12 @@ main(argc,argv) register string argv[]; { # ifndef NDEBUG } # endif +#ifdef NON_CORRECTING + if (non_corr) { + nc_incl_file = libpath("nc_incl"); + nc_rec_file = libpath ("nc_rec"); + } +#endif mktemp(f_temp); mktemp(f_pars); if ((fact = fopen(f_temp,"w")) == NULL) { @@ -154,6 +177,10 @@ main(argc,argv) register string argv[]; { */ sprintf(f_include, HFILE, prefix ? prefix : "L"); sprintf(f_rec, RFILE, prefix ? prefix : "L"); +#ifdef NON_CORRECTING + if (non_corr) + sprintf(f_nc, NCFILE, prefix ? prefix : "L"); +#endif setinit(ntneeded); maxnt = &nonterms[nnonterms]; maxt = &tokens[ntokens]; @@ -216,7 +243,7 @@ readgrammar(argc,argv) char *argv[]; { /* * There must be a start symbol! */ - if (start == 0) { + if (! nerrors && start == 0) { fatal(linecount,"Missing %%start"); } if (nerrors) comfatal(); @@ -237,7 +264,7 @@ doparse(p) register p_file p; { } /* VARARGS1 */ -error(lineno,s,t,u) string s,t,u; { +error(lineno,s,t,u) string s,t,u; { /* * Just an error message */ @@ -250,7 +277,7 @@ error(lineno,s,t,u) string s,t,u; { } /* VARARGS1 */ -warning(lineno,s,t,u) string s,t,u; { +warning(lineno,s,t,u) string s,t,u; { /* * Just a warning */ @@ -292,7 +319,7 @@ copyfile(file) string file; { register FILE *f; if ((f = fopen(file,"r")) == NULL) { - fatal(0,"Cannot open libraryfile, call an expert"); + fatal(0,"Cannot open library file %s, call an expert",file); } while ((c = getc(f)) != EOF) putc(c,fpars); fclose(f); diff --git a/util/LLgen/src/name.c b/util/LLgen/src/name.c index 33663e028..4d34097fb 100644 --- a/util/LLgen/src/name.c +++ b/util/LLgen/src/name.c @@ -51,6 +51,9 @@ name_init() { nont_info.i_esize = sizeof (t_nont); nont_info.i_incr = 50; search(TERMINAL,"EOFILE",ENTERING); +#ifdef NON_CORRECTING + illegal_gram = search(TERMINAL,"LLILLEGAL",ENTERING); +#endif } STATIC p_entry @@ -65,10 +68,13 @@ newentry(str, next) string str; p_entry next; { p->h_name = 
str; p->h_next = next; p->h_type.g_lineno = linecount; +#ifdef NON_CORRECTING + p->h_type.g_erroneous = 0; +#endif return p; } -string +string store(s) string s; { /* * Store a string s in the name table @@ -147,14 +153,14 @@ search(type,str,option) register string str; { "%s : is already defined",str); } p->h_type.g_lineno = linecount; - return &(p->h_type); + return &(p->h_type); } } p = newentry(store(str), h_root[i]); h_root[i] = p; if (type == TERMINAL || type == LITERAL) { register p_token pt; - + pt = (p_token) new_mem(&token_info); tokens = (p_token) token_info.i_ptr; pt->t_string = p->h_name; @@ -166,7 +172,7 @@ search(type,str,option) register string str; { if (str[2] == '\0') { switch(str[1]) { case 'n' : - val = '\n'; + val = '\n'; break; case 'r' : val = '\r'; @@ -175,19 +181,19 @@ search(type,str,option) register string str; { val = '\b'; break; case 'f' : - val = '\f'; + val = '\f'; break; case 't' : - val = '\t'; + val = '\t'; break; case '\'': - val = '\''; + val = '\''; break; case '\\': - val = '\\'; + val = '\\'; break; default : - error(linecount,e_literal); + error(linecount,e_literal); } } else { /* @@ -200,7 +206,7 @@ search(type,str,option) register string str; { val = 64*str[1] - 73*'0' + 8*str[2] + str[3]; } - } else { + } else { /* * No escape in literal */ @@ -221,7 +227,7 @@ search(type,str,option) register string str; { return &(p->h_type); } /* - * type == NONTERM || type == UNKNOWN + * type == NONTERM || type == UNKNOWN * UNKNOWN and not yet declared means : NONTERM */ { diff --git a/util/LLgen/src/proto.make b/util/LLgen/src/proto.make index 9db3b6d9f..841a3ade2 100644 --- a/util/LLgen/src/proto.make +++ b/util/LLgen/src/proto.make @@ -15,11 +15,11 @@ LLOPT= # -vvv -x OBJECTS = main.$(SUF) gencode.$(SUF) compute.$(SUF) LLgen.$(SUF) tokens.$(SUF) \ check.$(SUF) reach.$(SUF) global.$(SUF) name.$(SUF) sets.$(SUF) \ - Lpars.$(SUF) alloc.$(SUF) machdep.$(SUF) cclass.$(SUF) + Lpars.$(SUF) alloc.$(SUF) machdep.$(SUF) cclass.$(SUF) 
savegram.$(SUF) CSRC = $(SRC_DIR)/main.c $(SRC_DIR)/gencode.c $(SRC_DIR)/compute.c \ $(SRC_DIR)/check.c $(SRC_DIR)/reach.c $(SRC_DIR)/global.c \ $(SRC_DIR)/name.c $(SRC_DIR)/sets.c $(SRC_DIR)/alloc.c \ - $(SRC_DIR)/machdep.c $(SRC_DIR)/cclass.c + $(SRC_DIR)/machdep.c $(SRC_DIR)/cclass.c $(SRC_DIR)/savegram.c CFILES = LLgen.c tokens.c Lpars.c $(CSRC) GFILES = $(SRC_DIR)/tokens.g $(SRC_DIR)/LLgen.g FILES = $(SRC_DIR)/types.h $(SRC_DIR)/extern.h \ diff --git a/util/LLgen/src/savegram.c b/util/LLgen/src/savegram.c new file mode 100644 index 000000000..0addd4c31 --- /dev/null +++ b/util/LLgen/src/savegram.c @@ -0,0 +1,385 @@ +/* Copyright (c) 1991 by the Vrije Universiteit, Amsterdam, the Netherlands. + * All rights reserved. + */ + +#ifdef NON_CORRECTING + +/* + * L L G E N + * + * An Extended LL(1) Parser Generator + * + * Author : Ceriel J.H. Jacobs + */ + +/* + * savegram.c + * Save the input grammar for non-correcting error recovery + * + * Grammar rules are `flattened' by introducing anonymous nonterminals. + * [B]? becomes X; X: B | {empty} + * [B]+ becomes X: B Y; Y: X | {empty} + * [B]* becomes X; X: B X | {empty} + * [B | C] becomes X; X: B | C + * [B | C]* becomes X; X: B X | C X | {empty} etc. 
+ */ + + +# include "types.h" +# include "extern.h" +# include "io.h" +# include "assert.h" +# include "sets.h" + +#define LLALT 9999 + +static int nt_highest; +extern int nbytes; +extern p_mem alloc(); +extern p_set start_firsts; +extern p_set setalloc(); +extern p_gram search(); + +STATIC save_rule(); +STATIC save_set(); + +/* t_list will contain terms to be `flattened' */ +static struct t_list { + p_term term; + int t_nt_num; +} *t_list; + +/* Subparse list will contain symbols in %substart */ +static struct subparse_list { + p_gram sub_action; + int sub_nt_num; +} *sub_list; + +/* Index in t_list */ +static int t_list_index; + +/* Index in subparse_list */; +static int sub_list_index; + +/* File to save grammar to */ +static FILE *fgram; + +/* Nonterminal number to simulate parsers that get called in actions + used when LLgen called with -n -s options */ +int act_nt; + +save_grammar(f) FILE *f; { + /* + * Save the grammar + */ + register p_nont p; + register p_start st; + register int nt_nr; + + fgram = f; + + /* Compute highest nonterminal nr. */ + nt_highest = nnonterms + assval - 1; + + + /* Generate some constants in the grammar file */ + + /* Allocate terms list */ + t_list = (struct t_list *) alloc((unsigned) nterms * sizeof(struct t_list)); + t_list_index = 0; + + sub_list = (struct subparse_list *) alloc(nsubstarts * sizeof(struct subparse_list)); + + fputs("static ", fgram); + fputs((prefix ? 
prefix : "LL"), fgram); + fputs("grammar[] = {\n", fgram); + + /* Check if -n -s option is on */ + if (subpars_sim) { + + /* Allocate action simulation nt */ + + act_nt = ++nt_highest; + + /* write simulation rule */ + fprintf(fgram, "/* Simulation rule */\n"); + fprintf(fgram, "%d,\n", act_nt); + + /* Put a firstset and a fake followset */ + /* Followset optimization is not implemented for + -s because it would be hard, and does not + bring enough improvement to justify the effort + */ + save_set(start_firsts); + save_set(start_firsts); + /* Simulation rule produces empty */ + fprintf(fgram, "%d,\n", 1); + for (st = start; st; st = st->ff_next) + { + fprintf(fgram, "%d, %d, %d, \n", st->ff_nont + assval, + act_nt, LLALT); + } + fprintf(fgram, "%d, \n", 0); + + } + + /* Now process all rules */ + for (p = nonterms, nt_nr = assval; p < maxnt; p++, nt_nr++) { + fprintf(fgram, "/* nr. %d %s */\n", nt_nr, p->n_name); + fprintf(fgram, "%d, ",nt_nr); + if (! p->n_rule) { /* undefined */ + f_input = p->n_string; + error(p->n_lineno,"Nonterminal %s not defined", + p->n_name); + } + + /* Save the first_set and follow set */ + save_set(p->n_nc_first); + save_set(p->n_nc_follow); + + if (p->n_flags & EMPTY) + fprintf(fgram, "%d,\n", 1); + else + fprintf(fgram, "%d,\n", 0); + + save_rule(p->n_rule, 0); + + fprintf(fgram, "%d,\n", 0); + } + + /* Resolve terms, they are on t_list */ + + fprintf(fgram, "/* Fresh nonterminals */\n"); + + { int i; + for (i = 0; i < t_list_index; i++) + { + + /* Terms of the form [] without + ? 
* or number produce + a NIL pointer in the term-list */ + if ((t_list + i)->term == (struct term *) 0) { + continue; + } + + fprintf(fgram, "%d, ", (t_list + i)->t_nt_num); + + /* Save the first and follow sets */ + + save_set((t_list + i)->term->t_nc_first); + save_set((t_list + i)->term->t_nc_follow); + + /* NOTE: A VARIABLE REPETITION COUNT TERMS RULE IS NOT + ALLOWED TO PRODUCE EMPTY IN LLGEN + */ + + switch(r_getkind((t_list + i)->term)) { + case FIXED: + /* Already done by repeating new nonterminal */ + + /* FIXED term-rule may produce empty */ + if (empty((t_list +i)->term->t_rule)) + fprintf(fgram, "%d,\n", 1); + else + fprintf(fgram, "%d,\n", 0); + + save_rule((t_list + i)->term->t_rule, 0); + fprintf(fgram, "%d,\n", 0); + break; + case STAR: + /* Save the rule, appending the new lhs for this rule */ + + /* Star rules always produce empty */ + fprintf(fgram, "1,\n"); + + save_rule((t_list + i)->term->t_rule, + (t_list + i)->t_nt_num); + fprintf(fgram, "%d,\n%d,\n", LLALT, 0); + /* ALT EMPTY*/ + break; + case PLUS: + /* Save the rule appending a fresh nonterminal */ + + fprintf(fgram, "%d,\n", 0); + + save_rule((t_list + i)->term->t_rule, ++nt_highest); + fprintf(fgram, "%d,\n", 0); /* EOR */ + fprintf(fgram, "%d, ", nt_highest); + /* First set of the extra nonterm is same as + for the term */ + /* Except that the new nonterm also produces empty ! 
*/ + save_set((t_list + i)->term->t_nc_first); + save_set((t_list + i)->term->t_nc_follow); + fprintf(fgram, "1,\n"); + fprintf(fgram, "%d, ", (t_list+i)->t_nt_num); + fprintf(fgram, "%d,\n%d,\n", LLALT, 0); /* ALT EMPTY */ + break; + case OPT: + fprintf(fgram, "1,\n"); + save_rule((t_list + i)->term->t_rule, 0); + fprintf(fgram, "%d,\n%d,\n", LLALT, 0); /* ALT EMPTY */ + break; + } + } + } + + /* Resolve %substarts */ + if (!subpars_sim) { + int i,s,check; + p_start ff, gg; + p_set temp_set; + + for (i = 0; i < sub_list_index; i++) { + fprintf(fgram, "%d, ", (sub_list + i)->sub_nt_num); + /* Compute the first set */ + temp_set = setalloc(); + for (ff = g_getsubparse((sub_list + i)->sub_action); + ff; ff = ff->ff_next){ + s = setunion(temp_set, + (&nonterms[ff->ff_nont])->n_first); + check = 0; + for (gg =start; gg; gg = gg->ff_next) + if (ff->ff_nont == gg->ff_nont) + check = 1; + if (check == 0) + warning((sub_list + i)->sub_action->g_lineno, + "\"%s\" is not a startsymbol", + (&nonterms[ff->ff_nont])->n_name); + } + save_set(temp_set); + save_set(temp_set); + free(temp_set); + + /* Produces empty */ + fprintf(fgram, "1,\n"); + + ff = g_getsubparse((sub_list + i)->sub_action); + + for (; ff; ff = ff->ff_next) + fprintf(fgram, "%d, %d, %d, \n", ff->ff_nont + assval, + (sub_list + i)->sub_nt_num, + LLALT); + fprintf(fgram, "%d, \n", 0); + } + } + + fprintf(fgram, "%d\n};\n", 0); + fprintf(fgram, "#define LLNNONTERMINALS %d\n", nt_highest - assval + 1); +} + +STATIC +save_rule(p, tail) register p_gram p; int tail; { +/* + Walk through rule p, saving it. The non-terminal tail is + appended to the rule. It needs to be appended in this function + to process alt-rules correctly. Tail == 0 means don't append. + */ + + int in_alt; + int illegal_num; + /* Processing an alt needs some special care. 
When processing the + first alternative, we don't want to write the alt-code; + When appending something to the alt, it needs to be appended to + every alternative and not at the end of the rule. + */ + + /* Look up the ILLEGAL token number */ + illegal_num = tokens[g_getcont(illegal_gram)].t_tokno; + + in_alt = 0; + for (;;) { + switch(g_gettype(p)) { + case ALTERNATION : + if (in_alt) + fprintf(fgram, "%d,\n", LLALT); + else + in_alt = 1; + save_rule(g_getlink(p)->l_rule, tail); + break; + case TERM : + /* Make entry in term list */ + (t_list + t_list_index)->term = g_getterm(p); + /* Test for [] without specifier */ + if (g_getterm(p) == (struct term *) 0) { + t_list_index++; + break; + } + (t_list + t_list_index++)->t_nt_num = ++nt_highest; + fprintf(fgram, "%d, ", nt_highest); + /* Check if repetition, if so handle here */ + if (r_getkind(g_getterm(p)) == FIXED) + { + int k; + for (k = 1; k < r_getnum(g_getterm(p)); k++) + fprintf(fgram, "%d, ", nt_highest); + } + break; + case NONTERM : + fprintf(fgram, "%d, ", g_getcont(p) + assval); + break; + case TERMINAL: + if (g_getcont(p) == g_getcont(illegal_gram)) { + /* %illegal. Ignore. */ + break; + } + if (p->g_erroneous) + fprintf(fgram, "%d, ", illegal_num); + else + fprintf(fgram, "%d, ", + tokens[g_getcont(p)].t_tokno); + break; + case LITERAL: + if (p->g_erroneous) + fprintf(fgram, "%d, ", illegal_num); + else + fprintf(fgram, "%d, ", + tokens[g_getcont(p)].t_tokno); + break; + case ACTION: + if (subpars_sim) { + fprintf(fgram, "%d, ", act_nt); + } + else if (g_getsubparse(p)) { + /* Allocate nonterminal that will simulate + subparser + */ + (sub_list + sub_list_index)->sub_nt_num = + ++nt_highest; + (sub_list + sub_list_index++)->sub_action = p; + + fprintf(fgram, "%d, ", nt_highest); + } + break; + case EORULE : + if ((! in_alt) && tail ) + /* If this rule is not an alt, append tail now. 
+ If it is an alt, the recursive call of this function + has appended tail to each alternative + */ + fprintf(fgram, "%d, ", tail); + return; + } + p++; + } +} + +STATIC +save_set(p) p_set p; { + register int k; + register unsigned i; + int j; + + j = nbytes; + for (;;) { + i = (unsigned) *p++; + for (k = 0; k < sizeof(int); k++) { + fprintf(fgram,"0%o,",(int)(i & 0377)); + i >>= 8; + if (--j == 0) { + fputs("\n",fgram); + return; + } + } + } + /* NOTREACHED */ +} +#endif diff --git a/util/LLgen/src/sets.c b/util/LLgen/src/sets.c index 7fffdb6b4..dd1c1364f 100644 --- a/util/LLgen/src/sets.c +++ b/util/LLgen/src/sets.c @@ -31,7 +31,7 @@ extern p_set setalloc(); extern p_set get_set(); extern int setunion(); extern int setintersect(); -extern setminus(); +extern setminus(); extern int setempty(); extern int findindex(); extern int setcount(); diff --git a/util/LLgen/src/tokens.g b/util/LLgen/src/tokens.g index cbfe25558..7dd8e7a22 100644 --- a/util/LLgen/src/tokens.g +++ b/util/LLgen/src/tokens.g @@ -14,7 +14,7 @@ /* * tokens.g * Defines the tokens for the grammar of LLgen. - * The lexical analyser and LLmessage are also included here. + * The lexical analyser and LLmessage are also included here. 
*/ { @@ -30,7 +30,7 @@ static string rcsidc = "$Id$"; /* Here are defined : */ extern int scanner(); -extern LLmessage(); +extern LLmessage(); extern int input(); extern unput(); extern skipcomment(); @@ -39,12 +39,18 @@ STATIC linedirective(); # endif STATIC string cpy(); STATIC string vallookup(); +STATIC copyact(); + +static int nparams; } /* Classes */ -%token C_IDENT ; /* lextoken.t_string contains the identifier read */ +%token C_IDENT ; /* lextoken.t_string contains the identifier read */ %token C_NUMBER ; /* lextoken.t_num contains the number read */ %token C_LITERAL ; /* lextoken.t_string contains the literal read */ +%token C_EXPR ; /* A C expression (%if or %while) */ +%token C_PARAMS ; /* formal or actual parameters */ +%token C_ACTION ; /* a C action */ /* Keywords */ @@ -60,6 +66,9 @@ STATIC string vallookup(); %token C_AVOID ; %token C_PREFER ; %token C_DEFAULT ; +%token C_SUBSTART ; +%token C_ERRONEOUS ; +%token C_ILLEGAL ; %lexical scanner ; @@ -80,26 +89,143 @@ typedef struct keyword { */ static t_keyw resword[] = { - { "token", C_TOKEN }, - { "avoid", C_AVOID }, + { "token", C_TOKEN }, + { "avoid", C_AVOID }, { "prefer", C_PREFER }, { "persistent", C_PERSISTENT }, { "default", C_DEFAULT }, - { "if", C_IF }, - { "while", C_WHILE }, - { "first", C_FIRST }, - { "start", C_START }, + { "if", C_IF }, + { "while", C_WHILE }, + { "first", C_FIRST }, + { "start", C_START }, { "lexical", C_LEXICAL }, { "onerror", C_ONERROR }, { "prefix", C_PREFIX }, - { 0, 0 } +#ifdef NON_CORRECTING + { "substart", C_SUBSTART }, + { "erroneous", C_ERRONEOUS }, + { "illegal", C_ILLEGAL }, +#endif + { 0, 0 } }; static t_token savedtok; /* to save lextoken in case of an insertion */ # ifdef LINE_DIRECTIVE -static int nostartline; /* = 0 if at the start of a line */ +static int nostartline; /* = 0 if at the start of a line */ # endif +STATIC +copyact(ch1,ch2,flag,level) char ch1,ch2; { + /* + * Copy an action to file f. Opening bracket is ch1, closing bracket + * is ch2. 
+ * If flag & 1, copy opening and closing parameters too. + * If flag & 2, don't allow ','. + */ + static int text_seen = 0; + register FILE *f; + register ch; /* Current char */ + register match; /* used to read strings */ + int saved = linecount; + /* save linecount */ + int sav_strip = strip_grammar; + + f = fact; + if (ch1 == '{' || flag != 1) strip_grammar = 0; + if (!level) { + text_seen = 0; + nparams = 0; /* count comma's */ + putc('\0',f); + fprintf(f,"# line %d \"%s\"\n", linecount,f_input); + } + if (level || (flag & 1)) putc(ch1,f); + for (;;) { + ch = input(); + if (ch == ch2) { + if (!level) { + if (text_seen) nparams++; + } + if (level || (flag & 1)) putc(ch,f); + if (strip_grammar != sav_strip) { + if (ch1 == '{' || flag != 1) putchar(ch); + } + strip_grammar = sav_strip; + return; + } + switch(ch) { + case ')': + case '}': + case ']': + error(linecount,"Parentheses mismatch"); + break; + case '(': + text_seen = 1; + copyact('(',')',flag,level+1); + continue; + case '{': + text_seen = 1; + copyact('{','}',flag,level+1); + continue; + case '[': + text_seen = 1; + copyact('[',']',flag,level+1); + continue; + case '/': + ch = input(); + unput(ch); + if (ch == '*') { + putc('/', f); + skipcomment(1); + continue; + } + ch = '/'; + text_seen = 1; + break; + case ';': + case ',': + if (! level && text_seen) { + text_seen = 0; + nparams++; + if (ch == ',' && (flag & 2)) { + warning(linecount, "Parameters may not be separated with a ','"); + ch = ';'; + } + } + break; + case '\'': + case '"' : + /* + * watch out for brackets in strings, they do not + * count ! 
+ */ + text_seen = 1; + match = ch; + putc(ch,f); + while((ch = input())) { + if (ch == match) break; + if (ch == '\\') { + putc(ch,f); + ch = input(); + } + if (ch == '\n') { + error(linecount,"Newline in string"); + unput(match); + } + putc(ch,f); + } + if (ch == match) break; + /* Fall through */ + case EOF : + if (!level) error(saved,"Action does not terminate"); + strip_grammar = sav_strip; + return; + default: + if (c_class[ch] != ISSPA) text_seen = 1; + } + putc(ch,f); + } +} + scanner() { /* * Lexical analyser, what else @@ -108,7 +234,11 @@ scanner() { register char *p = ltext; int reserved = 0; /* reserved word? */ char *max = &ltext[LTEXTSZ - 1]; + static int nextexpr; + int expect_expr = nextexpr; + long off; + nextexpr = 0; if (savedtok.t_tokno) { /* A token has been inserted. * Now deliver the last lextoken again @@ -127,6 +257,21 @@ scanner() { } # endif switch(c_class[ch]) { + case ISACT : + if (ch == '{') { + copyact('{', '}', in_production, 0); + return C_ACTION; + } + assert(ch == '('); + if (expect_expr) { + copyact('(', ')', 1, 0); + return C_EXPR; + } + off = ftell(fact); + copyact('(', ')', in_production != 0 ? 0 : 2, 0); + if (nparams == 0) fseek(fact, off, 0); + lextoken.t_num = nparams; + return C_PARAMS; case ISLIT : for (;;) { ch = input(); @@ -177,7 +322,7 @@ scanner() { unput(ch); *p = '\0'; if (reserved) { /* - * Now search for the keyword + * Now search for the keyword */ register p_keyw w; @@ -187,6 +332,10 @@ scanner() { /* * Return token number. */ + if (w->w_value == C_IF || + w->w_value == C_WHILE) { + nextexpr = 1; + } return w->w_value; } w++; @@ -208,11 +357,11 @@ input() { */ register c; - if (c = backupc) { + if (c = backupc) { /* Last char was "unput()".
Deliver it again */ backupc = 0; - return c; + return c; } if ((c = getc(finput)) == EOF) { nonline = 0; @@ -337,7 +486,7 @@ cpy(s,p,inserted) register string p; { register string t = 0; switch(s) { - case C_IDENT : + case C_IDENT : if (!inserted) t = lextoken.t_string; else t = "identifier"; break; @@ -353,7 +502,7 @@ cpy(s,p,inserted) register string p; { t = "literal"; break; case EOFILE : - t = "endoffile"; + t = "end-of-file"; break; } if (!t && (t = vallookup(s))) { @@ -382,13 +531,15 @@ cpy(s,p,inserted) register string p; { case '\r' : *p++ = 'r'; break; case '\t' : *p++ = 't'; break; default : *p++='0'+((s&0377)>>6); *p++='0'+((s>>3)&07); - *p++='0'+(s&07); + *p++='0'+(s&07); } } *p++ = '\''; return p; } +string strcpy(); + LLmessage(d) { /* * d is either 0, in which case the current token has been deleted, @@ -400,9 +551,16 @@ LLmessage(d) { nerrors++; s = buf; - if (d == 0) { - s = cpy(LLsymb,s,0); + if (d < 0) { + strcpy(buf, "end-of-file expected"); + } + else if (d == 0) { +#ifdef LLNONCORR + t = " unexpected"; +#else t = " deleted"; +#endif + s = cpy(LLsymb,s,0); do *s++ = *t; while (*t++); } else { s = cpy(d,s,1); @@ -411,12 +569,7 @@ LLmessage(d) { s = cpy(LLsymb,s,0); *s = '\0'; } - error(linecount, "%s", buf); - /* Don't change this line to - * error(linecount, buf). - * The string in "buf" might contain '%' ... - */ - if (d) { /* + if (d > 0) { /* * Save the current token and make up some * attributes for the inserted token */ @@ -426,5 +579,17 @@ LLmessage(d) { else if (d == C_LITERAL) lextoken.t_string = "dummy_literal"; else if (d == C_NUMBER) lextoken.t_num = 1; } +#ifdef LLNONCORR + else +#endif + error(linecount, "%s", buf); + /* Don't change this line to + * error(linecount, buf). + * The string in "buf" might contain '%' ... 
+ */ +#ifdef LLNONCORR + in_production = 1; + /* To prevent warnings from copyact */ +#endif } } diff --git a/util/LLgen/src/types.h b/util/LLgen/src/types.h index cb680ca3f..da18696f6 100644 --- a/util/LLgen/src/types.h +++ b/util/LLgen/src/types.h @@ -40,12 +40,20 @@ typedef struct token { * structure for the grammar elements */ typedef struct gram { - short x; /* for lay-out see comment below */ - short g_lineno; /* element found on this line number */ + int x; /* for lay-out see comment below */ + int g_lineno; /* element found on this line number */ +#ifdef NON_CORRECTING + int g_erroneous; /* 1 if element declared erroneous */ +#endif union { int g_index; struct term * g_term; struct link * g_link; +#ifdef NON_CORRECTING + /* If this is an action with a %substart g_subparse + points to the list of startsymbols of the subparser */ + struct ff_firsts *g_subparse; +#endif } g_i; } t_gram,*p_gram; @@ -78,7 +86,10 @@ typedef struct gram { # define g_setterm(p,s) ((p)->g_i.g_term = (s)) # define g_setlink(p,s) ((p)->g_i.g_link = (s)) # define g_setnpar(p,s) { assert(((unsigned)(s))<=017);(p)->x=((p)->x&~0170)|((s)<<3);} - +#ifdef NON_CORRECTING +# define g_getsubparse(p) ((p)->g_i.g_subparse) +# define g_setsubparse(p,s) ((p)->g_i.g_subparse = (s)) +#endif /* * Some constants to communicate with the symbol table search routine */ @@ -101,7 +112,7 @@ typedef struct gram { * nonterminal structure */ typedef struct { - short n_flags; /* low order four bits are reserved + int n_flags; /* low order four bits are reserved * the parameter count */ # define getntparams(p) ((p)->n_flags&017) @@ -110,7 +121,7 @@ typedef struct { # define RECURSIVE 02000 /* Set if the default rule is recursive */ # define PARAMS 04000 /* tells if a nonterminal has parameters */ # define EMPTY 010000 /* tells if a nonterminal produces empty */ -# define LOCALS 020000 /* local declarations ? */ +# define LOCALS 020000 /* local declarations ? 
*/ # define REACHABLE 040000 /* can this nonterminal be reached ? */ # define VERBOSE 0100000 /* Set if in LL.output file */ char n_insafety; @@ -119,8 +130,8 @@ typedef struct { # define setntsafe(p,i) {assert(((unsigned)(i))<=NOSAFETY);(p)->n_insafety=(i);} # define getntout(p) ((p)->n_outsafety) # define setntout(p,i) {assert(((unsigned)(i))<=NOSAFETY);(p)->n_outsafety=(i);} - short n_count; /* pieces of code before this rule */ - short n_lineno; /* declared on line ... */ + int n_count; /* pieces of code before this rule */ + int n_lineno; /* declared on line ... */ p_gram n_rule; /* pointer to right hand side of rule */ union { p_set n_f; /* ptr to "first" set */ @@ -131,6 +142,10 @@ typedef struct { } n_x; # define n_first n_x.n_f # define n_string n_x.n_s +#ifdef NON_CORRECTING + p_set n_nc_first; /* Pointer to non-corr first set */ + p_set n_nc_follow; /* Pointer to non-corr follow set */ +#endif p_set n_follow; /* pointer to the "follow" set */ p_set n_contains; /* pointer to symbols that can be produced */ string n_name; /* name of nonterminal */ @@ -138,7 +153,7 @@ typedef struct { long n_off; /* index of parameters in action file */ } t_nont, *p_nont; -/* +/* * hash table structure */ typedef struct h_entry { @@ -161,13 +176,16 @@ typedef struct link { */ p_gram l_rule; /* pointer to this rule */ p_set l_symbs; /* set, when to take this rule */ +#ifdef NON_CORRECTING + p_set l_nc_symbs; +#endif p_set l_others; /* set, when to take another rule */ } t_link, *p_link; /* * Structure for a repitition specification */ -typedef short t_reps,*p_reps; +typedef int t_reps,*p_reps; # define FIXED 00 /* a fixed number */ # define STAR 01 /* 0 or more times */ @@ -187,7 +205,7 @@ typedef short t_reps,*p_reps; */ typedef struct term { t_reps t_repeats; - short t_flags; /* Low order three bits for safety */ + int t_flags; /* Low order three bits for safety */ # define gettout(q) ((q)->t_flags&07) # define settout(q,i) 
{assert(((unsigned)(i))<=NOSAFETY);(q)->t_flags&=~07;(q)->t_flags|=i;} # define PERSISTENT 010 /* Set if this term has %persistent */ @@ -199,6 +217,10 @@ typedef struct term { p_gram t_rule; /* pointer to this term */ p_set t_follow; /* set of followers */ p_set t_first; /* set of firsts */ +#ifdef NON_CORRECTING + p_set t_nc_first; /* set of non corr firsts */ + p_set t_nc_follow; /* set of non corr followers */ +#endif p_set t_contains; /* contains set */ } t_term, *p_term;