2 * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
3 * See the copyright notice in the ACK home directory, in the file "Copyright".
5 /* $Id: replace.c,v 1.22 1998/02/09 09:49:00 ceriel Exp $ */
6 /* M A C R O R E P L A C E M E N T */
12 #include "pathlength.h"
29 extern struct idf *GetIdentifier();
30 extern int InputLevel;
31 struct repl *ReplaceList; /* list of currently active macros */
32 extern char *strcat(), *strcpy();
36 register struct idf *idf;
38 /* replace is called by the lexical analyzer to perform
39 macro replacement. The routine actually functions as a
40 higher interface to the real thing: expand_macro().
44 if (!(idf->id_macro)) return 0;
45 if (idf->id_macro->mc_flag & NOREPLACE)
48 repl->r_ptr = repl->r_text = Malloc(repl->r_size = LAPBUF);
49 repl->r_args = new_args();
51 if (!expand_macro(repl, idf))
54 InsertText(repl->r_text, (int)(repl->r_ptr - repl->r_text));
55 idf->id_macro->mc_flag |= NOREPLACE;
56 repl->r_level = InputLevel;
57 repl->next = ReplaceList;
72 /* We don't know how many parameters were specified, so be
73 * prepared to free all NPARAMS parameters.
74 * When an expvec is !0, the rawvec will also be !0.
75 * When an expvec is 0, all remaining vectors will also be 0.
77 for (i = 0; i < NPARAMS; i++) {
78 if (args->a_expvec[i]) {
79 free(args->a_expvec[i]);
80 free(args->a_rawvec[i]);
88 register struct repl *r = ReplaceList, *prev = 0;
90 ASSERT(Unstacked > 0);
92 struct repl *nxt = r->next;
94 if (r->r_level > InputLevel) {
95 r->r_idf->id_macro->mc_flag &= ~NOREPLACE;
96 if (!prev) ReplaceList = nxt;
97 else prev->next = nxt;
108 expand_macro(repl, idf)
109 register struct repl *repl;
110 register struct idf *idf;
112 /* expand_macro() does the actual macro replacement.
113 "idf" is a description of the identifier which
114 caused the replacement.
115 If the identifier represents a function-like macro
116 call, the number of actual parameters is checked
117 against the number of formal parameters. Note that
118 in ANSI C the parameters are expanded first;
119 this is done by calling getactuals().
120 When the possible parameters are expanded, the replace-
121 ment list associated with "idf" is expanded.
122 expand_macro() returns 1 if the replacement succeeded
123 and 0 if some error occurred.
125 A special case is "defined". This acts as a unary operator
126 on a single, unexpanded identifier, which may be surrounded
127 by parentheses. The function expand_defined() handles this.
129 register struct macro *mac = idf->id_macro;
130 struct args *args = repl->r_args;
133 if (mac->mc_nps != -1) { /* with parameter list */
134 if (mac->mc_flag & FUNC) {
135 /* the following assertion won't compile:
136 ASSERT(!strcmp("defined", idf->id_text));
137 expand the assert macro by hand (??? dirty, temporary)
140 if (strcmp("defined", idf->id_text))
141 crash("in %s, %u: assertion %s failed",
142 __FILE__, __LINE__ - 2,
143 "strcmp(\"defined\", idf->id_text)");
145 if (!AccDefined) return 0;
146 expand_defined(repl);
151 ch = skipspaces(ch,1);
152 if (ch != '(') { /* no replacement if no () */
156 getactuals(repl, idf);
160 if (mac->mc_flag & FUNC) /* this macro leads to special action */
163 macro2buffer(repl, idf, args);
165 /* According to the ANSI definition:
170 'a' must be substituted, but the result should be
171 three tokens: + + ID. Therefore a token separator is
172 inserted after the replacement.
174 if (repl->r_text == repl->r_ptr || *(repl->r_ptr - 1) != TOKSEP) {
175 add2repl(repl, TOKSEP);
181 register struct repl *repl;
183 register int ch = GetChar();
187 ch = skipspaces(ch, 0);
192 ch = skipspaces(ch, 0);
194 if ((class(ch) != STIDF) && (class(ch) != STELL)) {
195 error("identifier missing");
196 if (parens && ch != ')') error(") missing");
197 if (!parens || ch != ')') ChPushBack(ch);
202 id = GetIdentifier(0);
203 ASSERT(id || class(ch) == STELL);
205 ch = skipspaces(ch, 0);
206 if (parens && ch != ')') error(") missing");
207 if (!parens || ch != ')') ChPushBack(ch);
208 add2repl(repl, (id && id->id_macro) ? '1' : '0');
215 args->a_expptr = args->a_expbuf = Malloc(args->a_expsize = ARGBUF);
216 args->a_rawptr = args->a_rawbuf = Malloc(args->a_rawsize = ARGBUF);
219 getactuals(repl, idf)
221 register struct idf *idf;
223 /* Get the actual parameters from the input stream.
224 The hard part is done by actual(), only commas and
225 other syntactic trivialities are checked here.
227 register struct args *args = repl->r_args;
228 register int nps = idf->id_macro->mc_nps;
234 if ((ch = GetChar()) != ')') {
236 while ((ch = actual(repl)) != ')' ) {
238 lexerror("illegal macro call");
241 stash(repl, '\0', 1);
242 args->a_expvec[argcnt] = args->a_expbuf;
243 args->a_rawvec[argcnt] = args->a_rawbuf;
245 if (argcnt == STDC_NPARAMS)
246 lexstrict("number of parameters exceeds ANSI standard");
247 if (argcnt >= NPARAMS)
248 fatal("argument vector overflow");
251 stash(repl, '\0', 1);
252 args->a_expvec[argcnt] = args->a_expbuf;
253 args->a_rawvec[argcnt] = args->a_rawbuf;
257 lexerror("too few macro arguments");
258 else if (argcnt > nps)
259 lexerror("too many macro arguments");
265 register struct repl *nrepl = ReplaceList;
266 register struct args *ap = nrepl->r_args;
269 /* stash identifier name */
270 for (p = nrepl->r_idf->id_text; *p != '\0'; p++)
273 /* The following code deals with expanded function
274 like macro calls. It makes the following code
277 #define def(a,b) x(a,b)
278 #define glue(a,b) a ## b
286 if (ap->a_rawvec[0]) {
287 /* stash arguments */
290 for (i = 0; ap->a_rawvec[i] != (char *)0; i++) {
291 if (i == 0) stash(repl, '(', -1);
292 else stash(repl, ',', -1);
293 for (p = ap->a_rawvec[i]; *p != '\0'; p++)
296 stash(repl, ')', -1);
304 /* This routine deals with the scanning of an actual parameter.
305 It takes into account the opening and closing brackets,
306 preprocessor numbers, strings and character constants.
309 register int level = 0, nostashraw = 0;
311 static int Unstacked_missed;
318 && nostashraw >= Unstacked_missed) {
319 nostashraw -= Unstacked_missed;
320 Unstacked_missed = 0;
323 nostashraw -= Unstacked;
324 if (nostashraw < 0) {
325 Unstacked_missed = -nostashraw;
330 if (class(ch) == STIDF || class(ch) == STELL) {
331 /* Scan a preprocessor identifier token. If the
332 token is a macro, it is expanded first.
334 char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];
335 register char *p = buf;
336 register struct idf *idef;
337 register int pos = -1;
344 } else NoExpandMacro = 0;
347 if (++pos < idfsize) {
351 } while (in_idf(ch));
353 ch = '\0'; /* It could be an unstashed TOKSEP */
356 /* When the identifier has an associated macro
357 replacement list, it's expanded.
360 if (!idef || NoExpandMacro || !replace(idef)) {
362 || (idef && idef->id_macro
363 && (idef->id_macro->mc_flag & NOREPLACE)))
364 stash(repl, NOEXPM, !nostashraw);
365 for (p = buf; *p != '\0'; p++)
366 stash(repl, *p, !nostashraw);
368 if (!nostashraw) saveraw(repl);
371 } else if (class(ch) == STNUM) {
372 /* a preprocessing number has the following
374 [0-9|"."[0-9]]{[0-9"."a-zA-Z_]|{[Ee][+-]}}*
376 stash(repl, ch, !nostashraw);
379 if (class(ch) != STNUM) {
380 ch = '\0'; /* It could be an unstashed TOKSEP */
384 else stash(repl, ch, !nostashraw);
387 while (in_idf(ch) || ch == '.') {
388 stash(repl, ch, !nostashraw);
389 if ((ch = GetChar()) == 'e' || ch == 'E') {
390 stash(repl, ch, !nostashraw);
392 if (ch == '+' || ch == '-') {
393 stash(repl, ch, !nostashraw);
398 ch = '\0'; /* It could be an unstashed TOKSEP */
400 } else if (ch == '(') {
401 /* a comma may occur between parentheses */
403 stash(repl, ch, !nostashraw);
404 } else if (ch == ')') {
406 /* closing parenthesis of macro call */
407 if (level < 0) return ')';
408 stash(repl, ch, !nostashraw);
409 } else if (ch == ',') {
410 if (level <= 0) { /* comma separator for next argument */
412 lexerror("unbalanced parenthesis");
414 return ','; /* ??? */
416 stash(repl, ch, !nostashraw);
417 } else if (ch == '\n') {
418 /* newlines are accepted as white spaces */
420 /* This piece of code needs some explanation:
421 consider the call of a macro defined as:
422 #define sum(a,b) (a+b)
423 in the following form:
425 /_* comment *_/ #include phone_number
427 in which case the include must be handled
431 a_new_line: ch = GetChar();
432 while (class(ch) == STSKIP || ch == '/') {
434 if ((ch = GetChar()) == '*' && !InputLevel) {
436 stash(repl, ' ', !nostashraw);
443 stash(repl, '/', !nostashraw);
445 } else ch = GetChar();
450 /* Clear File_Inserted since domacro could
451 * be called again, which calls GetToken().
455 } else if (ch == EOI) {
456 lexerror("unterminated macro call");
462 stash(repl, ' ', !nostashraw);
464 } else if (ch == '/') {
465 /* comments are treated as one white space token */
466 if ((ch = GetChar()) == '*' && !InputLevel) {
468 stash(repl, ' ', !nostashraw);
472 stash(repl, '/', !nostashraw);
474 } else if (ch == '\'' || ch == '"') {
475 /* Strings are considered as ONE token, thus no
476 replacement within strings.
478 register int match = ch;
480 stash(repl, ch, !nostashraw);
481 while ((ch = GetChar()) != EOI) {
485 stash(repl, ch, !nostashraw);
487 } else if (ch == '\n') {
488 lexerror("newline in string");
490 stash(repl, match, !nostashraw);
493 stash(repl, ch, !nostashraw);
496 lexerror("unterminated macro call");
499 stash(repl, ch, !nostashraw);
501 if (lastch == TOKSEP && ch == TOKSEP) continue;
502 stash(repl, ch, !nostashraw);
508 register struct idf *idef;
510 /* macro_func() performs the special actions needed with some
511 macros. These macros are __FILE__ and __LINE__, whose
512 replacement texts must be evaluated at the time they are
515 register struct macro *mac = idef->id_macro;
516 static char FilNamBuf[PATHLENGTH];
519 switch (idef->id_text[2]) {
520 case 'F': /* __FILE__ */
522 strcpy(&FilNamBuf[1], FileName);
523 strcat(FilNamBuf, "\"");
524 mac->mc_text = FilNamBuf;
525 mac->mc_length = strlen(FilNamBuf);
527 case 'L': /* __LINE__ */
528 mac->mc_text = long2str((long)LineNumber, 10);
529 mac->mc_length = strlen(mac->mc_text);
532 crash("(macro_func)");
537 macro2buffer(repl, idf, args)
538 register struct repl *repl;
539 register struct idf *idf;
540 register struct args *args;
542 /* macro2buffer expands the replacement list and places the
543 result onto the replacement buffer. It deals with the #
544 and ## operators, and inserts the actual parameters.
545 The argument buffer contains the raw argument (needed
546 for the ## operator), and the expanded argument (for
547 all other parameter substitutions).
549 The grammar of the replacement list is:
551 repl_list: TOKEN repl_list
552 | PARAMETER repl_list
555 | PARAMETER '##' TOKEN
556 | TOKEN '##' PARAMETER
557 | PARAMETER '##' PARAMETER
560 As the grammar indicates, we could make a DFA and
561 use this finite state machine for the replacement
562 list parsing (inserting the arguments, etc.).
564 Currently we go through the replacement list in a
565 linear fashion. This is VERY expensive, something
566 smarter should be done (but even a DFA is O(|s|)).
568 register char *ptr = idf->id_macro->mc_text;
570 int func = idf->id_macro->mc_nps != -1;
573 ASSERT(ptr[idf->id_macro->mc_length] == '\0');
575 if (*ptr == '\'' || *ptr == '"') {
576 register int delim = *ptr;
579 add2repl(repl, *ptr);
581 add2repl(repl, *++ptr);
583 lexerror("unterminated string");
587 } while (*ptr != delim || *ptr == '\0');
588 add2repl(repl, *ptr++);
589 } else if (*ptr == '#' && (func || *(ptr+1) == '#')) {
591 register int tmpindex;
592 /* ## - paste operator */
595 /* trim the actual replacement list */
597 while (repl->r_ptr >= repl->r_text
598 && is_wsp(*repl->r_ptr))
601 /* ## occurred at the beginning of the replacement list.
603 if (repl->r_ptr < repl->r_text) {
608 if (repl->r_ptr >= repl->r_text
609 && *repl->r_ptr == TOKSEP)
613 tmpindex = repl->r_ptr - repl->r_text;
614 /* tmpindex can be 0 */
616 /* skip space in macro replacement list */
617 while ((*ptr & FORMALP) == 0 && is_wsp(*ptr))
620 /* ## occurred at the end of the replacement list.
622 if (*ptr & FORMALP) {
623 register int n = *ptr++ & 0177;
627 p = args->a_rawvec[n-1];
628 if (p) { /* else macro argument missing */
629 while (is_wsp(*p)) p++;
630 if (*p == NOEXPM) p++;
632 add2repl(repl, *p++);
635 && in_idf(repl->r_text[tmpindex]))
638 && repl->r_text[tmpindex] == NOEXPM)
639 repl->r_text[tmpindex] = TOKSEP;
640 } else if (*ptr == '\0') {
647 && in_idf(repl->r_text[tmpindex]))
650 && repl->r_text[tmpindex] == NOEXPM)
651 repl->r_text[tmpindex] = TOKSEP;
654 } else { /* # operator */
655 ptr = stringify(repl, ptr, args);
657 } else if (*ptr & FORMALP) {
658 /* insert actual parameter */
659 register int n = *ptr++ & 0177;
660 register char *p, *q;
664 /* This is VERY dirty, we look ahead for the
665 ## operator. If it's found we use the raw
666 argument buffer instead of the expanded
669 for (p = ptr; (*p & FORMALP) == 0 && is_wsp(*p); p++)
671 if (*p == '#' && p[1] == '#')
672 q = args->a_rawvec[n-1];
674 q = args->a_expvec[n-1];
676 if (q) /* else macro argument missing */
678 add2repl(repl, *q++);
680 if (repl->r_text == repl->r_ptr || *(repl->r_ptr - 1) != TOKSEP)
681 add2repl(repl, TOKSEP);
683 add2repl(repl, *ptr++);
687 lexerror("illegal use of the ## operator");
691 stringify(repl, ptr, args)
692 register struct repl *repl;
694 register struct args *args;
696 /* If a parameter is immediately preceded by a # token
697 both are replaced by a single string literal that
698 contains the spelling of the token sequence for the
699 corresponding argument.
700 Each occurrence of white space between the argument's
701 tokens becomes a single space character in the string
702 literal. White spaces before the first token and after
703 the last token comprising the argument are deleted.
704 To retain the original spelling we insert backslashes
705 as appropriate. We only escape backslashes if they
706 occur within string tokens.
708 register int space = 1; /* skip leading spaces */
709 register int delim = 0; /* string or character constant delim */
710 register int backslash = 0; /* last character was a \ */
712 /* skip spaces in the macro replacement list */
713 while ((*ptr & FORMALP) == 0 && is_wsp(*ptr))
716 if (*ptr & FORMALP) {
717 register int n = *ptr++ & 0177;
721 p = args->a_rawvec[n-1];
734 if (!delim && (*p == '"' || *p == '\''))
736 else if (*p == delim && !backslash)
738 backslash = *p == '\\';
739 if (*p == '"' || (delim && *p == '\\'))
740 add2repl(repl, '\\');
741 if (*p == TOKSEP || *p == NOEXPM) p++;
742 else add2repl(repl, *p++);
745 /* trim spaces in the replacement list */
746 for (--repl->r_ptr; is_wsp(*repl->r_ptr); repl->r_ptr--)
748 ++repl->r_ptr; /* oops, one to far */
751 error("illegal use of # operator");
755 /* The following routine is also called from domacro.c.
758 register struct repl *repl;
761 register int index = repl->r_ptr - repl->r_text;
763 ASSERT(index < repl->r_size);
764 if (index + 2 >= repl->r_size) {
765 repl->r_text = Realloc(repl->r_text, (unsigned) (repl->r_size <<= 1));
766 repl->r_ptr = repl->r_text + index;
772 /* If the variable stashraw is negative, we must only stash into the raw
773 * buffer. If the variable is zero, we must only stash into the expanded
774 * buffer. Otherwise, we must use both buffers.
776 stash(repl, ch, stashraw)
781 /* Stash characters into the macro expansion buffer.
783 register struct args *args = repl->r_args;
784 register int index = args->a_expptr - args->a_expbuf;
787 ASSERT(index < args->a_expsize);
788 if (index + 1 >= args->a_expsize) {
789 args->a_expbuf = Realloc(args->a_expbuf,
790 (unsigned) (args->a_expsize <<= 1));
791 args->a_expptr = args->a_expbuf + index;
793 *args->a_expptr++ = ch;
797 index = args->a_rawptr - args->a_rawbuf;
798 ASSERT(index < args->a_rawsize);
799 if (index + 1 >= args->a_rawsize) {
800 args->a_rawbuf = Realloc(args->a_rawbuf,
801 (unsigned)(args->a_rawsize <<= 1));
802 args->a_rawptr = args->a_rawbuf + index;
804 *args->a_rawptr++ = ch;