2 * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
3 * See the copyright notice in the ACK home directory, in the file "Copyright".
5 /* $Id: replace.c,v 1.20 1998/02/09 09:48:27 ceriel Exp $ */
6 /* M A C R O R E P L A C E M E N T */
8 #include "pathlength.h"
25 extern char *GetIdentifier();
26 extern char *strcpy();
27 extern char *strcat();
28 extern int InputLevel;
29 struct repl *ReplaceList; /* list of currently active macros */
33 register struct idf *idf;
35 /* replace is called by the lexical analyzer to perform
36 macro replacement. The routine actually functions as a
37 higher interface to the real thing: expand_macro().
41 if (!(idf->id_macro)) return 0;
42 if (idf->id_macro->mc_flag & NOREPLACE)
45 repl->r_ptr = repl->r_text = Malloc((unsigned)(repl->r_size = LAPBUF));
46 repl->r_args = new_args();
48 if (!expand_macro(repl, idf))
51 InsertText(repl->r_text, (int)(repl->r_ptr - repl->r_text));
52 idf->id_macro->mc_flag |= NOREPLACE;
53 repl->r_level = InputLevel;
54 repl->next = ReplaceList;
69 /* We don't know how many parameters were specified, so be
70 * prepared to free all NPARAMS parameters.
71 * When an expvec is !NULL, the rawvec will also be !NULL.
72 * When an expvec is NULL, all remaining vectors will also be NULL.
74 for (i = 0; i < NPARAMS; i++) {
75 if (args->a_expvec[i]) {
76 free(args->a_expvec[i]);
77 free(args->a_rawvec[i]);
85 register struct repl *r = ReplaceList, *prev = 0;
87 assert(Unstacked > 0);
89 struct repl *nxt = r->next;
91 if (r->r_level > InputLevel) {
92 r->r_idf->id_macro->mc_flag &= ~NOREPLACE;
93 if (!prev) ReplaceList = nxt;
94 else prev->next = nxt;
105 expand_macro(repl, idf)
106 register struct repl *repl;
107 register struct idf *idf;
109 /* expand_macro() does the actual macro replacement.
110 "idf" is a description of the identifier which
111 caused the replacement.
112 If the identifier represents a function-like macro
113 call, the number of actual parameters is checked
114 against the number of formal parameters. Note that
115 in ANSI C the parameters are expanded first;
116 this is done by calling getactuals().
117 When the possible parameters are expanded, the replace-
118 ment list associated with "idf" is expanded.
119 expand_macro() returns 1 if the replacement succeeded
120 and 0 if some error occurred.
122 A special case is "defined". This acts as a unary operator
123 on a single, unexpanded identifier, which may be surrounded
124 by parentheses. The function expand_defined() handles this.
126 register struct macro *mac = idf->id_macro;
127 struct args *args = repl->r_args;
130 if (mac->mc_nps != -1) { /* with parameter list */
131 if (mac->mc_flag & FUNC) {
132 /* the following assertion won't compile:
133 assert(!strcmp("defined", idf->id_text));
135 if (!AccDefined) return 0;
136 expand_defined(repl);
141 ch = skipspaces(ch,1);
142 if (ch != '(') { /* no replacement if no () */
146 getactuals(repl, idf);
150 if (mac->mc_flag & FUNC) /* this macro leads to special action */
153 macro2buffer(repl, idf, args);
155 /* According to the ANSI definition:
160 'a' must be substituted, but the result should be
161 three tokens: + + ID. Therefore a token separator is
162 inserted after the replacement.
164 if (repl->r_text == repl->r_ptr || *(repl->r_ptr -1) != TOKSEP) {
165 add2repl(repl, TOKSEP);
171 register struct repl *repl;
173 register int ch = GetChar();
178 ch = skipspaces(ch, 0);
183 ch = skipspaces(ch, 0);
185 if ((class(ch) != STIDF) && (class(ch) != STELL)) {
186 error("identifier missing");
187 if (parens && ch != ')') error(") missing");
188 if (!parens || ch != ')') ChPushBack(ch);
193 str = GetIdentifier(0);
199 ch = skipspaces(ch, 0);
200 if (parens && ch != ')') error(") missing");
201 if (!parens || ch != ')') ChPushBack(ch);
202 add2repl(repl, (id && id->id_macro) ? '1' : '0');
209 args->a_expptr = args->a_expbuf = Malloc((unsigned)(args->a_expsize = ARGBUF));
210 args->a_rawptr = args->a_rawbuf = Malloc((unsigned)(args->a_rawsize = ARGBUF));
213 getactuals(repl, idf)
215 register struct idf *idf;
217 /* Get the actual parameters from the input stream.
218 The hard part is done by actual(), only commas and
219 other syntactic trivialities are checked here.
221 register struct args *args = repl->r_args;
222 register int nps = idf->id_macro->mc_nps;
228 if ((ch = GetChar()) != ')') {
230 while ((ch = actual(repl)) != ')' ) {
232 error("illegal macro call");
235 stash(repl, '\0', 1);
236 args->a_expvec[argcnt] = args->a_expbuf;
237 args->a_rawvec[argcnt] = args->a_rawbuf;
239 if (argcnt == STDC_NPARAMS)
240 strict("number of parameters exceeds ANSI standard");
241 if (argcnt >= NPARAMS)
242 fatal("argument vector overflow");
245 stash(repl, '\0', 1);
246 args->a_expvec[argcnt] = args->a_expbuf;
247 args->a_rawvec[argcnt] = args->a_rawbuf;
251 error("too few macro arguments");
252 else if (argcnt > nps)
253 error("too many macro arguments");
259 register struct repl *nrepl = ReplaceList;
260 register struct args *ap = nrepl->r_args;
263 /* stash identifier name */
264 for (p = nrepl->r_idf->id_text; *p != '\0'; p++)
267 /* The following code deals with expanded function
268 like macro calls. It makes the following code
271 #define def(a,b) x(a,b)
272 #define glue(a,b) a ## b
280 if (ap->a_rawvec[0]) {
281 /* stash arguments */
284 for (i = 0; ap->a_rawvec[i] != (char *)0; i++) {
285 if (i == 0) stash(repl, '(', -1);
286 else stash(repl, ',', -1);
287 for (p = ap->a_rawvec[i]; *p != '\0'; p++)
290 stash(repl, ')', -1);
298 /* This routine deals with the scanning of an actual parameter.
299 It takes into account the opening and closing brackets,
300 preprocessor numbers, strings and character constants.
303 register int level = 0, nostashraw = 0;
305 static int Unstacked_missed;
312 && nostashraw >= Unstacked_missed) {
313 nostashraw -= Unstacked_missed;
314 Unstacked_missed = 0;
317 nostashraw -= Unstacked;
318 if (nostashraw < 0) {
319 Unstacked_missed = -nostashraw;
324 if (class(ch) == STIDF || class(ch) == STELL) {
325 /* Scan a preprocessor identifier token. If the
326 token is a macro, it is expanded first.
328 char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];
329 register char *p = buf;
330 register struct idf *idef;
331 register int pos = -1;
338 } else NoExpandMacro = 0;
341 if (++pos < idfsize) {
345 } while (in_idf(ch));
347 ch = '\0'; /* Could be a non-stashed TOKSEP */
350 /* When the identifier has an associated macro
351 replacement list, it's expanded.
354 if (!idef || NoExpandMacro || !replace(idef)) {
356 || (idef && idef->id_macro
357 && (idef->id_macro->mc_flag & NOREPLACE)))
358 stash(repl, NOEXPM, !nostashraw);
359 for (p = buf; *p != '\0'; p++)
360 stash(repl, *p, !nostashraw);
362 if (!nostashraw) saveraw(repl);
365 } else if (class(ch) == STNUM) {
366 /* a preprocessing number has the following
368 [0-9|"."[0-9]]{[0-9"."a-zA-Z_]|{[Ee][+-]}}*
370 stash(repl, ch, !nostashraw);
373 if (class(ch) != STNUM) {
374 ch = '\0'; /* Could be a non-stashed TOKSEP */
378 else stash(repl, ch, !nostashraw);
381 while (in_idf(ch) || ch == '.') {
382 stash(repl, ch, !nostashraw);
383 if ((ch = GetChar()) == 'e' || ch == 'E') {
384 stash(repl, ch, !nostashraw);
386 if (ch == '+' || ch == '-') {
387 stash(repl, ch, !nostashraw);
392 ch = '\0'; /* Could be a non-stashed TOKSEP */
394 } else if (ch == '(') {
395 /* a comma may occur within parentheses */
397 stash(repl, ch, !nostashraw);
398 } else if (ch == ')') {
400 /* test on closing parenthesis of macro call */
401 if (level < 0) return ')';
402 stash(repl, ch, !nostashraw);
403 } else if (ch == ',') {
404 if (level <= 0) { /* comma separator for next argument */
406 error("unbalanced parenthesis");
408 return ','; /* ??? */
410 stash(repl, ch, !nostashraw);
411 } else if (ch == '\n') {
412 /* newlines are accepted as white spaces */
414 /* This piece of code needs some explanation:
415 consider the call of a macro defined as:
416 #define sum(a,b) (a+b)
417 in the following form:
419 /_* comment *_/ #include phone_number
421 in which case the include must be handled
425 a_new_line: ch = GetChar();
426 while (class(ch) == STSKIP || ch == '/') {
428 if ((ch = GetChar()) == '*' && !InputLevel) {
430 stash(repl, ' ', !nostashraw);
437 stash(repl, '/', !nostashraw);
439 } else ch = GetChar();
445 } else if (ch == EOI) {
446 error("unterminated macro call");
452 stash(repl, ' ', !nostashraw);
454 } else if (ch == '/') {
455 /* comments are treated as one white space token */
456 if ((ch = GetChar()) == '*' && !InputLevel) {
458 stash(repl, ' ', !nostashraw);
462 stash(repl, '/', !nostashraw);
464 } else if (ch == '\'' || ch == '"') {
465 /* Strings are considered as ONE token, thus no
466 replacement within strings.
468 register int match = ch;
470 stash(repl, ch, !nostashraw);
471 while ((ch = GetChar()) != EOI) {
475 stash(repl, ch, !nostashraw);
477 } else if (ch == '\n') {
478 error("newline in string");
480 stash(repl, match, !nostashraw);
483 stash(repl, ch, !nostashraw);
486 error("unterminated macro call");
489 stash(repl, ch, !nostashraw);
491 if (lastch == TOKSEP && ch == TOKSEP) continue;
492 stash(repl, ch, !nostashraw);
498 register struct idf *idef;
500 /* macro_func() performs the special actions needed with some
501 macros. These macros are __FILE__ and __LINE__ whose
502 replacement texts must be evaluated at the time they are
505 register struct macro *mac = idef->id_macro;
506 static char FilNamBuf[PATHLENGTH];
509 switch (idef->id_text[2]) {
510 case 'F': /* __FILE__ */
512 strcpy(&FilNamBuf[1], FileName);
513 strcat(FilNamBuf, "\"");
514 mac->mc_text = FilNamBuf;
515 mac->mc_length = strlen(FilNamBuf);
517 case 'L': /* __LINE__ */
518 mac->mc_text = long2str((long)LineNumber, 10);
519 mac->mc_length = strlen(mac->mc_text);
522 crash("(macro_func)");
527 macro2buffer(repl, idf, args)
528 register struct repl *repl;
529 register struct idf *idf;
530 register struct args *args;
532 /* macro2buffer expands the replacement list and places the
533 result onto the replacement buffer. It deals with the #
534 and ## operators, and inserts the actual parameters.
535 The argument buffer contains the raw argument (needed
536 for the # and ## operators), and the expanded argument
537 (for all other parameter substitutions).
539 The grammar of the replacement list is:
541 repl_list: TOKEN repl_list
542 | PARAMETER repl_list
545 | PARAMETER '##' TOKEN
546 | TOKEN '##' PARAMETER
547 | PARAMETER '##' PARAMETER
550 As the grammar indicates, we could make a DFA and
551 use this finite state machine for the replacement
552 list parsing (inserting the arguments, etc.).
554 Currently we go through the replacement list in a
555 linear fashion. This is VERY expensive, something
556 smarter should be done (but even a DFA is O(|s|)).
558 register char *ptr = idf->id_macro->mc_text;
560 int func = idf->id_macro->mc_nps != -1;
563 assert(ptr[idf->id_macro->mc_length] == '\0');
565 if (*ptr == '\'' || *ptr == '"') {
566 register int delim = *ptr;
569 add2repl(repl, *ptr);
571 add2repl(repl, *++ptr);
573 error("unterminated string");
577 } while (*ptr != delim || *ptr == '\0');
578 add2repl(repl, *ptr++);
579 } else if (*ptr == '#' && (func || *(ptr+1) == '#')) {
581 register int tmpindex;
582 /* ## - paste operator */
585 /* trim the actual replacement list */
587 while (repl->r_ptr >= repl->r_text
588 && is_wsp(*repl->r_ptr))
591 /* ## occurred at the beginning of the replacement list.
593 if (repl->r_ptr < repl->r_text) {
598 if (repl->r_ptr >= repl->r_text
599 && *repl->r_ptr == TOKSEP)
603 tmpindex = repl->r_ptr - repl->r_text;
604 /* tmpindex can be 0 */
606 /* skip space in macro replacement list */
607 while ((*ptr & FORMALP) == 0 && is_wsp(*ptr))
610 /* ## occurred at the end of the replacement list.
612 if (*ptr & FORMALP) {
613 register int n = *ptr++ & 0177;
617 p = args->a_rawvec[n-1];
618 if (p) { /* else macro argument missing */
619 while (is_wsp(*p)) p++;
620 if (*p == NOEXPM) p++;
622 add2repl(repl, *p++);
625 && in_idf(repl->r_text[tmpindex]))
628 && repl->r_text[tmpindex] == NOEXPM)
629 repl->r_text[tmpindex] = TOKSEP;
630 } else if (*ptr == '\0') {
637 && in_idf(repl->r_text[tmpindex]))
640 && repl->r_text[tmpindex] == NOEXPM)
641 repl->r_text[tmpindex] = TOKSEP;
644 } else { /* # operator */
645 ptr = stringify(repl, ptr, args);
647 } else if (*ptr & FORMALP) {
648 /* insert actual parameter */
649 register int n = *ptr++ & 0177;
650 register char *p, *q;
654 /* This is VERY dirty, we look ahead for the
655 ## operator. If it's found we use the raw
656 argument buffer instead of the expanded
659 for (p = ptr; (*p & FORMALP) == 0 && is_wsp(*p); p++)
661 if (*p == '#' && p[1] == '#')
662 q = args->a_rawvec[n-1];
664 q = args->a_expvec[n-1];
666 if (q) /* else macro argument missing */
668 add2repl(repl, *q++);
670 if (repl->r_text == repl->r_ptr || *(repl->r_ptr-1) != TOKSEP)
671 add2repl(repl, TOKSEP);
673 add2repl(repl, *ptr++);
677 error("illegal use of ## operator");
681 stringify(repl, ptr, args)
682 register struct repl *repl;
684 register struct args *args;
686 /* If a parameter is immediately preceded by a # token
687 both are replaced by a single string literal that
688 contains the spelling of the token sequence for the
689 corresponding argument.
690 Each occurrence of white space between the argument's
691 tokens becomes a single space character in the string
692 literal. White spaces before the first token and after
693 the last token comprising the argument are deleted.
694 To retain the original spelling we insert backslashes
695 as appropriate. We only escape backslashes if they
696 occur within string tokens.
698 register int space = 1; /* skip leading spaces */
699 register int delim = 0; /* string or character constant delim */
700 register int backslash = 0; /* last character was a \ */
702 /* skip spaces in the macro replacement list */
703 while ((*ptr & FORMALP) == 0 && is_wsp(*ptr))
706 if (*ptr & FORMALP) {
707 register int n = *ptr++ & 0177;
711 p = args->a_rawvec[n-1];
724 if (!delim && (*p == '"' || *p == '\''))
726 else if (*p == delim && !backslash)
728 backslash = *p == '\\';
729 if (*p == '"' || (delim && *p == '\\'))
730 add2repl(repl, '\\');
731 if (*p == TOKSEP || *p == NOEXPM) p++;
732 else add2repl(repl, *p++);
735 /* trim spaces in the replacement list */
736 for (--repl->r_ptr; is_wsp(*repl->r_ptr); repl->r_ptr--)
738 ++repl->r_ptr; /* oops, one to far */
741 error("illegal use of # operator");
745 /* The following routine is also called from domacro.c.
748 register struct repl *repl;
751 register int index = repl->r_ptr - repl->r_text;
753 assert(index < repl->r_size);
754 if (index + 2 >= repl->r_size) {
755 repl->r_text = Realloc(repl->r_text, (unsigned)(repl->r_size <<= 1));
756 repl->r_ptr = repl->r_text + index;
762 /* If the variable stashraw is negative, we must only stash into the raw
763 * buffer. If the variable is zero, we must only stash into the expanded
764 * buffer. Otherwise, we must use both buffers.
766 stash(repl, ch, stashraw)
771 /* Stash characters into the macro expansion buffer.
773 register struct args *args = repl->r_args;
774 register int index = args->a_expptr - args->a_expbuf;
777 assert(index < args->a_expsize);
778 if (index + 1 >= args->a_expsize) {
779 args->a_expbuf = Realloc(args->a_expbuf,
780 (unsigned)(args->a_expsize <<= 1));
781 args->a_expptr = args->a_expbuf + index;
783 *args->a_expptr++ = ch;
787 index = args->a_rawptr - args->a_rawbuf;
788 assert(index < args->a_rawsize);
789 if (index + 1 >= args->a_rawsize) {
790 args->a_rawbuf = Realloc(args->a_rawbuf,
791 (unsigned)(args->a_rawsize <<= 1));
792 args->a_rawptr = args->a_rawbuf + index;
794 *args->a_rawptr++ = ch;