From: ceriel Date: Wed, 16 Jan 1991 16:07:50 +0000 (+0000) Subject: initial version X-Git-Tag: release-5-5~1289 X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=29b2e6e3d4e9cf97749bcba30fcaea0a93487804;p=ack.git initial version --- diff --git a/util/flex/.distr b/util/flex/.distr new file mode 100644 index 000000000..8854f59cf --- /dev/null +++ b/util/flex/.distr @@ -0,0 +1,24 @@ +COPYING +Changes +Headers +Makefile +README +ccl.c +dfa.c +ecs.c +flex.1 +flex.skel +flexdef.h +flexdoc.1 +gen.c +initscan.c +libmain.c +main.c +makefile +misc.c +nfa.c +parse.y +scan.l +sym.c +tblcmp.c +yylex.c diff --git a/util/flex/COPYING b/util/flex/COPYING new file mode 100644 index 000000000..9b01361ca --- /dev/null +++ b/util/flex/COPYING @@ -0,0 +1,38 @@ +Flex carries the copyright used for BSD software, slightly modified +because it originated at the Lawrence Berkeley (not Livermore!) Laboratory, +which operates under a contract with the Department of Energy: + + Copyright (c) 1990 The Regents of the University of California. + All rights reserved. + + This code is derived from software contributed to Berkeley by + Vern Paxson. + + The United States Government has rights in this work pursuant + to contract no. DE-AC03-76SF00098 between the United States + Department of Energy and the University of California. + + Redistribution and use in source and binary forms are permitted + provided that: (1) source distributions retain this entire + copyright notice and comment, and (2) distributions including + binaries display the following acknowledgement: ``This product + includes software developed by the University of California, + Berkeley and its contributors'' in the documentation or other + materials provided with the distribution and in all advertising + materials mentioning features or use of this software. 
Neither the + name of the University nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE. + +This basically says "do whatever you please with this software except +remove this notice or take advantage of the University's (or the flex +authors') name". + +Note that the "flex.skel" scanner skeleton carries no copyright notice. +You are free to do whatever you please with scanners generated using flex; +for them, you are not even bound by the above copyright. diff --git a/util/flex/Changes b/util/flex/Changes new file mode 100644 index 000000000..ca7b4265e --- /dev/null +++ b/util/flex/Changes @@ -0,0 +1,337 @@ +Changes between 2.3 Patch #6 (29Aug90) and 2.3 Patch #5: + + - Fixed a serious bug in yymore() which basically made it + completely broken. Thanks goes to Jean Christophe of + the Nethack development team for finding the problem + and passing along the fix. + + +Changes between 2.3 Patch #5 (16Aug90) and 2.3 Patch #4: + + - An up-to-date version of initscan.c so "make test" will + work after applying the previous patches + + +Changes between 2.3 Patch #4 (14Aug90) and 2.3 Patch #3: + + - Fixed bug in hexadecimal escapes which allowed only digits, + not letters, in escapes + - Fixed bug in previous "Changes" file! + + +Changes between 2.3 Patch #3 (03Aug90) and 2.3 Patch #2: + + - Correction to patch #2 for gcc compilation; thanks goes to + Paul Eggert for catching this. + + +Changes between 2.3 Patch #2 (02Aug90) and original 2.3 release: + + - Fixed (hopefully) headaches involving declaring malloc() + and free() for gcc, which defines __STDC__ but (often) doesn't + come with the standard include files such as <stdlib.h>.
+ Reordered #ifdef maze in the scanner skeleton in the hope of + getting the declarations right for cfront and g++, too. + + - Note that this patch supersedes patch #1 for release 2.3, + which was never announced but was available briefly for + anonymous ftp. + + +Changes between 2.3 (full) release of 28Jun90 and 2.2 (alpha) release: + + User-visible: + + - A lone <<EOF>> rule (that is, one which is not qualified with + a list of start conditions) now specifies the EOF action for + *all* start conditions which haven't already had <<EOF>> actions + given. To specify an end-of-file action for just the initial + state, use <INITIAL><<EOF>>. + + - -d debug output is now contingent on the global yy_flex_debug + being set to a non-zero value, which it is by default. + + - A new macro, YY_USER_INIT, is provided for the user to specify + initialization action to be taken on the first call to the + scanner. This action is done before the scanner does its + own initialization. + + - yy_new_buffer() has been added as an alias for yy_create_buffer() + + - Comments beginning with '#' and extending to the end of the line + now work, but have been deprecated (in anticipation of making + flex recognize #line directives). + + - The funky restrictions on when semi-colons could follow the + YY_NEW_FILE and yyless macros have been removed. They now + behave identically to functions. + + - A bug in the sample redefinition of YY_INPUT in the documentation + has been corrected. + + - A bug in the sample simple tokener in the documentation has + been corrected. + + - The documentation on the incompatibilities between flex and + lex has been reordered so that the discussion of yylineno + and input() come first, as it's anticipated that these will + be the most common source of headaches. + + + Things which didn't used to be documented but now are: + + - flex interprets "^foo|bar" differently from lex.
flex interprets + it as "match either a 'foo' or a 'bar', providing it comes at the + beginning of a line", whereas lex interprets it as "match either + a 'foo' at the beginning of a line, or a 'bar' anywhere". + + - flex initializes the global "yyin" on the first call to the + scanner, while lex initializes it at compile-time. + + - yy_switch_to_buffer() can be used in the yywrap() macro/routine. + + - flex scanners do not use stdio for their input, and hence when + writing an interactive scanner one must explicitly call fflush() + after writing out a prompt. + + - flex scanners can be made reentrant (after a fashion) by using + "yyrestart( yyin );". This is useful for interactive scanners + which have interrupt handlers that long-jump out of the scanner. + + - a defense of why yylineno is not supported is included, along + with a suggestion on how to convert scanners which rely on it. + + + Other changes: + + - Prototypes and proper declarations of void routines have + been added to the flex source code, courtesy of Kevin B. Kenny. + + - Routines dealing with memory allocation now use void* pointers + instead of char* - see Makefile for porting implications. + + - Error-checking is now done when flex closes a file. + + - Various lint tweaks were added to reduce the number of gripes. + + - Makefile has been further parameterized to aid in porting. + + - Support for SCO Unix added. + + - Flex now sports the latest & greatest UC copyright notice + (which is only slightly different from the previous one). + + - A note has been added to flexdoc.1 mentioning work in progress + on modifying flex to generate straight C code rather than a + table-driven automaton, with an email address of whom to contact + if you are working along similar lines.
+ + +Changes between 2.2 Patch #3 (30Mar90) and 2.2 Patch #2: + + - fixed bug which caused -I scanners to bomb + + +Changes between 2.2 Patch #2 (27Mar90) and 2.2 Patch #1: + + - fixed bug writing past end of input buffer in yyunput() + - fixed bug detecting NUL's at the end of a buffer + + +Changes between 2.2 Patch #1 (23Mar90) and 2.2 (alpha) release: + + - Makefile fixes: definition of MAKE variable for systems + which don't have it; installation of flexdoc.1 along with + flex.1; fixed two bugs which could cause "bigtest" to fail. + + - flex.skel fix for compiling with g++. + + - README and flexdoc.1 no longer list an out-of-date BITNET address + for contacting me. + + - minor typos and formatting changes to flex.1 and flexdoc.1. + + +Changes between 2.2 (alpha) release of March '90 and previous release: + + User-visible: + + - Full user documentation now available. + + - Support for 8-bit scanners. + + - Scanners now accept NUL's. + + - A facility has been added for dealing with multiple + input buffers. + + - Two manual entries now. One which fully describes flex + (rather than just its differences from lex), and the + other for quick(er) reference. + + - A number of changes to bring flex closer into compliance + with the latest POSIX lex draft: + + %t support + flex now accepts multiple input files and concatenates + them together to form its input + previous -c (compress) flag renamed -C + do-nothing -c and -n flags added + Any indented code or code within %{}'s in section 2 is + now copied to the output + + - yyleng is now a bona fide global integer. + + - -d debug information now gives the line number of the + matched rule instead of which number rule it was from + the beginning of the file. + + - -v output now includes a summary of the flags used to generate + the scanner. + + - unput() and yyrestart() are now globally callable. + + - yyrestart() no longer closes the previous value of yyin. 
+ + - C++ support; generated scanners can be compiled with C++ compiler. + + - Primitive -lfl library added, containing default main() + which calls yylex(). A number of routines currently living + in the scanner skeleton will probably migrate to here + in the future (in particular, yywrap() will probably cease + to be a macro and instead be a function in the -lfl library). + + - Hexadecimal (\x) escape sequences added. + + - Support for MS-DOS, VMS, and Turbo-C integrated. + + - The %used/%unused operators have been deprecated. They + may go away soon. + + + Other changes: + + - Makefile enhanced for easier testing and installation. + - The parser has been tweaked to detect some erroneous + constructions which previously were missed. + - Scanner input buffer overflow is now detected. + - Bugs with missing "const" declarations fixed. + - Out-of-date Minix/Atari patches provided. + - Scanners no longer require printf() unless FLEX_DEBUG is being used. + - A subtle input() bug has been fixed. + - Line numbers for "continued action" rules (those following + the special '|' action) are now correct. + - unput() bug fixed; had been causing problems porting flex to VMS. + - yymore() handling rewritten to fix bug with interaction + between yymore() and trailing context. + - EOF in actions now generates an error message. + - Bug involving -CFe and generating equivalence classes fixed. + - Bug which made -CF be treated as -Cf fixed. + - Support for SysV tmpnam() added. + - Unused #define's for scanner no longer generated. + - Error messages which are associated with a particular input + line are now all identified with their input line in standard + format. + - % directives which are valid to lex but not to flex are + now ignored instead of generating warnings. + - -DSYS_V flag can now also be specified -DUSG for System V + compilation. 
+ + +Changes between 2.1 beta-test release of June '89 and previous release: + + User-visible: + + - -p flag generates a performance report to stderr. The report + consists of comments regarding features of the scanner rules + which result in slower scanners. + + - -b flag generates backtracking information to lex.backtrack. + This is a list of scanner states which require backtracking + and the characters on which they do so. By adding rules + one can remove backtracking states. If all backtracking states + are eliminated, the generated scanner will run faster. + Backtracking is not yet documented in the manual entry. + + - Variable trailing context now works, i.e., one can have + rules like "(foo)*/[ \t]*bletch". Some trailing context + patterns still cannot be properly matched and generate + error messages. These are patterns where the ending of the + first part of the rule matches the beginning of the second + part, such as "zx*/xy*", where the 'x*' matches the 'x' at + the beginning of the trailing context. Lex won't get these + patterns right either. + + - Faster scanners. + + - End-of-file rules. The special rule "<<EOF>>" indicates + actions which are to be taken when an end-of-file is + encountered and yywrap() returns non-zero (i.e., indicates + no further files to process). See manual entry for example. + + - The -r (reject used) flag is gone. flex now scans the input + for occurrences of the string "REJECT" to determine if the + action is needed. It tries to be intelligent about this but + can be fooled. One can force the presence or absence of + REJECT by adding a line in the first section of the form + "%used REJECT" or "%unused REJECT". + + - yymore() has been implemented. Similarly to REJECT, flex + detects the use of yymore(), which can be overridden using + "%used" or "%unused". + + - Patterns like "x{0,3}" now work (i.e., with lower-limit == 0). + + - Removed '\^x' for ctrl-x misfeature. + + - Added '\a' and '\v' escape sequences.
+ + - \<digits> now works for octal escape sequences; previously + \0<digits> was required. + + - Better error reporting; line numbers are associated with rules. + + - yyleng is a macro; it cannot be accessed outside of the + scanner source file. + + - yytext and yyleng should not be modified within a flex action. + + - Generated scanners #define the name FLEX_SCANNER. + + - Rules are internally separated by YY_BREAK in lex.yy.c rather + than break, to allow redefinition. + + - The macro YY_USER_ACTION can be redefined to provide an action + which is always executed prior to the matched rule's action. + + - yyrestart() is a new action which can be used to restart + the scanner after it has seen an end-of-file (a "real" one, + that is, one for which yywrap() returned non-zero). It takes + a FILE* argument indicating a new file to scan and sets + things up so that a subsequent call to yylex() will start + scanning that file. + + - Internal scanner names all preceded by "yy_" + + - lex.yy.c is deleted if errors are encountered during processing. + + - Comments may be put in the first section of the input by preceding + them with '#'. + + + + Other changes: + + - Some portability-related bugs fixed, in particular for machines + with unsigned characters or sizeof( int* ) != sizeof( int ). + Also, tweaks for VMS and Microsoft C (MS-DOS), and identifiers all + trimmed to be 31 or fewer characters. Shortened file names + for dinosaur OS's. Checks for allocating > 64K memory + on 16 bit'ers. Amiga tweaks. Compiles using gcc on a Sun-3. + - Compressed and fast scanner skeletons merged. + - Skeleton header files done away with. + - Generated scanner uses prototypes and "const" for __STDC__. + - -DSV flag is now -DSYS_V for System V compilation. + - Removed all references to FTL language. + - Software now covered by BSD Copyright. + - flex will replace lex in subsequent BSD releases.
diff --git a/util/flex/Headers b/util/flex/Headers new file mode 100644 index 000000000..775f33b6c --- /dev/null +++ b/util/flex/Headers @@ -0,0 +1,25 @@ +This file contains the original RCS Headers. Unfortunately, RCS will destroy +them as soon as we bring our version under RCS. This file lives under RCS as +well, so all occurrences of a $ followed by Header are changed into $header. + +Makefile:# @(#) $header: /usr/fsys/odin/a/vern/flex/RCS/Makefile,v 2.9 90/05/26 17:28:44 vern Exp $ (LBL) +README:// $header: /usr/fsys/odin/a/vern/flex/RCS/README,v 2.8 90/05/26 17:31:27 vern Exp $ +ccl.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/ccl.c,v 2.5 90/06/27 23:48:13 vern Exp $ (LBL)"; +dfa.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/dfa.c,v 2.7 90/06/27 23:48:15 vern Exp $ (LBL)"; +ecs.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/ecs.c,v 2.5 90/06/27 23:48:17 vern Exp $ (LBL)"; +flex.skel: * $header: /usr/fsys/odin/a/vern/flex/RCS/flex.skel,v 2.16 90/08/03 14:09:36 vern Exp $ +flexdef.h:/* @(#) $header: /usr/fsys/odin/a/vern/flex/RCS/flexdef.h,v 2.10 90/08/03 14:09:52 vern Exp $ (LBL) */ +gen.c: "@(#) $header: /usr/helios/u0/vern/flex/RCS/gen.c,v 2.10 90/08/29 12:11:13 vern Exp $ (LBL)"; +initscan.c: * $header: /usr/fsys/odin/a/vern/flex/RCS/flex.skel,v 2.16 90/08/03 14:09:36 vern Exp $ +initscan.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/scan.l,v 2.9 90/06/27 23:48:34 vern Exp $ (LBL)"; +libmain.c:/* $header: /usr/fsys/odin/a/vern/flex/RCS/libmain.c,v 1.2 90/05/26 16:50:08 vern Exp $ */ +main.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/main.c,v 2.9 90/06/27 23:48:24 vern Exp $ (LBL)"; +misc.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/misc.c,v 2.9 90/08/14 00:10:24 vern Exp $ (LBL)"; +nfa.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/nfa.c,v 2.6 90/06/27 23:48:29 vern Exp $ (LBL)"; +parse.y: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/parse.y,v 2.7 90/06/27 23:48:31 vern Exp $ (LBL)"; +scan.c: * $header:
/usr/fsys/odin/a/vern/flex/RCS/flex.skel,v 2.16 90/08/03 14:09:36 vern Exp $ +scan.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/scan.l,v 2.9 90/06/27 23:48:34 vern Exp $ (LBL)"; +scan.l: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/scan.l,v 2.9 90/06/27 23:48:34 vern Exp $ (LBL)"; +sym.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/sym.c,v 2.4 90/06/27 23:48:36 vern Exp $ (LBL)"; +tblcmp.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/tblcmp.c,v 2.5 90/06/27 23:48:38 vern Exp $ (LBL)"; +yylex.c: "@(#) $header: /usr/fsys/odin/a/vern/flex/RCS/yylex.c,v 2.5 90/06/27 23:48:40 vern Exp $ (LBL)"; diff --git a/util/flex/Makefile b/util/flex/Makefile new file mode 100644 index 000000000..2cbeaefbf --- /dev/null +++ b/util/flex/Makefile @@ -0,0 +1,190 @@ +# make file for "flex" tool + +# @(#) $Header$ (LBL) + +# Porting considerations: +# +# For System V Unix machines, add -DUSG to CFLAGS (if it's not +# automatically defined) +# For Vax/VMS, add "-DVMS -DUSG" to CFLAGS. +# For MS-DOS, add "-DMS_DOS -DUSG" to CFLAGS. Create \tmp if not present. +# You will also want to rename flex.skel to something with a three +# character extension, change SKELETON_FILE below appropriately, +# See MSDOS.notes for more info. +# For Amiga, add "-DAMIGA -DUSG" to CFLAGS. +# For SCO Unix, add "-DSCO_UNIX" to CFLAGS. +# +# For C compilers which don't know about "void", add -Dvoid=int to CFLAGS. +# +# If your C compiler is ANSI standard but does not include the <stdlib.h> +# header file (some installations of gcc have this problem), then add +# -DDONT_HAVE_STDLIB_H to CFLAGS. +# +# By default, flex will be configured to generate 8-bit scanners only +# if the -8 flag is given. If you want it to always generate 8-bit +# scanners, add "-DDEFAULT_CSIZE=256" to CFLAGS. Note that doing +# so will double the size of all uncompressed scanners.
+# +# If on your system you have trouble building flex due to 8-bit +# character problems, remove the -8 from FLEX_FLAGS and the +# "#define FLEX_8_BIT_CHARS" from the beginning of flexdef.h. + + +# the first time around use "make first_flex" + + +# Installation targeting. Files will be installed under the tree rooted +# at DESTDIR. User commands will be installed in BINDIR, library files +# in LIBDIR (which will be created if necessary), auxiliary files in +# AUXDIR, manual pages will be installed in MANDIR with extension MANEXT. +# Raw, unformatted troff source will be installed if INSTALLMAN=man, nroff +# preformatted versions will be installed if INSTALLMAN=cat. +DESTDIR = +BINDIR = /usr/local +LIBDIR = /usr/local/lib +AUXDIR = /usr/local/lib +MANDIR = /usr/man/manl +MANEXT = l +INSTALLMAN = man + +# MAKE = make + + +SKELETON_FILE = $(DESTDIR)$(AUXDIR)/flex.skel +SKELFLAGS = -DDEFAULT_SKELETON_FILE=\"$(SKELETON_FILE)\" +CFLAGS = -O +LDFLAGS = -s + +COMPRESSION = +FLEX_FLAGS = -ist8 -Sflex.skel +# which "flex" to use to generate scan.c from scan.l +FLEX = ./flex +# CC = cc + +AR = ar +RANLIB = ranlib + +FLEXOBJS = \ + ccl.o \ + dfa.o \ + ecs.o \ + gen.o \ + main.o \ + misc.o \ + nfa.o \ + parse.o \ + scan.o \ + sym.o \ + tblcmp.o \ + yylex.o + +FLEX_C_SOURCES = \ + ccl.c \ + dfa.c \ + ecs.c \ + gen.c \ + main.c \ + misc.c \ + nfa.c \ + parse.c \ + scan.c \ + sym.c \ + tblcmp.c \ + yylex.c + +FLEX_LIB_OBJS = \ + libmain.o + +FLEXLIB = flexlib.a + + +all : flex $(FLEXLIB) + +flex : $(FLEXOBJS) + $(CC) $(CFLAGS) -o flex $(LDFLAGS) $(FLEXOBJS) + +first_flex: + cp initscan.c scan.c + $(MAKE) $(MFLAGS) flex + +parse.h parse.c : parse.y + $(YACC) -d parse.y + @mv y.tab.c parse.c + @mv y.tab.h parse.h + +scan.c : scan.l + $(FLEX) $(FLEX_FLAGS) $(COMPRESSION) scan.l >scan.c + +scan.o : scan.c parse.h flexdef.h + +main.o : main.c flexdef.h + $(CC) $(CFLAGS) -c $(SKELFLAGS) main.c + +ccl.o : ccl.c flexdef.h +dfa.o : dfa.c flexdef.h +ecs.o : ecs.c flexdef.h +gen.o : gen.c 
flexdef.h +misc.o : misc.c flexdef.h +nfa.o : nfa.c flexdef.h +parse.o : parse.c flexdef.h +sym.o : sym.c flexdef.h +tblcmp.o : tblcmp.c flexdef.h +yylex.o : yylex.c flexdef.h + +flex.man : flex.1 + nroff -man flex.1 >flex.man + +$(FLEXLIB) : $(FLEX_LIB_OBJS) + $(AR) cru $(FLEXLIB) $(FLEX_LIB_OBJS) + +lint : $(FLEX_C_SOURCES) + lint $(FLEX_C_SOURCES) > flex.lint + +distrib : + mv scan.c initscan.c + chmod 444 initscan.c + $(MAKE) $(MFLAGS) clean + +install: flex $(DESTDIR)$(LIBDIR) flex.skel install.$(INSTALLMAN) install-lib + install -s -m 755 flex $(DESTDIR)$(BINDIR)/flex + install -c -m 644 flex.skel $(SKELETON_FILE) + +install-lib: $(DESTDIR)$(LIBDIR) $(FLEXLIB) + install -c -m 644 $(FLEXLIB) $(DESTDIR)$(LIBDIR)/libfl.a + $(RANLIB) $(DESTDIR)$(LIBDIR)/libfl.a + +$(DESTDIR)$(LIBDIR): + mkdir $@ + +install.man: flex.1 flexdoc.1 + install -c -m 644 flex.1 $(DESTDIR)$(MANDIR)/flex.$(MANEXT) + install -c -m 644 flexdoc.1 $(DESTDIR)$(MANDIR)/flexdoc.$(MANEXT) + +install.cat: flex.1 flexdoc.1 + nroff -h -man flex.1 > $(DESTDIR)$(MANDIR)/flex.$(MANEXT) + nroff -h -man flexdoc.1 > $(DESTDIR)$(MANDIR)/flexdoc.$(MANEXT) + chmod 644 $(DESTDIR)$(MANDIR)/flex.$(MANEXT) + chmod 644 $(DESTDIR)$(MANDIR)/flexdoc.$(MANEXT) + +clean : + rm -f core errs flex *.o parse.c *.lint parse.h flex.man tags \ + $(FLEXLIB) + +tags : + ctags $(FLEX_C_SOURCES) + +vms : flex.man + $(MAKE) $(MFLAGS) distrib + +test : flex + ./flex $(FLEX_FLAGS) $(COMPRESSION) scan.l | diff scan.c - + +bigtest : + rm -f scan.c ; $(MAKE) COMPRESSION="-C" test + rm -f scan.c ; $(MAKE) COMPRESSION="-Ce" test + rm -f scan.c ; $(MAKE) COMPRESSION="-Cm" test + rm -f scan.c ; $(MAKE) COMPRESSION="-Cfe" test + rm -f scan.c ; $(MAKE) COMPRESSION="-CFe" test + rm -f scan.c ; $(MAKE) COMPRESSION="-Cf" test + rm -f scan.c ; $(MAKE) COMPRESSION="-CF" test + rm -f scan.c ; $(MAKE) diff --git a/util/flex/README b/util/flex/README new file mode 100644 index 000000000..a407467d6 --- /dev/null +++ b/util/flex/README @@ -0,0 +1,78 
@@ +// $Header$ + +This is release 2.3 of flex - a full release. + +The flex distribution consists of the following files: + + README This message + + Makefile + flexdef.h + parse.y + scan.l + ccl.c + dfa.c + ecs.c flex sources + gen.c + main.c + misc.c + nfa.c + sym.c + tblcmp.c + yylex.c + + libmain.c flex library (-lfl) source + + initscan.c pre-flex'd version of scan.l + + flex.skel skeleton for generated scanners + + flexdoc.1 full user documentation + flex.1 reference documentation + + Changes Differences between this release and the previous one + + COPYING flex's copyright + + MISC/ a directory containing miscellaneous porting-related + notes (for Atari, MS-DOS, Turbo-C, and VMS) + + +Decide where you want to keep flex.skel (suggestion: /usr/local/lib), +but don't move it there yet. Edit "Makefile" and change the definition +of SKELETON_FILE to reflect the full pathname of flex.skel. + +Read the "Porting considerations" note in the Makefile and make +the necessary changes. + +To make flex for the first time, use: + + make first_flex + +which uses the pre-generated copy of the flex scanner (the scanner +itself is written using flex). + +Assuming it builds successfully, you can test it using + + make test + +The "diff" should not show any differences. + +If you're feeling adventurous, issue "make bigtest" and be prepared +to wait a while. + +Install flex using: + + make install + + +Please send problems and feedback to: + + vern@cs.cornell.edu + decvax!cornell!vern + + Vern Paxson + CS Department + 4126 Upson Hall + Cornell University + Ithaca, NY 14853-7501 diff --git a/util/flex/ccl.c b/util/flex/ccl.c new file mode 100644 index 000000000..45714b5e3 --- /dev/null +++ b/util/flex/ccl.c @@ -0,0 +1,175 @@ +/* ccl - routines for character classes */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. 
+ * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + +#include "flexdef.h" + +/* ccladd - add a single character to a ccl + * + * synopsis + * int cclp; + * int ch; + * ccladd( cclp, ch ); + * + * NOTES + * Nothing is done if ch is already a member of the ccl. Otherwise ch is + * stored at ccltbl[cclmap[cclp] + ccllen[cclp]] and the ccl's length is + * bumped; ccltbl is first grown by MAX_CCL_TBL_SIZE_INCREMENT if that + * slot lies at or beyond current_max_ccl_tbl_size. + * + * NOTE(review): cclinit() starts each new ccl directly after the end of + * the previous one, so adding to a ccl that is no longer the newest + * would land on its successor's first slot - presumably callers only + * add to the most recently created ccl; confirm. + */ + +void ccladd( cclp, ch ) +int cclp; +int ch; + + { + int ind, len, newpos, i; + + len = ccllen[cclp]; + ind = cclmap[cclp]; + + /* check to see if the character is already in the ccl */ + + for ( i = 0; i < len; ++i ) + if ( ccltbl[ind + i] == ch ) + return; + + newpos = ind + len; + + if ( newpos >= current_max_ccl_tbl_size ) + { + current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; + + ++num_reallocs; + + ccltbl = reallocate_character_array( ccltbl, current_max_ccl_tbl_size ); + } + + ccllen[cclp] = len + 1; + ccltbl[newpos] = ch; + } + + +/* cclinit - make an empty ccl + * + * synopsis + * int cclinit(); + * new_ccl = cclinit(); + * + * NOTES + * new_ccl is the updated value of lastccl. The cclmap, ccllen and cclng + * arrays are grown by MAX_CCLS_INCREMENT entries when lastccl reaches + * current_maxccls. + */ + +int cclinit() + + { + if ( ++lastccl >= current_maxccls ) + { + current_maxccls += MAX_CCLS_INCREMENT; + + ++num_reallocs; + + cclmap = reallocate_integer_array( cclmap, current_maxccls ); + ccllen = reallocate_integer_array( ccllen, current_maxccls ); + cclng = reallocate_integer_array( cclng, current_maxccls ); + } + + if ( lastccl == 1 ) + /* we're making the first ccl */ + cclmap[lastccl] = 0; + + else + /* the new pointer is just past the end of the last ccl.
Since + * the cclmap points to the \first/ character of a ccl, adding the + * length of the ccl to the cclmap pointer will produce a cursor + * to the first free space + */ + cclmap[lastccl] = cclmap[lastccl - 1] + ccllen[lastccl - 1]; + + ccllen[lastccl] = 0; + cclng[lastccl] = 0; /* ccl's start out life un-negated */ + + return ( lastccl ); + } + + +/* cclnegate - negate a ccl + * + * synopsis + * int cclp; + * cclnegate( cclp ); + * + * only sets the negation flag cclng[cclp]; the member list is left as is + */ + +void cclnegate( cclp ) +int cclp; + + { + cclng[cclp] = 1; + } + + +/* list_character_set - list the members of a set of characters in CCL form + * + * synopsis + * int cset[CSIZE]; + * FILE *file; + * list_character_set( file, cset ); + * + * writes to the given file a character-class representation of those + * characters present in the given set. A character is present if it + * has a non-zero value in the set array. Only the first csize entries + * of the set are examined. Two or more consecutive members are written + * as a run ("first-last"); each member or run is preceded and followed + * by a blank, and the whole list is enclosed in square brackets. + */ + +void list_character_set( file, cset ) +FILE *file; +int cset[]; + + { + register int i; + char *readable_form(); + + putc( '[', file ); + + for ( i = 0; i < csize; ++i ) + { + if ( cset[i] ) + { + register int start_char = i; + + putc( ' ', file ); + + fputs( readable_form( i ), file ); + + /* advance i to the first non-member (or csize) after start_char */ + while ( ++i < csize && cset[i] ) + ; + + if ( i - 1 > start_char ) + /* this was a run */ + fprintf( file, "-%s", readable_form( i - 1 ) ); + + putc( ' ', file ); + } + } + + putc( ']', file ); + } diff --git a/util/flex/dfa.c b/util/flex/dfa.c new file mode 100644 index 000000000..b312ce4bd --- /dev/null +++ b/util/flex/dfa.c @@ -0,0 +1,1075 @@ +/* dfa - DFA construction routines */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California.
+ * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + +#include "flexdef.h" + + +/* declare functions that have forward references */ + +void dump_associated_rules PROTO((FILE*, int)); +void dump_transitions PROTO((FILE*, int[])); +void sympartition PROTO((int[], int, int[], int[])); +int symfollowset PROTO((int[], int, int, int[])); + + +/* check_for_backtracking - check a DFA state for backtracking + * + * synopsis + * int ds, state[numecs]; + * check_for_backtracking( ds, state ); + * + * ds is the number of the state to check and state[] is its out-transitions, + * indexed by equivalence class. The rules associated with the state are + * not passed in; they are looked up from ds when the report is written. + * + * A non-accepting state bumps num_backtracking and, if backtrack_report + * is set, is described in backtrack_file. + */ + +void check_for_backtracking( ds, state ) +int ds; +int state[]; + + { + /* NOTE(review): the dfaacc_state test below is not guarded by ! reject, + * so it is also evaluated when reject is set and dfaacc_set is non-null. + * If the two fields share storage (dfaacc is declared outside this + * file - confirm), that reads the member which was not stored. + */ + if ( (reject && ! dfaacc[ds].dfaacc_set) || ! 
dfaacc[ds].dfaacc_state ) + { /* state is non-accepting */ + ++num_backtracking; + + if ( backtrack_report ) + { + fprintf( backtrack_file, "State #%d is non-accepting -\n", ds ); + + /* identify the state */ + dump_associated_rules( backtrack_file, ds ); + + /* now identify it further using the out- and jam-transitions */ + dump_transitions( backtrack_file, state ); + + putc( '\n', backtrack_file ); + } + } + } + + +/* check_trailing_context - check to see if NFA state set constitutes + * "dangerous" trailing context + * + * synopsis + * int nfa_states[num_states+1], num_states; + * int accset[nacc+1], nacc; + * check_trailing_context( nfa_states, num_states, accset, nacc ); + * + * NOTES + * Trailing context is "dangerous" if both the head and the trailing + * part are of variable size \and/ there's a DFA state which contains + * both an accepting state for the head part of the rule and NFA states + * which occur after the beginning of the trailing context. + * When such a rule is matched, it's impossible to tell if having been + * in the DFA state indicates the beginning of the trailing context + * or further-along scanning of the pattern. In these cases, a warning + * message is issued. + * + * nfa_states[1 .. num_states] is the list of NFA states in the DFA. + * accset[1 .. nacc] is the list of accepting numbers for the DFA state. + * + * At most one warning is written (to stderr) per call; the routine + * returns as soon as the first offending state has been reported. + */ + +void check_trailing_context( nfa_states, num_states, accset, nacc ) +int *nfa_states, num_states; +int *accset; +register int nacc; + + { + register int i, j; + + for ( i = 1; i <= num_states; ++i ) + { + int ns = nfa_states[i]; + register int type = state_type[ns]; + register int ar = assoc_rule[ns]; + + if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE ) + { /* do nothing */ + } + + else if ( type == STATE_TRAILING_CONTEXT ) + { + /* potential trouble. Scan set of accepting numbers for + * the one marking the end of the "head".
We assume that + * this looping will be fairly cheap since it's rare that + * an accepting number set is large. + */ + for ( j = 1; j <= nacc; ++j ) + if ( accset[j] & YY_TRAILING_HEAD_MASK ) + { + fprintf( stderr, + "%s: Dangerous trailing context in rule at line %d\n", + program_name, rule_linenum[ar] ); + return; + } + } + } + } + + +/* dump_associated_rules - list the rules associated with a DFA state + * + * synopsis + * int ds; + * FILE *file; + * dump_associated_rules( file, ds ); + * + * goes through the set of NFA states associated with the DFA and + * extracts the first MAX_ASSOC_RULES unique rules, sorts them, + * and writes a report to the given file + */ + +void dump_associated_rules( file, ds ) +FILE *file; +int ds; + + { + register int i, j; + register int num_associated_rules = 0; + int rule_set[MAX_ASSOC_RULES + 1]; + int *dset = dss[ds]; + int size = dfasiz[ds]; + + for ( i = 1; i <= size; ++i ) + { + register rule_num = rule_linenum[assoc_rule[dset[i]]]; + + for ( j = 1; j <= num_associated_rules; ++j ) + if ( rule_num == rule_set[j] ) + break; + + if ( j > num_associated_rules ) + { /* new rule */ + if ( num_associated_rules < MAX_ASSOC_RULES ) + rule_set[++num_associated_rules] = rule_num; + } + } + + bubble( rule_set, num_associated_rules ); + + fprintf( file, " associated rule line numbers:" ); + + for ( i = 1; i <= num_associated_rules; ++i ) + { + if ( i % 8 == 1 ) + putc( '\n', file ); + + fprintf( file, "\t%d", rule_set[i] ); + } + + putc( '\n', file ); + } + + +/* dump_transitions - list the transitions associated with a DFA state + * + * synopsis + * int state[numecs]; + * FILE *file; + * dump_transitions( file, state ); + * + * goes through the set of out-transitions and lists them in human-readable + * form (i.e., not as equivalence classes); also lists jam transitions + * (i.e., all those which are not out-transitions, plus EOF). The dump + * is done to the given file.
+ */ + +void dump_transitions( file, state ) +FILE *file; +int state[]; + + { + register int i, ec; + int out_char_set[CSIZE]; + + for ( i = 0; i < csize; ++i ) + { + ec = abs( ecgroup[i] ); + out_char_set[i] = state[ec]; + } + + fprintf( file, " out-transitions: " ); + + list_character_set( file, out_char_set ); + + /* now invert the members of the set to get the jam transitions */ + for ( i = 0; i < csize; ++i ) + out_char_set[i] = ! out_char_set[i]; + + fprintf( file, "\n jam-transitions: EOF " ); + + list_character_set( file, out_char_set ); + + putc( '\n', file ); + } + + +/* epsclosure - construct the epsilon closure of a set of ndfa states + * + * synopsis + * int t[current_max_dfa_size], numstates, accset[num_rules + 1], nacc; + * int hashval; + * int *epsclosure(); + * t = epsclosure( t, &numstates, accset, &nacc, &hashval ); + * + * NOTES + * the epsilon closure is the set of all states reachable by an arbitrary + * number of epsilon transitions which themselves do not have epsilon + * transitions going out, unioned with the set of states which have non-null + * accepting numbers. t is an array of size numstates of nfa state numbers. + * Upon return, t holds the epsilon closure and numstates is updated. accset + * holds a list of the accepting numbers, and the size of accset is given + * by nacc. t may be subjected to reallocation if it is not large enough + * to hold the epsilon closure. 
+ * + * hashval is the hash value for the dfa corresponding to the state set + */ + +int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr ) +int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; + + { + register int stkpos, ns, tsp; + int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; + int stkend, nstate; + static int did_stk_init = false, *stk; + +#define MARK_STATE(state) \ + trans1[state] = trans1[state] - MARKER_DIFFERENCE; + +#define IS_MARKED(state) (trans1[state] < 0) + +#define UNMARK_STATE(state) \ + trans1[state] = trans1[state] + MARKER_DIFFERENCE; + +#define CHECK_ACCEPT(state) \ + { \ + nfaccnum = accptnum[state]; \ + if ( nfaccnum != NIL ) \ + accset[++nacc] = nfaccnum; \ + } + +#define DO_REALLOCATION \ + { \ + current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ + ++num_reallocs; \ + t = reallocate_integer_array( t, current_max_dfa_size ); \ + stk = reallocate_integer_array( stk, current_max_dfa_size ); \ + } \ + +#define PUT_ON_STACK(state) \ + { \ + if ( ++stkend >= current_max_dfa_size ) \ + DO_REALLOCATION \ + stk[stkend] = state; \ + MARK_STATE(state) \ + } + +#define ADD_STATE(state) \ + { \ + if ( ++numstates >= current_max_dfa_size ) \ + DO_REALLOCATION \ + t[numstates] = state; \ + hashval = hashval + state; \ + } + +#define STACK_STATE(state) \ + { \ + PUT_ON_STACK(state) \ + CHECK_ACCEPT(state) \ + if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ + ADD_STATE(state) \ + } + + if ( ! did_stk_init ) + { + stk = allocate_integer_array( current_max_dfa_size ); + did_stk_init = true; + } + + nacc = stkend = hashval = 0; + + for ( nstate = 1; nstate <= numstates; ++nstate ) + { + ns = t[nstate]; + + /* the state could be marked if we've already pushed it onto + * the stack + */ + if ( ! 
IS_MARKED(ns) ) + PUT_ON_STACK(ns) + + CHECK_ACCEPT(ns) + hashval = hashval + ns; + } + + for ( stkpos = 1; stkpos <= stkend; ++stkpos ) + { + ns = stk[stkpos]; + transsym = transchar[ns]; + + if ( transsym == SYM_EPSILON ) + { + tsp = trans1[ns] + MARKER_DIFFERENCE; + + if ( tsp != NO_TRANSITION ) + { + if ( ! IS_MARKED(tsp) ) + STACK_STATE(tsp) + + tsp = trans2[ns]; + + if ( tsp != NO_TRANSITION ) + if ( ! IS_MARKED(tsp) ) + STACK_STATE(tsp) + } + } + } + + /* clear out "visit" markers */ + + for ( stkpos = 1; stkpos <= stkend; ++stkpos ) + { + if ( IS_MARKED(stk[stkpos]) ) + { + UNMARK_STATE(stk[stkpos]) + } + else + flexfatal( "consistency check failed in epsclosure()" ); + } + + *ns_addr = numstates; + *hv_addr = hashval; + *nacc_addr = nacc; + + return ( t ); + } + + +/* increase_max_dfas - increase the maximum number of DFAs */ + +void increase_max_dfas() + + { + current_max_dfas += MAX_DFAS_INCREMENT; + + ++num_reallocs; + + base = reallocate_integer_array( base, current_max_dfas ); + def = reallocate_integer_array( def, current_max_dfas ); + dfasiz = reallocate_integer_array( dfasiz, current_max_dfas ); + accsiz = reallocate_integer_array( accsiz, current_max_dfas ); + dhash = reallocate_integer_array( dhash, current_max_dfas ); + dss = reallocate_int_ptr_array( dss, current_max_dfas ); + dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas ); + + if ( nultrans ) + nultrans = reallocate_integer_array( nultrans, current_max_dfas ); + } + + +/* ntod - convert an ndfa to a dfa + * + * synopsis + * ntod(); + * + * creates the dfa corresponding to the ndfa we've constructed. the + * dfa starts out in state #1. 
+ */ + +void ntod() + + { + int *accset, ds, nacc, newds; + int sym, hashval, numstates, dsize; + int num_full_table_rows; /* used only for -f */ + int *nset, *dset; + int targptr, totaltrans, i, comstate, comfreq, targ; + int *epsclosure(), snstods(), symlist[CSIZE + 1]; + int num_start_states; + int todo_head, todo_next; + + /* note that the following are indexed by *equivalence classes* + * and not by characters. Since equivalence classes are indexed + * beginning with 1, even if the scanner accepts NUL's, this + * means that (since every character is potentially in its own + * equivalence class) these arrays must have room for indices + * from 1 to CSIZE, so their size must be CSIZE + 1. + */ + int duplist[CSIZE + 1], state[CSIZE + 1]; + int targfreq[CSIZE + 1], targstate[CSIZE + 1]; + + /* this is so find_table_space(...) will know where to start looking in + * chk/nxt for unused records for space to put in the state + */ + if ( fullspd ) + firstfree = 0; + + accset = allocate_integer_array( num_rules + 1 ); + nset = allocate_integer_array( current_max_dfa_size ); + + /* the "todo" queue is represented by the head, which is the DFA + * state currently being processed, and the "next", which is the + * next DFA state number available (not in use). We depend on the + * fact that snstods() returns DFA's \in increasing order/, and thus + * need only know the bounds of the dfas to be processed. + */ + todo_head = todo_next = 0; + + for ( i = 0; i <= csize; ++i ) + { + duplist[i] = NIL; + symlist[i] = false; + } + + for ( i = 0; i <= num_rules; ++i ) + accset[i] = NIL; + + if ( trace ) + { + dumpnfa( scset[1] ); + fputs( "\n\nDFA Dump:\n\n", stderr ); + } + + inittbl(); + + /* check to see whether we should build a separate table for transitions + * on NUL characters. We don't do this for full-speed (-F) scanners, + * since for them we don't have a simple state number lying around with + * which to index the table. 
We also don't bother doing it for scanners + * unless (1) NUL is in its own equivalence class (indicated by a + * positive value of ecgroup[NUL]), (2) NUL's equivalence class is + * the last equivalence class, and (3) the number of equivalence classes + * is the same as the number of characters. This latter case comes about + * when useecs is false or when it's true but every character still + * manages to land in its own class (unlikely, but it's cheap to check + * for). If all these things are true then the character code needed + * to represent NUL's equivalence class for indexing the tables is + * going to take one more bit than the number of characters, and therefore + * we won't be assured of being able to fit it into a YY_CHAR variable. + * This rules out storing the transitions in a compressed table, since + * the code for interpreting them uses a YY_CHAR variable (perhaps it + * should just use an integer, though; this is worth pondering ... ###). + * + * Finally, for full tables, we want the number of entries in the + * table to be a power of two so the array references go fast (it + * will just take a shift to compute the major index). If encoding + * NUL's transitions in the table will spoil this, we give it its + * own table (note that this will be the case if we're not using + * equivalence classes). + */ + + /* note that the test for ecgroup[0] == numecs below accomplishes + * both (1) and (2) above + */ + if ( ! fullspd && ecgroup[0] == numecs ) + { /* NUL is alone in its equivalence class, which is the last one */ + int use_NUL_table = (numecs == csize); + + if ( fulltbl && !
use_NUL_table ) + { /* we still may want to use the table if numecs is a power of 2 */ + int power_of_two; + + for ( power_of_two = 1; power_of_two <= csize; power_of_two *= 2 ) + if ( numecs == power_of_two ) + { + use_NUL_table = true; + break; + } + } + + if ( use_NUL_table ) + nultrans = allocate_integer_array( current_max_dfas ); + /* from now on, nultrans != nil indicates that we're + * saving null transitions for later, separate encoding + */ + } + + + if ( fullspd ) + { + for ( i = 0; i <= numecs; ++i ) + state[i] = 0; + place_state( state, 0, 0 ); + } + + else if ( fulltbl ) + { + if ( nultrans ) + /* we won't be including NUL's transitions in the table, + * so build it for entries from 0 .. numecs - 1 + */ + num_full_table_rows = numecs; + + else + /* take into account the fact that we'll be including + * the NUL entries in the transition table. Build it + * from 0 .. numecs. + */ + num_full_table_rows = numecs + 1; + + /* declare it "short" because it's a real long-shot that that + * won't be large enough. 
+ */ + printf( "static short int yy_nxt[][%d] =\n {\n", + /* '}' so vi doesn't get too confused */ + num_full_table_rows ); + + /* generate 0 entries for state #0 */ + for ( i = 0; i < num_full_table_rows; ++i ) + mk2data( 0 ); + + /* force ',' and dataflush() next call to mk2data */ + datapos = NUMDATAITEMS; + + /* force extra blank line next dataflush() */ + dataline = NUMDATALINES; + } + + /* create the first states */ + + num_start_states = lastsc * 2; + + for ( i = 1; i <= num_start_states; ++i ) + { + numstates = 1; + + /* for each start condition, make one state for the case when + * we're at the beginning of the line (the '%' operator) and + * one for the case when we're not + */ + if ( i % 2 == 1 ) + nset[numstates] = scset[(i / 2) + 1]; + else + nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] ); + + nset = epsclosure( nset, &numstates, accset, &nacc, &hashval ); + + if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) ) + { + numas += nacc; + totnst += numstates; + ++todo_next; + + if ( variable_trailing_context_rules && nacc > 0 ) + check_trailing_context( nset, numstates, accset, nacc ); + } + } + + if ( ! fullspd ) + { + if ( ! 
snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) ) + flexfatal( "could not create unique end-of-buffer state" ); + + ++numas; + ++num_start_states; + ++todo_next; + } + + while ( todo_head < todo_next ) + { + targptr = 0; + totaltrans = 0; + + for ( i = 1; i <= numecs; ++i ) + state[i] = 0; + + ds = ++todo_head; + + dset = dss[ds]; + dsize = dfasiz[ds]; + + if ( trace ) + fprintf( stderr, "state # %d:\n", ds ); + + sympartition( dset, dsize, symlist, duplist ); + + for ( sym = 1; sym <= numecs; ++sym ) + { + if ( symlist[sym] ) + { + symlist[sym] = 0; + + if ( duplist[sym] == NIL ) + { /* symbol has unique out-transitions */ + numstates = symfollowset( dset, dsize, sym, nset ); + nset = epsclosure( nset, &numstates, accset, + &nacc, &hashval ); + + if ( snstods( nset, numstates, accset, + nacc, hashval, &newds ) ) + { + totnst = totnst + numstates; + ++todo_next; + numas += nacc; + + if ( variable_trailing_context_rules && nacc > 0 ) + check_trailing_context( nset, numstates, + accset, nacc ); + } + + state[sym] = newds; + + if ( trace ) + fprintf( stderr, "\t%d\t%d\n", sym, newds ); + + targfreq[++targptr] = 1; + targstate[targptr] = newds; + ++numuniq; + } + + else + { + /* sym's equivalence class has the same transitions + * as duplist(sym)'s equivalence class + */ + targ = state[duplist[sym]]; + state[sym] = targ; + + if ( trace ) + fprintf( stderr, "\t%d\t%d\n", sym, targ ); + + /* update frequency count for destination state */ + + i = 0; + while ( targstate[++i] != targ ) + ; + + ++targfreq[i]; + ++numdup; + } + + ++totaltrans; + duplist[sym] = NIL; + } + } + + numsnpairs = numsnpairs + totaltrans; + + if ( caseins && ! 
useecs ) + { + register int j; + + for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j ) + state[i] = state[j]; + } + + if ( ds > num_start_states ) + check_for_backtracking( ds, state ); + + if ( nultrans ) + { + nultrans[ds] = state[NUL_ec]; + state[NUL_ec] = 0; /* remove transition */ + } + + if ( fulltbl ) + { + /* supply array's 0-element */ + if ( ds == end_of_buffer_state ) + mk2data( -end_of_buffer_state ); + else + mk2data( end_of_buffer_state ); + + for ( i = 1; i < num_full_table_rows; ++i ) + /* jams are marked by negative of state number */ + mk2data( state[i] ? state[i] : -ds ); + + /* force ',' and dataflush() next call to mk2data */ + datapos = NUMDATAITEMS; + + /* force extra blank line next dataflush() */ + dataline = NUMDATALINES; + } + + else if ( fullspd ) + place_state( state, ds, totaltrans ); + + else if ( ds == end_of_buffer_state ) + /* special case this state to make sure it does what it's + * supposed to, i.e., jam on end-of-buffer + */ + stack1( ds, 0, 0, JAMSTATE ); + + else /* normal, compressed state */ + { + /* determine which destination state is the most common, and + * how many transitions to it there are + */ + + comfreq = 0; + comstate = 0; + + for ( i = 1; i <= targptr; ++i ) + if ( targfreq[i] > comfreq ) + { + comfreq = targfreq[i]; + comstate = targstate[i]; + } + + bldtbl( state, ds, totaltrans, comstate, comfreq ); + } + } + + if ( fulltbl ) + dataend(); + + else if ( ! 
fullspd ) + { + cmptmps(); /* create compressed template entries */ + + /* create tables for all the states with only one out-transition */ + while ( onesp > 0 ) + { + mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp], + onedef[onesp] ); + --onesp; + } + + mkdeftbl(); + } + } + + +/* snstods - converts a set of ndfa states into a dfa state + * + * synopsis + * int sns[numstates], numstates, newds, accset[num_rules + 1], nacc, hashval; + * int snstods(); + * is_new_state = snstods( sns, numstates, accset, nacc, hashval, &newds ); + * + * on return, the dfa state number is in newds. + */ + +int snstods( sns, numstates, accset, nacc, hashval, newds_addr ) +int sns[], numstates, accset[], nacc, hashval, *newds_addr; + + { + int didsort = 0; + register int i, j; + int newds, *oldsns; + + for ( i = 1; i <= lastdfa; ++i ) + if ( hashval == dhash[i] ) + { + if ( numstates == dfasiz[i] ) + { + oldsns = dss[i]; + + if ( ! didsort ) + { + /* we sort the states in sns so we can compare it to + * oldsns quickly. we use bubble because there probably + * aren't very many states + */ + bubble( sns, numstates ); + didsort = 1; + } + + for ( j = 1; j <= numstates; ++j ) + if ( sns[j] != oldsns[j] ) + break; + + if ( j > numstates ) + { + ++dfaeql; + *newds_addr = i; + return ( 0 ); + } + + ++hshcol; + } + + else + ++hshsave; + } + + /* make a new dfa */ + + if ( ++lastdfa >= current_max_dfas ) + increase_max_dfas(); + + newds = lastdfa; + + dss[newds] = (int *) malloc( (unsigned) ((numstates + 1) * sizeof( int )) ); + + if ( ! dss[newds] ) + flexfatal( "dynamic memory failure in snstods()" ); + + /* if we haven't already sorted the states in sns, we do so now, so that + * future comparisons with it can be made quickly + */ + + if ( ! 
didsort ) + bubble( sns, numstates ); + + for ( i = 1; i <= numstates; ++i ) + dss[newds][i] = sns[i]; + + dfasiz[newds] = numstates; + dhash[newds] = hashval; + + if ( nacc == 0 ) + { + if ( reject ) + dfaacc[newds].dfaacc_set = (int *) 0; + else + dfaacc[newds].dfaacc_state = 0; + + accsiz[newds] = 0; + } + + else if ( reject ) + { + /* we sort the accepting set in increasing order so the disambiguating + * rule that the first rule listed is considered match in the event of + * ties will work. We use a bubble sort since the list is probably + * quite small. + */ + + bubble( accset, nacc ); + + dfaacc[newds].dfaacc_set = + (int *) malloc( (unsigned) ((nacc + 1) * sizeof( int )) ); + + if ( ! dfaacc[newds].dfaacc_set ) + flexfatal( "dynamic memory failure in snstods()" ); + + /* save the accepting set for later */ + for ( i = 1; i <= nacc; ++i ) + dfaacc[newds].dfaacc_set[i] = accset[i]; + + accsiz[newds] = nacc; + } + + else + { /* find lowest numbered rule so the disambiguating rule will work */ + j = num_rules + 1; + + for ( i = 1; i <= nacc; ++i ) + if ( accset[i] < j ) + j = accset[i]; + + dfaacc[newds].dfaacc_state = j; + } + + *newds_addr = newds; + + return ( 1 ); + } + + +/* symfollowset - follow the symbol transitions one step + * + * synopsis + * int ds[current_max_dfa_size], dsize, transsym; + * int nset[current_max_dfa_size], numstates; + * numstates = symfollowset( ds, dsize, transsym, nset ); + */ + +int symfollowset( ds, dsize, transsym, nset ) +int ds[], dsize, transsym, nset[]; + + { + int ns, tsp, sym, i, j, lenccl, ch, numstates; + int ccllist; + + numstates = 0; + + for ( i = 1; i <= dsize; ++i ) + { /* for each nfa state ns in the state set of ds */ + ns = ds[i]; + sym = transchar[ns]; + tsp = trans1[ns]; + + if ( sym < 0 ) + { /* it's a character class */ + sym = -sym; + ccllist = cclmap[sym]; + lenccl = ccllen[sym]; + + if ( cclng[sym] ) + { + for ( j = 0; j < lenccl; ++j ) + { /* loop through negated character class */ + ch = ccltbl[ccllist 
+ j]; + + if ( ch == 0 ) + ch = NUL_ec; + + if ( ch > transsym ) + break; /* transsym isn't in negated ccl */ + + else if ( ch == transsym ) + /* next 2 */ goto bottom; + } + + /* didn't find transsym in ccl */ + nset[++numstates] = tsp; + } + + else + for ( j = 0; j < lenccl; ++j ) + { + ch = ccltbl[ccllist + j]; + + if ( ch == 0 ) + ch = NUL_ec; + + if ( ch > transsym ) + break; + + else if ( ch == transsym ) + { + nset[++numstates] = tsp; + break; + } + } + } + + else if ( sym >= 'A' && sym <= 'Z' && caseins ) + flexfatal( "consistency check failed in symfollowset" ); + + else if ( sym == SYM_EPSILON ) + { /* do nothing */ + } + + else if ( abs( ecgroup[sym] ) == transsym ) + nset[++numstates] = tsp; + +bottom: + ; + } + + return ( numstates ); + } + + +/* sympartition - partition characters with same out-transitions + * + * synopsis + * integer ds[current_max_dfa_size], numstates, duplist[numecs]; + * symlist[numecs]; + * sympartition( ds, numstates, symlist, duplist ); + */ + +void sympartition( ds, numstates, symlist, duplist ) +int ds[], numstates, duplist[]; +int symlist[]; + + { + int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; + + /* partitioning is done by creating equivalence classes for those + * characters which have out-transitions from the given state. Thus + * we are really creating equivalence classes of equivalence classes. 
+ */ + + for ( i = 1; i <= numecs; ++i ) + { /* initialize equivalence class list */ + duplist[i] = i - 1; + dupfwd[i] = i + 1; + } + + duplist[1] = NIL; + dupfwd[numecs] = NIL; + + for ( i = 1; i <= numstates; ++i ) + { + ns = ds[i]; + tch = transchar[ns]; + + if ( tch != SYM_EPSILON ) + { + if ( tch < -lastccl || tch > csize ) + { + if ( tch > csize && tch <= CSIZE ) + flexerror( "scanner requires -8 flag" ); + + else + flexfatal( + "bad transition character detected in sympartition()" ); + } + + if ( tch >= 0 ) + { /* character transition */ + /* abs() needed for fake %t ec's */ + int ec = abs( ecgroup[tch] ); + + mkechar( ec, dupfwd, duplist ); + symlist[ec] = 1; + } + + else + { /* character class */ + tch = -tch; + + lenccl = ccllen[tch]; + cclp = cclmap[tch]; + mkeccl( ccltbl + cclp, lenccl, dupfwd, duplist, numecs, + NUL_ec ); + + if ( cclng[tch] ) + { + j = 0; + + for ( k = 0; k < lenccl; ++k ) + { + ich = ccltbl[cclp + k]; + + if ( ich == 0 ) + ich = NUL_ec; + + for ( ++j; j < ich; ++j ) + symlist[j] = 1; + } + + for ( ++j; j <= numecs; ++j ) + symlist[j] = 1; + } + + else + for ( k = 0; k < lenccl; ++k ) + { + ich = ccltbl[cclp + k]; + + if ( ich == 0 ) + ich = NUL_ec; + + symlist[ich] = 1; + } + } + } + } + } diff --git a/util/flex/ecs.c b/util/flex/ecs.c new file mode 100644 index 000000000..73c07ddaf --- /dev/null +++ b/util/flex/ecs.c @@ -0,0 +1,349 @@ +/* ecs - equivalence class routines */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. 
+ * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + +#include "flexdef.h" + +/* ccl2ecl - convert character classes to set of equivalence classes + * + * synopsis + * ccl2ecl(); + */ + +void ccl2ecl() + + { + int i, ich, newlen, cclp, ccls, cclmec; + + for ( i = 1; i <= lastccl; ++i ) + { + /* we loop through each character class, and for each character + * in the class, add the character's equivalence class to the + * new "character" class we are creating. Thus when we are all + * done, character classes will really consist of collections + * of equivalence classes + */ + + newlen = 0; + cclp = cclmap[i]; + + for ( ccls = 0; ccls < ccllen[i]; ++ccls ) + { + ich = ccltbl[cclp + ccls]; + cclmec = ecgroup[ich]; + + if ( xlation && cclmec < 0 ) + { + /* special hack--if we're doing %t tables then it's + * possible that no representative of this character's + * equivalence class is in the ccl. So waiting till + * we see the representative would be disastrous. 
Instead, + * we add this character's equivalence class anyway, if it's + * not already present. + */ + int j; + + /* this loop makes this whole process n^2; but we don't + * really care about %t performance anyway + */ + for ( j = 0; j < newlen; ++j ) + if ( ccltbl[cclp + j] == -cclmec ) + break; + + if ( j >= newlen ) + { /* no representative yet, add this one in */ + ccltbl[cclp + newlen] = -cclmec; + ++newlen; + } + } + + else if ( cclmec > 0 ) + { + ccltbl[cclp + newlen] = cclmec; + ++newlen; + } + } + + ccllen[i] = newlen; + } + } + + +/* cre8ecs - associate equivalence class numbers with class members + * + * synopsis + * int cre8ecs(); + * number of classes = cre8ecs( fwd, bck, num ); + * + * fwd is the forward linked-list of equivalence class members. bck + * is the backward linked-list, and num is the number of class members. + * + * Returned is the number of classes. + */ + +int cre8ecs( fwd, bck, num ) +int fwd[], bck[], num; + + { + int i, j, numcl; + + numcl = 0; + + /* create equivalence class numbers. From now on, abs( bck(x) ) + * is the equivalence class number for object x. If bck(x) + * is positive, then x is the representative of its equivalence + * class. + */ + for ( i = 1; i <= num; ++i ) + if ( bck[i] == NIL ) + { + bck[i] = ++numcl; + for ( j = fwd[i]; j != NIL; j = fwd[j] ) + bck[j] = -numcl; + } + + return ( numcl ); + } + + +/* ecs_from_xlation - associate equivalence class numbers using %t table + * + * synopsis + * numecs = ecs_from_xlation( ecmap ); + * + * Upon return, ecmap will map each character code to its equivalence + * class. The mapping will be positive if the character is the representative + * of its class, negative otherwise. + * + * Returns the number of equivalence classes used. 
+ */ + +int ecs_from_xlation( ecmap ) +int ecmap[]; + + { + int i; + int nul_is_alone = false; + int did_default_xlation_class = false; + + if ( xlation[0] != 0 ) + { + /* if NUL shares its translation with other characters, choose one + * of the other characters as the representative for the equivalence + * class. This allows a cheap test later to see whether we can + * do away with NUL's equivalence class. + */ + for ( i = 1; i < csize; ++i ) + if ( xlation[i] == -xlation[0] ) + { + xlation[i] = xlation[0]; + ecmap[0] = -xlation[0]; + break; + } + + if ( i >= csize ) + /* didn't find a companion character--remember this fact */ + nul_is_alone = true; + } + + for ( i = 1; i < csize; ++i ) + if ( xlation[i] == 0 ) + { + if ( did_default_xlation_class ) + ecmap[i] = -num_xlations; + + else + { + /* make an equivalence class for those characters not + * specified in the %t table + */ + ++num_xlations; + ecmap[i] = num_xlations; + did_default_xlation_class = true; + } + } + + else + ecmap[i] = xlation[i]; + + if ( nul_is_alone ) + /* force NUL's equivalence class to be the last one */ + { + ++num_xlations; + ecmap[0] = num_xlations; + + /* there's actually a bug here: if someone is fanatic enough to + * put every character in its own translation class, then right + * now we just promoted NUL's equivalence class to be csize + 1; + * we can handle NUL's class number being == csize (by instead + * putting it in its own table), but we can't handle some *other* + * character having to be put in its own table, too. So in + * this case we bail out. + */ + if ( num_xlations > csize ) + flexfatal( "too many %t classes!" 
); + } + + return num_xlations; + } + + +/* mkeccl - update equivalence classes based on character class xtions + * + * synopsis + * Char ccls[]; + * int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping; + * mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping ); + * + * where ccls contains the elements of the character class, lenccl is the + * number of elements in the ccl, fwd is the forward link-list of equivalent + * characters, bck is the backward link-list, and llsiz size of the link-list + * + * NUL_mapping is the value which NUL (0) should be mapped to. + */ + +void mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping ) +Char ccls[]; +int lenccl, fwd[], bck[], llsiz, NUL_mapping; + + { + int cclp, oldec, newec; + int cclm, i, j; + static unsigned char cclflags[CSIZE]; /* initialized to all '\0' */ + + /* note that it doesn't matter whether or not the character class is + * negated. The same results will be obtained in either case. + */ + + cclp = 0; + + while ( cclp < lenccl ) + { + cclm = ccls[cclp]; + + if ( NUL_mapping && cclm == 0 ) + cclm = NUL_mapping; + + oldec = bck[cclm]; + newec = cclm; + + j = cclp + 1; + + for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] ) + { /* look for the symbol in the character class */ + for ( ; j < lenccl; ++j ) + { + register int ccl_char; + + if ( NUL_mapping && ccls[j] == 0 ) + ccl_char = NUL_mapping; + else + ccl_char = ccls[j]; + + if ( ccl_char > i ) + break; + + if ( ccl_char == i && ! cclflags[j] ) + { + /* we found an old companion of cclm in the ccl. + * link it into the new equivalence class and flag it as + * having been processed + */ + + bck[i] = newec; + fwd[newec] = i; + newec = i; + cclflags[j] = 1; /* set flag so we don't reprocess */ + + /* get next equivalence class member */ + /* continue 2 */ + goto next_pt; + } + } + + /* symbol isn't in character class. 
Put it in the old equivalence + * class + */ + + bck[i] = oldec; + + if ( oldec != NIL ) + fwd[oldec] = i; + + oldec = i; +next_pt: + ; + } + + if ( bck[cclm] != NIL || oldec != bck[cclm] ) + { + bck[cclm] = NIL; + fwd[oldec] = NIL; + } + + fwd[newec] = NIL; + + /* find next ccl member to process */ + + for ( ++cclp; cclflags[cclp] && cclp < lenccl; ++cclp ) + { + /* reset "doesn't need processing" flag */ + cclflags[cclp] = 0; + } + } + } + + +/* mkechar - create equivalence class for single character + * + * synopsis + * int tch, fwd[], bck[]; + * mkechar( tch, fwd, bck ); + */ + +void mkechar( tch, fwd, bck ) +int tch, fwd[], bck[]; + + { + /* if until now the character has been a proper subset of + * an equivalence class, break it away to create a new ec + */ + + if ( fwd[tch] != NIL ) + bck[fwd[tch]] = bck[tch]; + + if ( bck[tch] != NIL ) + fwd[bck[tch]] = fwd[tch]; + + fwd[tch] = NIL; + bck[tch] = NIL; + } diff --git a/util/flex/flex.1 b/util/flex/flex.1 new file mode 100644 index 000000000..3035f26d2 --- /dev/null +++ b/util/flex/flex.1 @@ -0,0 +1,781 @@ +.TH FLEX 1 "26 May 1990" "Version 2.3" +.SH NAME +flex - fast lexical analyzer generator +.SH SYNOPSIS +.B flex +.B [-bcdfinpstvFILT8 -C[efmF] -Sskeleton] +.I [filename ...] +.SH DESCRIPTION +.I flex +is a tool for generating +.I scanners: +programs which recognize lexical patterns in text. +.I flex +reads +the given input files, or its standard input if no file names are given, +for a description of a scanner to generate. The description is in +the form of pairs +of regular expressions and C code, called +.I rules. flex +generates as output a C source file, +.B lex.yy.c, +which defines a routine +.B yylex(). +This file is compiled and linked with the +.B -lfl +library to produce an executable. When the executable is run, +it analyzes its input for occurrences +of the regular expressions. Whenever it finds one, it executes +the corresponding C code. +.LP +For full documentation, see +.B flexdoc(1).
+This manual entry is intended for use as a quick reference. +.SH OPTIONS +.I flex +has the following options: +.TP +.B -b +Generate backtracking information to +.I lex.backtrack. +This is a list of scanner states which require backtracking +and the input characters on which they do so. By adding rules one +can remove backtracking states. If all backtracking states +are eliminated and +.B -f +or +.B -F +is used, the generated scanner will run faster. +.TP +.B -c +is a do-nothing, deprecated option included for POSIX compliance. +.IP +.B NOTE: +in previous releases of +.I flex +.B -c +specified table-compression options. This functionality is +now given by the +.B -C +flag. To ease the impact of this change, when +.I flex +encounters +.B -c, +it currently issues a warning message and assumes that +.B -C +was desired instead. In the future this "promotion" of +.B -c +to +.B -C +will go away in the name of full POSIX compliance (unless +the POSIX meaning is removed first). +.TP +.B -d +makes the generated scanner run in +.I debug +mode. Whenever a pattern is recognized and the global +.B yy_flex_debug +is non-zero (which is the default), the scanner will +write to +.I stderr +a line of the form: +.nf + + --accepting rule at line 53 ("the matched text") + +.fi +The line number refers to the location of the rule in the file +defining the scanner (i.e., the file that was fed to flex). Messages +are also generated when the scanner backtracks, accepts the +default rule, reaches the end of its input buffer (or encounters +a NUL; the two look the same as far as the scanner's concerned), +or reaches an end-of-file. +.TP +.B -f +specifies (take your pick) +.I full table +or +.I fast scanner. +No table compression is done. The result is large but fast. +This option is equivalent to +.B -Cf +(see below). +.TP +.B -i +instructs +.I flex +to generate a +.I case-insensitive +scanner.
The case of letters given in the +.I flex +input patterns will +be ignored, and tokens in the input will be matched regardless of case. The +matched text given in +.I yytext +will have the preserved case (i.e., it will not be folded). +.TP +.B -n +is another do-nothing, deprecated option included only for +POSIX compliance. +.TP +.B -p +generates a performance report to stderr. The report +consists of comments regarding features of the +.I flex +input file which will cause a loss of performance in the resulting scanner. +.TP +.B -s +causes the +.I default rule +(that unmatched scanner input is echoed to +.I stdout) +to be suppressed. If the scanner encounters input that does not +match any of its rules, it aborts with an error. +.TP +.B -t +instructs +.I flex +to write the scanner it generates to standard output instead +of +.B lex.yy.c. +.TP +.B -v +specifies that +.I flex +should write to +.I stderr +a summary of statistics regarding the scanner it generates. +.TP +.B -F +specifies that the +.ul +fast +scanner table representation should be used. This representation is +about as fast as the full table representation +.ul +(-f), +and for some sets of patterns will be considerably smaller (and for +others, larger). See +.B flexdoc(1) +for details. +.IP +This option is equivalent to +.B -CF +(see below). +.TP +.B -I +instructs +.I flex +to generate an +.I interactive +scanner, that is, a scanner which stops immediately rather than +looking ahead if it knows +that the currently scanned text cannot be part of a longer rule's match. +Again, see +.B flexdoc(1) +for details. +.IP +Note, +.B -I +cannot be used in conjunction with +.I full +or +.I fast tables, +i.e., the +.B -f, -F, -Cf, +or +.B -CF +flags. +.TP +.B -L +instructs +.I flex +not to generate +.B #line +directives in +.B lex.yy.c. 
+The default is to generate such directives so error +messages in the actions will be correctly +located with respect to the original +.I flex +input file, and not to +the fairly meaningless line numbers of +.B lex.yy.c. +.TP +.B -T +makes +.I flex +run in +.I trace +mode. It will generate a lot of messages to +.I stdout +concerning +the form of the input and the resultant non-deterministic and deterministic +finite automata. This option is mostly for use in maintaining +.I flex. +.TP +.B -8 +instructs +.I flex +to generate an 8-bit scanner. +On some sites, this is the default. On others, the default +is 7-bit characters. To see which is the case, check the verbose +.B (-v) +output for "equivalence classes created". If the denominator of +the number shown is 128, then by default +.I flex +is generating 7-bit characters. If it is 256, then the default is +8-bit characters. +.TP +.B -C[efmF] +controls the degree of table compression. +.IP +.B -Ce +directs +.I flex +to construct +.I equivalence classes, +i.e., sets of characters +which have identical lexical properties. +Equivalence classes usually give +dramatic reductions in the final table/object file sizes (typically +a factor of 2-5) and are pretty cheap performance-wise (one array +look-up per character scanned). +.IP +.B -Cf +specifies that the +.I full +scanner tables should be generated - +.I flex +should not compress the +tables by taking advantages of similar transition functions for +different states. +.IP +.B -CF +specifies that the alternate fast scanner representation (described in +.B flexdoc(1)) +should be used. +.IP +.B -Cm +directs +.I flex +to construct +.I meta-equivalence classes, +which are sets of equivalence classes (or characters, if equivalence +classes are not being used) that are commonly used together. 
Meta-equivalence +classes are often a big win when using compressed tables, but they +have a moderate performance impact (one or two "if" tests and one +array look-up per character scanned). +.IP +A lone +.B -C +specifies that the scanner tables should be compressed but neither +equivalence classes nor meta-equivalence classes should be used. +.IP +The options +.B -Cf +or +.B -CF +and +.B -Cm +do not make sense together - there is no opportunity for meta-equivalence +classes if the table is not being compressed. Otherwise the options +may be freely mixed. +.IP +The default setting is +.B -Cem, +which specifies that +.I flex +should generate equivalence classes +and meta-equivalence classes. This setting provides the highest +degree of table compression. You can trade off +faster-executing scanners at the cost of larger tables with +the following generally being true: +.nf + + slowest & smallest + -Cem + -Cm + -Ce + -C + -C{f,F}e + -C{f,F} + fastest & largest + +.fi +.IP +.B -C +options are not cumulative; whenever the flag is encountered, the +previous -C settings are forgotten. +.TP +.B -Sskeleton_file +overrides the default skeleton file from which +.I flex +constructs its scanners. You'll never need this option unless you are doing +.I flex +maintenance or development. +.SH SUMMARY OF FLEX REGULAR EXPRESSIONS +The patterns in the input are written using an extended set of regular +expressions. These are: +.nf + + x match the character 'x' + . any character except newline + [xyz] a "character class"; in this case, the pattern + matches either an 'x', a 'y', or a 'z' + [abj-oZ] a "character class" with a range in it; matches + an 'a', a 'b', any letter from 'j' through 'o', + or a 'Z' + [^A-Z] a "negated character class", i.e., any character + but those in the class. In this case, any + character EXCEPT an uppercase letter. 
+ [^A-Z\\n] any character EXCEPT an uppercase letter or + a newline + r* zero or more r's, where r is any regular expression + r+ one or more r's + r? zero or one r's (that is, "an optional r") + r{2,5} anywhere from two to five r's + r{2,} two or more r's + r{4} exactly 4 r's + {name} the expansion of the "name" definition + (see above) + "[xyz]\\"foo" + the literal string: [xyz]"foo + \\X if X is an 'a', 'b', 'f', 'n', 'r', 't', or 'v', + then the ANSI-C interpretation of \\x. + Otherwise, a literal 'X' (used to escape + operators such as '*') + \\123 the character with octal value 123 + \\x2a the character with hexadecimal value 2a + (r) match an r; parentheses are used to override + precedence (see below) + + + rs the regular expression r followed by the + regular expression s; called "concatenation" + + + r|s either an r or an s + + + r/s an r but only if it is followed by an s. The + s is not part of the matched text. This type + of pattern is called "trailing context". + ^r an r, but only at the beginning of a line + r$ an r, but only at the end of a line. Equivalent + to "r/\\n". + + + <s>r an r, but only in start condition s (see + below for discussion of start conditions) + <s1,s2,s3>r + same, but in any of start conditions s1, + s2, or s3 + + + <<EOF>> an end-of-file + <s1,s2><<EOF>> + an end-of-file when in start condition s1 or s2 + +.fi +The regular expressions listed above are grouped according to +precedence, from highest precedence at the top to lowest at the bottom. +Those grouped together have equal precedence. +.LP +Some notes on patterns: +.IP - +Negated character classes +.I match newlines +unless "\\n" (or an equivalent escape sequence) is one of the +characters explicitly present in the negated character class +(e.g., "[^A-Z\\n]"). +.IP - +A rule can have at most one instance of trailing context (the '/' operator +or the '$' operator).
The start condition, '^', and "<<EOF>>" patterns +can only occur at the beginning of a pattern, and, as well as with '/' and '$', +cannot be grouped inside parentheses. The following are all illegal: +.nf + + foo/bar$ + foo|(bar$) + foo|^bar + foo<sc1>bar + +.fi +.SH SUMMARY OF SPECIAL ACTIONS +In addition to arbitrary C code, the following can appear in actions: +.IP - +.B ECHO +copies yytext to the scanner's output. +.IP - +.B BEGIN +followed by the name of a start condition places the scanner in the +corresponding start condition. +.IP - +.B REJECT +directs the scanner to proceed on to the "second best" rule which matched the +input (or a prefix of the input). +.B yytext +and +.B yyleng +are set up appropriately. Note that +.B REJECT +is a particularly expensive feature in terms of scanner performance; +if it is used in +.I any +of the scanner's actions it will slow down +.I all +of the scanner's matching. Furthermore, +.B REJECT +cannot be used with the +.I -f +or +.I -F +options. +.IP +Note also that unlike the other special actions, +.B REJECT +is a +.I branch; +code immediately following it in the action will +.I not +be executed. +.IP - +.B yymore() +tells the scanner that the next time it matches a rule, the corresponding +token should be +.I appended +onto the current value of +.B yytext +rather than replacing it. +.IP - +.B yyless(n) +returns all but the first +.I n +characters of the current token back to the input stream, where they +will be rescanned when the scanner looks for the next match. +.B yytext +and +.B yyleng +are adjusted appropriately (e.g., +.B yyleng +will now be equal to +.I n +). +.IP - +.B unput(c) +puts the character +.I c +back onto the input stream. It will be the next character scanned. +.IP - +.B input() +reads the next character from the input stream (this routine is called +.B yyinput() +if the scanner is compiled using +.B C++). +.IP - +.B yyterminate() +can be used in lieu of a return statement in an action.
It terminates +the scanner and returns a 0 to the scanner's caller, indicating "all done". +.IP +By default, +.B yyterminate() +is also called when an end-of-file is encountered. It is a macro and +may be redefined. +.IP - +.B YY_NEW_FILE +is an action available only in <<EOF>> rules. It means "Okay, I've +set up a new input file, continue scanning". +.IP - +.B yy_create_buffer( file, size ) +takes a +.I FILE +pointer and an integer +.I size. +It returns a YY_BUFFER_STATE +handle to a new input buffer large enough to accommodate +.I size +characters and associated with the given file. When in doubt, use +.B YY_BUF_SIZE +for the size. +.IP - +.B yy_switch_to_buffer( new_buffer ) +switches the scanner's processing to scan for tokens from +the given buffer, which must be a YY_BUFFER_STATE. +.IP - +.B yy_delete_buffer( buffer ) +deletes the given buffer. +.SH VALUES AVAILABLE TO THE USER +.IP - +.B char *yytext +holds the text of the current token. It may not be modified. +.IP - +.B int yyleng +holds the length of the current token. It may not be modified. +.IP - +.B FILE *yyin +is the file which by default +.I flex +reads from. It may be redefined but doing so only makes sense before +scanning begins. Changing it in the middle of scanning will have +unexpected results since +.I flex +buffers its input. Once scanning terminates because an end-of-file +has been seen, +.B +void yyrestart( FILE *new_file ) +may be called to point +.I yyin +at the new input file. +.IP - +.B FILE *yyout +is the file to which +.B ECHO +actions are done. It can be reassigned by the user. +.IP - +.B YY_CURRENT_BUFFER +returns a +.B YY_BUFFER_STATE +handle to the current buffer. +.SH MACROS THE USER CAN REDEFINE +.IP - +.B YY_DECL +controls how the scanning routine is declared. +By default, it is "int yylex()", or, if prototypes are being +used, "int yylex(void)". This definition may be changed by redefining +the "YY_DECL" macro.
Note that +if you give arguments to the scanning routine using a +K&R-style/non-prototyped function declaration, you must terminate +the definition with a semi-colon (;). +.IP - +The nature of how the scanner +gets its input can be controlled by redefining the +.B YY_INPUT +macro. +YY_INPUT's calling sequence is "YY_INPUT(buf,result,max_size)". Its +action is to place up to +.I max_size +characters in the character array +.I buf +and return in the integer variable +.I result +either the +number of characters read or the constant YY_NULL (0 on Unix systems) +to indicate EOF. The default YY_INPUT reads from the +global file-pointer "yyin". +A sample redefinition of YY_INPUT (in the definitions +section of the input file): +.nf + + %{ + #undef YY_INPUT + #define YY_INPUT(buf,result,max_size) \\ + { \\ + int c = getchar(); \\ + result = (c == EOF) ? YY_NULL : (buf[0] = c, 1); \\ + } + %} + +.fi +.IP - +When the scanner receives an end-of-file indication from YY_INPUT, +it then checks the +.B yywrap() +function. If +.B yywrap() +returns false (zero), then it is assumed that the +function has gone ahead and set up +.I yyin +to point to another input file, and scanning continues. If it returns +true (non-zero), then the scanner terminates, returning 0 to its +caller. +.IP +The default +.B yywrap() +always returns 1. Presently, to redefine it you must first +"#undef yywrap", as it is currently implemented as a macro. It is +likely that +.B yywrap() +will soon be defined to be a function rather than a macro. +.IP - +YY_USER_ACTION +can be redefined to provide an action +which is always executed prior to the matched rule's action. +.IP - +The macro +.B YY_USER_INIT +may be redefined to provide an action which is always executed before +the first scan. +.IP - +In the generated scanner, the actions are all gathered in one large +switch statement and separated using +.B YY_BREAK, +which may be redefined. 
By default, it is simply a "break", to separate +each rule's action from the following rule's. +.SH FILES +.TP +.I flex.skel +skeleton scanner. +.TP +.I lex.yy.c +generated scanner (called +.I lexyy.c +on some systems). +.TP +.I lex.backtrack +backtracking information for +.B -b +flag (called +.I lex.bck +on some systems). +.TP +.B -lfl +library with which to link the scanners. +.SH "SEE ALSO" +.LP +flexdoc(1), lex(1), yacc(1), sed(1), awk(1). +.LP +M. E. Lesk and E. Schmidt, +.I LEX - Lexical Analyzer Generator +.SH DIAGNOSTICS +.I reject_used_but_not_detected undefined +or +.LP +.I yymore_used_but_not_detected undefined - +These errors can occur at compile time. They indicate that the +scanner uses +.B REJECT +or +.B yymore() +but that +.I flex +failed to notice the fact, meaning that +.I flex +scanned the first two sections looking for occurrences of these actions +and failed to find any, but somehow you snuck some in (via a #include +file, for example). Make an explicit reference to the action in your +.I flex +input file. (Note that previously +.I flex +supported a +.B %used/%unused +mechanism for dealing with this problem; this feature is still supported +but now deprecated, and will go away soon unless the author hears from +people who can argue compellingly that they need it.) +.LP +.I flex scanner jammed - +a scanner compiled with +.B -s +has encountered an input string which wasn't matched by +any of its rules. +.LP +.I flex input buffer overflowed - +a scanner rule matched a string long enough to overflow the +scanner's internal input buffer (16K bytes - controlled by +.B YY_BUF_MAX +in "flex.skel"). +.LP +.I scanner requires -8 flag - +Your scanner specification includes recognizing 8-bit characters and +you did not specify the -8 flag (and your site has not installed flex +with -8 as the default). 
+.LP +.I +fatal flex scanner internal error--end of buffer missed - +This can occur in a scanner which is reentered after a long-jump +has jumped out (or over) the scanner's activation frame. Before +reentering the scanner, use: +.nf + + yyrestart( yyin ); + +.fi +.LP +.I too many %t classes! - +You managed to put every single character into its own %t class. +.I flex +requires that at least one of the classes share characters. +.SH AUTHOR +Vern Paxson, with the help of many ideas and much inspiration from +Van Jacobson. Original version by Jef Poskanzer. +.LP +See flexdoc(1) for additional credits and the address to send comments to. +.SH DEFICIENCIES / BUGS +.LP +Some trailing context +patterns cannot be properly matched and generate +warning messages ("Dangerous trailing context"). These are +patterns where the ending of the +first part of the rule matches the beginning of the second +part, such as "zx*/xy*", where the 'x*' matches the 'x' at +the beginning of the trailing context. (Note that the POSIX draft +states that the text matched by such patterns is undefined.) +.LP +For some trailing context rules, parts which are actually fixed-length are +not recognized as such, leading to the abovementioned performance loss. +In particular, parts using '|' or {n} (such as "foo{3}") are always +considered variable-length. +.LP +Combining trailing context with the special '|' action can result in +.I fixed +trailing context being turned into the more expensive +.I variable +trailing context. For example, this happens in the following example: +.nf + + %% + abc | + xyz/def + +.fi +.LP +Use of unput() invalidates yytext and yyleng. +.LP +Use of unput() to push back more text than was matched can +result in the pushed-back text matching a beginning-of-line ('^') +rule even though it didn't come at the beginning of the line +(though this is rare!). +.LP +Pattern-matching of NUL's is substantially slower than matching other +characters.
+.LP +.I flex +does not generate correct #line directives for code internal +to the scanner; thus, bugs in +.I flex.skel +yield bogus line numbers. +.LP +Due to both buffering of input and read-ahead, you cannot intermix +calls to routines, such as, for example, +.B getchar(), +with +.I flex +rules and expect it to work. Call +.B input() +instead. +.LP +The total table entries listed by the +.B -v +flag excludes the number of table entries needed to determine +what rule has been matched. The number of entries is equal +to the number of DFA states if the scanner does not use +.B REJECT, +and somewhat greater than the number of states if it does. +.LP +.B REJECT +cannot be used with the +.I -f +or +.I -F +options. +.LP +Some of the macros, such as +.B yywrap(), +may in the future become functions which live in the +.B -lfl +library. This will doubtless break a lot of code, but may be +required for POSIX-compliance. +.LP +The +.I flex +internal algorithms need documentation. diff --git a/util/flex/flex.skel b/util/flex/flex.skel new file mode 100644 index 000000000..90f45c640 --- /dev/null +++ b/util/flex/flex.skel @@ -0,0 +1,858 @@ +/* A lexical scanner generated by flex */ + +/* scanner skeleton version: + * $Header$ + */ + +#define FLEX_SCANNER + +#define ACK_MOD + +#include + + +/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ +#ifdef c_plusplus +#ifndef __cplusplus +#define __cplusplus +#endif +#endif + + +#ifdef __cplusplus + +#include +#include + +/* use prototypes in function declarations */ +#define YY_USE_PROTOS + +/* the "const" storage-class-modifier is valid */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +#ifdef __STDC__ + +#ifdef __GNUC__ +#include +void *malloc( size_t ); +void free( void* ); +#else +#include +#endif /* __GNUC__ */ + +#define YY_USE_PROTOS +#define YY_USE_CONST + +#endif /* __STDC__ */ +#endif /* ! 
__cplusplus */ + + +#ifdef __TURBOC__ +#define YY_USE_CONST +#endif + + +#ifndef YY_USE_CONST +#define const +#endif + + +#ifdef YY_USE_PROTOS +#define YY_PROTO(proto) proto +#else +#define YY_PROTO(proto) () +/* we can't get here if it's an ANSI C compiler, or a C++ compiler, + * so it's got to be a K&R compiler, and therefore there's no standard + * place from which to include these definitions + */ +char *malloc(); +int free(); +int read(); +#endif + + +/* amount of stuff to slurp up with each read */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* returned upon end-of-file */ +#define YY_END_TOK 0 + +/* copy whatever the last rule matched to the standard output */ + +/* cast to (char *) is because for 8-bit chars, yytext is (unsigned char *) */ +/* this used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite() + */ +#define ECHO (void) fwrite( (char *) yytext, yyleng, 1, yyout ) + +/* gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". + */ +#define YY_INPUT(buf,result,max_size) \ + if ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \ + YY_FATAL_ERROR( "read() in flex scanner failed" ); +#define YY_NULL 0 + +/* no semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#define yyterminate() return ( YY_NULL ) + +/* report a fatal error */ + +/* The funky do-while is used to turn this macro definition into + * a single C statement (which needs a semi-colon terminator). 
+ * This avoids problems with code like: + * + * if ( something_happens ) + * YY_FATAL_ERROR( "oops, the something happened" ); + * else + * everything_okay(); + * + * Prior to using the do-while the compiler would get upset at the + * "else" because it interpreted the "if" statement as being all + * done when it reached the ';' after the YY_FATAL_ERROR() call. + */ + +#define YY_FATAL_ERROR(msg) \ + do \ + { \ + (void) fputs( msg, stderr ); \ + (void) putc( '\n', stderr ); \ + exit( 1 ); \ + } \ + while ( 0 ) + +/* default yywrap function - always treat EOF as an EOF */ +#define yywrap() 1 + +/* enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN + */ +#define BEGIN yy_start = 1 + 2 * + +/* action number for EOF rule of a given start state */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* special action meaning "start processing a new file" */ +#define YY_NEW_FILE \ + do \ + { \ + yy_init_buffer( yy_current_buffer, yyin ); \ + yy_load_buffer_state(); \ + } \ + while ( 0 ) + +/* default declaration of generated scanner - a define so the user can + * easily add parameters + */ +#define YY_DECL int yylex YY_PROTO(( void )) + +/* code executed at the end of each rule */ +#define YY_BREAK break; + +#define YY_END_OF_BUFFER_CHAR 0 + +#ifndef YY_BUF_SIZE +#define YY_BUF_SIZE (YY_READ_BUF_SIZE * 2) /* size of default input buffer */ +#endif + +typedef struct yy_buffer_state *YY_BUFFER_STATE; + +%% section 1 definitions go here + +/* done after the current pattern has been matched and before the + * corresponding action - sets up yytext + */ +#define YY_DO_BEFORE_ACTION \ + yytext = yy_bp; \ +%% code to fiddle yytext and yyleng for yymore() goes here + yy_hold_char = *yy_cp; \ + *yy_cp = '\0'; \ + yy_c_buf_p = yy_cp; + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + +/* return all but the first 
'n' matched characters back to the input stream */ +#define yyless(n) \ + do \ + { \ + /* undo effects of setting up yytext */ \ + *yy_cp = yy_hold_char; \ + yy_c_buf_p = yy_cp = yy_bp + n; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) + +#define unput(c) yyunput( c, yytext ) + + +struct yy_buffer_state + { + FILE *yy_input_file; + + YY_CHAR *yy_ch_buf; /* input buffer */ + YY_CHAR *yy_buf_pos; /* current position in input buffer */ + + /* size of input buffer in bytes, not including room for EOB characters*/ + int yy_buf_size; + + /* number of characters read into yy_ch_buf, not including EOB characters */ + int yy_n_chars; + + int yy_eof_status; /* whether we've seen an EOF on this buffer */ +#define EOF_NOT_SEEN 0 + /* "pending" happens when the EOF has been seen but there's still + * some text process + */ +#define EOF_PENDING 1 +#define EOF_DONE 2 + }; + +static YY_BUFFER_STATE yy_current_buffer; + +/* we provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state" + */ +#define YY_CURRENT_BUFFER yy_current_buffer + + +/* yy_hold_char holds the character lost when yytext is formed */ +static YY_CHAR yy_hold_char; + +static int yy_n_chars; /* number of characters read into yy_ch_buf */ + + + +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +#ifndef YY_USER_INIT +#define YY_USER_INIT +#endif + +extern YY_CHAR *yytext; +extern int yyleng; +extern FILE *yyin, *yyout; + +YY_CHAR *yytext; +int yyleng; + +FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; + +%% data tables for the DFA go here + +/* these variables are all declared out here so that section 3 code can + * manipulate them + */ +/* points to current character in buffer */ +static YY_CHAR *yy_c_buf_p = (YY_CHAR *) 0; +static int yy_init = 1; /* whether we need to initialize */ +static int yy_start = 0; /* start state number */ + +/* flag which is used to allow yywrap()'s to do buffer switches + * 
instead of setting up a fresh yyin. A bit of a hack ... + */ +static int yy_did_buffer_switch_on_eof; + +static yy_state_type yy_get_previous_state YY_PROTO(( void )); +static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); +static int yy_get_next_buffer YY_PROTO(( void )); +static void yyunput YY_PROTO(( YY_CHAR c, YY_CHAR *buf_ptr )); +void yyrestart YY_PROTO(( FILE *input_file )); +void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer )); +void yy_load_buffer_state YY_PROTO(( void )); +YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); +void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); +void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); + +#define yy_new_buffer yy_create_buffer + +#ifdef __cplusplus +static int yyinput YY_PROTO(( void )); +#else +static int input YY_PROTO(( void )); +#endif + +YY_DECL + { + register yy_state_type yy_current_state; + register YY_CHAR *yy_cp, *yy_bp; + register int yy_act; + +%% user's declarations go here + + if ( yy_init ) + { + YY_USER_INIT; + + if ( ! yy_start ) + yy_start = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( yy_current_buffer ) + yy_init_buffer( yy_current_buffer, yyin ); + else + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_load_buffer_state(); + + yy_init = 0; + } + + while ( 1 ) /* loops until end-of-file is reached */ + { +%% yymore()-related code goes here + yy_cp = yy_c_buf_p; + + /* support of yytext */ + *yy_cp = yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of the + * current run. 
+ */ + yy_bp = yy_cp; + +%% code to set up and find next match goes here + +yy_find_action: +%% code to find the action number goes here + + YY_DO_BEFORE_ACTION; + YY_USER_ACTION; + +do_action: /* this label is used only to access EOF actions */ + +%% debug code goes here + + switch ( yy_act ) + { +%% actions go here + + case YY_END_OF_BUFFER: + { + /* amount of text matched not including the EOB char */ + int yy_amount_of_matched_text = yy_cp - yytext - 1; + + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = yy_hold_char; + + /* note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the end- + * of-buffer state). Contrast this with the test in yyinput(). + */ + if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + /* this was really a NUL */ + { + yy_state_type yy_next_state; + + yy_c_buf_p = yytext + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + /* okay, we're now positioned to make the + * NUL transition. 
We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we + * don't want to build jamming into it because + * then it will run more slowly) + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = yytext + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* consume the NUL */ + yy_cp = ++yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { +%% code to do backtracking for compressed tables and set up yy_cp goes here + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer() ) + { + case EOB_ACT_END_OF_FILE: + { + yy_did_buffer_switch_on_eof = 0; + + if ( yywrap() ) + { + /* note: because we've taken care in + * yy_get_next_buffer() to have set up yytext, + * we can now set up yy_c_buf_p so that if some + * total hoser (like flex itself) wants + * to call the scanner after we return the + * YY_NULL, it'll still work - another YY_NULL + * will get returned. + */ + yy_c_buf_p = yytext + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF((yy_start - 1) / 2); + goto do_action; + } + + else + { + if ( ! 
yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + } + break; + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = yytext + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yy_c_buf_p = + &yy_current_buffer->yy_ch_buf[yy_n_chars]; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: +#ifdef FLEX_DEBUG + printf( "action # %d\n", yy_act ); +#endif + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } + } + } + + +/* yy_get_next_buffer - try to read in a new buffer + * + * synopsis + * int yy_get_next_buffer(); + * + * returns a code representing an action + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ + +static int yy_get_next_buffer() + + { + register YY_CHAR *dest = yy_current_buffer->yy_ch_buf; + register YY_CHAR *source = yytext - 1; /* copy prev. 
char, too */ + register int number_to_move, i; + int ret_val; + + if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + /* try to read more data */ + + /* first move last chars to start of buffer */ + number_to_move = yy_c_buf_p - yytext; + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( yy_current_buffer->yy_eof_status != EOF_NOT_SEEN ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + yy_n_chars = 0; + + else + { + int num_to_read = yy_current_buffer->yy_buf_size - number_to_move - 1; + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + else if ( num_to_read <= 0 ) + YY_FATAL_ERROR( "fatal error - scanner input buffer overflow" ); + + /* read in more data */ + YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), + yy_n_chars, num_to_read ); + } + + if ( yy_n_chars == 0 ) + { + if ( number_to_move == 1 ) + { + ret_val = EOB_ACT_END_OF_FILE; + yy_current_buffer->yy_eof_status = EOF_DONE; + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + yy_current_buffer->yy_eof_status = EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + yy_n_chars += number_to_move; + yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + /* yytext begins at the second character in yy_ch_buf; the first + * character is the one which preceded it before reading in the latest + * buffer; it needs to be kept around in case it's a newline, so + * yy_get_previous_state() will have with '^' rules active + */ + + yytext = &yy_current_buffer->yy_ch_buf[1]; + + return ( ret_val ); + } + + +/* yy_get_previous_state - get the state just before the EOB char was reached + * + * synopsis + * yy_state_type yy_get_previous_state(); + */ + +static yy_state_type yy_get_previous_state() + + { + register yy_state_type 
yy_current_state; + register YY_CHAR *yy_cp; + +%% code to get the start state into yy_current_state goes here + + for ( yy_cp = yytext + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) + { +%% code to find the next state goes here + } + + return ( yy_current_state ); + } + + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + +#ifdef YY_USE_PROTOS +static yy_state_type yy_try_NUL_trans( register yy_state_type yy_current_state ) +#else +static yy_state_type yy_try_NUL_trans( yy_current_state ) +register yy_state_type yy_current_state; +#endif + + { + register int yy_is_jam; +%% code to find the next state, and perhaps do backtracking, goes here + + return ( yy_is_jam ? 0 : yy_current_state ); + } + + +#ifdef YY_USE_PROTOS +static void yyunput( YY_CHAR c, register YY_CHAR *yy_bp ) +#else +static void yyunput( c, yy_bp ) +YY_CHAR c; +register YY_CHAR *yy_bp; +#endif + + { + register YY_CHAR *yy_cp = yy_c_buf_p; + + /* undo effects of setting up yytext */ + *yy_cp = yy_hold_char; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + { /* need to shift things up to make room */ + register int number_to_move = yy_n_chars + 2; /* +2 for EOB chars */ + register YY_CHAR *dest = + &yy_current_buffer->yy_ch_buf[yy_current_buffer->yy_buf_size + 2]; + register YY_CHAR *source = + &yy_current_buffer->yy_ch_buf[number_to_move]; + + while ( source > yy_current_buffer->yy_ch_buf ) + *--dest = *--source; + + yy_cp += dest - source; + yy_bp += dest - source; + yy_n_chars = yy_current_buffer->yy_buf_size; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + YY_FATAL_ERROR( "flex scanner push-back overflow" ); + } + + if ( yy_cp > yy_bp && yy_cp[-1] == '\n' ) + yy_cp[-2] = '\n'; + + *--yy_cp = c; + + /* note: the formal parameter *must* be called "yy_bp" for this + * macro to now work correctly + */ + YY_DO_BEFORE_ACTION; /* set up yytext again */ + } + + +#ifdef __cplusplus +static int yyinput() 
+#else +static int input() +#endif + + { + int c; + YY_CHAR *yy_cp = yy_c_buf_p; + + *yy_cp = yy_hold_char; + + if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + /* this was really a NUL */ + *yy_c_buf_p = '\0'; + + else + { /* need more input */ + yytext = yy_c_buf_p; + ++yy_c_buf_p; + + switch ( yy_get_next_buffer() ) + { + case EOB_ACT_END_OF_FILE: + { + if ( yywrap() ) + { + yy_c_buf_p = yytext + YY_MORE_ADJ; + return ( EOF ); + } + + YY_NEW_FILE; + +#ifdef __cplusplus + return ( yyinput() ); +#else + return ( input() ); +#endif + } + break; + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = yytext + YY_MORE_ADJ; + break; + + case EOB_ACT_LAST_MATCH: +#ifdef __cplusplus + YY_FATAL_ERROR( "unexpected last match in yyinput()" ); +#else + YY_FATAL_ERROR( "unexpected last match in input()" ); +#endif + } + } + } + + c = *yy_c_buf_p; + yy_hold_char = *++yy_c_buf_p; + + return ( c ); + } + + +#ifdef YY_USE_PROTOS +void yyrestart( FILE *input_file ) +#else +void yyrestart( input_file ) +FILE *input_file; +#endif + + { + yy_init_buffer( yy_current_buffer, input_file ); + yy_load_buffer_state(); + } + + +#ifdef YY_USE_PROTOS +void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) +#else +void yy_switch_to_buffer( new_buffer ) +YY_BUFFER_STATE new_buffer; +#endif + + { + if ( yy_current_buffer == new_buffer ) + return; + + if ( yy_current_buffer ) + { + /* flush out information for old buffer */ + *yy_c_buf_p = yy_hold_char; + yy_current_buffer->yy_buf_pos = yy_c_buf_p; + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + yy_current_buffer = new_buffer; + yy_load_buffer_state(); + + /* we don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after 
yywrap() is called, so it's safe + * to go ahead and always set it. + */ + yy_did_buffer_switch_on_eof = 1; + } + + +#ifdef YY_USE_PROTOS +void yy_load_buffer_state( void ) +#else +void yy_load_buffer_state() +#endif + + { + yy_n_chars = yy_current_buffer->yy_n_chars; + yytext = yy_c_buf_p = yy_current_buffer->yy_buf_pos; + yyin = yy_current_buffer->yy_input_file; + yy_hold_char = *yy_c_buf_p; + } + + +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) +#else +YY_BUFFER_STATE yy_create_buffer( file, size ) +FILE *file; +int size; +#endif + + { + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) malloc( sizeof( struct yy_buffer_state ) ); + + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (YY_CHAR *) malloc( (unsigned) (b->yy_buf_size + 2) ); + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + yy_init_buffer( b, file ); + + return ( b ); + } + + +#ifdef YY_USE_PROTOS +void yy_delete_buffer( YY_BUFFER_STATE b ) +#else +void yy_delete_buffer( b ) +YY_BUFFER_STATE b; +#endif + + { + if ( b == yy_current_buffer ) + yy_current_buffer = (YY_BUFFER_STATE) 0; + + free( (char *) b->yy_ch_buf ); + free( (char *) b ); + } + + +#ifdef YY_USE_PROTOS +void yy_init_buffer( YY_BUFFER_STATE b, FILE *file ) +#else +void yy_init_buffer( b, file ) +YY_BUFFER_STATE b; +FILE *file; +#endif + + { + b->yy_input_file = file; + + /* we put in the '\n' and start reading from [1] so that an + * initial match-at-newline will be true. + */ + + b->yy_ch_buf[0] = '\n'; + b->yy_n_chars = 1; + + /* we always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. 
+ */ + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[2] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[1]; + + b->yy_eof_status = EOF_NOT_SEEN; + } + +#ifdef ACK_MOD +/* redefine yyless() so that it does not access local variables of YYDECL */ + +#undef yyless + +/* return all but the first 'n' matched characters back to the input stream; 'n' is parenthesized in the expansion but evaluated twice, so it must not have side effects */ +#define yyless(n) \ + do \ + { \ + /* undo effects of setting up yytext */ \ + yytext[yyleng] = yy_hold_char; \ + yy_c_buf_p = yytext + (n); \ + yy_hold_char = *yy_c_buf_p; \ + *yy_c_buf_p = '\0'; \ + yyleng = (n); \ + } \ + while ( 0 ) + +#endif /* ACK_MOD */ diff --git a/util/flex/flexdef.h b/util/flex/flexdef.h new file mode 100644 index 000000000..63fc9f8a3 --- /dev/null +++ b/util/flex/flexdef.h @@ -0,0 +1,877 @@ +/* flexdef - definitions file for flex */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +/* @(#) $Header$ (LBL) */ + +#ifndef FILE +#include <stdio.h> /* FILE; BUFSIZ for MAXLINE */ +#endif + +/* always be prepared to generate an 8-bit scanner */ +#define FLEX_8_BIT_CHARS + +#ifdef FLEX_8_BIT_CHARS +#define CSIZE 256 +#define Char unsigned char +#else +#define Char char +#define CSIZE 128 +#endif + +/* size of input alphabet - should be size of ASCII set */ +#ifndef DEFAULT_CSIZE +#define DEFAULT_CSIZE 128 +#endif + +#ifndef PROTO +#ifdef __STDC__ +#define PROTO(proto) proto +#else +#define PROTO(proto) () +#endif +#endif + + +#ifndef ACK_MOD +#ifdef USG +#define SYS_V +#endif + +#ifdef SYS_V +#include <string.h> +#else + +#include <strings.h> +#ifdef lint +char *sprintf(); /* keep lint happy */ +#endif +#ifdef SCO_UNIX +void *memset(); +#else +char *memset(); +#endif +#endif +#else /* ACK_MOD */ +extern char *strcpy(); +#endif /* ACK_MOD */ + +#ifndef ACK_MOD +#ifdef AMIGA +#define bzero(s, n) setmem((char *)(s), n, '\0') +#ifndef abs +#define abs(x) ((x) < 0 ? -(x) : (x)) +#endif +#else +#define bzero(s, n) (void) memset((char *)(s), '\0', n) +#endif +#endif /* not ACK_MOD */ + +#ifdef VMS +#define unlink delete +#define SHORT_FILE_NAMES +#endif + +#ifdef __STDC__ + +#ifdef __GNUC__ +#include <stddef.h> /* size_t for the prototypes below */ +void *malloc( size_t ); +void free( void* ); +#else +#include <stdlib.h> /* malloc(), free() */ +#endif + +#else /* ! __STDC__ */ +char *malloc(), *realloc(); +#endif + + +/* maximum line length we'll have to deal with */ +#define MAXLINE BUFSIZ + +/* maximum size of file name */ +#define FILENAMESIZE 1024 + +#ifndef min +#define min(x,y) ((x) < (y) ? (x) : (y)) +#endif +#ifndef max +#define max(x,y) ((x) > (y) ? (x) : (y)) +#endif + +#ifdef MS_DOS +#ifndef abs +#define abs(x) ((x) < 0 ? 
-(x) : (x)) +#endif +#define SHORT_FILE_NAMES +#endif + +#define true 1 +#define false 0 + + +#ifndef DEFAULT_SKELETON_FILE +#define DEFAULT_SKELETON_FILE "flex.skel" +#endif + +/* special chk[] values marking the slots taken by end-of-buffer and action + * numbers + */ +#define EOB_POSITION -1 +#define ACTION_POSITION -2 + +/* number of data items per line for -f output */ +#define NUMDATAITEMS 10 + +/* number of lines of data in -f output before inserting a blank line for + * readability. + */ +#define NUMDATALINES 10 + +/* transition_struct_out() definitions */ +#define TRANS_STRUCT_PRINT_LENGTH 15 + +/* returns true if an nfa state has an epsilon out-transition slot + * that can be used. This definition is currently not used. + */ +#define FREE_EPSILON(state) \ + (transchar[state] == SYM_EPSILON && \ + trans2[state] == NO_TRANSITION && \ + finalst[state] != state) + +/* returns true if an nfa state has an epsilon out-transition character + * and both slots are free + */ +#define SUPER_FREE_EPSILON(state) \ + (transchar[state] == SYM_EPSILON && \ + trans1[state] == NO_TRANSITION) \ + +/* maximum number of NFA states that can comprise a DFA state. It's real + * big because if there's a lot of rules, the initial state will have a + * huge epsilon closure. + */ +#define INITIAL_MAX_DFA_SIZE 750 +#define MAX_DFA_SIZE_INCREMENT 750 + + +/* a note on the following masks. They are used to mark accepting numbers + * as being special. As such, they implicitly limit the number of accepting + * numbers (i.e., rules) because if there are too many rules the rule numbers + * will overload the mask bits. Fortunately, this limit is \large/ (0x2000 == + * 8192) so unlikely to actually cause any problems. A check is made in + * new_rule() to ensure that this limit is not reached. 
+ */ + +/* mask to mark a trailing context accepting number */ +#define YY_TRAILING_MASK 0x2000 + +/* mask to mark the accepting number of the "head" of a trailing context rule */ +#define YY_TRAILING_HEAD_MASK 0x4000 + +/* maximum number of rules, as outlined in the above note */ +#define MAX_RULE (YY_TRAILING_MASK - 1) + + +/* NIL must be 0. If not, its special meaning when making equivalence classes + * (it marks the representative of a given e.c.) will be unidentifiable + */ +#define NIL 0 + +#define JAM -1 /* to mark a missing DFA transition */ +#define NO_TRANSITION NIL +#define UNIQUE -1 /* marks a symbol as an e.c. representative */ +#define INFINITY -1 /* for x{5,} constructions */ + +#define INITIAL_MAX_CCLS 100 /* max number of unique character classes */ +#define MAX_CCLS_INCREMENT 100 + +/* size of table holding members of character classes */ +#define INITIAL_MAX_CCL_TBL_SIZE 500 +#define MAX_CCL_TBL_SIZE_INCREMENT 250 + +#define INITIAL_MAX_RULES 100 /* default maximum number of rules */ +#define MAX_RULES_INCREMENT 100 + +#define INITIAL_MNS 2000 /* default maximum number of nfa states */ +#define MNS_INCREMENT 1000 /* amount to bump above by if it's not enough */ + +#define INITIAL_MAX_DFAS 1000 /* default maximum number of dfa states */ +#define MAX_DFAS_INCREMENT 1000 + +#define JAMSTATE -32766 /* marks a reference to the state that always jams */ + +/* enough so that if it's subtracted from an NFA state number, the result + * is guaranteed to be negative + */ +#define MARKER_DIFFERENCE 32000 +#define MAXIMUM_MNS 31999 + +/* maximum number of nxt/chk pairs for non-templates */ +#define INITIAL_MAX_XPAIRS 2000 +#define MAX_XPAIRS_INCREMENT 2000 + +/* maximum number of nxt/chk pairs needed for templates */ +#define INITIAL_MAX_TEMPLATE_XPAIRS 2500 +#define MAX_TEMPLATE_XPAIRS_INCREMENT 2500 + +#define SYM_EPSILON (CSIZE + 1) /* to mark transitions on the symbol epsilon */ + +#define INITIAL_MAX_SCS 40 /* maximum number of start conditions */ 
+#define MAX_SCS_INCREMENT 40 /* amount to bump by if it's not enough */ + +#define ONE_STACK_SIZE 500 /* stack of states with only one out-transition */ +#define SAME_TRANS -1 /* transition is the same as "default" entry for state */ + +/* the following percentages are used to tune table compression: + + * the percentage the number of out-transitions a state must be of the + * number of equivalence classes in order to be considered for table + * compaction by using protos + */ +#define PROTO_SIZE_PERCENTAGE 15 + +/* the percentage the number of homogeneous out-transitions of a state + * must be of the number of total out-transitions of the state in order + * that the state's transition table is first compared with a potential + * template of the most common out-transition instead of with the first + * proto in the proto queue + */ +#define CHECK_COM_PERCENTAGE 50 + +/* the percentage the number of differences between a state's transition + * table and the proto it was first compared with must be of the total + * number of out-transitions of the state in order to keep the first + * proto as a good match and not search any further + */ +#define FIRST_MATCH_DIFF_PERCENTAGE 10 + +/* the percentage the number of differences between a state's transition + * table and the most similar proto must be of the state's total number + * of out-transitions to use the proto as an acceptable close match + */ +#define ACCEPTABLE_DIFF_PERCENTAGE 50 + +/* the percentage the number of homogeneous out-transitions of a state + * must be of the number of total out-transitions of the state in order + * to consider making a template from the state + */ +#define TEMPLATE_SAME_PERCENTAGE 60 + +/* the percentage the number of differences between a state's transition + * table and the most similar proto must be of the state's total number + * of out-transitions to create a new proto from the state + */ +#define NEW_PROTO_DIFF_PERCENTAGE 20 + +/* the percentage the total number of 
out-transitions of a state must be + * of the number of equivalence classes in order to consider trying to + * fit the transition table into "holes" inside the nxt/chk table. + */ +#define INTERIOR_FIT_PERCENTAGE 15 + +/* size of region set aside to cache the complete transition table of + * protos on the proto queue to enable quick comparisons + */ +#define PROT_SAVE_SIZE 2000 + +#define MSP 50 /* maximum number of saved protos (protos on the proto queue) */ + +/* maximum number of out-transitions a state can have that we'll rummage + * around through the interior of the internal fast table looking for a + * spot for it + */ +#define MAX_XTIONS_FULL_INTERIOR_FIT 4 + +/* maximum number of rules which will be reported as being associated + * with a DFA state + */ +#define MAX_ASSOC_RULES 100 + +/* number that, if used to subscript an array, has a good chance of producing + * an error; should be small enough to fit into a short + */ +#define BAD_SUBSCRIPT -32767 + +/* absolute value of largest number that can be stored in a short, with a + * bit of slop thrown in for general paranoia. + */ +#define MAX_SHORT 32766 + + +/* Declarations for global variables. 
*/ + +/* variables for symbol tables: + * sctbl - start-condition symbol table + * ndtbl - name-definition symbol table + * ccltab - character class text symbol table + */ + +struct hash_entry + { + struct hash_entry *prev, *next; + char *name; + char *str_val; + int int_val; + } ; + +typedef struct hash_entry *hash_table[]; + +#define NAME_TABLE_HASH_SIZE 101 +#define START_COND_HASH_SIZE 101 +#define CCL_HASH_SIZE 101 + +extern struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE]; +extern struct hash_entry *sctbl[START_COND_HASH_SIZE]; +extern struct hash_entry *ccltab[CCL_HASH_SIZE]; + + +/* variables for flags: + * printstats - if true (-v), dump statistics + * syntaxerror - true if a syntax error has been found + * eofseen - true if we've seen an eof in the input file + * ddebug - if true (-d), make a "debug" scanner + * trace - if true (-T), trace processing + * spprdflt - if true (-s), suppress the default rule + * interactive - if true (-I), generate an interactive scanner + * caseins - if true (-i), generate a case-insensitive scanner + * useecs - if true (-Ce flag), use equivalence classes + * fulltbl - if true (-Cf flag), don't compress the DFA state table + * usemecs - if true (-Cm flag), use meta-equivalence classes + * fullspd - if true (-F flag), use Jacobson method of table representation + * gen_line_dirs - if true (i.e., no -L flag), generate #line directives + * performance_report - if true (i.e., -p flag), generate a report relating + * to scanner performance + * backtrack_report - if true (i.e., -b flag), generate "lex.backtrack" file + * listing backtracking states + * csize - size of character set for the scanner we're generating; + * 128 for 7-bit chars and 256 for 8-bit + * yymore_used - if true, yymore() is used in input rules + * reject - if true, generate backtracking tables for REJECT macro + * real_reject - if true, scanner really uses REJECT (as opposed to just + * having "reject" set for variable trailing context) + * continued_action - 
true if this rule's action is to "fall through" to + * the next rule's action (i.e., the '|' action) + * yymore_really_used - has a REALLY_xxx value indicating whether a + * %used or %notused was used with yymore() + * reject_really_used - same for REJECT + */ + +extern int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; +extern int interactive, caseins, useecs, fulltbl, usemecs; +extern int fullspd, gen_line_dirs, performance_report, backtrack_report, csize; +extern int yymore_used, reject, real_reject, continued_action; + +#define REALLY_NOT_DETERMINED 0 +#define REALLY_USED 1 +#define REALLY_NOT_USED 2 +extern int yymore_really_used, reject_really_used; + + +/* variables used in the flex input routines: + * datapos - characters on current output line + * dataline - number of contiguous lines of data in current data + * statement. Used to generate readable -f output + * linenum - current input line number + * skelfile - the skeleton file + * yyin - input file + * temp_action_file - temporary file to hold actions + * backtrack_file - file to summarize backtracking states to + * infilename - name of input file + * action_file_name - name of the temporary file + * input_files - array holding names of input files + * num_input_files - size of input_files array + * program_name - name with which program was invoked + */ + +extern int datapos, dataline, linenum; +extern FILE *skelfile, *yyin, *temp_action_file, *backtrack_file; +extern char *infilename; +extern char *action_file_name; +extern char **input_files; +extern int num_input_files; +extern char *program_name; + + +/* variables for stack of states having only one out-transition: + * onestate - state number + * onesym - transition symbol + * onenext - target state + * onedef - default base entry + * onesp - stack pointer + */ + +extern int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; +extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; + + +/* variables for nfa machine data: + * 
current_mns - current maximum on number of NFA states + * num_rules - number of the last accepting state; also is number of + * rules created so far + * current_max_rules - current maximum number of rules + * lastnfa - last nfa state number created + * firstst - physically the first state of a fragment + * lastst - last physical state of fragment + * finalst - last logical state of fragment + * transchar - transition character + * trans1 - transition state + * trans2 - 2nd transition state for epsilons + * accptnum - accepting number + * assoc_rule - rule associated with this NFA state (or 0 if none) + * state_type - a STATE_xxx type identifying whether the state is part + * of a normal rule, the leading state in a trailing context + * rule (i.e., the state which marks the transition from + * recognizing the text-to-be-matched to the beginning of + * the trailing context), or a subsequent state in a trailing + * context rule + * rule_type - a RULE_xxx type identifying whether this is a ho-hum + * normal rule or one which has variable head & trailing + * context + * rule_linenum - line number associated with rule + */ + +extern int current_mns, num_rules, current_max_rules, lastnfa; +extern int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2; +extern int *accptnum, *assoc_rule, *state_type, *rule_type, *rule_linenum; + +/* different types of states; values are useful as masks, as well, for + * routines like check_trailing_context() + */ +#define STATE_NORMAL 0x1 +#define STATE_TRAILING_CONTEXT 0x2 + +/* global holding current type of state we're making */ + +extern int current_state_type; + +/* different types of rules */ +#define RULE_NORMAL 0 +#define RULE_VARIABLE 1 + +/* true if the input rules include a rule with both variable-length head + * and trailing context, false otherwise + */ +extern int variable_trailing_context_rules; + + +/* variables for protos: + * numtemps - number of templates created + * numprots - number of protos created + * protprev 
- backlink to a more-recently used proto + * protnext - forward link to a less-recently used proto + * prottbl - base/def table entry for proto + * protcomst - common state of proto + * firstprot - number of the most recently used proto + * lastprot - number of the least recently used proto + * protsave contains the entire state array for protos + */ + +extern int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; +extern int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; + + +/* variables for managing equivalence classes: + * numecs - number of equivalence classes + * nextecm - forward link of Equivalence Class members + * ecgroup - class number or backward link of EC members + * nummecs - number of meta-equivalence classes (used to compress + * templates) + * tecfwd - forward link of meta-equivalence classes members + * tecbck - backward link of MEC's + * xlation - maps character codes to their translations, or nil if no %t table + * num_xlations - number of different xlation values + */ + +/* reserve enough room in the equivalence class arrays so that we + * can use the CSIZE'th element to hold equivalence class information + * for the NUL character. Later we'll move this information into + * the 0th element. + */ +extern int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs; + +/* meta-equivalence classes are indexed starting at 1, so it's possible + * that they will require positions from 1 .. CSIZE, i.e., CSIZE + 1 + * slots total (since the arrays are 0-based). nextecm[] and ecgroup[] + * don't require the extra position since they're indexed from 1 .. CSIZE - 1. 
+ */ +extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1]; + +extern int *xlation; +extern int num_xlations; + + +/* variables for start conditions: + * lastsc - last start condition created + * current_max_scs - current limit on number of start conditions + * scset - set of rules active in start condition + * scbol - set of rules active only at the beginning of line in a s.c. + * scxclu - true if start condition is exclusive + * sceof - true if start condition has EOF rule + * scname - start condition name + * actvsc - stack of active start conditions for the current rule + */ + +extern int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc; +extern char **scname; + + +/* variables for dfa machine data: + * current_max_dfa_size - current maximum number of NFA states in DFA + * current_max_xpairs - current maximum number of non-template xtion pairs + * current_max_template_xpairs - current maximum number of template pairs + * current_max_dfas - current maximum number DFA states + * lastdfa - last dfa state number created + * nxt - state to enter upon reading character + * chk - check value to see if "nxt" applies + * tnxt - internal nxt table for templates + * base - offset into "nxt" for given state + * def - where to go if "chk" disallows "nxt" entry + * nultrans - NUL transition for each state + * NUL_ec - equivalence class of the NUL character + * tblend - last "nxt/chk" table entry being used + * firstfree - first empty entry in "nxt/chk" table + * dss - nfa state set for each dfa + * dfasiz - size of nfa state set for each dfa + * dfaacc - accepting set for each dfa state (or accepting number, if + * -r is not given) + * accsiz - size of accepting set for each dfa state + * dhash - dfa state hash value + * numas - number of DFA accepting states created; note that this + * is not necessarily the same value as num_rules, which is the analogous + * value for the NFA + * numsnpairs - number of state/nextstate transition pairs + * jambase - position in 
base/def where the default jam table starts + * jamstate - state number corresponding to "jam" state + * end_of_buffer_state - end-of-buffer dfa state number + */ + +extern int current_max_dfa_size, current_max_xpairs; +extern int current_max_template_xpairs, current_max_dfas; +extern int lastdfa, lasttemp, *nxt, *chk, *tnxt; +extern int *base, *def, *nultrans, NUL_ec, tblend, firstfree, **dss, *dfasiz; +extern union dfaacc_union + { + int *dfaacc_set; + int dfaacc_state; + } *dfaacc; +extern int *accsiz, *dhash, numas; +extern int numsnpairs, jambase, jamstate; +extern int end_of_buffer_state; + +/* variables for ccl information: + * lastccl - ccl index of the last created ccl + * current_maxccls - current limit on the maximum number of unique ccl's + * cclmap - maps a ccl index to its set pointer + * ccllen - gives the length of a ccl + * cclng - true for a given ccl if the ccl is negated + * cclreuse - counts how many times a ccl is re-used + * current_max_ccl_tbl_size - current limit on number of characters needed + * to represent the unique ccl's + * ccltbl - holds the characters in each ccl - indexed by cclmap + */ + +extern int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse; +extern int current_max_ccl_tbl_size; +extern Char *ccltbl; + + +/* variables for miscellaneous information: + * starttime - real-time when we started + * endtime - real-time when we ended + * nmstr - last NAME scanned by the scanner + * sectnum - section number currently being parsed + * nummt - number of empty nxt/chk table entries + * hshcol - number of hash collisions detected by snstods + * dfaeql - number of times a newly created dfa was equal to an old one + * numeps - number of epsilon NFA states created + * eps2 - number of epsilon states which have 2 out-transitions + * num_reallocs - number of times it was necessary to realloc() a group + * of arrays + * tmpuses - number of DFA states that chain to templates + * totnst - total number of NFA states used to make 
DFA states + * peakpairs - peak number of transition pairs we had to store internally + * numuniq - number of unique transitions + * numdup - number of duplicate transitions + * hshsave - number of hash collisions saved by checking number of states + * num_backtracking - number of DFA states requiring back-tracking + * bol_needed - whether scanner needs beginning-of-line recognition + */ + +extern char *starttime, *endtime, nmstr[MAXLINE]; +extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; +extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; +extern int num_backtracking, bol_needed; + +void *allocate_array(), *reallocate_array(); + +#define allocate_integer_array(size) \ + (int *) allocate_array( size, sizeof( int ) ) + +#define reallocate_integer_array(array,size) \ + (int *) reallocate_array( (void *) array, size, sizeof( int ) ) + +#define allocate_int_ptr_array(size) \ + (int **) allocate_array( size, sizeof( int * ) ) + +#define allocate_char_ptr_array(size) \ + (char **) allocate_array( size, sizeof( char * ) ) + +#define allocate_dfaacc_union(size) \ + (union dfaacc_union *) \ + allocate_array( size, sizeof( union dfaacc_union ) ) + +#define reallocate_int_ptr_array(array,size) \ + (int **) reallocate_array( (void *) array, size, sizeof( int * ) ) + +#define reallocate_char_ptr_array(array,size) \ + (char **) reallocate_array( (void *) array, size, sizeof( char * ) ) + +#define reallocate_dfaacc_union(array, size) \ + (union dfaacc_union *) \ + reallocate_array( (void *) array, size, sizeof( union dfaacc_union ) ) + +#define allocate_character_array(size) \ + (Char *) allocate_array( size, sizeof( Char ) ) + +#define reallocate_character_array(array,size) \ + (Char *) reallocate_array( (void *) array, size, sizeof( Char ) ) + + +/* used to communicate between scanner and parser. The type should really + * be YYSTYPE, but we can't easily get our hands on it. 
+ */ +extern int yylval; + + +/* external functions that are cross-referenced among the flex source files */ + + +/* from file ccl.c */ + +extern void ccladd PROTO((int, int)); /* Add a single character to a ccl */ +extern int cclinit PROTO(()); /* make an empty ccl */ +extern void cclnegate PROTO((int)); /* negate a ccl */ + +/* list the members of a set of characters in CCL form */ +extern void list_character_set PROTO((FILE*, int[])); + + +/* from file dfa.c */ + +/* increase the maximum number of dfas */ +extern void increase_max_dfas PROTO(()); + +extern void ntod PROTO(()); /* convert a ndfa to a dfa */ + + +/* from file ecs.c */ + +/* convert character classes to set of equivalence classes */ +extern void ccl2ecl PROTO(()); + +/* associate equivalence class numbers with class members */ +extern int cre8ecs PROTO((int[], int[], int)); + +/* associate equivalence class numbers using %t table */ +extern int ecs_from_xlation PROTO((int[])); + +/* update equivalence classes based on character class transitions */ +extern void mkeccl PROTO((Char[], int, int[], int[], int, int)); + +/* create equivalence class for single character */ +extern void mkechar PROTO((int, int[], int[])); + + +/* from file gen.c */ + +extern void make_tables PROTO(()); /* generate transition tables */ + + +/* from file main.c */ + +extern void flexend PROTO((int)); + + +/* from file misc.c */ + +/* write out the actions from the temporary file to lex.yy.c */ +extern void action_out PROTO(()); + +/* true if a string is all lower case */ +extern int all_lower PROTO((register Char *)); + +/* true if a string is all upper case */ +extern int all_upper PROTO((register Char *)); + +/* bubble sort an integer array */ +extern void bubble PROTO((int [], int)); + +/* shell sort a character array */ +extern void cshell PROTO((Char [], int, int)); + +extern void dataend PROTO(()); /* finish up a block of data declarations */ + +/* report an error message and terminate */ +extern void flexerror 
PROTO((char[])); + +/* report a fatal error message and terminate */ +extern void flexfatal PROTO((char[])); + +/* report an error message formatted with one integer argument */ +extern void lerrif PROTO((char[], int)); + +/* report an error message formatted with one string argument */ +extern void lerrsf PROTO((char[], char[])); + +/* spit out a "# line" statement */ +extern void line_directive_out PROTO((FILE*)); + +/* generate a data statement for a two-dimensional array */ +extern void mk2data PROTO((int)); + +extern void mkdata PROTO((int)); /* generate a data statement */ + +/* return the integer represented by a string of digits */ +extern int myctoi PROTO((Char [])); + +/* write out one section of the skeleton file */ +extern void skelout PROTO(()); + +/* output a yy_trans_info structure */ +extern void transition_struct_out PROTO((int, int)); + + +/* from file nfa.c */ + +/* add an accepting state to a machine */ +extern void add_accept PROTO((int, int)); + +/* make a given number of copies of a singleton machine */ +extern int copysingl PROTO((int, int)); + +/* debugging routine to write out an nfa */ +extern void dumpnfa PROTO((int)); + +/* finish up the processing for a rule */ +extern void finish_rule PROTO((int, int, int, int)); + +/* connect two machines together */ +extern int link_machines PROTO((int, int)); + +/* mark each "beginning" state in a machine as being a "normal" (i.e., + * not trailing context associated) state + */ +extern void mark_beginning_as_normal PROTO((register int)); + +/* make a machine that branches to two machines */ +extern int mkbranch PROTO((int, int)); + +extern int mkclos PROTO((int)); /* convert a machine into a closure */ +extern int mkopt PROTO((int)); /* make a machine optional */ + +/* make a machine that matches either one of two machines */ +extern int mkor PROTO((int, int)); + +/* convert a machine into a positive closure */ +extern int mkposcl PROTO((int)); + +extern int mkrep PROTO((int, int, int)); /* make a 
replicated machine */ + +/* create a state with a transition on a given symbol */ +extern int mkstate PROTO((int)); + +extern void new_rule PROTO(()); /* initialize for a new rule */ + + +/* from file parse.y */ + +/* write out a message formatted with one string, pinpointing its location */ +extern void format_pinpoint_message PROTO((char[], char[])); + +/* write out a message, pinpointing its location */ +extern void pinpoint_message PROTO((char[])); + +extern void synerr PROTO((char [])); /* report a syntax error */ +extern int yyparse PROTO(()); /* the YACC parser */ + + +/* from file scan.l */ + +extern int flexscan PROTO(()); /* the Flex-generated scanner for flex */ + +/* open the given file (if NULL, stdin) for scanning */ +extern void set_input_file PROTO((char*)); + +extern int yywrap PROTO(()); /* wrapup a file in the lexical analyzer */ + + +/* from file sym.c */ + +/* save the text of a character class */ +extern void cclinstal PROTO ((Char [], int)); + +/* lookup the number associated with character class */ +extern int ccllookup PROTO((Char [])); + +extern void ndinstal PROTO((char[], Char[])); /* install a name definition */ +extern void scinstal PROTO((char[], int)); /* make a start condition */ + +/* lookup the number associated with a start condition */ +extern int sclookup PROTO((char[])); + + +/* from file tblcmp.c */ + +/* build table entries for dfa state */ +extern void bldtbl PROTO((int[], int, int, int, int)); + +extern void cmptmps PROTO(()); /* compress template table entries */ +extern void inittbl PROTO(()); /* initialize transition tables */ +extern void mkdeftbl PROTO(()); /* make the default, "jam" table entries */ + +/* create table entries for a state (or state fragment) which has + * only one out-transition */ +extern void mk1tbl PROTO((int, int, int, int)); + +/* place a state into full speed transition table */ +extern void place_state PROTO((int*, int, int)); + +/* save states with only one out-transition to be processed later 
*/ +extern void stack1 PROTO((int, int, int, int)); + + +/* from file yylex.c */ + +extern int yylex PROTO(()); + + +/* The Unix kernel calls used here */ + +extern int read PROTO((int, char*, int)); +extern int unlink PROTO((char*)); +extern int write PROTO((int, char*, int)); diff --git a/util/flex/flexdoc.1 b/util/flex/flexdoc.1 new file mode 100644 index 000000000..cef3d7d85 --- /dev/null +++ b/util/flex/flexdoc.1 @@ -0,0 +1,2446 @@ +.TH FLEX 1 "26 May 1990" "Version 2.3" +.SH NAME +flex - fast lexical analyzer generator +.SH SYNOPSIS +.B flex +.B [-bcdfinpstvFILT8 -C[efmF] -Sskeleton] +.I [filename ...] +.SH DESCRIPTION +.I flex +is a tool for generating +.I scanners: +programs which recognize lexical patterns in text. +.I flex +reads +the given input files, or its standard input if no file names are given, +for a description of a scanner to generate. The description is in +the form of pairs +of regular expressions and C code, called +.I rules. flex +generates as output a C source file, +.B lex.yy.c, +which defines a routine +.B yylex(). +This file is compiled and linked with the +.B -lfl +library to produce an executable. When the executable is run, +it analyzes its input for occurrences +of the regular expressions. Whenever it finds one, it executes +the corresponding C code. +.SH SOME SIMPLE EXAMPLES +.LP +First some simple examples to get the flavor of how one uses +.I flex. +The following +.I flex +input specifies a scanner which whenever it encounters the string +"username" will replace it with the user's login name: +.nf + + %% + username printf( "%s", getlogin() ); + +.fi +By default, any text not matched by a +.I flex +scanner +is copied to the output, so the net effect of this scanner is +to copy its input file to its output with each occurrence +of "username" expanded. +In this input, there is just one rule. "username" is the +.I pattern +and the "printf" is the +.I action. +The "%%" marks the beginning of the rules.
+.LP +Here's another simple example: +.nf + + int num_lines = 0, num_chars = 0; + + %% + \\n ++num_lines; ++num_chars; + . ++num_chars; + + %% + main() + { + yylex(); + printf( "# of lines = %d, # of chars = %d\\n", + num_lines, num_chars ); + } + +.fi +This scanner counts the number of characters and the number +of lines in its input (it produces no output other than the +final report on the counts). The first line +declares two globals, "num_lines" and "num_chars", which are accessible +both inside +.B yylex() +and in the +.B main() +routine declared after the second "%%". There are two rules, one +which matches a newline ("\\n") and increments both the line count and +the character count, and one which matches any character other than +a newline (indicated by the "." regular expression). +.LP +A somewhat more complicated example: +.nf + + /* scanner for a toy Pascal-like language */ + + %{ + /* need this for the call to atof() below */ + #include <math.h> + %} + + DIGIT [0-9] + ID [a-z][a-z0-9]* + + %% + + {DIGIT}+ { + printf( "An integer: %s (%d)\\n", yytext, + atoi( yytext ) ); + } + + {DIGIT}+"."{DIGIT}* { + printf( "A float: %s (%g)\\n", yytext, + atof( yytext ) ); + } + + if|then|begin|end|procedure|function { + printf( "A keyword: %s\\n", yytext ); + } + + {ID} printf( "An identifier: %s\\n", yytext ); + + "+"|"-"|"*"|"/" printf( "An operator: %s\\n", yytext ); + + "{"[^}\\n]*"}" /* eat up one-line comments */ + + [ \\t\\n]+ /* eat up whitespace */ + + . printf( "Unrecognized character: %s\\n", yytext ); + + %% + + main( argc, argv ) + int argc; + char **argv; + { + ++argv, --argc; /* skip over program name */ + if ( argc > 0 ) + yyin = fopen( argv[0], "r" ); + else + yyin = stdin; + + yylex(); + } + +.fi +This is the beginnings of a simple scanner for a language like +Pascal. It identifies different types of +.I tokens +and reports on what it has seen. +.LP +The details of this example will be explained in the following +sections.
+.SH FORMAT OF THE INPUT FILE +The +.I flex +input file consists of three sections, separated by a line with just +.B %% +in it: +.nf + + definitions + %% + rules + %% + user code + +.fi +The +.I definitions +section contains declarations of simple +.I name +definitions to simplify the scanner specification, and declarations of +.I start conditions, +which are explained in a later section. +.LP +Name definitions have the form: +.nf + + name definition + +.fi +The "name" is a word beginning with a letter or an underscore ('_') +followed by zero or more letters, digits, '_', or '-' (dash). +The definition is taken to begin at the first non-white-space character +following the name and continuing to the end of the line. +The definition can subsequently be referred to using "{name}", which +will expand to "(definition)". For example, +.nf + + DIGIT [0-9] + ID [a-z][a-z0-9]* + +.fi +defines "DIGIT" to be a regular expression which matches a +single digit, and +"ID" to be a regular expression which matches a letter +followed by zero-or-more letters-or-digits. +A subsequent reference to +.nf + + {DIGIT}+"."{DIGIT}* + +.fi +is identical to +.nf + + ([0-9])+"."([0-9])* + +.fi +and matches one-or-more digits followed by a '.' followed +by zero-or-more digits. +.LP +The +.I rules +section of the +.I flex +input contains a series of rules of the form: +.nf + + pattern action + +.fi +where the pattern must be unindented and the action must begin +on the same line. +.LP +See below for a further description of patterns and actions. +.LP +Finally, the user code section is simply copied to +.B lex.yy.c +verbatim. +It is used for companion routines which call or are called +by the scanner. The presence of this section is optional; +if it is missing, the second +.B %% +in the input file may be skipped, too. +.LP +In the definitions and rules sections, any +.I indented +text or text enclosed in +.B %{ +and +.B %} +is copied verbatim to the output (with the %{}'s removed). 
+The %{}'s must appear unindented on lines by themselves. +.LP +In the rules section, +any indented or %{} text appearing before the +first rule may be used to declare variables +which are local to the scanning routine and (after the declarations) +code which is to be executed whenever the scanning routine is entered. +Other indented or %{} text in the rule section is still copied to the output, +but its meaning is not well-defined and it may well cause compile-time +errors (this feature is present for +.I POSIX +compliance; see below for other such features). +.LP +In the definitions section, an unindented comment (i.e., a line +beginning with "/*") is also copied verbatim to the output up +to the next "*/". Also, any line in the definitions section +beginning with '#' is ignored, though this style of comment is +deprecated and may go away in the future. +.SH PATTERNS +The patterns in the input are written using an extended set of regular +expressions. These are: +.nf + + x match the character 'x' + . any character except newline + [xyz] a "character class"; in this case, the pattern + matches either an 'x', a 'y', or a 'z' + [abj-oZ] a "character class" with a range in it; matches + an 'a', a 'b', any letter from 'j' through 'o', + or a 'Z' + [^A-Z] a "negated character class", i.e., any character + but those in the class. In this case, any + character EXCEPT an uppercase letter. + [^A-Z\\n] any character EXCEPT an uppercase letter or + a newline + r* zero or more r's, where r is any regular expression + r+ one or more r's + r? zero or one r's (that is, "an optional r") + r{2,5} anywhere from two to five r's + r{2,} two or more r's + r{4} exactly 4 r's + {name} the expansion of the "name" definition + (see above) + "[xyz]\\"foo" + the literal string: [xyz]"foo + \\X if X is an 'a', 'b', 'f', 'n', 'r', 't', or 'v', + then the ANSI-C interpretation of \\x. 
+ Otherwise, a literal 'X' (used to escape + operators such as '*') + \\123 the character with octal value 123 + \\x2a the character with hexadecimal value 2a + (r) match an r; parentheses are used to override + precedence (see below) + + + rs the regular expression r followed by the + regular expression s; called "concatenation" + + + r|s either an r or an s + + + r/s an r but only if it is followed by an s. The + s is not part of the matched text. This type + of pattern is called "trailing context". + ^r an r, but only at the beginning of a line + r$ an r, but only at the end of a line. Equivalent + to "r/\\n". + + + <s>r an r, but only in start condition s (see + below for discussion of start conditions) + <s1,s2,s3>r + same, but in any of start conditions s1, + s2, or s3 + + + <<EOF>> an end-of-file + <s1,s2><<EOF>> + an end-of-file when in start condition s1 or s2 + +.fi +The regular expressions listed above are grouped according to +precedence, from highest precedence at the top to lowest at the bottom. +Those grouped together have equal precedence. For example, +.nf + + foo|bar* + +.fi +is the same as +.nf + + (foo)|(ba(r*)) + +.fi +since the '*' operator has higher precedence than concatenation, +and concatenation higher than alternation ('|'). This pattern +therefore matches +.I either +the string "foo" +.I or +the string "ba" followed by zero-or-more r's. +To match "foo" or zero-or-more "bar"'s, use: +.nf + + foo|(bar)* + +.fi +and to match zero-or-more "foo"'s-or-"bar"'s: +.nf + + (foo|bar)* + +.fi +.LP +Some notes on patterns: +.IP - +A negated character class such as the example "[^A-Z]" +above +.I will match a newline +unless "\\n" (or an equivalent escape sequence) is one of the +characters explicitly present in the negated character class +(e.g., "[^A-Z\\n]"). This is unlike how many other regular +expression tools treat negated character classes, but unfortunately +the inconsistency is historically entrenched.
+Matching newlines means that a pattern like [^"]* can match an entire +input (overflowing the scanner's input buffer) unless there's another +quote in the input. +.IP - +A rule can have at most one instance of trailing context (the '/' operator +or the '$' operator). The start condition, '^', and "<<EOF>>" patterns +can only occur at the beginning of a pattern, and, as well as with '/' and '$', +cannot be grouped inside parentheses. A '^' which does not occur at +the beginning of a rule or a '$' which does not occur at the end of +a rule loses its special properties and is treated as a normal character. +.IP +The following are illegal: +.nf + + foo/bar$ + foo<sc1>bar + +.fi +Note that the first of these can be written "foo/bar\\n". +.IP +The following will result in '$' or '^' being treated as a normal character: +.nf + + foo|(bar$) + foo|^bar + +.fi +If what's wanted is a "foo" or a bar-followed-by-a-newline, the following +could be used (the special '|' action is explained below): +.nf + + foo | + bar$ /* action goes here */ + +.fi +A similar trick will work for matching a foo or a +bar-at-the-beginning-of-a-line. +.SH HOW THE INPUT IS MATCHED +When the generated scanner is run, it analyzes its input looking +for strings which match any of its patterns. If it finds more than +one match, it takes the one matching the most text (for trailing +context rules, this includes the length of the trailing part, even +though it will then be returned to the input). If it finds two +or more matches of the same length, the +rule listed first in the +.I flex +input file is chosen. +.LP +Once the match is determined, the text corresponding to the match +(called the +.I token) +is made available in the global character pointer +.B yytext, +and its length in the global integer +.B yyleng. +The +.I action +corresponding to the matched pattern is then executed (a more +detailed description of actions follows), and then the remaining +input is scanned for another match.
+.LP +If no match is found, then the +.I default rule +is executed: the next character in the input is considered matched and +copied to the standard output. Thus, the simplest legal +.I flex +input is: +.nf + + %% + +.fi +which generates a scanner that simply copies its input (one character +at a time) to its output. +.SH ACTIONS +Each pattern in a rule has a corresponding action, which can be any +arbitrary C statement. The pattern ends at the first non-escaped +whitespace character; the remainder of the line is its action. If the +action is empty, then when the pattern is matched the input token +is simply discarded. For example, here is the specification for a program +which deletes all occurrences of "zap me" from its input: +.nf + + %% + "zap me" + +.fi +(It will copy all other characters in the input to the output since +they will be matched by the default rule.) +.LP +Here is a program which compresses multiple blanks and tabs down to +a single blank, and throws away whitespace found at the end of a line: +.nf + + %% + [ \\t]+ putchar( ' ' ); + [ \\t]+$ /* ignore this token */ + +.fi +.LP +If the action contains a '{', then the action spans till the balancing '}' +is found, and the action may cross multiple lines. +.I flex +knows about C strings and comments and won't be fooled by braces found +within them, but also allows actions to begin with +.B %{ +and will consider the action to be all the text up to the next +.B %} +(regardless of ordinary braces inside the action). +.LP +An action consisting solely of a vertical bar ('|') means "same as +the action for the next rule." See below for an illustration. +.LP +Actions can include arbitrary C code, including +.B return +statements to return a value to whatever routine called +.B yylex(). +Each time +.B yylex() +is called it continues processing tokens from where it last left +off until it either reaches +the end of the file or executes a return. 
Once it reaches an end-of-file, +however, then any subsequent call to +.B yylex() +will simply immediately return, unless +.B yyrestart() +is first called (see below). +.LP +Actions are not allowed to modify yytext or yyleng. +.LP +There are a number of special directives which can be included within +an action: +.IP - +.B ECHO +copies yytext to the scanner's output. +.IP - +.B BEGIN +followed by the name of a start condition places the scanner in the +corresponding start condition (see below). +.IP - +.B REJECT +directs the scanner to proceed on to the "second best" rule which matched the +input (or a prefix of the input). The rule is chosen as described +above in "How the Input is Matched", and +.B yytext +and +.B yyleng +set up appropriately. +It may either be one which matched as much text +as the originally chosen rule but came later in the +.I flex +input file, or one which matched less text. +For example, the following will both count the +words in the input and call the routine special() whenever "frob" is seen: +.nf + + int word_count = 0; + %% + + frob special(); REJECT; + [^ \\t\\n]+ ++word_count; + +.fi +Without the +.B REJECT, +any "frob"'s in the input would not be counted as words, since the +scanner normally executes only one action per token. +Multiple +.B REJECT's +are allowed, each one finding the next best choice to the currently +active rule. For example, when the following scanner scans the token +"abcd", it will write "abcdabcaba" to the output: +.nf + + %% + a | + ab | + abc | + abcd ECHO; REJECT; + .|\\n /* eat up any unmatched character */ + +.fi +(The first three rules share the fourth's action since they use +the special '|' action.) +.B REJECT +is a particularly expensive feature in terms of scanner performance; +if it is used in +.I any +of the scanner's actions it will slow down +.I all +of the scanner's matching. Furthermore, +.B REJECT +cannot be used with the +.I -f +or +.I -F +options (see below).
+.IP +Note also that unlike the other special actions, +.B REJECT +is a +.I branch; +code immediately following it in the action will +.I not +be executed. +.IP - +.B yymore() +tells the scanner that the next time it matches a rule, the corresponding +token should be +.I appended +onto the current value of +.B yytext +rather than replacing it. For example, given the input "mega-kludge" +the following will write "mega-mega-kludge" to the output: +.nf + + %% + mega- ECHO; yymore(); + kludge ECHO; + +.fi +First "mega-" is matched and echoed to the output. Then "kludge" +is matched, but the previous "mega-" is still hanging around at the +beginning of +.B yytext +so the +.B ECHO +for the "kludge" rule will actually write "mega-kludge". +The presence of +.B yymore() +in the scanner's action entails a minor performance penalty in the +scanner's matching speed. +.IP - +.B yyless(n) +returns all but the first +.I n +characters of the current token back to the input stream, where they +will be rescanned when the scanner looks for the next match. +.B yytext +and +.B yyleng +are adjusted appropriately (e.g., +.B yyleng +will now be equal to +.I n +). For example, on the input "foobar" the following will write out +"foobarbar": +.nf + + %% + foobar ECHO; yyless(3); + [a-z]+ ECHO; + +.fi +An argument of 0 to +.B yyless +will cause the entire current input string to be scanned again. Unless you've +changed how the scanner will subsequently process its input (using +.B BEGIN, +for example), this will result in an endless loop. +.IP - +.B unput(c) +puts the character +.I c +back onto the input stream. It will be the next character scanned. +The following action will take the current token and cause it +to be rescanned enclosed in parentheses. 
+.nf + + { + int i; + unput( ')' ); + for ( i = yyleng - 1; i >= 0; --i ) + unput( yytext[i] ); + unput( '(' ); + } + +.fi +Note that since each +.B unput() +puts the given character back at the +.I beginning +of the input stream, pushing back strings must be done back-to-front. +.IP - +.B input() +reads the next character from the input stream. For example, +the following is one way to eat up C comments: +.nf + + %% + "/*" { + register int c; + + for ( ; ; ) + { + while ( (c = input()) != '*' && + c != EOF ) + ; /* eat up text of comment */ + + if ( c == '*' ) + { + while ( (c = input()) == '*' ) + ; + if ( c == '/' ) + break; /* found the end */ + } + + if ( c == EOF ) + { + error( "EOF in comment" ); + break; + } + } + } + +.fi +(Note that if the scanner is compiled using +.B C++, +then +.B input() +is instead referred to as +.B yyinput(), +in order to avoid a name clash with the +.B C++ +stream by the name of +.I input.) +.IP - +.B yyterminate() +can be used in lieu of a return statement in an action. It terminates +the scanner and returns a 0 to the scanner's caller, indicating "all done". +Subsequent calls to the scanner will immediately return unless preceded +by a call to +.B yyrestart() +(see below). +By default, +.B yyterminate() +is also called when an end-of-file is encountered. It is a macro and +may be redefined. +.SH THE GENERATED SCANNER +The output of +.I flex +is the file +.B lex.yy.c, +which contains the scanning routine +.B yylex(), +a number of tables used by it for matching tokens, and a number +of auxiliary routines and macros. By default, +.B yylex() +is declared as follows: +.nf + + int yylex() + { + ... various definitions and the actions in here ... + } + +.fi +(If your environment supports function prototypes, then it will +be "int yylex( void )".) This definition may be changed by redefining +the "YY_DECL" macro. 
For example, you could use: +.nf + + #undef YY_DECL + #define YY_DECL float lexscan( a, b ) float a, b; + +.fi +to give the scanning routine the name +.I lexscan, +returning a float, and taking two floats as arguments. Note that +if you give arguments to the scanning routine using a +K&R-style/non-prototyped function declaration, you must terminate +the definition with a semi-colon (;). +.LP +Whenever +.B yylex() +is called, it scans tokens from the global input file +.I yyin +(which defaults to stdin). It continues until it either reaches +an end-of-file (at which point it returns the value 0) or +one of its actions executes a +.I return +statement. +In the former case, when called again the scanner will immediately +return unless +.B yyrestart() +is called to point +.I yyin +at the new input file. ( +.B yyrestart() +takes one argument, a +.B FILE * +pointer.) +In the latter case (i.e., when an action +executes a return), the scanner may then be called again and it +will resume scanning where it left off. +.LP +By default (and for purposes of efficiency), the scanner uses +block-reads rather than simple +.I getc() +calls to read characters from +.I yyin. +The nature of how it gets its input can be controlled by redefining the +.B YY_INPUT +macro. +YY_INPUT's calling sequence is "YY_INPUT(buf,result,max_size)". Its +action is to place up to +.I max_size +characters in the character array +.I buf +and return in the integer variable +.I result +either the +number of characters read or the constant YY_NULL (0 on Unix systems) +to indicate EOF. The default YY_INPUT reads from the +global file-pointer "yyin". +.LP +A sample redefinition of YY_INPUT (in the definitions +section of the input file): +.nf + + %{ + #undef YY_INPUT + #define YY_INPUT(buf,result,max_size) \\ + { \\ + int c = getchar(); \\ + result = (c == EOF) ? YY_NULL : (buf[0] = c, 1); \\ + } + %} + +.fi +This definition will change the input processing to occur +one character at a time. 
+.LP +You also can add in things like keeping track of the +input line number this way; but don't expect your scanner to +go very fast. +.LP +When the scanner receives an end-of-file indication from YY_INPUT, +it then checks the +.B yywrap() +function. If +.B yywrap() +returns false (zero), then it is assumed that the +function has gone ahead and set up +.I yyin +to point to another input file, and scanning continues. If it returns +true (non-zero), then the scanner terminates, returning 0 to its +caller. +.LP +The default +.B yywrap() +always returns 1. Presently, to redefine it you must first +"#undef yywrap", as it is currently implemented as a macro. As indicated +by the hedging in the previous sentence, it may be changed to +a true function in the near future. +.LP +The scanner writes its +.B ECHO +output to the +.I yyout +global (default, stdout), which may be redefined by the user simply +by assigning it to some other +.B FILE +pointer. +.SH START CONDITIONS +.I flex +provides a mechanism for conditionally activating rules. Any rule +whose pattern is prefixed with "<sc>" will only be active when +the scanner is in the start condition named "sc". For example, +.nf + + <STRING>[^"]* { /* eat up the string body ... */ + ... + } + +.fi +will be active only when the scanner is in the "STRING" start +condition, and +.nf + + <INITIAL,STRING,QUOTE>\\. { /* handle an escape ... */ + ... + } + +.fi +will be active only when the current start condition is +either "INITIAL", "STRING", or "QUOTE". +.LP +Start conditions +are declared in the definitions (first) section of the input +using unindented lines beginning with either +.B %s +or +.B %x +followed by a list of names. +The former declares +.I inclusive +start conditions, the latter +.I exclusive +start conditions. A start condition is activated using the +.B BEGIN +action. Until the next +.B BEGIN +action is executed, rules with the given start +condition will be active and +rules with other start conditions will be inactive.
+If the start condition is +.I inclusive, +then rules with no start conditions at all will also be active. +If it is +.I exclusive, +then +.I only +rules qualified with the start condition will be active. +A set of rules contingent on the same exclusive start condition +describe a scanner which is independent of any of the other rules in the +.I flex +input. Because of this, +exclusive start conditions make it easy to specify "mini-scanners" +which scan portions of the input that are syntactically different +from the rest (e.g., comments). +.LP +If the distinction between inclusive and exclusive start conditions +is still a little vague, here's a simple example illustrating the +connection between the two. The set of rules: +.nf + + %s example + %% + foo /* do something */ + +.fi +is equivalent to +.nf + + %x example + %% + <INITIAL,example>foo /* do something */ + +.fi +.LP +The default rule (to +.B ECHO +any unmatched character) remains active in start conditions. +.LP +.B BEGIN(0) +returns to the original state where only the rules with +no start conditions are active. This state can also be +referred to as the start-condition "INITIAL", so +.B BEGIN(INITIAL) +is equivalent to +.B BEGIN(0). +(The parentheses around the start condition name are not required but +are considered good style.) +.LP +.B BEGIN +actions can also be given as indented code at the beginning +of the rules section. For example, the following will cause +the scanner to enter the "SPECIAL" start condition whenever +.I yylex() +is called and the global variable +.I enter_special +is true: +.nf + + int enter_special; + + %x SPECIAL + %% + if ( enter_special ) + BEGIN(SPECIAL); + + blahblahblah + ...more rules follow... + +.fi +.LP +To illustrate the uses of start conditions, +here is a scanner which provides two different interpretations +of a string like "123.456". By default it will treat it as +three tokens, the integer "123", a dot ('.'), and the integer "456".
+But if the string is preceded earlier in the line by the string +"expect-floats" +it will treat it as a single token, the floating-point number +123.456: +.nf + + %{ + #include <math.h> + %} + %s expect + + %% + expect-floats BEGIN(expect); + + <expect>[0-9]+"."[0-9]+ { + printf( "found a float, = %f\\n", + atof( yytext ) ); + } + <expect>\\n { + /* that's the end of the line, so + * we need another "expect-number" + * before we'll recognize any more + * numbers + */ + BEGIN(INITIAL); + } + + [0-9]+ { + printf( "found an integer, = %d\\n", + atoi( yytext ) ); + } + + "." printf( "found a dot\\n" ); + +.fi +Here is a scanner which recognizes (and discards) C comments while +maintaining a count of the current input line. +.nf + + %x comment + %% + int line_num = 1; + + "/*" BEGIN(comment); + + <comment>[^*\\n]* /* eat anything that's not a '*' */ + <comment>"*"+[^*/\\n]* /* eat up '*'s not followed by '/'s */ + <comment>\\n ++line_num; + <comment>"*"+"/" BEGIN(INITIAL); + +.fi +Note that start-condition names are really integer values and +can be stored as such. Thus, the above could be extended in the +following fashion: +.nf + + %x comment foo + %% + int line_num = 1; + int comment_caller; + + "/*" { + comment_caller = INITIAL; + BEGIN(comment); + } + + ... + + <foo>"/*" { + comment_caller = foo; + BEGIN(comment); + } + + <comment>[^*\\n]* /* eat anything that's not a '*' */ + <comment>"*"+[^*/\\n]* /* eat up '*'s not followed by '/'s */ + <comment>\\n ++line_num; + <comment>"*"+"/" BEGIN(comment_caller); + +.fi +One can then implement a "stack" of start conditions using an +array of integers. (It is likely that such stacks will become +a full-fledged +.I flex +feature in the future.) Note, though, that +start conditions do not have their own name-space; %s's and %x's +declare names in the same fashion as #define's. +.SH MULTIPLE INPUT BUFFERS +Some scanners (such as those which support "include" files) +require reading from several input streams.
As +.I flex +scanners do a large amount of buffering, one cannot control +where the next input will be read from by simply writing a +.B YY_INPUT +which is sensitive to the scanning context. +.B YY_INPUT +is only called when the scanner reaches the end of its buffer, which +may be a long time after scanning a statement such as an "include" +which requires switching the input source. +.LP +To negotiate these sorts of problems, +.I flex +provides a mechanism for creating and switching between multiple +input buffers. An input buffer is created by using: +.nf + + YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) + +.fi +which takes a +.I FILE +pointer and a size and creates a buffer associated with the given +file and large enough to hold +.I size +characters (when in doubt, use +.B YY_BUF_SIZE +for the size). It returns a +.B YY_BUFFER_STATE +handle, which may then be passed to other routines: +.nf + + void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) + +.fi +switches the scanner's input buffer so subsequent tokens will +come from +.I new_buffer. +Note that +.B yy_switch_to_buffer() +may be used by yywrap() to set things up for continued scanning, instead +of opening a new file and pointing +.I yyin +at it. +.nf + + void yy_delete_buffer( YY_BUFFER_STATE buffer ) + +.fi +is used to reclaim the storage associated with a buffer. +.LP +.B yy_new_buffer() +is an alias for +.B yy_create_buffer(), +provided for compatibility with the C++ use of +.I new +and +.I delete +for creating and destroying dynamic objects. +.LP +Finally, the +.B YY_CURRENT_BUFFER +macro returns a +.B YY_BUFFER_STATE +handle to the current buffer.
+.LP +Here is an example of using these features for writing a scanner +which expands include files (the +.B <<EOF>> +feature is discussed below): +.nf + + /* the "incl" state is used for picking up the name + * of an include file + */ + %x incl + + %{ + #define MAX_INCLUDE_DEPTH 10 + YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH]; + int include_stack_ptr = 0; + %} + + %% + include BEGIN(incl); + + [a-z]+ ECHO; + [^a-z\\n]*\\n? ECHO; + + <incl>[ \\t]* /* eat the whitespace */ + <incl>[^ \\t\\n]+ { /* got the include file name */ + if ( include_stack_ptr >= MAX_INCLUDE_DEPTH ) + { + fprintf( stderr, "Includes nested too deeply" ); + exit( 1 ); + } + + include_stack[include_stack_ptr++] = + YY_CURRENT_BUFFER; + + yyin = fopen( yytext, "r" ); + + if ( ! yyin ) + error( ... ); + + yy_switch_to_buffer( + yy_create_buffer( yyin, YY_BUF_SIZE ) ); + + BEGIN(INITIAL); + } + + <<EOF>> { + if ( --include_stack_ptr < 0 ) + { + yyterminate(); + } + + else + yy_switch_to_buffer( + include_stack[include_stack_ptr] ); + } + +.fi +.SH END-OF-FILE RULES +The special rule "<<EOF>>" indicates +actions which are to be taken when an end-of-file is +encountered and yywrap() returns non-zero (i.e., indicates +no further files to process). The action must finish +by doing one of four things: +.IP - +the special +.B YY_NEW_FILE +action, if +.I yyin +has been pointed at a new file to process; +.IP - +a +.I return +statement; +.IP - +the special +.B yyterminate() +action; +.IP - +or, switching to a new buffer using +.B yy_switch_to_buffer() +as shown in the example above. +.LP +<<EOF>> rules may not be used with other +patterns; they may only be qualified with a list of start +conditions. If an unqualified <<EOF>> rule is given, it +applies to +.I all +start conditions which do not already have <<EOF>> actions. To +specify an <<EOF>> rule for only the initial start condition, use +.nf + + <INITIAL><<EOF>> + +.fi +.LP +These rules are useful for catching things like unclosed comments.
+An example: +.nf + + %x quote + %% + + ...other rules for dealing with quotes... + + <quote><<EOF>> { + error( "unterminated quote" ); + yyterminate(); + } + <<EOF>> { + if ( *++filelist ) + { + yyin = fopen( *filelist, "r" ); + YY_NEW_FILE; + } + else + yyterminate(); + } + +.fi +.SH MISCELLANEOUS MACROS +The macro +.bd +YY_USER_ACTION +can be redefined to provide an action +which is always executed prior to the matched rule's action. For example, +it could be #define'd to call a routine to convert yytext to lower-case. +.LP +The macro +.B YY_USER_INIT +may be redefined to provide an action which is always executed before +the first scan (and before the scanner's internal initializations are done). +For example, it could be used to call a routine to read +in a data table or open a logging file. +.LP +In the generated scanner, the actions are all gathered in one large +switch statement and separated using +.B YY_BREAK, +which may be redefined. By default, it is simply a "break", to separate +each rule's action from the following rule's. +Redefining +.B YY_BREAK +allows, for example, C++ users to +#define YY_BREAK to do nothing (while being very careful that every +rule ends with a "break" or a "return"!) to avoid suffering from +unreachable statement warnings where because a rule's action ends with +"return", the +.B YY_BREAK +is inaccessible. +.SH INTERFACING WITH YACC +One of the main uses of +.I flex +is as a companion to the +.I yacc +parser-generator. +.I yacc +parsers expect to call a routine named +.B yylex() +to find the next input token. The routine is supposed to +return the type of the next token as well as putting any associated +value in the global +.B yylval. +To use +.I flex +with +.I yacc, +one specifies the +.B -d +option to +.I yacc +to instruct it to generate the file +.B y.tab.h +containing definitions of all the +.B %tokens +appearing in the +.I yacc +input. This file is then included in the +.I flex +scanner.
For example, if one of the tokens is "TOK_NUMBER", +part of the scanner might look like: +.nf + + %{ + #include "y.tab.h" + %} + + %% + + [0-9]+ yylval = atoi( yytext ); return TOK_NUMBER; + +.fi +.SH TRANSLATION TABLE +In the name of POSIX compliance, +.I flex +supports a +.I translation table +for mapping input characters into groups. +The table is specified in the first section, and its format looks like: +.nf + + %t + 1 abcd + 2 ABCDEFGHIJKLMNOPQRSTUVWXYZ + 52 0123456789 + 6 \\t\\ \\n + %t + +.fi +This example specifies that the characters 'a', 'b', 'c', and 'd' +are to all be lumped into group #1, upper-case letters +in group #2, digits in group #52, tabs, blanks, and newlines into +group #6, and +.I +no other characters will appear in the patterns. +The group numbers are actually disregarded by +.I flex; +.B %t +serves, though, to lump characters together. Given the above +table, for example, the pattern "a(AA)*5" is equivalent to "d(ZQ)*0". +They both say, "match any character in group #1, followed by +zero-or-more pairs of characters +from group #2, followed by a character from group #52." Thus +.B %t +provides a crude way for introducing equivalence classes into +the scanner specification. +.LP +Note that the +.B -i +option (see below) coupled with the equivalence classes which +.I flex +automatically generates take care of virtually all the instances +when one might consider using +.B %t. +But what the hell, it's there if you want it. +.SH OPTIONS +.I flex +has the following options: +.TP +.B -b +Generate backtracking information to +.I lex.backtrack. +This is a list of scanner states which require backtracking +and the input characters on which they do so. By adding rules one +can remove backtracking states. If all backtracking states +are eliminated and +.B -f +or +.B -F +is used, the generated scanner will run faster (see the +.B -p +flag). Only users who wish to squeeze every last cycle out of their +scanners need worry about this option. 
(See the section on PERFORMANCE +CONSIDERATIONS below.) +.TP +.B -c +is a do-nothing, deprecated option included for POSIX compliance. +.IP +.B NOTE: +in previous releases of +.I flex +.B -c +specified table-compression options. This functionality is +now given by the +.B -C +flag. To ease the impact of this change, when +.I flex +encounters +.B -c, +it currently issues a warning message and assumes that +.B -C +was desired instead. In the future this "promotion" of +.B -c +to +.B -C +will go away in the name of full POSIX compliance (unless +the POSIX meaning is removed first). +.TP +.B -d +makes the generated scanner run in +.I debug +mode. Whenever a pattern is recognized and the global +.B yy_flex_debug +is non-zero (which is the default), +the scanner will write to +.I stderr +a line of the form: +.nf + + --accepting rule at line 53 ("the matched text") + +.fi +The line number refers to the location of the rule in the file +defining the scanner (i.e., the file that was fed to flex). Messages +are also generated when the scanner backtracks, accepts the +default rule, reaches the end of its input buffer (or encounters +a NUL; at this point, the two look the same as far as the scanner's concerned), +or reaches an end-of-file. +.TP +.B -f +specifies (take your pick) +.I full table +or +.I fast scanner. +No table compression is done. The result is large but fast. +This option is equivalent to +.B -Cf +(see below). +.TP +.B -i +instructs +.I flex +to generate a +.I case-insensitive +scanner. The case of letters given in the +.I flex +input patterns will +be ignored, and tokens in the input will be matched regardless of case. The +matched text given in +.I yytext +will have the preserved case (i.e., it will not be folded). +.TP +.B -n +is another do-nothing, deprecated option included only for +POSIX compliance. +.TP +.B -p +generates a performance report to stderr.
The report +consists of comments regarding features of the +.I flex +input file which will cause a loss of performance in the resulting scanner. +Note that the use of +.I REJECT +and variable trailing context (see the BUGS section in flex(1)) +entails a substantial performance penalty; use of +.I yymore(), +the +.B ^ +operator, +and the +.B -I +flag entail minor performance penalties. +.TP +.B -s +causes the +.I default rule +(that unmatched scanner input is echoed to +.I stdout) +to be suppressed. If the scanner encounters input that does not +match any of its rules, it aborts with an error. This option is +useful for finding holes in a scanner's rule set. +.TP +.B -t +instructs +.I flex +to write the scanner it generates to standard output instead +of +.B lex.yy.c. +.TP +.B -v +specifies that +.I flex +should write to +.I stderr +a summary of statistics regarding the scanner it generates. +Most of the statistics are meaningless to the casual +.I flex +user, but the +first line identifies the version of +.I flex, +which is useful for figuring +out where you stand with respect to patches and new releases, +and the next two lines give the date when the scanner was created +and a summary of the flags which were in effect. +.TP +.B -F +specifies that the +.ul +fast +scanner table representation should be used. This representation is +about as fast as the full table representation +.ul +(-f), +and for some sets of patterns will be considerably smaller (and for +others, larger). In general, if the pattern set contains both "keywords" +and a catch-all, "identifier" rule, such as in the set: +.nf + + "case" return TOK_CASE; + "switch" return TOK_SWITCH; + ... + "default" return TOK_DEFAULT; + [a-z]+ return TOK_ID; + +.fi +then you're better off using the full table representation. If only +the "identifier" rule is present and you then use a hash table or some such +to detect the keywords, you're better off using +.ul +-F. 
+.IP +This option is equivalent to +.B -CF +(see below). +.TP +.B -I +instructs +.I flex +to generate an +.I interactive +scanner. Normally, scanners generated by +.I flex +always look ahead one +character before deciding that a rule has been matched. At the cost of +some scanning overhead, +.I flex +will generate a scanner which only looks ahead +when needed. Such scanners are called +.I interactive +because if you want to write a scanner for an interactive system such as a +command shell, you will probably want the user's input to be terminated +with a newline, and without +.B -I +the user will have to type a character in addition to the newline in order +to have the newline recognized. This leads to dreadful interactive +performance. +.IP +If all this seems too confusing, here's the general rule: if a human will +be typing in input to your scanner, use +.B -I, +otherwise don't; if you don't care about squeezing the utmost performance +from your scanner and you +don't want to make any assumptions about the input to your scanner, +use +.B -I. +.IP +Note, +.B -I +cannot be used in conjunction with +.I full +or +.I fast tables, +i.e., the +.B -f, -F, -Cf, +or +.B -CF +flags. +.TP +.B -L +instructs +.I flex +not to generate +.B #line +directives. Without this option, +.I flex +peppers the generated scanner +with #line directives so error messages in the actions will be correctly +located with respect to the original +.I flex +input file, and not to +the fairly meaningless line numbers of +.B lex.yy.c. +(Unfortunately +.I flex +does not presently generate the necessary directives +to "retarget" the line numbers for those parts of +.B lex.yy.c +which it generated. So if there is an error in the generated code, +a meaningless line number is reported.) +.TP +.B -T +makes +.I flex +run in +.I trace +mode. It will generate a lot of messages to +.I stdout +concerning +the form of the input and the resultant non-deterministic and deterministic +finite automata.
This option is mostly for use in maintaining +.I flex. +.TP +.B -8 +instructs +.I flex +to generate an 8-bit scanner, i.e., one which can recognize 8-bit +characters. On some sites, +.I flex +is installed with this option as the default. On others, the default +is 7-bit characters. To see which is the case, check the verbose +.B (-v) +output for "equivalence classes created". If the denominator of +the number shown is 128, then by default +.I flex +is generating 7-bit characters. If it is 256, then the default is +8-bit characters and the +.B -8 +flag is not required (but may be a good idea to keep the scanner +specification portable). Feeding a 7-bit scanner 8-bit characters +will result in infinite loops, bus errors, or other such fireworks, +so when in doubt, use the flag. Note that if equivalence classes +are used, 8-bit scanners take only slightly more table space than +7-bit scanners (128 bytes, to be exact); if equivalence classes are +not used, however, then the tables may grow up to twice their +7-bit size. +.TP +.B -C[efmF] +controls the degree of table compression. +.IP +.B -Ce +directs +.I flex +to construct +.I equivalence classes, +i.e., sets of characters +which have identical lexical properties (for example, if the only +appearance of digits in the +.I flex +input is in the character class +"[0-9]" then the digits '0', '1', ..., '9' will all be put +in the same equivalence class). Equivalence classes usually give +dramatic reductions in the final table/object file sizes (typically +a factor of 2-5) and are pretty cheap performance-wise (one array +look-up per character scanned). +.IP +.B -Cf +specifies that the +.I full +scanner tables should be generated - +.I flex +should not compress the +tables by taking advantage of similar transition functions for +different states. +.IP +.B -CF +specifies that the alternate fast scanner representation (described +above under the +.B -F +flag) +should be used.
+.IP +.B -Cm +directs +.I flex +to construct +.I meta-equivalence classes, +which are sets of equivalence classes (or characters, if equivalence +classes are not being used) that are commonly used together. Meta-equivalence +classes are often a big win when using compressed tables, but they +have a moderate performance impact (one or two "if" tests and one +array look-up per character scanned). +.IP +A lone +.B -C +specifies that the scanner tables should be compressed but neither +equivalence classes nor meta-equivalence classes should be used. +.IP +The options +.B -Cf +or +.B -CF +and +.B -Cm +do not make sense together - there is no opportunity for meta-equivalence +classes if the table is not being compressed. Otherwise the options +may be freely mixed. +.IP +The default setting is +.B -Cem, +which specifies that +.I flex +should generate equivalence classes +and meta-equivalence classes. This setting provides the highest +degree of table compression. You can trade off +faster-executing scanners at the cost of larger tables with +the following generally being true: +.nf + + slowest & smallest + -Cem + -Cm + -Ce + -C + -C{f,F}e + -C{f,F} + fastest & largest + +.fi +Note that scanners with the smallest tables are usually generated and +compiled the quickest, so +during development you will usually want to use the default, maximal +compression. +.IP +.B -Cfe +is often a good compromise between speed and size for production +scanners. +.IP +.B -C +options are not cumulative; whenever the flag is encountered, the +previous -C settings are forgotten. +.TP +.B -Sskeleton_file +overrides the default skeleton file from which +.I flex +constructs its scanners. You'll never need this option unless you are doing +.I flex +maintenance or development. +.SH PERFORMANCE CONSIDERATIONS +The main design goal of +.I flex +is that it generate high-performance scanners. It has been optimized +for dealing well with large sets of rules. 
Aside from the effects +of table compression on scanner speed outlined above, +there are a number of options/actions which degrade performance. These +are, from most expensive to least: +.nf + + REJECT + + pattern sets that require backtracking + arbitrary trailing context + + '^' beginning-of-line operator + yymore() + +.fi +with the first three all being quite expensive and the last two +being quite cheap. +.LP +.B REJECT +should be avoided at all costs when performance is important. +It is a particularly expensive option. +.LP +Getting rid of backtracking is messy and often may be an enormous +amount of work for a complicated scanner. In principle, one begins +by using the +.B -b +flag to generate a +.I lex.backtrack +file. For example, on the input +.nf + + %% + foo return TOK_KEYWORD; + foobar return TOK_KEYWORD; + +.fi +the file looks like: +.nf + + State #6 is non-accepting - + associated rule line numbers: + 2 3 + out-transitions: [ o ] + jam-transitions: EOF [ \\001-n p-\\177 ] + + State #8 is non-accepting - + associated rule line numbers: + 3 + out-transitions: [ a ] + jam-transitions: EOF [ \\001-` b-\\177 ] + + State #9 is non-accepting - + associated rule line numbers: + 3 + out-transitions: [ r ] + jam-transitions: EOF [ \\001-q s-\\177 ] + + Compressed tables always backtrack. + +.fi +The first few lines tell us that there's a scanner state in +which it can make a transition on an 'o' but not on any other +character, and that in that state the currently scanned text does not match +any rule. The state occurs when trying to match the rules found +at lines 2 and 3 in the input file. +If the scanner is in that state and then reads +something other than an 'o', it will have to backtrack to find +a rule which is matched. With +a bit of headscratching one can see that this must be the +state it's in when it has seen "fo".
When this has happened, +if anything other than another 'o' is seen, the scanner will +have to back up to simply match the 'f' (by the default rule). +.LP +The comment regarding State #8 indicates there's a problem +when "foob" has been scanned. Indeed, on any character other +than an 'a', the scanner will have to back up to accept "foo". +Similarly, the comment for State #9 concerns when "fooba" has +been scanned. +.LP +The final comment reminds us that there's no point going to +all the trouble of removing backtracking from the rules unless +we're using +.B -f +or +.B -F, +since there's no performance gain doing so with compressed scanners. +.LP +The way to remove the backtracking is to add "error" rules: +.nf + + %% + foo return TOK_KEYWORD; + foobar return TOK_KEYWORD; + + fooba | + foob | + fo { + /* false alarm, not really a keyword */ + return TOK_ID; + } + +.fi +.LP +Eliminating backtracking among a list of keywords can also be +done using a "catch-all" rule: +.nf + + %% + foo return TOK_KEYWORD; + foobar return TOK_KEYWORD; + + [a-z]+ return TOK_ID; + +.fi +This is usually the best solution when appropriate. +.LP +Backtracking messages tend to cascade. +With a complicated set of rules it's not uncommon to get hundreds +of messages. If one can decipher them, though, it often +only takes a dozen or so rules to eliminate the backtracking (though +it's easy to make a mistake and have an error rule accidentally match +a valid token. A possible future +.I flex +feature will be to automatically add rules to eliminate backtracking). +.LP +.I Variable +trailing context (where both the leading and trailing parts do not have +a fixed length) entails almost the same performance loss as +.I REJECT +(i.e., substantial).
So when possible a rule like: +.nf + + %% + mouse|rat/(cat|dog) run(); + +.fi +is better written: +.nf + + %% + mouse/cat|dog run(); + rat/cat|dog run(); + +.fi +or as +.nf + + %% + mouse|rat/cat run(); + mouse|rat/dog run(); + +.fi +Note that here the special '|' action does +.I not +provide any savings, and can even make things worse (see +.B BUGS +in flex(1)). +.LP +Another area where the user can increase a scanner's performance +(and one that's easier to implement) arises from the fact that +the longer the tokens matched, the faster the scanner will run. +This is because with long tokens the processing of most input +characters takes place in the (short) inner scanning loop, and +does not often have to go through the additional work of setting up +the scanning environment (e.g., +.B yytext) +for the action. Recall the scanner for C comments: +.nf + + %x comment + %% + int line_num = 1; + + "/*" BEGIN(comment); + + <comment>[^*\\n]* + <comment>"*"+[^*/\\n]* + <comment>\\n ++line_num; + <comment>"*"+"/" BEGIN(INITIAL); + +.fi +This could be sped up by writing it as: +.nf + + %x comment + %% + int line_num = 1; + + "/*" BEGIN(comment); + + <comment>[^*\\n]* + <comment>[^*\\n]*\\n ++line_num; + <comment>"*"+[^*/\\n]* + <comment>"*"+[^*/\\n]*\\n ++line_num; + <comment>"*"+"/" BEGIN(INITIAL); + +.fi +Now instead of each newline requiring the processing of another +action, recognizing the newlines is "distributed" over the other rules +to keep the matched text as long as possible. Note that +.I adding +rules does +.I not +slow down the scanner! The speed of the scanner is independent +of the number of rules or (modulo the considerations given at the +beginning of this section) how complicated the rules are with +regard to operators such as '*' and '|'. +.LP +A final example in speeding up a scanner: suppose you want to scan +through a file containing identifiers and keywords, one per line +and with no other extraneous characters, and recognize all the +keywords. A natural first approach is: +.nf + + %% + asm | + auto | + break | + ... etc ...
+ volatile | + while /* it's a keyword */ + + .|\\n /* it's not a keyword */ + +.fi +To eliminate the back-tracking, introduce a catch-all rule: +.nf + + %% + asm | + auto | + break | + ... etc ... + volatile | + while /* it's a keyword */ + + [a-z]+ | + .|\\n /* it's not a keyword */ + +.fi +Now, if it's guaranteed that there's exactly one word per line, +then we can reduce the total number of matches by a half by +merging in the recognition of newlines with that of the other +tokens: +.nf + + %% + asm\\n | + auto\\n | + break\\n | + ... etc ... + volatile\\n | + while\\n /* it's a keyword */ + + [a-z]+\\n | + .|\\n /* it's not a keyword */ + +.fi +One has to be careful here, as we have now reintroduced backtracking +into the scanner. In particular, while +.I we +know that there will never be any characters in the input stream +other than letters or newlines, +.I flex +can't figure this out, and it will plan for possibly needing backtracking +when it has scanned a token like "auto" and then the next character +is something other than a newline or a letter. Previously it would +then just match the "auto" rule and be done, but now it has no "auto" +rule, only a "auto\\n" rule. To eliminate the possibility of backtracking, +we could either duplicate all rules but without final newlines, or, +since we never expect to encounter such an input and therefore don't +care how it's classified, we can introduce one more catch-all rule, this +one which doesn't include a newline: +.nf + + %% + asm\\n | + auto\\n | + break\\n | + ... etc ... + volatile\\n | + while\\n /* it's a keyword */ + + [a-z]+\\n | + [a-z]+ | + .|\\n /* it's not a keyword */ + +.fi +Compiled with +.B -Cf, +this is about as fast as one can get a +.I flex +scanner to go for this particular problem. +.LP +A final note: +.I flex +is slow when matching NUL's, particularly when a token contains +multiple NUL's.
+It's best to write rules which match +.I short +amounts of text if it's anticipated that the text will often include NUL's. +.SH INCOMPATIBILITIES WITH LEX AND POSIX +.I flex +is a rewrite of the Unix +.I lex +tool (the two implementations do not share any code, though), +with some extensions and incompatibilities, both of which +are of concern to those who wish to write scanners acceptable +to either implementation. At present, the POSIX +.I lex +draft is +very close to the original +.I lex +implementation, so some of these +incompatibilities are also in conflict with the POSIX draft. But +the intent is that except as noted below, +.I flex +as it presently stands will +ultimately be POSIX conformant (i.e., that those areas of conflict with +the POSIX draft will be resolved in +.I flex's +favor). Please bear in +mind that all the comments which follow are with regard to the POSIX +.I draft +standard of Summer 1989, and not the final document (or subsequent +drafts); they are included so +.I flex +users can be aware of the standardization issues and those areas where +.I flex +may in the near future undergo changes incompatible with +its current definition. +.LP +.I flex +is fully compatible with +.I lex +with the following exceptions: +.IP - +The undocumented +.I lex +scanner internal variable +.B yylineno +is not supported. It is difficult to support this option efficiently, +since it requires examining every character scanned and reexamining +the characters when the scanner backs up. +Things get more complicated when the end of buffer or file is reached or a +NUL is scanned (since the scan must then be restarted with the proper line +number count), or the user uses the yyless(), unput(), or REJECT actions, +or the multiple input buffer functions. +.IP +The fix is to add rules which, upon seeing a newline, increment +yylineno. This is usually an easy process, though it can be a drag if some +of the patterns can match multiple newlines along with other characters. 
+.IP +yylineno is not part of the POSIX draft. +.IP - +The +.B input() +routine is not redefinable, though it may be called to read characters +following whatever has been matched by a rule. If +.B input() +encounters an end-of-file the normal +.B yywrap() +processing is done. A ``real'' end-of-file is returned by +.B input() +as +.I EOF. +.IP +Input is instead controlled by redefining the +.B YY_INPUT +macro. +.IP +The +.I flex +restriction that +.B input() +cannot be redefined is in accordance with the POSIX draft, but +.B YY_INPUT +has not yet been accepted into the draft (and probably won't; it looks +like the draft will simply not specify any way of controlling the +scanner's input other than by making an initial assignment to +.I yyin). +.IP - +.I flex +scanners do not use stdio for input. Because of this, when writing an +interactive scanner one must explicitly call fflush() on the +stream associated with the terminal after writing out a prompt. +With +.I lex +such writes are automatically flushed since +.I lex +scanners use +.B getchar() +for their input. Also, when writing interactive scanners with +.I flex, +the +.B -I +flag must be used. +.IP - +.I flex +scanners are not as reentrant as +.I lex +scanners. In particular, if you have an interactive scanner and +an interrupt handler which long-jumps out of the scanner, and +the scanner is subsequently called again, you may get the following +message: +.nf + + fatal flex scanner internal error--end of buffer missed + +.fi +To reenter the scanner, first use +.nf + + yyrestart( yyin ); + +.fi +.IP - +.B output() +is not supported. +Output from the +.B ECHO +macro is done to the file-pointer +.I yyout +(default +.I stdout). +.IP +The POSIX draft mentions that an +.B output() +routine exists but currently gives no details as to what it does. +.IP - +.I lex +does not support exclusive start conditions (%x), though they +are in the current POSIX draft. 
+.IP - +When definitions are expanded, +.I flex +encloses them in parentheses. +With lex, the following: +.nf + + NAME [A-Z][A-Z0-9]* + %% + foo{NAME}? printf( "Found it\\n" ); + %% + +.fi +will not match the string "foo" because when the macro +is expanded the rule is equivalent to "foo[A-Z][A-Z0-9]*?" +and the precedence is such that the '?' is associated with +"[A-Z0-9]*". With +.I flex, +the rule will be expanded to +"foo([A-Z][A-Z0-9]*)?" and so the string "foo" will match. +Note that because of this, the +.B ^, $, <s>, /, +and +.B <<EOF>> +operators cannot be used in a +.I flex +definition. +.IP +The POSIX draft interpretation is the same as +.I flex's. +.IP - +To specify a character class which matches anything but a right bracket (']'), +in +.I lex +one can use "[^]]" but with +.I flex +one must use "[^\\]]". The latter works with +.I lex, +too. +.IP - +The +.I lex +.B %r +(generate a Ratfor scanner) option is not supported. It is not part +of the POSIX draft. +.IP - +If you are providing your own yywrap() routine, you must include a +"#undef yywrap" in the definitions section (section 1). Note that +the "#undef" will have to be enclosed in %{}'s. +.IP +The POSIX draft +specifies that yywrap() is a function and this is very unlikely to change; so +.I flex users are warned +that +.B yywrap() +is likely to be changed to a function in the near future. +.IP - +After a call to +.B unput(), +.I yytext +and +.I yyleng +are undefined until the next token is matched. This is not the case with +.I lex +or the present POSIX draft. +.IP - +The precedence of the +.B {} +(numeric range) operator is different. +.I lex +interprets "abc{1,3}" as "match one, two, or +three occurrences of 'abc'", whereas +.I flex +interprets it as "match 'ab' +followed by one, two, or three occurrences of 'c'". The latter is +in agreement with the current POSIX draft. +.IP - +The precedence of the +.B ^ +operator is different.
+.I lex +interprets "^foo|bar" as "match either 'foo' at the beginning of a line, +or 'bar' anywhere", whereas +.I flex +interprets it as "match either 'foo' or 'bar' if they come at the beginning +of a line". The latter is in agreement with the current POSIX draft. +.IP - +To refer to yytext outside of the scanner source file, +the correct definition with +.I flex +is "extern char *yytext" rather than "extern char yytext[]". +This is contrary to the current POSIX draft but a point on which +.I flex +will not be changing, as the array representation entails a +serious performance penalty. It is hoped that the POSIX draft will +be emended to support the +.I flex +variety of declaration (as this is a fairly painless change to +require of +.I lex +users). +.IP - +.I yyin +is +.I initialized +by +.I lex +to be +.I stdin; +.I flex, +on the other hand, +initializes +.I yyin +to NULL +and then +.I assigns +it to +.I stdin +the first time the scanner is called, providing +.I yyin +has not already been assigned to a non-NULL value. The difference is +subtle, but the net effect is that with +.I flex +scanners, +.I yyin +does not have a valid value until the scanner has been called. +.IP - +The special table-size declarations such as +.B %a +supported by +.I lex +are not required by +.I flex +scanners; +.I flex +ignores them. +.IP - +The name +.bd +FLEX_SCANNER +is #define'd so scanners may be written for use with either +.I flex +or +.I lex. 
+.LP +The following +.I flex +features are not included in +.I lex +or the POSIX draft standard: +.nf + + yyterminate() + <<EOF>> + YY_DECL + #line directives + %{}'s around actions + yyrestart() + comments beginning with '#' (deprecated) + multiple actions on a line + +.fi +This last feature refers to the fact that with +.I flex +you can put multiple actions on the same line, separated with +semi-colons, while with +.I lex, +the following +.nf + + foo handle_foo(); ++num_foos_seen; + +.fi +is (rather surprisingly) truncated to +.nf + + foo handle_foo(); + +.fi +.I flex +does not truncate the action. Actions that are not enclosed in +braces are simply terminated at the end of the line. +.SH DIAGNOSTICS +.I reject_used_but_not_detected undefined +or +.I yymore_used_but_not_detected undefined - +These errors can occur at compile time. They indicate that the +scanner uses +.B REJECT +or +.B yymore() +but that +.I flex +failed to notice the fact, meaning that +.I flex +scanned the first two sections looking for occurrences of these actions +and failed to find any, but somehow you snuck some in (via a #include +file, for example). Make an explicit reference to the action in your +.I flex +input file. (Note that previously +.I flex +supported a +.B %used/%unused +mechanism for dealing with this problem; this feature is still supported +but now deprecated, and will go away soon unless the author hears from +people who can argue compellingly that they need it.) +.LP +.I flex scanner jammed - +a scanner compiled with +.B -s +has encountered an input string which wasn't matched by +any of its rules. +.LP +.I flex input buffer overflowed - +a scanner rule matched a string long enough to overflow the +scanner's internal input buffer (16K bytes by default - controlled by +.B YY_BUF_SIZE +in "flex.skel". Note that to redefine this macro, you must first +.B #undefine +it).
+.LP +.I scanner requires -8 flag - +Your scanner specification includes recognizing 8-bit characters and +you did not specify the -8 flag (and your site has not installed flex +with -8 as the default). +.LP +.I +fatal flex scanner internal error--end of buffer missed - +This can occur in a scanner which is reentered after a long-jump +has jumped out (or over) the scanner's activation frame. Before +reentering the scanner, use: +.nf + + yyrestart( yyin ); + +.fi +.LP +.I too many %t classes! - +You managed to put every single character into its own %t class. +.I flex +requires that at least one of the classes share characters. +.SH DEFICIENCIES / BUGS +See flex(1). +.SH "SEE ALSO" +.LP +flex(1), lex(1), yacc(1), sed(1), awk(1). +.LP +M. E. Lesk and E. Schmidt, +.I LEX - Lexical Analyzer Generator +.SH AUTHOR +Vern Paxson, with the help of many ideas and much inspiration from +Van Jacobson. Original version by Jef Poskanzer. The fast table +representation is a partial implementation of a design done by Van +Jacobson. The implementation was done by Kevin Gong and Vern Paxson. +.LP +Thanks to the many +.I flex +beta-testers, feedbackers, and contributors, especially Casey +Leedom, benson@odi.com, Keith Bostic, +Frederic Brehm, Nick Christopher, Jason Coughlin, +Scott David Daniels, Leo Eskin, +Chris Faylor, Eric Goldman, Eric +Hughes, Jeffrey R. Jones, Kevin B. Kenny, Ronald Lamprecht, +Greg Lee, Craig Leres, Mohamed el Lozy, Jim Meyering, Marc Nozell, Esmond Pitt, +Jef Poskanzer, Jim Roskind, +Dave Tallman, Frank Whaley, Ken Yap, and those whose names +have slipped my marginal mail-archiving skills but whose contributions +are appreciated all the same. +.LP +Thanks to Keith Bostic, John Gilmore, Craig Leres, Bob +Mulcahy, Rich Salz, and Richard Stallman for help with various distribution +headaches.
+.LP +Thanks to Esmond Pitt and Earle Horton for 8-bit character support; +to Benson Margulies and Fred +Burke for C++ support; to Ove Ewerlid for the basics of support for +NUL's; and to Eric Hughes for the basics of support for multiple buffers. +.LP +Work is being done on extending +.I flex +to generate scanners in which the +state machine is directly represented in C code rather than tables. +These scanners may well be substantially faster than those generated +using -f or -F. If you are working in this area and are interested +in comparing notes and seeing whether redundant work can be avoided, +contact Ove Ewerlid (ewerlid@mizar.DoCS.UU.SE). +.LP +This work was primarily done when I was at the Real Time Systems Group +at the Lawrence Berkeley Laboratory in Berkeley, CA. Many thanks to all there +for the support I received. +.LP +Send comments to: +.nf + + Vern Paxson + Computer Science Department + 4126 Upson Hall + Cornell University + Ithaca, NY 14853-7501 + + vern@cs.cornell.edu + decvax!cornell!vern + +.fi diff --git a/util/flex/gen.c b/util/flex/gen.c new file mode 100644 index 000000000..f51adf95b --- /dev/null +++ b/util/flex/gen.c @@ -0,0 +1,1336 @@ +/* gen - actual generation (writing) of flex scanners */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. 
+ * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + +#include "flexdef.h" + + +/* declare functions that have forward references */ + +void gen_next_state PROTO((int)); +void genecs PROTO(()); +void indent_put2s PROTO((char [], char [])); +void indent_puts PROTO((char [])); + + +static int indent_level = 0; /* each level is 4 spaces */ + +#define indent_up() (++indent_level) +#define indent_down() (--indent_level) +#define set_indent(indent_val) indent_level = indent_val + +/* *everything* is done in terms of arrays starting at 1, so provide + * a null entry for the zero element of all C arrays + */ +static char C_short_decl[] = "static const short int %s[%d] =\n { 0,\n"; +static char C_long_decl[] = "static const long int %s[%d] =\n { 0,\n"; +static char C_state_decl[] = + "static const yy_state_type %s[%d] =\n { 0,\n"; + + +/* indent to the current level */ + +void do_indent() + + { + register int i = indent_level * 4; + + while ( i >= 8 ) + { + putchar( '\t' ); + i -= 8; + } + + while ( i > 0 ) + { + putchar( ' ' ); + --i; + } + } 
+ + +/* generate the code to keep backtracking information */ + +void gen_backtracking() + + { + if ( reject || num_backtracking == 0 ) + return; + + if ( fullspd ) + indent_puts( "if ( yy_current_state[-1].yy_nxt )" ); + else + indent_puts( "if ( yy_accept[yy_current_state] )" ); + + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_last_accepting_state = yy_current_state;" ); + indent_puts( "yy_last_accepting_cpos = yy_cp;" ); + indent_puts( "}" ); + indent_down(); + } + + +/* generate the code to perform the backtrack */ + +void gen_bt_action() + + { + if ( reject || num_backtracking == 0 ) + return; + + set_indent( 3 ); + + indent_puts( "case 0: /* must backtrack */" ); + indent_puts( "/* undo the effects of YY_DO_BEFORE_ACTION */" ); + indent_puts( "*yy_cp = yy_hold_char;" ); + + if ( fullspd || fulltbl ) + indent_puts( "yy_cp = yy_last_accepting_cpos + 1;" ); + else + /* backtracking info for compressed tables is taken \after/ + * yy_cp has been incremented for the next state + */ + indent_puts( "yy_cp = yy_last_accepting_cpos;" ); + + indent_puts( "yy_current_state = yy_last_accepting_state;" ); + indent_puts( "goto yy_find_action;" ); + putchar( '\n' ); + + set_indent( 0 ); + } + + +/* genctbl - generates full speed compressed transition table + * + * synopsis + * genctbl(); + */ + +void genctbl() + + { + register int i; + int end_of_buffer_action = num_rules + 1; + + /* table of verify for transition and offset to next state */ + printf( "static const struct yy_trans_info yy_transition[%d] =\n", + tblend + numecs + 1 ); + printf( " {\n" ); + + /* We want the transition to be represented as the offset to the + * next state, not the actual state number, which is what it currently is. + * The offset is base[nxt[i]] - base[chk[i]]. That's just the + * difference between the starting points of the two involved states + * (to - from). + * + * first, though, we need to find some way to put in our end-of-buffer + * flags and states. 
We do this by making a state with absolutely no + * transitions. We put it at the end of the table. + */ + /* at this point, we're guaranteed that there's enough room in nxt[] + * and chk[] to hold tblend + numecs entries. We need just two slots. + * One for the action and one for the end-of-buffer transition. We + * now *assume* that we're guaranteed the only character we'll try to + * index this nxt/chk pair with is EOB, i.e., 0, so we don't have to + * make sure there's room for jam entries for other characters. + */ + + base[lastdfa + 1] = tblend + 2; + nxt[tblend + 1] = end_of_buffer_action; + chk[tblend + 1] = numecs + 1; + chk[tblend + 2] = 1; /* anything but EOB */ + nxt[tblend + 2] = 0; /* so that "make test" won't show arb. differences */ + + /* make sure every state has a end-of-buffer transition and an action # */ + for ( i = 0; i <= lastdfa; ++i ) + { + register int anum = dfaacc[i].dfaacc_state; + + chk[base[i]] = EOB_POSITION; + chk[base[i] - 1] = ACTION_POSITION; + nxt[base[i] - 1] = anum; /* action number */ + } + + for ( i = 0; i <= tblend; ++i ) + { + if ( chk[i] == EOB_POSITION ) + transition_struct_out( 0, base[lastdfa + 1] - i ); + + else if ( chk[i] == ACTION_POSITION ) + transition_struct_out( 0, nxt[i] ); + + else if ( chk[i] > numecs || chk[i] == 0 ) + transition_struct_out( 0, 0 ); /* unused slot */ + + else /* verify, transition */ + transition_struct_out( chk[i], base[nxt[i]] - (i - chk[i]) ); + } + + + /* here's the final, end-of-buffer state */ + transition_struct_out( chk[tblend + 1], nxt[tblend + 1] ); + transition_struct_out( chk[tblend + 2], nxt[tblend + 2] ); + + printf( " };\n" ); + printf( "\n" ); + + /* table of pointers to start states */ + printf( "static const struct yy_trans_info *yy_start_state_list[%d] =\n", + lastsc * 2 + 1 ); + printf( " {\n" ); + + for ( i = 0; i <= lastsc * 2; ++i ) + printf( " &yy_transition[%d],\n", base[i] ); + + dataend(); + + if ( useecs ) + genecs(); + } + + +/* generate equivalence-class 
tables */ + +void genecs() + + { + register int i, j; + static char C_char_decl[] = "static const %s %s[%d] =\n { 0,\n"; + int numrows; + Char clower(); + + if ( numecs < csize ) + printf( C_char_decl, "YY_CHAR", "yy_ec", csize ); + else + printf( C_char_decl, "short", "yy_ec", csize ); + + for ( i = 1; i < csize; ++i ) + { + if ( caseins && (i >= 'A') && (i <= 'Z') ) + ecgroup[i] = ecgroup[clower( i )]; + + ecgroup[i] = abs( ecgroup[i] ); + mkdata( ecgroup[i] ); + } + + dataend(); + + if ( trace ) + { + char *readable_form(); + + fputs( "\n\nEquivalence Classes:\n\n", stderr ); + + numrows = csize / 8; + + for ( j = 0; j < numrows; ++j ) + { + for ( i = j; i < csize; i = i + numrows ) + { + fprintf( stderr, "%4s = %-2d", readable_form( i ), ecgroup[i] ); + + putc( ' ', stderr ); + } + + putc( '\n', stderr ); + } + } + } + + +/* generate the code to find the action number */ + +void gen_find_action() + + { + if ( fullspd ) + indent_puts( "yy_act = yy_current_state[-1].yy_nxt;" ); + + else if ( fulltbl ) + indent_puts( "yy_act = yy_accept[yy_current_state];" ); + + else if ( reject ) + { + indent_puts( "yy_current_state = *--yy_state_ptr;" ); + indent_puts( "yy_lp = yy_accept[yy_current_state];" ); + + puts( "find_rule: /* we branch to this label when backtracking */" ); + + indent_puts( "for ( ; ; ) /* until we find what rule we matched */" ); + + indent_up(); + + indent_puts( "{" ); + + indent_puts( "if ( yy_lp && yy_lp < yy_accept[yy_current_state + 1] )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_act = yy_acclist[yy_lp];" ); + + if ( variable_trailing_context_rules ) + { + indent_puts( "if ( yy_act & YY_TRAILING_HEAD_MASK ||" ); + indent_puts( " yy_looking_for_trail_begin )" ); + indent_up(); + indent_puts( "{" ); + + indent_puts( "if ( yy_act == yy_looking_for_trail_begin )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_looking_for_trail_begin = 0;" ); + indent_puts( "yy_act &= ~YY_TRAILING_HEAD_MASK;" ); + indent_puts( "break;" ); 
+ indent_puts( "}" ); + indent_down(); + + indent_puts( "}" ); + indent_down(); + + indent_puts( "else if ( yy_act & YY_TRAILING_MASK )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( + "yy_looking_for_trail_begin = yy_act & ~YY_TRAILING_MASK;" ); + indent_puts( + "yy_looking_for_trail_begin |= YY_TRAILING_HEAD_MASK;" ); + + if ( real_reject ) + { + /* remember matched text in case we back up due to REJECT */ + indent_puts( "yy_full_match = yy_cp;" ); + indent_puts( "yy_full_state = yy_state_ptr;" ); + indent_puts( "yy_full_lp = yy_lp;" ); + } + + indent_puts( "}" ); + indent_down(); + + indent_puts( "else" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_full_match = yy_cp;" ); + indent_puts( "yy_full_state = yy_state_ptr;" ); + indent_puts( "yy_full_lp = yy_lp;" ); + indent_puts( "break;" ); + indent_puts( "}" ); + indent_down(); + + indent_puts( "++yy_lp;" ); + indent_puts( "goto find_rule;" ); + } + + else + { + /* remember matched text in case we back up due to trailing context + * plus REJECT + */ + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_full_match = yy_cp;" ); + indent_puts( "break;" ); + indent_puts( "}" ); + indent_down(); + } + + indent_puts( "}" ); + indent_down(); + + indent_puts( "--yy_cp;" ); + + /* we could consolidate the following two lines with those at + * the beginning, but at the cost of complaints that we're + * branching inside a loop + */ + indent_puts( "yy_current_state = *--yy_state_ptr;" ); + indent_puts( "yy_lp = yy_accept[yy_current_state];" ); + + indent_puts( "}" ); + + indent_down(); + } + + else + /* compressed */ + indent_puts( "yy_act = yy_accept[yy_current_state];" ); + } + + +/* genftbl - generates full transition table + * + * synopsis + * genftbl(); + */ + +void genftbl() + + { + register int i; + int end_of_buffer_action = num_rules + 1; + + printf( C_short_decl, "yy_accept", lastdfa + 1 ); + + + dfaacc[end_of_buffer_state].dfaacc_state = end_of_buffer_action; + + for ( i = 1; i <= lastdfa; 
++i ) + { + register int anum = dfaacc[i].dfaacc_state; + + mkdata( anum ); + + if ( trace && anum ) + fprintf( stderr, "state # %d accepts: [%d]\n", i, anum ); + } + + dataend(); + + if ( useecs ) + genecs(); + + /* don't have to dump the actual full table entries - they were created + * on-the-fly + */ + } + + +/* generate the code to find the next compressed-table state */ + +void gen_next_compressed_state( char_map ) +char *char_map; + + { + indent_put2s( "register YY_CHAR yy_c = %s;", char_map ); + + /* save the backtracking info \before/ computing the next state + * because we always compute one more state than needed - we + * always proceed until we reach a jam state + */ + gen_backtracking(); + + indent_puts( + "while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_current_state = yy_def[yy_current_state];" ); + + if ( usemecs ) + { + /* we've arrange it so that templates are never chained + * to one another. This means we can afford make a + * very simple test to see if we need to convert to + * yy_c's meta-equivalence class without worrying + * about erroneously looking up the meta-equivalence + * class twice + */ + do_indent(); + /* lastdfa + 2 is the beginning of the templates */ + printf( "if ( yy_current_state >= %d )\n", lastdfa + 2 ); + + indent_up(); + indent_puts( "yy_c = yy_meta[yy_c];" ); + indent_down(); + } + + indent_puts( "}" ); + indent_down(); + + indent_puts( + "yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];" ); + } + + +/* generate the code to find the next match */ + +void gen_next_match() + + { + /* NOTE - changes in here should be reflected in gen_next_state() and + * gen_NUL_trans() + */ + char *char_map = useecs ? "yy_ec[*yy_cp]" : "*yy_cp"; + char *char_map_2 = useecs ? 
"yy_ec[*++yy_cp]" : "*++yy_cp"; + + if ( fulltbl ) + { + indent_put2s( + "while ( (yy_current_state = yy_nxt[yy_current_state][%s]) > 0 )", + char_map ); + + indent_up(); + + if ( num_backtracking > 0 ) + { + indent_puts( "{" ); + gen_backtracking(); + putchar( '\n' ); + } + + indent_puts( "++yy_cp;" ); + + if ( num_backtracking > 0 ) + indent_puts( "}" ); + + indent_down(); + + putchar( '\n' ); + indent_puts( "yy_current_state = -yy_current_state;" ); + } + + else if ( fullspd ) + { + indent_puts( "{" ); + indent_puts( "register const struct yy_trans_info *yy_trans_info;\n" ); + indent_puts( "register YY_CHAR yy_c;\n" ); + indent_put2s( "for ( yy_c = %s;", char_map ); + indent_puts( + " (yy_trans_info = &yy_current_state[yy_c])->yy_verify == yy_c;" ); + indent_put2s( " yy_c = %s )", char_map_2 ); + + indent_up(); + + if ( num_backtracking > 0 ) + indent_puts( "{" ); + + indent_puts( "yy_current_state += yy_trans_info->yy_nxt;" ); + + if ( num_backtracking > 0 ) + { + putchar( '\n' ); + gen_backtracking(); + indent_puts( "}" ); + } + + indent_down(); + indent_puts( "}" ); + } + + else + { /* compressed */ + indent_puts( "do" ); + + indent_up(); + indent_puts( "{" ); + + gen_next_state( false ); + + indent_puts( "++yy_cp;" ); + + indent_puts( "}" ); + indent_down(); + + do_indent(); + + if ( interactive ) + printf( "while ( yy_base[yy_current_state] != %d );\n", jambase ); + else + printf( "while ( yy_current_state != %d );\n", jamstate ); + + if ( ! reject && ! interactive ) + { + /* do the guaranteed-needed backtrack to figure out the match */ + indent_puts( "yy_cp = yy_last_accepting_cpos;" ); + indent_puts( "yy_current_state = yy_last_accepting_state;" ); + } + } + } + + +/* generate the code to find the next state */ + +void gen_next_state( worry_about_NULs ) +int worry_about_NULs; + + { /* NOTE - changes in here should be reflected in get_next_match() */ + char char_map[256]; + + if ( worry_about_NULs && ! 
nultrans ) + { + if ( useecs ) + (void) sprintf( char_map, "(*yy_cp ? yy_ec[*yy_cp] : %d)", NUL_ec ); + else + (void) sprintf( char_map, "(*yy_cp ? *yy_cp : %d)", NUL_ec ); + } + + else + (void) strcpy( char_map, useecs ? "yy_ec[*yy_cp]" : "*yy_cp" ); + + if ( worry_about_NULs && nultrans ) + { + if ( ! fulltbl && ! fullspd ) + /* compressed tables backtrack *before* they match */ + gen_backtracking(); + + indent_puts( "if ( *yy_cp )" ); + indent_up(); + indent_puts( "{" ); + } + + if ( fulltbl ) + indent_put2s( "yy_current_state = yy_nxt[yy_current_state][%s];", + char_map ); + + else if ( fullspd ) + indent_put2s( "yy_current_state += yy_current_state[%s].yy_nxt;", + char_map ); + + else + gen_next_compressed_state( char_map ); + + if ( worry_about_NULs && nultrans ) + { + indent_puts( "}" ); + indent_down(); + indent_puts( "else" ); + indent_up(); + indent_puts( "yy_current_state = yy_NUL_trans[yy_current_state];" ); + indent_down(); + } + + if ( fullspd || fulltbl ) + gen_backtracking(); + + if ( reject ) + indent_puts( "*yy_state_ptr++ = yy_current_state;" ); + } + + +/* generate the code to make a NUL transition */ + +void gen_NUL_trans() + + { /* NOTE - changes in here should be reflected in get_next_match() */ + int need_backtracking = (num_backtracking > 0 && ! 
reject); + + if ( need_backtracking ) + /* we'll need yy_cp lying around for the gen_backtracking() */ + indent_puts( "register YY_CHAR *yy_cp = yy_c_buf_p;" ); + + putchar( '\n' ); + + if ( nultrans ) + { + indent_puts( "yy_current_state = yy_NUL_trans[yy_current_state];" ); + indent_puts( "yy_is_jam = (yy_current_state == 0);" ); + } + + else if ( fulltbl ) + { + do_indent(); + printf( "yy_current_state = yy_nxt[yy_current_state][%d];\n", + NUL_ec ); + indent_puts( "yy_is_jam = (yy_current_state <= 0);" ); + } + + else if ( fullspd ) + { + do_indent(); + printf( "register int yy_c = %d;\n", NUL_ec ); + + indent_puts( + "register const struct yy_trans_info *yy_trans_info;\n" ); + indent_puts( "yy_trans_info = &yy_current_state[yy_c];" ); + indent_puts( "yy_current_state += yy_trans_info->yy_nxt;" ); + + indent_puts( "yy_is_jam = (yy_trans_info->yy_verify != yy_c);" ); + } + + else + { + char NUL_ec_str[20]; + + (void) sprintf( NUL_ec_str, "%d", NUL_ec ); + gen_next_compressed_state( NUL_ec_str ); + + if ( reject ) + indent_puts( "*yy_state_ptr++ = yy_current_state;" ); + + do_indent(); + + if ( interactive ) + printf( "yy_is_jam = (yy_base[yy_current_state] == %d);\n", + jambase ); + else + printf( "yy_is_jam = (yy_current_state == %d);\n", jamstate ); + } + + /* if we've entered an accepting state, backtrack; note that + * compressed tables have *already* done such backtracking, so + * we needn't bother with it again + */ + if ( need_backtracking && (fullspd || fulltbl) ) + { + putchar( '\n' ); + indent_puts( "if ( ! yy_is_jam )" ); + indent_up(); + indent_puts( "{" ); + gen_backtracking(); + indent_puts( "}" ); + indent_down(); + } + } + + +/* generate the code to find the start state */ + +void gen_start_state() + + { + if ( fullspd ) + indent_put2s( "yy_current_state = yy_start_state_list[yy_start%s];", + bol_needed ? " + (yy_bp[-1] == '\\n' ? 
1 : 0)" : "" ); + + else + { + indent_puts( "yy_current_state = yy_start;" ); + + if ( bol_needed ) + { + indent_puts( "if ( yy_bp[-1] == '\\n' )" ); + indent_up(); + indent_puts( "++yy_current_state;" ); + indent_down(); + } + + if ( reject ) + { + /* set up for storing up states */ + indent_puts( "yy_state_ptr = yy_state_buf;" ); + indent_puts( "*yy_state_ptr++ = yy_current_state;" ); + } + } + } + + +/* gentabs - generate data statements for the transition tables + * + * synopsis + * gentabs(); + */ + +void gentabs() + + { + int i, j, k, *accset, nacc, *acc_array, total_states; + int end_of_buffer_action = num_rules + 1; + + /* *everything* is done in terms of arrays starting at 1, so provide + * a null entry for the zero element of all C arrays + */ + static char C_char_decl[] = + "static const YY_CHAR %s[%d] =\n { 0,\n"; + + acc_array = allocate_integer_array( current_max_dfas ); + nummt = 0; + + /* the compressed table format jams by entering the "jam state", + * losing information about the previous state in the process. + * In order to recover the previous state, we effectively need + * to keep backtracking information. + */ + ++num_backtracking; + + if ( reject ) + { + /* write out accepting list and pointer list + * + * first we generate the "yy_acclist" array. 
In the process, we compute + * the indices that will go into the "yy_accept" array, and save the + * indices in the dfaacc array + */ + int EOB_accepting_list[2]; + + /* set up accepting structures for the End Of Buffer state */ + EOB_accepting_list[0] = 0; + EOB_accepting_list[1] = end_of_buffer_action; + accsiz[end_of_buffer_state] = 1; + dfaacc[end_of_buffer_state].dfaacc_set = EOB_accepting_list; + + printf( C_short_decl, "yy_acclist", max( numas, 1 ) + 1 ); + + j = 1; /* index into "yy_acclist" array */ + + for ( i = 1; i <= lastdfa; ++i ) + { + acc_array[i] = j; + + if ( accsiz[i] != 0 ) + { + accset = dfaacc[i].dfaacc_set; + nacc = accsiz[i]; + + if ( trace ) + fprintf( stderr, "state # %d accepts: ", i ); + + for ( k = 1; k <= nacc; ++k ) + { + int accnum = accset[k]; + + ++j; + + if ( variable_trailing_context_rules && + ! (accnum & YY_TRAILING_HEAD_MASK) && + accnum > 0 && + rule_type[accnum] == RULE_VARIABLE ) + { + /* special hack to flag accepting number as part + * of trailing context rule + */ + accnum |= YY_TRAILING_MASK; + } + + mkdata( accnum ); + + if ( trace ) + { + fprintf( stderr, "[%d]", accset[k] ); + + if ( k < nacc ) + fputs( ", ", stderr ); + else + putc( '\n', stderr ); + } + } + } + } + + /* add accepting number for the "jam" state */ + acc_array[i] = j; + + dataend(); + } + + else + { + dfaacc[end_of_buffer_state].dfaacc_state = end_of_buffer_action; + + for ( i = 1; i <= lastdfa; ++i ) + acc_array[i] = dfaacc[i].dfaacc_state; + + /* add accepting number for jam state */ + acc_array[i] = 0; + } + + /* spit out "yy_accept" array. If we're doing "reject", it'll be pointers + * into the "yy_acclist" array. Otherwise it's actual accepting numbers. + * In either case, we just dump the numbers. 
+ */ + + /* "lastdfa + 2" is the size of "yy_accept"; includes room for C arrays + * beginning at 0 and for "jam" state + */ + k = lastdfa + 2; + + if ( reject ) + /* we put a "cap" on the table associating lists of accepting + * numbers with state numbers. This is needed because we tell + * where the end of an accepting list is by looking at where + * the list for the next state starts. + */ + ++k; + + printf( C_short_decl, "yy_accept", k ); + + for ( i = 1; i <= lastdfa; ++i ) + { + mkdata( acc_array[i] ); + + if ( ! reject && trace && acc_array[i] ) + fprintf( stderr, "state # %d accepts: [%d]\n", i, acc_array[i] ); + } + + /* add entry for "jam" state */ + mkdata( acc_array[i] ); + + if ( reject ) + /* add "cap" for the list */ + mkdata( acc_array[i] ); + + dataend(); + + if ( useecs ) + genecs(); + + if ( usemecs ) + { + /* write out meta-equivalence classes (used to index templates with) */ + + if ( trace ) + fputs( "\n\nMeta-Equivalence Classes:\n", stderr ); + + printf( C_char_decl, "yy_meta", numecs + 1 ); + + for ( i = 1; i <= numecs; ++i ) + { + if ( trace ) + fprintf( stderr, "%d = %d\n", i, abs( tecbck[i] ) ); + + mkdata( abs( tecbck[i] ) ); + } + + dataend(); + } + + total_states = lastdfa + numtemps; + + printf( tblend > MAX_SHORT ? C_long_decl : C_short_decl, + "yy_base", total_states + 1 ); + + for ( i = 1; i <= lastdfa; ++i ) + { + register int d = def[i]; + + if ( base[i] == JAMSTATE ) + base[i] = jambase; + + if ( d == JAMSTATE ) + def[i] = jamstate; + + else if ( d < 0 ) + { + /* template reference */ + ++tmpuses; + def[i] = lastdfa - d + 1; + } + + mkdata( base[i] ); + } + + /* generate jam state's base index */ + mkdata( base[i] ); + + for ( ++i /* skip jam state */; i <= total_states; ++i ) + { + mkdata( base[i] ); + def[i] = jamstate; + } + + dataend(); + + printf( tblend > MAX_SHORT ? 
C_long_decl : C_short_decl, + "yy_def", total_states + 1 ); + + for ( i = 1; i <= total_states; ++i ) + mkdata( def[i] ); + + dataend(); + + printf( lastdfa > MAX_SHORT ? C_long_decl : C_short_decl, + "yy_nxt", tblend + 1 ); + + for ( i = 1; i <= tblend; ++i ) + { + if ( nxt[i] == 0 || chk[i] == 0 ) + nxt[i] = jamstate; /* new state is the JAM state */ + + mkdata( nxt[i] ); + } + + dataend(); + + printf( lastdfa > MAX_SHORT ? C_long_decl : C_short_decl, + "yy_chk", tblend + 1 ); + + for ( i = 1; i <= tblend; ++i ) + { + if ( chk[i] == 0 ) + ++nummt; + + mkdata( chk[i] ); + } + + dataend(); + } + + +/* write out a formatted string (with a secondary string argument) at the + * current indentation level, adding a final newline + */ + +void indent_put2s( fmt, arg ) +char fmt[], arg[]; + + { + do_indent(); + printf( fmt, arg ); + putchar( '\n' ); + } + + +/* write out a string at the current indentation level, adding a final + * newline + */ + +void indent_puts( str ) +char str[]; + + { + do_indent(); + puts( str ); + } + + +/* make_tables - generate transition tables + * + * synopsis + * make_tables(); + * + * Generates transition tables and finishes generating output file + */ + +void make_tables() + + { + register int i; + int did_eof_rule = false; + + skelout(); + + /* first, take care of YY_DO_BEFORE_ACTION depending on yymore being used */ + set_indent( 2 ); + + if ( yymore_used ) + { + indent_puts( "yytext -= yy_more_len; \\" ); + indent_puts( "yyleng = yy_cp - yytext; \\" ); + } + + else + indent_puts( "yyleng = yy_cp - yy_bp; \\" ); + + set_indent( 0 ); + + skelout(); + + + printf( "#define YY_END_OF_BUFFER %d\n", num_rules + 1 ); + + if ( fullspd ) + { /* need to define the transet type as a size large + * enough to hold the biggest offset + */ + int total_table_size = tblend + numecs + 1; + char *trans_offset_type = + total_table_size > MAX_SHORT ? 
"long" : "short"; + + set_indent( 0 ); + indent_puts( "struct yy_trans_info" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "short yy_verify;" ); + + /* in cases where its sister yy_verify *is* a "yes, there is a + * transition", yy_nxt is the offset (in records) to the next state. + * In most cases where there is no transition, the value of yy_nxt + * is irrelevant. If yy_nxt is the -1th record of a state, though, + * then yy_nxt is the action number for that state + */ + + indent_put2s( "%s yy_nxt;", trans_offset_type ); + indent_puts( "};" ); + indent_down(); + + indent_puts( "typedef const struct yy_trans_info *yy_state_type;" ); + } + + else + indent_puts( "typedef int yy_state_type;" ); + + if ( fullspd ) + genctbl(); + + else if ( fulltbl ) + genftbl(); + + else + gentabs(); + + if ( num_backtracking > 0 ) + { + indent_puts( "static yy_state_type yy_last_accepting_state;" ); + indent_puts( "static YY_CHAR *yy_last_accepting_cpos;\n" ); + } + + if ( nultrans ) + { + printf( C_state_decl, "yy_NUL_trans", lastdfa + 1 ); + + for ( i = 1; i <= lastdfa; ++i ) + { + if ( fullspd ) + { + if ( nultrans ) + printf( " &yy_transition[%d],\n", base[i] ); + else + printf( " 0,\n" ); + } + + else + mkdata( nultrans[i] ); + } + + dataend(); + } + + if ( ddebug ) + { /* spit out table mapping rules to line numbers */ + indent_puts( "extern int yy_flex_debug;" ); + indent_puts( "int yy_flex_debug = 1;\n" ); + + printf( C_short_decl, "yy_rule_linenum", num_rules ); + for ( i = 1; i < num_rules; ++i ) + mkdata( rule_linenum[i] ); + dataend(); + } + + if ( reject ) + { + /* declare state buffer variables */ + puts( + "static yy_state_type yy_state_buf[YY_BUF_SIZE + 2], *yy_state_ptr;" ); + puts( "static YY_CHAR *yy_full_match;" ); + puts( "static int yy_lp;" ); + + if ( variable_trailing_context_rules ) + { + puts( "static int yy_looking_for_trail_begin = 0;" ); + puts( "static int yy_full_lp;" ); + puts( "static int *yy_full_state;" ); + printf( "#define 
YY_TRAILING_MASK 0x%x\n", YY_TRAILING_MASK ); + printf( "#define YY_TRAILING_HEAD_MASK 0x%x\n", + YY_TRAILING_HEAD_MASK ); + } + + puts( "#define REJECT \\" ); + puts( "{ \\" ); + puts( + "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */ \\" ); + puts( + "yy_cp = yy_full_match; /* restore poss. backed-over text */ \\" ); + + if ( variable_trailing_context_rules ) + { + puts( "yy_lp = yy_full_lp; /* restore orig. accepting pos. */ \\" ); + puts( + "yy_state_ptr = yy_full_state; /* restore orig. state */ \\" ); + puts( + "yy_current_state = *yy_state_ptr; /* restore curr. state */ \\" ); + } + + puts( "++yy_lp; \\" ); + puts( "goto find_rule; \\" ); + puts( "}" ); + } + + else + { + puts( "/* the intent behind this definition is that it'll catch" ); + puts( " * any uses of REJECT which flex missed" ); + puts( " */" ); + puts( "#define REJECT reject_used_but_not_detected" ); + } + + if ( yymore_used ) + { + indent_puts( "static int yy_more_flag = 0;" ); + indent_puts( "static int yy_doing_yy_more = 0;" ); + indent_puts( "static int yy_more_len = 0;" ); + indent_puts( + "#define yymore() { yy_more_flag = 1; }" ); + indent_puts( + "#define YY_MORE_ADJ (yy_doing_yy_more ? 
yy_more_len : 0)" ); + } + + else + { + indent_puts( "#define yymore() yymore_used_but_not_detected" ); + indent_puts( "#define YY_MORE_ADJ 0" ); + } + + skelout(); + + if ( ferror( temp_action_file ) ) + flexfatal( "error occurred when writing temporary action file" ); + + else if ( fclose( temp_action_file ) ) + flexfatal( "error occurred when closing temporary action file" ); + + temp_action_file = fopen( action_file_name, "r" ); + + if ( temp_action_file == NULL ) + flexfatal( "could not re-open temporary action file" ); + + /* copy prolog from action_file to output file */ + action_out(); + + skelout(); + + set_indent( 2 ); + + if ( yymore_used ) + { + indent_puts( "yy_more_len = 0;" ); + indent_puts( "yy_doing_yy_more = yy_more_flag;" ); + indent_puts( "if ( yy_doing_yy_more )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_more_len = yyleng;" ); + indent_puts( "yy_more_flag = 0;" ); + indent_puts( "}" ); + indent_down(); + } + + skelout(); + + gen_start_state(); + + /* note, don't use any indentation */ + puts( "yy_match:" ); + gen_next_match(); + + skelout(); + set_indent( 2 ); + gen_find_action(); + + skelout(); + if ( ddebug ) + { + indent_puts( "if ( yy_flex_debug )" ); + indent_up(); + + indent_puts( "{" ); + indent_puts( "if ( yy_act == 0 )" ); + indent_up(); + indent_puts( "fprintf( stderr, \"--scanner backtracking\\n\" );" ); + indent_down(); + + do_indent(); + printf( "else if ( yy_act < %d )\n", num_rules ); + indent_up(); + indent_puts( + "fprintf( stderr, \"--accepting rule at line %d (\\\"%s\\\")\\n\"," ); + indent_puts( " yy_rule_linenum[yy_act], yytext );" ); + indent_down(); + + do_indent(); + printf( "else if ( yy_act == %d )\n", num_rules ); + indent_up(); + indent_puts( + "fprintf( stderr, \"--accepting default rule (\\\"%s\\\")\\n\"," ); + indent_puts( " yytext );" ); + indent_down(); + + do_indent(); + printf( "else if ( yy_act == %d )\n", num_rules + 1 ); + indent_up(); + indent_puts( "fprintf( stderr, \"--(end of buffer or 
a NUL)\\n\" );" ); + indent_down(); + + do_indent(); + printf( "else\n" ); + indent_up(); + indent_puts( "fprintf( stderr, \"--EOF\\n\" );" ); + indent_down(); + + indent_puts( "}" ); + indent_down(); + } + + /* copy actions from action_file to output file */ + skelout(); + indent_up(); + gen_bt_action(); + action_out(); + + /* generate cases for any missing EOF rules */ + for ( i = 1; i <= lastsc; ++i ) + if ( ! sceof[i] ) + { + do_indent(); + printf( "case YY_STATE_EOF(%s):\n", scname[i] ); + did_eof_rule = true; + } + + if ( did_eof_rule ) + { + indent_up(); + indent_puts( "yyterminate();" ); + indent_down(); + } + + + /* generate code for handling NUL's, if needed */ + + /* first, deal with backtracking and setting up yy_cp if the scanner + * finds that it should JAM on the NUL + */ + skelout(); + set_indent( 7 ); + + if ( fullspd || fulltbl ) + indent_puts( "yy_cp = yy_c_buf_p;" ); + + else + { /* compressed table */ + if ( ! reject && ! interactive ) + { + /* do the guaranteed-needed backtrack to figure out the match */ + indent_puts( "yy_cp = yy_last_accepting_cpos;" ); + indent_puts( "yy_current_state = yy_last_accepting_state;" ); + } + } + + + /* generate code for yy_get_previous_state() */ + set_indent( 1 ); + skelout(); + + if ( bol_needed ) + indent_puts( "register YY_CHAR *yy_bp = yytext;\n" ); + + gen_start_state(); + + set_indent( 2 ); + skelout(); + gen_next_state( true ); + + set_indent( 1 ); + skelout(); + gen_NUL_trans(); + + skelout(); + + /* copy remainder of input to output */ + + line_directive_out( stdout ); + (void) flexscan(); /* copy remainder of input to output */ + } diff --git a/util/flex/initscan.c b/util/flex/initscan.c new file mode 100644 index 000000000..7217573a3 --- /dev/null +++ b/util/flex/initscan.c @@ -0,0 +1,2294 @@ +/* A lexical scanner generated by flex */ + +/* scanner skeleton version: + * $Header$ + */ + +#define FLEX_SCANNER + +#include + + +/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ +#ifdef 
c_plusplus +#ifndef __cplusplus +#define __cplusplus +#endif +#endif + + +#ifdef __cplusplus + +#include +#include + +/* use prototypes in function declarations */ +#define YY_USE_PROTOS + +/* the "const" storage-class-modifier is valid */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +#ifdef __STDC__ + +#ifdef __GNUC__ +#include +void *malloc( size_t ); +void free( void* ); +#else +#include +#endif /* __GNUC__ */ + +#define YY_USE_PROTOS +#define YY_USE_CONST + +#endif /* __STDC__ */ +#endif /* ! __cplusplus */ + + +#ifdef __TURBOC__ +#define YY_USE_CONST +#endif + + +#ifndef YY_USE_CONST +#define const +#endif + + +#ifdef YY_USE_PROTOS +#define YY_PROTO(proto) proto +#else +#define YY_PROTO(proto) () +/* we can't get here if it's an ANSI C compiler, or a C++ compiler, + * so it's got to be a K&R compiler, and therefore there's no standard + * place from which to include these definitions + */ +char *malloc(); +int free(); +int read(); +#endif + + +/* amount of stuff to slurp up with each read */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* returned upon end-of-file */ +#define YY_END_TOK 0 + +/* copy whatever the last rule matched to the standard output */ + +/* cast to (char *) is because for 8-bit chars, yytext is (unsigned char *) */ +/* this used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite() + */ +#define ECHO (void) fwrite( (char *) yytext, yyleng, 1, yyout ) + +/* gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". + */ +#define YY_INPUT(buf,result,max_size) \ + if ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \ + YY_FATAL_ERROR( "read() in flex scanner failed" ); +#define YY_NULL 0 + +/* no semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. 
+ */ +#define yyterminate() return ( YY_NULL ) + +/* report a fatal error */ + +/* The funky do-while is used to turn this macro definition into + * a single C statement (which needs a semi-colon terminator). + * This avoids problems with code like: + * + * if ( something_happens ) + * YY_FATAL_ERROR( "oops, the something happened" ); + * else + * everything_okay(); + * + * Prior to using the do-while the compiler would get upset at the + * "else" because it interpreted the "if" statement as being all + * done when it reached the ';' after the YY_FATAL_ERROR() call. + */ + +#define YY_FATAL_ERROR(msg) \ + do \ + { \ + (void) fputs( msg, stderr ); \ + (void) putc( '\n', stderr ); \ + exit( 1 ); \ + } \ + while ( 0 ) + +/* default yywrap function - always treat EOF as an EOF */ +#define yywrap() 1 + +/* enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN + */ +#define BEGIN yy_start = 1 + 2 * + +/* action number for EOF rule of a given start state */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* special action meaning "start processing a new file" */ +#define YY_NEW_FILE \ + do \ + { \ + yy_init_buffer( yy_current_buffer, yyin ); \ + yy_load_buffer_state(); \ + } \ + while ( 0 ) + +/* default declaration of generated scanner - a define so the user can + * easily add parameters + */ +#define YY_DECL int yylex YY_PROTO(( void )) + +/* code executed at the end of each rule */ +#define YY_BREAK break; + +#define YY_END_OF_BUFFER_CHAR 0 + +#ifndef YY_BUF_SIZE +#define YY_BUF_SIZE (YY_READ_BUF_SIZE * 2) /* size of default input buffer */ +#endif + +typedef struct yy_buffer_state *YY_BUFFER_STATE; + +#define YY_CHAR unsigned char +# line 1 "scan.l" +#define INITIAL 0 +/* scan.l - scanner for flex input */ +# line 5 "scan.l" +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + +#undef yywrap + +#include "flexdef.h" +#include "parse.h" + +#define ACTION_ECHO fprintf( temp_action_file, "%s", yytext ) +#define MARK_END_OF_PROLOG fprintf( temp_action_file, "%%%% end of prolog\n" ); + +#undef YY_DECL +#define YY_DECL \ + int flexscan() + +#define RETURNCHAR \ + yylval = yytext[0]; \ + return ( CHAR ); + +#define RETURNNAME \ + (void) strcpy( nmstr, (char *) yytext ); \ + return ( NAME ); + +#define PUT_BACK_STRING(str, start) \ + for ( i = strlen( (char *) (str) ) - 1; i >= start; --i ) \ + unput((str)[i]) + +#define CHECK_REJECT(str) \ + if ( all_upper( str ) ) \ + reject = true; + +#define CHECK_YYMORE(str) \ + if ( all_lower( str ) ) \ + yymore_used = true; +#define SECT2 1 +#define SECT2PROLOG 2 +#define SECT3 3 +#define CODEBLOCK 4 +#define PICKUPDEF 5 +#define SC 6 +#define CARETISBOL 7 +#define NUM 8 +#define QUOTE 9 +#define FIRSTCCL 10 +#define CCL 11 +#define ACTION 12 +#define RECOVER 13 +#define BRACEERROR 14 +#define C_COMMENT 15 +#define ACTION_COMMENT 16 +#define ACTION_STRING 17 +#define PERCENT_BRACE_ACTION 18 +#define USED_LIST 19 +#define CODEBLOCK_2 20 +#define XLATION 21 +# line 84 "scan.l" + +/* done after the current pattern has been matched and before the + * corresponding action - sets up yytext + */ +#define YY_DO_BEFORE_ACTION \ + yytext = yy_bp; \ + yyleng = yy_cp - yy_bp; \ + yy_hold_char = *yy_cp; \ + *yy_cp = '\0'; \ + yy_c_buf_p = yy_cp; + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + +/* return all but the first 'n' matched characters back to the input stream */ +#define yyless(n) \ + do \ + { \ + /* undo effects of setting up yytext */ \ + *yy_cp = yy_hold_char; \ + yy_c_buf_p = yy_cp = yy_bp + n; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) + +#define unput(c) yyunput( c, yytext ) + + +struct yy_buffer_state + { + FILE *yy_input_file; + + YY_CHAR 
*yy_ch_buf; /* input buffer */ + YY_CHAR *yy_buf_pos; /* current position in input buffer */ + + /* size of input buffer in bytes, not including room for EOB characters*/ + int yy_buf_size; + + /* number of characters read into yy_ch_buf, not including EOB characters */ + int yy_n_chars; + + int yy_eof_status; /* whether we've seen an EOF on this buffer */ +#define EOF_NOT_SEEN 0 + /* "pending" happens when the EOF has been seen but there's still + * some text process + */ +#define EOF_PENDING 1 +#define EOF_DONE 2 + }; + +static YY_BUFFER_STATE yy_current_buffer; + +/* we provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state" + */ +#define YY_CURRENT_BUFFER yy_current_buffer + + +/* yy_hold_char holds the character lost when yytext is formed */ +static YY_CHAR yy_hold_char; + +static int yy_n_chars; /* number of characters read into yy_ch_buf */ + + + +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +#ifndef YY_USER_INIT +#define YY_USER_INIT +#endif + +extern YY_CHAR *yytext; +extern int yyleng; +extern FILE *yyin, *yyout; + +YY_CHAR *yytext; +int yyleng; + +FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; + +#define YY_END_OF_BUFFER 121 +typedef int yy_state_type; +static const short int yy_accept[341] = + { 0, + 0, 0, 0, 0, 0, 0, 119, 119, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 121, 19, 7, 18, 19, 16, + 1, 17, 19, 19, 19, 15, 67, 59, 60, 53, + 67, 66, 51, 67, 67, 67, 50, 49, 67, 52, + 120, 47, 119, 119, 28, 29, 28, 28, 28, 28, + 31, 30, 32, 73, 120, 69, 70, 72, 74, 88, + 89, 86, 85, 87, 75, 77, 76, 75, 81, 80, + + 81, 81, 83, 83, 83, 84, 99, 104, 103, 105, + 105, 100, 100, 100, 97, 98, 120, 33, 91, 90, + 22, 24, 23, 107, 109, 108, 111, 113, 114, 115, + 95, 95, 96, 95, 95, 95, 95, 38, 35, 34, + 38, 38, 44, 42, 45, 44, 44, 41, 41, 41, + 40, 41, 7, 18, 0, 16, 1, 17, 0, 2, + 14, 8, 0, 12, 4, 0, 0, 5, 
0, 3, + 15, 59, 60, 0, 0, 56, 0, 0, 0, 117, + 117, 117, 55, 54, 55, 50, 49, 63, 50, 0, + 47, 46, 119, 119, 28, 28, 28, 28, 28, 31, + + 30, 71, 72, 85, 118, 118, 118, 78, 79, 82, + 99, 0, 102, 0, 101, 100, 100, 100, 0, 33, + 22, 20, 107, 106, 111, 112, 95, 95, 95, 92, + 95, 95, 95, 38, 35, 38, 38, 42, 0, 43, + 43, 43, 42, 40, 0, 13, 14, 8, 8, 0, + 12, 4, 0, 0, 0, 5, 0, 6, 57, 0, + 58, 0, 64, 0, 0, 117, 117, 55, 55, 65, + 63, 28, 28, 28, 25, 0, 118, 118, 100, 100, + 0, 21, 92, 92, 95, 95, 38, 38, 0, 39, + 43, 43, 0, 11, 4, 0, 11, 0, 0, 5, + + 0, 0, 0, 117, 28, 28, 118, 100, 100, 95, + 95, 38, 38, 43, 0, 9, 0, 0, 0, 28, + 28, 100, 100, 95, 95, 38, 38, 0, 0, 26, + 27, 93, 94, 93, 94, 36, 37, 10, 62, 0 + } ; + +static const YY_CHAR yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 5, 1, 6, 7, 8, 9, 1, 10, 11, + 11, 12, 11, 13, 14, 11, 15, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 1, 1, 17, + 1, 18, 11, 1, 24, 25, 26, 27, 28, 29, + 23, 23, 23, 30, 31, 23, 32, 33, 34, 31, + 23, 35, 36, 37, 38, 23, 23, 39, 40, 23, + 19, 20, 21, 22, 23, 1, 24, 25, 26, 27, + + 28, 29, 23, 23, 23, 30, 31, 23, 32, 33, + 34, 31, 23, 35, 36, 37, 38, 23, 23, 39, + 40, 23, 41, 42, 43, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static const YY_CHAR yy_meta[44] = + { 0, + 1, 2, 3, 2, 2, 4, 1, 1, 1, 5, + 1, 6, 1, 7, 5, 8, 1, 1, 1, 9, + 10, 1, 11, 12, 12, 12, 12, 12, 12, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 5, 1, 13 + } ; + +static const short int yy_base[404] = + { 0, + 0, 43, 85, 126, 1371, 1370, 1369, 1353, 168, 1346, + 104, 108, 211, 0, 
1332, 1320, 120, 252, 95, 119, + 137, 144, 100, 141, 295, 0, 1327, 1323, 113, 336, + 254, 255, 257, 258, 253, 268, 379, 0, 338, 421, + 0, 0, 273, 460, 1325, 1442, 281, 1442, 1287, 0, + 287, 1442, 1279, 472, 1257, 0, 1442, 425, 1442, 1442, + 147, 1442, 1239, 1235, 78, 513, 433, 1442, 83, 1442, + 1248, 0, 1247, 1442, 0, 1442, 0, 1218, 1205, 1194, + 0, 342, 1442, 1442, 1442, 1442, 1202, 0, 1442, 1442, + 1442, 1442, 1201, 1442, 1442, 1442, 1442, 79, 1442, 1442, + + 103, 1198, 1442, 0, 248, 1442, 0, 1442, 1442, 252, + 1199, 0, 1173, 1158, 1442, 1442, 1185, 1442, 1442, 1442, + 0, 1442, 1170, 0, 1442, 1152, 0, 1442, 1442, 0, + 0, 346, 1442, 1123, 0, 1125, 1105, 0, 352, 1442, + 1116, 1103, 1442, 356, 1442, 1100, 329, 1442, 360, 1093, + 1101, 333, 441, 1442, 445, 0, 449, 1442, 1101, 1442, + 365, 453, 1094, 466, 0, 480, 330, 0, 1095, 1442, + 0, 555, 1442, 1044, 1077, 1442, 1051, 133, 456, 1442, + 1059, 0, 0, 1442, 584, 563, 1442, 0, 1442, 1071, + 0, 1442, 1063, 1442, 0, 0, 1010, 1007, 627, 0, + + 484, 1442, 0, 998, 1442, 992, 0, 1442, 1442, 1442, + 0, 421, 1442, 0, 1442, 0, 971, 964, 992, 1442, + 0, 962, 0, 1442, 0, 1442, 0, 488, 921, 670, + 0, 717, 714, 0, 497, 715, 712, 569, 573, 1442, + 727, 0, 577, 726, 581, 1442, 585, 0, 590, 738, + 597, 0, 712, 683, 691, 0, 670, 1442, 1442, 623, + 1442, 591, 1442, 458, 702, 590, 0, 0, 0, 1442, + 0, 576, 569, 0, 1442, 593, 575, 0, 560, 546, + 567, 1442, 0, 0, 541, 529, 534, 527, 730, 1442, + 500, 0, 509, 1442, 0, 734, 1442, 468, 467, 0, + + 462, 704, 724, 1442, 461, 438, 1442, 440, 425, 433, + 405, 413, 398, 1442, 404, 1442, 359, 259, 332, 338, + 346, 334, 331, 257, 253, 226, 137, 133, 81, 0, + 0, 0, 0, 0, 0, 0, 0, 1442, 1442, 1442, + 753, 766, 779, 792, 805, 818, 831, 844, 857, 870, + 883, 896, 909, 922, 935, 948, 955, 967, 980, 986, + 998, 1011, 1024, 1037, 1050, 1063, 1070, 1082, 1089, 1101, + 1114, 1127, 1140, 1150, 1157, 1169, 1182, 1195, 1208, 1221, + 1234, 1241, 1253, 1266, 1279, 1282, 1284, 1296, 1309, 1315, + 1327, 1339, 1345, 
1357, 1363, 1375, 1382, 1388, 1393, 1405, + + 1411, 1423, 1429 + } ; + +static const short int yy_def[404] = + { 0, + 340, 340, 341, 341, 342, 342, 343, 343, 340, 9, + 344, 344, 340, 13, 345, 345, 346, 346, 347, 347, + 348, 348, 349, 349, 340, 25, 350, 350, 345, 345, + 351, 351, 352, 352, 353, 353, 340, 37, 354, 354, + 37, 37, 355, 356, 340, 340, 340, 340, 340, 357, + 340, 340, 358, 359, 340, 360, 340, 340, 340, 340, + 340, 340, 340, 361, 362, 340, 340, 340, 340, 340, + 363, 364, 365, 340, 366, 340, 367, 367, 367, 366, + 368, 340, 340, 340, 340, 340, 340, 369, 340, 340, + 340, 340, 340, 340, 340, 340, 340, 362, 340, 340, + + 370, 371, 340, 372, 362, 340, 373, 340, 340, 374, + 340, 375, 375, 375, 340, 340, 376, 340, 340, 340, + 377, 340, 340, 378, 340, 340, 379, 340, 340, 380, + 381, 381, 340, 381, 382, 382, 382, 383, 340, 340, + 383, 383, 340, 340, 340, 340, 384, 340, 340, 340, + 340, 384, 340, 340, 340, 357, 340, 340, 358, 340, + 340, 385, 340, 340, 386, 340, 340, 387, 388, 340, + 360, 340, 340, 340, 389, 340, 340, 361, 361, 340, + 340, 390, 391, 340, 391, 340, 340, 392, 340, 363, + 364, 340, 365, 340, 366, 367, 367, 367, 340, 368, + + 340, 340, 369, 340, 340, 340, 393, 340, 340, 340, + 373, 374, 340, 374, 340, 375, 375, 375, 376, 340, + 377, 394, 378, 340, 379, 340, 381, 381, 381, 340, + 382, 382, 382, 383, 340, 383, 383, 340, 340, 340, + 340, 395, 340, 340, 340, 340, 340, 385, 385, 396, + 340, 397, 396, 340, 340, 398, 388, 340, 340, 389, + 340, 340, 340, 361, 361, 340, 399, 391, 185, 340, + 392, 367, 367, 199, 340, 400, 340, 401, 375, 375, + 394, 340, 230, 402, 382, 382, 383, 383, 340, 340, + 340, 403, 396, 340, 397, 396, 340, 340, 340, 398, + + 340, 361, 265, 340, 367, 367, 340, 375, 375, 382, + 382, 383, 383, 340, 340, 340, 340, 361, 361, 367, + 367, 375, 375, 382, 382, 383, 383, 340, 340, 367, + 367, 375, 375, 382, 382, 383, 383, 340, 340, 0, + 340, 340, 340, 340, 340, 340, 340, 340, 340, 340, + 340, 340, 340, 340, 340, 340, 340, 340, 340, 340, + 340, 340, 
340, 340, 340, 340, 340, 340, 340, 340, + 340, 340, 340, 340, 340, 340, 340, 340, 340, 340, + 340, 340, 340, 340, 340, 340, 340, 340, 340, 340, + 340, 340, 340, 340, 340, 340, 340, 340, 340, 340, + + 340, 340, 340 + } ; + +static const short int yy_nxt[1486] = + { 0, + 46, 47, 48, 47, 47, 46, 46, 46, 49, 46, + 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 46, 46, 46, 46, 51, 52, 51, 51, 46, 53, + 46, 54, 46, 46, 46, 46, 46, 55, 46, 46, + 46, 46, 46, 46, 46, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 46, 46, 46, 58, 59, 58, 58, + 60, 188, 61, 181, 181, 62, 62, 96, 339, 62, + + 97, 63, 85, 64, 65, 82, 83, 82, 82, 82, + 83, 82, 82, 104, 98, 119, 182, 182, 206, 105, + 106, 96, 91, 189, 97, 66, 62, 67, 68, 67, + 67, 60, 92, 61, 69, 93, 62, 62, 98, 85, + 62, 207, 63, 85, 64, 65, 85, 70, 176, 176, + 100, 176, 179, 263, 104, 120, 101, 100, 102, 338, + 105, 106, 94, 101, 337, 102, 66, 62, 75, 75, + 76, 75, 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, + 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, + + 77, 77, 78, 77, 77, 77, 77, 79, 75, 75, + 75, 84, 84, 85, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 86, 84, 84, 84, 84, 87, 84, + 84, 84, 84, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, + 88, 84, 84, 84, 91, 128, 122, 122, 129, 125, + 125, 213, 336, 181, 92, 123, 123, 93, 126, 126, + 128, 214, 130, 129, 144, 145, 144, 144, 179, 263, + 335, 146, 153, 154, 153, 153, 182, 130, 157, 158, + 157, 157, 147, 334, 94, 107, 107, 108, 107, 107, + + 109, 107, 107, 107, 110, 107, 107, 107, 107, 111, + 107, 107, 107, 107, 107, 107, 107, 112, 112, 112, + 112, 112, 112, 112, 112, 112, 112, 112, 112, 113, + 112, 112, 112, 112, 114, 115, 107, 116, 119, 139, + 140, 139, 139, 201, 241, 201, 201, 228, 241, 228, + 228, 179, 263, 235, 229, 235, 235, 238, 333, 238, + 238, 243, 254, 243, 243, 255, 247, 242, 247, 247, + 332, 242, 141, 331, 330, 
244, 329, 142, 120, 131, + 132, 133, 132, 132, 131, 131, 131, 134, 131, 131, + 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, + + 131, 135, 135, 135, 135, 135, 135, 135, 135, 135, + 135, 135, 135, 136, 135, 135, 135, 135, 137, 131, + 131, 131, 139, 140, 139, 139, 172, 173, 172, 172, + 213, 328, 327, 174, 186, 187, 186, 186, 326, 325, + 214, 174, 153, 154, 153, 153, 245, 246, 245, 245, + 157, 158, 157, 157, 249, 141, 249, 249, 324, 323, + 142, 149, 145, 149, 149, 322, 175, 251, 150, 251, + 251, 264, 321, 302, 175, 151, 178, 179, 263, 152, + 162, 245, 246, 245, 253, 201, 320, 201, 201, 228, + 317, 228, 228, 316, 265, 163, 229, 164, 235, 163, + + 235, 235, 163, 315, 163, 163, 164, 165, 166, 167, + 168, 294, 169, 183, 183, 240, 183, 183, 183, 183, + 183, 183, 183, 183, 183, 183, 183, 183, 184, 183, + 183, 183, 183, 183, 183, 185, 185, 185, 185, 185, + 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, + 185, 185, 185, 183, 183, 183, 172, 173, 172, 172, + 313, 312, 311, 174, 186, 187, 186, 186, 310, 282, + 238, 174, 238, 238, 289, 290, 289, 289, 243, 309, + 243, 243, 245, 246, 245, 245, 247, 308, 247, 247, + 205, 249, 244, 249, 249, 275, 175, 269, 251, 269, + + 251, 251, 306, 305, 175, 180, 269, 269, 269, 269, + 269, 269, 269, 269, 269, 269, 269, 269, 269, 269, + 269, 269, 269, 269, 301, 261, 270, 274, 274, 275, + 274, 274, 274, 274, 274, 274, 274, 274, 274, 274, + 274, 274, 274, 274, 274, 274, 274, 274, 274, 276, + 276, 276, 276, 276, 276, 276, 276, 276, 276, 276, + 276, 276, 276, 276, 276, 276, 276, 274, 274, 274, + 283, 283, 258, 283, 283, 283, 283, 283, 283, 283, + 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, + 283, 283, 284, 284, 284, 284, 284, 284, 284, 284, + + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 283, 283, 283, 296, 297, 296, 296, 303, 299, 318, + 298, 179, 263, 179, 263, 303, 303, 303, 303, 303, + 303, 289, 290, 289, 289, 296, 297, 296, 296, 319, + 294, 244, 291, 288, 287, 286, 285, 319, 319, 319, + 319, 319, 319, 57, 57, 57, 57, 57, 57, 
57, + 57, 57, 57, 57, 57, 57, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, 71, 73, + 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, + 73, 73, 81, 81, 81, 81, 81, 81, 81, 81, + + 81, 81, 81, 81, 81, 85, 85, 85, 85, 85, + 85, 85, 85, 85, 85, 85, 85, 85, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, + 90, 95, 95, 95, 95, 95, 95, 95, 95, 95, + 95, 95, 95, 95, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 103, 103, 103, + 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, + 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, + 117, 117, 117, 121, 121, 121, 121, 121, 121, 121, + 121, 121, 121, 121, 121, 121, 124, 124, 124, 124, + + 124, 124, 124, 124, 124, 124, 124, 124, 124, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 138, 138, 138, 138, 138, 138, 138, 138, + 138, 138, 138, 138, 138, 143, 143, 143, 143, 143, + 143, 143, 143, 143, 143, 143, 143, 143, 148, 148, + 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, + 148, 156, 156, 230, 282, 156, 156, 159, 159, 159, + 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, + 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + 161, 161, 171, 171, 220, 280, 171, 171, 178, 178, + + 279, 178, 178, 178, 178, 178, 178, 277, 178, 178, + 178, 180, 180, 204, 180, 180, 180, 180, 180, 180, + 180, 180, 180, 180, 190, 190, 190, 190, 190, 190, + 190, 190, 190, 190, 190, 190, 190, 192, 273, 272, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, + 193, 193, 193, 193, 193, 193, 193, 193, 193, 193, + 193, 193, 193, 195, 195, 194, 195, 195, 195, 195, + 195, 195, 195, 191, 266, 195, 196, 196, 262, 261, + 196, 196, 200, 200, 259, 200, 200, 200, 200, 200, + 200, 200, 200, 200, 200, 203, 203, 258, 250, 203, + + 203, 205, 205, 160, 205, 205, 205, 205, 205, 205, + 205, 205, 205, 205, 208, 208, 244, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 210, 210, 239, + 210, 210, 210, 210, 210, 210, 239, 210, 210, 210, + 211, 211, 237, 236, 233, 211, 211, 211, 211, 211, + 212, 212, 232, 212, 212, 212, 212, 212, 212, 212, + 
212, 212, 212, 216, 216, 230, 224, 216, 216, 219, + 219, 219, 219, 219, 219, 219, 219, 219, 219, 219, + 219, 219, 221, 221, 222, 221, 221, 220, 221, 221, + 221, 221, 221, 221, 221, 223, 223, 218, 223, 223, + + 217, 223, 223, 223, 223, 223, 223, 223, 225, 225, + 215, 209, 225, 225, 225, 225, 204, 225, 225, 225, + 225, 226, 226, 202, 226, 226, 226, 226, 226, 226, + 226, 226, 226, 226, 227, 227, 199, 227, 227, 227, + 227, 227, 227, 227, 198, 197, 227, 231, 231, 194, + 191, 231, 231, 234, 179, 177, 234, 234, 234, 234, + 234, 234, 234, 234, 234, 234, 240, 240, 170, 240, + 240, 240, 240, 240, 240, 240, 240, 240, 240, 248, + 248, 160, 248, 248, 248, 248, 248, 248, 248, 248, + 248, 248, 252, 252, 256, 256, 257, 257, 257, 257, + + 257, 257, 257, 257, 257, 257, 257, 257, 257, 260, + 260, 260, 260, 260, 260, 260, 260, 260, 260, 260, + 260, 260, 267, 155, 340, 118, 267, 268, 268, 118, + 268, 268, 268, 268, 268, 268, 268, 268, 268, 271, + 271, 89, 271, 271, 271, 271, 271, 271, 271, 271, + 271, 271, 278, 89, 80, 74, 278, 281, 281, 281, + 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, + 292, 74, 72, 72, 292, 293, 293, 293, 293, 293, + 293, 293, 293, 293, 293, 293, 293, 293, 295, 295, + 340, 340, 295, 295, 300, 300, 340, 340, 300, 300, + + 304, 340, 340, 340, 304, 276, 276, 276, 276, 276, + 276, 276, 276, 276, 276, 276, 276, 276, 307, 340, + 340, 340, 307, 284, 284, 340, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 314, 340, 340, 340, + 314, 45, 340, 340, 340, 340, 340, 340, 340, 340, + 340, 340, 340, 340, 340, 340, 340, 340, 340, 340, + 340, 340, 340, 340, 340, 340, 340, 340, 340, 340, + 340, 340, 340, 340, 340, 340, 340, 340, 340, 340, + 340, 340, 340, 340, 340 + } ; + +static const short int yy_chk[1486] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 
2, 2, 2, 2, 2, 2, 3, 3, 3, 3, + 3, 69, 3, 65, 98, 3, 3, 19, 329, 3, + + 19, 3, 23, 3, 3, 11, 11, 11, 11, 12, + 12, 12, 12, 23, 19, 29, 65, 98, 101, 23, + 23, 20, 17, 69, 20, 3, 3, 4, 4, 4, + 4, 4, 17, 4, 4, 17, 4, 4, 20, 21, + 4, 101, 4, 24, 4, 4, 22, 4, 61, 61, + 21, 61, 178, 178, 24, 29, 21, 22, 21, 328, + 24, 24, 17, 22, 327, 22, 4, 4, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 18, 35, 31, 32, 35, 33, + 34, 110, 326, 105, 18, 31, 32, 18, 33, 34, + 36, 110, 35, 36, 43, 43, 43, 43, 318, 318, + 325, 43, 47, 47, 47, 47, 105, 36, 51, 51, + 51, 51, 43, 324, 18, 25, 25, 25, 25, 25, + + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 30, 39, + 39, 39, 39, 82, 147, 82, 82, 132, 152, 132, + 132, 319, 319, 139, 132, 139, 139, 144, 323, 144, + 144, 149, 167, 149, 149, 167, 161, 147, 161, 161, + 322, 152, 39, 321, 320, 149, 317, 39, 30, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 40, 40, 40, 40, 58, 58, 58, 58, + 212, 315, 313, 58, 67, 67, 67, 67, 312, 311, + 212, 67, 153, 153, 153, 153, 155, 155, 155, 155, + 157, 157, 157, 157, 162, 40, 162, 162, 310, 309, + 40, 44, 44, 44, 44, 308, 58, 164, 44, 164, + 164, 179, 306, 264, 67, 44, 179, 264, 264, 44, + 54, 166, 166, 166, 166, 201, 305, 201, 201, 228, + 301, 228, 228, 299, 179, 54, 228, 54, 235, 54, + + 235, 235, 54, 298, 54, 54, 54, 54, 54, 54, + 54, 293, 54, 66, 66, 291, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, 66, 
66, + 66, 66, 66, 66, 66, 66, 172, 172, 172, 172, + 288, 287, 286, 172, 186, 186, 186, 186, 285, 281, + 238, 186, 238, 238, 239, 239, 239, 239, 243, 280, + 243, 243, 245, 245, 245, 245, 247, 279, 247, 247, + 277, 249, 243, 249, 249, 276, 172, 185, 251, 185, + + 251, 251, 273, 272, 186, 266, 185, 185, 185, 185, + 185, 185, 185, 185, 185, 185, 185, 185, 185, 185, + 185, 185, 185, 185, 262, 260, 185, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, + 230, 230, 257, 230, 230, 230, 230, 230, 230, 230, + 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, + 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, + + 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, + 230, 230, 230, 253, 253, 253, 253, 265, 255, 302, + 254, 265, 265, 302, 302, 265, 265, 265, 265, 265, + 265, 289, 289, 289, 289, 296, 296, 296, 296, 303, + 250, 244, 241, 237, 236, 233, 232, 303, 303, 303, + 303, 303, 303, 341, 341, 341, 341, 341, 341, 341, + 341, 341, 341, 341, 341, 341, 342, 342, 342, 342, + 342, 342, 342, 342, 342, 342, 342, 342, 342, 343, + 343, 343, 343, 343, 343, 343, 343, 343, 343, 343, + 343, 343, 344, 344, 344, 344, 344, 344, 344, 344, + + 344, 344, 344, 344, 344, 345, 345, 345, 345, 345, + 345, 345, 345, 345, 345, 345, 345, 345, 346, 346, + 346, 346, 346, 346, 346, 346, 346, 346, 346, 346, + 346, 347, 347, 347, 347, 347, 347, 347, 347, 347, + 347, 347, 347, 347, 348, 348, 348, 348, 348, 348, + 348, 348, 348, 348, 348, 348, 348, 349, 349, 349, + 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, + 350, 350, 350, 350, 350, 350, 350, 350, 350, 350, + 350, 350, 350, 351, 351, 351, 351, 351, 351, 351, + 351, 351, 351, 351, 351, 351, 352, 352, 352, 352, + + 352, 352, 352, 352, 352, 352, 352, 352, 352, 353, + 353, 353, 353, 353, 353, 353, 353, 353, 353, 353, + 353, 353, 354, 354, 354, 354, 354, 354, 354, 354, + 354, 354, 354, 
354, 354, 355, 355, 355, 355, 355, + 355, 355, 355, 355, 355, 355, 355, 355, 356, 356, + 356, 356, 356, 356, 356, 356, 356, 356, 356, 356, + 356, 357, 357, 229, 222, 357, 357, 358, 358, 358, + 358, 358, 358, 358, 358, 358, 358, 358, 358, 358, + 359, 359, 359, 359, 359, 359, 359, 359, 359, 359, + 359, 359, 360, 360, 219, 218, 360, 360, 361, 361, + + 217, 361, 361, 361, 361, 361, 361, 206, 361, 361, + 361, 362, 362, 204, 362, 362, 362, 362, 362, 362, + 362, 362, 362, 362, 363, 363, 363, 363, 363, 363, + 363, 363, 363, 363, 363, 363, 363, 364, 198, 197, + 364, 364, 364, 364, 364, 364, 364, 364, 364, 364, + 365, 365, 365, 365, 365, 365, 365, 365, 365, 365, + 365, 365, 365, 366, 366, 193, 366, 366, 366, 366, + 366, 366, 366, 190, 181, 366, 367, 367, 177, 175, + 367, 367, 368, 368, 174, 368, 368, 368, 368, 368, + 368, 368, 368, 368, 368, 369, 369, 169, 163, 369, + + 369, 370, 370, 159, 370, 370, 370, 370, 370, 370, + 370, 370, 370, 370, 371, 371, 151, 371, 371, 371, + 371, 371, 371, 371, 371, 371, 371, 372, 372, 150, + 372, 372, 372, 372, 372, 372, 146, 372, 372, 372, + 373, 373, 142, 141, 137, 373, 373, 373, 373, 373, + 374, 374, 136, 374, 374, 374, 374, 374, 374, 374, + 374, 374, 374, 375, 375, 134, 126, 375, 375, 376, + 376, 376, 376, 376, 376, 376, 376, 376, 376, 376, + 376, 376, 377, 377, 123, 377, 377, 117, 377, 377, + 377, 377, 377, 377, 377, 378, 378, 114, 378, 378, + + 113, 378, 378, 378, 378, 378, 378, 378, 379, 379, + 111, 102, 379, 379, 379, 379, 93, 379, 379, 379, + 379, 380, 380, 87, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 380, 381, 381, 80, 381, 381, 381, + 381, 381, 381, 381, 79, 78, 381, 382, 382, 73, + 71, 382, 382, 383, 64, 63, 383, 383, 383, 383, + 383, 383, 383, 383, 383, 383, 384, 384, 55, 384, + 384, 384, 384, 384, 384, 384, 384, 384, 384, 385, + 385, 53, 385, 385, 385, 385, 385, 385, 385, 385, + 385, 385, 386, 386, 387, 387, 388, 388, 388, 388, + + 388, 388, 388, 388, 388, 388, 388, 388, 388, 389, + 389, 389, 389, 389, 389, 389, 389, 389, 
389, 389, + 389, 389, 390, 49, 45, 28, 390, 391, 391, 27, + 391, 391, 391, 391, 391, 391, 391, 391, 391, 392, + 392, 16, 392, 392, 392, 392, 392, 392, 392, 392, + 392, 392, 393, 15, 10, 8, 393, 394, 394, 394, + 394, 394, 394, 394, 394, 394, 394, 394, 394, 394, + 395, 7, 6, 5, 395, 396, 396, 396, 396, 396, + 396, 396, 396, 396, 396, 396, 396, 396, 397, 397, + 0, 0, 397, 397, 398, 398, 0, 0, 398, 398, + + 399, 0, 0, 0, 399, 400, 400, 400, 400, 400, + 400, 400, 400, 400, 400, 400, 400, 400, 401, 0, + 0, 0, 401, 402, 402, 0, 402, 402, 402, 402, + 402, 402, 402, 402, 402, 402, 403, 0, 0, 0, + 403, 340, 340, 340, 340, 340, 340, 340, 340, 340, + 340, 340, 340, 340, 340, 340, 340, 340, 340, 340, + 340, 340, 340, 340, 340, 340, 340, 340, 340, 340, + 340, 340, 340, 340, 340, 340, 340, 340, 340, 340, + 340, 340, 340, 340, 340 + } ; + +static yy_state_type yy_last_accepting_state; +static YY_CHAR *yy_last_accepting_cpos; + +/* the intent behind this definition is that it'll catch + * any uses of REJECT which flex missed + */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 + +/* these variables are all declared out here so that section 3 code can + * manipulate them + */ +/* points to current character in buffer */ +static YY_CHAR *yy_c_buf_p = (YY_CHAR *) 0; +static int yy_init = 1; /* whether we need to initialize */ +static int yy_start = 0; /* start state number */ + +/* flag which is used to allow yywrap()'s to do buffer switches + * instead of setting up a fresh yyin. A bit of a hack ... 
+ */ +static int yy_did_buffer_switch_on_eof; + +static yy_state_type yy_get_previous_state YY_PROTO(( void )); +static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); +static int yy_get_next_buffer YY_PROTO(( void )); +static void yyunput YY_PROTO(( YY_CHAR c, YY_CHAR *buf_ptr )); +void yyrestart YY_PROTO(( FILE *input_file )); +void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer )); +void yy_load_buffer_state YY_PROTO(( void )); +YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); +void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); +void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); + +#define yy_new_buffer yy_create_buffer + +#ifdef __cplusplus +static int yyinput YY_PROTO(( void )); +#else +static int input YY_PROTO(( void )); +#endif + +YY_DECL + { + register yy_state_type yy_current_state; + register YY_CHAR *yy_cp, *yy_bp; + register int yy_act; + + + static int bracelevel, didadef; + int i, indented_code, checking_used, new_xlation; + int doing_codeblock = false; + Char nmdef[MAXLINE], myesc(); + + + if ( yy_init ) + { + YY_USER_INIT; + + if ( ! yy_start ) + yy_start = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( yy_current_buffer ) + yy_init_buffer( yy_current_buffer, yyin ); + else + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_load_buffer_state(); + + yy_init = 0; + } + + while ( 1 ) /* loops until end-of-file is reached */ + { + yy_cp = yy_c_buf_p; + + /* support of yytext */ + *yy_cp = yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of the + * current run. 
+ */ + yy_bp = yy_cp; + + yy_current_state = yy_start; + if ( yy_bp[-1] == '\n' ) + ++yy_current_state; +yy_match: + do + { + register YY_CHAR yy_c = yy_ec[*yy_cp]; + if ( yy_accept[yy_current_state] ) + { + yy_last_accepting_state = yy_current_state; + yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = yy_def[yy_current_state]; + if ( yy_current_state >= 341 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + ++yy_cp; + } + while ( yy_current_state != 340 ); + yy_cp = yy_last_accepting_cpos; + yy_current_state = yy_last_accepting_state; + +yy_find_action: + yy_act = yy_accept[yy_current_state]; + + YY_DO_BEFORE_ACTION; + YY_USER_ACTION; + +do_action: /* this label is used only to access EOF actions */ + + + switch ( yy_act ) + { + case 0: /* must backtrack */ + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = yy_hold_char; + yy_cp = yy_last_accepting_cpos; + yy_current_state = yy_last_accepting_state; + goto yy_find_action; + +case 1: +# line 90 "scan.l" +indented_code = true; BEGIN(CODEBLOCK); + YY_BREAK +case 2: +# line 91 "scan.l" +++linenum; /* treat as a comment */ + YY_BREAK +case 3: +# line 92 "scan.l" +ECHO; BEGIN(C_COMMENT); + YY_BREAK +case 4: +# line 93 "scan.l" +return ( SCDECL ); + YY_BREAK +case 5: +# line 94 "scan.l" +return ( XSCDECL ); + YY_BREAK +case 6: +# line 95 "scan.l" +{ + ++linenum; + line_directive_out( stdout ); + indented_code = false; + BEGIN(CODEBLOCK); + } + YY_BREAK +case 7: +# line 102 "scan.l" +return ( WHITESPACE ); + YY_BREAK +case 8: +# line 104 "scan.l" +{ + sectnum = 2; + line_directive_out( stdout ); + BEGIN(SECT2PROLOG); + return ( SECTEND ); + } + YY_BREAK +case 9: +# line 111 "scan.l" +{ + pinpoint_message( "warning - %%used/%%unused have been deprecated" ); + checking_used = REALLY_USED; BEGIN(USED_LIST); + } + YY_BREAK +case 10: +# line 115 "scan.l" +{ + checking_used = REALLY_NOT_USED; 
BEGIN(USED_LIST); + pinpoint_message( "warning - %%used/%%unused have been deprecated" ); + checking_used = REALLY_NOT_USED; BEGIN(USED_LIST); + } + YY_BREAK +case 11: +# line 122 "scan.l" +{ +#ifdef NOTDEF + fprintf( stderr, + "old-style lex command at line %d ignored:\n\t%s", + linenum, yytext ); +#endif + ++linenum; + } + YY_BREAK +case 12: +# line 131 "scan.l" +/* ignore old lex directive */ + YY_BREAK +case 13: +# line 133 "scan.l" +{ + ++linenum; + xlation = + (int *) malloc( sizeof( int ) * (unsigned) csize ); + + if ( ! xlation ) + flexfatal( + "dynamic memory failure building %t table" ); + + for ( i = 0; i < csize; ++i ) + xlation[i] = 0; + + num_xlations = 0; + + BEGIN(XLATION); + } + YY_BREAK +case 14: +# line 150 "scan.l" +synerr( "unrecognized '%' directive" ); + YY_BREAK +case 15: +# line 152 "scan.l" +{ + (void) strcpy( nmstr, (char *) yytext ); + didadef = false; + BEGIN(PICKUPDEF); + } + YY_BREAK +case 16: +# line 158 "scan.l" +RETURNNAME; + YY_BREAK +case 17: +# line 159 "scan.l" +++linenum; /* allows blank lines in section 1 */ + YY_BREAK +case 18: +# line 160 "scan.l" +++linenum; return ( '\n' ); + YY_BREAK +case 19: +# line 161 "scan.l" +synerr( "illegal character" ); BEGIN(RECOVER); + YY_BREAK +case 20: +# line 164 "scan.l" +ECHO; BEGIN(INITIAL); + YY_BREAK +case 21: +# line 165 "scan.l" +++linenum; ECHO; BEGIN(INITIAL); + YY_BREAK +case 22: +# line 166 "scan.l" +ECHO; + YY_BREAK +case 23: +# line 167 "scan.l" +ECHO; + YY_BREAK +case 24: +# line 168 "scan.l" +++linenum; ECHO; + YY_BREAK +case 25: +# line 171 "scan.l" +++linenum; BEGIN(INITIAL); + YY_BREAK +case 26: +# line 172 "scan.l" +ECHO; CHECK_REJECT(yytext); + YY_BREAK +case 27: +# line 173 "scan.l" +ECHO; CHECK_YYMORE(yytext); + YY_BREAK +case 28: +# line 174 "scan.l" +ECHO; + YY_BREAK +case 29: +# line 175 "scan.l" +{ + ++linenum; + ECHO; + if ( indented_code ) + BEGIN(INITIAL); + } + YY_BREAK +case 30: +# line 183 "scan.l" +/* separates name and definition */ + YY_BREAK +case 31: +# 
line 185 "scan.l" +{ + (void) strcpy( (char *) nmdef, (char *) yytext ); + + for ( i = strlen( (char *) nmdef ) - 1; + i >= 0 && /* bounds check must guard both whitespace tests below */ + (nmdef[i] == ' ' || nmdef[i] == '\t'); + --i ) + ; + + nmdef[i + 1] = '\0'; + + ndinstal( nmstr, nmdef ); + didadef = true; + } + YY_BREAK +case 32: +# line 200 "scan.l" +{ + if ( ! didadef ) + synerr( "incomplete name definition" ); + BEGIN(INITIAL); + ++linenum; + } + YY_BREAK +case 33: +# line 207 "scan.l" +++linenum; BEGIN(INITIAL); RETURNNAME; + YY_BREAK +case 34: +# line 210 "scan.l" +++linenum; BEGIN(INITIAL); + YY_BREAK +case 35: +# line 211 "scan.l" + + YY_BREAK +case 36: +# line 212 "scan.l" +{ + if ( all_upper( yytext ) ) + reject_really_used = checking_used; + else + synerr( "unrecognized %used/%unused construct" ); + } + YY_BREAK +case 37: +# line 218 "scan.l" +{ + if ( all_lower( yytext ) ) + yymore_really_used = checking_used; + else + synerr( "unrecognized %used/%unused construct" ); + } + YY_BREAK +case 38: +# line 224 "scan.l" +synerr( "unrecognized %used/%unused construct" ); + YY_BREAK +case 39: +# line 227 "scan.l" +++linenum; BEGIN(INITIAL); + YY_BREAK +case 40: +# line 228 "scan.l" +++num_xlations; new_xlation = true; + YY_BREAK +case 41: +# line 229 "scan.l" +synerr( "bad row in translation table" ); + YY_BREAK +case 42: +# line 230 "scan.l" +/* ignore whitespace */ + YY_BREAK +case 43: +# line 232 "scan.l" +{ + xlation[myesc( yytext )] = + (new_xlation ? num_xlations : -num_xlations); + new_xlation = false; + } + YY_BREAK +case 44: +# line 237 "scan.l" +{ + xlation[yytext[0]] = + (new_xlation ? 
num_xlations : -num_xlations); + new_xlation = false; + } + YY_BREAK +case 45: +# line 243 "scan.l" +++linenum; + YY_BREAK +case 46: +*yy_cp = yy_hold_char; /* undo effects of setting up yytext */ +yy_c_buf_p = yy_cp -= 1; +YY_DO_BEFORE_ACTION; /* set up yytext again */ +# line 246 "scan.l" +{ + ++linenum; + ACTION_ECHO; + MARK_END_OF_PROLOG; + BEGIN(SECT2); + } + YY_BREAK +case 47: +# line 253 "scan.l" +++linenum; ACTION_ECHO; + YY_BREAK +case YY_STATE_EOF(SECT2PROLOG): +# line 255 "scan.l" +MARK_END_OF_PROLOG; yyterminate(); + YY_BREAK +case 49: +# line 257 "scan.l" +++linenum; /* allow blank lines in section 2 */ + YY_BREAK +case 50: +# line 259 "scan.l" +{ + indented_code = (yytext[0] != '%'); + doing_codeblock = true; + bracelevel = 1; + + if ( indented_code ) + ACTION_ECHO; + + BEGIN(CODEBLOCK_2); + } + YY_BREAK +case 51: +# line 270 "scan.l" +BEGIN(SC); return ( '<' ); + YY_BREAK +case 52: +# line 271 "scan.l" +return ( '^' ); + YY_BREAK +case 53: +# line 272 "scan.l" +BEGIN(QUOTE); return ( '"' ); + YY_BREAK +case 54: +*yy_cp = yy_hold_char; /* undo effects of setting up yytext */ +yy_c_buf_p = yy_cp = yy_bp + 1; +YY_DO_BEFORE_ACTION; /* set up yytext again */ +# line 273 "scan.l" +BEGIN(NUM); return ( '{' ); + YY_BREAK +case 55: +# line 274 "scan.l" +BEGIN(BRACEERROR); + YY_BREAK +case 56: +*yy_cp = yy_hold_char; /* undo effects of setting up yytext */ +yy_c_buf_p = yy_cp = yy_bp + 1; +YY_DO_BEFORE_ACTION; /* set up yytext again */ +# line 275 "scan.l" +return ( '$' ); + YY_BREAK +case 57: +# line 277 "scan.l" +{ + bracelevel = 1; + BEGIN(PERCENT_BRACE_ACTION); + return ( '\n' ); + } + YY_BREAK +case 58: +# line 282 "scan.l" +continued_action = true; ++linenum; return ( '\n' ); + YY_BREAK +case 59: +# line 284 "scan.l" +{ + /* this rule is separate from the one below because + * otherwise we get variable trailing context, so + * we can't build the scanner using -{f,F} + */ + bracelevel = 0; + continued_action = false; + BEGIN(ACTION); + return ( '\n' ); + 
} + YY_BREAK +case 60: +*yy_cp = yy_hold_char; /* undo effects of setting up yytext */ +yy_c_buf_p = yy_cp -= 1; +YY_DO_BEFORE_ACTION; /* set up yytext again */ +# line 295 "scan.l" +{ + bracelevel = 0; + continued_action = false; + BEGIN(ACTION); + return ( '\n' ); + } + YY_BREAK +case 61: +# line 302 "scan.l" +++linenum; return ( '\n' ); + YY_BREAK +case 62: +# line 304 "scan.l" +return ( EOF_OP ); + YY_BREAK +case 63: +# line 306 "scan.l" +{ + sectnum = 3; + BEGIN(SECT3); + return ( EOF ); /* to stop the parser */ + } + YY_BREAK +case 64: +# line 312 "scan.l" +{ + int cclval; + + (void) strcpy( nmstr, (char *) yytext ); + + /* check to see if we've already encountered this ccl */ + if ( (cclval = ccllookup( (Char *) nmstr )) ) + { + yylval = cclval; + ++cclreuse; + return ( PREVCCL ); + } + else + { + /* we fudge a bit. We know that this ccl will + * soon be numbered as lastccl + 1 by cclinit + */ + cclinstal( (Char *) nmstr, lastccl + 1 ); + + /* push back everything but the leading bracket + * so the ccl can be rescanned + */ + PUT_BACK_STRING((Char *) nmstr, 1); + + BEGIN(FIRSTCCL); + return ( '[' ); + } + } + YY_BREAK +case 65: +# line 341 "scan.l" +{ + register Char *nmdefptr; + Char *ndlookup(); + + (void) strcpy( nmstr, (char *) yytext ); + nmstr[yyleng - 1] = '\0'; /* chop trailing brace */ + + /* lookup from "nmstr + 1" to chop leading brace */ + if ( ! 
(nmdefptr = ndlookup( nmstr + 1 )) ) + synerr( "undefined {name}" ); + + else + { /* push back name surrounded by ()'s */ + unput(')'); + PUT_BACK_STRING(nmdefptr, 0); + unput('('); + } + } + YY_BREAK +case 66: +# line 360 "scan.l" +return ( yytext[0] ); + YY_BREAK +case 67: +# line 361 "scan.l" +RETURNCHAR; + YY_BREAK +case 68: +# line 362 "scan.l" +++linenum; return ( '\n' ); + YY_BREAK +case 69: +# line 365 "scan.l" +return ( ',' ); + YY_BREAK +case 70: +# line 366 "scan.l" +BEGIN(SECT2); return ( '>' ); + YY_BREAK +case 71: +*yy_cp = yy_hold_char; /* undo effects of setting up yytext */ +yy_c_buf_p = yy_cp = yy_bp + 1; +YY_DO_BEFORE_ACTION; /* set up yytext again */ +# line 367 "scan.l" +BEGIN(CARETISBOL); return ( '>' ); + YY_BREAK +case 72: +# line 368 "scan.l" +RETURNNAME; + YY_BREAK +case 73: +# line 369 "scan.l" +synerr( "bad start condition name" ); + YY_BREAK +case 74: +# line 371 "scan.l" +BEGIN(SECT2); return ( '^' ); + YY_BREAK +case 75: +# line 374 "scan.l" +RETURNCHAR; + YY_BREAK +case 76: +# line 375 "scan.l" +BEGIN(SECT2); return ( '"' ); + YY_BREAK +case 77: +# line 377 "scan.l" +{ + synerr( "missing quote" ); + BEGIN(SECT2); + ++linenum; + return ( '"' ); + } + YY_BREAK +case 78: +*yy_cp = yy_hold_char; /* undo effects of setting up yytext */ +yy_c_buf_p = yy_cp = yy_bp + 1; +YY_DO_BEFORE_ACTION; /* set up yytext again */ +# line 385 "scan.l" +BEGIN(CCL); return ( '^' ); + YY_BREAK +case 79: +*yy_cp = yy_hold_char; /* undo effects of setting up yytext */ +yy_c_buf_p = yy_cp = yy_bp + 1; +YY_DO_BEFORE_ACTION; /* set up yytext again */ +# line 386 "scan.l" +return ( '^' ); + YY_BREAK +case 80: +# line 387 "scan.l" +BEGIN(CCL); yylval = '-'; return ( CHAR ); + YY_BREAK +case 81: +# line 388 "scan.l" +BEGIN(CCL); RETURNCHAR; + YY_BREAK +case 82: +*yy_cp = yy_hold_char; /* undo effects of setting up yytext */ +yy_c_buf_p = yy_cp = yy_bp + 1; +YY_DO_BEFORE_ACTION; /* set up yytext again */ +# line 390 "scan.l" +return ( '-' ); + YY_BREAK +case 83: +# 
line 391 "scan.l" +RETURNCHAR; + YY_BREAK +case 84: +# line 392 "scan.l" +BEGIN(SECT2); return ( ']' ); + YY_BREAK +case 85: +# line 395 "scan.l" +{ + yylval = myctoi( yytext ); + return ( NUMBER ); + } + YY_BREAK +case 86: +# line 400 "scan.l" +return ( ',' ); + YY_BREAK +case 87: +# line 401 "scan.l" +BEGIN(SECT2); return ( '}' ); + YY_BREAK +case 88: +# line 403 "scan.l" +{ + synerr( "bad character inside {}'s" ); + BEGIN(SECT2); + return ( '}' ); + } + YY_BREAK +case 89: +# line 409 "scan.l" +{ + synerr( "missing }" ); + BEGIN(SECT2); + ++linenum; + return ( '}' ); + } + YY_BREAK +case 90: +# line 417 "scan.l" +synerr( "bad name in {}'s" ); BEGIN(SECT2); + YY_BREAK +case 91: +# line 418 "scan.l" +synerr( "missing }" ); ++linenum; BEGIN(SECT2); + YY_BREAK +case 92: +# line 421 "scan.l" +bracelevel = 0; + YY_BREAK +case 93: +# line 422 "scan.l" +{ + ACTION_ECHO; + CHECK_REJECT(yytext); + } + YY_BREAK +case 94: +# line 426 "scan.l" +{ + ACTION_ECHO; + CHECK_YYMORE(yytext); + } + YY_BREAK +case 95: +# line 430 "scan.l" +ACTION_ECHO; + YY_BREAK +case 96: +# line 431 "scan.l" +{ + ++linenum; + ACTION_ECHO; + if ( bracelevel == 0 || + (doing_codeblock && indented_code) ) + { + if ( ! 
doing_codeblock ) + fputs( "\tYY_BREAK\n", temp_action_file ); + + doing_codeblock = false; + BEGIN(SECT2); + } + } + YY_BREAK + /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */ +case 97: +# line 447 "scan.l" +ACTION_ECHO; ++bracelevel; + YY_BREAK +case 98: +# line 448 "scan.l" +ACTION_ECHO; --bracelevel; + YY_BREAK +case 99: +# line 449 "scan.l" +ACTION_ECHO; + YY_BREAK +case 100: +# line 450 "scan.l" +ACTION_ECHO; + YY_BREAK +case 101: +# line 451 "scan.l" +ACTION_ECHO; BEGIN(ACTION_COMMENT); + YY_BREAK +case 102: +# line 452 "scan.l" +ACTION_ECHO; /* character constant */ + YY_BREAK +case 103: +# line 453 "scan.l" +ACTION_ECHO; BEGIN(ACTION_STRING); + YY_BREAK +case 104: +# line 454 "scan.l" +{ + ++linenum; + ACTION_ECHO; + if ( bracelevel == 0 ) + { + fputs( "\tYY_BREAK\n", temp_action_file ); + BEGIN(SECT2); + } + } + YY_BREAK +case 105: +# line 463 "scan.l" +ACTION_ECHO; + YY_BREAK +case 106: +# line 465 "scan.l" +ACTION_ECHO; BEGIN(ACTION); + YY_BREAK +case 107: +# line 466 "scan.l" +ACTION_ECHO; + YY_BREAK +case 108: +# line 467 "scan.l" +ACTION_ECHO; + YY_BREAK +case 109: +# line 468 "scan.l" +++linenum; ACTION_ECHO; + YY_BREAK +case 110: +# line 469 "scan.l" +ACTION_ECHO; + YY_BREAK +case 111: +# line 471 "scan.l" +ACTION_ECHO; + YY_BREAK +case 112: +# line 472 "scan.l" +ACTION_ECHO; + YY_BREAK +case 113: +# line 473 "scan.l" +++linenum; ACTION_ECHO; + YY_BREAK +case 114: +# line 474 "scan.l" +ACTION_ECHO; BEGIN(ACTION); + YY_BREAK +case 115: +# line 475 "scan.l" +ACTION_ECHO; + YY_BREAK +case YY_STATE_EOF(ACTION): +case YY_STATE_EOF(ACTION_COMMENT): +case YY_STATE_EOF(ACTION_STRING): +# line 477 "scan.l" +{ + synerr( "EOF encountered inside an action" ); + yyterminate(); + } + YY_BREAK +case 117: +# line 483 "scan.l" +{ + yylval = myesc( yytext ); + return ( CHAR ); + } + YY_BREAK +case 118: +# line 488 "scan.l" +{ + yylval = myesc( yytext ); + BEGIN(CCL); + return ( CHAR ); + } + YY_BREAK +case 119: +# line 495 "scan.l" +ECHO; + 
YY_BREAK +case 120: +# line 496 "scan.l" +YY_FATAL_ERROR( "flex scanner jammed" ); + YY_BREAK +case YY_STATE_EOF(INITIAL): +case YY_STATE_EOF(SECT2): +case YY_STATE_EOF(SECT3): +case YY_STATE_EOF(CODEBLOCK): +case YY_STATE_EOF(PICKUPDEF): +case YY_STATE_EOF(SC): +case YY_STATE_EOF(CARETISBOL): +case YY_STATE_EOF(NUM): +case YY_STATE_EOF(QUOTE): +case YY_STATE_EOF(FIRSTCCL): +case YY_STATE_EOF(CCL): +case YY_STATE_EOF(RECOVER): +case YY_STATE_EOF(BRACEERROR): +case YY_STATE_EOF(C_COMMENT): +case YY_STATE_EOF(PERCENT_BRACE_ACTION): +case YY_STATE_EOF(USED_LIST): +case YY_STATE_EOF(CODEBLOCK_2): +case YY_STATE_EOF(XLATION): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* amount of text matched not including the EOB char */ + int yy_amount_of_matched_text = yy_cp - yytext - 1; + + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = yy_hold_char; + + /* note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the end- + * of-buffer state). Contrast this with the test in yyinput(). + */ + if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + /* this was really a NUL */ + { + yy_state_type yy_next_state; + + yy_c_buf_p = yytext + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + /* okay, we're now positioned to make the + * NUL transition. 
We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we + * don't want to build jamming into it because + * then it will run more slowly) + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = yytext + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* consume the NUL */ + yy_cp = ++yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = yy_last_accepting_cpos; + yy_current_state = yy_last_accepting_state; + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer() ) + { + case EOB_ACT_END_OF_FILE: + { + yy_did_buffer_switch_on_eof = 0; + + if ( yywrap() ) + { + /* note: because we've taken care in + * yy_get_next_buffer() to have set up yytext, + * we can now set up yy_c_buf_p so that if some + * total hoser (like flex itself) wants + * to call the scanner after we return the + * YY_NULL, it'll still work - another YY_NULL + * will get returned. + */ + yy_c_buf_p = yytext + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF((yy_start - 1) / 2); + goto do_action; + } + + else + { + if ( ! 
yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + } + break; + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = yytext + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yy_c_buf_p = + &yy_current_buffer->yy_ch_buf[yy_n_chars]; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: +#ifdef FLEX_DEBUG + printf( "action # %d\n", yy_act ); +#endif + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } + } + } + + +/* yy_get_next_buffer - try to read in a new buffer + * + * synopsis + * int yy_get_next_buffer(); + * + * returns a code representing an action + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ + +static int yy_get_next_buffer() + + { + register YY_CHAR *dest = yy_current_buffer->yy_ch_buf; + register YY_CHAR *source = yytext - 1; /* copy prev. 
char, too */ + register int number_to_move, i; + int ret_val; + + if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + /* try to read more data */ + + /* first move last chars to start of buffer */ + number_to_move = yy_c_buf_p - yytext; + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( yy_current_buffer->yy_eof_status != EOF_NOT_SEEN ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + yy_n_chars = 0; + + else + { + int num_to_read = yy_current_buffer->yy_buf_size - number_to_move - 1; + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + else if ( num_to_read <= 0 ) + YY_FATAL_ERROR( "fatal error - scanner input buffer overflow" ); + + /* read in more data */ + YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), + yy_n_chars, num_to_read ); + } + + if ( yy_n_chars == 0 ) + { + if ( number_to_move == 1 ) + { + ret_val = EOB_ACT_END_OF_FILE; + yy_current_buffer->yy_eof_status = EOF_DONE; + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + yy_current_buffer->yy_eof_status = EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + yy_n_chars += number_to_move; + yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + /* yytext begins at the second character in yy_ch_buf; the first + * character is the one which preceded it before reading in the latest + * buffer; it needs to be kept around in case it's a newline, so + * yy_get_previous_state() will have with '^' rules active + */ + + yytext = &yy_current_buffer->yy_ch_buf[1]; + + return ( ret_val ); + } + + +/* yy_get_previous_state - get the state just before the EOB char was reached + * + * synopsis + * yy_state_type yy_get_previous_state(); + */ + +static yy_state_type yy_get_previous_state() + + { + register yy_state_type 
yy_current_state; + register YY_CHAR *yy_cp; + + register YY_CHAR *yy_bp = yytext; + + yy_current_state = yy_start; + if ( yy_bp[-1] == '\n' ) + ++yy_current_state; + + for ( yy_cp = yytext + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) + { + register YY_CHAR yy_c = (*yy_cp ? yy_ec[*yy_cp] : 1); + if ( yy_accept[yy_current_state] ) + { + yy_last_accepting_state = yy_current_state; + yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = yy_def[yy_current_state]; + if ( yy_current_state >= 341 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + } + + return ( yy_current_state ); + } + + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + +#ifdef YY_USE_PROTOS +static yy_state_type yy_try_NUL_trans( register yy_state_type yy_current_state ) +#else +static yy_state_type yy_try_NUL_trans( yy_current_state ) +register yy_state_type yy_current_state; +#endif + + { + register int yy_is_jam; + register YY_CHAR *yy_cp = yy_c_buf_p; + + register YY_CHAR yy_c = 1; + if ( yy_accept[yy_current_state] ) + { + yy_last_accepting_state = yy_current_state; + yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = yy_def[yy_current_state]; + if ( yy_current_state >= 341 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + yy_is_jam = (yy_current_state == 340); + + return ( yy_is_jam ? 
0 : yy_current_state ); + } + + +#ifdef YY_USE_PROTOS +static void yyunput( YY_CHAR c, register YY_CHAR *yy_bp ) +#else +static void yyunput( c, yy_bp ) +YY_CHAR c; +register YY_CHAR *yy_bp; +#endif + + { + register YY_CHAR *yy_cp = yy_c_buf_p; + + /* undo effects of setting up yytext */ + *yy_cp = yy_hold_char; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + { /* need to shift things up to make room */ + register int number_to_move = yy_n_chars + 2; /* +2 for EOB chars */ + register YY_CHAR *dest = + &yy_current_buffer->yy_ch_buf[yy_current_buffer->yy_buf_size + 2]; + register YY_CHAR *source = + &yy_current_buffer->yy_ch_buf[number_to_move]; + + while ( source > yy_current_buffer->yy_ch_buf ) + *--dest = *--source; + + yy_cp += dest - source; + yy_bp += dest - source; + yy_n_chars = yy_current_buffer->yy_buf_size; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + YY_FATAL_ERROR( "flex scanner push-back overflow" ); + } + + if ( yy_cp > yy_bp && yy_cp[-1] == '\n' ) + yy_cp[-2] = '\n'; + + *--yy_cp = c; + + /* note: the formal parameter *must* be called "yy_bp" for this + * macro to now work correctly + */ + YY_DO_BEFORE_ACTION; /* set up yytext again */ + } + + +#ifdef __cplusplus +static int yyinput() +#else +static int input() +#endif + + { + int c; + YY_CHAR *yy_cp = yy_c_buf_p; + + *yy_cp = yy_hold_char; + + if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. 
+ */ + if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + /* this was really a NUL */ + *yy_c_buf_p = '\0'; + + else + { /* need more input */ + yytext = yy_c_buf_p; + ++yy_c_buf_p; + + switch ( yy_get_next_buffer() ) + { + case EOB_ACT_END_OF_FILE: + { + if ( yywrap() ) + { + yy_c_buf_p = yytext + YY_MORE_ADJ; + return ( EOF ); + } + + YY_NEW_FILE; + +#ifdef __cplusplus + return ( yyinput() ); +#else + return ( input() ); +#endif + } + break; + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = yytext + YY_MORE_ADJ; + break; + + case EOB_ACT_LAST_MATCH: +#ifdef __cplusplus + YY_FATAL_ERROR( "unexpected last match in yyinput()" ); +#else + YY_FATAL_ERROR( "unexpected last match in input()" ); +#endif + } + } + } + + c = *yy_c_buf_p; + yy_hold_char = *++yy_c_buf_p; + + return ( c ); + } + + +#ifdef YY_USE_PROTOS +void yyrestart( FILE *input_file ) +#else +void yyrestart( input_file ) +FILE *input_file; +#endif + + { + yy_init_buffer( yy_current_buffer, input_file ); + yy_load_buffer_state(); + } + + +#ifdef YY_USE_PROTOS +void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) +#else +void yy_switch_to_buffer( new_buffer ) +YY_BUFFER_STATE new_buffer; +#endif + + { + if ( yy_current_buffer == new_buffer ) + return; + + if ( yy_current_buffer ) + { + /* flush out information for old buffer */ + *yy_c_buf_p = yy_hold_char; + yy_current_buffer->yy_buf_pos = yy_c_buf_p; + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + yy_current_buffer = new_buffer; + yy_load_buffer_state(); + + /* we don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. 
+ */ + yy_did_buffer_switch_on_eof = 1; + } + + +#ifdef YY_USE_PROTOS +void yy_load_buffer_state( void ) +#else +void yy_load_buffer_state() +#endif + + { + yy_n_chars = yy_current_buffer->yy_n_chars; + yytext = yy_c_buf_p = yy_current_buffer->yy_buf_pos; + yyin = yy_current_buffer->yy_input_file; + yy_hold_char = *yy_c_buf_p; + } + + +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) +#else +YY_BUFFER_STATE yy_create_buffer( file, size ) +FILE *file; +int size; +#endif + + { + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) malloc( sizeof( struct yy_buffer_state ) ); + + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (YY_CHAR *) malloc( (unsigned) (b->yy_buf_size + 2) ); + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + yy_init_buffer( b, file ); + + return ( b ); + } + + +#ifdef YY_USE_PROTOS +void yy_delete_buffer( YY_BUFFER_STATE b ) +#else +void yy_delete_buffer( b ) +YY_BUFFER_STATE b; +#endif + + { + if ( b == yy_current_buffer ) + yy_current_buffer = (YY_BUFFER_STATE) 0; + + free( (char *) b->yy_ch_buf ); + free( (char *) b ); + } + + +#ifdef YY_USE_PROTOS +void yy_init_buffer( YY_BUFFER_STATE b, FILE *file ) +#else +void yy_init_buffer( b, file ) +YY_BUFFER_STATE b; +FILE *file; +#endif + + { + b->yy_input_file = file; + + /* we put in the '\n' and start reading from [1] so that an + * initial match-at-newline will be true. + */ + + b->yy_ch_buf[0] = '\n'; + b->yy_n_chars = 1; + + /* we always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. 
+ */ + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[2] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[1]; + + b->yy_eof_status = EOF_NOT_SEEN; + } +# line 496 "scan.l" + + + +int yywrap() + + { + if ( --num_input_files > 0 ) + { + set_input_file( *++input_files ); + return ( 0 ); + } + + else + return ( 1 ); + } + + +/* set_input_file - open the given file (if NULL, stdin) for scanning */ + +void set_input_file( file ) +char *file; + + { + if ( file ) + { + infilename = file; + yyin = fopen( infilename, "r" ); + + if ( yyin == NULL ) + lerrsf( "can't open %s", file ); + } + + else + { + yyin = stdin; + infilename = ""; + } + } diff --git a/util/flex/libmain.c b/util/flex/libmain.c new file mode 100644 index 000000000..951bdaab9 --- /dev/null +++ b/util/flex/libmain.c @@ -0,0 +1,13 @@ +/* libmain - flex run-time support library "main" function */ + +/* $Header$ */ + +extern int yylex(); + +int main( argc, argv ) +int argc; +char *argv[]; + + { + return yylex(); + } diff --git a/util/flex/main.c b/util/flex/main.c new file mode 100644 index 000000000..606066f3a --- /dev/null +++ b/util/flex/main.c @@ -0,0 +1,769 @@ +/* flex - tool to generate fast lexical analyzers */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. 
+ * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef lint +char copyright[] = +"@(#) Copyright (c) 1990 The Regents of the University of California.\n\ + All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + + +#include "flexdef.h" + +static char flex_version[] = "2.3"; + + +/* declare functions that have forward references */ + +void flexinit PROTO((int, char**)); +void readin PROTO(()); +void set_up_initial_allocations PROTO(()); + + +/* these globals are all defined and commented in flexdef.h */ +int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; +int interactive, caseins, useecs, fulltbl, usemecs; +int fullspd, gen_line_dirs, performance_report, backtrack_report, csize; +int yymore_used, reject, real_reject, continued_action; +int yymore_really_used, reject_really_used; +int datapos, dataline, linenum; +FILE *skelfile = NULL; +char *infilename = NULL; +int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; +int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; +int current_mns, num_rules, current_max_rules, 
lastnfa; +int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2; +int *accptnum, *assoc_rule, *state_type, *rule_type, *rule_linenum; +int current_state_type; +int variable_trailing_context_rules; +int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; +int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; +int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1]; +int tecbck[CSIZE + 1]; +int *xlation = (int *) 0; +int num_xlations; +int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc; +char **scname; +int current_max_dfa_size, current_max_xpairs; +int current_max_template_xpairs, current_max_dfas; +int lastdfa, *nxt, *chk, *tnxt; +int *base, *def, *nultrans, NUL_ec, tblend, firstfree, **dss, *dfasiz; +union dfaacc_union *dfaacc; +int *accsiz, *dhash, numas; +int numsnpairs, jambase, jamstate; +int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse; +int current_max_ccl_tbl_size; +Char *ccltbl; +char *starttime, *endtime, nmstr[MAXLINE]; +int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; +int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; +int num_backtracking, bol_needed; +FILE *temp_action_file; +FILE *backtrack_file; +int end_of_buffer_state; +char *action_file_name = NULL; +char **input_files; +int num_input_files; +char *program_name; + +#ifndef SHORT_FILE_NAMES +static char *outfile = "lex.yy.c"; +#else +static char *outfile = "lexyy.c"; +#endif +static int outfile_created = 0; +static int use_stdout; +static char *skelname = NULL; + + +int main( argc, argv ) +int argc; +char **argv; + + { + flexinit( argc, argv ); + + readin(); + + if ( syntaxerror ) + flexend( 1 ); + + if ( yymore_really_used == REALLY_USED ) + yymore_used = true; + else if ( yymore_really_used == REALLY_NOT_USED ) + yymore_used = false; + + if ( reject_really_used == REALLY_USED ) + reject = true; + else if ( reject_really_used == REALLY_NOT_USED ) + reject = false; + + if ( 
performance_report ) + { + if ( interactive ) + fprintf( stderr, + "-I (interactive) entails a minor performance penalty\n" ); + + if ( yymore_used ) + fprintf( stderr, "yymore() entails a minor performance penalty\n" ); + + if ( reject ) + fprintf( stderr, "REJECT entails a large performance penalty\n" ); + + if ( variable_trailing_context_rules ) + fprintf( stderr, +"Variable trailing context rules entail a large performance penalty\n" ); + } + + if ( reject ) + real_reject = true; + + if ( variable_trailing_context_rules ) + reject = true; + + if ( (fulltbl || fullspd) && reject ) + { + if ( real_reject ) + flexerror( "REJECT cannot be used with -f or -F" ); + else + flexerror( + "variable trailing context rules cannot be used with -f or -F" ); + } + + ntod(); + + /* generate the C state transition tables from the DFA */ + make_tables(); + + /* note, flexend does not return. It exits with its argument as status. */ + + flexend( 0 ); + + /*NOTREACHED*/ + } + + +/* flexend - terminate flex + * + * synopsis + * int status; + * flexend( status ); + * + * status is exit status. + * + * note + * This routine does not return. 
+ */ + +void flexend( status ) +int status; + + { + int tblsiz; + char *flex_gettime(); + + if ( skelfile != NULL ) + { + if ( ferror( skelfile ) ) + flexfatal( "error occurred when writing skeleton file" ); + + else if ( fclose( skelfile ) ) + flexfatal( "error occurred when closing skeleton file" ); + } + + if ( temp_action_file ) + { + if ( ferror( temp_action_file ) ) + flexfatal( "error occurred when writing temporary action file" ); + + else if ( fclose( temp_action_file ) ) + flexfatal( "error occurred when closing temporary action file" ); + + else if ( unlink( action_file_name ) ) + flexfatal( "error occurred when deleting temporary action file" ); + } + + if ( status != 0 && outfile_created ) + { + if ( ferror( stdout ) ) + flexfatal( "error occurred when writing output file" ); + + else if ( fclose( stdout ) ) + flexfatal( "error occurred when closing output file" ); + + else if ( unlink( outfile ) ) + flexfatal( "error occurred when deleting output file" ); + } + + if ( backtrack_report && backtrack_file ) + { + if ( num_backtracking == 0 ) + fprintf( backtrack_file, "No backtracking.\n" ); + else if ( fullspd || fulltbl ) + fprintf( backtrack_file, + "%d backtracking (non-accepting) states.\n", + num_backtracking ); + else + fprintf( backtrack_file, "Compressed tables always backtrack.\n" ); + + if ( ferror( backtrack_file ) ) + flexfatal( "error occurred when writing backtracking file" ); + + else if ( fclose( backtrack_file ) ) + flexfatal( "error occurred when closing backtracking file" ); + } + + if ( printstats ) + { + endtime = flex_gettime(); + + fprintf( stderr, "%s version %s usage statistics:\n", program_name, + flex_version ); + fprintf( stderr, " started at %s, finished at %s\n", + starttime, endtime ); + + fprintf( stderr, " scanner options: -" ); + + if ( backtrack_report ) + putc( 'b', stderr ); + if ( ddebug ) + putc( 'd', stderr ); + if ( interactive ) + putc( 'I', stderr ); + if ( caseins ) + putc( 'i', stderr ); + if ( ! 
gen_line_dirs ) + putc( 'L', stderr ); + if ( performance_report ) + putc( 'p', stderr ); + if ( spprdflt ) + putc( 's', stderr ); + if ( use_stdout ) + putc( 't', stderr ); + if ( trace ) + putc( 'T', stderr ); + if ( printstats ) + putc( 'v', stderr ); /* always true! */ + if ( csize == 256 ) + putc( '8', stderr ); + + fprintf( stderr, " -C" ); + + if ( fulltbl ) + putc( 'f', stderr ); + if ( fullspd ) + putc( 'F', stderr ); + if ( useecs ) + putc( 'e', stderr ); + if ( usemecs ) + putc( 'm', stderr ); + + if ( strcmp( skelname, DEFAULT_SKELETON_FILE ) ) + fprintf( stderr, " -S%s", skelname ); + + putc( '\n', stderr ); + + fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns ); + fprintf( stderr, " %d/%d DFA states (%d words)\n", lastdfa, + current_max_dfas, totnst ); + fprintf( stderr, + " %d rules\n", num_rules - 1 /* - 1 for def. rule */ ); + + if ( num_backtracking == 0 ) + fprintf( stderr, " No backtracking\n" ); + else if ( fullspd || fulltbl ) + fprintf( stderr, " %d backtracking (non-accepting) states\n", + num_backtracking ); + else + fprintf( stderr, " compressed tables always backtrack\n" ); + + if ( bol_needed ) + fprintf( stderr, " Beginning-of-line patterns used\n" ); + + fprintf( stderr, " %d/%d start conditions\n", lastsc, + current_max_scs ); + fprintf( stderr, " %d epsilon states, %d double epsilon states\n", + numeps, eps2 ); + + if ( lastccl == 0 ) + fprintf( stderr, " no character classes\n" ); + else + fprintf( stderr, + " %d/%d character classes needed %d/%d words of storage, %d reused\n", + lastccl, current_maxccls, + cclmap[lastccl] + ccllen[lastccl], + current_max_ccl_tbl_size, cclreuse ); + + fprintf( stderr, " %d state/nextstate pairs created\n", numsnpairs ); + fprintf( stderr, " %d/%d unique/duplicate transitions\n", + numuniq, numdup ); + + if ( fulltbl ) + { + tblsiz = lastdfa * numecs; + fprintf( stderr, " %d table entries\n", tblsiz ); + } + + else + { + tblsiz = 2 * (lastdfa + numtemps) + 2 * tblend; + + fprintf( stderr, 
" %d/%d base-def entries created\n", + lastdfa + numtemps, current_max_dfas ); + fprintf( stderr, " %d/%d (peak %d) nxt-chk entries created\n", + tblend, current_max_xpairs, peakpairs ); + fprintf( stderr, + " %d/%d (peak %d) template nxt-chk entries created\n", + numtemps * nummecs, current_max_template_xpairs, + numtemps * numecs ); + fprintf( stderr, " %d empty table entries\n", nummt ); + fprintf( stderr, " %d protos created\n", numprots ); + fprintf( stderr, " %d templates created, %d uses\n", + numtemps, tmpuses ); + } + + if ( useecs ) + { + tblsiz = tblsiz + csize; + fprintf( stderr, " %d/%d equivalence classes created\n", + numecs, csize ); + } + + if ( usemecs ) + { + tblsiz = tblsiz + numecs; + fprintf( stderr, " %d/%d meta-equivalence classes created\n", + nummecs, csize ); + } + + fprintf( stderr, " %d (%d saved) hash collisions, %d DFAs equal\n", + hshcol, hshsave, dfaeql ); + fprintf( stderr, " %d sets of reallocations needed\n", num_reallocs ); + fprintf( stderr, " %d total table entries needed\n", tblsiz ); + } + +#ifndef VMS + exit( status ); +#else + exit( status + 1 ); +#endif + } + + +/* flexinit - initialize flex + * + * synopsis + * int argc; + * char **argv; + * flexinit( argc, argv ); + */ + +void flexinit( argc, argv ) +int argc; +char **argv; + + { + int i, sawcmpflag; + char *arg, *flex_gettime(), *mktemp(); + + printstats = syntaxerror = trace = spprdflt = interactive = caseins = false; + backtrack_report = performance_report = ddebug = fulltbl = fullspd = false; + yymore_used = continued_action = reject = false; + yymore_really_used = reject_really_used = false; + gen_line_dirs = usemecs = useecs = true; + + sawcmpflag = false; + use_stdout = false; + + csize = DEFAULT_CSIZE; + + program_name = argv[0]; + + /* read flags */ + for ( --argc, ++argv; argc ; --argc, ++argv ) + { + if ( argv[0][0] != '-' || argv[0][1] == '\0' ) + break; + + arg = argv[0]; + + for ( i = 1; arg[i] != '\0'; ++i ) + switch ( arg[i] ) + { + case 'b': + 
backtrack_report = true; + break; + + case 'c': + fprintf( stderr, + "%s: Assuming use of deprecated -c flag is really intended to be -C\n", + program_name ); + + /* fall through */ + + case 'C': + if ( i != 1 ) + flexerror( "-C flag must be given separately" ); + + if ( ! sawcmpflag ) + { + useecs = false; + usemecs = false; + fulltbl = false; + sawcmpflag = true; + } + + for ( ++i; arg[i] != '\0'; ++i ) + switch ( arg[i] ) + { + case 'e': + useecs = true; + break; + + case 'F': + fullspd = true; + break; + + case 'f': + fulltbl = true; + break; + + case 'm': + usemecs = true; + break; + + default: + lerrif( "unknown -C option '%c'", + (int) arg[i] ); + break; + } + + goto get_next_arg; + + case 'd': + ddebug = true; + break; + + case 'f': + useecs = usemecs = false; + fulltbl = true; + break; + + case 'F': + useecs = usemecs = false; + fullspd = true; + break; + + case 'I': + interactive = true; + break; + + case 'i': + caseins = true; + break; + + case 'L': + gen_line_dirs = false; + break; + + case 'n': + /* stupid do-nothing deprecated option */ + break; + + case 'p': + performance_report = true; + break; + + case 'S': + if ( i != 1 ) + flexerror( "-S flag must be given separately" ); + + skelname = arg + i + 1; + goto get_next_arg; + + case 's': + spprdflt = true; + break; + + case 't': + use_stdout = true; + break; + + case 'T': + trace = true; + break; + + case 'v': + printstats = true; + break; + + case '8': + csize = CSIZE; + break; + + default: + lerrif( "unknown flag '%c'", (int) arg[i] ); + break; + } + +get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ + ; + } + + if ( (fulltbl || fullspd) && usemecs ) + flexerror( "full table and -Cm don't make sense together" ); + + if ( (fulltbl || fullspd) && interactive ) + flexerror( "full table and -I are (currently) incompatible" ); + + if ( fulltbl && fullspd ) + flexerror( "full table and -F are mutually exclusive" ); + + if ( ! 
skelname ) + { + static char skeleton_name_storage[400]; + + skelname = skeleton_name_storage; + (void) strcpy( skelname, DEFAULT_SKELETON_FILE ); + } + + if ( ! use_stdout ) + { + FILE *prev_stdout = freopen( outfile, "w", stdout ); + + if ( prev_stdout == NULL ) + lerrsf( "could not create %s", outfile ); + + outfile_created = 1; + } + + num_input_files = argc; + input_files = argv; + set_input_file( num_input_files > 0 ? input_files[0] : NULL ); + + if ( backtrack_report ) + { +#ifndef SHORT_FILE_NAMES + backtrack_file = fopen( "lex.backtrack", "w" ); +#else + backtrack_file = fopen( "lex.bck", "w" ); +#endif + + if ( backtrack_file == NULL ) + flexerror( "could not create lex.backtrack" ); + } + + else + backtrack_file = NULL; + + + lastccl = 0; + lastsc = 0; + + /* initialize the statistics */ + starttime = flex_gettime(); + + if ( (skelfile = fopen( skelname, "r" )) == NULL ) + lerrsf( "can't open skeleton file %s", skelname ); + +#ifndef ACK_MOD +#ifdef SYS_V + action_file_name = tmpnam( NULL ); +#endif +#endif + + if ( action_file_name == NULL ) + { + static char temp_action_file_name[32]; + +#ifndef SHORT_FILE_NAMES + (void) strcpy( temp_action_file_name, "/tmp/flexXXXXXX" ); +#else + (void) strcpy( temp_action_file_name, "flexXXXXXX.tmp" ); +#endif + (void) mktemp( temp_action_file_name ); + + action_file_name = temp_action_file_name; + } + + if ( (temp_action_file = fopen( action_file_name, "w" )) == NULL ) + lerrsf( "can't open temporary action file %s", action_file_name ); + + lastdfa = lastnfa = num_rules = numas = numsnpairs = tmpuses = 0; + numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; + numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; + num_backtracking = onesp = numprots = 0; + variable_trailing_context_rules = bol_needed = false; + + linenum = sectnum = 1; + firstprot = NIL; + + /* used in mkprot() so that the first proto goes in slot 1 + * of the proto queue + */ + lastprot = 1; + + if ( useecs ) + { /* set 
up doubly-linked equivalence classes */ + /* We loop all the way up to csize, since ecgroup[csize] is the + * position used for NUL characters + */ + ecgroup[1] = NIL; + + for ( i = 2; i <= csize; ++i ) + { + ecgroup[i] = i - 1; + nextecm[i - 1] = i; + } + + nextecm[csize] = NIL; + } + + else + { /* put everything in its own equivalence class */ + for ( i = 1; i <= csize; ++i ) + { + ecgroup[i] = i; + nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */ + } + } + + set_up_initial_allocations(); + } + + +/* readin - read in the rules section of the input file(s) + * + * synopsis + * readin(); + */ + +void readin() + + { + skelout(); + + if ( ddebug ) + puts( "#define FLEX_DEBUG" ); + + if ( csize == 256 ) + puts( "#define YY_CHAR unsigned char" ); + else + puts( "#define YY_CHAR char" ); + + line_directive_out( stdout ); + + if ( yyparse() ) + { + pinpoint_message( "fatal parse error" ); + flexend( 1 ); + } + + if ( xlation ) + { + numecs = ecs_from_xlation( ecgroup ); + useecs = true; + } + + else if ( useecs ) + numecs = cre8ecs( nextecm, ecgroup, csize ); + + else + numecs = csize; + + /* now map the equivalence class for NUL to its expected place */ + ecgroup[0] = ecgroup[csize]; + NUL_ec = abs( ecgroup[0] ); + + if ( useecs ) + ccl2ecl(); + } + + + +/* set_up_initial_allocations - allocate memory for internal tables */ + +void set_up_initial_allocations() + + { + current_mns = INITIAL_MNS; + firstst = allocate_integer_array( current_mns ); + lastst = allocate_integer_array( current_mns ); + finalst = allocate_integer_array( current_mns ); + transchar = allocate_integer_array( current_mns ); + trans1 = allocate_integer_array( current_mns ); + trans2 = allocate_integer_array( current_mns ); + accptnum = allocate_integer_array( current_mns ); + assoc_rule = allocate_integer_array( current_mns ); + state_type = allocate_integer_array( current_mns ); + + current_max_rules = INITIAL_MAX_RULES; + rule_type = allocate_integer_array( current_max_rules ); + rule_linenum = 
allocate_integer_array( current_max_rules ); + + current_max_scs = INITIAL_MAX_SCS; + scset = allocate_integer_array( current_max_scs ); + scbol = allocate_integer_array( current_max_scs ); + scxclu = allocate_integer_array( current_max_scs ); + sceof = allocate_integer_array( current_max_scs ); + scname = allocate_char_ptr_array( current_max_scs ); + actvsc = allocate_integer_array( current_max_scs ); + + current_maxccls = INITIAL_MAX_CCLS; + cclmap = allocate_integer_array( current_maxccls ); + ccllen = allocate_integer_array( current_maxccls ); + cclng = allocate_integer_array( current_maxccls ); + + current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE; + ccltbl = allocate_character_array( current_max_ccl_tbl_size ); + + current_max_dfa_size = INITIAL_MAX_DFA_SIZE; + + current_max_xpairs = INITIAL_MAX_XPAIRS; + nxt = allocate_integer_array( current_max_xpairs ); + chk = allocate_integer_array( current_max_xpairs ); + + current_max_template_xpairs = INITIAL_MAX_TEMPLATE_XPAIRS; + tnxt = allocate_integer_array( current_max_template_xpairs ); + + current_max_dfas = INITIAL_MAX_DFAS; + base = allocate_integer_array( current_max_dfas ); + def = allocate_integer_array( current_max_dfas ); + dfasiz = allocate_integer_array( current_max_dfas ); + accsiz = allocate_integer_array( current_max_dfas ); + dhash = allocate_integer_array( current_max_dfas ); + dss = allocate_int_ptr_array( current_max_dfas ); + dfaacc = allocate_dfaacc_union( current_max_dfas ); + + nultrans = (int *) 0; + } diff --git a/util/flex/makefile b/util/flex/makefile new file mode 100644 index 000000000..ba2a2ea52 --- /dev/null +++ b/util/flex/makefile @@ -0,0 +1,158 @@ +# Note: this is a modified version of Makefile, for ACK installation. The +# original flex makefile has a capital M (Makefile). + +EMHOME = ../.. 
+ +# make file for "flex" tool + +# Porting considerations: +# +# For System V Unix machines, add -DUSG to CFLAGS (if it's not +# automatically defined) +# For Vax/VMS, add "-DVMS -DUSG" to CFLAGS. +# For MS-DOS, add "-DMS_DOS -DUSG" to CFLAGS. Create \tmp if not present. +# You will also want to rename flex.skel to something with a three +# character extension and change SKELETON_FILE below appropriately. +# See MSDOS.notes for more info. +# For Amiga, add "-DAMIGA -DUSG" to CFLAGS. +# For SCO Unix, add "-DSCO_UNIX" to CFLAGS. +# +# For C compilers which don't know about "void", add -Dvoid=int to CFLAGS. +# +# If your C compiler is ANSI standard but does not include the <stdlib.h> +# header file (some installations of gcc have this problem), then add +# -DDONT_HAVE_STDLIB_H to CFLAGS. +# +# By default, flex will be configured to generate 8-bit scanners only +# if the -8 flag is given. If you want it to always generate 8-bit +# scanners, add "-DDEFAULT_CSIZE=256" to CFLAGS. Note that doing +# so will double the size of all uncompressed scanners. +# +# If on your system you have trouble building flex due to 8-bit +# character problems, remove the -8 from FLEX_FLAGS and the +# "#define FLEX_8_BIT_CHARS" from the beginning of flexdef.h. + + +# the first time around use "make first_flex" + + +# Installation targeting. Files will be installed under the tree rooted +# at DESTDIR. User commands will be installed in BINDIR, library files +# in LIBDIR (which will be created if necessary), auxiliary files in +# AUXDIR, manual pages will be installed in MANDIR with extension MANEXT. +# Raw, unformatted troff source will be installed if INSTALLMAN=man, nroff +# preformatted versions will be installed if INSTALLMAN=cat.
+DESTDIR = +BINDIR = $(EMHOME)/bin +AUXDIR = $(EMHOME)/lib/flex +MANDIR = $(EMHOME)/man +MANEXT = 1 +INSTALLMAN = man + +# MAKE = make + +SKELETON_FILE = \"`cd $(AUXDIR); pwd`/flex.skel\" +SKELFLAGS = -DDEFAULT_SKELETON_FILE=$(SKELETON_FILE) +CFLAGS = -O -Dvoid=int -DACK_MOD +LDFLAGS = -s + +COMPRESSION = +FLEX_FLAGS = -ist8 -Sflex.skel +# which "flex" to use to generate scan.c from scan.l +FLEX = ./flex +# CC = cc + +AR = ar +RANLIB = ranlib + +FLEXOBJS = \ + ccl.o \ + dfa.o \ + ecs.o \ + gen.o \ + main.o \ + misc.o \ + nfa.o \ + parse.o \ + scan.o \ + sym.o \ + tblcmp.o \ + yylex.o + +FLEX_C_SOURCES = \ + ccl.c \ + dfa.c \ + ecs.c \ + gen.c \ + main.c \ + misc.c \ + nfa.c \ + parse.c \ + scan.c \ + sym.c \ + tblcmp.c \ + yylex.c + + +all : flex + +flex : $(FLEXOBJS) + $(CC) $(CFLAGS) -o flex $(LDFLAGS) $(FLEXOBJS) + +first_flex: + cp initscan.c scan.c + $(MAKE) $(MFLAGS) flex + +parse.h parse.c : parse.y + $(YACC) -d parse.y + @mv y.tab.c parse.c + @mv y.tab.h parse.h + +scan.c : scan.l + $(FLEX) $(FLEX_FLAGS) $(COMPRESSION) scan.l >scan.c + +scan.o : scan.c parse.h flexdef.h + +main.o : main.c flexdef.h + -mkdir $(AUXDIR) + $(CC) $(CFLAGS) -c $(SKELFLAGS) main.c + +ccl.o : ccl.c flexdef.h +dfa.o : dfa.c flexdef.h +ecs.o : ecs.c flexdef.h +gen.o : gen.c flexdef.h +misc.o : misc.c flexdef.h +nfa.o : nfa.c flexdef.h +parse.o : parse.c flexdef.h +sym.o : sym.c flexdef.h +tblcmp.o : tblcmp.c flexdef.h +yylex.o : yylex.c flexdef.h + +lint : $(FLEX_C_SOURCES) + lint $(FLEX_C_SOURCES) > flex.lint + +install: first_flex flex.skel + rm -f $(BINDIR)/flex + cp flex $(BINDIR)/flex + cp flex.skel $(AUXDIR)/flex.skel + cp flex.1 $(MANDIR)/flex.1 + cp flexdoc.1 $(MANDIR)/flexdoc.1 + +clean : + rm -f core errs flex *.o parse.c *.lint parse.h tags + +tags : + ctags $(FLEX_C_SOURCES) + +test : flex + ./flex $(FLEX_FLAGS) $(COMPRESSION) scan.l | diff scan.c - + +bigtest : + rm -f scan.c ; $(MAKE) COMPRESSION="-C" test + rm -f scan.c ; $(MAKE) COMPRESSION="-Ce" test + rm -f scan.c ;
$(MAKE) COMPRESSION="-Cm" test + rm -f scan.c ; $(MAKE) COMPRESSION="-Cfe" test + rm -f scan.c ; $(MAKE) COMPRESSION="-CFe" test + rm -f scan.c ; $(MAKE) COMPRESSION="-Cf" test + rm -f scan.c ; $(MAKE) COMPRESSION="-CF" test + rm -f scan.c ; $(MAKE) diff --git a/util/flex/misc.c b/util/flex/misc.c new file mode 100644 index 000000000..18f5727ed --- /dev/null +++ b/util/flex/misc.c @@ -0,0 +1,826 @@ +/* misc - miscellaneous flex routines */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + +#include <ctype.h> +#include "flexdef.h" + + +/* ANSI C does not guarantee that isascii() is defined */ +#ifndef isascii +#define isascii(c) ((c) <= 0177) +#endif + + + +/* declare functions that have forward references */ + +void dataflush PROTO(()); +int otoi PROTO((Char [])); + + +/* action_out - write the actions from the temporary file to lex.yy.c + * + * synopsis + * action_out(); + * + * Copies the action file up to %% (or end-of-file) to lex.yy.c + */ + +void action_out() + + { + char buf[MAXLINE]; + + while ( fgets( buf, MAXLINE, temp_action_file ) != NULL ) + if ( buf[0] == '%' && buf[1] == '%' ) + break; + else + fputs( buf, stdout ); + } + + +/* allocate_array - allocate memory for an integer array of the given size */ + +void *allocate_array( size, element_size ) +int size, element_size; + + { + register void *mem; + + /* on 16-bit int machines (e.g., 80286) we might be trying to + * allocate more than a signed int can hold, and that won't + * work. Cheap test: + */ + if ( element_size * size <= 0 ) + flexfatal( "request for < 1 byte in allocate_array()" ); + + mem = (void *) malloc( (unsigned) (element_size * size) ); + + if ( mem == NULL ) + flexfatal( "memory allocation failed in allocate_array()" ); + + return ( mem ); + } + + +/* all_lower - true if a string is all lower-case + * + * synopsis: + * Char *str; + * int all_lower(); + * true/false = all_lower( str ); + */ + +int all_lower( str ) +register Char *str; + + { + while ( *str ) + { + if ( ! isascii( *str ) || ! islower( *str ) ) + return ( 0 ); + ++str; + } + + return ( 1 ); + } + + +/* all_upper - true if a string is all upper-case + * + * synopsis: + * Char *str; + * int all_upper(); + * true/false = all_upper( str ); + */ + +int all_upper( str ) +register Char *str; + + { + while ( *str ) + { + if ( ! isascii( *str ) || !
isupper( (char) *str ) ) + return ( 0 ); + ++str; + } + + return ( 1 ); + } + + +/* bubble - bubble sort an integer array in increasing order + * + * synopsis + * int v[n], n; + * bubble( v, n ); + * + * description + * sorts the first n elements of array v and replaces them in + * increasing order. + * + * passed + * v - the array to be sorted + * n - the number of elements of 'v' to be sorted */ + +void bubble( v, n ) +int v[], n; + + { + register int i, j, k; + + for ( i = n; i > 1; --i ) + for ( j = 1; j < i; ++j ) + if ( v[j] > v[j + 1] ) /* compare */ + { + k = v[j]; /* exchange */ + v[j] = v[j + 1]; + v[j + 1] = k; + } + } + + +/* clower - replace upper-case letter to lower-case + * + * synopsis: + * Char clower(); + * int c; + * c = clower( c ); + */ + +Char clower( c ) +register int c; + + { + return ( (isascii( c ) && isupper( c )) ? tolower( c ) : c ); + } + + +/* copy_string - returns a dynamically allocated copy of a string + * + * synopsis + * char *str, *copy, *copy_string(); + * copy = copy_string( str ); + */ + +char *copy_string( str ) +register char *str; + + { + register char *c; + char *copy; + + /* find length */ + for ( c = str; *c; ++c ) + ; + + copy = malloc( (unsigned) ((c - str + 1) * sizeof( char )) ); + + if ( copy == NULL ) + flexfatal( "dynamic memory failure in copy_string()" ); + + for ( c = copy; (*c++ = *str++); ) + ; + + return ( copy ); + } + + +/* copy_unsigned_string - + * returns a dynamically allocated copy of a (potentially) unsigned string + * + * synopsis + * Char *str, *copy, *copy_unsigned_string(); + * copy = copy_unsigned_string( str ); + */ + +Char *copy_unsigned_string( str ) +register Char *str; + + { + register Char *c; + Char *copy; + + /* find length */ + for ( c = str; *c; ++c ) + ; + + copy = (Char *) malloc( (unsigned) ((c - str + 1) * sizeof( Char )) ); + + if ( copy == NULL ) + flexfatal( "dynamic memory failure in copy_unsigned_string()" ); + + for ( c = copy; (*c++ = *str++); ) + ; + + return ( copy ); + 
} + + +/* cshell - shell sort a character array in increasing order + * + * synopsis + * + * Char v[n]; + * int n, special_case_0; + * cshell( v, n, special_case_0 ); + * + * description + * does a shell sort of the first n elements of array v. + * If special_case_0 is true, then any element equal to 0 + * is instead assumed to have infinite weight. + * + * passed + * v - array to be sorted + * n - number of elements of v to be sorted + */ + +void cshell( v, n, special_case_0 ) +Char v[]; +int n, special_case_0; + + { + int gap, i, j, jg; + Char k; + + for ( gap = n / 2; gap > 0; gap = gap / 2 ) + for ( i = gap; i < n; ++i ) + for ( j = i - gap; j >= 0; j = j - gap ) + { + jg = j + gap; + + if ( special_case_0 ) + { + if ( v[jg] == 0 ) + break; + + else if ( v[j] != 0 && v[j] <= v[jg] ) + break; + } + + else if ( v[j] <= v[jg] ) + break; + + k = v[j]; + v[j] = v[jg]; + v[jg] = k; + } + } + + +/* dataend - finish up a block of data declarations + * + * synopsis + * dataend(); + */ + +void dataend() + + { + if ( datapos > 0 ) + dataflush(); + + /* add terminator for initialization */ + puts( " } ;\n" ); + + dataline = 0; + datapos = 0; + } + + + +/* dataflush - flush generated data statements + * + * synopsis + * dataflush(); + */ + +void dataflush() + + { + putchar( '\n' ); + + if ( ++dataline >= NUMDATALINES ) + { + /* put out a blank line so that the table is grouped into + * large blocks that enable the user to find elements easily + */ + putchar( '\n' ); + dataline = 0; + } + + /* reset the number of characters written on the current line */ + datapos = 0; + } + + +/* flexerror - report an error message and terminate + * + * synopsis + * char msg[]; + * flexerror( msg ); + */ + +void flexerror( msg ) +char msg[]; + + { + fprintf( stderr, "%s: %s\n", program_name, msg ); + + flexend( 1 ); + } + + +/* flexfatal - report a fatal error message and terminate + * + * synopsis + * char msg[]; + * flexfatal( msg ); + */ + +void flexfatal( msg ) +char msg[]; + + { + 
fprintf( stderr, "%s: fatal internal error, %s\n", program_name, msg ); + flexend( 1 ); + } + + +/* flex_gettime - return current time + * + * synopsis + * char *flex_gettime(), *time_str; + * time_str = flex_gettime(); + * + * note + * the routine name has the "flex_" prefix because of name clashes + * with Turbo-C + */ + +/* include sys/types.h to use time_t and make lint happy */ + +#ifndef MS_DOS +#ifndef VMS +#include +#else +#include +#endif +#endif + +#ifdef MS_DOS +#include +typedef long time_t; +#endif + +char *flex_gettime() + + { + time_t t, time(); + char *result, *ctime(), *copy_string(); + + t = time( (long *) 0 ); + + result = copy_string( ctime( &t ) ); + + /* get rid of trailing newline */ + result[24] = '\0'; + + return ( result ); + } + + +/* lerrif - report an error message formatted with one integer argument + * + * synopsis + * char msg[]; + * int arg; + * lerrif( msg, arg ); + */ + +void lerrif( msg, arg ) +char msg[]; +int arg; + + { + char errmsg[MAXLINE]; + (void) sprintf( errmsg, msg, arg ); + flexerror( errmsg ); + } + + +/* lerrsf - report an error message formatted with one string argument + * + * synopsis + * char msg[], arg[]; + * lerrsf( msg, arg ); + */ + +void lerrsf( msg, arg ) +char msg[], arg[]; + + { + char errmsg[MAXLINE]; + + (void) sprintf( errmsg, msg, arg ); + flexerror( errmsg ); + } + + +/* htoi - convert a hexadecimal digit string to an integer value + * + * synopsis: + * int val, htoi(); + * Char str[]; + * val = htoi( str ); + */ + +int htoi( str ) +Char str[]; + + { + int result; + + (void) sscanf( (char *) str, "%x", &result ); + + return ( result ); + } + + +/* is_hex_digit - returns true if a character is a valid hex digit, false + * otherwise + * + * synopsis: + * int true_or_false, is_hex_digit(); + * int ch; + * val = is_hex_digit( ch ); + */ + +int is_hex_digit( ch ) +int ch; + + { + if ( isdigit( ch ) ) + return ( 1 ); + + switch ( clower( ch ) ) + { + case 'a': + case 'b': + case 'c': + case 'd': + case 
'e': + case 'f': + return ( 1 ); + + default: + return ( 0 ); + } + } + + +/* line_directive_out - spit out a "# line" statement */ + +void line_directive_out( output_file_name ) +FILE *output_file_name; + + { + if ( infilename && gen_line_dirs ) + fprintf( output_file_name, "# line %d \"%s\"\n", linenum, infilename ); + } + + +/* mk2data - generate a data statement for a two-dimensional array + * + * synopsis + * int value; + * mk2data( value ); + * + * generates a data statement initializing the current 2-D array to "value" + */ +void mk2data( value ) +int value; + + { + if ( datapos >= NUMDATAITEMS ) + { + putchar( ',' ); + dataflush(); + } + + if ( datapos == 0 ) + /* indent */ + fputs( " ", stdout ); + + else + putchar( ',' ); + + ++datapos; + + printf( "%5d", value ); + } + + +/* mkdata - generate a data statement + * + * synopsis + * int value; + * mkdata( value ); + * + * generates a data statement initializing the current array element to + * "value" + */ +void mkdata( value ) +int value; + + { + if ( datapos >= NUMDATAITEMS ) + { + putchar( ',' ); + dataflush(); + } + + if ( datapos == 0 ) + /* indent */ + fputs( " ", stdout ); + + else + putchar( ',' ); + + ++datapos; + + printf( "%5d", value ); + } + + +/* myctoi - return the integer represented by a string of digits + * + * synopsis + * Char array[]; + * int val, myctoi(); + * val = myctoi( array ); + * + */ + +int myctoi( array ) +Char array[]; + + { + int val = 0; + + (void) sscanf( (char *) array, "%d", &val ); + + return ( val ); + } + + +/* myesc - return character corresponding to escape sequence + * + * synopsis + * Char array[], c, myesc(); + * c = myesc( array ); + * + */ + +Char myesc( array ) +Char array[]; + + { + Char c, esc_char; + register int sptr; + + switch ( array[1] ) + { + case 'a': return ( '\a' ); + case 'b': return ( '\b' ); + case 'f': return ( '\f' ); + case 'n': return ( '\n' ); + case 'r': return ( '\r' ); + case 't': return ( '\t' ); + case 'v': return ( '\v' ); + + case 
'0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { /* \<octal> */ + sptr = 1; + + while ( isascii( array[sptr] ) && isdigit( array[sptr] ) ) + /* don't increment inside loop control because if + * isdigit() is a macro it might expand into multiple + * increments ... + */ + ++sptr; + + c = array[sptr]; + array[sptr] = '\0'; + + esc_char = otoi( array + 1 ); + + array[sptr] = c; + + return ( esc_char ); + } + + case 'x': + { /* \x<hex> */ + int sptr = 2; + + while ( isascii( array[sptr] ) && is_hex_digit( array[sptr] ) ) + /* don't increment inside loop control because if + * isdigit() is a macro it might expand into multiple + * increments ... + */ + ++sptr; + + c = array[sptr]; + array[sptr] = '\0'; + + esc_char = htoi( array + 2 ); + + array[sptr] = c; + + return ( esc_char ); + } + + default: + return ( array[1] ); + } + } + + +/* otoi - convert an octal digit string to an integer value + * + * synopsis: + * int val, otoi(); + * Char str[]; + * val = otoi( str ); + */ + +int otoi( str ) +Char str[]; + + { + int result; + + (void) sscanf( (char *) str, "%o", &result ); + + return ( result ); + } + + +/* readable_form - return the human-readable form of a character + * + * synopsis: + * int c; + * char *readable_form(); + * <string> = readable_form( c ); + * + * The returned string is in static storage.
+ */ + +char *readable_form( c ) +register int c; + + { + static char rform[10]; + + if ( (c >= 0 && c < 32) || c >= 127 ) + { + switch ( c ) + { + case '\n': return ( "\\n" ); + case '\t': return ( "\\t" ); + case '\f': return ( "\\f" ); + case '\r': return ( "\\r" ); + case '\b': return ( "\\b" ); + + default: + (void) sprintf( rform, "\\%.3o", c ); + return ( rform ); + } + } + + else if ( c == ' ' ) + return ( "' '" ); + + else + { + rform[0] = c; + rform[1] = '\0'; + + return ( rform ); + } + } + + +/* reallocate_array - increase the size of a dynamic array */ + +void *reallocate_array( array, size, element_size ) +void *array; +int size, element_size; + + { + register void *new_array; + + /* same worry as in allocate_array(): */ + if ( size * element_size <= 0 ) + flexfatal( "attempt to increase array size by less than 1 byte" ); + + new_array = + (void *) realloc( (char *)array, (unsigned) (size * element_size )); + + if ( new_array == NULL ) + flexfatal( "attempt to increase array size failed" ); + + return ( new_array ); + } + + +/* skelout - write out one section of the skeleton file + * + * synopsis + * skelout(); + * + * DESCRIPTION + * Copies from skelfile to stdout until a line beginning with "%%" or + * EOF is found. + */ +void skelout() + + { + char buf[MAXLINE]; + + while ( fgets( buf, MAXLINE, skelfile ) != NULL ) + if ( buf[0] == '%' && buf[1] == '%' ) + break; + else + fputs( buf, stdout ); + } + + +/* transition_struct_out - output a yy_trans_info structure + * + * synopsis + * int element_v, element_n; + * transition_struct_out( element_v, element_n ); + * + * outputs the yy_trans_info structure with the two elements, element_v and + * element_n. Formats the output with spaces and carriage returns. 
+ */ + +void transition_struct_out( element_v, element_n ) +int element_v, element_n; + + { + printf( "%7d, %5d,", element_v, element_n ); + + datapos += TRANS_STRUCT_PRINT_LENGTH; + + if ( datapos >= 75 ) /* current output line is full: start a new one */ + { + putchar( '\n' ); + + if ( ++dataline % 10 == 0 ) /* extra blank line after every 10th output line */ + putchar( '\n' ); + + datapos = 0; + } + } diff --git a/util/flex/nfa.c b/util/flex/nfa.c new file mode 100644 index 000000000..a2d04c322 --- /dev/null +++ b/util/flex/nfa.c @@ -0,0 +1,717 @@ +/* nfa - NFA construction routines */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */ + +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + +#include "flexdef.h" + + +/* declare functions that have forward references */ + +int dupmachine PROTO((int)); +void mkxtion PROTO((int, int)); + + +/* add_accept - add an accepting state to a machine + * + * synopsis + * + * add_accept( mach, accepting_number ); + * + * accepting_number becomes mach's accepting number. + */ + +void add_accept( mach, accepting_number ) +int mach, accepting_number; + + { + /* hang the accepting number off an epsilon state. if it is associated + * with a state that has a non-epsilon out-transition, then the state + * will accept BEFORE it makes that transition, i.e., one character + * too soon + */ + + if ( transchar[finalst[mach]] == SYM_EPSILON ) + accptnum[finalst[mach]] = accepting_number; + + else + { + int astate = mkstate( SYM_EPSILON ); + accptnum[astate] = accepting_number; + mach = link_machines( mach, astate ); /* link_machines() makes astate the final state of mach */ + } + } + + +/* copysingl - make a given number of copies of a singleton machine + * + * synopsis + * + * newsng = copysingl( singl, num ); + * + * newsng - a new singleton composed of num copies of singl + * singl - a singleton machine + * num - the number of copies of singl to be present in newsng + */ + +int copysingl( singl, num ) +int singl, num; + + { + int copy, i; + + copy = mkstate( SYM_EPSILON ); + + for ( i = 1; i <= num; ++i ) + copy = link_machines( copy, dupmachine( singl ) ); + + return ( copy ); + } + + +/* dumpnfa - debugging routine to write out an nfa + * + * synopsis + * int state1; + * dumpnfa( state1 ); + */ + +void dumpnfa( state1 ) +int state1; + + { + int sym, tsp1, tsp2, anum, ns; + + fprintf( stderr, "\n\n********** beginning dump of nfa with start state %d\n", + state1 ); + + /* we probably should loop starting at firstst[state1] and going to + * lastst[state1], but they're not maintained properly when we "or" + * all of the rules together.
So we use our knowledge that the machine + * starts at state 1 and ends at lastnfa. + */ + + /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */ + for ( ns = 1; ns <= lastnfa; ++ns ) + { + fprintf( stderr, "state # %4d\t", ns ); + + sym = transchar[ns]; + tsp1 = trans1[ns]; + tsp2 = trans2[ns]; + anum = accptnum[ns]; + + fprintf( stderr, "%3d: %4d, %4d", sym, tsp1, tsp2 ); + + if ( anum != NIL ) + fprintf( stderr, " [%d]", anum ); + + fprintf( stderr, "\n" ); + } + + fprintf( stderr, "********** end of dump\n" ); + } + + +/* dupmachine - make a duplicate of a given machine + * + * synopsis + * + * copy = dupmachine( mach ); + * + * copy - holds duplicate of mach + * mach - machine to be duplicated + * + * note that the copy of mach is NOT an exact duplicate; rather, all the + * transition state values are adjusted so that the copy is self-contained, + * as the original should have been. + * + * also note that the original MUST be contiguous, with its low and high + * states accessible by the arrays firstst and lastst + */ + +int dupmachine( mach ) +int mach; + + { + int i, init, state_offset; + int state = 0; + int last = lastst[mach]; + + for ( i = firstst[mach]; i <= last; ++i ) + { + state = mkstate( transchar[i] ); + + if ( trans1[i] != NO_TRANSITION ) + { + mkxtion( finalst[state], trans1[i] + state - i ); + + if ( transchar[i] == SYM_EPSILON && trans2[i] != NO_TRANSITION ) + mkxtion( finalst[state], trans2[i] + state - i ); + } + + accptnum[state] = accptnum[i]; + } + + if ( state == 0 ) + flexfatal( "empty machine in dupmachine()" ); + + state_offset = state - i + 1; /* the loop left i == last + 1, so this is state - last: the distance from each original state to its copy */ + + init = mach + state_offset; + firstst[init] = firstst[mach] + state_offset; + finalst[init] = finalst[mach] + state_offset; + lastst[init] = lastst[mach] + state_offset; + + return ( init ); + } + + +/* finish_rule - finish up the processing for a rule + * + * synopsis + * + * finish_rule( mach, variable_trail_rule, headcnt, trailcnt ); + * + * An accepting number is added to the
given machine. If variable_trail_rule + * is true then the rule has trailing context and both the head and trail + * are variable size. Otherwise if headcnt or trailcnt is non-zero then + * the machine recognizes a pattern with trailing context and headcnt is + * the number of characters in the matched part of the pattern, or zero + * if the matched part has variable length. trailcnt is the number of + * trailing context characters in the pattern, or zero if the trailing + * context has variable length. + */ + +void finish_rule( mach, variable_trail_rule, headcnt, trailcnt ) +int mach, variable_trail_rule, headcnt, trailcnt; + + { + add_accept( mach, num_rules ); + + /* we did this in new_rule(), but it often gets the wrong + * number because we do it before we start parsing the current rule + */ + rule_linenum[num_rules] = linenum; + + /* if this is a continued action, then the line-number has + * already been updated, giving us the wrong number + */ + if ( continued_action ) + --rule_linenum[num_rules]; + + fprintf( temp_action_file, "case %d:\n", num_rules ); + + if ( variable_trail_rule ) + { + rule_type[num_rules] = RULE_VARIABLE; + + if ( performance_report ) + fprintf( stderr, "Variable trailing context rule at line %d\n", + rule_linenum[num_rules] ); + + variable_trailing_context_rules = true; + } + + else + { + rule_type[num_rules] = RULE_NORMAL; + + if ( headcnt > 0 || trailcnt > 0 ) + { + /* do trailing context magic to not match the trailing characters */ + char *scanner_cp = "yy_c_buf_p = yy_cp"; + char *scanner_bp = "yy_bp"; + + fprintf( temp_action_file, + "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" ); + + if ( headcnt > 0 ) /* fixed-size head: emits "yy_c_buf_p = yy_cp = yy_bp + N;" */ + fprintf( temp_action_file, "%s = %s + %d;\n", + scanner_cp, scanner_bp, headcnt ); + + else /* fixed-size trail only: emits "yy_c_buf_p = yy_cp -= N;" */ + fprintf( temp_action_file, + "%s -= %d;\n", scanner_cp, trailcnt ); + + fprintf( temp_action_file, + "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" ); + } + } + + line_directive_out( temp_action_file ); + } +
+ +/* link_machines - connect two machines together + * + * synopsis + * + * new = link_machines( first, last ); + * + * new - a machine constructed by connecting first to last + * first - the machine whose successor is to be last + * last - the machine whose predecessor is to be first + * + * note: this routine concatenates the machine first with the machine + * last to produce a machine new which will pattern-match first first + * and then last, and will fail if either of the sub-patterns fails. + * FIRST is set to new by the operation. last is unmolested. + */ + +int link_machines( first, last ) +int first, last; + + { + if ( first == NIL ) + return ( last ); + + else if ( last == NIL ) + return ( first ); + + else + { + mkxtion( finalst[first], last ); /* first's final state now leads into last */ + finalst[first] = finalst[last]; /* the combined machine ends where last ends */ + lastst[first] = max( lastst[first], lastst[last] ); + firstst[first] = min( firstst[first], firstst[last] ); + + return ( first ); + } + } + + +/* mark_beginning_as_normal - mark each "beginning" state in a machine + * as being a "normal" (i.e., not trailing context- + * associated) state + * + * synopsis + * + * mark_beginning_as_normal( mach ) + * + * mach - machine to mark + * + * The "beginning" states are the epsilon closure of the first state + */ + +void mark_beginning_as_normal( mach ) +register int mach; + + { + switch ( state_type[mach] ) + { + case STATE_NORMAL: + /* oh, we've already visited here */ + return; + + case STATE_TRAILING_CONTEXT: + state_type[mach] = STATE_NORMAL; + + if ( transchar[mach] == SYM_EPSILON ) + { + if ( trans1[mach] != NO_TRANSITION ) + mark_beginning_as_normal( trans1[mach] ); + + if ( trans2[mach] != NO_TRANSITION ) + mark_beginning_as_normal( trans2[mach] ); + } + break; + + default: + flexerror( "bad state type in mark_beginning_as_normal()" ); + break; + } + } + + +/* mkbranch - make a machine that branches to two machines + * + * synopsis + * + * branch = mkbranch( first, second ); + * + * branch - a machine which matches either
first's pattern or second's + * first, second - machines whose patterns are to be or'ed (the | operator) + * + * note that NEITHER first nor second is destroyed by the operation. Also, + * the resulting machine CANNOT be used with any other "mk" operation except + * more mkbranch's. Compare with mkor() + */ + +int mkbranch( first, second ) +int first, second; + + { + int eps; + + if ( first == NO_TRANSITION ) + return ( second ); + + else if ( second == NO_TRANSITION ) + return ( first ); + + eps = mkstate( SYM_EPSILON ); + + mkxtion( eps, first ); + mkxtion( eps, second ); + + return ( eps ); + } + + +/* mkclos - convert a machine into a closure + * + * synopsis + * new = mkclos( state ); + * + * new - a new state which matches the closure of "state" + */ + +int mkclos( state ) +int state; + + { + return ( mkopt( mkposcl( state ) ) ); + } + + +/* mkopt - make a machine optional + * + * synopsis + * + * new = mkopt( mach ); + * + * new - a machine which optionally matches whatever mach matched + * mach - the machine to make optional + * + * notes: + * 1. mach must be the last machine created + * 2. mach is destroyed by the call + */ + +int mkopt( mach ) +int mach; + + { + int eps; + + if ( !
SUPER_FREE_EPSILON(finalst[mach]) ) + { + eps = mkstate( SYM_EPSILON ); + mach = link_machines( mach, eps ); + } + + /* can't skimp on the following if FREE_EPSILON(mach) is true because + * some state interior to "mach" might point back to the beginning + * for a closure + */ + eps = mkstate( SYM_EPSILON ); + mach = link_machines( eps, mach ); + + mkxtion( mach, finalst[mach] ); /* edge from the new start state straight to the final state: lets the machine match nothing */ + + return ( mach ); + } + + +/* mkor - make a machine that matches either one of two machines + * + * synopsis + * + * new = mkor( first, second ); + * + * new - a machine which matches either first's pattern or second's + * first, second - machines whose patterns are to be or'ed (the | operator) + * + * note that first and second are both destroyed by the operation + * the code is rather convoluted because an attempt is made to minimize + * the number of epsilon states needed + */ + +int mkor( first, second ) +int first, second; + + { + int eps, orend; + + if ( first == NIL ) + return ( second ); + + else if ( second == NIL ) + return ( first ); + + else + { + /* see comment in mkopt() about why we can't use the first state + * of "first" or "second" if they satisfy "FREE_EPSILON" + */ + eps = mkstate( SYM_EPSILON ); + + first = link_machines( eps, first ); + + mkxtion( first, second ); + + if ( SUPER_FREE_EPSILON(finalst[first]) && + accptnum[finalst[first]] == NIL ) + { + orend = finalst[first]; + mkxtion( finalst[second], orend ); + } + + else if ( SUPER_FREE_EPSILON(finalst[second]) && + accptnum[finalst[second]] == NIL ) + { + orend = finalst[second]; + mkxtion( finalst[first], orend ); + } + + else + { + eps = mkstate( SYM_EPSILON ); + + first = link_machines( first, eps ); + orend = finalst[first]; + + mkxtion( finalst[second], orend ); + } + } + + finalst[first] = orend; + return ( first ); + } + + +/* mkposcl - convert a machine into a positive closure + * + * synopsis + * new = mkposcl( state ); + * + * new - a machine matching the positive closure of "state" + */ + +int
mkposcl( state ) +int state; + + { + int eps; + + if ( SUPER_FREE_EPSILON(finalst[state]) ) + { + mkxtion( finalst[state], state ); /* loop from the final state back to the start */ + return ( state ); + } + + else + { + eps = mkstate( SYM_EPSILON ); + mkxtion( eps, state ); + return ( link_machines( state, eps ) ); + } + } + + +/* mkrep - make a replicated machine + * + * synopsis + * new = mkrep( mach, lb, ub ); + * + * new - a machine that matches whatever "mach" matched from "lb" + * number of times to "ub" number of times + * + * note + * if "ub" is INFINITY then "new" matches "lb" or more occurrences of "mach" + */ + +int mkrep( mach, lb, ub ) +int mach, lb, ub; + + { + int base_mach, tail, copy, i; + + base_mach = copysingl( mach, lb - 1 ); /* lb - 1 copies; mach itself is linked in front of them below */ + + if ( ub == INFINITY ) + { + copy = dupmachine( mach ); + mach = link_machines( mach, + link_machines( base_mach, mkclos( copy ) ) ); + } + + else + { + tail = mkstate( SYM_EPSILON ); + + for ( i = lb; i < ub; ++i ) + { + copy = dupmachine( mach ); + tail = mkopt( link_machines( copy, tail ) ); + } + + mach = link_machines( mach, link_machines( base_mach, tail ) ); + } + + return ( mach ); + } + + +/* mkstate - create a state with a transition on a given symbol + * + * synopsis + * + * state = mkstate( sym ); + * + * state - a new state matching sym + * sym - the symbol the new state is to have an out-transition on + * + * note that this routine makes new states in ascending order through the + * state array (and increments LASTNFA accordingly). The routine DUPMACHINE + * relies on machines being made in ascending order and that they are + * CONTIGUOUS.
Change it and you will have to rewrite DUPMACHINE (kludge + * that it admittedly is) + */ + +int mkstate( sym ) +int sym; + + { + if ( ++lastnfa >= current_mns ) + { + if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS ) + lerrif( "input rules are too complicated (>= %d NFA states)", + current_mns ); + + ++num_reallocs; + + firstst = reallocate_integer_array( firstst, current_mns ); + lastst = reallocate_integer_array( lastst, current_mns ); + finalst = reallocate_integer_array( finalst, current_mns ); + transchar = reallocate_integer_array( transchar, current_mns ); + trans1 = reallocate_integer_array( trans1, current_mns ); + trans2 = reallocate_integer_array( trans2, current_mns ); + accptnum = reallocate_integer_array( accptnum, current_mns ); + assoc_rule = reallocate_integer_array( assoc_rule, current_mns ); + state_type = reallocate_integer_array( state_type, current_mns ); + } + + firstst[lastnfa] = lastnfa; + finalst[lastnfa] = lastnfa; + lastst[lastnfa] = lastnfa; + transchar[lastnfa] = sym; + trans1[lastnfa] = NO_TRANSITION; + trans2[lastnfa] = NO_TRANSITION; + accptnum[lastnfa] = NIL; + assoc_rule[lastnfa] = num_rules; + state_type[lastnfa] = current_state_type; + + /* fix up equivalence classes based on this transition. Note that any + * character which has its own transition gets its own equivalence class. + * Thus only characters which are only in character classes have a chance + * at being in the same equivalence class. E.g. "a|b" puts 'a' and 'b' + * into two different equivalence classes. "[ab]" puts them in the same + * equivalence class (barring other differences elsewhere in the input). + */ + + if ( sym < 0 ) + { + /* we don't have to update the equivalence classes since that was + * already done when the ccl was created for the first time + */ + } + + else if ( sym == SYM_EPSILON ) + ++numeps; + + else + { + if ( useecs ) + /* map NUL's to csize */ + mkechar( sym ?
sym : csize, nextecm, ecgroup ); + } + + return ( lastnfa ); + } + + +/* mkxtion - make a transition from one state to another + * + * synopsis + * + * mkxtion( statefrom, stateto ); + * + * statefrom - the state from which the transition is to be made + * stateto - the state to which the transition is to be made + */ + +void mkxtion( statefrom, stateto ) +int statefrom, stateto; + + { + if ( trans1[statefrom] == NO_TRANSITION ) + trans1[statefrom] = stateto; + + else if ( (transchar[statefrom] != SYM_EPSILON) || + (trans2[statefrom] != NO_TRANSITION) ) + flexfatal( "found too many transitions in mkxtion()" ); + + else + { /* second out-transition for an epsilon state */ + ++eps2; + trans2[statefrom] = stateto; + } + } + +/* new_rule - initialize for a new rule + * + * synopsis + * + * new_rule(); + * + * the global num_rules is incremented and any corresponding dynamic + * arrays (such as rule_type[]) are grown as needed. + */ + +void new_rule() + + { + if ( ++num_rules >= current_max_rules ) + { + ++num_reallocs; + current_max_rules += MAX_RULES_INCREMENT; + rule_type = reallocate_integer_array( rule_type, current_max_rules ); + rule_linenum = + reallocate_integer_array( rule_linenum, current_max_rules ); + } + + if ( num_rules > MAX_RULE ) + lerrif( "too many rules (> %d)!", MAX_RULE ); + + rule_linenum[num_rules] = linenum; + } diff --git a/util/flex/parse.y b/util/flex/parse.y new file mode 100644 index 000000000..f74d32c63 --- /dev/null +++ b/util/flex/parse.y @@ -0,0 +1,702 @@ + +/* parse.y - parser for flex input */ + +%token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL EOF_OP + +%{ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no.
DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + +#include "flexdef.h" + +int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen; /* parser working state; headcnt, trailcnt and rulelen are reset to 0 before each rule */ +int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule; /* flags; trlcontxt, varlength and variable_trail_rule are cleared before each rule */ +Char clower(); + +static int madeany = false; /* whether we've made the '.'
character class */ +int previous_continued_action; /* whether the previous rule's action was '|' */ + +%} + +%% +goal : initlex sect1 sect1end sect2 initforrule + { /* add default rule */ + int def_rule; + + pat = cclinit(); + cclnegate( pat ); + + def_rule = mkstate( -pat ); + + finish_rule( def_rule, false, 0, 0 ); + + for ( i = 1; i <= lastsc; ++i ) + scset[i] = mkbranch( scset[i], def_rule ); + + if ( spprdflt ) + fputs( "YY_FATAL_ERROR( \"flex scanner jammed\" )", + temp_action_file ); + else + fputs( "ECHO", temp_action_file ); + + fputs( ";\n\tYY_BREAK\n", temp_action_file ); + } + ; + +initlex : + { + /* initialize for processing rules */ + + /* create default DFA start condition */ + scinstal( "INITIAL", false ); + } + ; + +sect1 : sect1 startconddecl WHITESPACE namelist1 '\n' + | + | error '\n' + { synerr( "unknown error processing section 1" ); } + ; + +sect1end : SECTEND + ; + +startconddecl : SCDECL + { + /* these productions are separate from the s1object + * rule because the semantics must be done before + * we parse the remainder of an s1object + */ + + xcluflg = false; + } + + | XSCDECL + { xcluflg = true; } + ; + +namelist1 : namelist1 WHITESPACE NAME + { scinstal( nmstr, xcluflg ); } + + | NAME + { scinstal( nmstr, xcluflg ); } + + | error + { synerr( "bad start condition list" ); } + ; + +sect2 : sect2 initforrule flexrule '\n' + | + ; + +initforrule : + { + /* initialize for a parse of one rule */ + trlcontxt = variable_trail_rule = varlength = false; + trailcnt = headcnt = rulelen = 0; + current_state_type = STATE_NORMAL; + previous_continued_action = continued_action; + new_rule(); + } + ; + +flexrule : scon '^' rule + { + pat = $3; + finish_rule( pat, variable_trail_rule, + headcnt, trailcnt ); + + for ( i = 1; i <= actvp; ++i ) + scbol[actvsc[i]] = + mkbranch( scbol[actvsc[i]], pat ); + + if ( !
bol_needed ) + { + bol_needed = true; + + if ( performance_report ) + pinpoint_message( + "'^' operator results in sub-optimal performance" ); + } + } + + | scon rule + { + pat = $2; + finish_rule( pat, variable_trail_rule, + headcnt, trailcnt ); + + for ( i = 1; i <= actvp; ++i ) + scset[actvsc[i]] = + mkbranch( scset[actvsc[i]], pat ); + } + + | '^' rule + { + pat = $2; + finish_rule( pat, variable_trail_rule, + headcnt, trailcnt ); + + /* add to all non-exclusive start conditions, + * including the default (0) start condition + */ + + for ( i = 1; i <= lastsc; ++i ) + if ( ! scxclu[i] ) + scbol[i] = mkbranch( scbol[i], pat ); + + if ( ! bol_needed ) + { + bol_needed = true; + + if ( performance_report ) + pinpoint_message( + "'^' operator results in sub-optimal performance" ); + } + } + + | rule + { + pat = $1; + finish_rule( pat, variable_trail_rule, + headcnt, trailcnt ); + + for ( i = 1; i <= lastsc; ++i ) + if ( ! scxclu[i] ) + scset[i] = mkbranch( scset[i], pat ); + } + + | scon EOF_OP + { build_eof_action(); } + + | EOF_OP + { + /* this EOF applies to all start conditions + * which don't already have EOF actions + */ + actvp = 0; + + for ( i = 1; i <= lastsc; ++i ) + if ( !
sceof[i] ) + actvsc[++actvp] = i; + + if ( actvp == 0 ) + pinpoint_message( + "warning - all start conditions already have <<EOF>> rules" ); + + else + build_eof_action(); + } + + | error + { synerr( "unrecognized rule" ); } + ; + +scon : '<' namelist2 '>' + ; + +namelist2 : namelist2 ',' NAME + { + if ( (scnum = sclookup( nmstr )) == 0 ) + format_pinpoint_message( + "undeclared start condition %s", nmstr ); + + else + actvsc[++actvp] = scnum; + } + + | NAME + { + if ( (scnum = sclookup( nmstr )) == 0 ) + format_pinpoint_message( + "undeclared start condition %s", nmstr ); + else + actvsc[actvp = 1] = scnum; /* first name in the list: restart the active set at one entry */ + } + + | error + { synerr( "bad start condition list" ); } + ; + +rule : re2 re + { + if ( transchar[lastst[$2]] != SYM_EPSILON ) + /* provide final transition \now/ so it + * will be marked as a trailing context + * state + */ + $2 = link_machines( $2, mkstate( SYM_EPSILON ) ); + + mark_beginning_as_normal( $2 ); + current_state_type = STATE_NORMAL; + + if ( previous_continued_action ) + { + /* we need to treat this as variable trailing + * context so that the backup does not happen + * in the action but before the action switch + * statement. If the backup happens in the + * action, then the rules "falling into" this + * one's action will *also* do the backup, + * erroneously. + */ + if ( ! varlength || headcnt != 0 ) + { + fprintf( stderr, + "%s: warning - trailing context rule at line %d made variable because\n", + program_name, linenum ); + fprintf( stderr, + " of preceding '|' action\n" ); + } + + /* mark as variable */ + varlength = true; + headcnt = 0; + } + + if ( varlength && headcnt == 0 ) + { /* variable trailing context rule */ + /* mark the first part of the rule as the accepting + * "head" part of a trailing context rule + */ + /* by the way, we didn't do this at the beginning + * of this production because back then + * current_state_type was set up for a trail + * rule, and add_accept() can create a new + * state ...
+ */ + add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK ); + variable_trail_rule = true; + } + + else + trailcnt = rulelen; + + $$ = link_machines( $1, $2 ); + } + + | re2 re '$' + { synerr( "trailing context used twice" ); } + + | re '$' + { + if ( trlcontxt ) + { + synerr( "trailing context used twice" ); + $$ = mkstate( SYM_EPSILON ); + } + + else if ( previous_continued_action ) + { + /* see the comment in the rule for "re2 re" + * above + */ + if ( ! varlength || headcnt != 0 ) + { + fprintf( stderr, + "%s: warning - trailing context rule at line %d made variable because\n", + program_name, linenum ); + fprintf( stderr, + " of preceding '|' action\n" ); + } + + /* mark as variable */ + varlength = true; + headcnt = 0; + } + + trlcontxt = true; + + if ( ! varlength ) + headcnt = rulelen; + + ++rulelen; + trailcnt = 1; + + eps = mkstate( SYM_EPSILON ); + $$ = link_machines( $1, + link_machines( eps, mkstate( '\n' ) ) ); + } + + | re + { + $$ = $1; + + if ( trlcontxt ) + { + if ( varlength && headcnt == 0 ) + /* both head and trail are variable-length */ + variable_trail_rule = true; + else + trailcnt = rulelen; + } + } + ; + + +re : re '|' series + { + varlength = true; + $$ = mkor( $1, $3 ); + } + + | series + { $$ = $1; } + ; + + +re2 : re '/' + { + /* this rule is written separately so + * the reduction will occur before the trailing + * series is parsed + */ + + if ( trlcontxt ) + synerr( "trailing context used twice" ); + else + trlcontxt = true; + + if ( varlength ) + /* we hope the trailing context is fixed-length */ + varlength = false; + else + headcnt = rulelen; + + rulelen = 0; + + current_state_type = STATE_TRAILING_CONTEXT; + $$ = $1; + } + ; + +series : series singleton + { + /* this is where concatenation of adjacent patterns + * gets done + */ + $$ = link_machines( $1, $2 ); + } + + | singleton + { $$ = $1; } + ; + +singleton : singleton '*' + { + varlength = true; + + $$ = mkclos( $1 ); + } + + | singleton '+' + { + varlength = true; + + $$ =
mkposcl( $1 ); + } + + | singleton '?' + { + varlength = true; + + $$ = mkopt( $1 ); + } + + | singleton '{' NUMBER ',' NUMBER '}' + { + varlength = true; + + if ( $3 > $5 || $3 < 0 ) + { + synerr( "bad iteration values" ); + $$ = $1; + } + else + { + if ( $3 == 0 ) + $$ = mkopt( mkrep( $1, $3, $5 ) ); + else + $$ = mkrep( $1, $3, $5 ); + } + } + + | singleton '{' NUMBER ',' '}' + { + varlength = true; + + if ( $3 <= 0 ) + { + synerr( "iteration value must be positive" ); + $$ = $1; + } + + else + $$ = mkrep( $1, $3, INFINITY ); + } + + | singleton '{' NUMBER '}' + { + /* the singleton could be something like "(foo)", + * in which case we have no idea what its length + * is, so we punt here. + */ + varlength = true; + + if ( $3 <= 0 ) + { + synerr( "iteration value must be positive" ); + $$ = $1; + } + + else + $$ = link_machines( $1, copysingl( $1, $3 - 1 ) ); + } + + | '.' + { + if ( ! madeany ) + { + /* create the '.' character class */ + anyccl = cclinit(); + ccladd( anyccl, '\n' ); + cclnegate( anyccl ); + + if ( useecs ) + mkeccl( ccltbl + cclmap[anyccl], + ccllen[anyccl], nextecm, + ecgroup, csize, csize ); + + madeany = true; + } + + ++rulelen; + + $$ = mkstate( -anyccl ); + } + + | fullccl + { + if ( ! cclsorted ) + /* sort characters for fast searching. We use a + * shell sort since this list could be large.
+ */ + cshell( ccltbl + cclmap[$1], ccllen[$1], true ); + + if ( useecs ) + mkeccl( ccltbl + cclmap[$1], ccllen[$1], + nextecm, ecgroup, csize, csize ); + + ++rulelen; + + $$ = mkstate( -$1 ); + } + + | PREVCCL + { + ++rulelen; + + $$ = mkstate( -$1 ); + } + + | '"' string '"' + { $$ = $2; } + + | '(' re ')' + { $$ = $2; } + + | CHAR + { + ++rulelen; + + if ( caseins && $1 >= 'A' && $1 <= 'Z' ) + $1 = clower( $1 ); + + $$ = mkstate( $1 ); + } + ; + +fullccl : '[' ccl ']' + { $$ = $2; } + + | '[' '^' ccl ']' + { + /* *Sigh* - to be compatible with Unix lex, negated ccls + * match newlines + */ +#ifdef NOTDEF + ccladd( $3, '\n' ); /* negated ccls don't match '\n' */ + cclsorted = false; /* because we added the newline */ +#endif + cclnegate( $3 ); + $$ = $3; + } + ; + +ccl : ccl CHAR '-' CHAR + { + if ( $2 > $4 ) + synerr( "negative range in character class" ); + + else + { + if ( caseins ) + { + if ( $2 >= 'A' && $2 <= 'Z' ) + $2 = clower( $2 ); + if ( $4 >= 'A' && $4 <= 'Z' ) + $4 = clower( $4 ); + } + + for ( i = $2; i <= $4; ++i ) + ccladd( $1, i ); + + /* keep track if this ccl is staying in alphabetical + * order + */ + cclsorted = cclsorted && ($2 > lastchar); + lastchar = $4; + } + + $$ = $1; + } + + | ccl CHAR + { + if ( caseins ) + if ( $2 >= 'A' && $2 <= 'Z' ) + $2 = clower( $2 ); + + ccladd( $1, $2 ); + cclsorted = cclsorted && ($2 > lastchar); + lastchar = $2; + $$ = $1; + } + + | + { + cclsorted = true; + lastchar = 0; + $$ = cclinit(); + } + ; + +string : string CHAR + { + if ( caseins ) + if ( $2 >= 'A' && $2 <= 'Z' ) + $2 = clower( $2 ); + + ++rulelen; + + $$ = link_machines( $1, mkstate( $2 ) ); + } + + | + { $$ = mkstate( SYM_EPSILON ); } + ; + +%% + + +/* build_eof_action - build the "<<EOF>>" action for the active start + * conditions + */ + +void build_eof_action() + + { + register int i; + + for ( i = 1; i <= actvp; ++i ) + { + if ( sceof[actvsc[i]] ) + format_pinpoint_message( + "multiple <<EOF>> rules for start condition %s", + scname[actvsc[i]] ); + + else
+ { + sceof[actvsc[i]] = true; + fprintf( temp_action_file, "case YY_STATE_EOF(%s):\n", + scname[actvsc[i]] ); + } + } + + line_directive_out( temp_action_file ); + } + + +/* synerr - report a syntax error */ + +void synerr( str ) +char str[]; + + { + syntaxerror = true; + pinpoint_message( str ); + } + + +/* format_pinpoint_message - write out a message formatted with one string, + * pinpointing its location + */ + +void format_pinpoint_message( msg, arg ) +char msg[], arg[]; + + { + char errmsg[MAXLINE]; + + (void) sprintf( errmsg, msg, arg ); /* NOTE(review): unbounded; assumes the expanded message fits in MAXLINE bytes - confirm against callers */ + pinpoint_message( errmsg ); + } + + +/* pinpoint_message - write out a message, pinpointing its location */ + +void pinpoint_message( str ) +char str[]; + + { + fprintf( stderr, "\"%s\", line %d: %s\n", infilename, linenum, str ); + } + + +/* yyerror - eat up an error message from the parser; + * currently, messages are ignored + */ + +void yyerror( msg ) +char msg[]; + + { + } diff --git a/util/flex/scan.l b/util/flex/scan.l new file mode 100644 index 000000000..d02acc259 --- /dev/null +++ b/util/flex/scan.l @@ -0,0 +1,533 @@ + +/* scan.l - scanner for flex input */ + +%{ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California.
+ * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + +#undef yywrap + +#include "flexdef.h" +#include "parse.h" + +#define ACTION_ECHO fprintf( temp_action_file, "%s", yytext ) +#define MARK_END_OF_PROLOG fprintf( temp_action_file, "%%%% end of prolog\n" ); + +#undef YY_DECL +#define YY_DECL \ + int flexscan() + +#define RETURNCHAR \ + yylval = yytext[0]; \ + return ( CHAR ); + +#define RETURNNAME \ + (void) strcpy( nmstr, (char *) yytext ); \ + return ( NAME ); + +#define PUT_BACK_STRING(str, start) \ + for ( i = strlen( (char *) (str) ) - 1; i >= start; --i ) \ + unput((str)[i]) + +#define CHECK_REJECT(str) \ + if ( all_upper( str ) ) \ + reject = true; + +#define CHECK_YYMORE(str) \ + if ( all_lower( str ) ) \ + yymore_used = true; +%} + +%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE +%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT +%x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST CODEBLOCK_2 XLATION + +WS [ \t\f]+ +OPTWS [ \t\f]* +NOT_WS [^ \t\f\n] + +NAME [a-z_][a-z_0-9-]* +NOT_NAME 
[^a-z_\n]+ + +SCNAME {NAME} + +ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) + +%% + static int bracelevel, didadef; + int i, indented_code, checking_used, new_xlation; + int doing_codeblock = false; + Char nmdef[MAXLINE], myesc(); + +^{WS} indented_code = true; BEGIN(CODEBLOCK); +^#.*\n ++linenum; /* treat as a comment */ +^"/*" ECHO; BEGIN(C_COMMENT); +^"%s"{NAME}? return ( SCDECL ); +^"%x"{NAME}? return ( XSCDECL ); +^"%{".*\n { + ++linenum; + line_directive_out( stdout ); + indented_code = false; + BEGIN(CODEBLOCK); + } + +{WS} return ( WHITESPACE ); + +^"%%".* { + sectnum = 2; + line_directive_out( stdout ); + BEGIN(SECT2PROLOG); + return ( SECTEND ); + } + +^"%used" { + pinpoint_message( "warning - %%used/%%unused have been deprecated" ); + checking_used = REALLY_USED; BEGIN(USED_LIST); + } +^"%unused" { + checking_used = REALLY_NOT_USED; BEGIN(USED_LIST); + pinpoint_message( "warning - %%used/%%unused have been deprecated" ); + checking_used = REALLY_NOT_USED; BEGIN(USED_LIST); + } + + +^"%"[aeknopt]" ".*\n { +#ifdef NOTDEF + fprintf( stderr, + "old-style lex command at line %d ignored:\n\t%s", + linenum, yytext ); +#endif + ++linenum; + } + +^"%"[cr]{OPTWS} /* ignore old lex directive */ + +%t{OPTWS}\n { + ++linenum; + xlation = + (int *) malloc( sizeof( int ) * (unsigned) csize ); + + if ( ! xlation ) + flexfatal( + "dynamic memory failure building %t table" ); + + for ( i = 0; i < csize; ++i ) + xlation[i] = 0; + + num_xlations = 0; + + BEGIN(XLATION); + } + +^"%"[^sxanpekotcru{}]{OPTWS} synerr( "unrecognized '%' directive" ); + +^{NAME} { + (void) strcpy( nmstr, (char *) yytext ); + didadef = false; + BEGIN(PICKUPDEF); + } + +{SCNAME} RETURNNAME; +^{OPTWS}\n ++linenum; /* allows blank lines in section 1 */ +{OPTWS}\n ++linenum; return ( '\n' ); +. 
synerr( "illegal character" ); BEGIN(RECOVER); + + +"*/" ECHO; BEGIN(INITIAL); +"*/".*\n ++linenum; ECHO; BEGIN(INITIAL); +[^*\n]+ ECHO; +"*" ECHO; +\n ++linenum; ECHO; + + +^"%}".*\n ++linenum; BEGIN(INITIAL); +"reject" ECHO; CHECK_REJECT(yytext); +"yymore" ECHO; CHECK_YYMORE(yytext); +{NAME}|{NOT_NAME}|. ECHO; +\n { + ++linenum; + ECHO; + if ( indented_code ) + BEGIN(INITIAL); + } + + +{WS} /* separates name and definition */ + +{NOT_WS}.* { + (void) strcpy( (char *) nmdef, (char *) yytext ); + + for ( i = strlen( (char *) nmdef ) - 1; /* strip trailing blanks and tabs from the definition text */ + i >= 0 && + (nmdef[i] == ' ' || nmdef[i] == '\t'); /* parenthesized so the i >= 0 guard covers both comparisons */ + --i ) + ; + + nmdef[i + 1] = '\0'; + + ndinstal( nmstr, nmdef ); + didadef = true; + } + +\n { + if ( ! didadef ) + synerr( "incomplete name definition" ); + BEGIN(INITIAL); + ++linenum; + } + +.*\n ++linenum; BEGIN(INITIAL); RETURNNAME; + + +\n ++linenum; BEGIN(INITIAL); +{WS} +"reject" { + if ( all_upper( yytext ) ) + reject_really_used = checking_used; + else + synerr( "unrecognized %used/%unused construct" ); + } +"yymore" { + if ( all_lower( yytext ) ) + yymore_really_used = checking_used; + else + synerr( "unrecognized %used/%unused construct" ); + } +{NOT_WS}+ synerr( "unrecognized %used/%unused construct" ); + + +"%t"{OPTWS}\n ++linenum; BEGIN(INITIAL); +^{OPTWS}[0-9]+ ++num_xlations; new_xlation = true; +^. synerr( "bad row in translation table" ); +{WS} /* ignore whitespace */ + +{ESCSEQ} { + xlation[myesc( yytext )] = + (new_xlation ? num_xlations : -num_xlations); + new_xlation = false; + } +. { + xlation[yytext[0]] = + (new_xlation ? 
num_xlations : -num_xlations); + new_xlation = false; + } + +\n ++linenum; + + +.*\n/{NOT_WS} { + ++linenum; + ACTION_ECHO; + MARK_END_OF_PROLOG; + BEGIN(SECT2); + } + +.*\n ++linenum; ACTION_ECHO; + +<> MARK_END_OF_PROLOG; yyterminate(); + +^{OPTWS}\n ++linenum; /* allow blank lines in section 2 */ + +^({WS}|"%{") { + indented_code = (yytext[0] != '%'); + doing_codeblock = true; + bracelevel = 1; + + if ( indented_code ) + ACTION_ECHO; + + BEGIN(CODEBLOCK_2); + } + +"<" BEGIN(SC); return ( '<' ); +^"^" return ( '^' ); +\" BEGIN(QUOTE); return ( '"' ); +"{"/[0-9] BEGIN(NUM); return ( '{' ); +"{"[^0-9\n][^}\n]* BEGIN(BRACEERROR); +"$"/[ \t\n] return ( '$' ); + +{WS}"%{" { + bracelevel = 1; + BEGIN(PERCENT_BRACE_ACTION); + return ( '\n' ); + } +{WS}"|".*\n continued_action = true; ++linenum; return ( '\n' ); + +{WS} { + /* this rule is separate from the one below because + * otherwise we get variable trailing context, so + * we can't build the scanner using -{f,F} + */ + bracelevel = 0; + continued_action = false; + BEGIN(ACTION); + return ( '\n' ); + } + +{OPTWS}/\n { + bracelevel = 0; + continued_action = false; + BEGIN(ACTION); + return ( '\n' ); + } + +^{OPTWS}\n ++linenum; return ( '\n' ); + +"<>" return ( EOF_OP ); + +^"%%".* { + sectnum = 3; + BEGIN(SECT3); + return ( EOF ); /* to stop the parser */ + } + +"["([^\\\]\n]|{ESCSEQ})+"]" { + int cclval; + + (void) strcpy( nmstr, (char *) yytext ); + + /* check to see if we've already encountered this ccl */ + if ( (cclval = ccllookup( (Char *) nmstr )) ) + { + yylval = cclval; + ++cclreuse; + return ( PREVCCL ); + } + else + { + /* we fudge a bit. 
We know that this ccl will + * soon be numbered as lastccl + 1 by cclinit + */ + cclinstal( (Char *) nmstr, lastccl + 1 ); + + /* push back everything but the leading bracket + * so the ccl can be rescanned + */ + PUT_BACK_STRING((Char *) nmstr, 1); + + BEGIN(FIRSTCCL); + return ( '[' ); + } + } + +"{"{NAME}"}" { + register Char *nmdefptr; + Char *ndlookup(); + + (void) strcpy( nmstr, (char *) yytext ); + nmstr[yyleng - 1] = '\0'; /* chop trailing brace */ + + /* lookup from "nmstr + 1" to chop leading brace */ + if ( ! (nmdefptr = ndlookup( nmstr + 1 )) ) + synerr( "undefined {name}" ); + + else + { /* push back name surrounded by ()'s */ + unput(')'); + PUT_BACK_STRING(nmdefptr, 0); + unput('('); + } + } + +[/|*+?.()] return ( yytext[0] ); +. RETURNCHAR; +\n ++linenum; return ( '\n' ); + + +"," return ( ',' ); +">" BEGIN(SECT2); return ( '>' ); +">"/"^" BEGIN(CARETISBOL); return ( '>' ); +{SCNAME} RETURNNAME; +. synerr( "bad start condition name" ); + +"^" BEGIN(SECT2); return ( '^' ); + + +[^"\n] RETURNCHAR; +\" BEGIN(SECT2); return ( '"' ); + +\n { + synerr( "missing quote" ); + BEGIN(SECT2); + ++linenum; + return ( '"' ); + } + + +"^"/[^-\n] BEGIN(CCL); return ( '^' ); +"^"/- return ( '^' ); +- BEGIN(CCL); yylval = '-'; return ( CHAR ); +. BEGIN(CCL); RETURNCHAR; + +-/[^\]\n] return ( '-' ); +[^\]\n] RETURNCHAR; +"]" BEGIN(SECT2); return ( ']' ); + + +[0-9]+ { + yylval = myctoi( yytext ); + return ( NUMBER ); + } + +"," return ( ',' ); +"}" BEGIN(SECT2); return ( '}' ); + +. { + synerr( "bad character inside {}'s" ); + BEGIN(SECT2); + return ( '}' ); + } + +\n { + synerr( "missing }" ); + BEGIN(SECT2); + ++linenum; + return ( '}' ); + } + + +"}" synerr( "bad name in {}'s" ); BEGIN(SECT2); +\n synerr( "missing }" ); ++linenum; BEGIN(SECT2); + + +{OPTWS}"%}".* bracelevel = 0; +"reject" { + ACTION_ECHO; + CHECK_REJECT(yytext); + } +"yymore" { + ACTION_ECHO; + CHECK_YYMORE(yytext); + } +{NAME}|{NOT_NAME}|. 
ACTION_ECHO; +\n { + ++linenum; + ACTION_ECHO; + if ( bracelevel == 0 || + (doing_codeblock && indented_code) ) + { + if ( ! doing_codeblock ) + fputs( "\tYY_BREAK\n", temp_action_file ); + + doing_codeblock = false; + BEGIN(SECT2); + } + } + + + /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */ +"{" ACTION_ECHO; ++bracelevel; +"}" ACTION_ECHO; --bracelevel; +[^a-z_{}"'/\n]+ ACTION_ECHO; +{NAME} ACTION_ECHO; +"/*" ACTION_ECHO; BEGIN(ACTION_COMMENT); +"'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */ +\" ACTION_ECHO; BEGIN(ACTION_STRING); +\n { + ++linenum; + ACTION_ECHO; + if ( bracelevel == 0 ) + { + fputs( "\tYY_BREAK\n", temp_action_file ); + BEGIN(SECT2); + } + } +. ACTION_ECHO; + +"*/" ACTION_ECHO; BEGIN(ACTION); +[^*\n]+ ACTION_ECHO; +"*" ACTION_ECHO; +\n ++linenum; ACTION_ECHO; +. ACTION_ECHO; + +[^"\\\n]+ ACTION_ECHO; +\\. ACTION_ECHO; +\n ++linenum; ACTION_ECHO; +\" ACTION_ECHO; BEGIN(ACTION); +. ACTION_ECHO; + +<> { + synerr( "EOF encountered inside an action" ); + yyterminate(); + } + + +{ESCSEQ} { + yylval = myesc( yytext ); + return ( CHAR ); + } + +{ESCSEQ} { + yylval = myesc( yytext ); + BEGIN(CCL); + return ( CHAR ); + } + + +.*(\n?) ECHO; +%% + + +int yywrap() /* end of one input: switch to the next file and return 0, or return 1 when num_input_files is exhausted */ + + { + if ( --num_input_files > 0 ) + { + set_input_file( *++input_files ); + return ( 0 ); + } + + else + return ( 1 ); + } + + +/* set_input_file - open the given file (if NULL, stdin) for scanning; sets yyin and infilename, and calls lerrsf() when fopen fails */ + +void set_input_file( file ) +char *file; + + { + if ( file ) + { + infilename = file; + yyin = fopen( infilename, "r" ); + + if ( yyin == NULL ) + lerrsf( "can't open %s", file ); + } + + else + { + yyin = stdin; + infilename = "<stdin>"; /* name shown by pinpoint_message(); an empty name would print as "" */ + } + } diff --git a/util/flex/sym.c b/util/flex/sym.c new file mode 100644 index 000000000..bdca15458 --- /dev/null +++ b/util/flex/sym.c @@ -0,0 +1,315 @@ +/* sym - symbol table routines */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. 
+ * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + +#include "flexdef.h" + + +/* declare functions that have forward references */ + +int hashfunct PROTO((register char[], int)); + + +struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE]; +struct hash_entry *sctbl[START_COND_HASH_SIZE]; +struct hash_entry *ccltab[CCL_HASH_SIZE]; + +struct hash_entry *findsym(); + + +/* addsym - add symbol and definitions to symbol table + * + * synopsis + * char sym[], *str_def; + * int int_def; + * hash_table table; + * int table_size; + * 0 / -1 = addsym( sym, def, int_def, table, table_size ); + * + * -1 is returned if the symbol already exists, and the change not made. 
+ */ + +int addsym( sym, str_def, int_def, table, table_size ) +register char sym[]; +char *str_def; +int int_def; +hash_table table; +int table_size; + + { + int hash_val = hashfunct( sym, table_size ); + register struct hash_entry *sym_entry = table[hash_val]; + register struct hash_entry *new_entry; + register struct hash_entry *successor; + + while ( sym_entry ) + { + if ( ! strcmp( sym, sym_entry->name ) ) + { /* entry already exists; the table is left unchanged */ + return ( -1 ); + } + + sym_entry = sym_entry->next; + } + + /* create new entry and link it in at the head of this bucket's chain */ + new_entry = (struct hash_entry *) malloc( sizeof( struct hash_entry ) ); + + if ( new_entry == NULL ) + flexfatal( "symbol table memory allocation failed" ); + + if ( (successor = table[hash_val]) ) + { + new_entry->next = successor; + successor->prev = new_entry; + } + else + new_entry->next = NULL; + + new_entry->prev = NULL; + new_entry->name = sym; /* the caller's pointer is stored; the string is not copied here */ + new_entry->str_val = str_def; + new_entry->int_val = int_def; + + table[hash_val] = new_entry; + + return ( 0 ); + } + + +/* cclinstal - save the text of a character class + * + * synopsis + * Char ccltxt[]; + * int cclnum; + * cclinstal( ccltxt, cclnum ); + */ + +void cclinstal( ccltxt, cclnum ) +Char ccltxt[]; +int cclnum; + + { + /* we don't bother checking the return status because we are not called + * unless the symbol is new + */ + Char *copy_unsigned_string(); + + (void) addsym( (char *) copy_unsigned_string( ccltxt ), (char *) 0, cclnum, + ccltab, CCL_HASH_SIZE ); + } + + +/* ccllookup - lookup the number associated with character class text + * + * synopsis + * Char ccltxt[]; + * int ccllookup, cclval; + * cclval/0 = ccllookup( ccltxt ); (the value is the int_val of the entry findsym() returns) + */ + +int ccllookup( ccltxt ) +Char ccltxt[]; + + { + return ( findsym( (char *) ccltxt, ccltab, CCL_HASH_SIZE )->int_val ); + } + + +/* findsym - find symbol in symbol table + * + * synopsis + * char sym[]; + * hash_table table; + * int table_size; + * struct hash_entry *sym_entry, *findsym(); + * sym_entry = findsym( sym, table, table_size ); + 
*/ + +struct hash_entry *findsym( sym, table, table_size ) +register char sym[]; +hash_table table; +int table_size; + + { + register struct hash_entry *sym_entry = table[hashfunct( sym, table_size )]; + static struct hash_entry empty_entry = + { + (struct hash_entry *) 0, (struct hash_entry *) 0, NULL, NULL, 0, + } ; + + while ( sym_entry ) + { + if ( ! strcmp( sym, sym_entry->name ) ) + return ( sym_entry ); + sym_entry = sym_entry->next; + } + + return ( &empty_entry ); /* not found: callers get the zeroed static entry rather than a null pointer */ + } + + +/* hashfunct - compute the hash value for "str" and hash size "hash_size" + * + * synopsis + * char str[]; + * int hash_size, hash_val; + * hash_val = hashfunct( str, hash_size ); each character is folded in as an unsigned char, so for a positive hash_size the result stays in 0 .. hash_size - 1 + */ + +int hashfunct( str, hash_size ) +register char str[]; +int hash_size; + + { + register int hashval; + register int locstr; + + hashval = 0; + locstr = 0; + + while ( str[locstr] ) + hashval = ((hashval << 1) + (unsigned char) str[locstr++]) % hash_size; /* cast: a negative plain char could make the remainder, and so the table index, negative */ + + return ( hashval ); + } + + +/* ndinstal - install a name definition + * + * synopsis + * char nd[]; + * Char def[]; + * ndinstal( nd, def ); + */ + +void ndinstal( nd, def ) +char nd[]; +Char def[]; + + { + char *copy_string(); + Char *copy_unsigned_string(); + + if ( addsym( copy_string( nd ), (char *) copy_unsigned_string( def ), 0, + ndtbl, NAME_TABLE_HASH_SIZE ) ) + synerr( "name defined twice" ); + } + + +/* ndlookup - lookup a name definition + * + * synopsis + * char nd[], *def; + * char *ndlookup(); + * def/NULL = ndlookup( nd ); + */ + +Char *ndlookup( nd ) +char nd[]; + + { + return ( (Char *) findsym( nd, ndtbl, NAME_TABLE_HASH_SIZE )->str_val ); + } + + +/* scinstal - make a start condition + * + * synopsis + * char str[]; + * int xcluflg; + * scinstal( str, xcluflg ); + * + * NOTE + * the start condition is Exclusive if xcluflg is true + */ + +void scinstal( str, xcluflg ) +char str[]; +int xcluflg; + + { + char *copy_string(); + + /* bit of a hack. 
We know how the default start-condition is + * declared, and don't put out a define for it, because it + * would come out as "#define 0 1" + */ + /* actually, this is no longer the case. The default start-condition + * is now called "INITIAL". But we keep the following for the sake + * of future robustness. + */ + + if ( strcmp( str, "0" ) ) + printf( "#define %s %d\n", str, lastsc ); /* printed before lastsc is incremented, so the macro value is one less than the index used below */ + + if ( ++lastsc >= current_max_scs ) /* out of room: grow every per-start-condition array together */ + { + current_max_scs += MAX_SCS_INCREMENT; + + ++num_reallocs; + + scset = reallocate_integer_array( scset, current_max_scs ); + scbol = reallocate_integer_array( scbol, current_max_scs ); + scxclu = reallocate_integer_array( scxclu, current_max_scs ); + sceof = reallocate_integer_array( sceof, current_max_scs ); + scname = reallocate_char_ptr_array( scname, current_max_scs ); + actvsc = reallocate_integer_array( actvsc, current_max_scs ); + } + + scname[lastsc] = copy_string( str ); + + if ( addsym( scname[lastsc], (char *) 0, lastsc, + sctbl, START_COND_HASH_SIZE ) ) + format_pinpoint_message( "start condition %s declared twice", str ); /* reported only; the entries below are still filled in */ + + scset[lastsc] = mkstate( SYM_EPSILON ); + scbol[lastsc] = mkstate( SYM_EPSILON ); + scxclu[lastsc] = xcluflg; + sceof[lastsc] = false; + } + + +/* sclookup - lookup the number associated with a start condition + * + * synopsis + * char str[], scnum; + * int sclookup; + * scnum/0 = sclookup( str ); (the value is the int_val of the entry findsym() returns) + */ + +int sclookup( str ) +char str[]; + + { + return ( findsym( str, sctbl, START_COND_HASH_SIZE )->int_val ); + } diff --git a/util/flex/tblcmp.c b/util/flex/tblcmp.c new file mode 100644 index 000000000..e0fdaf263 --- /dev/null +++ b/util/flex/tblcmp.c @@ -0,0 +1,932 @@ +/* tblcmp - table compression routines */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. 
DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + +#include "flexdef.h" + + +/* declarations for functions that have forward references */ + +void mkentry PROTO((register int*, int, int, int, int)); +void mkprot PROTO((int[], int, int)); +void mktemplate PROTO((int[], int, int)); +void mv2front PROTO((int)); +int tbldiff PROTO((int[], int, int[])); + + +/* bldtbl - build table entries for dfa state + * + * synopsis + * int state[numecs], statenum, totaltrans, comstate, comfreq; + * bldtbl( state, statenum, totaltrans, comstate, comfreq ); + * + * State is the statenum'th dfa state. It is indexed by equivalence class and + * gives the number of the state to enter for a given equivalence class. + * totaltrans is the total number of transitions out of the state. Comstate + * is that state which is the destination of the most transitions out of State. + * Comfreq is how many transitions there are out of State to Comstate. 
+ * + * A note on terminology: + * "protos" are transition tables which have a high probability of + * either being redundant (a state processed later will have an identical + * transition table) or nearly redundant (a state processed later will have + * many of the same out-transitions). A "most recently used" queue of + * protos is kept around with the hope that most states will find a proto + * which is similar enough to be usable, and therefore compacting the + * output tables. + * "templates" are a special type of proto. If a transition table is + * homogeneous or nearly homogeneous (all transitions go to the same + * destination) then the odds are good that future states will also go + * to the same destination state on basically the same character set. + * These homogeneous states are so common when dealing with large rule + * sets that they merit special attention. If the transition table were + * simply made into a proto, then (typically) each subsequent, similar + * state will differ from the proto for two out-transitions. One of these + * out-transitions will be that character on which the proto does not go + * to the common destination, and one will be that character on which the + * state does not go to the common destination. Templates, on the other + * hand, go to the common state on EVERY transition character, and therefore + * cost only one difference. + */ + +void bldtbl( state, statenum, totaltrans, comstate, comfreq ) +int state[], statenum, totaltrans, comstate, comfreq; + + { + int extptr, extrct[2][CSIZE + 1]; + int mindiff, minprot, i, d; + int checkcom; + + /* If extptr is 0 then the first array of extrct holds the result of the + * "best difference" to date, which is those transitions which occur in + * "state" but not in the proto which, to date, has the fewest differences + * between itself and "state". If extptr is 1 then the second array of + * extrct hold the best difference. 
The two arrays are toggled + * between so that the best difference to date can be kept around and + * also a difference just created by checking against a candidate "best" + * proto. + */ + + extptr = 0; + + /* if the state has too few out-transitions, don't bother trying to + * compact its tables + */ + + if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) ) + mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); + + else + { + /* checkcom is true if we should only check "state" against + * protos which have the same "comstate" value + */ + + checkcom = comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE; + + minprot = firstprot; + mindiff = totaltrans; /* starting value, kept if no proto is compared below */ + + if ( checkcom ) + { + /* find first proto which has the same "comstate" */ + for ( i = firstprot; i != NIL; i = protnext[i] ) + if ( protcomst[i] == comstate ) + { + minprot = i; + mindiff = tbldiff( state, minprot, extrct[extptr] ); + break; + } + } + + else + { + /* since we've decided that the most common destination out + * of "state" does not occur with a high enough frequency, + * we set the "comstate" to zero, assuring that if this state + * is entered into the proto list, it will not be considered + * a template. + */ + comstate = 0; + + if ( firstprot != NIL ) + { + minprot = firstprot; + mindiff = tbldiff( state, minprot, extrct[extptr] ); + } + } + + /* we now have the first interesting proto in "minprot". If + * it matches within the tolerances set for the first proto, + * we don't want to bother scanning the rest of the proto list + * to see if we have any other reasonable matches. + */ + + if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE ) + { /* not a good enough match. 
Scan the rest of the protos (the loop starts at minprot itself, so that proto is compared once more) */ + for ( i = minprot; i != NIL; i = protnext[i] ) + { + d = tbldiff( state, i, extrct[1 - extptr] ); /* the candidate difference goes into the half of extrct not holding the best one */ + if ( d < mindiff ) + { + extptr = 1 - extptr; /* the half just filled becomes the best difference */ + mindiff = d; + minprot = i; + } + } + } + + /* check if the proto we've decided on as our best bet is close + * enough to the state we want to match to be usable + */ + + if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE ) + { + /* no good. If the state is homogeneous enough, we make a + * template out of it. Otherwise, we make a proto. + */ + + if ( comfreq * 100 >= totaltrans * TEMPLATE_SAME_PERCENTAGE ) + mktemplate( state, statenum, comstate ); + + else + { + mkprot( state, statenum, comstate ); + mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); + } + } + + else + { /* use the proto */ + mkentry( extrct[extptr], numecs, statenum, + prottbl[minprot], mindiff ); + + /* if this state was sufficiently different from the proto + * we built it from, make it, too, a proto + */ + + if ( mindiff * 100 >= totaltrans * NEW_PROTO_DIFF_PERCENTAGE ) + mkprot( state, statenum, comstate ); + + /* since mkprot added a new proto to the proto queue, it's possible + * that "minprot" is no longer on the proto queue (if it happened + * to have been the last entry, it would have been bumped off). + * If it's not there, then the new proto took its physical place + * (though logically the new proto is at the beginning of the + * queue), so in that case the following call will do nothing. + */ + + mv2front( minprot ); + } + } + } + + +/* cmptmps - compress template table entries + * + * synopsis + * cmptmps(); + * + * template tables are compressed by using the 'template equivalence + * classes', which are collections of transition character equivalence + * classes which always appear together in templates - really meta-equivalence + * classes. 
until this point, the tables for templates have been stored + * up at the top end of the nxt array; they will now be compressed and have + * table entries made for them. + */ + +void cmptmps() + + { + int tmpstorage[CSIZE + 1]; + register int *tmp = tmpstorage, i, j; + int totaltrans, trans; + + peakpairs = numtemps * numecs + tblend; + + if ( usemecs ) + { + /* create equivalence classes based on data gathered on template + * transitions + */ + + nummecs = cre8ecs( tecfwd, tecbck, numecs ); + } + + else + nummecs = numecs; + + if ( lastdfa + numtemps + 1 >= current_max_dfas ) + increase_max_dfas(); + + /* loop through each template */ + + for ( i = 1; i <= numtemps; ++i ) + { + totaltrans = 0; /* number of non-jam transitions out of this template */ + + for ( j = 1; j <= numecs; ++j ) + { + trans = tnxt[numecs * i + j]; + + if ( usemecs ) + { + /* the absolute value of tecbck is the meta-equivalence class + * of a given equivalence class, as set up by cre8ecs + */ + if ( tecbck[j] > 0 ) + { + tmp[tecbck[j]] = trans; + + if ( trans > 0 ) + ++totaltrans; + } + } + + else + { + tmp[j] = trans; + + if ( trans > 0 ) + ++totaltrans; + } + } + + /* it is assumed (in a rather subtle way) in the skeleton that + * if we're using meta-equivalence classes, the def[] entry for + * all templates is the jam template, i.e., templates never default + * to other non-jam table entries (e.g., another template) + */ + + /* leave room for the jam-state after the last real state */ + mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans ); + } + } + +#ifdef ACK_MOD /* local byte-zeroing helper, compiled only when ACK_MOD is defined */ +static void bzero(p, cnt) +register char *p; +register int cnt; + { + while (cnt-- > 0) *p++ = '\0'; + } +#endif /* ACK_MOD */ + +/* expand_nxt_chk - expand the next check arrays: nxt and chk both grow by MAX_XPAIRS_INCREMENT entries and the added part of chk is zeroed */ + +void expand_nxt_chk() + + { + register int old_max = current_max_xpairs; + + current_max_xpairs += MAX_XPAIRS_INCREMENT; + + ++num_reallocs; + + nxt = reallocate_integer_array( nxt, current_max_xpairs ); + chk = reallocate_integer_array( 
chk, current_max_xpairs ); + + bzero( (char *) (chk + old_max), + MAX_XPAIRS_INCREMENT * sizeof( int ) / sizeof( char ) ); + } + + +/* find_table_space - finds a space in the table for a state to be placed + * + * synopsis + * int *state, numtrans, block_start; + * int find_table_space(); + * + * block_start = find_table_space( state, numtrans ); + * + * State is the state to be added to the full speed transition table. + * Numtrans is the number of out-transitions for the state. + * + * find_table_space() returns the position of the start of the first block (in + * chk) able to accommodate the state + * + * In determining if a state will or will not fit, find_table_space() must take + * into account the fact that an end-of-buffer state will be added at [0], + * and an action number will be added in [-1]. + */ + +int find_table_space( state, numtrans ) +int *state, numtrans; + + { + /* firstfree is the position of the first possible occurrence of two + * consecutive unused records in the chk and nxt arrays + */ + register int i; + register int *state_ptr, *chk_ptr; + register int *ptr_to_last_entry_in_state; + + /* if there are too many out-transitions, put the state at the end of + * nxt and chk + */ + if ( numtrans > MAX_XTIONS_FULL_INTERIOR_FIT ) + { + /* if table is empty, return the first available spot in chk/nxt, + * which should be 1 + */ + if ( tblend < 2 ) + return ( 1 ); + + i = tblend - numecs; /* start searching for table space near the + * end of chk/nxt arrays + */ + } + + else + i = firstfree; /* start searching for table space from the + * beginning (skipping only the elements + * which will definitely not hold the new + * state) + */ + + while ( 1 ) /* loops until a space is found */ + { + if ( i + numecs >= current_max_xpairs ) /* >= because chk[i + numecs] is examined below and must lie inside the array */ + expand_nxt_chk(); + + /* loops until space for end-of-buffer and action number are found */ + while ( 1 ) + { + if ( chk[i - 1] == 0 ) /* check for action number space */ + { + if ( chk[i] == 0 ) /* check for 
end-of-buffer space */ + break; + + else + i += 2; /* since i != 0, there is no use checking to + * see if (++i) - 1 == 0, because that's the + * same as i == 0, so we skip a space + */ + } + + else + ++i; + + if ( i + numecs >= current_max_xpairs ) /* same bound as at the top of the outer loop, re-checked after i moved */ + expand_nxt_chk(); + } + + /* if we started search from the beginning, store the new firstfree for + * the next call of find_table_space() + */ + if ( numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT ) + firstfree = i + 1; + + /* check to see if all elements in chk (and therefore nxt) that are + * needed for the new state have not yet been taken + */ + + state_ptr = &state[1]; + ptr_to_last_entry_in_state = &chk[i + numecs + 1]; /* one past the last slot examined, chk[i + numecs] */ + + for ( chk_ptr = &chk[i + 1]; chk_ptr != ptr_to_last_entry_in_state; + ++chk_ptr ) + if ( *(state_ptr++) != 0 && *chk_ptr != 0 ) + break; + + if ( chk_ptr == ptr_to_last_entry_in_state ) + return ( i ); + + else + ++i; + } + } + + +/* inittbl - initialize transition tables + * + * synopsis + * inittbl(); + * + * Initializes "firstfree" to be one beyond the end of the table. Initializes + * all "chk" entries to be zero. Note that templates are built in their + * own tbase/tdef tables. They are shifted down to be contiguous + * with the non-template entries during table generation. 
+ */ +void inittbl() + + { + register int i; + + bzero( (char *) chk, current_max_xpairs * sizeof( int ) / sizeof( char ) ); + + tblend = 0; + firstfree = tblend + 1; + numtemps = 0; + + if ( usemecs ) + { + /* set up doubly-linked meta-equivalence classes + * these are sets of equivalence classes which all have identical + * transitions out of TEMPLATES + */ + + tecbck[1] = NIL; + + for ( i = 2; i <= numecs; ++i ) + { + tecbck[i] = i - 1; + tecfwd[i - 1] = i; + } + + tecfwd[numecs] = NIL; + } + } + + +/* mkdeftbl - make the default, "jam" table entries: one end-of-buffer slot followed by numecs zero transitions, all checked against jamstate + * + * synopsis + * mkdeftbl(); + */ + +void mkdeftbl() + + { + int i; + + jamstate = lastdfa + 1; + + ++tblend; /* room for transition on end-of-buffer character */ + + if ( tblend + numecs >= current_max_xpairs ) /* >= because chk[tblend + numecs] is written below and must lie inside the array */ + expand_nxt_chk(); + + /* add in default end-of-buffer transition */ + nxt[tblend] = end_of_buffer_state; + chk[tblend] = jamstate; + + for ( i = 1; i <= numecs; ++i ) + { + nxt[tblend + i] = 0; + chk[tblend + i] = jamstate; + } + + jambase = tblend; + + base[jamstate] = jambase; + def[jamstate] = 0; + + tblend += numecs; + ++numtemps; + } + + +/* mkentry - create base/def and nxt/chk entries for transition array + * + * synopsis + * int state[numchars + 1], numchars, statenum, deflink, totaltrans; + * mkentry( state, numchars, statenum, deflink, totaltrans ); + * + * "state" is a transition array "numchars" characters in size, "statenum" + * is the offset to be used into the base/def tables, and "deflink" is the + * entry to put in the "def" table entry. If "deflink" is equal to + * "JAMSTATE", then no attempt will be made to fit zero entries of "state" + * (i.e., jam entries) into the table. It is assumed that by linking to + * "JAMSTATE" they will be taken care of. In any case, entries in "state" + * marking transitions to "SAME_TRANS" are treated as though they will be + * taken care of by wherever "deflink" points. "totaltrans" is the total + * number of transitions out of the state. 
If it is below a certain threshold, + * the tables are searched for an interior spot that will accommodate the + * state array. + */ + +void mkentry( state, numchars, statenum, deflink, totaltrans ) +register int *state; +int numchars, statenum, deflink, totaltrans; + + { + register int minec, maxec, i, baseaddr; + int tblbase, tbllast; + + if ( totaltrans == 0 ) + { /* there are no out-transitions */ + if ( deflink == JAMSTATE ) + base[statenum] = JAMSTATE; + else + base[statenum] = 0; + + def[statenum] = deflink; + return; + } + + for ( minec = 1; minec <= numchars; ++minec ) + { + if ( state[minec] != SAME_TRANS ) + if ( state[minec] != 0 || deflink != JAMSTATE ) + break; + } + + if ( totaltrans == 1 ) + { + /* there's only one out-transition. Save it for later to fill + * in holes in the tables. + */ + stack1( statenum, minec, state[minec], deflink ); + return; + } + + for ( maxec = numchars; maxec > 0; --maxec ) + { + if ( state[maxec] != SAME_TRANS ) + if ( state[maxec] != 0 || deflink != JAMSTATE ) + break; + } + + /* Whether we try to fit the state table in the middle of the table + * entries we have already generated, or if we just take the state + * table at the end of the nxt/chk tables, we must make sure that we + * have a valid base address (i.e., non-negative). Note that not only are + * negative base addresses dangerous at run-time (because indexing the + * next array with one and a low-valued character might generate an + * array-out-of-bounds error message), but at compile-time negative + * base addresses denote TEMPLATES. + */ + + /* find the first transition of state that we need to worry about. 
*/ + if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE ) + { /* attempt to squeeze it into the middle of the tabls */ + baseaddr = firstfree; + + while ( baseaddr < minec ) + { + /* using baseaddr would result in a negative base address below + * find the next free slot + */ + for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr ) + ; + } + + if ( baseaddr + maxec - minec >= current_max_xpairs ) + expand_nxt_chk(); + + for ( i = minec; i <= maxec; ++i ) + if ( state[i] != SAME_TRANS ) + if ( state[i] != 0 || deflink != JAMSTATE ) + if ( chk[baseaddr + i - minec] != 0 ) + { /* baseaddr unsuitable - find another */ + for ( ++baseaddr; + baseaddr < current_max_xpairs && + chk[baseaddr] != 0; + ++baseaddr ) + ; + + if ( baseaddr + maxec - minec >= current_max_xpairs ) + expand_nxt_chk(); + + /* reset the loop counter so we'll start all + * over again next time it's incremented + */ + + i = minec - 1; + } + } + + else + { + /* ensure that the base address we eventually generate is + * non-negative + */ + baseaddr = max( tblend + 1, minec ); + } + + tblbase = baseaddr - minec; + tbllast = tblbase + maxec; + + if ( tbllast >= current_max_xpairs ) + expand_nxt_chk(); + + base[statenum] = tblbase; + def[statenum] = deflink; + + for ( i = minec; i <= maxec; ++i ) + if ( state[i] != SAME_TRANS ) + if ( state[i] != 0 || deflink != JAMSTATE ) + { + nxt[tblbase + i] = state[i]; + chk[tblbase + i] = statenum; + } + + if ( baseaddr == firstfree ) + /* find next free slot in tables */ + for ( ++firstfree; chk[firstfree] != 0; ++firstfree ) + ; + + tblend = max( tblend, tbllast ); + } + + +/* mk1tbl - create table entries for a state (or state fragment) which + * has only one out-transition + * + * synopsis + * int state, sym, onenxt, onedef; + * mk1tbl( state, sym, onenxt, onedef ); + */ + +void mk1tbl( state, sym, onenxt, onedef ) +int state, sym, onenxt, onedef; + + { + if ( firstfree < sym ) + firstfree = sym; + + while ( chk[firstfree] != 0 ) + if ( ++firstfree >= 
current_max_xpairs ) + expand_nxt_chk(); + + base[state] = firstfree - sym; + def[state] = onedef; + chk[firstfree] = state; + nxt[firstfree] = onenxt; + + if ( firstfree > tblend ) + { + tblend = firstfree++; + + if ( firstfree >= current_max_xpairs ) + expand_nxt_chk(); + } + } + + +/* mkprot - create new proto entry + * + * synopsis + * int state[], statenum, comstate; + * mkprot( state, statenum, comstate ); + */ + +void mkprot( state, statenum, comstate ) +int state[], statenum, comstate; + + { + int i, slot, tblbase; + + if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE ) + { + /* gotta make room for the new proto by dropping last entry in + * the queue + */ + slot = lastprot; + lastprot = protprev[lastprot]; + protnext[lastprot] = NIL; + } + + else + slot = numprots; + + protnext[slot] = firstprot; + + if ( firstprot != NIL ) + protprev[firstprot] = slot; + + firstprot = slot; + prottbl[slot] = statenum; + protcomst[slot] = comstate; + + /* copy state into save area so it can be compared with rapidly */ + tblbase = numecs * (slot - 1); + + for ( i = 1; i <= numecs; ++i ) + protsave[tblbase + i] = state[i]; + } + + +/* mktemplate - create a template entry based on a state, and connect the state + * to it + * + * synopsis + * int state[], statenum, comstate, totaltrans; + * mktemplate( state, statenum, comstate, totaltrans ); + */ + +void mktemplate( state, statenum, comstate ) +int state[], statenum, comstate; + + { + int i, numdiff, tmpbase, tmp[CSIZE + 1]; + Char transset[CSIZE + 1]; + int tsptr; + + ++numtemps; + + tsptr = 0; + + /* calculate where we will temporarily store the transition table + * of the template in the tnxt[] array. 
The final transition table + * gets created by cmptmps() + */ + + tmpbase = numtemps * numecs; + + if ( tmpbase + numecs >= current_max_template_xpairs ) + { + current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT; + + ++num_reallocs; + + tnxt = reallocate_integer_array( tnxt, current_max_template_xpairs ); + } + + for ( i = 1; i <= numecs; ++i ) + if ( state[i] == 0 ) + tnxt[tmpbase + i] = 0; + else + { + transset[tsptr++] = i; + tnxt[tmpbase + i] = comstate; + } + + if ( usemecs ) + mkeccl( transset, tsptr, tecfwd, tecbck, numecs, 0 ); + + mkprot( tnxt + tmpbase, -numtemps, comstate ); + + /* we rely on the fact that mkprot adds things to the beginning + * of the proto queue + */ + + numdiff = tbldiff( state, firstprot, tmp ); + mkentry( tmp, numecs, statenum, -numtemps, numdiff ); + } + + +/* mv2front - move proto queue element to front of queue + * + * synopsis + * int qelm; + * mv2front( qelm ); + */ + +void mv2front( qelm ) +int qelm; + + { + if ( firstprot != qelm ) + { + if ( qelm == lastprot ) + lastprot = protprev[lastprot]; + + protnext[protprev[qelm]] = protnext[qelm]; + + if ( protnext[qelm] != NIL ) + protprev[protnext[qelm]] = protprev[qelm]; + + protprev[qelm] = NIL; + protnext[qelm] = firstprot; + protprev[firstprot] = qelm; + firstprot = qelm; + } + } + + +/* place_state - place a state into full speed transition table + * + * synopsis + * int *state, statenum, transnum; + * place_state( state, statenum, transnum ); + * + * State is the statenum'th state. It is indexed by equivalence class and + * gives the number of the state to enter for a given equivalence class. + * Transnum is the number of out-transitions for the state. 
+ */ + +void place_state( state, statenum, transnum ) +int *state, statenum, transnum; + + { + register int i; + register int *state_ptr; + int position = find_table_space( state, transnum ); + + /* base is the table of start positions */ + base[statenum] = position; + + /* put in action number marker; this non-zero number makes sure that + * find_table_space() knows that this position in chk/nxt is taken + * and should not be used for another accepting number in another state + */ + chk[position - 1] = 1; + + /* put in end-of-buffer marker; this is for the same purposes as above */ + chk[position] = 1; + + /* place the state into chk and nxt */ + state_ptr = &state[1]; + + for ( i = 1; i <= numecs; ++i, ++state_ptr ) + if ( *state_ptr != 0 ) + { + chk[position + i] = i; + nxt[position + i] = *state_ptr; + } + + if ( position + numecs > tblend ) + tblend = position + numecs; + } + + +/* stack1 - save states with only one out-transition to be processed later + * + * synopsis + * int statenum, sym, nextstate, deflink; + * stack1( statenum, sym, nextstate, deflink ); + * + * if there's room for another state one the "one-transition" stack, the + * state is pushed onto it, to be processed later by mk1tbl. If there's + * no room, we process the sucker right now. + */ + +void stack1( statenum, sym, nextstate, deflink ) +int statenum, sym, nextstate, deflink; + + { + if ( onesp >= ONE_STACK_SIZE - 1 ) + mk1tbl( statenum, sym, nextstate, deflink ); + + else + { + ++onesp; + onestate[onesp] = statenum; + onesym[onesp] = sym; + onenext[onesp] = nextstate; + onedef[onesp] = deflink; + } + } + + +/* tbldiff - compute differences between two state tables + * + * synopsis + * int state[], pr, ext[]; + * int tbldiff, numdifferences; + * numdifferences = tbldiff( state, pr, ext ) + * + * "state" is the state array which is to be extracted from the pr'th + * proto. 
"pr" is both the number of the proto we are extracting from + * and an index into the save area where we can find the proto's complete + * state table. Each entry in "state" which differs from the corresponding + * entry of "pr" will appear in "ext". + * Entries which are the same in both "state" and "pr" will be marked + * as transitions to "SAME_TRANS" in "ext". The total number of differences + * between "state" and "pr" is returned as function value. Note that this + * number is "numecs" minus the number of "SAME_TRANS" entries in "ext". + */ + +int tbldiff( state, pr, ext ) +int state[], pr, ext[]; + + { + register int i, *sp = state, *ep = ext, *protp; + register int numdiff = 0; + + protp = &protsave[numecs * (pr - 1)]; + + for ( i = numecs; i > 0; --i ) + { + if ( *++protp == *++sp ) + *++ep = SAME_TRANS; + else + { + *++ep = *sp; + ++numdiff; + } + } + + return ( numdiff ); + } diff --git a/util/flex/yylex.c b/util/flex/yylex.c new file mode 100644 index 000000000..22bf3d616 --- /dev/null +++ b/util/flex/yylex.c @@ -0,0 +1,216 @@ +/* yylex - scanner front-end for flex */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. 
+ * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + +#include +#include "flexdef.h" +#include "parse.h" + + +/* ANSI C does not guarantee that isascii() is defined */ +#ifndef isascii +#define isascii(c) ((c) <= 0177) +#endif + + +/* yylex - scan for a regular expression token + * + * synopsis + * + * token = yylex(); + * + * token - return token found + */ + +int yylex() + + { + int toktype; + static int beglin = false; + + if ( eofseen ) + toktype = EOF; + else + toktype = flexscan(); + + if ( toktype == EOF || toktype == 0 ) + { + eofseen = 1; + + if ( sectnum == 1 ) + { + synerr( "premature EOF" ); + sectnum = 2; + toktype = SECTEND; + } + + else if ( sectnum == 2 ) + { + sectnum = 3; + toktype = 0; + } + + else + toktype = 0; + } + + if ( trace ) + { + if ( beglin ) + { + fprintf( stderr, "%d\t", num_rules + 1 ); + beglin = 0; + } + + switch ( toktype ) + { + case '<': + case '>': + case '^': + case '$': + case '"': + case '[': + case ']': + case '{': + case '}': + case '|': + case '(': + case ')': + case '-': + case '/': + case '\\': + case '?': + case '.': + case '*': + case '+': + case ',': + (void) putc( toktype, stderr ); + break; + + case '\n': + (void) putc( '\n', stderr ); + + if ( sectnum == 2 ) + beglin = 1; + + break; + + case SCDECL: + fputs( "%s", stderr ); + break; + + case XSCDECL: + fputs( "%x", stderr ); + break; + + case WHITESPACE: + (void) putc( ' ', stderr ); + break; + + case SECTEND: + fputs( "%%\n", stderr ); + + /* we set beglin to be true so we'll start + * writing out numbers as we echo rules. 
flexscan() has + * already assigned sectnum + */ + + if ( sectnum == 2 ) + beglin = 1; + + break; + + case NAME: + fprintf( stderr, "'%s'", nmstr ); + break; + + case CHAR: + switch ( yylval ) + { + case '<': + case '>': + case '^': + case '$': + case '"': + case '[': + case ']': + case '{': + case '}': + case '|': + case '(': + case ')': + case '-': + case '/': + case '\\': + case '?': + case '.': + case '*': + case '+': + case ',': + fprintf( stderr, "\\%c", yylval ); + break; + + default: + if ( ! isascii( yylval ) || ! isprint( yylval ) ) + fprintf( stderr, "\\%.3o", yylval ); + else + (void) putc( yylval, stderr ); + break; + } + + break; + + case NUMBER: + fprintf( stderr, "%d", yylval ); + break; + + case PREVCCL: + fprintf( stderr, "[%d]", yylval ); + break; + + case EOF_OP: + fprintf( stderr, "<>" ); + break; + + case 0: + fprintf( stderr, "End Marker" ); + break; + + default: + fprintf( stderr, "*Something Weird* - tok: %d val: %d\n", + toktype, yylval ); + break; + } + } + + return ( toktype ); + }