From: Alan Cox Date: Mon, 29 Dec 2014 23:33:31 +0000 (+0000) Subject: utils: from minixspace this time X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=9c745212af19a4a490acc1663fd6b9b22b35f113;p=FUZIX.git utils: from minixspace this time Note: sed gives sdcc the shits. Build send with 100 not 20000 or you'll be waiting all week --- diff --git a/Applications/util/Makefile b/Applications/util/Makefile index 917ab312..60724ab9 100644 --- a/Applications/util/Makefile +++ b/Applications/util/Makefile @@ -65,13 +65,17 @@ SRCS = banner.c \ pwd.c \ rm.c \ rmdir.c \ + sed.c \ sleep.c \ ssh.c \ + sort.c \ sum.c \ su.c \ sync.c \ + tee.c \ termcap.c \ tget.c \ + tail.c \ touch.c \ tr.c \ true.c \ diff --git a/Applications/util/sed.c b/Applications/util/sed.c new file mode 100644 index 00000000..2280b634 --- /dev/null +++ b/Applications/util/sed.c @@ -0,0 +1,1615 @@ +/* + Copyright (c) 1987,1997, Prentice Hall + All rights reserved. + + Redistribution and use of the MINIX operating system in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Prentice Hall nor the names of the software + authors or contributors may be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS, AUTHORS, AND + CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ IN NO EVENT SHALL PRENTICE HALL OR ANY AUTHORS OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +/* sed - stream editor Author: Eric S. Raymond */ + +/* This used to be three different files with the following makefile: + * (Note the chmem). + +CFLAGS= -F -T. + +OBJS= sedcomp.s sedexec.s + +sed: $(OBJS) + cc -T. -o sed $(OBJS) + @chmem =13312 sed + +$(OBJS): sed.h + + * If you want longer lines: increase MAXBUF. + * If you want scripts with more text: increase POOLSIZE. + * If you want more commands per script: increase MAXCMDS. + */ + +#include +#include +#include +#include +#include +#include + +/*+++++++++++++++*/ + +/* Sed.h -- types and constants for the stream editor */ + +/* Data area sizes used by both modules */ +#define MAXBUF 4000 /* current line buffer size */ +#define MAXAPPENDS 20 /* maximum number of appends */ +#define MAXTAGS 9 /* tagged patterns are \1 to \9 */ + +/* Constants for compiled-command representation */ +#define EQCMD 0x01 /* = -- print current line number */ +#define ACMD 0x02 /* a -- append text after current line */ +#define BCMD 0x03 /* b -- branch to label */ +#define CCMD 0x04 /* c -- change current line */ +#define DCMD 0x05 /* d -- delete all of pattern space */ +#define CDCMD 0x06 /* D -- delete first line of pattern space */ +#define GCMD 0x07 /* g -- copy hold space to pattern space */ +#define CGCMD 0x08 /* G -- append hold space to pattern space */ +#define HCMD 0x09 /* h -- copy pattern space to hold space */ +#define CHCMD 0x0A /* H -- append pattern space to hold space */ +#define 
ICMD 0x0B /* i -- insert text before current line */ +#define LCMD 0x0C /* l -- print pattern space in escaped form */ +#define NCMD 0x0D /* n -- get next line into pattern space */ +#define CNCMD 0x0E /* N -- append next line to pattern space */ +#define PCMD 0x0F /* p -- print pattern space to output */ +#define CPCMD 0x10 /* P -- print first line of pattern space */ +#define QCMD 0x11 /* q -- exit the stream editor */ +#define RCMD 0x12 /* r -- read in a file after current line */ +#define SCMD 0x13 /* s -- regular-expression substitute */ +#define TCMD 0x14 /* t -- branch on any substitute successful */ +#define CTCMD 0x15 /* T -- branch on any substitute failed */ +#define WCMD 0x16 /* w -- write pattern space to file */ +#define CWCMD 0x17 /* W -- write first line of pattern space */ +#define XCMD 0x18 /* x -- exhange pattern and hold spaces */ +#define YCMD 0x19 /* y -- transliterate text */ + +struct cmd_t { /* compiled-command representation */ + char *addr1; /* first address for command */ + char *addr2; /* second address for command */ + union { + char *lhs; /* s command lhs */ + struct cmd_t *link; /* label link */ + } u; + char command; /* command code */ + char *rhs; /* s command replacement string */ + FILE *fout; /* associated output file descriptor */ + struct { + char allbut; /* was negation specified? */ + char global; /* was g postfix specified? */ + char print; /* was p postfix specified? */ + char inrange; /* in an address range? 
*/ + } flags; +}; +typedef struct cmd_t sedcmd; /* use this name for declarations */ + +#define BAD ((char *) -1) /* guaranteed not a string ptr */ + + + +/* Address and regular expression compiled-form markers */ +#define STAR 1 /* marker for Kleene star */ +#define CCHR 2 /* non-newline character to be matched + * follows */ +#define CDOT 4 /* dot wild-card marker */ +#define CCL 6 /* character class follows */ +#define CNL 8 /* match line start */ +#define CDOL 10 /* match line end */ +#define CBRA 12 /* tagged pattern start marker */ +#define CKET 14 /* tagged pattern end marker */ +#define CBACK 16 /* backslash-digit pair marker */ +#define CLNUM 18 /* numeric-address index follows */ +#define CEND 20 /* symbol for end-of-source */ +#define CEOF 22 /* end-of-field mark */ + +/* Sed.h ends here */ + +#ifndef CMASK +#define CMASK 0xFF /* some char type should have been unsigned + * char? */ +#endif + +/*+++++++++++++++*/ + +/* Sed - stream editor Author: Eric S. Raymond */ + +/* + The stream editor compiles its command input (from files or -e options) + into an internal form using compile() then executes the compiled form using + execute(). Main() just initializes data structures, interprets command line + options, and calls compile() and execute() in appropriate sequence. + + The data structure produced by compile() is an array of compiled-command + structures (type sedcmd). These contain several pointers into pool[], the + regular-expression and text-data pool, plus a command code and g & p flags. + In the special case that the command is a label the struct will hold a ptr + into the labels array labels[] during most of the compile, until resolve() + resolves references at the end. + + The operation of execute() is described in its source module. 
+*/ + +/* #include */ +/* #include "sed.h" */ + +/* Imported functions */ + +/***** public stuff ******/ + +#define MAXCMDS 500 /* maximum number of compiled commands */ +#define MAXLINES 256 /* max # numeric addresses to compile */ + +/* Main data areas */ +char linebuf[MAXBUF + 1]; /* current-line buffer */ +sedcmd cmds[MAXCMDS + 1]; /* hold compiled commands */ +long linenum[MAXLINES]; /* numeric-addresses table */ + +/* Miscellaneous shared variables */ +int nflag; /* -n option flag */ +int eargc; /* scratch copy of argument count */ +char **eargv; /* scratch copy of argument list */ +char bits[] = {1, 2, 4, 8, 16, 32, 64, 128}; + +/***** module common stuff *****/ + +#define POOLSIZE 20000 /* size of string-pool space */ +#define WFILES 10 /* max # w output files that can be compiled */ +#define RELIMIT 256 /* max chars in compiled RE */ +#define MAXDEPTH 20 /* maximum {}-nesting level */ +#define MAXLABS 50 /* max # of labels that can be handled */ + +#define SKIPWS(pc) while ((*pc==' ') || (*pc=='\t')) pc++ +#define ABORT(msg) (fprintf(stderr, msg, linebuf), quit(2)) +#define IFEQ(x, v) if (*x == v) x++ , /* do expression */ + +/* Error messages */ +static char AGMSG[] = "sed: garbled address %s\n"; +static char CGMSG[] = "sed: garbled command %s\n"; +static char TMTXT[] = "sed: too much text: %s\n"; +static char AD1NG[] = "sed: no addresses allowed for %s\n"; +static char AD2NG[] = "sed: only one address allowed for %s\n"; +static char TMCDS[] = "sed: too many commands, last was %s\n"; +static char COCFI[] = "sed: cannot open command-file %s\n"; +static char UFLAG[] = "sed: unknown flag %c\n"; +static char CCOFI[] = "sed: cannot create %s\n"; +static char ULABL[] = "sed: undefined label %s\n"; +static char TMLBR[] = "sed: too many {'s\n"; +static char FRENL[] = "sed: first RE must be non-null\n"; +static char NSCAX[] = "sed: no such command as %s\n"; +static char TMRBR[] = "sed: too many }'s\n"; +static char DLABL[] = "sed: duplicate label %s\n"; +static 
char TMLAB[] = "sed: too many labels: %s\n"; +static char TMWFI[] = "sed: too many w files\n"; +static char REITL[] = "sed: RE too long: %s\n"; +static char TMLNR[] = "sed: too many line numbers\n"; +static char TRAIL[] = "sed: command \"%s\" has trailing garbage\n"; + +typedef struct { /* represent a command label */ + char *name; /* the label name */ + sedcmd *last; /* it's on the label search list */ + sedcmd *address; /* pointer to the cmd it labels */ +} + + label; + +/* Label handling */ +static label labels[MAXLABS]; /* here's the label table */ +static label *lab = labels + 1; /* pointer to current label */ +static label *lablst = labels; /* header for search list */ + +/* String pool for regular expressions, append text, etc. etc. */ +static char pool[POOLSIZE]; /* the pool */ +static char *fp = pool; /* current pool pointer */ +static char *poolend = pool + POOLSIZE; /* pointer past pool end */ + +/* Compilation state */ +static FILE *cmdf = NULL; /* current command source */ +static char *cp = linebuf; /* compile pointer */ +static sedcmd *cmdp = cmds; /* current compiled-cmd ptr */ +static char *lastre = NULL; /* old RE pointer */ +static int bdepth = 0; /* current {}-nesting level */ +static int bcount = 0; /* # tagged patterns in current RE */ + +/* Compilation flags */ +static int eflag; /* -e option flag */ +static int gflag; /* -g option flag */ + +int main(int argc, char **argv); +static void compile(void); +static int cmdcomp(char cchar); +static char *rhscomp(char *rhsp, int delim); +static char *recomp(char *expbuf, int redelim); +static int cmdline(char *cbuf); +static char *address(char *expbuf); +static char *gettext(char *txp); +static label *search(label *ptr); +static void resolve(void); +static char *ycomp(char *ep, char delim); +void quit(int n); +void execute(void); +static int selected(sedcmd *ipc); +static int match(char *expbuf, int gf); +static int advance(char *lp, char *ep); +static int substitute(sedcmd *ipc); +static void 
dosub(char *rhsbuf); +static char *place(char *asp, char *al1, char *al2); +static void listto(char *p1, FILE *fp); +static void truncated(int h); +static void command(sedcmd *ipc); +static void openfile(char *file); +static void get(void); +static void initget(void); +static char *getline(char *buf); +static int Memcmp(char *a, char *b, int count); +static void readout(void); + +int main(int argc, char *argv[]) +/* Main sequence of the stream editor */ +/* Options handled here: -e script, -f file, -g, -n. If the option scan compiles nothing, the argument where scanning stopped is compiled as the script. */ +{ + eargc = argc; /* set local copy of argument count */ + eargv = argv; /* set local copy of argument list */ + cmdp->addr1 = pool; /* 1st addr expand will be at pool start */ + if (eargc == 1) quit(0); /* exit immediately if no arguments */ + /* Scan through the arguments, interpreting each one */ + while ((--eargc > 0) && (**++eargv == '-')) switch (eargv[0][1]) { + case 'e': + eflag++; + compile(); /* compile with e flag on */ + eflag = 0; + continue; /* get another argument */ + case 'f': + if (--eargc <= 0) /* barf if no -f file: test the decremented count so ++eargv never reaches argv[argc] (NULL) */ + quit(2); + if ((cmdf = fopen(*++eargv, "r")) == NULL) { + fprintf(stderr, COCFI, *eargv); + quit(2); + } + compile(); /* file is O.K., compile it */ + fclose(cmdf); + continue; /* go back for another argument */ + case 'g': + gflag++; /* set global flag on all s cmds */ + continue; + case 'n': + nflag++; /* no print except on p flag or w */ + continue; + default: + fprintf(stderr, UFLAG, eargv[0][1]); /* diagnostics go to stderr like the other messages, not into the filtered output */ + continue; + } + + + if (cmdp == cmds) { /* no commands have been compiled */ + eargv--; + eargc++; + eflag++; + compile(); + eflag = 0; + eargv++; + eargc--; + } + if (bdepth) /* we have unbalanced squigglies */ + ABORT(TMLBR); + + lablst->address = cmdp; /* set up header of label linked list */ + resolve(); /* resolve label table indirections */ + execute(); /* execute commands */ + quit(0); /* everything was O.K. 
if we got here */ + return(0); +} + + +#define H 0x80 /* 128 bit, on if there's really code for + * command */ +#define LOWCMD 56 /* = '8', lowest char indexed in cmdmask */ + +/* Indirect through this to get command internal code, if it exists */ +static char cmdmask[] = +{ + 0, 0, H, 0, 0, H + EQCMD, 0, 0, + 0, 0, 0, 0, H + CDCMD, 0, 0, CGCMD, + CHCMD, 0, 0, 0, 0, 0, CNCMD, 0, + CPCMD, 0, 0, 0, H + CTCMD, 0, 0, H + CWCMD, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, H + ACMD, H + BCMD, H + CCMD, DCMD, 0, 0, GCMD, + HCMD, H + ICMD, 0, 0, H + LCMD, 0, NCMD, 0, + PCMD, H + QCMD, H + RCMD, H + SCMD, H + TCMD, 0, 0, H + WCMD, + XCMD, H + YCMD, 0, H + BCMD, 0, H, 0, 0, +}; + +static void compile(void) +/* Precompile sed commands out of a file */ +{ + char ccode; + + + for (;;) { /* main compilation loop */ + if (*cp == '\0') { /* get a new command line */ + *linebuf = '\0'; /* K.H */ + if (cmdline(cp = linebuf) < 0) break; + } + SKIPWS(cp); + if (*cp == '\0') /* empty */ + continue; + if (*cp == '#') { /* comment */ + while (*cp) ++cp; + continue; + } + if (*cp == ';') { /* ; separates cmds */ + cp++; + continue; + } + + /* Compile first address */ + if (fp > poolend) + ABORT(TMTXT); + else if ((fp = address(cmdp->addr1 = fp)) == BAD) + ABORT(AGMSG); + + if (fp == cmdp->addr1) {/* if empty RE was found */ + if (lastre) /* if there was previous RE */ + cmdp->addr1 = lastre; /* use it */ + else + ABORT(FRENL); + } else if (fp == NULL) {/* if fp was NULL */ + fp = cmdp->addr1; /* use current pool location */ + cmdp->addr1 = NULL; + } else { + lastre = cmdp->addr1; + if (*cp == ',' || *cp == ';') { /* there's 2nd addr */ + cp++; + if (fp > poolend) ABORT(TMTXT); + fp = address(cmdp->addr2 = fp); + if (fp == BAD || fp == NULL) ABORT(AGMSG); + if (fp == cmdp->addr2) + cmdp->addr2 = lastre; + else + lastre = cmdp->addr2; + } else + cmdp->addr2 = NULL; /* no 2nd address */ + } + if (fp > poolend) ABORT(TMTXT); + + SKIPWS(cp); /* discard whitespace after address */ + IFEQ(cp, '!') 
cmdp->flags.allbut = 1; + + SKIPWS(cp); /* get cmd char, range-check it */ + if ((*cp < LOWCMD) || (*cp > '~') + || ((ccode = cmdmask[*cp - LOWCMD]) == 0)) + ABORT(NSCAX); + + cmdp->command = ccode & ~H; /* fill in command value */ + if ((ccode & H) == 0) /* if no compile-time code */ + cp++; /* discard command char */ + else if (cmdcomp(*cp++))/* execute it; if ret = 1 */ + continue; /* skip next line read */ + + if (++cmdp >= cmds + MAXCMDS) ABORT(TMCDS); + + SKIPWS(cp); /* look for trailing stuff */ + if (*cp != '\0' && *cp != ';' && *cp != '#') ABORT(TRAIL); + } +} + +static int cmdcomp(char cchar) +/* Compile a single command */ +{ + static sedcmd **cmpstk[MAXDEPTH]; /* current cmd stack for {} */ + static char *fname[WFILES]; /* w file name pointers */ + static FILE *fout[WFILES]; /* w file file ptrs */ + static int nwfiles = 1; /* count of open w files */ + int i; /* indexing dummy used in w */ + sedcmd *sp1, *sp2; /* temps for label searches */ + label *lpt; + char redelim; /* current RE delimiter */ + + fout[0] = stdout; + switch (cchar) { + case '{': /* start command group */ + cmdp->flags.allbut = !cmdp->flags.allbut; + cmpstk[bdepth++] = &(cmdp->u.link); + if (++cmdp >= cmds + MAXCMDS) ABORT(TMCDS); + return(1); + + case '}': /* end command group */ + if (cmdp->addr1) ABORT(AD1NG); /* no addresses allowed */ + if (--bdepth < 0) ABORT(TMRBR); /* too many right braces */ + *cmpstk[bdepth] = cmdp; /* set the jump address */ + return(1); + + case '=': /* print current source line number */ + case 'q': /* exit the stream editor */ + if (cmdp->addr2) ABORT(AD2NG); + break; + + case ':': /* label declaration */ + if (cmdp->addr1) ABORT(AD1NG); /* no addresses allowed */ + fp = gettext(lab->name = fp); /* get the label name */ + if (lpt = search(lab)) {/* does it have a double? 
*/ + if (lpt->address) ABORT(DLABL); /* yes, abort */ + } else { /* check that it doesn't overflow label table */ + lab->last = NULL; + lpt = lab; + if (++lab >= labels + MAXLABS) ABORT(TMLAB); + } + lpt->address = cmdp; + return(1); + + case 'b': /* branch command */ + case 't': /* branch-on-succeed command */ + case 'T': /* branch-on-fail command */ + SKIPWS(cp); + if (*cp == '\0') { /* if branch is to start of cmds... */ + /* Add current command to end of label last */ + if (sp1 = lablst->last) { + while (sp2 = sp1->u.link) sp1 = sp2; + sp1->u.link = cmdp; + } else /* lablst->last == NULL */ + lablst->last = cmdp; + break; + } + fp = gettext(lab->name = fp); /* else get label into pool */ + if (lpt = search(lab)) {/* enter branch to it */ + if (lpt->address) + cmdp->u.link = lpt->address; + else { + sp1 = lpt->last; + while (sp2 = sp1->u.link) sp1 = sp2; + sp1->u.link = cmdp; + } + } else { /* matching named label not found */ + lab->last = cmdp; /* add the new label */ + lab->address = NULL; /* it's forward of here */ + if (++lab >= labels + MAXLABS) /* overflow if last */ + ABORT(TMLAB); + } + break; + + case 'a': /* append text */ + case 'i': /* insert text */ + case 'r': /* read file into stream */ + if (cmdp->addr2) ABORT(AD2NG); + case 'c': /* change text */ + if ((*cp == '\\') && (*++cp == '\n')) cp++; + fp = gettext(cmdp->u.lhs = fp); + break; + + case 'D': /* delete current line in hold space */ + cmdp->u.link = cmds; + break; + + case 's': /* substitute regular expression */ + redelim = *cp++; /* get delimiter from 1st ch */ + if ((fp = recomp(cmdp->u.lhs = fp, redelim)) == BAD) ABORT(CGMSG); + if (fp == cmdp->u.lhs) /* if compiled RE zero len */ + cmdp->u.lhs = lastre; /* use the previous one */ + else /* otherwise */ + lastre = cmdp->u.lhs; /* save the one just found */ + if ((cmdp->rhs = fp) > poolend) ABORT(TMTXT); + if ((fp = rhscomp(cmdp->rhs, redelim)) == BAD) ABORT(CGMSG); + if (gflag) cmdp->flags.global ++; + while (*cp == 'g' || *cp == 'p' || 
*cp == 'P') { + IFEQ(cp, 'g') cmdp->flags.global ++; + IFEQ(cp, 'p') cmdp->flags.print = 1; + IFEQ(cp, 'P') cmdp->flags.print = 2; + } + + case 'l': /* list pattern space */ + if (*cp == 'w') + cp++; /* and execute a w command! */ + else + break; /* s or l is done */ + + case 'w': /* write-pattern-space command */ + case 'W': /* write-first-line command */ + if (nwfiles >= WFILES) ABORT(TMWFI); + fp = gettext(fname[nwfiles] = fp); /* filename will be in pool */ + for (i = nwfiles - 1; i >= 0; i--) /* match it in table */ + if ((fname[i] != NULL) && + (strcmp(fname[nwfiles], fname[i]) == 0)) { + cmdp->fout = fout[i]; + return(0); + } + + /* If didn't find one, open new out file */ + if ((cmdp->fout = fopen(fname[nwfiles], "w")) == NULL) { + fprintf(stderr, CCOFI, fname[nwfiles]); + quit(2); + } + fout[nwfiles++] = cmdp->fout; + break; + + case 'y': /* transliterate text */ + fp = ycomp(cmdp->u.lhs = fp, *cp++); /* compile translit */ + if (fp == BAD) ABORT(CGMSG); /* fail on bad form */ + if (fp > poolend) ABORT(TMTXT); /* fail on overflow */ + break; + } + return(0); /* succeeded in interpreting one command */ +} + +static char *rhscomp(char *rhsp, int delim) /* uses bcount */ + /* Generate replacement string for substitute command right hand side */ +/* rhsp: place to compile expression to */ +/* delim: regular-expression end-mark to look for */ +{ + register char *p = cp; /* strictly for speed */ + + for (;;) + if ((*rhsp = *p++) == '\\') { /* copy; if it's a \, */ + *rhsp = *p++; /* copy escaped char */ + /* Check validity of pattern tag */ + if (*rhsp > bcount + '0' && *rhsp <= '9') return(BAD); + *rhsp++ |= 0x80;/* mark the good ones */ + continue; + } else if (*rhsp == delim) { /* found RE end, hooray... */ + *rhsp++ = '\0'; /* cap the expression string */ + cp = p; + return(rhsp); /* pt at 1 past the RE */ + } else if (*rhsp++ == '\0') /* last ch not RE end, help! 
*/ + return(BAD); +} + +static char *recomp(char *expbuf, int redelim) /* uses cp, bcount */ +/* Compile a regular expression to internal form */ +/* expbuf: place to compile it to */ +/* redelim: RE end-marker to look for */ +{ + register char *ep = expbuf; /* current-compiled-char pointer */ + register char *sp = cp; /* source-character ptr */ + register int c; /* current-character pointer */ + char negclass; /* all-but flag */ + char *lastep; /* ptr to last expr compiled */ + char *svclass; /* start of current char class */ + char brnest[MAXTAGS]; /* bracket-nesting array */ + char *brnestp; /* ptr to current bracket-nest */ + int classct; /* class element count */ + int tags; /* # of closed tags */ + + if (*cp == redelim) /* if first char is RE endmarker */ + return(cp++, expbuf); /* leave existing RE unchanged */ + + lastep = NULL; /* there's no previous RE */ + brnestp = brnest; /* initialize ptr to brnest array */ + tags = bcount = 0; /* initialize counters */ + + if (*ep++ = (*sp == '^')) /* check for start-of-line syntax */ + sp++; + + for (;;) { + if (ep >= expbuf + RELIMIT) /* match is too large */ + return(cp = sp, BAD); + if ((c = *sp++) == redelim) { /* found the end of the RE */ + cp = sp; + if (brnestp != brnest) /* \(, \) unbalanced */ + return(BAD); + *ep++ = CEOF; /* write end-of-pattern mark */ + return(ep); /* return ptr to compiled RE */ + } + if (c != '*') /* if we're a postfix op */ + lastep = ep; /* get ready to match last */ + + switch (c) { + case '\\': + if ((c = *sp++) == '(') { /* start tagged section */ + if (bcount >= MAXTAGS) return(cp = sp, BAD); + *brnestp++ = bcount; /* update tag stack */ + *ep++ = CBRA; /* enter tag-start */ + *ep++ = bcount++; /* bump tag count */ + continue; + } else if (c == ')') { /* end tagged section */ + if (brnestp <= brnest) /* extra \) */ + return(cp = sp, BAD); + *ep++ = CKET; /* enter end-of-tag */ + *ep++ = *--brnestp; /* pop tag stack */ + tags++; /* count closed tags */ + continue; + } else if (c 
>= '1' && c <= '9') { /* tag use */ + if ((c -= '1') >= tags) /* too few */ + return(BAD); + *ep++ = CBACK; /* enter tag mark */ + *ep++ = c; /* and the number */ + continue; + } else if (c == '\n') /* escaped newline no good */ + return(cp = sp, BAD); + else if (c == 'n') /* match a newline */ + c = '\n'; + else if (c == 't') /* match a tab */ + c = '\t'; + else if (c == 'r') /* match a return */ + c = '\r'; + goto defchar; + + case '\0': /* ignore nuls */ + continue; + + case '\n': /* trailing pattern delimiter is missing */ + return(cp = sp, BAD); + + case '.': /* match any char except newline */ + *ep++ = CDOT; + continue; + case '*': /* 0..n repeats of previous pattern */ + if (lastep == NULL) /* if * isn't first on line */ + goto defchar; /* match a literal * */ + if (*lastep == CKET) /* can't iterate a tag */ + return(cp = sp, BAD); + *lastep |= STAR;/* flag previous pattern */ + continue; + + case '$': /* match only end-of-line */ + if (*sp != redelim) /* if we're not at end of RE */ + goto defchar; /* match a literal $ */ + *ep++ = CDOL; /* insert end-symbol mark */ + continue; + + case '[': /* begin character set pattern */ + if (ep + 17 >= expbuf + RELIMIT) ABORT(REITL); + *ep++ = CCL; /* insert class mark */ + if (negclass = ((c = *sp++) == '^')) c = *sp++; + svclass = sp; /* save ptr to class start */ + do { + if (c == '\0') ABORT(CGMSG); + + /* Handle character ranges */ + if (c == '-' && sp > svclass && *sp != ']') + for (c = sp[-2]; c < *sp; c++) + ep[c >> 3] |= bits[c & 7]; + + /* Handle escape sequences in sets */ + if (c == '\\') + if ((c = *sp++) == 'n') + c = '\n'; + else if (c == 't') + c = '\t'; + else if (c == 'r') + c = '\r'; + + /* Enter (possibly translated) char in set */ + ep[c >> 3] |= bits[c & 7]; + } while + ((c = *sp++) != ']'); + + /* Invert the bitmask if all-but was specified */ + if (negclass) for (classct = 0; classct < 16; classct++) + ep[classct] ^= 0xFF; + ep[0] &= 0xFE; /* never match ASCII 0 */ + ep += 16; /* advance ep 
past set mask */ + continue; + + defchar: /* match literal character */ + default: /* which is what we'd do by default */ + *ep++ = CCHR; /* insert character mark */ + *ep++ = c; + } + } +} + +static int cmdline(char *cbuf) /* uses eflag, eargc, cmdf */ + /* Read next command from -e argument or command file */ + /* Returns 1 after storing one NUL-terminated command line at cbuf, -1 when the -e argument or command file has nothing more (or a -e argument ends in a lone backslash) */ +{ + register int inc; /* not char because must hold EOF */ + + *cbuf-- = 0; /* so pre-increment points us at cbuf */ + + /* E command flag is on */ + if (eflag) { + register char *p; /* ptr to current -e argument */ + static char *savep; /* saves previous value of p */ + + if (eflag > 0) { /* there are pending -e arguments */ + eflag = -1; + if (eargc-- <= 0) quit(2); /* if no arguments, barf */ + + /* Else transcribe next e argument into cbuf */ + p = *++eargv; + while (*++cbuf = *p++) + if (*cbuf == '\\') { + if ((*++cbuf = *p++) == '\0') + return(savep = NULL, -1); + else + continue; + } else if (*cbuf == '\n') { /* end of 1 cmd line */ + *cbuf = '\0'; + return(savep = p, 1); + /* We'll be back for the rest... */ + } + + /* Found end-of-string; can advance to next argument */ + return(savep = NULL, 1); + } + if ((p = savep) == NULL) return(-1); + + /* Continue with the next line of the same -e argument */ + while (*++cbuf = *p++) + if (*cbuf == '\\') { + if ((*++cbuf = *p++) == '\0') /* was '0': must test for end of argument, as the first loop does, or p runs past the string */ + return(savep = NULL, -1); + else + continue; + } else if (*cbuf == '\n') { + *cbuf = '\0'; + return(savep = p, 1); + } + return(savep = NULL, 1); + } + + /* If no -e flag read from command file descriptor */ + while ((inc = getc(cmdf)) != EOF) /* get next char */ + if ((*++cbuf = inc) == '\\') /* if it's escape */ + *++cbuf = inc = getc(cmdf); /* get next char */ + else if (*cbuf == '\n') /* end on newline */ + return(*cbuf = '\0', 1); /* cap the string */ + + return(*++cbuf = '\0', -1); /* end-of-file, no more chars */ +} + +static char *address(char *expbuf) /* uses cp, linenum */ + /* Expand an address at *cp... 
into expbuf, return ptr at following char */ +{ + static int numl = 0; /* current ind in addr-number table */ + register char *rcp; /* temp compile ptr for forwd look */ + long lno; /* computed value of numeric address */ + + if (*cp == '$') { /* end-of-source address */ + *expbuf++ = CEND; /* write symbolic end address */ + *expbuf++ = CEOF; /* and the end-of-address mark (!) */ + cp++; /* go to next source character */ + return(expbuf); /* we're done */ + } + if (*cp == '/' || *cp == '\\') { /* start of regular-expression match */ + if (*cp == '\\') cp++; + return(recomp(expbuf, *cp++)); /* compile the RE */ + } + + rcp = cp; + lno = 0; /* now handle a numeric address */ + while (*rcp >= '0' && *rcp <= '9') /* collect digits */ + lno = lno * 10 + *rcp++ - '0'; /* compute their value */ + + if (rcp > cp) { /* if we caught a number... */ + *expbuf++ = CLNUM; /* put a numeric-address marker */ + *expbuf++ = numl; /* and the address table index */ + linenum[numl++] = lno; /* and set the table entry */ + if (numl >= MAXLINES) /* oh-oh, address table overflow */ + ABORT(TMLNR); /* abort with error message */ + *expbuf++ = CEOF; /* write the end-of-address marker */ + cp = rcp; /* point compile past the address */ + return(expbuf); /* we're done */ + } + return(NULL); /* no legal address was found */ +} + +static char *gettext(char *txp) /* uses global cp */ + /* Accept multiline input from *cp..., discarding leading whitespace */ +{ + register char *p = cp; /* this is for speed */ + + SKIPWS(p); /* discard whitespace */ + do { + if ((*txp = *p++) == '\\') /* handle escapes */ + *txp = *p++; + if (*txp == '\0') /* we're at end of input */ + return(cp = --p, ++txp); + else if (*txp == '\n') /* also SKIPWS after newline */ + SKIPWS(p); + } while + (txp++); /* keep going till we find that nul */ + return(txp); +} + +static label *search(label *ptr) /* uses global lablst */ + /* Find the label matching *ptr, return NULL if none */ +{ + register label *rp; + for (rp = 
lablst; rp < ptr; rp++) + if ((rp->name != NULL) && (strcmp(rp->name, ptr->name) == 0)) + return(rp); + return(NULL); +} + +static void resolve(void) +{ /* uses global lablst */ + /* Write label links into the compiled-command space */ + register label *lptr; + register sedcmd *rptr, *trptr; + + /* Loop through the label table */ + for (lptr = lablst; lptr < lab; lptr++) + if (lptr->address == NULL) { /* barf if not defined */ + fprintf(stderr, ULABL, lptr->name); + quit(2); + } else if (lptr->last) {/* if last is non-null */ + rptr = lptr->last; /* chase it */ + while (trptr = rptr->u.link) { /* resolve refs */ + rptr->u.link = lptr->address; + rptr = trptr; + } + rptr->u.link = lptr->address; + } +} + +static char *ycomp(char *ep, char delim) +/* Compile a y (transliterate) command */ +/* ep: where to compile to */ +/* delim: end delimiter to look for */ +{ + register char *tp, *sp; + register int c; + + /* Scan the 'from' section for invalid chars */ + for (sp = tp = cp; *tp != delim; tp++) { + if (*tp == '\\') tp++; + if ((*tp == '\n') || (*tp == '\0')) return (BAD); + } + tp++; /* tp now points at first char of 'to' + * section */ + + /* Now rescan the 'from' section */ + while ((c = *sp++ & 0x7F) != delim) { + if (c == '\\' && *sp == 'n') { + sp++; + c = '\n'; + } + if ((ep[c] = *tp++) == '\\' && *tp == 'n') { + ep[c] = '\n'; + tp++; + } + if ((ep[c] == delim) || (ep[c] == '\0')) return(BAD); + } + + if (*tp != delim) /* 'to', 'from' parts have unequal lengths */ + return(BAD); + + cp = ++tp; /* point compile ptr past translit */ + + for (c = 0; c < 128; c++) /* fill in self-map entries in table */ + if (ep[c] == 0) ep[c] = c; + + return(ep + 0x80); /* return first free location past table end */ +} + +void quit(int n) +{ +/* Flush buffers and exit. Now a historical relic. Rely on exit to flush + * the buffers. 
+ */ + exit(n); +} + +/*+++++++++++++++*/ + +/* + sedexec.c -- execute compiled form of stream editor commands + + The single entry point of this module is the function execute(). It + may take a string argument (the name of a file to be used as text) or + the argument NULL which tells it to filter standard input. It executes + the compiled commands in cmds[] on each line in turn. + + The function command() does most of the work. Match() and advance() + are used for matching text against precompiled regular expressions and + dosub() does right-hand-side substitution. Getline() does text input; + readout() and Memcmp() are output and string-comparison utilities. +*/ + +/* #include */ +/* #include */ +/* #include "sed.h" */ + +#define MAXHOLD MAXBUF /* size of the hold space */ +#define GENSIZ MAXBUF /* maximum genbuf size */ + +#define TRUE 1 +#define FALSE 0 + +static char LTLMSG[] = "sed: line too long\n"; + +static char *spend; /* current end-of-line-buffer pointer */ +static long lnum = 0L; /* current source line number */ + +/* Append buffer maintenance */ +static sedcmd *appends[MAXAPPENDS]; /* array of ptrs to a,i,c commands */ +static sedcmd **aptr = appends; /* ptr to current append */ + +/* Genbuf and its pointers */ +static char genbuf[GENSIZ]; +static char *loc1; +static char *loc2; +static char *locs; + +/* Command-logic flags */ +static int lastline; /* do-line flag */ +static int jump; /* jump to cmd's link address if set */ +static int delete; /* delete command flag */ + +/* Tagged-pattern tracking */ +static char *bracend[MAXTAGS]; /* tagged pattern start pointers */ +static char *brastart[MAXTAGS]; /* tagged pattern end pointers */ + +static int anysub; /* true if any s on current line succeeded */ + + +void execute(void) +/* Execute the compiled commands in cmds[] */ +{ + register char *p1; /* dummy copy ptrs */ + register sedcmd *ipc; /* ptr to current command */ + char *execp; /* ptr to source */ + + + initget(); + + /* Here's the main 
command-execution loop */ + for (;;) { + + /* Get next line to filter */ + if ((execp = getline(linebuf)) == BAD) return; + spend = execp; + anysub = FALSE; + + /* Loop through compiled commands, executing them */ + for (ipc = cmds; ipc->command;) { + if (!selected(ipc)) { + ipc++; + continue; + } + command(ipc); /* execute the command pointed at */ + + if (delete) /* if delete flag is set */ + break; /* don't exec rest of compiled cmds */ + + if (jump) { /* if jump set, follow cmd's link */ + jump = FALSE; + if ((ipc = ipc->u.link) == 0) { + ipc = cmds; + break; + } + } else /* normal goto next command */ + ipc++; + } + + /* We've now done all modification commands on the line */ + + /* Here's where the transformed line is output */ + if (!nflag && !delete) { + for (p1 = linebuf; p1 < spend; p1++) putc(*p1, stdout); + putc('\n', stdout); + } + + /* If we've been set up for append, emit the text from it */ + if (aptr > appends) readout(); + + delete = FALSE; /* clear delete flag; about to get next cmd */ + } +} + +static int selected(sedcmd *ipc) +/* Is current command selected */ +{ + register char *p1 = ipc->addr1; /* point p1 at first address */ + register char *p2 = ipc->addr2; /* and p2 at second */ + int c; + int sel = TRUE; /* select by default */ + + if (!p1) /* No addresses: always selected */ + ; + else if (ipc->flags.inrange) { + if (*p2 == CEND); + else if (*p2 == CLNUM) { + c = p2[1] & CMASK; + if (lnum >= linenum[c]) { + ipc->flags.inrange = FALSE; + if (lnum > linenum[c]) sel = FALSE; + } + } else if (match(p2, 0)) + ipc->flags.inrange = FALSE; + } else if (*p1 == CEND) { + if (!lastline) sel = FALSE; + } else if (*p1 == CLNUM) { + c = p1[1] & CMASK; + if (lnum != linenum[c]) + sel = FALSE; + else if (p2) + ipc->flags.inrange = TRUE; + } else if (match(p1, 0)) { + if (p2) ipc->flags.inrange = TRUE; + } else + sel = FALSE; + + return ipc->flags.allbut ? 
!sel : sel; +} + +static int match(char *expbuf, int gf) /* uses genbuf */ + /* Match RE at expbuf against linebuf; if gf set, copy linebuf from genbuf */ +{ + register char *p1, *p2, c; + + if (gf) { + if (*expbuf) return(FALSE); + p1 = linebuf; + p2 = genbuf; + while (*p1++ = *p2++); + locs = p1 = loc2; + } else { + p1 = linebuf; + locs = FALSE; + } + + p2 = expbuf; + if (*p2++) { + loc1 = p1; + if (*p2 == CCHR && p2[1] != *p1) /* 1st char is wrong */ + return(FALSE); /* so fail */ + return(advance(p1, p2));/* else try to match rest */ + } + + /* Quick check for 1st character if it's literal */ + if (*p2 == CCHR) { + c = p2[1]; /* pull out character to search for */ + do { + if (*p1 != c) continue; /* scan the source string */ + if (advance(p1, p2)) /* found it, match the rest */ + return(loc1 = p1, 1); + } while + (*p1++); + return(FALSE); /* didn't find that first char */ + } + + /* Else try for unanchored match of the pattern */ + do { + if (advance(p1, p2)) return(loc1 = p1, 1); + } while + (*p1++); + + /* If got here, didn't match either way */ + return(FALSE); +} + +static int advance(char *lp, char *ep) +/* Attempt to advance match pointer by one pattern element */ +/* lp: source (linebuf) ptr */ +/* ep: regular expression element ptr */ +{ + register char *curlp; /* save ptr for closures */ + char c; /* scratch character holder */ + char *bbeg; + int ct; + + for (;;) switch (*ep++) { + case CCHR: /* literal character */ + if (*ep++ == *lp++) /* if chars are equal */ + continue; /* matched */ + return(FALSE); /* else return false */ + + case CDOT: /* anything but newline */ + if (*lp++) /* first NUL is at EOL */ + continue; /* keep going if didn't find */ + return(FALSE); /* else return false */ + + case CNL: /* start-of-line */ + case CDOL: /* end-of-line */ + if (*lp == 0) /* found that first NUL? 
*/ + continue; /* yes, keep going */ + return(FALSE); /* else return false */ + + case CEOF: /* end-of-address mark */ + loc2 = lp; /* set second loc */ + return(TRUE); /* return true */ + + case CCL: /* a closure */ + c = *lp++ & 0177; + if (ep[c >> 3] & bits[c & 07]) { /* is char in set? */ + ep += 16; /* then skip rest of bitmask */ + continue; /* and keep going */ + } + return(FALSE); /* else return false */ + + case CBRA: /* start of tagged pattern */ + brastart[*ep++] = lp; /* mark it */ + continue; /* and go */ + + case CKET: /* end of tagged pattern */ + bracend[*ep++] = lp; /* mark it */ + continue; /* and go */ + + case CBACK: + bbeg = brastart[*ep]; + ct = bracend[*ep++] - bbeg; + + if (Memcmp(bbeg, lp, ct)) { + lp += ct; + continue; + } + return(FALSE); + + case CBACK | STAR: + bbeg = brastart[*ep]; + ct = bracend[*ep++] - bbeg; + curlp = lp; + while (Memcmp(bbeg, lp, ct)) lp += ct; + + while (lp >= curlp) { + if (advance(lp, ep)) return(TRUE); + lp -= ct; + } + return(FALSE); + + + case CDOT | STAR: /* match .* */ + curlp = lp; /* save closure start loc */ + while (*lp++); /* match anything */ + goto star; /* now look for followers */ + + case CCHR | STAR: /* match * */ + curlp = lp; /* save closure start loc */ + while (*lp++ == *ep); /* match many of that char */ + ep++; /* to start of next element */ + goto star; /* match it and followers */ + + case CCL | STAR: /* match [...]* */ + curlp = lp; /* save closure start loc */ + do { + c = *lp++ & 0x7F; /* match any in set */ + } while + (ep[c >> 3] & bits[c & 07]); + ep += 16; /* skip past the set */ + goto star; /* match followers */ + + star: /* the recursion part of a * or + match */ + if (--lp == curlp) /* 0 matches */ + continue; + + if (*ep == CCHR) { + c = ep[1]; + do { + if (*lp != c) continue; + if (advance(lp, ep)) return (TRUE); + } while + (lp-- > curlp); + return(FALSE); + } + if (*ep == CBACK) { + c = *(brastart[ep[1]]); + do { + if (*lp != c) continue; + if (advance(lp, ep)) return 
(TRUE); + } while + (lp-- > curlp); + return(FALSE); + } + do { + if (lp == locs) break; + if (advance(lp, ep)) return (TRUE); + } while + (lp-- > curlp); + return(FALSE); + + default: + fprintf(stderr, "sed: RE error, %o\n", *--ep); + quit(2); + } +} + +static int substitute(sedcmd *ipc) +/* Perform s command */ +/* ipc: ptr to s command struct */ +{ + int nullmatch; + + if (match(ipc->u.lhs, 0)) { /* if no match */ + nullmatch = (loc1 == loc2); + dosub(ipc->rhs); /* perform it once */ + } else + return(FALSE); /* command fails */ + + if (ipc->flags.global) /* if global flag enabled */ + while (*loc2) { /* cycle through possibles */ + if (nullmatch) loc2++; + if (match(ipc->u.lhs, 1)) { /* found another */ + nullmatch = (loc1 == loc2); + dosub(ipc->rhs); /* so substitute */ + } else /* otherwise, */ + break; /* we're done */ + } + return(TRUE); /* we succeeded */ +} + +static void dosub(char *rhsbuf) /* uses linebuf, genbuf, spend */ +/* Generate substituted right-hand side (of s command) */ +/* rhsbuf: where to put the result */ +{ + register char *lp, *sp, *rp; + int c; + + /* Copy linebuf to genbuf up to location 1 */ + lp = linebuf; + sp = genbuf; + while (lp < loc1) *sp++ = *lp++; + + for (rp = rhsbuf; c = *rp++;) { + if (c == '&') { + sp = place(sp, loc1, loc2); + continue; + } else if (c & 0200 && (c &= 0177) >= '1' && c < MAXTAGS + '1') { + sp = place(sp, brastart[c - '1'], bracend[c - '1']); + continue; + } + *sp++ = c & 0177; + if (sp >= genbuf + MAXBUF) fprintf(stderr, LTLMSG); + } + lp = loc2; + loc2 = sp - genbuf + linebuf; + while (*sp++ = *lp++) + if (sp >= genbuf + MAXBUF) fprintf(stderr, LTLMSG); + lp = linebuf; + sp = genbuf; + while (*lp++ = *sp++); + spend = lp - 1; +} + +static char *place(char *asp, char *al1, char *al2) /* uses genbuf */ + /* Place chars at *al1...*(al1 - 1) at asp... 
in genbuf[] */ +{ + while (al1 < al2) { + *asp++ = *al1++; + if (asp >= genbuf + MAXBUF) fprintf(stderr, LTLMSG); + } + return(asp); +} + +static void listto(char *p1, FILE *fp) +/* Write a hex dump expansion of *p1... to fp */ +/* p1: the source */ +/* fp: output stream to write to */ +{ + p1--; + while (*p1++) + if (isprint(*p1)) + putc(*p1, fp); /* pass it through */ + else { + putc('\\', fp); /* emit a backslash */ + switch (*p1) { + case '\b': + putc('b', fp); + break; /* BS */ + case '\t': + putc('t', fp); + break; /* TAB */ + case '\n': + putc('n', fp); + break; /* NL */ + case '\r': + putc('r', fp); + break; /* CR */ + case '\33': + putc('e', fp); + break; /* ESC */ + default: + fprintf(fp, "%02x", *p1 & 0xFF); + } + } + putc('\n', fp); +} + +static void truncated(int h) +{ + static long last = 0L; + + if (lnum == last) return; + last = lnum; + + fprintf(stderr, "sed: "); + fprintf(stderr, h ? "hold space" : "line %ld", lnum); + fprintf(stderr, " truncated to %d characters\n", MAXBUF); +} + +static void command(sedcmd *ipc) +/* Execute compiled command pointed at by ipc */ +{ + static char holdsp[MAXHOLD + 1]; /* the hold space */ + static char *hspend = holdsp; /* hold space end pointer */ + register char *p1, *p2; + char *execp; + int didsub; /* true if last s succeeded */ + + switch (ipc->command) { + case ACMD: /* append */ + *aptr++ = ipc; + if (aptr >= appends + MAXAPPENDS) fprintf(stderr, + "sed: too many appends after line %ld\n", + lnum); + *aptr = 0; + break; + + case CCMD: /* change pattern space */ + delete = TRUE; + if (!ipc->flags.inrange || lastline) printf("%s\n", ipc->u.lhs); + break; + + case DCMD: /* delete pattern space */ + delete++; + break; + + case CDCMD: /* delete a line in hold space */ + p1 = p2 = linebuf; + while (*p1 != '\n') + if (delete = (*p1++ == 0)) return; + p1++; + while (*p2++ = *p1++) continue; + spend = p2 - 1; + jump++; + break; + + case EQCMD: /* show current line number */ + fprintf(stdout, "%ld\n", lnum); + break; 
+ + case GCMD: /* copy hold space to pattern space */ + p1 = linebuf; + p2 = holdsp; + while (*p1++ = *p2++); + spend = p1 - 1; + break; + + case CGCMD: /* append hold space to pattern space */ + *spend++ = '\n'; + p1 = spend; + p2 = holdsp; + do + if (p1 > linebuf + MAXBUF) { + truncated(0); + p1[-1] = 0; + break; + } + while (*p1++ = *p2++); + + spend = p1 - 1; + break; + + case HCMD: /* copy pattern space to hold space */ + p1 = holdsp; + p2 = linebuf; + while (*p1++ = *p2++); + hspend = p1 - 1; + break; + + case CHCMD: /* append pattern space to hold space */ + *hspend++ = '\n'; + p1 = hspend; + p2 = linebuf; + do + if (p1 > holdsp + MAXBUF) { + truncated(1); + p1[-1] = 0; + break; + } + while (*p1++ = *p2++); + + hspend = p1 - 1; + break; + + case ICMD: /* insert text */ + printf("%s\n", ipc->u.lhs); + break; + + case BCMD: /* branch to label */ + jump = TRUE; + break; + + case LCMD: /* list text */ + listto(linebuf, (ipc->fout != NULL) ? ipc->fout : stdout); + break; + + case NCMD: /* read next line into pattern space */ + if (!nflag) puts(linebuf); /* flush out the current line */ + if (aptr > appends) readout(); /* do pending a, r commands */ + if ((execp = getline(linebuf)) == BAD) { + delete = TRUE; + break; + } + spend = execp; + anysub = FALSE; + break; + + case CNCMD: /* append next line to pattern space */ + if (aptr > appends) readout(); + *spend++ = '\n'; + if ((execp = getline(spend)) == BAD) { + *--spend = 0; + break; + } + spend = execp; + anysub = FALSE; + break; + + case PCMD: /* print pattern space */ + puts(linebuf); + break; + + case CPCMD: /* print one line from pattern space */ +cpcom: /* so s command can jump here */ + for (p1 = linebuf; *p1 != '\n' && *p1 != '\0';) putc(*p1++, stdout); + putc('\n', stdout); + break; + + case QCMD: /* quit the stream editor */ + if (!nflag) puts(linebuf); /* flush out the current line */ + if (aptr > appends) + readout(); /* do any pending a and r commands */ + quit(0); + + case RCMD: /* read a file into 
the stream */ + *aptr++ = ipc; + if (aptr >= appends + MAXAPPENDS) fprintf(stderr, + "sed: too many reads after line %ld\n", + lnum); + *aptr = 0; + break; + + case SCMD: /* substitute RE */ + didsub = substitute(ipc); + if (didsub) anysub = TRUE; + if (ipc->flags.print && didsub) + if (ipc->flags.print == TRUE) + puts(linebuf); + else + goto cpcom; + if (didsub && ipc->fout) fprintf(ipc->fout, "%s\n", linebuf); + break; + + case TCMD: /* branch on any s successful */ + case CTCMD: /* branch on any s failed */ + if (anysub == (ipc->command == CTCMD)) + break; /* no branch if any s failed, else */ + anysub = FALSE; + jump = TRUE; /* set up to jump to assoc'd label */ + break; + + case CWCMD: /* write one line from pattern space */ + for (p1 = linebuf; *p1 != '\n' && *p1 != '\0';) + putc(*p1++, ipc->fout); + putc('\n', ipc->fout); + break; + + case WCMD: /* write pattern space to file */ + fprintf(ipc->fout, "%s\n", linebuf); + break; + + case XCMD: /* exchange pattern and hold spaces */ + p1 = linebuf; + p2 = genbuf; + while (*p2++ = *p1++) continue; + p1 = holdsp; + p2 = linebuf; + while (*p2++ = *p1++) continue; + spend = p2 - 1; + p1 = genbuf; + p2 = holdsp; + while (*p2++ = *p1++) continue; + hspend = p2 - 1; + break; + + case YCMD: + p1 = linebuf; + p2 = ipc->u.lhs; + while (*p1 = p2[*p1]) p1++; + break; + } +} + +static void openfile(char *file) +/* Replace stdin by given file */ +{ + if (freopen(file, "r", stdin) == NULL) { + fprintf(stderr, "sed: can't open %s\n", file); + quit(1); + } +} + +static int c; /* Will be the next char to read, a kind of + * lookahead */ + +static void get(void) +/* Read next character into c treating all argument files as run through cat */ +{ + while ((c = getchar()) == EOF && --eargc >= 0) openfile(*eargv++); +} + +static void initget(void) +/* Initialise character input */ +{ + if (--eargc >= 0) openfile(*eargv++); /* else input == stdin */ + get(); +} + +static char *getline(char *buf) +/* Get next line of text to be edited, 
return pointer to end */ +/* buf: where to send the input */ +{ + if (c == EOF) return BAD; + + lnum++; /* we can read a new line */ + + do { + if (c == '\n') { + get(); + break; + } + if (buf <= linebuf + MAXBUF) *buf++ = c; + get(); + } while (c != EOF); + + if (c == EOF) lastline = TRUE; + + if (buf > linebuf + MAXBUF) { + truncated(0); + --buf; + } + *buf = 0; + return buf; +} + +static int Memcmp(char *a, char *b, int count) +/* Return TRUE if *a... == *b... for count chars, FALSE otherwise */ +{ + while (count--) /* look at count characters */ + if (*a++ != *b++) /* if any are nonequal */ + return(FALSE); /* return FALSE for false */ + return(TRUE); /* compare succeeded */ +} + +static void readout(void) +/* Write file indicated by r command to output */ +{ + register int t; /* hold input char or EOF */ + FILE *fi; /* ptr to file to be read */ + + aptr = appends - 1; /* arrange for pre-increment to work right */ + while (*++aptr) + if ((*aptr)->command == ACMD) /* process "a" cmd */ + printf("%s\n", (*aptr)->u.lhs); + else { /* process "r" cmd */ + if ((fi = fopen((*aptr)->u.lhs, "r")) == NULL) { + fprintf(stderr, "sed: can't open %s\n", + (*aptr)->u.lhs); + continue; + } + while ((t = getc(fi)) != EOF) putc((char) t, stdout); + fclose(fi); + } + aptr = appends; /* reset the append ptr */ + *aptr = 0; +} + +/* Sedexec.c ends here */ diff --git a/Applications/util/sort.c b/Applications/util/sort.c new file mode 100644 index 00000000..fd2a786f --- /dev/null +++ b/Applications/util/sort.c @@ -0,0 +1,1186 @@ +/* + + Copyright (c) 1987,1997, Prentice Hall + All rights reserved. + + Redistribution and use of the MINIX operating system in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Prentice Hall nor the names of the software + authors or contributors may be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS, AUTHORS, AND + CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL PRENTICE HALL OR ANY AUTHORS OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ +/* sort - sort a file of lines Author: Michiel Huisjes */ + +/* SYNOPSIS: + * sort [-funbirdcmt'x'] [+beg_pos[opts] [-end_pos]] [-o outfile] [file].. + * + * [opts] can be any of + * -f : Fold upper case to lower. + * -n : Sort to numeric value (optional decimal point) implies -b + * -b : Skip leading blanks + * -i : Ignore chars outside ASCII range (040 - 0176) + * -r : Reverse the sense of comparisons. + * -d : Sort to dictionary order. Only letters, digits, comma's and points + * are compared. + * If any of these flags are used in [opts], then they override all global + * ordering for this field. + * + * I/O control flags are: + * -u : Print uniq lines only once. + * -c : Check if files are sorted in order. + * -m : Merge already sorted files. 
+ * -o outfile : Name of output file. (Can be one of the input files). + * Default is stdout. + * - : Take stdin as input. + * + * Fields: + * -t'x' : Field separating character is 'x' + * +a.b : Start comparing at field 'a' with offset 'b'. A missing 'b' is + * taken to be 0. + * -a.b : Stop comparing at field 'a' with offset 'b'. A missing 'b' is + * taken to be 0. + * A missing -a.b means the rest of the line. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define OPEN_MAX 8 /* HACK FIXME */ +#define OPEN_FILES (OPEN_MAX-4) /* Nr of open files per process */ +#define MEMORY_SIZE ((10 * sizeof(int)) * 1024) + /* Total mem_size */ +#define LINE_SIZE (1024 >> 1) /* Max length of a line */ +#define IO_SIZE (2 * 1024) /* Size of buffered output */ +#define STD_OUT 1 /* Fd of terminal */ + +/* Return status of functions */ +#define OK 0 +#define ERROR -1 +#define NIL_PTR ((char *) 0) + +/* Compare return values */ +#define LOWER -1 +#define SAME 0 +#define HIGHER 1 + +/* Table definitions. */ +#define DICT 0x001 /* Alpha, numeric, letters and . */ +#define ASCII 0x002 /* All between ' ' and '~' */ +#define BLANK 0x004 /* ' ' and '\t' */ +#define DIGIT 0x008 /* 0-9 */ +#define UPPER 0x010 /* A-Z */ + +typedef int BOOL; + +#define FALSE 0 +#define TRUE 1 + +typedef struct { + int fd; /* Fd of file */ + char *buffer; /* Buffer for reads */ + int read_chars; /* Nr of chars actually read in buffer */ + int cnt; /* Nr of chars taken out of buffer */ + char *line; /* Contains line currently used */ +} MERGE; + +#define NIL_MERGE ((MERGE *) 0) +MERGE merge_f[OPEN_FILES]; /* Merge structs */ +int buf_size; /* Size of core available for each struct */ + +#define FIELDS_LIMIT 10 /* 1 global + 9 user */ +#define GLOBAL 0 + +typedef struct { + int beg_field, beg_pos; /* Begin field + offset */ + int end_field, end_pos; /* End field + offset. ERROR == EOLN */ + BOOL reverse; /* TRUE if rev. 
flag set on this field */ + BOOL blanks; + BOOL dictionary; + BOOL fold_case; + BOOL ascii; + BOOL numeric; +} FIELD; + +/* Field declarations. A total of FILEDS_LIMIT is allowed */ +FIELD fields[FIELDS_LIMIT]; +int field_cnt; /* Nr of field actually assigned */ + +/* Various output control flags */ +BOOL check = FALSE; +BOOL only_merge = FALSE; +BOOL uniq = FALSE; + +char *mem_top; /* Mem_top points to lowest pos of memory. */ +char *cur_pos; /* First free position in mem */ +char **line_table; /* Pointer to the internal line table */ +BOOL in_core = TRUE; /* Set if input cannot all be sorted in core */ + + /* Place where temp_files should be made */ +char temp_files[] = "/tmp/sort.XXXXX.XX"; +char *output_file; /* Name of output file */ +int out_fd; /* Fd to output file (could be STD_OUT) */ +char out_buffer[IO_SIZE]; /* For buffered output */ + +char **argptr; /* Pointer to argv structure */ +int args_offset; /* Nr of args spilled on options */ +int args_limit; /* Nr of args given */ + +char separator; /* Char that separates fields */ +int nr_of_files = 0; /* Nr_of_files to be merged */ +int disabled; /* Nr of files done */ + +char USAGE[] = "Usage: sort [-funbirdcmt'x'] [+beg_pos [-end_pos]] [-o outfile] [file] .."; + +/* Forward declarations */ +int main(int argc, char **argv); +void get_opts(char *ptr, FIELD * field); +void new_field(FIELD * field, int *offset, BOOL beg_fl); +void adjust_options(FIELD * field); +void error(BOOL quit, char *message, char *arg); +void open_outfile(void); +void get_file(int fd, off_t size); +int last_line(void); +void print_table(int fd); +char *file_name(int nr); +void mread(int fd, char *address, int bytes); +void mwrite(int fd, char *address, int bytes); +void sort(void); +void sort_table(int nel); +void incr(int si, int ei); +int cmp_fields(char *el1, char *el2); +void build_field(char *dest, FIELD * field, char *src); +char *skip_fields(char *str, int nf); +int compare(char *el1, char *el2); +int cmp(unsigned char *el1, 
unsigned char *el2, FIELD * field); +int digits(char *str1, char *str2, BOOL check_sign); +void files_merge(int file_cnt); +void merge(int start_file, int limit_file); +void put_line(char *line); +MERGE * print(MERGE * merg, int file_cnt); +int read_line(MERGE * merg); +MERGE * skip_lines(MERGE * smallest, int file_cnt); +void uniq_lines(MERGE * merg); +void check_file(int fd, char *file); +int length(char *line); +void copy(char *dest, char *src); +char *msbrk(int size); +void mbrk(char *address); +void catch(signal_t dummy); + +/* Table of all chars. 0 means no special meaning. */ +char table[256] = { +/* '^@' to space */ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, BLANK | DICT, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + +/* Space to '0' */ + BLANK | DICT | ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, + ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, + ASCII, ASCII, + +/* '0' until '9' */ + DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, + DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, + DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, + DIGIT | DICT | ASCII, + +/* ASCII from ':' to '@' */ + ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, + +/* Upper case letters 'A' to 'Z' */ + UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, + UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, + UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, + UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, + UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, + UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, + UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, + UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII, + UPPER | DICT | ASCII, UPPER | DICT | ASCII, + +/* ASCII from '[' to '`' */ + ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, + +/* Lower case letters from 'a' to 'z' */ + DICT | ASCII, 
DICT | ASCII, DICT | ASCII, DICT | ASCII, + DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, + DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, + DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, + DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, + DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII, + DICT | ASCII, DICT | ASCII, + +/* ASCII from '{' to '~' */ + ASCII, ASCII, ASCII, ASCII, + +/* Stuff from -1 to -177 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0 +}; + + +/* + * Get_opts () assigns the options into the field structure as described in ptr. + * This field structure could be the GLOBAL one. + */ +void get_opts(char *ptr, FIELD * field) +{ + switch (*ptr) { + case 'b': /* Skip leading blanks */ + field->blanks = TRUE; + break; + case 'd': /* Dictionary order */ + field->dictionary = TRUE; + break; + case 'f': /* Fold upper case to lower */ + field->fold_case = TRUE; + break; + case 'i': /* Skip chars outside ' ' '~' */ + field->ascii = TRUE; + break; + case 'n': /* Sort on numeric */ + field->numeric = TRUE; + field->blanks = TRUE; + break; + case 'r': /* Reverse comparisons */ + field->reverse = TRUE; + break; + default: /* Illegal options */ + error(TRUE, USAGE, NIL_PTR); + } +} + +/* New_field () assigns a new field as described by the arguments. + * A field description is of the form: +a.b[opts] -c.d, where b and d, as well + * as -c.d and [opts] are optional. Nr before digit is field nr. Nr after digit + * is offset from field. 
+ */ +void new_field(FIELD * field, int *offset, BOOL beg_fl) +/* register FIELD *field; Field to assign */ +/* int *offset; Offset in argv structure */ +/* BOOL beg_fl; Assign beg or end of field */ +{ + register char *ptr; + + ptr = argptr[*offset]; + *offset += 1; /* Incr offset to next arg */ + ptr++; + + if (beg_fl) + field->beg_field = atoi(ptr); /* Assign int of first field */ + else + field->end_field = atoi(ptr); + + while (table[*ptr] & DIGIT) /* Skip all digits */ + ptr++; + + if (*ptr == '.') { /* Check for offset */ + ptr++; + if (beg_fl) + field->beg_pos = atoi(ptr); + else + field->end_pos = atoi(ptr); + while (table[*ptr] & DIGIT) /* Skip digits */ + ptr++; + } + if (beg_fl) { + while (*ptr != '\0') /* Check options after field */ + get_opts(ptr++, field); + } + if (beg_fl) { /* Check for end pos */ + ptr = argptr[*offset]; + if (ptr && *ptr == '-' && ((table[*(ptr + 1)] & DIGIT) || *(ptr + 1) == '.')) { + new_field(field, offset, FALSE); + if (field->beg_field > field->end_field) + error(TRUE, "End field is before start field!", NIL_PTR); + } else /* No end pos. */ + field->end_field = ERROR; + } +} + +int main(int argc, char **argv) +{ + int arg_count = 1; /* Offset in argv */ + struct stat st; + register char *ptr; /* Ptr to *argv in use */ + register int fd; + int pid, pow; + + argptr = argv; + cur_pos = mem_top = msbrk(MEMORY_SIZE); /* Find lowest mem. location */ + + while (arg_count < argc && ((ptr = argv[arg_count])[0] == '-' || *ptr == '+')) { + if (*ptr == '-' && *(ptr + 1) == '\0') /* "-" means stdin */ + break; + if (*ptr == '+') { /* Assign field. 
*/ + if (++field_cnt == FIELDS_LIMIT) + error(TRUE, "Too many fields", NIL_PTR); + new_field(&fields[field_cnt], &arg_count, TRUE); + } else { /* Get output options */ + while (*++ptr) { + switch (*ptr) { + case 'c': /* Only check file */ + check = TRUE; + break; + case 'm': /* Merge (sorted) files */ + only_merge = TRUE; + break; + case 'u': /* Only give uniq lines */ + uniq = TRUE; + break; + case 'o': /* Name of output file */ + output_file = argv[++arg_count]; + break; + case 't': /* Field separator */ + ptr++; + separator = *ptr; + break; + default: /* Sort options */ + get_opts(ptr, &fields[GLOBAL]); + } + } + arg_count++; + } + } + + for (fd = 1; fd <= field_cnt; fd++) adjust_options(&fields[fd]); + +/* Create name of tem_files 'sort.pid.aa' */ + ptr = &temp_files[10]; + pid = getpid(); + pow = 10000; + while (pow != 0) { + *ptr++ = pid / pow + '0'; + pid %= pow; + pow /= 10; + } + + signal(SIGINT, catch); + +/* Only merge files. Set up */ + if (only_merge) { + args_limit = args_offset = arg_count; + while (argv[args_limit] != NIL_PTR) + args_limit++; /* Find nr of args */ + files_merge(args_limit - arg_count); + exit(0); + } + if (arg_count == argc) { /* No args left. 
Use stdin */ + if (check) + check_file(0, NIL_PTR); + else + get_file(0, (off_t) 0); + } else + while (arg_count < argc) { /* Sort or check args */ + if (strcmp(argv[arg_count], "-") == 0) + fd = 0; + else if (stat(argv[arg_count], &st) < 0) { + error(FALSE, "Cannot find ", argv[arg_count++]); + continue; + } + + /* Open files */ + else if ((fd = open(argv[arg_count], O_RDONLY)) < 0) { + error(FALSE, "Cannot open ", argv[arg_count++]); + continue; + } + if (check) + check_file(fd, argv[arg_count]); + else /* Get_file reads whole file */ + get_file(fd, st.st_size); + arg_count++; + } + + if (check) exit(0); + + sort(); /* Sort whatever is left */ + + if (nr_of_files == 1) /* Only one file sorted -> don't merge */ + exit(0); + + files_merge(nr_of_files); + return(0); +} + +/* Adjust_options() assigns all global variables set also in the fields + * assigned. + */ +void adjust_options(FIELD * field) +{ + register FIELD *gfield = &fields[GLOBAL]; + + if (gfield->reverse) field->reverse = TRUE; + if (gfield->blanks) field->blanks = TRUE; + if (gfield->dictionary) field->dictionary = TRUE; + if (gfield->fold_case) field->fold_case = TRUE; + if (gfield->ascii) field->ascii = TRUE; + if (gfield->numeric) field->numeric = TRUE; +} + +/* Error () prints the error message on stderr and exits if quit == TRUE. */ +void error(BOOL quit, char *message, char *arg) +{ + write(2, message, strlen(message)); + if (arg != NIL_PTR) write(2, arg, strlen(arg)); + perror(" "); + if (quit) exit(1); +} + +/* Open_outfile () assigns to out_fd the fd where the output must go when all + * the sorting is done. + */ +void open_outfile(void) +{ + if (output_file == NIL_PTR) + out_fd = STD_OUT; + else if ((out_fd = creat(output_file, 0644)) < 0) + error(TRUE, "Cannot creat ", output_file); +} + +/* Get_file reads the whole file of filedescriptor fd. If the file is too big + * to keep in core, a partial sort is done, and the output is stashed somewhere. 
+ */ +void get_file(int fd, off_t size) +/* int fd; Fd of file to read */ +/* register off_t size; Size of file */ +{ + register int i; + int rest; /* Rest in memory */ + char save_ch; /* Used in stdin readings */ + + rest = MEMORY_SIZE - (cur_pos - mem_top); + if (fd == 0) { /* We're reding stdin */ + while ((i = read(0, cur_pos, rest)) > 0) { + if ((cur_pos - mem_top) + i == MEMORY_SIZE) { + in_core = FALSE; + i = last_line(); /* End of last line */ + save_ch = mem_top[i]; + mem_top[i] = '\0'; + sort(); /* Sort core */ + mem_top[i] = save_ch; /* Restore erased char */ + /* Restore last (half read) line */ + for (rest = 0; i + rest != MEMORY_SIZE; rest++) + mem_top[rest] = mem_top[i + rest]; + /* Assign current pos. in memory */ + cur_pos = &mem_top[rest]; + } else { /* Fits, just assign position in mem. */ + cur_pos = cur_pos + i; + *cur_pos = '\0'; + } + + /* Calculate rest of mem */ + rest = MEMORY_SIZE - (cur_pos - mem_top); + } + } + + /* Reading file. Check size */ + else if (size > rest) { /* Won't fit */ + mread(fd, cur_pos, rest); + in_core = FALSE; + i = last_line(); /* Get pos. of last line */ + mem_top[i] = '\0'; /* Truncate */ + (void) lseek(fd, (off_t) (i - MEMORY_SIZE), SEEK_CUR); /* Do this next time */ + size = size - rest - i + MEMORY_SIZE; /* Calculate rest */ + cur_pos = mem_top; /* Reset mem */ + sort(); /* Sort core */ + get_file(fd, size); /* Get rest of file */ + } else { /* Fits. Just read in */ + rest = size; + mread(fd, cur_pos, rest); + cur_pos = cur_pos + rest; /* Reassign cur_pos */ + *cur_pos = '\0'; + (void) close(fd); /* File completed */ + } +} + +/* Last_line () find the last line in core and retuns the offset from the top + * of the memory. + */ +int last_line(void) +{ + register int i; + + for (i = MEMORY_SIZE - 2; i > 0; i--) + if (mem_top[i] == '\n') break; + return i + 1; +} + +/* Print_table prints the line table in the given file_descriptor. If the fd + * equals ERROR, it opens a temp_file itself. 
+ */ +void print_table(int fd) +{ + register char **line_ptr; /* Ptr in line_table */ + register char *ptr; /* Ptr to line */ + int index = 0; /* Index in output buffer */ + + if (fd == ERROR) { + if ((fd = creat(file_name(nr_of_files), 0644)) < 0) + error(TRUE, "Cannot creat ", file_name(nr_of_files)); + } + for (line_ptr = line_table; *line_ptr != NIL_PTR; line_ptr++) { + ptr = *line_ptr; + /* Skip all same lines if uniq is set */ + if (uniq && *(line_ptr + 1) != NIL_PTR) { + if (compare(ptr, *(line_ptr + 1)) == SAME) continue; + } + do { /* Print line in a buffered way */ + out_buffer[index++] = *ptr; + if (index == IO_SIZE) { + mwrite(fd, out_buffer, IO_SIZE); + index = 0; + } + } while (*ptr++ != '\n'); + } + mwrite(fd, out_buffer, index);/* Flush buffer */ + (void) close(fd); /* Close file */ + nr_of_files++; /* Increment nr_of_files to merge */ +} + +/* File_name () returns the nr argument from the argument list, or a uniq + * filename if the nr is too high, or the arguments were not merge files. + */ +char *file_name(int nr) +{ + if (only_merge) { + if (args_offset + nr < args_limit) return argptr[args_offset + nr]; + } + temp_files[16] = nr / 26 + 'a'; + temp_files[17] = nr % 26 + 'a'; + + return temp_files; +} + +/* Mread () performs a normal read (), but checks the return value. */ +void mread(int fd, char *address, int bytes) +{ + if (read(fd, address, bytes) < 0 && bytes != 0) + error(TRUE, "Read error", NIL_PTR); +} + +/* Mwrite () performs a normal write (), but checks the return value. */ +void mwrite(int fd, char *address, int bytes) +{ + if (write(fd, address, bytes) != bytes && bytes != 0) + error(TRUE, "Write error", NIL_PTR); +} + +/* Sort () sorts the input in memory starting at mem_top. 
*/
+void sort(void)
+{
+    register char *ptr = mem_top;
+    register int count = 0;
+
+/* Count number of lines in memory (the text ends at the first '\0') */
+    while (*ptr) {
+        if (*ptr++ == '\n') count++;
+    }
+
+/* Set up the line table: one slot per line plus one for the NIL_PTR end mark */
+    line_table = (char **) msbrk(count * sizeof(char *) + sizeof(char *));
+
+    count = 1;
+    ptr = line_table[0] = mem_top;
+    while (*ptr) {
+        if (*ptr++ == '\n') line_table[count++] = ptr;
+    }
+
+    line_table[count - 1] = NIL_PTR;    /* Replaces the entry made behind the last '\n' */
+
+/* Sort the line table */
+    sort_table(count - 1);
+
+/* Stash output somewhere: the real output if all fitted in core, else a temp file */
+    if (in_core) {
+        open_outfile();
+        print_table(out_fd);
+    } else
+        print_table(ERROR);
+
+/* Free line table */
+    mbrk((char *) line_table);
+}
+
+/* Sort_table () heap-sorts the line table consisting of nel elements. */
+void sort_table(int nel)
+{
+    char *tmp;
+    register int i;
+
+    /* Make heap */
+    for (i = (nel >> 1); i >= 1; i--) incr(i, nel);
+
+    /* Sort from heap: swap the root with the last element, then repair the rest */
+    for (i = nel; i > 1; i--) {
+        tmp = line_table[0];
+        line_table[0] = line_table[i - 1];
+        line_table[i - 1] = tmp;
+        incr(1, i - 1);
+    }
+}
+
+/* Incr () sifts element si down the heap that ends at ei (indices count from 1). */
+void incr(int si, int ei)
+{
+    char *tmp;
+
+    while (si <= (ei >> 1)) {
+        si <<= 1;    /* First child */
+        if (si + 1 <= ei && compare(line_table[si - 1], line_table[si]) <= 0)
+            si++;    /* Second child is not smaller: use it */
+        if (compare(line_table[(si >> 1) - 1], line_table[si - 1]) >= 0)
+            return;    /* Parent is not smaller than the child: done */
+        tmp = line_table[(si >> 1) - 1];
+        line_table[(si >> 1) - 1] = line_table[si - 1];
+        line_table[si - 1] = tmp;
+    }
+}
+
+/* Cmp_fields builds new lines out of the lines pointed to by el1 and el2 and
+ * puts it into the line1 and line2 arrays. It then calls the cmp () routine
+ * with the field describing the arguments.
+ */
+int cmp_fields(char *el1, char *el2)
+{
+    int i, ret;    /* NOTE(review): ret stays unset if field_cnt is 0 */
+    char line1[LINE_SIZE], line2[LINE_SIZE];
+
+    for (i = 0; i < field_cnt; i++) {    /* Setup line parts; fields[0] is not used here */
+        build_field(line1, &fields[i + 1], el1);
+        build_field(line2, &fields[i + 1], el2);
+        if ((ret = cmp((unsigned char *) line1, (unsigned char *) line2,
+                       &fields[i + 1])) != SAME)
+            break;    /* If equal, try next field */
+    }
+
+/* Check for reverse flag of the field that decided the comparison */
+    if (i != field_cnt && fields[i + 1].reverse) return -ret;
+
+/* Else return the last return value of cmp () */
+    return ret;
+}
+
+/* Build_field builds a new line from the src as described by the field.
+ * The result is put in dest.
+ */
+void build_field(char *dest, FIELD * field, char *src)
+/* char *dest; Holds result */
+/* FIELD *field; Field description */
+/* char *src; Source line */
+{
+    char *begin = src;    /* Remember start location */
+    char *last;    /* Pointer to end location */
+    int i;
+
+/* Skip begin fields */
+    src = skip_fields(src, field->beg_field);
+
+/* Skip begin positions */
+    for (i = 0; i < field->beg_pos && *src != '\n'; i++) src++;
+
+/* Copy whatever is left (up to and including the '\n') */
+    copy(dest, src);
+
+/* If end field is assigned truncate (perhaps) the part copied */
+    if (field->end_field != ERROR) {    /* Find last field */
+        last = skip_fields(begin, field->end_field);
+/* Skip positions as given by end fields description */
+        for (i = 0; i < field->end_pos && *last != '\n'; i++) last++;
+        dest[last - src] = '\n';/* Truncate line. NOTE(review): index is negative if last < src */
+    }
+}
+
+/* Skip_fields () skips nf fields of the line pointed to by str. */
+char *skip_fields(char *str, int nf)
+{
+    while (nf-- > 0) {
+        if (separator == '\0') {/* Means ' ' or '\t' */
+            while (*str != ' ' && *str != '\t' && *str != '\n') str++;
+            while (table[*str] & BLANK) str++;
+        } else {
+            while (*str != separator && *str != '\n') str++;
+            if (*str == separator) str++;    /* Step over exactly one separator */
+        }
+    }
+    return str;    /* Return pointer to indicated field */
+}
+
+/* Compare is called by all sorting routines.
It checks whether field assignments
+ * have been made. If so, it calls cmp_fields (). If not, it calls cmp () and
+ * reverses the return value if the (global) reverse flag is set.
+ */
+int compare(char *el1, char *el2)
+{
+    int ret;
+
+    if (field_cnt > GLOBAL) return cmp_fields(el1, el2);
+
+    ret = cmp((unsigned char *) el1, (unsigned char *) el2, &fields[GLOBAL]);
+    return(fields[GLOBAL].reverse) ? -ret : ret;
+}
+
+/* Cmp () is the actual compare routine. It compares according to the
+ * description given in the field pointer. Both lines must end in '\n'.
+ */
+int cmp(unsigned char *el1, unsigned char *el2, FIELD *field)
+{
+    int c1, c2;
+
+    if (field->blanks) {    /* Skip leading blanks */
+        while (table[*el1] & BLANK) el1++;
+        while (table[*el2] & BLANK) el2++;
+    }
+    if (field->numeric)    /* Compare numeric */
+        return digits((char *) el1, (char *) el2, TRUE);
+
+    for (;;) {
+        while (*el1 == *el2) {    /* Run over the common part */
+            if (*el1++ == '\n')    /* EOLN on both strings */
+                return SAME;
+            el2++;
+        }
+        if (*el1 == '\n')    /* EOLN on string one */
+            return LOWER;
+        if (*el2 == '\n') return HIGHER;    /* EOLN on string two */
+        if (field->ascii) {    /* Skip chars outside 040 - 0177 */
+            if ((table[*el1] & ASCII) == 0) {
+                do {
+                    el1++;
+                } while ((table[*el1] & ASCII) == 0);
+                continue;    /* Compare again from the new position */
+            }
+            if ((table[*el2] & ASCII) == 0) {
+                do {
+                    el2++;
+                } while ((table[*el2] & ASCII) == 0);
+                continue;
+            }
+        }
+        if (field->dictionary) {/* Skip non-dict chars */
+            if ((table[*el1] & DICT) == 0) {
+                do {
+                    el1++;
+                } while ((table[*el1] & DICT) == 0);
+                continue;
+            }
+            if ((table[*el2] & DICT) == 0) {
+                do {
+                    el2++;
+                } while ((table[*el2] & DICT) == 0);
+                continue;
+            }
+        }
+        if (field->fold_case) {    /* Fold upper case to lower */
+            if (table[c1 = *el1++] & UPPER) c1 += 'a' - 'A';
+            if (table[c2 = *el2++] & UPPER) c2 += 'a' - 'A';
+            if (c1 == c2) continue;
+            return c1 - c2;
+        }
+        return *el1 - *el2;    /* Plain byte difference decides */
+    }
+
+    /* NOTREACHED */
+}
+
+/*
+ * Digits () compares the two strings that point to a number of digits followed
+ * by an optional decimal point.
+ */
+int digits(char *str1, char *str2, BOOL check_sign)
+/* check_sign; True if sign must be checked (FALSE on the recursive call for the decimal part) */
+{
+    BOOL negative = FALSE;    /* True if negative numbers */
+    int diff, pow, ret;
+
+/* Check for optional minus or plus sign */
+    if (check_sign) {
+        if (*str1 == '-') {
+            negative = TRUE;
+            str1++;
+        } else if (*str1 == '+')
+            str1++;
+
+        if (*str2 == '-') {
+            if (negative == FALSE) return HIGHER;    /* Only str2 is negative */
+            str2++;
+        } else if (negative)
+            return LOWER;    /* Only str1 is negative */
+        else if (*str2 == '+')
+            str2++;
+    }
+
+/* Keep incrementing as long as digits are available and equal */
+    while ((table[*str1] & DIGIT) && table[*str2] & DIGIT) {    /* NOTE(review): index is a plain char */
+        if (*str1 != *str2) break;
+        str1++;
+        str2++;
+    }
+
+/* First check for the decimal point. */
+    if (*str1 == '.' || *str2 == '.') {
+        if (*str1 == '.') {
+            if (*str2 == '.')    /* Both. Check decimal part */
+                ret = digits(str1 + 1, str2 + 1, FALSE);
+            else
+                ret = (table[*str2] & DIGIT) ? LOWER : HIGHER;
+        } else
+            ret = (table[*str1] & DIGIT) ? HIGHER : LOWER;
+    }
+
+/* Now either two digits differ, or unknown char is seen (e.g. end of string) */
+    else if ((table[*str1] & DIGIT) && (table[*str2] & DIGIT)) {
+        diff = *str1 - *str2;    /* Basic difference */
+        pow = 0;    /* Check power of numbers: difference in remaining digit count */
+        while (table[*str1++] & DIGIT) pow++;
+        while (table[*str2++] & DIGIT) pow--;
+        ret = (pow == 0) ? diff : pow;    /* More digits left wins, else first differing digit */
+    }
+
+/* Unknown char. Check on which string it occurred */
+    else {
+        if ((table[*str1] & DIGIT) == 0)
+            ret = (table[*str2] & DIGIT) ? LOWER : SAME;
+        else
+            ret = HIGHER;
+    }
+
+/* Reverse sense of comparisons if negative is true. (-1000 < -1) */
+    return(negative) ? -ret : ret;
+}
+
+/* Files_merge () merges all files as indicated by nr_of_files. Merging goes
+ * in numbers of files that can be opened at the same time.
(OPEN_FILES)
+ */
+void files_merge(int file_cnt)    /* Nr_of_files to merge */
+{
+    register int i;
+    int limit;
+
+    for (i = 0; i < file_cnt; i += OPEN_FILES) {
+        /* Merge last files and store in output file */
+        if ((limit = i + OPEN_FILES) >= file_cnt) {
+            open_outfile();
+            limit = file_cnt;
+        } else {    /* Merge OPEN_FILES files and store in temp
+                     * file, which is then merged in a later pass */
+            temp_files[16] = file_cnt / 26 + 'a';
+            temp_files[17] = file_cnt % 26 + 'a';
+            if ((out_fd = creat(temp_files, 0644)) < 0)
+                error(TRUE, "Cannot creat ", temp_files);
+            file_cnt++;    /* The new temp file joins the list to merge */
+        }
+        merge(i, limit);
+    }
+
+/* Cleanup mess: remove the temp files, but not files named as arguments */
+    i = (only_merge) ? args_limit - args_offset : 0;
+    while (i < file_cnt) (void) unlink(file_name(i++));
+}
+
+/* Merge () merges the files from start_file up to (not including) limit_file into out_fd. */
+void merge(int start_file, int limit_file)
+{
+    register MERGE *smallest;    /* Keeps track of smallest line */
+    register int i;
+    int file_cnt = limit_file - start_file;    /* Nr of files to merge */
+
+/* Calculate size in core available for file_cnt merge structs */
+    buf_size = MEMORY_SIZE / file_cnt - LINE_SIZE;
+
+    mbrk(mem_top);    /* First reset mem to lowest loc. */
+    disabled = 0;    /* All files not done yet */
+
+/* Set up merge structures. */
+    for (i = start_file; i < limit_file; i++) {
+        smallest = &merge_f[i - start_file];
+        if (!strcmp(file_name(i), "-"))    /* File is stdin */
+            smallest->fd = 0;
+        else if ((smallest->fd = open(file_name(i), O_RDONLY)) < 0) {
+            smallest->fd = ERROR;
+            error(FALSE, "Cannot open ", file_name(i));
+            disabled++;    /* Done this file */
+            continue;
+        }
+        smallest->buffer = msbrk(buf_size);
+        smallest->line = msbrk(LINE_SIZE);
+        smallest->cnt = smallest->read_chars = 0;
+        (void) read_line(smallest);    /* Read first line; a failure disables the file */
+    }
+
+    if (disabled == file_cnt) {    /* Couldn't open files */
+        (void) close(out_fd);
+        return;
+    }
+
+/* Find a merg struct to assign smallest: the first one still open. */
+    for (i = 0; i < file_cnt; i++) {
+        if (merge_f[i].fd != ERROR) {
+            smallest = &merge_f[i];
+            break;
+        }
+    }
+
+/* Loop until all files minus one are done */
+    while (disabled < file_cnt - 1) {
+        if (uniq)    /* Skip all same lines */
+            smallest = skip_lines(smallest, file_cnt);
+        else {    /* Find smallest line */
+            for (i = 0; i < file_cnt; i++) {
+                if (merge_f[i].fd == ERROR)
+                    continue;    /* We've had this one */
+                if (compare(merge_f[i].line, smallest->line) < 0)
+                    smallest = &merge_f[i];
+            }
+        }    /* Print line and read next */
+        smallest = print(smallest, file_cnt);
+    }
+
+    if (only_merge && uniq)
+        uniq_lines(smallest);    /* Print only uniq lines */
+    else    /* Print rest of file */
+        while (print(smallest, file_cnt) != NIL_MERGE);
+
+    put_line(NIL_PTR);    /* Flush output buffer (this also closes out_fd) */
+}
+
+/* Put_line () prints the line into the out_fd filedescriptor. If line equals
+ * NIL_PTR, the out_fd is flushed and closed.
+ */
+void put_line(char *line)
+{
+    static int index = 0;    /* Index in out_buffer, kept between calls */
+
+    if (line == NIL_PTR) {    /* Flush and close */
+        mwrite(out_fd, out_buffer, index);
+        index = 0;
+        (void) close(out_fd);
+        return;
+    }
+    do {    /* Fill out_buffer with line */
+        out_buffer[index++] = *line;
+        if (index == IO_SIZE) {    /* Buffer full: write it out */
+            mwrite(out_fd, out_buffer, IO_SIZE);
+            index = 0;
+        }
+    } while (*line++ != '\n');
+}
+
+/*
+ * Print () prints the line of the merg structure and tries to read another one.
+ * If this fails, it returns the next merg structure which file_descriptor is
+ * still open. If none could be found, a NIL structure is returned.
+ */ +MERGE *print(MERGE *merg, int file_cnt) /* Nr of files that are being merged */ +{ + register int i; + + put_line(merg->line); /* Print the line */ + + if (read_line(merg) == ERROR) { /* Read next line */ + for (i = 0; i < file_cnt; i++) { + if (merge_f[i].fd != ERROR) { + merg = &merge_f[i]; + break; + } + } + if (i == file_cnt) /* No more files left */ + return NIL_MERGE; + } + return merg; +} + +/* Read_line () reads a line from the fd from the merg struct. If the read + * failed, disabled is incremented and the file is closed. Readings are + * done in buf_size bytes. + * Lines longer than LINE_SIZE are silently truncated. + */ +int read_line(MERGE *merg) +{ + register char *ptr = merg->line - 1; /* Ptr buf that will hold line */ + + do { + ptr++; + if (merg->cnt == merg->read_chars) { /* Read new buffer */ + if ((merg->read_chars = + read(merg->fd, merg->buffer, buf_size)) <= 0) { + (void) close(merg->fd); /* OOPS */ + merg->fd = ERROR; + disabled++; + return ERROR; + } + merg->cnt = 0; + } + *ptr = merg->buffer[merg->cnt++]; /* Assign next char of line */ + if (ptr - merg->line == LINE_SIZE - 1) + *ptr = '\n'; /* Truncate very long lines */ + } while (*ptr != '\n' && *ptr != '\0'); + + if (*ptr == '\0') /* Add '\n' to last line */ + *ptr = '\n'; + *++ptr = '\0'; /* Add '\0' */ + return OK; +} + +/* Skip_lines () skips all same lines in all the files currently being merged. + * It returns a pointer to the merge struct containing the smallest line. + */ +MERGE *skip_lines(MERGE *smallest, int file_cnt) +{ + register int i; + int ret; + + if (disabled == file_cnt - 1) /* We've had all */ + return smallest; + + for (i = 0; i < file_cnt; i++) { + if (merge_f[i].fd == ERROR || smallest == &merge_f[i]) + continue; /* Don't check same file */ + while ((ret = compare(merge_f[i].line, smallest->line)) == 0) { + if (read_line(&merge_f[i]) == ERROR) break; /* EOF */ + } + if (ret < 0) /* Line wasn't smallest. 
Try again */ + return skip_lines(&merge_f[i], file_cnt); + } + return smallest; +} + +/* Uniq_lines () prints only the uniq lines out of the fd of the merg struct. */ +void uniq_lines(MERGE *merg) +{ + char lastline[LINE_SIZE]; /* Buffer to hold last line */ + + for (;;) { + put_line(merg->line); /* Print this line */ + copy(lastline, merg->line); /* and save it */ + if (read_line(merg) == ERROR) /* Read the next */ + return; + /* Keep reading until lines duffer */ + while (compare(lastline, merg->line) == SAME) + if (read_line(merg) == ERROR) return; + } + + /* NOTREACHED */ +} + +/* + * Check_file () checks if a file is sorted in order according to the arguments + * given in main (). + */ +void check_file(int fd, char *file) +{ + register MERGE *merg; /* 1 file only */ + char lastline[LINE_SIZE]; /* Save last line */ + register int ret; /* ret status of compare */ + + if (fd == 0) file = "stdin"; + merg = (MERGE *) mem_top; /* Assign MERGE structure */ + merg->buffer = mem_top + sizeof(MERGE); + merg->line = msbrk(LINE_SIZE); + merg->cnt = merg->read_chars = 0; + merg->fd = fd; + buf_size = MEMORY_SIZE - sizeof(MERGE); + + if (read_line(merg) == ERROR) /* Read first line */ + return; + copy(lastline, merg->line); /* and save it */ + + for (;;) { + if (read_line(merg) == ERROR) /* EOF reached */ + break; + if ((ret = compare(lastline, merg->line)) > 0) { + error(FALSE, "Disorder in file ", file); + write(2, merg->line, length(merg->line)); + break; + } else if (ret < 0) /* Copy if lines not equal */ + copy(lastline, merg->line); + else if (uniq) { + error(FALSE, "Non uniq line in file ", file); + write(2, merg->line, length(merg->line)); + break; + } + } + + mbrk(mem_top); /* Reset mem */ +} + +/* Length () returns the length of the argument line including the linefeed. */ +int length(char *line) +{ + register int i = 1; /* Add linefeed */ + + while (*line++ != '\n') i++; + return i; +} + +/* Copy () copies the src line into the dest line including linefeed. 
*/
+void copy(char *dest, char *src)
+{
+    while ((*dest++ = *src++) != '\n');    /* No '\0' is appended and no bound is checked */
+}
+
+/* Msbrk() does a sbrk() and checks the return value; failure is reported through error(). */
+char *msbrk(int size)
+{
+    register char *address;
+
+    if ((address = sbrk(size)) == (char *) -1)
+        error(TRUE, "Not enough memory. Use chmem to allocate more", NIL_PTR);
+    return address;
+}
+
+/* Mbrk() does a brk() and checks the return value; failure is reported through error(). */
+void mbrk(char *address)
+{
+    if (brk(address) == -1) error(TRUE, "Cannot reset memory", NIL_PTR);
+}
+
+void catch(signal_t dummy)    /* presumably the SIGINT handler; dummy only satisfies the prototype */
+{
+    register int i;
+
+    signal(SIGINT, SIG_IGN);
+    only_merge = FALSE;    /* Makes file_name() return temp file names only */
+    for (i = 0; i < 26; i++) (void) unlink(file_name(i));    /* Only the first 26 temp names are tried */
+    exit(2);
+}
diff --git a/Applications/util/tail.c b/Applications/util/tail.c
new file mode 100644
index 00000000..7429ca7c
--- /dev/null
+++ b/Applications/util/tail.c
@@ -0,0 +1,390 @@
+/*
+ Copyright (c) 1987,1997, Prentice Hall
+ All rights reserved.
+
+ Redistribution and use of the MINIX operating system in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of Prentice Hall nor the names of the software
+ authors or contributors may be used to endorse or promote
+ products derived from this software without specific prior
+ written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS, AUTHORS, AND
+ CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL PRENTICE HALL OR ANY AUTHORS OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* tail - copy the end of a file Author: Norbert Schlenker */
+
+/* Syntax: tail [-f] [-c number | -n number] [file]
+ * tail -[number][c|l][f] [file] (obsolescent)
+ * tail +[number][c|l][f] [file] (obsolescent)
+ * Flags:
+ * -c number Measure starting point in bytes. If number begins
+ * with '+', the starting point is relative to
+ * the file's beginning. If number begins with '-'
+ * or has no sign, the starting point is relative to
+ * the end of the file.
+ * -f Keep trying to read after EOF on files and FIFOs.
+ * -n number Measure starting point in lines. The number
+ * following the flag has significance similar to
+ * that described for the -c flag.
+ *
+ * If neither -c nor -n are specified, the default is tail -n 10.
+ *
+ * In the obsolescent syntax, an argument with a 'c' following the
+ * (optional) number is equivalent to "-c number" in the standard
+ * syntax, with number including the leading sign ('+' or '-') of the
+ * argument. An argument with 'l' following the number is equivalent
+ * to "-n number" in the standard syntax. If the number is not
+ * specified, 10 is used as the default. If neither 'c' nor 'l' are
+ * specified, 'l' is assumed. The character 'f' may be suffixed to
+ * the argument and is equivalent to specifying "-f" in the standard
+ * syntax. Look for lines marked "OBSOLESCENT".
+ *
+ * If no file is specified, standard input is assumed.
+ *
+ * P1003.2 does not specify tail's behavior when a count of 0 is given.
+ * It also does not specify clearly whether the first byte (line) of a
+ * file should be numbered 0 or 1. Historical behavior is that the
+ * first byte is actually number 1 (contrary to all Unix standards).
+ * Historically, a count of 0 (or -0) results in no output whatsoever,
+ * while a count of +0 results in the entire file being copied (just like
+ * +1). The implementor does not agree with these behaviors, but has
+ * copied them slavishly. Look for lines marked "HISTORICAL".
+ *
+ * Author: Norbert Schlenker
+ * Copyright: None. Released to the public domain.
+ * Reference: P1003.2 section 4.59 (draft 10)
+ * Notes: Under Minix, this program requires chmem =30000.
+ * Bugs: No internationalization support; all messages are in English.
+ */
+
+/* Force visible Posix names */
+#ifndef _POSIX_SOURCE
+#define _POSIX_SOURCE 1
+#endif
+
+/* External interfaces */
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* External interfaces that should have been standardized into <getopt.h> */
+extern char *optarg;
+extern int optind;
+
+/* We expect this constant to be defined in <limits.h> in a Posix program,
+ * but we'll specify it here just in case it's been left out.
+ */
+#ifndef LINE_MAX
+#define LINE_MAX 2048    /* minimum acceptable lower bound */
+#endif
+
+/* Magic numbers suggested or required by Posix specification */
+#define SUCCESS 0    /* exit code in case of success */
+#define FAILURE 1    /* or failure */
+#define DEFAULT_COUNT 10    /* default number of lines or bytes */
+#define MIN_BUFSIZE (LINE_MAX * DEFAULT_COUNT)
+#define SLEEP_INTERVAL 1    /* sleep for one second intervals with -f */
+
+#define FALSE 0
+#define TRUE 1
+
+/* Internal functions - prototyped under Minix */
+int tail(int count, int bytes, int read_until_killed);
+int keep_reading(void);
+void usage(void);
+
+int main(int argc, char *argv[])    /* parses either syntax, then exits with tail()'s result */
+{
+    int cflag = FALSE;
+    int nflag = FALSE;
+    int fflag = FALSE;
+    int number = -DEFAULT_COUNT;    /* negative: measured from the end of the input */
+    char *suffix;
+    int opt;
+    struct stat stat_buf;
+
+/* Determining whether this invocation is via the standard syntax or
+ * via an obsolescent one is a nasty kludge. Here it is, but there is
+ * no pretense at elegance.
+ */
+    if (argc == 1) {    /* simple: default read of a pipe */
+        exit(tail(-DEFAULT_COUNT, 0, fflag));
+    }
+    if ((argv[1][0] == '+') ||    /* OBSOLESCENT */
+        (argv[1][0] == '-' && ((isdigit(argv[1][1])) ||
+                               (argv[1][1] == 'l') ||
+                               (argv[1][1] == 'c' && argv[1][2] == 'f')))) {
+        --argc; ++argv;
+        if (isdigit(argv[0][1])) {
+            number = (int)strtol(argv[0], &suffix, 10);    /* the leading sign is part of the number */
+            if (number == 0) {    /* HISTORICAL */
+                if (argv[0][0] == '+')
+                    number = 1;
+                else
+                    exit(SUCCESS);
+            }
+        } else {
+            number = (argv[0][0] == '+') ? DEFAULT_COUNT : -DEFAULT_COUNT;
+            suffix = &(argv[0][1]);
+        }
+        if (*suffix != '\0') {
+            if (*suffix == 'c') {
+                cflag = TRUE;
+                ++suffix;
+            }
+            else
+            if (*suffix == 'l') {
+                nflag = TRUE;
+                ++suffix;
+            }
+        }
+        if (*suffix != '\0') {
+            if (*suffix == 'f') {
+                fflag = TRUE;
+                ++suffix;
+            }
+        }
+        if (*suffix != '\0') {    /* bad form: assume to be a file name */
+            number = -DEFAULT_COUNT;
+            cflag = nflag = FALSE;
+            fflag = FALSE;
+        } else {
+            --argc; ++argv;    /* argument consumed as an option */
+        }
+    } else {    /* new standard syntax */
+        while ((opt = getopt(argc, argv, "c:fn:")) != EOF) {
+            switch (opt) {
+            case 'c':
+                cflag = TRUE;
+                if (*optarg == '+' || *optarg == '-')
+                    number = atoi(optarg);
+                else
+                if (isdigit(*optarg))
+                    number = -atoi(optarg);    /* no sign means relative to the end */
+                else
+                    usage();
+                if (number == 0) {    /* HISTORICAL */
+                    if (*optarg == '+')
+                        number = 1;
+                    else
+                        exit(SUCCESS);
+                }
+                break;
+            case 'f':
+                fflag = TRUE;
+                break;
+            case 'n':
+                nflag = TRUE;
+                if (*optarg == '+' || *optarg == '-')
+                    number = atoi(optarg);
+                else
+                if (isdigit(*optarg))
+                    number = -atoi(optarg);    /* no sign means relative to the end */
+                else
+                    usage();
+                if (number == 0) {    /* HISTORICAL */
+                    if (*optarg == '+')
+                        number = 1;
+                    else
+                        exit(SUCCESS);
+                }
+                break;
+            default:
+                usage();
+                /* NOTREACHED */
+            }
+        }
+        argc -= optind;
+        argv += optind;
+    }
+
+    if (argc > 1 ||    /* too many arguments */
+        (cflag && nflag)) {    /* both bytes and lines specified */
+        usage();
+    }
+
+    if (argc > 0) {    /* an actual file */
+        if (freopen(argv[0], "r", stdin) != stdin) {
+            fputs("tail: could not open ", stderr);
+            fputs(argv[0], stderr);
+            fputs("\n", stderr);
+            exit(FAILURE);
+        }
+        /* There is an optimization possibility here. If a file is being
+         * read, we need not look at the front of it. If we seek backwards
+         * from the end, we can (potentially) avoid looking at most of the
+         * file. Some systems fail when asked to seek backwards to a point
+         * before the start of the file, so we avoid that possibility.
+         */
+        if (number < 0 && fstat(fileno(stdin), &stat_buf) == 0) {
+            long offset = cflag ? (long)number : (long)number * LINE_MAX;    /* lines: LINE_MAX bytes each */
+
+            if (-offset < stat_buf.st_size)
+                fseek(stdin, offset, SEEK_END);
+        }
+    } else {
+        fflag = FALSE;    /* force -f off when reading a pipe */
+    }
+    exit(tail(number, cflag, fflag));
+    /* NOTREACHED */
+}
+
+int tail(int count, int bytes, int read_until_killed)
+/* count; lines or bytes desired; > 0 counts from the start, < 0 from the end */
+/* bytes; TRUE if we want bytes */
+/* read_until_killed; keep reading at EOF */
+{
+    int c;
+    char *buf;    /* pointer to input buffer */
+    char *buf_end;    /* and one past its end */
+    char *start;    /* pointer to first desired character in buf */
+    char *finish;    /* pointer past last desired character */
+    int wrapped_once = FALSE;    /* TRUE after buf has been filled once */
+
+/* This is magic. If count is positive, it means start at the count'th
+ * line or byte, with the first line or byte considered number 1. Thus,
+ * we want to SKIP one less line or byte than the number specified. In
+ * the negative case, we look backward from the end of the file for the
+ * (count + 1)'th newline or byte, so we really want the count to be one
+ * LARGER than was specified (in absolute value). In either case, the
+ * right thing to do is:
+ */
+    --count;
+
+/* Count is positive: skip the desired lines or bytes and then copy. */
+    if (count >= 0) {
+        while (count > 0 && (c = getchar()) != EOF) {
+            if (bytes || c == '\n')
+                --count;
+        }
+        while ((c = getchar()) != EOF) {
+            if (putchar(c) == EOF)
+                return FAILURE;
+        }
+        if (read_until_killed)
+            return keep_reading();
+        return ferror(stdin) ? FAILURE : SUCCESS;
+    }
+
+/* Count is negative: allocate a reasonably large buffer. */
+    if ((buf = (char *)malloc(MIN_BUFSIZE + 1)) == (char *)NULL) {
+        fputs("tail: out of memory\n", stderr);
+        return FAILURE;
+    }
+    buf_end = buf + (MIN_BUFSIZE + 1);
+
+/* Read the entire file into the buffer, wrapping around when it is full. */
+    finish = buf;
+    while ((c = getchar()) != EOF) {
+        *finish++ = c;
+        if (finish == buf_end) {
+            finish = buf;
+            wrapped_once = TRUE;
+        }
+    }
+    if (ferror(stdin))
+        return FAILURE;    /* buf is not freed on this path */
+
+/* Back up inside the buffer. The count has already been adjusted to
+ * back up exactly one character too far, so we will bump the buffer
+ * pointer once after we're done.
+ *
+ * BUG: For large line counts, the buffer may not be large enough to
+ * hold all the lines. The specification allows the program to
+ * fail in such a case - this program will simply dump the entire
+ * buffer's contents as its best attempt at the desired behavior.
+ */
+    if (finish != buf || wrapped_once) {    /* file was not empty */
+        start = (finish == buf) ? buf_end - 1 : finish - 1;
+        while (start != finish) {
+            if ((bytes || *start == '\n') && ++count == 0)
+                break;
+            if (start == buf) {
+                start = buf_end - 1;
+                if (!wrapped_once)    /* never wrapped: stop now */
+                    break;
+            } else {
+                --start;
+            }
+        }
+        if (++start == buf_end) {    /* bump after going too far */
+            start = buf;
+        }
+        if (finish > start) {    /* wanted data is one contiguous piece */
+            fwrite(start, 1, finish - start, stdout);
+        } else {    /* wanted data wraps around the end of buf */
+            fwrite(start, 1, buf_end - start, stdout);
+            fwrite(buf, 1, finish - buf, stdout);
+        }
+    }
+    if (read_until_killed)
+        return keep_reading();
+    return ferror(stdout) ? FAILURE : SUCCESS;
+}
+
+/* Wake at intervals to reread standard input. Copy anything read to
+ * standard output and then go to sleep again. Returns only on an I/O error.
+ */
+int keep_reading(void)
+{
+    char buf[1024];
+    int n;
+    int i;
+    off_t pos;
+    struct stat st;
+
+    pos = lseek(0, (off_t) 0, SEEK_CUR);    /* -1 here turns the truncation check off */
+    for (;;) {
+        for (i = 0; i < 60; i++) {    /* 60 read-and-sleep rounds between size checks */
+            while ((n = read(0, buf, sizeof(buf))) > 0) {
+                if (write(1, buf, n) < 0) return FAILURE;
+            }
+            if (n < 0) return FAILURE;
+
+            sleep(SLEEP_INTERVAL);
+        }
+
+        /* Rewind if suddenly truncated. */
+        if (pos != -1) {
+            if (fstat(0, &st) == -1) {
+                pos = -1;
+            } else
+            if (st.st_size < pos) {    /* file is shorter than last seen: start over */
+                pos = lseek(0, (off_t) 0, SEEK_SET);
+            } else {
+                pos = st.st_size;
+            }
+        }
+    }
+}
+
+/* Tell the user the standard syntax and exit with FAILURE. */
+void usage(void)
+{
+    fputs("Usage: tail [-f] [-c number | -n number] [file]\n", stderr);
+    exit(FAILURE);
+}
diff --git a/Applications/util/tee.c b/Applications/util/tee.c
new file mode 100644
index 00000000..57018680
--- /dev/null
+++ b/Applications/util/tee.c
@@ -0,0 +1,96 @@
+/*
+ Copyright (c) 1987,1997, Prentice Hall
+ All rights reserved.
+
+ Redistribution and use of the MINIX operating system in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of Prentice Hall nor the names of the software
+ authors or contributors may be used to endorse or promote
+ products derived from this software without specific prior
+ written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS, AUTHORS, AND
+ CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL PRENTICE HALL OR ANY AUTHORS OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +/* tee - pipe fitting Author: Paul Polderman */ + +#include +#include +#include +#include +#include +#include + +#define MAXFD 18 +#define CHUNK_SIZE 4096 + +int fd[MAXFD]; + +int main(int argc, char *argv[]) +{ + char iflag = 0, aflag = 0; + char buf[CHUNK_SIZE]; + int i, s, n; + + argv++; + --argc; + while (argc > 0 && argv[0][0] == '-') { + switch (argv[0][1]) { + case 'i': /* Interrupt turned off. */ + iflag++; + break; + case 'a': /* Append to outputfile(s), instead of + * overwriting them. */ + aflag++; + break; + default: + fprintf(stderr,"Usage: tee [-i] [-a] [files].\n"); + exit(1); + } + argv++; + --argc; + } + fd[0] = 1; /* Always output to stdout. */ + for (s = 1; s < MAXFD && argc > 0; --argc, argv++, s++) { + if (aflag && (fd[s] = open(*argv, O_RDWR)) >= 0) { + lseek(fd[s], 0L, SEEK_END); + continue; + } else { + if ((fd[s] = creat(*argv, 0666)) >= 0) continue; + } + fprintf(stderr,"Cannot open output file: "); + fprintf(stderr,*argv); + fprintf(stderr,"\n"); + exit(2); + } + + if (iflag) signal(SIGINT, SIG_IGN); + + while ((n = read(0, buf, CHUNK_SIZE)) > 0) { + for (i = 0; i < s; i++) write(fd[i], buf, n); + } + + for (i = 0; i < s; i++) /* Close all fd's */ + close(fd[i]); + return(0); +}