From: Alan Cox Date: Mon, 29 Dec 2014 22:28:58 +0000 (+0000) Subject: grep: replace minimal grep with a full implementation X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=8ebb835fa3cfbc287b7a8867372e3b2a97bf427f;p=FUZIX.git grep: replace minimal grep with a full implementation --- diff --git a/Applications/util/grep.c b/Applications/util/grep.c index 7e274163..f7b798e7 100644 --- a/Applications/util/grep.c +++ b/Applications/util/grep.c @@ -1,161 +1,310 @@ -/* - * Copyright (c) 1993 by David I. Bell - * Permission is granted to use, distribute, or modify this source, - * provided that this copyright notice remains intact. +/* grep - search a file for a pattern Author: Norbert Schlenker */ + +/* Norbert Schlenker (nfs@princeton.edu) 1990-02-08 + * Released into the public domain. + * + * Grep searches files for lines containing a pattern, as specified by + * a regular expression, and prints those lines. It is invoked by: + * grep [flags] [pattern] [file ...] + * + * Flags: + * -e pattern useful when pattern begins with '-' + * -c print a count of lines matched + * -i ignore case + * -l prints just file names, no lines (quietly overrides -n) + * -n printed lines are preceded by relative line numbers + * -s prints errors only (quietly overrides -l and -n) + * -v prints lines which don't contain the pattern * - * The "grep" built-in command. + * Semantic note: + * If both -l and -v are specified, grep prints the names of those + * files which do not contain the pattern *anywhere*. + * + * Exit: + * Grep sets an exit status which can be tested by the caller. + * Note that these settings are not necessarily compatible with + * any other version of grep, especially when -v is specified. + * Possible status values are: + * 0 if any matches are found + * 1 if no matches are found + * 2 if syntax errors are detected or any file cannot be opened */ -#include -#include + +/* External interfaces */ +#include +#include /* Thanks to Henry Spencer */ +#include #include +#include +#include + +/* Internal constants */ +#define MATCH 0 /* exit code: some match somewhere */ +#define NO_MATCH 1 /* exit code: no match on any line */ +#define FAILURE 2 /* exit code: syntax error or bad file name */ + +/* Macros */ +#define SET_FLAG(c) (flags[(c)-'a'] = 1) +#define FLAG(c) (flags[(c)-'a'] != 0) + +#define uppercase(c) (((unsigned) ((c) - 'A')) <= ('Z' - 'A')) +#define downcase(c) ((c) - 'A' + 'a') -typedef unsigned char BOOL; +/* Private storage */ +static char *program; /* program name */ +static char flags[26]; /* invocation flags */ +static regexp *expression; /* compiled search pattern */ -#ifndef FALSE -#define FALSE 0 -#endif +/* External variables. */ +extern int optind; +extern char *optarg; -#ifndef TRUE -#define TRUE 1 -#endif +/* Internal interfaces */ +static int match(FILE *input, char *label, char *filename); +static char *get_line(FILE *input); +static char *map_nocase(char *line); +static void error_exit(const char *s); +void regerror(const char *msg); -BOOL intflag; -static char buf[8192]; -/* - * See if the specified word is found in the specified string. +int main(int argc, char *argv[]) +{ + int opt; /* option letter from getopt() */ + char *pattern; /* search pattern */ + int exit_status = NO_MATCH; /* exit status for our caller */ + int file_status = 0; /* status of search in one file */ + FILE *input; /* input file (if not stdin) */ + + program = argv[0]; + memset(flags, 0, sizeof(flags)); + pattern = NULL; + +/* Process any command line flags. */ + while ((opt = getopt(argc, argv, "e:cilnsv")) != EOF) { + if (opt == '?') + exit_status = FAILURE; + else + if (opt == 'e') + pattern = optarg; + else + SET_FLAG(opt); + } + +/* Detect a few problems. */ + if ((exit_status == FAILURE) || (optind == argc && pattern == NULL)) + error_exit("Usage: %s [-cilnsv] [-e] expression [file ...]\n"); + +/* Ensure we have a usable pattern. */ + if (pattern == NULL) + pattern = argv[optind++]; + +/* Map pattern to lowercase if -i given. */ + if (FLAG('i')) { + char *p; + for (p = pattern; *p != '\0'; p++) { + if (uppercase(*p)) + *p = downcase(*p); + } + } + + if ((expression = regcomp(pattern)) == NULL) + error_exit("%s: bad regular expression\n"); + +/* Process the files appropriately. */ + if (optind == argc) { /* no file names - find pattern in stdin */ + exit_status = match(stdin, (char *) NULL, ""); + } + else + if (optind + 1 == argc) { /* one file name - find pattern in it */ + if (strcmp(argv[optind], "-") == 0) { + exit_status = match(stdin, (char *) NULL, "-"); + } else { + if ((input = fopen(argv[optind], "r")) == NULL) { + fprintf(stderr, "%s: couldn't open %s\n", + program, argv[optind]); + exit_status = FAILURE; + } + else { + exit_status = match(input, (char *) NULL, argv[optind]); + } + } + } + else + while (optind < argc) { /* lots of file names - find pattern in all */ + if (strcmp(argv[optind], "-") == 0) { + file_status = match(stdin, "-", "-"); + } else { + if ((input = fopen(argv[optind], "r")) == NULL) { + fprintf(stderr, "%s: couldn't open %s\n", + program, argv[optind]); + exit_status = FAILURE; + } else { + file_status = match(input, argv[optind], argv[optind]); + fclose(input); + } + } + if (exit_status != FAILURE) + exit_status &= file_status; + ++optind; + } + return(exit_status); +} + + +/* match - matches the lines of a file with the regular expression. + * To improve performance when either -s or -l is specified, this + * function handles those cases specially. */ -static BOOL search(char *string, char *word, BOOL ignorecase) + +static int match(FILE *input, char *label, char *filename) { - char *cp1, *cp2; - int ch1, ch2, len, lowfirst; + char *line, *testline; /* pointers to input line */ + long int lineno = 0; /* line number */ + long int matchcount = 0; /* lines matched */ + int status = NO_MATCH; /* summary of what was found in this file */ + + if (FLAG('s') || FLAG('l')) { + while ((line = get_line(input)) != NULL) { + testline = FLAG('i') ? map_nocase(line) : line; + if (regexec(expression, testline)) { + status = MATCH; + break; + } + } + if (FLAG('l')) + if ((!FLAG('v') && status == MATCH) || + ( FLAG('v') && status == NO_MATCH)) + puts(filename); + return status; + } + + while ((line = get_line(input)) != NULL) { + ++lineno; + testline = FLAG('i') ? map_nocase(line) : line; + if (regexec(expression, testline)) { + status = MATCH; + if (!FLAG('v')) { + if (label != NULL) + printf("%s:", label); + if (FLAG('n')) + printf("%ld:", lineno); + if (!FLAG('c')) puts(line); + matchcount++; + } + } else { + if (FLAG('v')) { + if (label != NULL) + printf("%s:", label); + if (FLAG('n')) + printf("%ld:", lineno); + if (!FLAG('c')) puts(line); + matchcount++; + } + } + } + if (FLAG('c')) printf("%ld\n", matchcount); + return status; +} - len = strlen(word); - if (!ignorecase) { - while (TRUE) { - string = strchr(string, word[0]); - if (string == NULL) - return FALSE; +/* get_line - fetch a line from the input file + * This function reads a line from the input file into a dynamically + * allocated buffer. If the line is too long for the current buffer, + * attempts will be made to increase its size to accomodate the line. + * The trailing newline is stripped before returning to the caller. + */ - if (memcmp(string, word, len) == 0) - return TRUE; +#define FIRST_BUFFER (size_t)256 /* first buffer size */ - string++; +static char *buf = NULL; /* input buffer */ +static size_t buf_size = 0; /* input buffer size */ + +static char *get_line(FILE *input) +{ + int n; + register char *bp; + register int c; + char *new_buf; + size_t new_size; + + if (buf_size == 0) { + if ((buf = (char *) malloc(FIRST_BUFFER)) == NULL) + error_exit("%s: not enough memory\n"); + buf_size = FIRST_BUFFER; + } + + bp = buf; + n = buf_size; + while (1) { + while (--n > 0 && (c = getc(input)) != EOF) { + if (c == '\n') { + *bp = '\0'; + return buf; + } + *bp++ = c; } - } - /* - * Here if we need to check case independence. - * Do the search by lower casing both strings. - */ - lowfirst = *word; - if (isupper(lowfirst)) - lowfirst = tolower(lowfirst); - - while (TRUE) { - while (*string && (*string != lowfirst) && - (!isupper(*string) || (tolower(*string) != lowfirst))) - string++; - - if (*string == '\0') - return FALSE; - - cp1 = string; - cp2 = word; - - do { - if (*cp2 == '\0') - return TRUE; - - ch1 = *cp1++; - if (isupper(ch1)) - ch1 = tolower(ch1); - - ch2 = *cp2++; - if (isupper(ch2)) - ch2 = tolower(ch2); - - } while (ch1 == ch2); - - string++; - } + if (c == EOF) + return (ferror(input) || bp == buf) ? NULL : buf; + new_size = buf_size << 1; + if ((new_buf = (char *) realloc(buf, new_size)) == NULL) { + fprintf(stderr, "%s: line too long - truncated\n", program); + while ((c = getc(input)) != EOF && c != '\n') ; + *bp = '\0'; + return buf; + } else { + bp = new_buf + (buf_size - 1); + n = buf_size + 1; + buf = new_buf; + buf_size = new_size; + } + } } -void main(int argc, char *argv[]) +/* map_nocase - map a line down to lowercase letters only. + * bad points: assumes line gotten from get_line. + * there is more than A-Z you say? + */ + +static char *map_nocase(char *line) { - FILE *fp; - char *word, *name, *cp; - BOOL tellname, ignorecase, tellline; - long line; - - ignorecase = FALSE; - tellline = FALSE; - - argc--; - argv++; - - if (**argv == '-') { - argc--; - cp = *argv++; - - while (*++cp) - switch (*cp) { - case 'i': - ignorecase = TRUE; - break; - - case 'n': - tellline = TRUE; - break; - - default: - fprintf(stderr, "Unknown option\n"); - return; - } - } - word = *argv++; - argc--; - - tellname = (argc > 1); - - while (argc-- > 0) { - name = *argv++; - - fp = fopen(name, "r"); - if (fp == NULL) { - perror(name); - continue; - } - line = 0; - - while (fgets(buf, sizeof(buf), fp)) { - if (intflag) { - fclose(fp); - return; - } - line++; - - cp = &buf[strlen(buf) - 1]; - if (*cp != '\n') - fprintf(stderr, "%s: Line too long\n", name); - - if (search(buf, word, ignorecase)) { - if (tellname) - printf("%s: ", name); - if (tellline) - printf("%d: ", line); - - fputs(buf, stdout); - } + static char *mapped; + static size_t map_size = 0; + char *mp; + + if (map_size < buf_size) { + if (map_size == 0) { + mapped = (char *) malloc(buf_size); + } else { + mapped = (char *) realloc(mapped, buf_size); } + if (mapped == NULL) + error_exit("%s: not enough memory\n"); + map_size = buf_size; + } - if (ferror(fp)) - perror(name); + mp = mapped; + do { + *mp++ = uppercase(*line) ? downcase(*line) : *line; + } while (*line++ != '\0'); - fclose(fp); - } + return mapped; } -/* END CODE */ + +/* Regular expression code calls this routine to print errors. */ + +void regerror(const char *s) +{ + fprintf(stderr, "regexp: %s\n", s); +} + + +/* Common exit point for outrageous errors. */ + +static void error_exit(const char *msg) +{ + fprintf(stderr, msg, program); + exit(FAILURE); +} diff --git a/Library/include/regexp.h b/Library/include/regexp.h index 9dc0b48f..eaf427c8 100644 --- a/Library/include/regexp.h +++ b/Library/include/regexp.h @@ -19,9 +19,9 @@ typedef struct regexp { char program[1]; /* Unwarranted chumminess with compiler. */ } regexp; -extern regexp *regcomp __P((char *)); -extern int regexec __P((regexp *prog, char *string)); -extern void regsub __P((regexp *prog, char *source, char *dest)); -extern void regerror __P((char *)); +extern regexp *regcomp(char *); +extern int regexec(regexp *__prog, char *__string); +extern void regsub(regexp *__prog, char *__source, char *__dest); +extern void regerror(char *); #endif