expr: Add expr
authorAlan Cox <alan@etchedpixels.co.uk>
Mon, 16 May 2016 19:16:26 +0000 (20:16 +0100)
committerAlan Cox <alan@etchedpixels.co.uk>
Mon, 16 May 2016 19:16:26 +0000 (20:16 +0100)
Applications/MWC/cmd/Makefile
Applications/MWC/cmd/expr.y [new file with mode: 0644]

index ade8fdb..94f5860 100644 (file)
@@ -11,7 +11,7 @@ PROGLOAD=`(cat ../../Kernel/platform/config.h; echo PROGLOAD) | cpp -E | tail -n
 
 .SUFFIXES: .c .rel
 
-SRCSNS = test.c
+SRCSNS = expr.c test.c
 
 SRCS  = ac.c almanac.c at.c col.c deroff.c find.c moo.c pr.c tar.c ttt.c
 
@@ -39,6 +39,9 @@ $(OBJSNS): %.rel: %.c
 $(OBJSBAD): %.rel: %.c
        $(FCC) $(PLATFORM) -c $<
 
+expr.c: expr.y
+       byacc -o expr.c expr.y
+
 find.c: find.y
        byacc -o find.c find.y
 
diff --git a/Applications/MWC/cmd/expr.y b/Applications/MWC/cmd/expr.y
new file mode 100644 (file)
index 0000000..a550a6e
--- /dev/null
@@ -0,0 +1,676 @@
+/*
+ * cmd/expr.y
+ * yacc grammar for expr.
+ * Designed so no stdio is called,
+ * which makes the final object code about 1/3 smaller.
+ * To make, say
+ *     yacc expr.y; cc -O -o expr y.tab.c;
+ */
+
+%{
+
+#include <ctype.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <limits.h>
+
+#define        LONGLEN 12              /* max length of NUL-terminated ASCII long */
+#define        CODELEN 512             /* initial length of code buffer */
+#define        CODEINC 128             /* code buffer length increment */
+
+#define        EOF     (-1)
+#define TRUE   (0 == 0)
+#define FALSE  (0 != 0)
+#define true(e)        (*(e) == '\0' || (isnum(e) && atol(e) == 0) ? FALSE : TRUE)
+
+int    exstat;         /* exit status returned by main() */
+char   *result;        /* final expr printed by main */
+char   s0[] = "0";     /* `0' integer - `false' expr */
+char   s1[] = "1";     /* `1' integer - `true' expr */
+
+char *ltoascii(long n);
+char *regexp(char *e1, char *e2);
+char *arithop(int op, char *e1, char *e2);
+char *relop(int op, char *e1, char *e2);
+int isnum(char *e);
+void compile(char *e);
+char *match(char *lp, char *cp);
+char *overflow(char *pc);
+void output(char *s, int fildes);
+int yylex(void);
+void yyerror(const char *p);
+void regerror(void);
+void dieat (void);
+void divzero(void);
+void errexit(void);
+
+/*
+ * The following names are used by the regular expression operator.
+ */
+#define BRSIZE 10                      /* Length of brace list */
+typedef        struct {
+       char    *b_bp;                  /* Ptr to start of string matched */
+       char    *b_ep;                  /* Ptr to end of string matched */
+} BRACE;
+
+#define CSNUL  000                     /* End of expression */
+#define CSSOL  001                     /* Match start of line */
+#define CSEOL  002                     /* End of line */
+#define CSOPR  003                     /* \( */
+#define CSCPR  004                     /* \) */
+#define CSBRN  005                     /* Match nth brace */
+#define CSDOT  006                     /* Any character */
+#define CMDOT  007                     /* Stream of any characters */
+#define CSCHR  010                     /* Match given character */
+#define CMCHR  011                     /* Match stream of given characters */
+#define CSCCL  014                     /* Character class */
+#define CMCCL  015                     /* Stream of character class */
+#define CSNCL  016                     /* Not character class */
+#define CMNCL  017                     /* Stream of not char class */
+
+#define        getx(c)         *e++
+#define ungetx(c)      --e
+
+BRACE  brlist[BRSIZE];                 /* brace list */
+int    brcount;                        /* # of braces in reg_expr */
+char   *codebuf;                       /* Ptr to a compiled regular expr */
+int    cbsiz = CODELEN;                        /* Initial size of codebuf */
+
+#define YYSTYPE        char *
+
+%}
+
+
+
+
+%token STR
+
+%left  '|'
+%left  '&'
+%left  '<' '>' LE GE EQ NEQ    /* relop tokens */
+%left  '+' '-'                 /* arithop tokens */
+%left  '*' '/' '%'             /* arithop tokens */
+%left  ':'                     /* regexp token */
+%right UMINUS '!' LEN          /* uop tokens */
+
+
+
+
+%%
+
+
+
+
+start:
+       expr            { result = $1; exstat = true(result) ? 0 : 1; }
+;
+
+expr:
+       '(' expr ')'    { $$ = $2; }
+
+|      LEN expr        { $$ = ltoascii((long)strlen($2)); }
+|      '!' expr        { $$ = true($2) ? s0 : s1; }
+|      '-' expr %prec UMINUS
+                       { if (isnum($2))
+                               $$ = ltoascii(-atol($2));
+                       else {
+                               --avx;
+                               yyerror("");
+                       }
+                       }
+
+|      expr ':' expr   { $$ = regexp($1, $3); }
+
+|      expr '*' expr   { $$ = arithop('*', $1, $3); }
+|      expr '/' expr   { $$ = arithop('/', $1, $3); }
+|      expr '%' expr   { $$ = arithop('%', $1, $3); }
+|      expr '+' expr   { $$ = arithop('+', $1, $3); }
+|      expr '-' expr   { $$ = arithop('-', $1, $3); }
+
+|      expr '<' expr   { $$ = relop('<', $1, $3); }
+|      expr '>' expr   { $$ = relop('>', $1, $3); }
+|      expr LE expr    { $$ = relop(LE, $1, $3); }
+|      expr GE expr    { $$ = relop(GE, $1, $3); }
+|      expr EQ expr    { $$ = relop(EQ, $1, $3); }
+|      expr NEQ expr   { $$ = relop(NEQ, $1, $3); }
+
+|      expr '&' expr   { $$ = true($3) ? $1 : s0; }
+|      expr '|' expr   { $$ = true($1) ? $1 : $3; }
+
+|      '{' expr ',' expr '}'           { $$ = true($2) ? $4 : s0; }
+|      '{' expr ',' expr ',' expr '}'  { $$ = true($2) ? $4 : $6; }
+
+|      STR             { $$ = $1; }
+|      error           { yyerror(""); }
+;
+
+
+
+
+%%
+
+
+
+
+char   **av;           /* Global version of argv[] in main() */
+int    avx;            /* Index into av[] */
+
+int main(int argc, char *argv[])
+{
+       if (argc == 1)
+               return 2;
+       av = argv;
+       yyparse();
+       output(result, 1);
+       output("\n", 1);
+       return exstat;
+}
+
+
+char *arithop(int op, char *e1, char *e2)
+{
+       register long v1, v2;
+
+       if (!isnum(e1)) {
+               avx -= 3;
+               yyerror("");
+       }
+       if (!isnum(e2)) {
+               --avx;
+               yyerror("");
+       }
+
+       v1 = atol(e1);
+       v2 = atol(e2);
+       switch (op) {
+       case '+':
+               v1 += v2;
+               break;
+       case '-':
+               v1 -= v2;
+               break;
+       case '*':
+               v1 *= v2;
+               break;
+       case '/':
+               if (v2 == 0) {
+                       divzero ();
+                       v1 = LONG_MAX;
+               } else
+                       v1 /= v2;
+               break;
+       case '%':
+               if (v2 == 0) {
+                       divzero ();
+                       v1 = 0;
+               } else
+                       v1 %= v2;
+               break;
+       }
+       return ltoascii(v1);
+}
+
+char *relop(int op, char *e1, char *e2)
+{
+       register int cmp;
+       register long v1, v2;
+
+       if (!isnum(e1) || !isnum(e2))
+               cmp = strcmp(e1, e2);
+       else {
+               v1 = atol(e1);
+               v2 = atol(e2);
+               cmp = (v1 > v2) ? 1 : (v1 == v2) ? 0 : -1;
+       }
+       switch (op) {
+       case '<':
+               return (cmp < 0) ? s1 : s0;
+       case '>':
+               return (cmp > 0) ? s1 : s0;
+       case LE:
+               return (cmp <= 0) ? s1 : s0;
+       case GE:
+               return (cmp >= 0) ? s1 : s0;
+       case EQ:
+               return (cmp == 0) ? s1 : s0;
+       case NEQ:
+               return (cmp != 0) ? s1 : s0;
+       }
+}
+
+
+char *regexp(char *e1, char *e2)
+{
+       register char *a = e1;
+       register char *b;
+       register BRACE *brp;
+
+       codebuf = malloc(CODELEN);
+       compile(e2);
+       if (brcount > 0)        /* brcount is now the number of braces in e2 */
+               brlist[brcount].b_bp = brlist[brcount].b_ep = NULL;
+#if    1
+       /* Posix P1003.2 4.22.7.1: pattern search is anchored to beginning. */
+       b = match(a, codebuf);
+#else
+       /* This code searches for unanchored match. */
+       if (codebuf[0] == CSSOL)
+               b = match(a, codebuf + 1);
+       else
+               for ( ; *a != '\0'; ++a)
+                       if ((b = match(a, codebuf)) != NULL)
+                               break;
+#endif
+       if (b == NULL)
+#if    1
+               /* If \(\) used, string-valued return */
+               return brcount == 0 ? "0" : "";
+#else
+               return "0";
+#endif
+       if (brcount == 0)
+               return ltoascii((long)(b - a));
+
+       /* Remaining case is extraction of fields */
+       for (a = e1, brp = brlist; (b = brp->b_bp) != NULL; ++brp)
+               while (b < brp->b_ep)
+                       *a++ = *b++;
+       *a = '\0';
+       free (codebuf);
+       return e1;
+}
+
+
+int isnum(char *e)
+{
+       register int c;
+
+       if ((c = * e ++) == '-')        /* || c == '+' Removed for POSIX */
+               c = * e ++;
+       do
+               if (! isdigit (c))
+                       return FALSE;
+       while ((c = * e ++) != 0);
+       return TRUE;
+}
+
+/*
+ * Convert long to ascii. Return pointer to the necessary malloced storage.
+ */
+char *ltoascii(long n)
+{
+       char buf[LONGLEN];
+       register char *bp = buf;
+       register char *ep;
+       register char *e;
+
+       e = ep = malloc(LONGLEN);
+       if (n < 0) {
+               *ep++ = '-';
+               n = -n;
+       }
+       do {
+               *bp++ = (n % 10) + '0';
+               n /= 10;
+       } while (n > 0);
+       while (bp > buf)
+               *ep++ = *--bp;
+       *ep = '\0';
+       return e;
+}
+
+
+/*
+ * Compile the regular expression e into codebuf.
+ * Invoke regerror() on a regular expression syntax error.
+ */
+void compile(char *e)
+{
+       register int c;
+       register char *cp, *lcp;
+       int blevel, n, notflag, bstack[BRSIZE + 1];
+
+       brcount = 0;
+       blevel = 0;
+       cp = &codebuf[0];
+       if ((c = getx(c)) == '^') {
+               *cp++ = CSSOL;
+               c = getx(c);
+       }
+       while (c != '\0') {
+               if (cp > &codebuf[cbsiz-4])
+                       cp = overflow(cp);
+               switch (c) {
+               case '*':
+                       regerror();
+               case '.':
+                       if ((c = getx(c)) != '*') {
+                               *cp++ = CSDOT;
+                               continue;
+                       }
+                       *cp++ = CMDOT;
+                       c = getx(c);
+                       continue;
+               case '$':
+                       if ((c = getx(c)) != '\0') {
+                               ungetx(c);
+                               c = '$';
+                               goto character;
+                       }
+                       *cp++ = CSEOL;
+                       continue;
+               case '[':
+                       /*
+                        * lcp[0] will contain C<S|M><C|N>CL. lcp[1] will be
+                        * the number of chars in the class. These are followed
+                        * by the members of the class singly enumerated.
+                        * ']' is valid only at the start of the member list.
+                        * '-' is valid only at the end of the member list.
+                        */
+                       lcp = cp;
+                       if ((c = getx(c)) == '^')
+                               notflag = TRUE;
+                       else {
+                               notflag = FALSE;
+                               ungetx(c);
+                       }
+                       cp += 2;
+                       if ((c = getx(c)) == ']')
+                               *cp++ = c;
+                       else
+                               ungetx(c);
+                       while ((c = getx(c)) != ']') {
+                               if (c == '\0')
+                                       regerror();
+                               if (c!='-' || cp==lcp+2) {
+                                       if (cp >= &codebuf[cbsiz-4])
+                                               cp = overflow(cp);
+                                       *cp++ = c;
+                                       continue;
+                               }
+
+                               /* c = '-' now. Lookahead at the next char */
+                               if ((c = getx(c)) == '\0')
+                                       regerror();
+                               if (c == ']') {
+                                       *cp++ = '-';
+                                       ungetx(c);
+                                       continue;
+                               }
+                               if ((n=cp[-1]) > c)
+                                       regerror();
+                               while (++n <= c) {
+                                       if (cp >= &codebuf[cbsiz-4])
+                                               cp = overflow(cp);
+                                       *cp++ = n;
+                               }
+                       }
+                       if ((c = getx(c)) == '*') {
+                               lcp[0] = (notflag) ? CMNCL : CMCCL;
+                               c = getx(c);
+                       }
+                       else
+                               lcp[0] = (notflag) ? CSNCL : CSCCL;
+                       if ((n=cp-(lcp+2)) > 255)
+                               regerror();
+                       *++lcp = n;
+                       continue;
+               case '\\':
+                       switch (c = getx(c)) {
+                       case '\0':
+                               regerror();
+                       case '(':
+                               *cp++ = CSOPR;
+                               *cp++ = bstack[blevel++] = brcount++;
+                               c = getx(c);
+                               continue;
+                       case ')':
+                               if (blevel == 0)
+                                       regerror();
+                               *cp++ = CSCPR;
+                               *cp++ = bstack[--blevel];
+                               c = getx(c);
+                               continue;
+                       default:
+                               if (isascii(c) && isdigit(c)) {
+                                       *cp++ = CSBRN;
+                                       *cp++ = c-'0' - 1;
+                                       c = getx(c);
+                                       continue;
+                               }
+                       }
+               default:
+               character:
+                       *cp++ = CSCHR;
+                       *cp++ = c;
+                       if ((c = getx(c)) == '*') {
+                               cp[-2] = CMCHR;
+                               c = getx(c);
+                       }
+               }
+       }
+       *cp++ = CSNUL;
+       return;
+}
+
+/*
+ * Given a pointer to a compiled expression `cp' and a pointer to a line `lp',
+ * return a ptr to the char following the last char of the match
+ * if successful, NULL otherwise.
+ */
+char *match(char *lp, char *cp)
+{
+       register int n;
+       char *llp, *lcp;
+
+       for (;;) {
+               switch (*cp++) {
+               case CSNUL:
+                       return lp;
+               case CSEOL:
+                       if (*lp)
+                               return NULL;
+                       return lp;
+               case CSOPR:
+                       brlist[*cp++].b_bp = lp;
+                       continue;
+               case CSCPR:
+                       brlist[*cp++].b_ep = lp;
+                       continue;
+               case CSBRN:
+                       n = *cp++;
+                       lcp = cp;
+                       cp = brlist[n].b_bp;
+                       n = brlist[n].b_ep - cp;
+                       if (n > strlen(lp))
+                               return NULL;
+                       while (n-- > 0)
+                               if (*lp++ != *cp++)
+                                       return NULL;
+                       cp = lcp;
+                       continue;
+               case CSDOT:
+                       if (*lp++ == '\0')
+                               return NULL;
+                       continue;
+               case CMDOT:
+                       llp = lp;
+                       while (*lp)
+                               lp++;
+                       goto star;
+               case CSCHR:
+                       if (*cp++ != *lp++)
+                               return NULL;
+                       continue;
+               case CMCHR:
+                       llp = lp;
+                       while (*cp == *lp)
+                               lp++;
+                       cp++;
+                       goto star;
+               case CSCCL:
+                       n = *cp++;
+                       while (*cp++ != *lp)
+                               if (--n == 0)
+                                       return NULL;
+                       lp++;
+                       cp += n-1;
+                       continue;
+               case CMCCL:
+                       llp = lp;
+                       lcp = cp;
+                       while (*lp) {
+                               cp = lcp;
+                               n = *cp++;
+                               while (*cp++ != *lp)
+                                       if (--n == 0)
+                                               goto star;
+                               lp++;
+                       }
+                       cp = lcp + *lcp + 1;
+                       goto star;
+               case CSNCL:
+                       if (*lp == '\0')
+                               return NULL;
+                       n = *cp++;
+                       while (n--)
+                               if (*cp++ == *lp)
+                                       return NULL;
+                       lp++;
+                       continue;
+               case CMNCL:
+                       llp = lp;
+                       lcp = cp;
+                       while (*lp) {
+                               cp = lcp;
+                               n = *cp++;
+                               while (n--) {
+                                       if (*cp++ == *lp) {
+                                               cp = lcp + *lcp + 1;
+                                               goto star;
+                                       }
+                               }
+                               lp++;
+                       }
+                       cp = lcp + *lcp + 1;
+               star:
+                       do {
+                               if (lcp=match(lp, cp))
+                                       return lcp;
+                       } while (--lp >= llp);
+                       return NULL;
+               }
+       }
+}
+
+/*
+ * overflow enlarges codebuf by CODEINC bytes. The argument is a pointer
+ * to a position in codebuf - the function returns a pointer with the same
+ * relative position in the new buffer.
+ */
+char *overflow(char *pc)
+{
+       register int posn = pc - codebuf;
+
+       if ((codebuf = realloc(codebuf, cbsiz += CODEINC)) == NULL)
+               regerror();
+       return codebuf + posn;
+}
+
+/*
+ * An output function to avoid having to include stdio.
+ */
+void output(char *s, int fildes)
+{
+       register int len;
+
+       len = strlen(s);
+       if ((write(fildes, s, len)) != len)
+               exit(3);
+}
+
+
+int yylex(void)
+{
+       register int c;
+
+       if ((yylval = av[++avx]) == NULL)
+               return EOF;
+       if (av[avx][1] == '\0')
+               switch (c = av[avx][0]) {
+               case '{':
+               case '}':
+               case ',':
+               case '|':
+               case '&':
+               case '<':
+               case '>':
+               case '+':
+               case '-':
+               case '*':
+               case '/':
+               case '%':
+               case ':':
+               case '!':
+               case '(':
+               case ')':
+                       return c;
+               default:
+                       return STR;
+               }
+       if (av[avx][1] == '='  &&  av[avx][2] == '\0')
+               switch (c = av[avx][0]) {
+               case '<':
+                       return LE;
+               case '>':
+                       return GE;
+               case '=':
+                       return EQ;
+               case '!':
+                       return NEQ;
+               default:
+                       return STR;
+               }
+       return (strcmp(yylval, "len") == 0) ? LEN : STR;
+}
+
+/*
+ * The common code between yyerror() and regerror() (in regexp.c) is split
+ * off into errexit().
+ */
+void yyerror(const char *p)
+{
+       output("expr: ", 2);
+       errexit();
+}
+
+void regerror(void)
+{
+       output("expr: regular expression ", 2);
+       --avx;
+       errexit();
+}
+
+void dieat (void)
+{
+       output(ltoascii((long) avx), 2);
+       output("\n", 2);
+       exit(2);
+}
+
+void divzero(void)
+{
+       output ("Attempt to divide by zero at argument # ", 2);
+       -- avx;
+       dieat ();
+}
+
+void errexit(void)
+{
+       output("syntax error at argument # ", 2);
+       dieat ();
+}
+
+/* end of cmd/expr.y */