basic: snapshot further work
authorAlan Cox <alan@linux.intel.com>
Sun, 5 Nov 2017 22:04:25 +0000 (22:04 +0000)
committerAlan Cox <alan@linux.intel.com>
Sun, 5 Nov 2017 22:04:25 +0000 (22:04 +0000)
Add all the ECMA55 tokens as a base point
Rework the expression evaluator to be a lot simpler and cleaner, and also fix bugs

Still need to fix the logic for function(param1,param2)

Applications/basic/README
Applications/basic/calc.c
Applications/basic/maketokens.c
Applications/basic/maketoks [new file with mode: 0755]
Applications/basic/tokenizer.c
Applications/basic/tokens.h

index cf5429d..1539270 100644 (file)
@@ -1 +1,66 @@
 Internal working tree for the replacmeent of ubasic
+
+- Ideas
+
+Put the start of the workspace above the end of whichever is larger, the
+interpreter or the editor. Add a fixed entry point to each and we can then
+treat them as overlays with a common header stub!
+
+- Compiler
+
+- Expressions
+
+Fix FOO(bar,baz)
+
+Make PI and RND not functions but constants
+
+Parse FN X as a special type of function and also FN X(y)
+
+
+
+Strings
+
+need to push ptr to string and length in calc (so we can treat "foo" and
+A$ the same way)
+
+temporary strings go just below the return stack as we don't change that
+during anything with string variables.
+
+Note: GOSUB expr evaluates expr to an integer, which removes any need for
+string temporaries *before* the stacking.
+
+
+ECMA-55 only requires names to be unique to two letters. So, given that case
+doesn't matter, we could pack variables with a 16-bit lead tag as
+
+[5.first][6.second]            (A0-A9 AA-ZZ)
+[type.2]
+00                             number
+01                             integer (for xx% if we add it)
+10                             string
+11                             fornext
+[dims.3]                       number of array dims  (0 = not array)
+
+lead byte 0 = end (because it would otherwise be variable '@@')
+
+Then we can pack vars as
+
+[type/code.w]
+[value]
+
+or for array or string
+
+[type/code.w]
+[totalsize]
+[lengths.w * n]  (0 if non array)
+[data]
+
+
+which means our search becomes
+
+       while (*ptr && (*(uint16_t *)ptr & LOW11) != seek) {
+               if (ptr[1] & SIZED)
+                       ptr += *(uint16_t *)(ptr + 2);
+               else
+                       ptr += 2;       /* 2 or 5 if fp basic */
+       }
index f0ccafa..8e2f211 100644 (file)
@@ -5,8 +5,11 @@
 #include <ctype.h>
 #include <errno.h>
 
+#include "tokens_def.h"
 
-static const char *ctypes[9] = {
+#define DEBUG
+
+static const char *ctypes[11] = {
     "eof",
     "constant",
     "leftassoc",
@@ -16,6 +19,24 @@ static const char *ctypes[9] = {
     "unary",
     "symbol",
     "postfix",
+    "function arg",
+    "rightassoc"
+};
+
+enum {
+    LOGIC_OR,
+    LOGIC_AND,
+    LOGIC_NOT,
+    LOGIC_COMPARE,
+    ARITH_AS,          /* add sub */
+    ARITH_DM,          /* div mul */
+    ARITH_UM,          /* unary minus */
+    ARITH_O,           /* orders */
+    ARITH_B,           /* brackets */
+    ARITH_FUNC,                /* functions */
+    ARITH_ARRAY,       /* array dereferencing */
+    FN_COMMA,          /* , in functions */
+    FN_CALL,           /* function call */
 };
 
 struct token {
@@ -32,6 +53,8 @@ struct token {
 #define UNARY                  6
 #define SYMBOL                 7
 #define POSTFIX                        8
+#define FN_ARG                 9
+#define        RIGHT_ASSOC             10
     uint16_t data;
 };
 
@@ -43,7 +66,40 @@ struct token {
 
 char *input;
 
-static const char ops[] = {"+-*/%"};
+static const uint8_t ops[] = {
+                        '!',           /* FIXME: for testing only */
+                        '+', '-',
+                        '*', '/',
+                        '(',')',
+                        TOK_ORD, '^',
+                        TOK_NOT, TOK_AND, TOK_OR,
+                        '=', '<', '>',
+                        TOK_NE, TOK_LE, TOK_GE,
+                        ',',
+                        0
+};
+
+static uint8_t oppri[] = {
+                            FN_CALL,
+                            ARITH_AS, ARITH_AS,
+                            ARITH_DM, ARITH_DM,
+                            ARITH_B, 0,
+                            ARITH_O, ARITH_O,
+                            LOGIC_NOT, LOGIC_AND, LOGIC_OR,
+                            LOGIC_COMPARE, LOGIC_COMPARE, LOGIC_COMPARE,
+                            LOGIC_COMPARE, LOGIC_COMPARE, LOGIC_COMPARE,
+                            FN_COMMA,
+                          };
+static uint8_t opclass[] = { FUNCTION,
+                             LEFT_ASSOC, LEFT_ASSOC,   /* +- */
+                             LEFT_ASSOC, LEFT_ASSOC, /* mul div */
+                             LEFTPAREN, RIGHTPAREN,
+                             RIGHT_ASSOC, RIGHT_ASSOC,
+                             UNARY, LEFT_ASSOC, LEFT_ASSOC,
+                             LEFT_ASSOC, LEFT_ASSOC, LEFT_ASSOC,
+                             LEFT_ASSOC, LEFT_ASSOC, LEFT_ASSOC,
+                             FN_ARG,
+                          };
 
 static struct token eoftok = {
     0,
@@ -51,10 +107,10 @@ static struct token eoftok = {
     0
 };
 
-static struct token eoftok2 = {
-    ';',
-    EOF_TOKEN,
-    0
+static struct token starttok = {
+    '(',
+    LEFTPAREN,
+    ARITH_B
 };
 
 static struct token peek;
@@ -63,7 +119,7 @@ static int peeked;
 struct token *token(void)
 {
     static struct token n;
-    char *x;
+    uint8_t *x;
 
     if (peeked) {
         peeked = 0;
@@ -75,32 +131,12 @@ struct token *token(void)
     if (*input == 0)
         return &eoftok;
 
-    if (*input == ';')
-        return &eoftok2;
+    n.tok = *input;
 
     if ((x = strchr(ops, *input)) != NULL) {
-        n.tok = *input;                /* Op code */
-        n.class = LEFT_ASSOC;
-        n.data = 1 + x - ops;  /* Priority */
-        input++;
-        return &n;
-    }
-    if (*input == '(') {
-        n.tok = *input;
-        n.class = LEFTPAREN;
-        input++;
-        return &n;
-    }
-    if (*input == ')') {
-        n.tok = *input;
-        n.class = RIGHTPAREN;
-        input++;
-        return &n;
-    }
-    if (*input == '!') {
-        n.tok = '!';
-        n.class = UNARY;
-        n.data = 0;
+        uint8_t o = (uint8_t)(x - ops);
+        n.class = opclass[o];
+        n.data = oppri[o];
         input++;
         return &n;
     }
@@ -170,6 +206,9 @@ void popop(void)
         exit(1);
     }
     t = *--optop;
+#ifdef DEBUG
+    printf("popop %c\n", t.tok);
+#endif    
     switch(t.tok) {
         case '(':
             break;
@@ -181,12 +220,9 @@ void popop(void)
             break;
         case '/':
             tmp = pop();
+//            if (tmp == 0) ...
             push(pop() / tmp);
             break;
-        case '%':
-            tmp = pop();
-            push(pop() % tmp);
-            break;
         case '-':
             tmp = pop();
             if(t.class == LEFT_ASSOC)
@@ -195,8 +231,59 @@ void popop(void)
                 push(-tmp);
             break;
         case '!':
+        case TOK_NOT:
             push(!pop());
             break;
+        case TOK_AND:
+            push(pop() && pop());
+            break;
+        case TOK_OR:
+            push(pop() || pop());
+            break;
+        case '=':
+            push(pop() == pop());
+            break;
+        case '<':
+            push(pop() >= pop());
+            break;
+        case '>':
+            push(pop() <= pop());
+            break;
+        case TOK_NE:
+            push(pop() != pop());
+            break;
+        case TOK_LE:
+            push(pop() > pop());
+            break;
+        case TOK_GE:
+            push(pop() < pop());
+            break;
+#ifdef FLOAT            
+        case TOK_ORD:
+            tmp = pop();
+            push powf(pop(), tnp);
+            break;
+#endif
+        case TOK_INT:
+            push((int)pop());
+            break;
+        case TOK_ABS:
+            push(abs(pop()));
+            break;
+        case TOK_SGN:
+            tmp = pop();
+            if (tmp < 0)
+                push(-1);
+            else if (tmp == 0)
+                push(0);
+            else
+                push(1);
+            break;
+        case TOK_MOD:
+            tmp = pop();
+//            if (tmp == 0)...
+            push(pop() % tmp);
+            break;
         default:
             if (t.class == FUNCTION) {
                 /* ??? */
@@ -212,7 +299,7 @@ void pushop(const struct token *t)
         fprintf(stderr, "pushop: too complex\n");
         exit(1);
     }
-#ifdef DEBUG
+#ifdef DEBUG    
     printf("pushop %d %c\n",t->class,t->tok);
 #endif    
     *optop++ = *t;
@@ -237,6 +324,7 @@ void popout(void) {
 void popout_final(void)
 {
     do_popout(LEFTPAREN);
+    popop();
     if (optop != opstack)
         fprintf(stderr, "Unbalanced brackets expression end.\n");
     printf("Answer = %u\n", pop());
@@ -269,7 +357,7 @@ void neednext(uint8_t h, uint8_t n)
 static const struct token fncall = {
     FUNCTION,
     FUNCTION,
-    0
+    FN_CALL
 };
 
 /* Write out an expression tree as we linearly parse the code. We arrange
@@ -290,13 +378,13 @@ static const struct token fncall = {
    
    */
 
-const struct token *eval(int in_decl)
+void eval(void)
 {
     struct token *t;
     next = OPERAND;
 
     while((t = token())->class != EOF_TOKEN) {
-#if DEBUG
+#ifdef DEBUG
         printf("|Token %d Class %s Data %d\n",
             t->tok, ctypes[t->class], t->data);
 #endif            
@@ -306,31 +394,46 @@ const struct token *eval(int in_decl)
                 push(t->data);
                 break;
             case SYMBOL:
-                /* symbols might be functions - tidy this up */
                 neednext(OPERAND, OPERATOR|LPAREN);
                 push(t->data);
                 break;
             case UNARY:
-                neednext(OPERAND, OPERAND);
+                neednext(OPERAND|LPAREN, OPERAND);
                 pushop(t);
                 break;
             case LEFT_ASSOC:
                 /* Umary minus is special */
                 if (t->tok == '-' && next == OPERAND) {
-                    neednext(OPERAND, OPERAND);
+                    neednext(OPERAND|LPAREN, OPERAND);
                     t->class = UNARY;
                     pushop(t);
                     break;
                 }
             case FUNCTION:
-                neednext(OPERATOR, OPERAND);
-                while (optop > opstack && optop->class == LEFT_ASSOC &&
-                       PRECEDENCE(t) <= PRECEDENCE(optop))
-                    popop();
-                pushop(t);
-                if (optop > opstack && optop->class == FUNCTION)
+            case RIGHT_ASSOC:
+                if (CLASS(t) == FUNCTION)
+                    neednext(OPERAND, OPERAND);
+                else
+                    neednext(OPERATOR, OPERAND);
+                do {
+                    if (optop == opstack || CLASS(optop-1) == LEFTPAREN) {
+                        pushop(t);
+                        break;
+                    }
+                    if (PRECEDENCE(t) > PRECEDENCE(optop-1)) {
+                        pushop(t);
+                        break;
+                    }
+                    if (CLASS(t) == RIGHT_ASSOC && PRECEDENCE(t) == PRECEDENCE(optop-1)) {
+                        pushop(t);
+                        break;
+                    }
                     popop();
+                } while (optop > opstack);
+                if (optop == opstack)
+                    fprintf(stderr, "internal -> bracket lost\n");
                 break;
+                
             case LEFTPAREN:
                 if (next & OPERATOR) {
                     /* Function call */
@@ -341,6 +444,12 @@ const struct token *eval(int in_decl)
                     neednext(OPERAND|LPAREN,OPERAND);
                 pushop(t);
                 break;
+            case FN_ARG:
+                /* Calculate the argument before the comma if any and put
+                   it on the data stack */
+                do_popout(FUNCTION);
+                neednext(OPERATOR,OPERAND|LPAREN);
+                break;
             case RIGHTPAREN:
                 neednext(OPERATOR|RPAREN, OPERATOR);
                 popout();
@@ -353,7 +462,6 @@ const struct token *eval(int in_decl)
 done:
     neednext(OPERATOR,0);
     popout_final();
-    return t;
 }
 
 int main(int argc, char *argv[])
@@ -361,5 +469,6 @@ int main(int argc, char *argv[])
     char buf[512];
     fgets(buf, 512, stdin);
     input = buf;
-    eval(0);
+    pushop(&starttok);
+    eval();
 }
index bdd985f..fc26c48 100644 (file)
@@ -1,19 +1,70 @@
 #include <stdio.h>
+#include <string.h>
 #include <ctype.h>
 
 char *tokens[] = {
     "PRINT",
+    "INPUT",
     "IF",
-    "GO",
+    "THEN",
+    "ELSE",
+    "FOR",
     "TO",
+    "NEXT",
+    "STEP",
+    "GO",
     "SUB",
-    "LET",
-    "INPUT",
     "RETURN",
+    "LET",
+    "REM",
     "CLEAR",
     "LIST",
     "RUN",
+    "STOP",
     "END",
+    "DIM",
+    "RANDOMIZE",
+    "READ",
+    "DATA",
+    "RESTORE",
+    "OPTION",
+    "BASE",
+    "TAB",
+    "AT",
+    "CLS",
+    "SAVE",
+    "LOAD",
+    "DEF",
+    "FN",
+    "ON",
+    "AND",
+    "OR",
+    "NOT",
+    "<=|LE",
+    ">=|GE",
+    "<>|NE",
+    "**|ORD",
+    "ABS",
+    "ATN",
+    "CODE",
+    "COS",
+    "EXP",
+    "INT",
+    "LOG",
+    "RND",
+    "SGN",
+    "SIN",
+    "SQR",
+    "TAN",
+    
+    "LEN",
+    "VAL",
+    "MOD",
+    "LEFT$",
+    "RIGHT$",
+    "MID$",
+    "CHR$",
+    "INKEY$",
     NULL
 };
 
@@ -25,16 +76,20 @@ int main(int argc, char *argv[])
     char *x;
     printf("#define TOKEN_BASE %d\n", tokbase);
     while(x = *p) {
+        char *y = strchr(x, '|');
+        if (y)
+            y++;
+        else
+            y = x;
         putchar('\t');
-        while(x[1])
+        while(x[1] && x[1] != '|')
             printf("'%c',", *x++);
         printf("0x%02X,\n", *x|0x80);
         printf("#define TOK_");
-        x = *p;
-        while(*x) {
-            if (isalnum(*x))
-                putchar(*x);
-            x++;
+        while(*y) {
+            if (isalnum(*y))
+                putchar(*y);
+            y++;
         }
         printf(" %d\n", tokbase++);
         p++;
diff --git a/Applications/basic/maketoks b/Applications/basic/maketoks
new file mode 100755 (executable)
index 0000000..d89e4ba
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/sh
+./maketokens |grep '^#' >tokens_def.h
+./maketokens > tokens.h
index 400683b..2de82ff 100644 (file)
@@ -156,7 +156,7 @@ void tokenize_line(uint8_t * input)
                        uint8_t *f = p++;
                        /* Otherwise we keep adding letters and trying to tokenize. This
                           is inefficient but we don't do it often */
-                       while (isalnum(*p) || *p == '.') {
+                       while (*p) {
                                *p |= 0x80;
                                t = tokget(f);
                                if (t == 0)
index 0c10ee7..c0eb19c 100644 (file)
 #define TOKEN_BASE 192
        'P','R','I','N',0xD4,
 #define TOK_PRINT 192
+       'I','N','P','U',0xD4,
+#define TOK_INPUT 193
        'I',0xC6,
-#define TOK_IF 193
-       'G',0xCF,
-#define TOK_GO 194
+#define TOK_IF 194
+       'T','H','E',0xCE,
+#define TOK_THEN 195
+       'E','L','S',0xC5,
+#define TOK_ELSE 196
+       'F','O',0xD2,
+#define TOK_FOR 197
        'T',0xCF,
-#define TOK_TO 195
+#define TOK_TO 198
+       'N','E','X',0xD4,
+#define TOK_NEXT 199
+       'S','T','E',0xD0,
+#define TOK_STEP 200
+       'G',0xCF,
+#define TOK_GO 201
        'S','U',0xC2,
-#define TOK_SUB 196
-       'L','E',0xD4,
-#define TOK_LET 197
-       'I','N','P','U',0xD4,
-#define TOK_INPUT 198
+#define TOK_SUB 202
        'R','E','T','U','R',0xCE,
-#define TOK_RETURN 199
+#define TOK_RETURN 203
+       'L','E',0xD4,
+#define TOK_LET 204
+       'R','E',0xCD,
+#define TOK_REM 205
        'C','L','E','A',0xD2,
-#define TOK_CLEAR 200
+#define TOK_CLEAR 206
        'L','I','S',0xD4,
-#define TOK_LIST 201
+#define TOK_LIST 207
        'R','U',0xCE,
-#define TOK_RUN 202
+#define TOK_RUN 208
+       'S','T','O',0xD0,
+#define TOK_STOP 209
        'E','N',0xC4,
-#define TOK_END 203
+#define TOK_END 210
+       'D','I',0xCD,
+#define TOK_DIM 211
+       'R','A','N','D','O','M','I','Z',0xC5,
+#define TOK_RANDOMIZE 212
+       'R','E','A',0xC4,
+#define TOK_READ 213
+       'D','A','T',0xC1,
+#define TOK_DATA 214
+       'R','E','S','T','O','R',0xC5,
+#define TOK_RESTORE 215
+       'O','P','T','I','O',0xCE,
+#define TOK_OPTION 216
+       'B','A','S',0xC5,
+#define TOK_BASE 217
+       'T','A',0xC2,
+#define TOK_TAB 218
+       'A',0xD4,
+#define TOK_AT 219
+       'C','L',0xD3,
+#define TOK_CLS 220
+       'S','A','V',0xC5,
+#define TOK_SAVE 221
+       'L','O','A',0xC4,
+#define TOK_LOAD 222
+       'A','N',0xC4,
+#define TOK_AND 223
+       'O',0xD2,
+#define TOK_OR 224
+       'N','O',0xD4,
+#define TOK_NOT 225
+       '<',0xBD,
+#define TOK_LE 226
+       '>',0xBD,
+#define TOK_GE 227
+       '<',0xBE,
+#define TOK_NE 228
+       '*',0xAA,
+#define TOK_ORD 229
+       'I','N',0xD4,
+#define TOK_INT 230
+       'A','B',0xD3,
+#define TOK_ABS 231
+       'S','G',0xCE,
+#define TOK_SGN 232
+       'L','E',0xCE,
+#define TOK_LEN 233
+       'C','O','D',0xC5,
+#define TOK_CODE 234
+       'V','A',0xCC,
+#define TOK_VAL 235
+       'M','O',0xC4,
+#define TOK_MOD 236
+       'L','E','F','T',0xA4,
+#define TOK_LEFT 237
+       'R','I','G','H','T',0xA4,
+#define TOK_RIGHT 238
+       'M','I','D',0xA4,
+#define TOK_MID 239
+       'C','H','R',0xA4,
+#define TOK_CHR 240
+       'I','N','K','E','Y',0xA4,
+#define TOK_INKEY 241