Add a whole bunch of VC4 opcodes.
author David Given <dg@cowlark.com>
Fri, 17 May 2013 22:30:49 +0000 (23:30 +0100)
committer David Given <dg@cowlark.com>
Fri, 17 May 2013 22:30:49 +0000 (23:30 +0100)
--HG--
branch : dtrg-videocore

mach/vc4/as/mach1.c
mach/vc4/as/mach2.c
mach/vc4/as/mach3.c
mach/vc4/as/mach4.c
mach/vc4/as/mach5.c
mach/vc4/test/opcodes.s

index 96a8a07..915d1fb 100644 (file)
@@ -4,3 +4,24 @@
  * This file is redistributable under the terms of the 3-clause BSD license.
  * See the file 'Copying' in the root of the distribution for the full text.
  */
+
+#include "binary.h"
+
+/* Condition-code value the grammar passes to the encoders when an
+ * instruction is written without a condition suffix. */
+#define ALWAYS 14
+
+/* ALU operation whose last operand is a register. */
+extern void alu_instr_reg(unsigned opcode, unsigned cc, unsigned rd,
+       unsigned ra, unsigned rb);
+
+/* ALU operation whose last operand is a literal value. */
+extern void alu_instr_lit(unsigned opcode, unsigned cc, unsigned rd,
+       unsigned ra, unsigned value);
+
+/* Miscellaneous operation with three registers and a condition code. */
+extern void misc_instr_reg(unsigned opcode, unsigned cc, unsigned rd,
+       unsigned ra, unsigned rb);
+
+/* Miscellaneous operation with two registers, a literal and a condition
+ * code. */
+extern void misc_instr_lit(unsigned opcode, unsigned cc, unsigned rd,
+       unsigned ra, unsigned value);
+
+/* Branch to expr; bl is nonzero for "bl" and zero for "b". */
+extern void branch_instr(unsigned bl, unsigned cc, struct expr_t* expr);
+
+/* Push or pop of the register range loreg..hireg. extrareg is the register
+ * written after the comma, or -1 when there is none. */
+extern void stack_instr(unsigned opcode, unsigned loreg, unsigned hireg,
+       unsigned extrareg);
\ No newline at end of file
index c69007d..6fd2b0e 100644 (file)
@@ -6,13 +6,17 @@
  */
 
 %token <y_word> GPR
+%token <y_word> CC
 
 %token <y_word> OP
+%token <y_word> OP_BRANCH
 %token <y_word> OP_ONEREG
 %token <y_word> OP_ONELREG
 %token <y_word> OP_ALU
+%token <y_word> OP_FPU
 %token <y_word> OP_MEM
-%token <y_word> OP_BREG
+%token <y_word> OP_MISC
+%token <y_word> OP_MISCL
 %token <y_word> OP_STACK
 
 /* Other token types */
index b36652d..b2cd1c2 100644 (file)
@@ -5,8 +5,6 @@
  * See the file 'Copying' in the root of the distribution for the full text.
  */
 
-#include "binary.h"
-
 /* Integer registers */
 
 0,     GPR,        0,          "r0",
@@ -17,7 +15,6 @@
 0,     GPR,        5,          "r5",
 
 0,     GPR,        6,          "r6",
-0,     GPR,        6,          "fp",
 0,     GPR,        7,          "r7",
 0,     GPR,        8,          "r8",
 0,     GPR,        9,          "r9",
@@ -42,6 +39,7 @@
 0,     GPR,        26,         "r26",
 0,     GPR,        26,         "lr",
 0,     GPR,        27,         "r27",
+0,     GPR,        27,         "fp",
 0,     GPR,        28,         "r28",
 0,     GPR,        29,         "r29",
 0,     GPR,        30,         "r30",
 0,     GPR,        31,         "r31",
 0,     GPR,        31,         "pc",
 
+/* Condition codes */
+
+0,     CC,         0,          ".eq",
+0,     CC,         1,          ".ne",
+0,     CC,         2,          ".cs",
+0,     CC,         2,          ".lo",
+0,     CC,         3,          ".cc",
+0,     CC,         3,          ".hg",
+0,     CC,         4,          ".mi",
+0,     CC,         5,          ".pl",
+0,     CC,         6,          ".vs",
+0,     CC,         7,          ".vc",
+0,     CC,         8,          ".hi",
+0,     CC,         9,          ".ls",
+0,     CC,         10,         ".ge",
+0,     CC,         11,         ".lt",
+0,     CC,         12,         ".gt",
+0,     CC,         13,         ".le",
+0,     CC,         15,         ".f",
+
 /* Special instructions */
 
 0,     OP,                    B16(00000000,00000001),                  "nop",
 0,     OP,                    B16(00000000,00001010),                  "rti",
 
-0,     OP_ONEREG,             B16(00000000,01000000),                  "b",
-0,     OP_ONEREG,             B16(00000000,01100000),                  "bl",
+0,     OP_BRANCH,             0,                                       "b",
+0,     OP_BRANCH,             1,                                       "bl",
+
 0,     OP_ONELREG,            B16(00000000,10000000),                  "tbb",
 0,     OP_ONELREG,            B16(00000000,10100000),                  "tbs",
 
 0,     OP_ALU,                B8(00011110),                            "asr",
 0,     OP_ALU,                B8(00011111),                            "abs",
 
+0,     OP_MISC,               B16(11001000,00000000),                  "fadd",
+0,     OP_MISC,               B16(11001000,00100000),                  "fsub",
+0,     OP_MISC,               B16(11001000,01000000),                  "fmul",
+0,     OP_MISC,               B16(11001000,01100000),                  "fdiv",
+0,     OP_MISC,               B16(11001000,10000000),                  "fcmp",
+0,     OP_MISC,               B16(11001000,10100000),                  "fabs",
+0,     OP_MISC,               B16(11001000,11000000),                  "frsb",
+0,     OP_MISC,               B16(11001000,11100000),                  "fmax",
+0,     OP_MISC,               B16(11001001,00000000),                  "frcp",
+0,     OP_MISC,               B16(11001001,00100000),                  "frsqrt",
+0,     OP_MISC,               B16(11001001,01000000),                  "fnmul",
+0,     OP_MISC,               B16(11001001,01100000),                  "fmin",
+0,     OP_MISC,               B16(11001001,10000000),                  "fld1",
+0,     OP_MISC,               B16(11001001,10100000),                  "fld0",
+0,     OP_MISC,               B16(11001001,11000000),                  "log2",
+0,     OP_MISC,               B16(11001001,11100000),                  "exp2",
+0,     OP_MISC,               B16(11000101,11100000),                  "adds256",
+
+0,     OP_MISCL,              B16(11000100,10000000),                  "divs",
+0,     OP_MISCL,              B16(11000100,11100000),                  "divu",
 
+0,     OP_STACK,              B16(00000010,00000000),                  "push",
+0,     OP_STACK,              B16(00000010,10000000),                  "pop",
index d1320da..05dba83 100644 (file)
@@ -5,15 +5,12 @@
  * See the file 'Copying' in the root of the distribution for the full text.
  */
 
-#include "binary.h"
-
 operation
        : OP                                              { emit2($1); }
 
-       | OP_ONEREG GPR
-       {
-               emit2($1 | ($2<<0));
-       }
+       | OP_BRANCH GPR                        { emit2($1 | ($2<<0)); }
+       | OP_BRANCH expr                       { branch_instr($1, ALWAYS, &$2); }
+       | OP_BRANCH CC expr                    { branch_instr($1, $2, &$3); }
 
        | OP_ONELREG GPR
        {
@@ -22,17 +19,28 @@ operation
                emit2($1 | ($2<<0));
        }
 
-       | OP_ALU GPR ',' GPR
-       {
-               emit2(B16(01000000, 00000000) | ($1<<8) | ($2<<0) | ($4<<4));
-       }
+       | OP_ALU GPR ',' GPR                   { alu_instr_reg($1, ALWAYS, $2, $2, $4); }
+       | OP_ALU GPR ',' GPR ',' GPR           { alu_instr_reg($1, ALWAYS, $2, $4, $6); }
+       | OP_ALU CC GPR ',' GPR                { alu_instr_reg($1, $2, $3, $3, $5); }
+       | OP_ALU CC GPR ',' GPR ',' GPR        { alu_instr_reg($1, $2, $3, $5, $7); }
 
-       | OP_ALU GPR ',' '#' u5
-       {
-               if ($1 >= 0x10)
-                       serror("cannot use this ALU operation in 2op form");
-               emit2(B16(01100000, 00000000) | ($1<<9) | ($2<<0) | ($5<<4));
-       }
+       | OP_ALU GPR ',' '#' absexp            { alu_instr_lit($1, ALWAYS, $2, $2, $5); }
+       | OP_ALU GPR ',' GPR ',' '#' absexp    { alu_instr_lit($1, ALWAYS, $2, $4, $7); }
+       | OP_ALU CC GPR ',' '#' absexp         { alu_instr_lit($1, $2, $3, $3, $6); }
+       | OP_ALU CC GPR ',' GPR ',' '#' absexp { alu_instr_lit($1, $2, $3, $5, $8); }
+
+       | OP_MISC GPR ',' GPR ',' GPR          { misc_instr_reg($1, ALWAYS, $2, $4, $6); }
+       | OP_MISC CC GPR ',' GPR ',' GPR       { misc_instr_reg($1, $2, $3, $5, $7); }
+
+       | OP_MISCL GPR ',' GPR ',' GPR         { misc_instr_reg($1, ALWAYS, $2, $4, $6); }
+       | OP_MISCL CC GPR ',' GPR ',' GPR      { misc_instr_reg($1, $2, $3, $5, $7); }
+       | OP_MISCL GPR ',' GPR ',' '#' absexp  { misc_instr_lit($1, ALWAYS, $2, $4, $7); }
+       | OP_MISCL CC GPR ',' GPR ',' '#' absexp { misc_instr_lit($1, $2, $3, $5, $8); }
+
+       | OP_STACK GPR                         { stack_instr($1, $2, $2, -1); }
+       | OP_STACK GPR ',' GPR                 { stack_instr($1, $2, $2, $4); }
+       | OP_STACK GPR '-' GPR                 { stack_instr($1, $2, $4, -1); }
+       | OP_STACK GPR '-' GPR ',' GPR         { stack_instr($1, $2, $4, $6); }
        ;
 
 e16
index 668f4b7..83d73da 100644 (file)
@@ -5,3 +5,208 @@
  * See the file 'Copying' in the root of the distribution for the full text.
  */
 
+/* Assemble an ALU instruction where rb is a register.
+ *
+ *   op - ALU operation number
+ *   cc - condition code, or ALWAYS for an unconditional instruction
+ *   rd - first register as written (presumably the destination)
+ *   ra - first source register; equal to rd in the two-operand syntax
+ *   rb - second source register
+ *
+ * Emits the 16-bit short form when the operands allow it, otherwise the
+ * 32-bit long form. */
+
+void alu_instr_reg(unsigned op, unsigned cc,
+       unsigned rd, unsigned ra, unsigned rb)
+{
+       /* Can we use short form? It has no condition field, has no separate
+        * ra, and packs rd and rb into four bits each, so it can only name
+        * r0..r15. A higher register number would spill into the
+        * neighbouring field and encode a different instruction. */
+
+       if ((cc == ALWAYS) && (ra == rd) && (rd < 0x10) && (rb < 0x10))
+       {
+               emit2(B16(01000000,00000000) | (op<<8) | (rb<<4) | (rd<<0));
+               return;
+       }
+
+       /* Long form, then. */
+
+       emit2(B16(11000000,00000000) | (op<<5) | (rd<<0));
+       emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0));
+}
+
+/* Assemble an ALU instruction where rb is a literal.
+ *
+ *   op    - ALU operation number
+ *   cc    - condition code, or ALWAYS for an unconditional instruction
+ *   rd    - first register as written (presumably the destination)
+ *   ra    - source register; equal to rd in the two-operand syntax
+ *   value - the literal operand
+ *
+ * Three encodings are tried in order of size: a 16-bit short form, a
+ * 32-bit medium form, and a long form carrying the full 32-bit literal.
+ * The long form reports an error if a condition code was given, or if a
+ * three-operand instruction other than add was requested. */
+
+void alu_instr_lit(unsigned op, unsigned cc,
+       unsigned rd, unsigned ra, unsigned value)
+{
+       /* 16 bit short form? Only even-numbered operations, unconditional,
+        * two-operand, on r0..r15, with a literal of at most 0x1f. */
+
+       if ((cc == ALWAYS) && !(op & 1) && (value <= 0x1f) && (ra == rd) &&
+               !(ra & 0x10))
+       {
+               emit2(B16(01100000,00000000) | (op<<8) | (value<<4) | (rd<<0));
+               return;
+       }
+
+       /* 32 bit medium form? The literal shares the second halfword with ra
+        * and cc, so only small values fit. Anything larger must fall through
+        * to the long form rather than overflow into those fields. */
+
+       if (value <= 0x1f)
+       {
+               emit2(B16(11000000,00000000) | (op<<5) | (rd<<0));
+               emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | (value<<0));
+               return;
+       }
+
+       /* Long form, then. */
+
+       if (cc != ALWAYS)
+               serror("cannot use condition codes with ALU literals this big");
+
+       /* add is special: its long form has room for a separate ra. */
+
+       if (op == B8(00000010))
+               emit2(B16(11101100,00000000) | (ra<<5) | (rd<<0));
+       else
+       {
+               if (ra != rd)
+                       serror("can only use 2op form of ALU instructions with literals this big");
+               emit2(B16(11101000,00000000) | (op<<5) | (rd<<0));
+       }
+
+       emit4(value);
+}
+
+/* Encode a miscellaneous operation taking three registers and a condition
+ * code. The first halfword is the opcode with rd or'd in; the second
+ * halfword carries ra, cc and rb. */
+
+void misc_instr_reg(unsigned op, unsigned cc,
+       unsigned rd, unsigned ra, unsigned rb)
+{
+       unsigned first = op | (rd<<0);
+       unsigned second = B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0);
+
+       emit2(first);
+       emit2(second);
+}
+
+/* Miscellaneous instructions with two registers, a literal, and a cc.
+ *
+ *   op    - full 16-bit opcode for the first halfword
+ *   cc    - condition code, or ALWAYS
+ *   rd    - register or'd into the first halfword
+ *   ra    - register placed in the second halfword
+ *   value - literal operand; must be in the range 0..31
+ */
+
+void misc_instr_lit(unsigned op, unsigned cc,
+       unsigned rd, unsigned ra, unsigned value)
+{
+       /* Reject literals outside 0..31: they would overflow the literal
+        * field of the second halfword. */
+
+       if (value > 0x1f)
+               serror("only constants from 0..31 can be used here");
+
+       emit2(op | (rd<<0));
+       emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | (value<<0));
+}
+
+/* Assemble a branch instruction. This may be a near branch into this
+ * object file, or a far branch which requires a fixup.
+ *
+ *   bl   - nonzero for bl, zero for b
+ *   cc   - condition code, or ALWAYS; not permitted together with bl
+ *   expr - branch target; absolute targets are rejected
+ */
+
+void branch_instr(unsigned bl, unsigned cc, struct expr_t* expr)
+{
+       unsigned type = expr->typ & S_TYP;
+
+       /* Sanity checking. */
+
+       if (bl && (cc != ALWAYS))
+               serror("can't use condition codes with bl");
+       if (type == S_ABS)
+               serror("can't use absolute addresses here");
+
+       switch (pass)
+       {
+               case 0:
+                       /* Calculate size of instructions only. For now we just assume
+                        * that they're going to be the maximum size, 32 bits. */
+
+                       emit4(0);
+                       break;
+
+               case 1:
+               case 2:
+               {
+                       /* The VC4 branch instructions express distance in 2-byte
+                        * words. */
+
+                       int d = (expr->val - DOTVAL) / 2;
+
+                       /* We now know the worst case for the instruction layout. At
+                        * this point we can emit the instructions, which may shrink
+                        * the code. */
+
+                       if (!bl && (type == DOTTYP))
+                       {
+                               /* This is a reference to code within this section. If it's
+                                * close enough to the program counter, we can use a short-
+                                * form instruction. The short form keeps only seven bits
+                                * of the distance (d&0x7f), so it is usable only for
+                                * -64..63; a larger distance would be truncated and
+                                * branch to the wrong place. */
+
+                               if ((d >= -64) && (d <= 63))
+                               {
+                                       emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f));
+                                       break;
+                               }
+                       }
+
+                       /* Absolute addresses and references to other sections
+                        * need the full 32 bits. */
+
+                       newrelo(expr->typ, RELOVC4 | RELPC);
+
+                       if (bl)
+                       {
+                               /* bl: 27-bit distance, top four bits in bits 8..11 of
+                                * the first halfword. */
+
+                               unsigned v = d & 0x07ffffff;
+                               unsigned hiv = v >> 23;
+                               unsigned lov = v & 0x007fffff;
+                               emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8));
+                               emit2(B16(00000000,00000000) | (lov&0xffff));
+                       }
+                       else
+                       {
+                               /* b: 23-bit distance, with cc in bits 8..11 of the
+                                * first halfword. */
+
+                               unsigned v = d & 0x007fffff;
+                               emit2(B16(10010000,00000000) | (cc<<8) | (v>>16));
+                               emit2(B16(00000000,00000000) | (v&0xffff));
+                       }
+                       break;
+               }
+       }
+}
+
+/* Assemble a push or pop instruction.
+ *
+ *   opcode   - base opcode; bit 0x0080 is set for pop and clear for push
+ *   loreg    - first register of the range; must be r0, r6, r16, r24, lr
+ *              or pc
+ *   hireg    - last register of the range
+ *   extrareg - register written after the comma, or -1 if there is none
+ *
+ * A bare "push lr" or "pop pc" arrives with loreg set to lr or pc; it is
+ * rewritten as base r0, hireg 31, with lr/pc as the extra register. */
+
+void stack_instr(unsigned opcode, unsigned loreg, unsigned hireg,
+       unsigned extrareg)
+{
+       /* Initialised so that the emitted halfword is well defined even after
+        * the bad-base-register error below has been reported (serror
+        * presumably returns rather than aborting -- TODO confirm). */
+       unsigned b = 0;
+
+       switch (loreg)
+       {
+               case 0: b = 0; break;
+               case 6: b = 1; break;
+               case 16: b = 2; break;
+               case 24: b = 3; break;
+
+               case 26: /* lr */
+                       extrareg = 26;
+                       hireg = 31;
+                       loreg = 0;
+                       b = 0;
+                       break;
+
+               case 31: /* pc */
+                       extrareg = 31;
+                       hireg = 31;
+                       loreg = 0;
+                       b = 0;
+                       break;
+
+               default:
+                       serror("base register for push or pop may be only r0, r6, r16, r24, lr or pc");
+       }
+
+       if (opcode & 0x0080)
+       {
+               /* Pop */
+               if (extrareg == 26)
+                       serror("cannot pop lr");
+       }
+       else
+       {
+               /* Push */
+               if (extrareg == 31)
+                       serror("cannot push pc");
+       }
+
+       if (hireg < loreg)
+               serror("invalid register range");
+
+       /* Bit 0x0100 marks the presence of the extra register. extrareg is
+        * unsigned, so "none" is the converted value of -1. */
+
+       emit2(opcode | (b<<5) | (hireg<<0) |
+               ((extrareg != (unsigned)-1) ? 0x0100 : 0));
+}
+
+
index 894a754..6dedc41 100644 (file)
@@ -25,6 +25,8 @@ main:
        tbs r0
        tbs r15
 
+       nop
+
        mov r0, r1
     cmn r0, r1
     add r0, r1
@@ -58,20 +60,225 @@ main:
        asr r0, r1
        abs r0, r1
 
-       mov r0, #31
-    cmn r0, #31
-    add r0, #31
-       bic r0, #31
-       mul r0, #31
-       eor r0, #31
-       sub r0, #31
-       and r0, #31
-       mvn r0, #31
-       ror r0, #31
-       cmp r0, #31
-       rsb r0, #31
-       btst r0, #31
-       or r0, #31
-       extu r0, #31
-       max r0, #31
+       nop
+
+       mov.f r0, r1
+    cmn.f r0, r1
+    add.f r0, r1
+       bic.f r0, r1
+       mul.f r0, r1
+       eor.f r0, r1
+       sub.f r0, r1
+       and.f r0, r1
+       mvn.f r0, r1
+       ror.f r0, r1
+       cmp.f r0, r1
+       rsb.f r0, r1
+       btst.f r0, r1
+       or.f r0, r1
+       extu.f r0, r1
+       max.f r0, r1
+       bset.f r0, r1
+       min.f r0, r1
+       bclr.f r0, r1
+       adds2.f r0, r1
+       bchg.f r0, r1
+       adds4.f r0, r1
+       adds8.f r0, r1
+       adds16.f r0, r1
+       exts.f r0, r1
+       neg.f r0, r1
+       lsr.f r0, r1
+       clz.f r0, r1
+       lsl.f r0, r1
+       brev.f r0, r1
+       asr.f r0, r1
+       abs.f r0, r1
+
+       nop
+
+       mov r0, r1, r2
+    cmn r0, r1, r2
+    add r0, r1, r2
+       bic r0, r1, r2
+       mul r0, r1, r2
+       eor r0, r1, r2
+       sub r0, r1, r2
+       and r0, r1, r2
+       mvn r0, r1, r2
+       ror r0, r1, r2
+       cmp r0, r1, r2
+       rsb r0, r1, r2
+       btst r0, r1, r2
+       or r0, r1, r2
+       extu r0, r1, r2
+       max r0, r1, r2
+       bset r0, r1, r2
+       min r0, r1, r2
+       bclr r0, r1, r2
+       adds2 r0, r1, r2
+       bchg r0, r1, r2
+       adds4 r0, r1, r2
+       adds8 r0, r1, r2
+       adds16 r0, r1, r2
+       exts r0, r1, r2
+       neg r0, r1, r2
+       lsr r0, r1, r2
+       clz r0, r1, r2
+       lsl r0, r1, r2
+       brev r0, r1, r2
+       asr r0, r1, r2
+       abs r0, r1, r2
+
+       nop
+
+       mov r0, #0x1f
+    cmn r0, #0x1f
+    add r0, #0x1f
+       bic r0, #0x1f
+       mul r0, #0x1f
+       eor r0, #0x1f
+       sub r0, #0x1f
+       and r0, #0x1f
+       mvn r0, #0x1f
+       ror r0, #0x1f
+       cmp r0, #0x1f
+       rsb r0, #0x1f
+       btst r0, #0x1f
+       or r0, #0x1f
+       extu r0, #0x1f
+       max r0, #0x1f
+       bset r0, #0x1f
+       min r0, #0x1f
+       bclr r0, #0x1f
+       adds2 r0, #0x1f
+       bchg r0, #0x1f
+       adds4 r0, #0x1f
+       adds8 r0, #0x1f
+       adds16 r0, #0x1f
+       exts r0, #0x1f
+       neg r0, #0x1f
+       lsr r0, #0x1f
+       clz r0, #0x1f
+       lsl r0, #0x1f
+       brev r0, #0x1f
+       asr r0, #0x1f
+       abs r0, #0x1f
+
+       nop
+
+       mov.f r0, #0x1f
+    cmn.f r0, #0x1f
+    add.f r0, #0x1f
+       bic.f r0, #0x1f
+       mul.f r0, #0x1f
+       eor.f r0, #0x1f
+       sub.f r0, #0x1f
+       and.f r0, #0x1f
+       mvn.f r0, #0x1f
+       ror.f r0, #0x1f
+       cmp.f r0, #0x1f
+       rsb.f r0, #0x1f
+       btst.f r0, #0x1f
+       or.f r0, #0x1f
+       extu.f r0, #0x1f
+       max.f r0, #0x1f
+       bset.f r0, #0x1f
+       min.f r0, #0x1f
+       bclr.f r0, #0x1f
+       adds2.f r0, #0x1f
+       bchg.f r0, #0x1f
+       adds4.f r0, #0x1f
+       adds8.f r0, #0x1f
+       adds16.f r0, #0x1f
+       exts.f r0, #0x1f
+       neg.f r0, #0x1f
+       lsr.f r0, #0x1f
+       clz.f r0, #0x1f
+       lsl.f r0, #0x1f
+       brev.f r0, #0x1f
+       asr.f r0, #0x1f
+       abs.f r0, #0x1f
+
+       add r0, #0x12345678
+       add r0, r1, #0x12345678
+       sub r0, #0x12345678
+
+       nop
+
+       fadd r0, r1, r2
+       fsub r0, r1, r2
+       fmul r0, r1, r2
+       fdiv r0, r1, r2
+       fcmp r0, r1, r2
+       fabs r0, r1, r2
+       frsb r0, r1, r2
+       fmax r0, r1, r2
+       frcp r0, r1, r2
+       frsqrt r0, r1, r2
+       fnmul r0, r1, r2
+       fmin r0, r1, r2
+       fld1 r0, r1, r2
+       fld0 r0, r1, r2
+       log2 r0, r1, r2
+       exp2 r0, r1, r2
+       divs r0, r1, r2
+       divu r0, r1, r2
+       divs r0, r1, #31
+       divu r0, r1, #31
+       adds256 r0, r1, r2
+
+       nop
+
+       fadd.f r0, r1, r2
+       fsub.f r0, r1, r2
+       fmul.f r0, r1, r2
+       fdiv.f r0, r1, r2
+       fcmp.f r0, r1, r2
+       fabs.f r0, r1, r2
+       frsb.f r0, r1, r2
+       fmax.f r0, r1, r2
+       frcp.f r0, r1, r2
+       frsqrt.f r0, r1, r2
+       fnmul.f r0, r1, r2
+       fmin.f r0, r1, r2
+       fld1.f r0, r1, r2
+       fld0.f r0, r1, r2
+       log2.f r0, r1, r2
+       exp2.f r0, r1, r2
+       divs.f r0, r1, r2
+       divu.f r0, r1, r2
+       divs.f r0, r1, #31
+       divu.f r0, r1, #31
+       adds256.f r0, r1, r2
+
+label:
+       b label
+       b forward
+       b label
+       b main
+       b.f label
+       b.f forward
+       b.f main
+       bl label
+       bl forward
+       bl main
+forward:
+
+       push r0
+       push r0, lr
+       push r0-r5
+       push r0-r5, lr
+       push r6
+       push r16
+       push r24
+       push lr
 
+       pop r0
+       pop r0, pc
+       pop r0-r5
+       pop r0-r5, pc
+       pop r6
+       pop r16
+       pop r24
+       pop pc