From: David Given
Date: Fri, 17 May 2013 22:30:49 +0000 (+0100)
Subject: Add a whole bunch of VC4 opcodes.
X-Git-Tag: release-6-0-pre-5~10^2~74
X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=26877d3c4f65ca0889c0ce34c7dc277d47c26753;p=ack.git

Add a whole bunch of VC4 opcodes.

--HG--
branch : dtrg-videocore
---

diff --git a/mach/vc4/as/mach1.c b/mach/vc4/as/mach1.c
index 96a8a07fa..915d1fb51 100644
--- a/mach/vc4/as/mach1.c
+++ b/mach/vc4/as/mach1.c
@@ -4,3 +4,24 @@
  * This file is redistributable under the terms of the 3-clause BSD license.
  * See the file 'Copying' in the root of the distribution for the full text.
  */
+
+#include "binary.h"
+
+#define ALWAYS 14
+
+extern void alu_instr_reg(unsigned opcode, unsigned cc, unsigned rd,
+	unsigned ra, unsigned rb);
+
+extern void alu_instr_lit(unsigned opcode, unsigned cc, unsigned rd,
+	unsigned ra, unsigned value);
+
+extern void misc_instr_reg(unsigned opcode, unsigned cc, unsigned rd,
+	unsigned ra, unsigned rb);
+
+extern void misc_instr_lit(unsigned opcode, unsigned cc, unsigned rd,
+	unsigned ra, unsigned value);
+
+extern void branch_instr(unsigned bl, unsigned cc, struct expr_t* expr);
+
+extern void stack_instr(unsigned opcode, unsigned loreg, unsigned hireg,
+	unsigned extrareg);
\ No newline at end of file
diff --git a/mach/vc4/as/mach2.c b/mach/vc4/as/mach2.c
index c69007de0..6fd2b0e45 100644
--- a/mach/vc4/as/mach2.c
+++ b/mach/vc4/as/mach2.c
@@ -6,13 +6,17 @@
  */
 
 %token GPR
+%token CC
 
 %token OP
+%token OP_BRANCH
 %token OP_ONEREG
 %token OP_ONELREG
 %token OP_ALU
+%token OP_FPU
 %token OP_MEM
-%token OP_BREG
+%token OP_MISC
+%token OP_MISCL
 %token OP_STACK
 
 /* Other token types */
diff --git a/mach/vc4/as/mach3.c b/mach/vc4/as/mach3.c
index b36652db7..b2cd1c2c9 100644
--- a/mach/vc4/as/mach3.c
+++ b/mach/vc4/as/mach3.c
@@ -5,8 +5,6 @@
  * See the file 'Copying' in the root of the distribution for the full text.
  */
 
-#include "binary.h"
-
 /* Integer registers */
 
 0, GPR, 0, "r0",
@@ -17,7 +15,6 @@
 
 0, GPR, 5, "r5",
 0, GPR, 6, "r6",
-0, GPR, 6, "fp",
 0, GPR, 7, "r7",
 0, GPR, 8, "r8",
 0, GPR, 9, "r9",
@@ -42,6 +39,7 @@
 0, GPR, 26, "r26",
 0, GPR, 26, "lr",
 0, GPR, 27, "r27",
+0, GPR, 27, "fp",
 0, GPR, 28, "r28",
 0, GPR, 29, "r29",
 0, GPR, 30, "r30",
@@ -49,13 +47,35 @@
 0, GPR, 31, "r31",
 0, GPR, 31, "pc",
 
+/* Condition codes */
+
+0, CC, 0, ".eq",
+0, CC, 1, ".ne",
+0, CC, 2, ".cs",
+0, CC, 2, ".lo",
+0, CC, 3, ".cc",
+0, CC, 3, ".hg",
+0, CC, 3, ".hs", /* presumably the intended alias; ".hg" kept for compatibility */
+0, CC, 4, ".mi",
+0, CC, 5, ".pl",
+0, CC, 6, ".vs",
+0, CC, 7, ".vc",
+0, CC, 8, ".hi",
+0, CC, 9, ".ls",
+0, CC, 10, ".ge",
+0, CC, 11, ".lt",
+0, CC, 12, ".gt",
+0, CC, 13, ".le",
+0, CC, 15, ".f",
+
 /* Special instructions */
 
 0, OP, B16(00000000,00000001), "nop",
 0, OP, B16(00000000,00001010), "rti",
 
-0, OP_ONEREG, B16(00000000,01000000), "b",
-0, OP_ONEREG, B16(00000000,01100000), "bl",
+0, OP_BRANCH, 0, "b",
+0, OP_BRANCH, 1, "bl",
+
 0, OP_ONELREG, B16(00000000,10000000), "tbb",
 0, OP_ONELREG, B16(00000000,10100000), "tbs",
 
@@ -92,4 +112,26 @@
 0, OP_ALU, B8(00011110), "asr",
 0, OP_ALU, B8(00011111), "abs",
 
+0, OP_MISC, B16(11001000,00000000), "fadd",
+0, OP_MISC, B16(11001000,00100000), "fsub",
+0, OP_MISC, B16(11001000,01000000), "fmul",
+0, OP_MISC, B16(11001000,01100000), "fdiv",
+0, OP_MISC, B16(11001000,10000000), "fcmp",
+0, OP_MISC, B16(11001000,10100000), "fabs",
+0, OP_MISC, B16(11001000,11000000), "frsb",
+0, OP_MISC, B16(11001000,11100000), "fmax",
+0, OP_MISC, B16(11001001,00000000), "frcp",
+0, OP_MISC, B16(11001001,00100000), "frsqrt",
+0, OP_MISC, B16(11001001,01000000), "fnmul",
+0, OP_MISC, B16(11001001,01100000), "fmin",
+0, OP_MISC, B16(11001001,10000000), "fld1",
+0, OP_MISC, B16(11001001,10100000), "fld0",
+0, OP_MISC, B16(11001001,11000000), "log2",
+0, OP_MISC, B16(11001001,11100000), "exp2",
+0, OP_MISC, B16(11000101,11100000), "adds256",
+
+0, OP_MISCL, B16(11000100,10000000), "divs",
+0, OP_MISCL, B16(11000100,11100000), "divu",
+0, OP_STACK, B16(00000010,00000000), "push",
+0, OP_STACK, B16(00000010,10000000), "pop",
 
diff --git a/mach/vc4/as/mach4.c b/mach/vc4/as/mach4.c
index d1320dae1..05dba83b8 100644
--- a/mach/vc4/as/mach4.c
+++ b/mach/vc4/as/mach4.c
@@ -5,15 +5,12 @@
  * See the file 'Copying' in the root of the distribution for the full text.
  */
 
-#include "binary.h"
-
 operation
 	: OP
 		{ emit2($1); }
-	| OP_ONEREG GPR
-		{
-			emit2($1 | ($2<<0));
-		}
+	| OP_BRANCH GPR                        { emit2(B16(00000000,01000000) | ($1<<5) | ($2<<0)); }
+	| OP_BRANCH expr                       { branch_instr($1, ALWAYS, &$2); }
+	| OP_BRANCH CC expr                    { branch_instr($1, $2, &$3); }
 
 	| OP_ONELREG GPR
 		{
@@ -22,17 +19,28 @@ operation
 			emit2($1 | ($2<<0));
 		}
 
-	| OP_ALU GPR ',' GPR
-		{
-			emit2(B16(01000000, 00000000) | ($1<<8) | ($2<<0) | ($4<<4));
-		}
+	| OP_ALU GPR ',' GPR                   { alu_instr_reg($1, ALWAYS, $2, $2, $4); }
+	| OP_ALU GPR ',' GPR ',' GPR           { alu_instr_reg($1, ALWAYS, $2, $4, $6); }
+	| OP_ALU CC GPR ',' GPR                { alu_instr_reg($1, $2, $3, $3, $5); }
+	| OP_ALU CC GPR ',' GPR ',' GPR        { alu_instr_reg($1, $2, $3, $5, $7); }
 
-	| OP_ALU GPR ',' '#' u5
-		{
-			if ($1 >= 0x10)
-				serror("cannot use this ALU operation in 2op form");
-			emit2(B16(01100000, 00000000) | ($1<<9) | ($2<<0) | ($5<<4));
-		}
+	| OP_ALU GPR ',' '#' absexp            { alu_instr_lit($1, ALWAYS, $2, $2, $5); }
+	| OP_ALU GPR ',' GPR ',' '#' absexp    { alu_instr_lit($1, ALWAYS, $2, $4, $7); }
+	| OP_ALU CC GPR ',' '#' absexp         { alu_instr_lit($1, $2, $3, $3, $6); }
+	| OP_ALU CC GPR ',' GPR ',' '#' absexp { alu_instr_lit($1, $2, $3, $5, $8); }
+
+	| OP_MISC GPR ',' GPR ',' GPR          { misc_instr_reg($1, ALWAYS, $2, $4, $6); }
+	| OP_MISC CC GPR ',' GPR ',' GPR       { misc_instr_reg($1, $2, $3, $5, $7); }
+
+	| OP_MISCL GPR ',' GPR ',' GPR         { misc_instr_reg($1, ALWAYS, $2, $4, $6); }
+	| OP_MISCL CC GPR ',' GPR ',' GPR      { misc_instr_reg($1, $2, $3, $5, $7); }
+	| OP_MISCL GPR ',' GPR ',' '#' absexp  { misc_instr_lit($1, ALWAYS, $2, $4, $7); }
+	| OP_MISCL CC GPR ',' GPR ',' '#' absexp { misc_instr_lit($1, $2, $3, $5, $8); }
+
+	| OP_STACK GPR                         { stack_instr($1, $2, $2, -1); }
+	| OP_STACK GPR ',' GPR                 { stack_instr($1, $2, $2, $4); }
+	| OP_STACK GPR '-' GPR                 { stack_instr($1, $2, $4, -1); }
+	| OP_STACK GPR '-' GPR ',' GPR         { stack_instr($1, $2, $4, $6); }
 	;
 
 e16
diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c
index 668f4b748..83d73da2a 100644
--- a/mach/vc4/as/mach5.c
+++ b/mach/vc4/as/mach5.c
@@ -5,3 +5,239 @@
  * See the file 'Copying' in the root of the distribution for the full text.
  */
 
+/* Assemble an ALU instruction where rb is a register.
+ *
+ * op is the ALU opcode, cc the condition code, rd the destination
+ * register and ra/rb the source registers. Emits the 16-bit short form
+ * when possible and the 32-bit long form otherwise. */
+
+void alu_instr_reg(unsigned op, unsigned cc,
+	unsigned rd, unsigned ra, unsigned rb)
+{
+	/* Can we use short form? It is unconditional, two-operand, and has
+	 * only four bits for each register, so r16 and above do not fit. */
+
+	if ((cc == ALWAYS) && (ra == rd) && !(rd & 0x10) && !(rb & 0x10))
+	{
+		emit2(B16(01000000,00000000) | (op<<8) | (rb<<4) | (rd<<0));
+		return;
+	}
+
+	/* Long form, then. */
+
+	emit2(B16(11000000,00000000) | (op<<5) | (rd<<0));
+	emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0));
+}
+
+/* Assemble an ALU instruction where rb is a literal.
+ *
+ * Picks the smallest encoding that can hold the literal: 16-bit short
+ * form, 32-bit medium form, or long form (a 16-bit opcode word followed
+ * by the 32-bit literal). */
+
+void alu_instr_lit(unsigned op, unsigned cc,
+	unsigned rd, unsigned ra, unsigned value)
+{
+	/* 16 bit short form? Used only for unconditional even opcodes with
+	 * ra == rd in r0-r15 and a literal of 0..31. */
+
+	if ((cc == ALWAYS) && !(op & 1) && (value <= 0x1f) && (ra == rd) &&
+		!(ra & 0x10))
+	{
+		emit2(B16(01100000,00000000) | (op<<8) | (value<<4) | (rd<<0));
+		return;
+	}
+
+	/* 32 bit medium form? The literal shares the second word with ra and
+	 * cc, so it must be small; anything larger would overwrite the fields
+	 * above it. */
+
+	if (value <= 0x1f)
+	{
+		emit2(B16(11000000,00000000) | (op<<5) | (rd<<0));
+		emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | (value<<0));
+		return;
+	}
+
+	/* Long form, then. */
+
+	if (cc != ALWAYS)
+		serror("cannot use condition codes with ALU literals this big");
+
+	/* add is special. */
+
+	if (op == B8(00000010))
+		emit2(B16(11101100,00000000) | (ra<<5) | (rd<<0));
+	else
+	{
+		if (ra != rd)
+			serror("can only use 2op form of ALU instructions with literals this big");
+		emit2(B16(11101000,00000000) | (op<<5) | (rd<<0));
+	}
+
+	emit4(value);
+}
+
+/* Miscellaneous instructions with three registers and a cc. op is the
+ * complete first opcode word apart from the rd field. */
+
+void misc_instr_reg(unsigned op, unsigned cc,
+	unsigned rd, unsigned ra, unsigned rb)
+{
+	emit2(op | (rd<<0));
+	emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0));
+}
+
+/* Miscellaneous instructions with two registers, a literal, and a cc.
+ * The literal must be in the range 0..31. */
+
+void misc_instr_lit(unsigned op, unsigned cc,
+	unsigned rd, unsigned ra, unsigned value)
+{
+	if (value > 0x1f)
+		serror("only constants from 0..31 can be used here");
+
+	emit2(op | (rd<<0));
+	emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | (value<<0));
+}
+
+/* Assemble a branch instruction. This may be a near branch into this
+ * object file, or a far branch which requires a fixup.
+ *
+ * bl is non-zero for branch-and-link, cc is the condition code and expr
+ * the branch target. */
+
+void branch_instr(unsigned bl, unsigned cc, struct expr_t* expr)
+{
+	unsigned type = expr->typ & S_TYP;
+
+	/* Sanity checking. */
+
+	if (bl && (cc != ALWAYS))
+		serror("can't use condition codes with bl");
+	if (type == S_ABS)
+		serror("can't use absolute addresses here");
+
+	switch (pass)
+	{
+		case 0:
+			/* Calculate size of instructions only. For now we just assume
+			 * that they're going to be the maximum size, 32 bits. */
+
+			emit4(0);
+			break;
+
+		case 1:
+		case 2:
+		{
+			/* The VC4 branch instructions express distance in 2-byte
+			 * words. */
+
+			int d = (expr->val - DOTVAL) / 2;
+
+			/* We now know the worst case for the instruction layout. At
+			 * this point we can emit the instructions, which may shrink
+			 * the code. */
+
+			if (!bl && (type == DOTTYP))
+			{
+				/* This is a reference to code within this section. If it's
+				 * close enough to the program counter, we can use a short-
+				 * form instruction. The short form only keeps the low seven
+				 * bits of the displacement, so it must be in -64..63. */
+
+				if ((d >= -64) && (d <= 63))
+				{
+					emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f));
+					break;
+				}
+			}
+
+			/* Absolute addresses and references to other sections
+			 * need the full 32 bits. */
+
+			newrelo(expr->typ, RELOVC4 | RELPC);
+
+			if (bl)
+			{
+				unsigned v = d & 0x07ffffff;
+				unsigned hiv = v >> 23;
+				unsigned lov = v & 0x007fffff;
+				emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8));
+				emit2(B16(00000000,00000000) | (lov&0xffff));
+			}
+			else
+			{
+				unsigned v = d & 0x007fffff;
+				emit2(B16(10010000,00000000) | (cc<<8) | (v>>16));
+				emit2(B16(00000000,00000000) | (v&0xffff));
+			}
+			break;
+		}
+	}
+}
+
+/* Assemble a push or pop instruction (pop has bit 7 of opcode set).
+ *
+ * loreg-hireg is the register range, which must start at r0, r6, r16
+ * or r24; extrareg is lr (push) or pc (pop) if that register is to be
+ * transferred as well, or -1 if not. A bare 'push lr' or 'pop pc' arrives
+ * with loreg set to that register. */
+
+void stack_instr(unsigned opcode, unsigned loreg, unsigned hireg,
+	unsigned extrareg)
+{
+	const unsigned none = (unsigned)-1;
+	unsigned b = 0; /* stays defined even if loreg is rejected below */
+
+	switch (loreg)
+	{
+		case 0: b = 0; break;
+		case 6: b = 1; break;
+		case 16: b = 2; break;
+		case 24: b = 3; break;
+
+		case 26: /* lr */
+			extrareg = 26;
+			hireg = 31;
+			loreg = 0;
+			b = 0;
+			break;
+
+		case 31: /* pc */
+			extrareg = 31;
+			hireg = 31;
+			loreg = 0;
+			b = 0;
+			break;
+
+		default:
+			serror("base register for push or pop may be only r0, r6, r16, r24, lr or pc");
+			break;
+	}
+
+	/* The encoding has a single flag for the extra register, so anything
+	 * other than lr or pc cannot be expressed. */
+
+	if ((extrareg != none) && (extrareg != 26) && (extrareg != 31))
+		serror("extra register for push or pop may be only lr or pc");
+
+	if (opcode & 0x0080)
+	{
+		/* Pop */
+		if (extrareg == 26)
+			serror("cannot pop lr");
+	}
+	else
+	{
+		/* Push */
+		if (extrareg == 31)
+			serror("cannot push pc");
+	}
+
+	if (hireg < loreg)
+		serror("invalid register range");
+
+	emit2(opcode | (b<<5) | (hireg<<0) |
+		((extrareg != none) ? 0x0100 : 0));
+}
diff --git a/mach/vc4/test/opcodes.s b/mach/vc4/test/opcodes.s
index 894a7540e..6dedc4168 100644
--- a/mach/vc4/test/opcodes.s
+++ b/mach/vc4/test/opcodes.s
@@ -25,6 +25,8 @@ main:
 	tbs r0
 	tbs r15
 
+	nop
+
 	mov r0, r1
 	cmn r0, r1
 	add r0, r1
@@ -58,20 +60,225 @@ main:
 	asr r0, r1
 	abs r0, r1
 
-	mov r0, #31
-	cmn r0, #31
-	add r0, #31
-	bic r0, #31
-	mul r0, #31
-	eor r0, #31
-	sub r0, #31
-	and r0, #31
-	mvn r0, #31
-	ror r0, #31
-	cmp r0, #31
-	rsb r0, #31
-	btst r0, #31
-	or r0, #31
-	extu r0, #31
-	max r0, #31
+	nop
+
+	mov.f r0, r1
+	cmn.f r0, r1
+	add.f r0, r1
+	bic.f r0, r1
+	mul.f r0, r1
+	eor.f r0, r1
+	sub.f r0, r1
+	and.f r0, r1
+	mvn.f r0, r1
+	ror.f r0, r1
+	cmp.f r0, r1
+	rsb.f r0, r1
+	btst.f r0, r1
+	or.f r0, r1
+	extu.f r0, r1
+	max.f r0, r1
+	bset.f r0, r1
+	min.f r0, r1
+	bclr.f r0, r1
+	adds2.f r0, r1
+	bchg.f r0, r1
+	adds4.f r0, r1
+	adds8.f r0, r1
+	adds16.f r0, r1
+	exts.f r0, r1
+	neg.f r0, r1
+	lsr.f r0, r1
+	clz.f r0, r1
+	lsl.f r0, r1
+	brev.f r0, r1
+	asr.f r0, r1
+	abs.f r0, r1
+
+	nop
+
+	mov r0, r1, r2
+	cmn r0, r1, r2
+	add r0, r1, r2
+	bic r0, r1, r2
+	mul r0, r1, r2
+	eor r0, r1, r2
+	sub r0, r1, r2
+	and r0, r1, r2
+	mvn r0, r1, r2
+	ror r0, r1, r2
+	cmp r0, r1, r2
+	rsb r0, r1, r2
+	btst r0, r1, r2
+	or r0, r1, r2
+	extu r0, r1, r2
+	max r0, r1, r2
+	bset r0, r1, r2
+	min r0, r1, r2
+	bclr r0, r1, r2
+	adds2 r0, r1, r2
+	bchg r0, r1, r2
+	adds4 r0, r1, r2
+	adds8 r0, r1, r2
+	adds16 r0, r1, r2
+	exts r0, r1, r2
+	neg r0, r1, r2
+	lsr r0, r1, r2
+	clz r0, r1, r2
+	lsl r0, r1, r2
+	brev r0, r1, r2
+	asr r0, r1, r2
+	abs r0, r1, r2
+
+	nop
+
+	mov r0, #0x1f
+	cmn r0, #0x1f
+	add r0, #0x1f
+	bic r0, #0x1f
+	mul r0, #0x1f
+	eor r0, #0x1f
+	sub r0, #0x1f
+	and r0, #0x1f
+	mvn r0, #0x1f
+	ror r0, #0x1f
+	cmp r0, #0x1f
+	rsb r0, #0x1f
+	btst r0, #0x1f
+	or r0, #0x1f
+	extu r0, #0x1f
+	max r0, #0x1f
+	bset r0, #0x1f
+	min r0, #0x1f
+	bclr r0, #0x1f
+	adds2 r0, #0x1f
+	bchg r0, #0x1f
+	adds4 r0, #0x1f
+	adds8 r0, #0x1f
+	adds16 r0, #0x1f
+	exts r0, #0x1f
+	neg r0, #0x1f
+	lsr r0, #0x1f
+	clz r0, #0x1f
+	lsl r0, #0x1f
+	brev r0, #0x1f
+	asr r0, #0x1f
+	abs r0, #0x1f
+
+	nop
+
+	mov.f r0, #0x1f
+	cmn.f r0, #0x1f
+	add.f r0, #0x1f
+	bic.f r0, #0x1f
+	mul.f r0, #0x1f
+	eor.f r0, #0x1f
+	sub.f r0, #0x1f
+	and.f r0, #0x1f
+	mvn.f r0, #0x1f
+	ror.f r0, #0x1f
+	cmp.f r0, #0x1f
+	rsb.f r0, #0x1f
+	btst.f r0, #0x1f
+	or.f r0, #0x1f
+	extu.f r0, #0x1f
+	max.f r0, #0x1f
+	bset.f r0, #0x1f
+	min.f r0, #0x1f
+	bclr.f r0, #0x1f
+	adds2.f r0, #0x1f
+	bchg.f r0, #0x1f
+	adds4.f r0, #0x1f
+	adds8.f r0, #0x1f
+	adds16.f r0, #0x1f
+	exts.f r0, #0x1f
+	neg.f r0, #0x1f
+	lsr.f r0, #0x1f
+	clz.f r0, #0x1f
+	lsl.f r0, #0x1f
+	brev.f r0, #0x1f
+	asr.f r0, #0x1f
+	abs.f r0, #0x1f
+
+	add r0, #0x12345678
+	add r0, r1, #0x12345678
+	sub r0, #0x12345678
+
+	nop
+
+	fadd r0, r1, r2
+	fsub r0, r1, r2
+	fmul r0, r1, r2
+	fdiv r0, r1, r2
+	fcmp r0, r1, r2
+	fabs r0, r1, r2
+	frsb r0, r1, r2
+	fmax r0, r1, r2
+	frcp r0, r1, r2
+	frsqrt r0, r1, r2
+	fnmul r0, r1, r2
+	fmin r0, r1, r2
+	fld1 r0, r1, r2
+	fld0 r0, r1, r2
+	log2 r0, r1, r2
+	exp2 r0, r1, r2
+	divs r0, r1, r2
+	divu r0, r1, r2
+	divs r0, r1, #31
+	divu r0, r1, #31
+	adds256 r0, r1, r2
+
+	nop
+
+	fadd.f r0, r1, r2
+	fsub.f r0, r1, r2
+	fmul.f r0, r1, r2
+	fdiv.f r0, r1, r2
+	fcmp.f r0, r1, r2
+	fabs.f r0, r1, r2
+	frsb.f r0, r1, r2
+	fmax.f r0, r1, r2
+	frcp.f r0, r1, r2
+	frsqrt.f r0, r1, r2
+	fnmul.f r0, r1, r2
+	fmin.f r0, r1, r2
+	fld1.f r0, r1, r2
+	fld0.f r0, r1, r2
+	log2.f r0, r1, r2
+	exp2.f r0, r1, r2
+	divs.f r0, r1, r2
+	divu.f r0, r1, r2
+	divs.f r0, r1, #31
+	divu.f r0, r1, #31
+	adds256.f r0, r1, r2
+
+label:
+	b label
+	b forward
+	b label
+	b main
+	b.f label
+	b.f forward
+	b.f main
+	bl label
+	bl forward
+	bl main
+forward:
+
+	push r0
+	push r0, lr
+	push r0-r5
+	push r0-r5, lr
+	push r6
+	push r16
+	push r24
+	push lr
+	pop r0
+	pop r0, pc
+	pop r0-r5
+	pop r0-r5, pc
+	pop r6
+	pop r16
+	pop r24
+	pop pc
 