From: David Given <dg@cowlark.com>
Date: Fri, 17 May 2013 22:30:49 +0000 (+0100)
Subject: Add a whole bunch of VC4 opcodes.
X-Git-Tag: release-6-0-pre-5~10^2~74
X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=26877d3c4f65ca0889c0ce34c7dc277d47c26753;p=ack.git

Add a whole bunch of VC4 opcodes.

--HG--
branch : dtrg-videocore
---

diff --git a/mach/vc4/as/mach1.c b/mach/vc4/as/mach1.c
index 96a8a07fa..915d1fb51 100644
--- a/mach/vc4/as/mach1.c
+++ b/mach/vc4/as/mach1.c
@@ -4,3 +4,24 @@
  * This file is redistributable under the terms of the 3-clause BSD license.
  * See the file 'Copying' in the root of the distribution for the full text.
  */
+
+#include "binary.h"
+
+#define ALWAYS 14 /* cc value the grammar passes when no condition suffix is given */
+/* Instruction emitters, defined in mach5.c. ALU op, register operands: */
+extern void alu_instr_reg(unsigned opcode, unsigned cc, unsigned rd,
+	unsigned ra, unsigned rb);
+/* ALU op whose last operand is a literal value: */
+extern void alu_instr_lit(unsigned opcode, unsigned cc, unsigned rd,
+	unsigned ra, unsigned value);
+/* OP_MISC / OP_MISCL instruction with three register operands: */
+extern void misc_instr_reg(unsigned opcode, unsigned cc, unsigned rd,
+	unsigned ra, unsigned rb);
+/* OP_MISCL instruction whose last operand is a literal value: */
+extern void misc_instr_lit(unsigned opcode, unsigned cc, unsigned rd,
+	unsigned ra, unsigned value);
+/* Branch to expr; bl is the OP_BRANCH token value (0 for b, 1 for bl): */
+extern void branch_instr(unsigned bl, unsigned cc, struct expr_t* expr);
+/* push/pop of loreg..hireg; the grammar passes extrareg == -1 for "none": */
+extern void stack_instr(unsigned opcode, unsigned loreg, unsigned hireg,
+	unsigned extrareg);
\ No newline at end of file
diff --git a/mach/vc4/as/mach2.c b/mach/vc4/as/mach2.c
index c69007de0..6fd2b0e45 100644
--- a/mach/vc4/as/mach2.c
+++ b/mach/vc4/as/mach2.c
@@ -6,13 +6,17 @@
  */
 
 %token <y_word> GPR
+%token <y_word> CC
 
 %token <y_word> OP
+%token <y_word> OP_BRANCH
 %token <y_word> OP_ONEREG
 %token <y_word> OP_ONELREG
 %token <y_word> OP_ALU
+%token <y_word> OP_FPU
 %token <y_word> OP_MEM
-%token <y_word> OP_BREG
+%token <y_word> OP_MISC
+%token <y_word> OP_MISCL
 %token <y_word> OP_STACK
 
 /* Other token types */
diff --git a/mach/vc4/as/mach3.c b/mach/vc4/as/mach3.c
index b36652db7..b2cd1c2c9 100644
--- a/mach/vc4/as/mach3.c
+++ b/mach/vc4/as/mach3.c
@@ -5,8 +5,6 @@
  * See the file 'Copying' in the root of the distribution for the full text.
  */
 
-#include "binary.h"
-
 /* Integer registers */
 
 0,     GPR,        0,          "r0",
@@ -17,7 +15,6 @@
 0,     GPR,        5,          "r5",
 
 0,     GPR,        6,          "r6",
-0,     GPR,        6,          "fp",
 0,     GPR,        7,          "r7",
 0,     GPR,        8,          "r8",
 0,     GPR,        9,          "r9",
@@ -42,6 +39,7 @@
 0,     GPR,        26,         "r26",
 0,     GPR,        26,         "lr",
 0,     GPR,        27,         "r27",
+0,     GPR,        27,         "fp",
 0,     GPR,        28,         "r28",
 0,     GPR,        29,         "r29",
 0,     GPR,        30,         "r30",
@@ -49,13 +47,34 @@
 0,     GPR,        31,         "r31",
 0,     GPR,        31,         "pc",
 
+/* Condition codes */
+
+0,     CC,         0,          ".eq",
+0,     CC,         1,          ".ne",
+0,     CC,         2,          ".cs",
+0,     CC,         2,          ".lo",
+0,     CC,         3,          ".cc",
+0,     CC,         3,          ".hg",
+0,     CC,         4,          ".mi",
+0,     CC,         5,          ".pl",
+0,     CC,         6,          ".vs",
+0,     CC,         7,          ".vc",
+0,     CC,         8,          ".hi",
+0,     CC,         9,          ".ls",
+0,     CC,         10,         ".ge",
+0,     CC,         11,         ".lt",
+0,     CC,         12,         ".gt",
+0,     CC,         13,         ".le",
+0,     CC,         15,         ".f",
+
 /* Special instructions */
 
 0,     OP,                    B16(00000000,00000001),                  "nop",
 0,     OP,                    B16(00000000,00001010),                  "rti",
 
-0,     OP_ONEREG,             B16(00000000,01000000),                  "b",
-0,     OP_ONEREG,             B16(00000000,01100000),                  "bl",
+0,     OP_BRANCH,             0,                                       "b",
+0,     OP_BRANCH,             1,                                       "bl",
+
 0,     OP_ONELREG,            B16(00000000,10000000),                  "tbb",
 0,     OP_ONELREG,            B16(00000000,10100000),                  "tbs",
 
@@ -92,4 +111,26 @@
 0,     OP_ALU,                B8(00011110),                            "asr",
 0,     OP_ALU,                B8(00011111),                            "abs",
 
+0,     OP_MISC,               B16(11001000,00000000),                  "fadd",
+0,     OP_MISC,               B16(11001000,00100000),                  "fsub",
+0,     OP_MISC,               B16(11001000,01000000),                  "fmul",
+0,     OP_MISC,               B16(11001000,01100000),                  "fdiv",
+0,     OP_MISC,               B16(11001000,10000000),                  "fcmp",
+0,     OP_MISC,               B16(11001000,10100000),                  "fabs",
+0,     OP_MISC,               B16(11001000,11000000),                  "frsb",
+0,     OP_MISC,               B16(11001000,11100000),                  "fmax",
+0,     OP_MISC,               B16(11001001,00000000),                  "frcp",
+0,     OP_MISC,               B16(11001001,00100000),                  "frsqrt",
+0,     OP_MISC,               B16(11001001,01000000),                  "fnmul",
+0,     OP_MISC,               B16(11001001,01100000),                  "fmin",
+0,     OP_MISC,               B16(11001001,10000000),                  "fld1",
+0,     OP_MISC,               B16(11001001,10100000),                  "fld0",
+0,     OP_MISC,               B16(11001001,11000000),                  "log2",
+0,     OP_MISC,               B16(11001001,11100000),                  "exp2",
+0,     OP_MISC,               B16(11000101,11100000),                  "adds256",
+
+0,     OP_MISCL,              B16(11000100,10000000),                  "divs",
+0,     OP_MISCL,              B16(11000100,11100000),                  "divu",
 
+0,     OP_STACK,              B16(00000010,00000000),                  "push",
+0,     OP_STACK,              B16(00000010,10000000),                  "pop",
diff --git a/mach/vc4/as/mach4.c b/mach/vc4/as/mach4.c
index d1320dae1..05dba83b8 100644
--- a/mach/vc4/as/mach4.c
+++ b/mach/vc4/as/mach4.c
@@ -5,15 +5,12 @@
  * See the file 'Copying' in the root of the distribution for the full text.
  */
 
-#include "binary.h"
-
 operation
 	: OP                                              { emit2($1); }
 
-	| OP_ONEREG GPR
-	{
-		emit2($1 | ($2<<0));
-	}
+	| OP_BRANCH GPR                        { /* 0x0040 = b reg, 0x0060 = bl reg */ emit2(B16(00000000,01000000) | ($1<<5) | ($2<<0)); }
+	| OP_BRANCH expr                       { branch_instr($1, ALWAYS, &$2); }
+	| OP_BRANCH CC expr                    { branch_instr($1, $2, &$3); }
 
 	| OP_ONELREG GPR
 	{
@@ -22,17 +19,28 @@ operation
 		emit2($1 | ($2<<0));
 	}
 
-	| OP_ALU GPR ',' GPR
-	{
-		emit2(B16(01000000, 00000000) | ($1<<8) | ($2<<0) | ($4<<4));
-	}
+	| OP_ALU GPR ',' GPR                   { alu_instr_reg($1, ALWAYS, $2, $2, $4); }
+	| OP_ALU GPR ',' GPR ',' GPR           { alu_instr_reg($1, ALWAYS, $2, $4, $6); }
+	| OP_ALU CC GPR ',' GPR                { alu_instr_reg($1, $2, $3, $3, $5); }
+	| OP_ALU CC GPR ',' GPR ',' GPR        { alu_instr_reg($1, $2, $3, $5, $7); }
 
-	| OP_ALU GPR ',' '#' u5
-	{
-		if ($1 >= 0x10)
-			serror("cannot use this ALU operation in 2op form");
-		emit2(B16(01100000, 00000000) | ($1<<9) | ($2<<0) | ($5<<4));
-	}
+	| OP_ALU GPR ',' '#' absexp            { alu_instr_lit($1, ALWAYS, $2, $2, $5); }
+	| OP_ALU GPR ',' GPR ',' '#' absexp    { alu_instr_lit($1, ALWAYS, $2, $4, $7); }
+	| OP_ALU CC GPR ',' '#' absexp         { alu_instr_lit($1, $2, $3, $3, $6); }
+	| OP_ALU CC GPR ',' GPR ',' '#' absexp { alu_instr_lit($1, $2, $3, $5, $8); }
+
+	| OP_MISC GPR ',' GPR ',' GPR          { misc_instr_reg($1, ALWAYS, $2, $4, $6); }
+	| OP_MISC CC GPR ',' GPR ',' GPR       { misc_instr_reg($1, $2, $3, $5, $7); }
+
+	| OP_MISCL GPR ',' GPR ',' GPR         { misc_instr_reg($1, ALWAYS, $2, $4, $6); }
+	| OP_MISCL CC GPR ',' GPR ',' GPR      { misc_instr_reg($1, $2, $3, $5, $7); }
+	| OP_MISCL GPR ',' GPR ',' '#' absexp  { misc_instr_lit($1, ALWAYS, $2, $4, $7); }
+	| OP_MISCL CC GPR ',' GPR ',' '#' absexp { misc_instr_lit($1, $2, $3, $5, $8); }
+
+	| OP_STACK GPR                         { stack_instr($1, $2, $2, -1); }
+	| OP_STACK GPR ',' GPR                 { stack_instr($1, $2, $2, $4); }
+	| OP_STACK GPR '-' GPR                 { stack_instr($1, $2, $4, -1); }
+	| OP_STACK GPR '-' GPR ',' GPR         { stack_instr($1, $2, $4, $6); }
 	;
 
 e16
diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c
index 668f4b748..83d73da2a 100644
--- a/mach/vc4/as/mach5.c
+++ b/mach/vc4/as/mach5.c
@@ -5,3 +5,208 @@
  * See the file 'Copying' in the root of the distribution for the full text.
  */
 
+/* Assemble an ALU instruction whose operands rd, ra and rb are all
+ * registers, executed under condition code cc (ALWAYS for no suffix). */
+void alu_instr_reg(unsigned op, unsigned cc,
+	unsigned rd, unsigned ra, unsigned rb)
+{
+	/* The 16-bit short form has no cc or ra field and only four bits for
+	 * each register, so it needs cc == ALWAYS, ra == rd and rd, rb < 16. */
+	if ((cc == ALWAYS) && (ra == rd) && !(rd & 0x10) && !(rb & 0x10))
+	{
+		emit2(B16(01000000,00000000) | (op<<8) | (rb<<4) | (rd<<0));
+		return;
+	}
+
+	/* Long form: first word carries op and rd, second word ra, cc and rb. */
+
+	emit2(B16(11000000,00000000) | (op<<5) | (rd<<0));
+	emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0));
+}
+
+/* Assemble an ALU instruction where rb is a literal. Tries the 16-bit
+ * short form, then the two-word medium form, then opcode word + emit4. */
+void alu_instr_lit(unsigned op, unsigned cc,
+	unsigned rd, unsigned ra, unsigned value)
+{
+	/* 16 bit short form? It has no cc or ra field, so it only fits an
+	 * unconditional two-operand instruction with a literal in 0..31. */
+	if ((cc == ALWAYS) && !(op & 1) && (value <= 0x1f) && (ra == rd) &&
+		!(ra & 0x10))
+	{
+		emit2(B16(01100000,00000000) | (op<<8) | (value<<4) | (rd<<0));
+		return;
+	}
+
+	/* 32 bit medium form? The literal is OR'ed into the low bits of the
+	 * second word next to ra and cc, so only 0..31 may be encoded here. */
+	if (value <= 0x1f)
+	{
+		emit2(B16(11000000,00000000) | (op<<5) | (rd<<0));
+		emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | (value<<0));
+		return;
+	}
+
+	/* Long form, then. */
+
+	if (cc != ALWAYS)
+		serror("cannot use condition codes with ALU literals this big");
+
+	/* add is special. */
+
+	if (op == B8(00000010))
+		emit2(B16(11101100,00000000) | (ra<<5) | (rd<<0));
+	else
+	{
+		if (ra != rd)
+			serror("can only use 2op form of ALU instructions with literals this big");
+		emit2(B16(11101000,00000000) | (op<<5) | (rd<<0));
+	}
+
+	emit4(value);
+}
+
+/* Miscellaneous instructions with three registers and a cc. Emits two
+ * words: op with rd OR'ed into it, then a word packing ra, cc and rb. */
+void misc_instr_reg(unsigned op, unsigned cc,
+	unsigned rd, unsigned ra, unsigned rb)
+{
+	emit2(op | (rd<<0));
+	emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0));
+}
+
+/* Miscellaneous instructions with two registers, a literal, and a cc.
+ * The literal shares the second word with ra and cc, so it must be 0..31. */
+void misc_instr_lit(unsigned op, unsigned cc,
+	unsigned rd, unsigned ra, unsigned value)
+{
+	if (value > 0x1f)
+		serror("only constants from 0..31 can be used here");
+
+	emit2(op | (rd<<0));
+	emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | (value<<0));
+}
+
+/* Assemble a branch instruction. This may be a near branch into this
+ * object file, or a far branch which requires a fixup. bl is nonzero for
+ * "bl"; cc is the condition code (ALWAYS if none); expr is the target. */
+void branch_instr(unsigned bl, unsigned cc, struct expr_t* expr)
+{
+	unsigned type = expr->typ & S_TYP;
+
+	/* Sanity checking. */
+
+	if (bl && (cc != ALWAYS))
+		serror("can't use condition codes with bl");
+	if (type == S_ABS)
+		serror("can't use absolute addresses here");
+
+	switch (pass)
+	{
+		case 0:
+			/* Calculate size of instructions only. For now we just assume
+			 * that they're going to be the maximum size, 32 bits. */
+
+			emit4(0);
+			break;
+
+		case 1:
+		case 2:
+		{
+			/* The VC4 branch instructions express distance in 2-byte
+			 * words. NOTE(review): assumes expr->val - DOTVAL is computed
+			 * in a signed type so backward branches go negative -- confirm. */
+			int d = (expr->val - DOTVAL) / 2;
+
+			/* We now know the worst case for the instruction layout. At
+			 * this point we can emit the instructions, which may shrink
+			 * the code. */
+
+			if (!bl && (type == DOTTYP))
+			{
+				/* This is a reference to code within this section. If it's
+				 * close enough to the program counter, we can use a short-
+				 * form instruction. Its offset field is the 7 bits kept by
+				 * the 0x7f mask below, i.e. a signed range of -64..63. */
+				if ((d >= -64) && (d < 64))
+				{
+					emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f));
+					break;
+				}
+			}
+
+			/* Absolute addresses and references to other sections
+			 * need the full 32 bits. */
+
+			newrelo(expr->typ, RELOVC4 | RELPC);
+
+			if (bl)
+			{
+				unsigned v = d & 0x07ffffff;
+				unsigned hiv = v >> 23;
+				unsigned lov = v & 0x007fffff;
+				emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8));
+				emit2(B16(00000000,00000000) | (lov&0xffff));
+			}
+			else
+			{
+				unsigned v = d & 0x007fffff;
+				emit2(B16(10010000,00000000) | (cc<<8) | (v>>16));
+				emit2(B16(00000000,00000000) | (v&0xffff));
+			}
+			break;
+		}
+	}
+}
+
+/* Assemble push/pop of registers loreg..hireg, plus extrareg unless -1. */
+void stack_instr(unsigned opcode, unsigned loreg, unsigned hireg,
+	unsigned extrareg)
+{
+	unsigned b = 0; /* base-register code for bit 5; defined even if loreg is rejected */
+	switch (loreg)
+	{
+		case 0: b = 0; break;
+		case 6: b = 1; break;
+		case 16: b = 2; break;
+		case 24: b = 3; break;
+
+		case 26: /* lr */
+			extrareg = 26;
+			hireg = 31;
+			loreg = 0;
+			b = 0;
+			break;
+
+		case 31: /* pc */
+			extrareg = 31;
+			hireg = 31;
+			loreg = 0;
+			b = 0;
+			break;
+
+		default:
+			serror("base register for push or pop may be only r0, r6, r16, r24, lr or pc");
+	}
+
+	if (opcode & 0x0080)
+	{
+		/* Pop */
+		if (extrareg == 26)
+			serror("cannot pop lr");
+	}
+	else
+	{
+		/* Push */
+		if (extrareg == 31)
+			serror("cannot push pc");
+	}
+
+	if (hireg < loreg)
+		serror("invalid register range");
+	/* 0x0100 is set when an extra register was given; (unsigned)-1 means none. */
+	emit2(opcode | (b<<5) | (hireg<<0) |
+		((extrareg != (unsigned)-1) ? 0x0100 : 0));
+}
+
+
diff --git a/mach/vc4/test/opcodes.s b/mach/vc4/test/opcodes.s
index 894a7540e..6dedc4168 100644
--- a/mach/vc4/test/opcodes.s
+++ b/mach/vc4/test/opcodes.s
@@ -25,6 +25,8 @@ main:
 	tbs r0
 	tbs r15
 
+	nop
+
 	mov r0, r1
     cmn r0, r1
     add r0, r1
@@ -58,20 +60,225 @@ main:
 	asr r0, r1
 	abs r0, r1
 
-	mov r0, #31
-    cmn r0, #31
-    add r0, #31
-	bic r0, #31
-	mul r0, #31
-	eor r0, #31
-	sub r0, #31
-	and r0, #31
-	mvn r0, #31
-	ror r0, #31
-	cmp r0, #31
-	rsb r0, #31
-	btst r0, #31
-	or r0, #31
-	extu r0, #31
-	max r0, #31
+	nop
+
+	mov.f r0, r1
+    cmn.f r0, r1
+    add.f r0, r1
+	bic.f r0, r1
+	mul.f r0, r1
+	eor.f r0, r1
+	sub.f r0, r1
+	and.f r0, r1
+	mvn.f r0, r1
+	ror.f r0, r1
+	cmp.f r0, r1
+	rsb.f r0, r1
+	btst.f r0, r1
+	or.f r0, r1
+	extu.f r0, r1
+	max.f r0, r1
+	bset.f r0, r1
+	min.f r0, r1
+	bclr.f r0, r1
+	adds2.f r0, r1
+	bchg.f r0, r1
+	adds4.f r0, r1
+	adds8.f r0, r1
+	adds16.f r0, r1
+	exts.f r0, r1
+	neg.f r0, r1
+	lsr.f r0, r1
+	clz.f r0, r1
+	lsl.f r0, r1
+	brev.f r0, r1
+	asr.f r0, r1
+	abs.f r0, r1
+
+	nop
+
+	mov r0, r1, r2
+    cmn r0, r1, r2
+    add r0, r1, r2
+	bic r0, r1, r2
+	mul r0, r1, r2
+	eor r0, r1, r2
+	sub r0, r1, r2
+	and r0, r1, r2
+	mvn r0, r1, r2
+	ror r0, r1, r2
+	cmp r0, r1, r2
+	rsb r0, r1, r2
+	btst r0, r1, r2
+	or r0, r1, r2
+	extu r0, r1, r2
+	max r0, r1, r2
+	bset r0, r1, r2
+	min r0, r1, r2
+	bclr r0, r1, r2
+	adds2 r0, r1, r2
+	bchg r0, r1, r2
+	adds4 r0, r1, r2
+	adds8 r0, r1, r2
+	adds16 r0, r1, r2
+	exts r0, r1, r2
+	neg r0, r1, r2
+	lsr r0, r1, r2
+	clz r0, r1, r2
+	lsl r0, r1, r2
+	brev r0, r1, r2
+	asr r0, r1, r2
+	abs r0, r1, r2
+
+	nop
+
+	mov r0, #0x1f
+    cmn r0, #0x1f
+    add r0, #0x1f
+	bic r0, #0x1f
+	mul r0, #0x1f
+	eor r0, #0x1f
+	sub r0, #0x1f
+	and r0, #0x1f
+	mvn r0, #0x1f
+	ror r0, #0x1f
+	cmp r0, #0x1f
+	rsb r0, #0x1f
+	btst r0, #0x1f
+	or r0, #0x1f
+	extu r0, #0x1f
+	max r0, #0x1f
+	bset r0, #0x1f
+	min r0, #0x1f
+	bclr r0, #0x1f
+	adds2 r0, #0x1f
+	bchg r0, #0x1f
+	adds4 r0, #0x1f
+	adds8 r0, #0x1f
+	adds16 r0, #0x1f
+	exts r0, #0x1f
+	neg r0, #0x1f
+	lsr r0, #0x1f
+	clz r0, #0x1f
+	lsl r0, #0x1f
+	brev r0, #0x1f
+	asr r0, #0x1f
+	abs r0, #0x1f
+
+	nop
+
+	mov.f r0, #0x1f
+    cmn.f r0, #0x1f
+    add.f r0, #0x1f
+	bic.f r0, #0x1f
+	mul.f r0, #0x1f
+	eor.f r0, #0x1f
+	sub.f r0, #0x1f
+	and.f r0, #0x1f
+	mvn.f r0, #0x1f
+	ror.f r0, #0x1f
+	cmp.f r0, #0x1f
+	rsb.f r0, #0x1f
+	btst.f r0, #0x1f
+	or.f r0, #0x1f
+	extu.f r0, #0x1f
+	max.f r0, #0x1f
+	bset.f r0, #0x1f
+	min.f r0, #0x1f
+	bclr.f r0, #0x1f
+	adds2.f r0, #0x1f
+	bchg.f r0, #0x1f
+	adds4.f r0, #0x1f
+	adds8.f r0, #0x1f
+	adds16.f r0, #0x1f
+	exts.f r0, #0x1f
+	neg.f r0, #0x1f
+	lsr.f r0, #0x1f
+	clz.f r0, #0x1f
+	lsl.f r0, #0x1f
+	brev.f r0, #0x1f
+	asr.f r0, #0x1f
+	abs.f r0, #0x1f
+
+	add r0, #0x12345678
+	add r0, r1, #0x12345678
+	sub r0, #0x12345678
+
+	nop
+
+	fadd r0, r1, r2
+	fsub r0, r1, r2
+	fmul r0, r1, r2
+	fdiv r0, r1, r2
+	fcmp r0, r1, r2
+	fabs r0, r1, r2
+	frsb r0, r1, r2
+	fmax r0, r1, r2
+	frcp r0, r1, r2
+	frsqrt r0, r1, r2
+	fnmul r0, r1, r2
+	fmin r0, r1, r2
+	fld1 r0, r1, r2
+	fld0 r0, r1, r2
+	log2 r0, r1, r2
+	exp2 r0, r1, r2
+	divs r0, r1, r2
+	divu r0, r1, r2
+	divs r0, r1, #31
+	divu r0, r1, #31
+	adds256 r0, r1, r2
+
+	nop
+
+	fadd.f r0, r1, r2
+	fsub.f r0, r1, r2
+	fmul.f r0, r1, r2
+	fdiv.f r0, r1, r2
+	fcmp.f r0, r1, r2
+	fabs.f r0, r1, r2
+	frsb.f r0, r1, r2
+	fmax.f r0, r1, r2
+	frcp.f r0, r1, r2
+	frsqrt.f r0, r1, r2
+	fnmul.f r0, r1, r2
+	fmin.f r0, r1, r2
+	fld1.f r0, r1, r2
+	fld0.f r0, r1, r2
+	log2.f r0, r1, r2
+	exp2.f r0, r1, r2
+	divs.f r0, r1, r2
+	divu.f r0, r1, r2
+	divs.f r0, r1, #31
+	divu.f r0, r1, #31
+	adds256.f r0, r1, r2
+
+label:
+	b label
+	b forward
+	b label
+	b main
+	b.f label
+	b.f forward
+	b.f main
+	bl label
+	bl forward
+	bl main
+forward:
+
+	push r0
+	push r0, lr
+	push r0-r5
+	push r0-r5, lr
+	push r6
+	push r16
+	push r24
+	push lr
 
+	pop r0
+	pop r0, pc
+	pop r0-r5
+	pop r0-r5, pc
+	pop r6
+	pop r16
+	pop r24
+	pop pc