Reworked VC4 relocations and some of the instruction encoding to be actually correct...
author: David Given <dg@cowlark.com>
Tue, 21 May 2013 22:17:30 +0000 (23:17 +0100)
committer: David Given <dg@cowlark.com>
Tue, 21 May 2013 22:17:30 +0000 (23:17 +0100)
--HG--
branch : dtrg-videocore

mach/vc4/as/mach5.c
mach/vc4/libem/dummy.s
plat/rpi/boot.s
util/led/relocate.c

index d6f71e7..b1ce314 100644 (file)
@@ -7,6 +7,11 @@
 
 #define maskx(v, x) (v & ((1<<(x))-1))
 
+/* Passes the fixed "offset too big" message to serror(). Takes no
+ * arguments and returns nothing. */
+static void toobig(void)
+{
+       serror("offset too big to encode into instruction");
+}
+
 /* Assemble an ALU instruction where rb is a register. */
 
 void alu_instr_reg(quad op, int cc, int rd, int ra, int rb)
@@ -90,6 +95,7 @@ void misc_instr_lit(quad op, int cc, int rd, int ra, quad value)
 
 void branch_instr(int bl, int cc, struct expr_t* expr)
 {
+       quad pc = DOTVAL;
        quad type = expr->typ & S_TYP;
 
        /* Sanity checking. */
@@ -114,7 +120,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr)
                        /* The VC4 branch instructions express distance in 2-byte
                         * words. */
 
-                       int d = (expr->val - DOTVAL) / 2;
+                       int d = (expr->val - pc) / 2;
 
                /* We now know the worst case for the instruction layout. At
                 * this point we can emit the instructions, which may shrink
@@ -126,7 +132,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr)
                     * close enough to the program counter, we can use a short-
                     * form instruction. */
 
-                   if ((d >= -128) && (d < 127))
+                   if (fitx(d, 7))
                    {
                                        emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f));
                                        break;
@@ -136,19 +142,29 @@ void branch_instr(int bl, int cc, struct expr_t* expr)
                        /* Absolute addresses and references to other sections
                         * need the full 32 bits. */
 
-                       newrelo(expr->typ, RELOVC4 | RELPC);
+                       newrelo(expr->typ, RELOVC4|RELPC);
 
                        if (bl)
                        {
-                               quad v = d & 0x07ffffff;
-                               quad hiv = v >> 23;
-                               quad lov = v & 0x007fffff;
+                               quad v, hiv, lov;
+
+                               if (!fitx(d, 27))
+                                       toobig();
+
+                               v = maskx(d, 27);
+                               hiv = v >> 23;
+                               lov = v & 0x007fffff;
                                emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8));
                                emit2(B16(00000000,00000000) | (lov&0xffff));
                        }
                        else
                        {
-                               quad v = d & 0x007fffff;
+                               quad v;
+
+                               if (!fitx(d, 23))
+                                       toobig();
+
+                               v = maskx(d, 23);
                                emit2(B16(10010000,00000000) | (cc<<8) | (v>>16));
                                emit2(B16(00000000,00000000) | (v&0xffff));
                        }
@@ -334,6 +350,8 @@ void mem_postincr_instr(quad opcode, int cc, int rd, int rs)
 
 void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
 {
+       static const char sizes[] = {4, 2, 1, 2};
+       int size = sizes[opcode];
        quad type = expr->typ & S_TYP;
 
        /* Sanity checking. */
@@ -354,10 +372,7 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
                case 1:
                case 2:
                {
-                       /* The VC4 branch instructions express distance in 2-byte
-                        * words. */
-
-                       int d = (expr->val - DOTVAL) / 2;
+                       int d = expr->val - DOTVAL;
 
                /* We now know the worst case for the instruction layout. At
                 * this point we can emit the instructions, which may shrink
@@ -365,24 +380,30 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
 
                        if (type == DOTTYP)
                        {
+                               int scaledd = d/size;
+
                    /* This is a reference to an address within this section. If
                     * it's close enough to the program counter, we can use a
                     * shorter instruction. */
 
-                               if (fitx(d, 16))
+                               if (fitx(scaledd, 16))
                                {
                     emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0));
-                    emit2(d);
+                    emit2(scaledd);
                     return;
                 }
                        }
 
                        /* Otherwise we need the full 48 bits. */
 
-            if (!fitx(d, 27))
-                serror("offset too big to encode into instruction");
+                       newrelo(expr->typ, RELOVC4|RELPC);
+
+                       /* VC4 relocations store the PC-relative delta into the
+                        * destination section in the instruction data. The linker will
+                        * massage this, and scale it appropriately. */
 
-                       newrelo(expr->typ, RELOVC4 | RELPC);
+            if (!fitx(d, 27))
+                               toobig();
 
             emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0));
             emit4((31<<27) | maskx(d, 27));
@@ -493,8 +514,19 @@ void lea_stack_instr(int rd, long va, int rs)
 
 void lea_address_instr(int rd, struct expr_t* expr)
 {
-       newrelo(expr->typ, RELOVC4);
+       quad pc = DOTVAL;
+       quad type = expr->typ & S_TYP;
+
+       if (type == S_ABS)
+               serror("can't use absolute addresses here");
+
+       newrelo(expr->typ, RELOVC4|RELPC);
+
+       /* VC4 relocations store the PC-relative delta into the
+        * destination section in the instruction data. The linker will
+        * massage this, and scale it appropriately. */
+
        emit2(B16(11100101,00000000) | (rd<<0));
-       emit4(expr->val);
+       emit4(expr->val - pc);
 }
 
index 4edaa03..fdbcc4c 100644 (file)
@@ -7,3 +7,8 @@
  */
 
 #include "videocore.h"
+
+.define        __dummy
+.sect .data
+__dummy:
+
index 3cf4f3f..b848e65 100644 (file)
 .sect .text
 
 begtext:
+       lea r15, begtext
+       st sp, .returnsp
+       st lr, .returnlr
+
 #if 0
        ! Wipe the bss. (I'm a little surprised that __m_a_i_n doesn't do this.)
        
@@ -33,6 +37,12 @@ begtext:
 #endif
        b __m_a_i_n
 
+.define __exit
+__exit:
+       ! Reload the sp and lr values that begtext stored in .returnsp and
+       ! .returnlr, then branch to the reloaded lr.
+       ld sp, .returnsp
+       ld lr, .returnlr
+       b lr
+
 ! Define symbols at the beginning of our various segments, so that we can find
 ! them. (Except .text, which has already been done.)
 
@@ -47,3 +57,9 @@ begtext:
 .comm .trppc, 4
 .comm .ignmask, 4
 .comm _errno, 4
+
+! We store the stack pointer and return address on entry so that we can
+! cleanly exit.
+
+.comm .returnsp, 4
+.comm .returnlr, 4
index f44a34b..3cc9ff9 100644 (file)
@@ -8,6 +8,7 @@ static char rcsid[] = "$Id$";
 
 #include <stdlib.h>
 #include <stdio.h>
+#include <stdint.h>
 #include <assert.h>
 #include "out.h"
 #include "const.h"
@@ -44,6 +45,65 @@ static long read4(char* addr, int type)
                return ((long)word1 << (2 * WIDTH)) + word0;
 }
 
+/* VideoCore 4 fixups are complex as we need to patch the instruction in
+ * one of several different ways (depending on what the instruction is).
+ *
+ * get_vc4_valu() decodes the 16-bit opcode word at addr, extracts the
+ * offset field from the matching instruction layout and returns it as a
+ * signed value. Branch instructions hold the offset halved, so it is
+ * doubled before being returned. An opcode that matches none of the known
+ * layouts trips an assertion.
+ *
+ * Sign extension uses (x ^ signbit) - signbit rather than a shift pair:
+ * left-shifting a signed value into the sign bit is undefined, and the
+ * shift pair does nothing at all on an unsigned value.
+ */
+
+static long get_vc4_valu(char* addr)
+{
+       uint16_t opcode = read2(addr, 0);
+
+       if ((opcode & 0xff00) == 0xe700)
+       {
+               /* ld<w> rd, $+o:  [1110 0111 ww 0 d:5] [11111 o:27]
+                * st<w> rd, $+o:  [1110 0111 ww 1 d:5] [11111 o:27]
+                */
+
+               int32_t value = read4(addr+2, 0) & 0x07ffffff;
+
+               /* Sign-extend the 27-bit field (sign bit is bit 26). */
+               return (value ^ 0x04000000) - 0x04000000;
+       }
+
+       if ((opcode & 0xf080) == 0x9000)
+       {
+               /* b<cc> $+o*2:  [1001 cccc 0ooo oooo] [oooo oooo oooo oooo]
+                * Yes, big-endian (the first 16 bits is the MSB).
+                */
+
+               int32_t value = read4(addr, RELWR) & 0x007fffff;
+
+               /* Sign-extend the 23-bit field (sign bit is bit 22) so that
+                * backward branches come out negative. */
+               value = (value ^ 0x00400000) - 0x00400000;
+               return (long)value * 2;
+       }
+
+       if ((opcode & 0xf080) == 0x9080)
+       {
+               /* bl $+o*2:  [1001 oooo 1ooo oooo] [oooo oooo oooo oooo]
+                * Yes, big-endian (the first 16 bits is the MSB).
+                * (Note that o is split.)
+                */
+
+               long insn = read4(addr, RELWR);
+               int32_t lov = insn & 0x007fffff;
+               int32_t hiv = insn & 0x0f000000;
+               int32_t value = lov | (hiv>>1);
+
+               /* The two halves are now contiguous: sign-extend the 27-bit
+                * result (sign bit is bit 26). */
+               value = (value ^ 0x04000000) - 0x04000000;
+               return (long)value * 2;
+       }
+
+       if ((opcode & 0xffe0) == 0xe500)
+       {
+               /* lea: [1110 0101 000 d:5] [o:32] */
+
+               return read4(addr+2, 0);
+       }
+
+       assert(0 && "unrecognised VC4 instruction");
+
+       /* Not reached while assertions are enabled; gives the function a
+        * defined result when they are compiled out. */
+       return 0;
+}
+
+
 /*
  * The bits in type indicate how many bytes the value occupies and what
  * significance should be attributed to each byte.
@@ -65,21 +125,7 @@ getvalu(addr, type)
        case RELOH2:
                return read2(addr, type) << 16;
        case RELOVC4:
-       {
-               long i = read4(addr, type);
-               if (i & 0x00800000)
-               {
-            /* Branch instruction. */
-            return (i<<9)>>9;
-               }
-               else
-               {
-                       /* Branch-link instruction. */
-                       long hi = (i<<4)>>28;
-                       long lo = (i & 0x007fffff);
-                       return lo | (hi<<23);
-               }
-       }
+               return get_vc4_valu(addr);
        default:
                fatal("bad relocation size");
        }
@@ -123,6 +169,60 @@ static void write4(long valu, char* addr, int type)
        }
 }
 
+/* VideoCore 4 fixups are complex as we need to patch the instruction in
+ * one of several different ways (depending on what the instruction is).
+ *
+ * put_vc4_valu() is the writing half: it looks at the 16-bit opcode word at
+ * addr, and stores value into the offset field of whichever instruction
+ * layout matches, leaving the remaining instruction bits as they were.
+ * Branch instructions receive value/2. An opcode that matches none of the
+ * known layouts trips an assertion.
+ */
+
+static void put_vc4_valu(char* addr, long value)
+{
+       uint16_t opcode = read2(addr, 0);
+       long halved = value/2;
+       uint32_t word;
+
+       if ((opcode & 0xff00) == 0xe700)
+       {
+               /* ld<w> rd, o, (pc):  [1110 0111 ww 0 d:5] [11111 o:27]
+                * st<w> rd, o, (pc):  [1110 0111 ww 1 d:5] [11111 o:27]
+                */
+
+               word = read4(addr+2, 0);
+               word = (word & 0xf8000000) | (value & 0x07ffffff);
+               write4(word, addr+2, 0);
+               return;
+       }
+
+       if ((opcode & 0xf080) == 0x9000)
+       {
+               /* b<cc> dest:  [1001 cccc 0ooo oooo] [oooo oooo oooo oooo]
+                * Yes, big-endian (the first 16 bits is the MSB).
+                */
+
+               word = read4(addr, RELWR);
+               word = (word & 0xff800000) | (halved & 0x007fffff);
+               write4(word, addr, RELWR);
+               return;
+       }
+
+       if ((opcode & 0xf080) == 0x9080)
+       {
+               /* bl dest:  [1001 oooo 1ooo oooo] [oooo oooo oooo oooo]
+                * Yes, big-endian (the first 16 bits is the MSB).
+                * (Note that o is split.)
+                */
+
+               uint32_t low = halved & 0x007fffff;
+               uint32_t high = halved & 0x07800000;
+
+               /* The high four offset bits sit one position further up,
+                * leaving bit 23 of the instruction untouched. */
+               word = read4(addr, RELWR);
+               word = (word & 0xf0800000) | low | (high<<1);
+               write4(word, addr, RELWR);
+               return;
+       }
+
+       if ((opcode & 0xffe0) == 0xe500)
+       {
+               /* lea: [1110 0101 000 d:5] [o:32] */
+
+               write4(value, addr+2, 0);
+               return;
+       }
+
+       assert(0 && "unrecognised VC4 instruction");
+}
+
+
 /*
  * The bits in type indicate how many bytes the value occupies and what
  * significance should be attributed to each byte.
@@ -156,27 +256,8 @@ putvalu(valu, addr, type)
                write2(valu>>16, addr, type);
                break;
        case RELOVC4:
-       {
-               long i = read4(addr, type);
-               if (i & 0x00800000)
-               {
-                       /* Branch instruction. */
-                       unsigned v = (valu/2) & 0x007fffff;
-                       i &= ~0x007fffff;
-                       i |= v;
-               }
-               else
-               {
-                       /* Branch-link instruction. */
-               unsigned v = (valu/2) & 0x07ffffff;
-               unsigned hiv = v >> 23;
-               unsigned lov = v & 0x007fffff;
-                       i &= ~0x0f7fffff;
-                       i |= (lov>>16) | (hiv<<24);
-               }
-               write4(i, addr, type);
+               put_vc4_valu(addr, valu);
                break;
-       }
        default:
                fatal("bad relocation size");
        }