From 2cc2c0ae98e0b7be4ea803ff8a3ec5ea20e1de97 Mon Sep 17 00:00:00 2001 From: David Given Date: Sat, 29 Oct 2016 11:57:56 +0200 Subject: [PATCH] Lots more opcodes. Rearrange the stack layout so that fp->ab is a fixed value (needed for CHAINFP and FPTOAB). Wire up lfrs to calls via a phi when necessary, to allow call-bra-lfr chains. --- mach/powerpc/mcg/platform.c | 30 +++-- mach/powerpc/mcg/table | 155 +++++++--------------- mach/proto/mcg/hop.c | 6 + mach/proto/mcg/mcg.h | 1 + mach/proto/mcg/pass_livevreganalysis.c | 2 + mach/proto/mcg/pass_registerallocator.c | 12 +- mach/proto/mcg/pass_returnvalues.c | 123 +++++++++++++++++ mach/proto/mcg/pass_typeinference.c | 4 +- mach/proto/mcg/procedure.c | 1 + mach/proto/mcg/treebuilder.c | 168 ++++++++++++++---------- util/mcgg/ir.dat | 12 +- 11 files changed, 316 insertions(+), 198 deletions(-) create mode 100644 mach/proto/mcg/pass_returnvalues.c diff --git a/mach/powerpc/mcg/platform.c b/mach/powerpc/mcg/platform.c index 4a77118ca..a8fd1af7c 100644 --- a/mach/powerpc/mcg/platform.c +++ b/mach/powerpc/mcg/platform.c @@ -4,19 +4,20 @@ * * | ...params... * | --------------- <- ab + * | old FR + * | old FP + * | --------------- <- st, fp (a.k.a. lb) * | spills * | --------------- * | saved regs - * | LR - * | FP - * | --------------- <- st, fp (a.k.a. lb) + * | --------------- * | locals * | --------------- <- sp * V ...user area... * * st indexes up; lb indexes down. * - * We ensure that dereferencing fp always produces the caller's fp. + * Note that [fp] == old_fp and ab == fp + 8. */ static ARRAYOF(struct hreg) saved_regs; @@ -39,28 +40,29 @@ void platform_calculate_offsets(void) } current_proc->fp_to_st = 0; - current_proc->fp_to_ab = current_proc->spills_size + current_proc->saved_size + 8; - current_proc->fp_to_lb = 0; + current_proc->fp_to_ab = 8; + current_proc->fp_to_lb = -(current_proc->spills_size + current_proc->saved_size); } struct hop* platform_prologue(void) { int i; int saved_offset; + int spoffset = current_proc->saved_size + current_proc->spills_size + + current_proc->locals_size; struct hop* hop = new_hop(current_proc->entry, NULL); - hop_add_insel(hop, "! saved_size = %d+8 bytes", current_proc->saved_size); + hop_add_insel(hop, "! saved_size = %d bytes", current_proc->saved_size); hop_add_insel(hop, "! spills_size = %d bytes", current_proc->spills_size); hop_add_insel(hop, "! locals_size = %d bytes", current_proc->locals_size); - hop_add_insel(hop, "addi sp, sp, %d", -(current_proc->fp_to_ab + current_proc->locals_size)); + hop_add_insel(hop, "addi sp, sp, %d", -(spoffset + 8)); hop_add_insel(hop, "mfspr r0, lr"); - - hop_add_insel(hop, "stw fp, %d(sp)", current_proc->locals_size + 0); - hop_add_insel(hop, "stw r0, %d(sp)", current_proc->locals_size + 4); - hop_add_insel(hop, "addi fp, sp, %d", current_proc->locals_size); + hop_add_insel(hop, "stw fp, %d(sp)", spoffset + 0); + hop_add_insel(hop, "stw r0, %d(sp)", spoffset + 4); + hop_add_insel(hop, "addi fp, sp, %d", spoffset); /* Saved reg offsets are negative. */ - saved_offset = current_proc->saved_size + 8; + saved_offset = -current_proc->spills_size; for (i=0; isaved_size + 8; + saved_offset = -current_proc->spills_size; for (i=0; i ubyte0" - cost 1; - - out:(int)ubyte0 = CIU41(in:(int)ubyte0) - with %out == %in - emit "! CIU41(ubyte0) -> ubyte0" - cost 1; - - out:(int)ubyteX = CIU41(in:(int)ubyteX) - with %out == %in - emit "! CIU41(ubyteX) -> ubyteX" - cost 1; - - out:(int)reg = CII14(in:(int)ubyteX) - emit "extsb %out, %in ! CII14(ubyteX) -> reg" + out:(fret)reg = FROMUI.F(in:(ret)reg) + with corrupted(volatile) + emit "bl .fromui2f" cost 4; - /* short conversions */ - - out:(int)ushort0 = CIU24(in:(int)ushort0) - with %out == %in - emit "! CIU24(ushort0) -> ushort0" - cost 1; - - out:(int)ushort0 = CIU42(in:(int)ushort0) - with %out == %in - emit "! CIU42(ushort0) -> ushort0" - cost 1; - - out:(int)ushortX = CIU42(in:(int)ushortX) - with %out == %in - emit "! CIU42(ushortX) -> ushortX" - cost 1; - - out:(int)reg = CII24(in:(int)ushort0) - with %out == %in - emit "! CII24(ushort0) -> reg" - cost 4; - - out:(int)reg = CII24(in:(int)ushortX) - emit "extsh %out, %in" + out:(dret)reg = FROMUI.D(in:(ret)reg) + with corrupted(volatile) + emit "bl .fromui2d" cost 4; -#endif + /* Locals */ @@ -452,6 +430,7 @@ PATTERNS + /* Memory addressing modes */ address = ADD.I(addr:(int)reg, offset:CONST.I) @@ -484,38 +463,26 @@ PATTERNS emit "b $false" cost 8; - CALL(dest:LABEL.I) - with corrupted(volatile) - emit "bl $dest" - cost 4; - - out:(ret)reg = CALL.I(dest:LABEL.I) - with corrupted(volatile) - emit "bl $dest" - cost 4; - - out:(lret)reg = CALL.L(dest:LABEL.I) - with corrupted(volatile) - emit "bl $dest" - cost 4; + #define CALLLABEL(insn) \ + insn (dest:LABEL.I) \ + with corrupted(volatile) \ + emit "bl $dest" \ + cost 4; - CALL(dest:(int)reg) - with corrupted(volatile) - emit "mtspr ctr, %dest" - emit "bcctrl 20, 0, 0" - cost 8; + CALLLABEL(CALL) + out:(int)reg = CALLLABEL(CALL.I) + out:(long)reg = CALLLABEL(CALL.L) - out:(ret)reg = CALL.I(dest:(int)reg) - with corrupted(volatile) - emit "mtspr ctr, %dest" - emit "bcctrl 20, 0, 0" - cost 8; + #define CALLINDIRECT(insn) \ + insn (dest:(int)reg) \ + with corrupted(volatile) \ + emit "mtspr ctr, %dest" \ + emit "bcctrl 20, 0, 0" \ + cost 8; - out:(lret)reg = CALL.L(dest:(int)reg) - with corrupted(volatile) - emit "mtspr ctr, %dest" - emit "bcctrl 20, 0, 0" - cost 8; + CALLINDIRECT(CALL) + out:(int)reg = CALLINDIRECT(CALL.I) + out:(long)reg = CALLINDIRECT(CALL.L) JUMP(dest:LABEL.I) emit "b $dest" @@ -701,6 +668,12 @@ PATTERNS emit "lfd %out, %addr" cost 4; + out:(float)reg = in:CONST.F + when specific_constant(%in, 0) + emit "la r0, .fd_00000000" + emit "lfs %out, 0(r0)" + cost 12; + FPU4R(ADDF.F, "fadds") FPU8R(ADDF.D, "fadd") @@ -729,35 +702,5 @@ PATTERNS emit "fcmpu %cr, %left, %right" cost 4; - #if 0 - out:(ret)reg = CFI44(val:(fret)reg) - with corrupted(volatile) - emit "bl .cfi44" - cost 4; - - out:(fret)reg = CIF44(val:(ret)reg) - with corrupted(volatile) - emit "bl .cif44" - cost 4; - - out:(ret)reg = CFI84(val:(dret)reg) - with corrupted(volatile) - emit "bl .cfi84" - cost 4; - - out:(dret)reg = CIF48(val:(ret)reg) - with corrupted(volatile) - emit "bl .cif48" - cost 4; - - out:(float)reg = CFF84(val:(double)reg) - emit "frsp %out, %val" - cost 4; - - out:(double)reg = CFF48(val:(float)reg) - emit "fmr %out, %val" - cost 1; - #endif - /* vim: set sw=4 ts=4 expandtab : */ diff --git a/mach/proto/mcg/hop.c b/mach/proto/mcg/hop.c index 1afa04c50..27ea8be03 100644 --- a/mach/proto/mcg/hop.c +++ b/mach/proto/mcg/hop.c @@ -282,9 +282,15 @@ char* hop_render(struct hop* hop) case IR_CONST: appendf("%d", ir->u.ivalue); break; + + default: + assert(false); } break; } + + default: + assert(false); } } diff --git a/mach/proto/mcg/mcg.h b/mach/proto/mcg/mcg.h index 08f6828bf..540f5799c 100644 --- a/mach/proto/mcg/mcg.h +++ b/mach/proto/mcg/mcg.h @@ -118,6 +118,7 @@ extern void pass_register_allocator(void); extern void pass_remove_dead_blocks(void); extern void pass_remove_dead_phis(void); extern void pass_split_critical_edges(void); +extern void pass_wire_up_return_values(void); extern void platform_calculate_offsets(void); extern struct hop* platform_prologue(void); diff --git a/mach/proto/mcg/pass_livevreganalysis.c b/mach/proto/mcg/pass_livevreganalysis.c index 889fe35b5..5481c6598 100644 --- a/mach/proto/mcg/pass_livevreganalysis.c +++ b/mach/proto/mcg/pass_livevreganalysis.c @@ -67,6 +67,8 @@ void pass_live_vreg_analysis(void) propagate_liveness(dominance.postorder.item[i]); } while (!finished); + + //assert(cfg.entry->liveins.count == 0); } /* vim: set sw=4 ts=4 expandtab : */ diff --git a/mach/proto/mcg/pass_registerallocator.c b/mach/proto/mcg/pass_registerallocator.c index 0a6eba638..7c348f0a3 100644 --- a/mach/proto/mcg/pass_registerallocator.c +++ b/mach/proto/mcg/pass_registerallocator.c @@ -120,7 +120,7 @@ static struct hreg* evict(struct vreg* vreg) * Shouldn't really happen in real life. */ return hreg; } - if (candidatein == candidateout) + if (candidatein && candidateout && (candidatein == candidateout)) { /* This is a through register. */ tracef('R', "R: evicting %%%d from %s\n", candidatein->id, hreg->id); @@ -513,8 +513,9 @@ static void assign_hregs_to_vregs(void) phi->prev->regsout, phi->ir->result); if (hreg && !pmap_findleft(old, hreg)) { - tracef('R', "R: import hreg %s for phi input %%%d from %s\n", - hreg->id, vreg->id, phi->prev->name); + tracef('R', "R: import hreg %s for %%%d, imported from %s %%%d\n", + hreg->id, vreg->id, + phi->prev->name, phi->ir->id); pmap_put(old, hreg, vreg); } } @@ -534,8 +535,9 @@ static void assign_hregs_to_vregs(void) struct phicongruence* c = vreg->congruence; struct hreg* hreg = allocate_phi_hreg(old, vreg, c->type); - tracef('R', "R: import fallback hreg %s for phi input %%%d from %s\n", - hreg->id, vreg->id, phi->prev->name); + tracef('R', "R: import fallback hreg %s for %%%d, imported from %s %%%d\n", + hreg->id, vreg->id, + phi->prev->name, phi->ir->id); pmap_add(old, hreg, vreg); } } diff --git a/mach/proto/mcg/pass_returnvalues.c b/mach/proto/mcg/pass_returnvalues.c new file mode 100644 index 000000000..3be801411 --- /dev/null +++ b/mach/proto/mcg/pass_returnvalues.c @@ -0,0 +1,123 @@ +#include "mcg.h" + +/* The ACK returns values from functions not on the stack but in a special + * 'register' which are read with lfr. This register is defined to survive + * asp, bra and gto. The way it's intended to work is that value just gets put + * in a particular hreg and stays there until lfr brings it to the attention of + * the code generator. + * + * Trouble is, while that worked on ncg, it doesn't work here because the + * register allocator may decide to insert moves arbitrarily. So we need to + * somehow turn this special register into a real register so that it can be + * kept alive. + * + * The easiest thing to do is to just push the result of call onto the stack... + * but that doesn't work either, because if someone does a call without an lfr, + * we don't want to be left with an unpopped value. + * + * So what we do is we find lfrs, and then we search for the call which + * generated the value, and then we hook up the IRs so there's a connection + * between the two. But beware! The lfr value survives bra! Which means a + * single lfr may actually read the value produced by *several* call + * instructions. You know what that means? Phis. + * + * (Luckily a single call instruction can't be read by multiple lfrs, because + * conditional branches trash the lfr value.) + */ + +static void find_call(struct basicblock* bb, int index, struct ir* lfr, + struct basicblock** callbb, struct ir** callir) +{ + if (index == -1) + index = bb->irs.count - 1; + + while (index >= 0) + { + struct ir* ir = bb->irs.item[index]; + switch (ir->opcode) + { + case IR_CALL: + ir->size = lfr->size; + *callbb = bb; + *callir = ir; + return; + + case IR_STACKADJUST: + case IR_GETRET: + case IR_JUMP: + /* lfr value preserved */ + break; + + default: + /* lfr value has been corrupted. */ + fatal("lfr reading corrupted value in %s", bb->name); + } + + index--; + } + + /* Our search hit the top of the block; we need to import the + * lfr value from a previous block. */ + + if (bb->prevs.count == 1) + { + /* Only a single predecessor, so no phi is necessary. */ + + find_call(bb->prevs.item[0], -1, lfr, callbb, callir); + } + else + { + /* We have multiple predecessors. This means that the lfr value may + * come from any of these blocks. We need a phi. */ + + int i; + struct ir* phi = new_ir0(IR_PHI, lfr->size); + + phi->root = phi; + array_insert(&bb->irs, phi, 0); + + for (i=0; iprevs.count; i++) + { + struct basicblock* prev = bb->prevs.item[i]; + struct basicblock* parentbb; + struct ir* parentir; + + find_call(prev, -1, phi, &parentbb, &parentir); + + pmap_add(&phi->u.phivalue, parentbb, parentir); + } + + *callbb = bb; + *callir = phi; + } +} + +static void wire_up_ir(struct basicblock* bb, int index) +{ + struct ir* lfr = bb->irs.item[index]; + struct basicblock* callbb; + struct ir* callir; + + find_call(bb, index, lfr, &callbb, &callir); + + lfr->left = callir; + lfr->opcode = IR_NOP; +} + +void pass_wire_up_return_values(void) +{ + int i, j; + + for (i=0; iirs.count; j++) + { + struct ir* ir = bb->irs.item[j]; + if (ir->opcode == IR_GETRET) + wire_up_ir(bb, j); + } + } +} + +/* vim: set sw=4 ts=4 expandtab : */ diff --git a/mach/proto/mcg/pass_typeinference.c b/mach/proto/mcg/pass_typeinference.c index c081a4b3d..f3b123e25 100644 --- a/mach/proto/mcg/pass_typeinference.c +++ b/mach/proto/mcg/pass_typeinference.c @@ -218,8 +218,8 @@ static struct ir* new_copy(char wanted, char real, struct ir* ir) else if ((wanted == 'L') && (real == 'D')) opcode = IR_COPYD; else - fatal("type mismatch: parent IR wanted %c, child IR provided %c", - wanted, real); + fatal("type mismatch: parent IR $%d wanted %c, child IR provided %c", + ir->id, wanted, real); copy = new_ir1(opcode, ir->size, ir); copy->type = wanted; diff --git a/mach/proto/mcg/procedure.c b/mach/proto/mcg/procedure.c index d09d30c05..7db7aff3a 100644 --- a/mach/proto/mcg/procedure.c +++ b/mach/proto/mcg/procedure.c @@ -179,6 +179,7 @@ void procedure_compile(struct procedure* proc) * and nexts (and then calling update_graph_data()). */ print_blocks('3'); + pass_wire_up_return_values(); pass_convert_stack_ops(); print_blocks('4'); pass_convert_locals_to_ssa(); diff --git a/mach/proto/mcg/treebuilder.c b/mach/proto/mcg/treebuilder.c index 66a69edef..73e454d2c 100644 --- a/mach/proto/mcg/treebuilder.c +++ b/mach/proto/mcg/treebuilder.c @@ -4,7 +4,6 @@ static struct basicblock* current_bb; static int stackptr; static struct ir* stack[64]; -static struct ir* lastcall; static struct ir* convert(struct ir* src, int srcsize, int destsize, int opcode); static struct ir* appendir(struct ir* ir); @@ -340,6 +339,20 @@ static void simple_test_neg(int size, int irop) ); } +static void helper_function(const char* name) +{ + /* Delegates to a helper function; these leave their result on the stack + * rather than returning values through lfr. */ + + materialise_stack(); + appendir( + new_ir1( + IR_CALL, 0, + new_labelir(name) + ) + ); +} + static void insn_simple(int opcode) { switch (opcode) @@ -364,6 +377,7 @@ static void insn_simple(int opcode) case op_cfu: simple_convert(IR_FROMF); break; /* FIXME: technically wrong */ case op_cfi: simple_convert(IR_FROMF); break; case op_cif: simple_convert(IR_FROMSI); break; + case op_cuf: simple_convert(IR_FROMUI); break; case op_cff: simple_convert(IR_FROMF); break; case op_cmp: @@ -384,7 +398,7 @@ static void insn_simple(int opcode) struct ir* dest = pop(EM_pointersize); materialise_stack(); - lastcall = appendir( + appendir( new_ir1( IR_CALL, 0, dest @@ -421,7 +435,7 @@ static void insn_simple(int opcode) { push( new_ir1( - IR_LOAD, 2, + (EM_wordsize == 2) ? IR_LOAD : IR_LOADH, EM_wordsize, new_labelir(".ignmask") ) ); @@ -432,7 +446,7 @@ static void insn_simple(int opcode) { appendir( new_ir2( - IR_STORE, 2, + (EM_wordsize == 2) ? IR_STORE : IR_STOREH, EM_wordsize, new_labelir(".ignmask"), pop(EM_wordsize) ) @@ -440,31 +454,32 @@ static void insn_simple(int opcode) break; } - case op_trp: - { - materialise_stack(); - appendir( + case op_trp: helper_function(".trp"); break; + case op_sig: helper_function(".sig"); break; + case op_rtt: helper_function(".rtt"); break; + + /* FIXME: These instructions are really complex and barely used + * (Modula-2 bitset support, I believe). Leave them until later. */ + case op_set: helper_function(".unimplemented_set"); break; + case op_ior: helper_function(".unimplemented_ior"); break; + + case op_dch: + push( new_ir1( - IR_CALL, 0, - new_labelir(".trp") + IR_CHAINFP, EM_pointersize, + pop(EM_pointersize) ) ); break; - } - - /* FIXME: These instructions are really complex and barely used - * (Modula-2 bitset support, I believe). Leave them until leter. */ - case op_set: - case op_ior: - { - appendir( + + case op_lpb: + push( new_ir1( - IR_CALL, 0, - new_labelir(".unimplemented") + IR_FPTOAB, EM_pointersize, + pop(EM_pointersize) ) ); break; - } case op_lni: { @@ -598,6 +613,28 @@ static struct ir* ptradd(struct ir* address, int offset) ); } +static void blockmove(struct ir* dest, struct ir* src, struct ir* size) +{ + /* memmove stack: ( size src dest -- ) */ + push(size); + push(src); + push(dest); + + materialise_stack(); + appendir( + new_ir1( + IR_CALL, 0, + new_labelir("memmove") + ) + ); + appendir( + new_ir1( + IR_STACKADJUST, EM_pointersize, + new_wordir(EM_pointersize*2 + EM_wordsize) + ) + ); +} + static void insn_ivalue(int opcode, arith value) { switch (opcode) @@ -725,7 +762,7 @@ static void insn_ivalue(int opcode, arith value) case op_zrf: { struct ir* ir = new_constir(value, 0); - ir->opcode = IR_CONSTF; + ir->opcode = IR_CONST; push(ir); break; } @@ -1012,9 +1049,13 @@ static void insn_ivalue(int opcode, arith value) case op_lfr: { - assert(lastcall != NULL); - lastcall->size = value; - push(lastcall); + push( + appendir( + new_ir0( + IR_GETRET, value + ) + ) + ); break; } @@ -1063,7 +1104,7 @@ static void insn_ivalue(int opcode, arith value) * the physical stack (which is very dubious). */ appendir( new_ir1( - IR_CALL, EM_wordsize, + IR_CALL, 0, new_labelir(helper) ) ); @@ -1094,26 +1135,11 @@ static void insn_ivalue(int opcode, arith value) case op_lxa: { - struct ir* ir; - - /* Walk the static chain. */ - - ir = new_ir0( - IR_GETFP, EM_pointersize - ); - - while (value--) - { - ir = new_ir1( - IR_CHAINFP, EM_pointersize, - ir - ); - } - - push( + /* What does this actually *do*? The spec doesn't say. */ + appendir( new_ir1( - IR_FPTOARGS, EM_pointersize, - ir + IR_CALL, 0, + new_labelir(".unimplemented_lxa") ) ); break; @@ -1166,6 +1192,17 @@ static void insn_ivalue(int opcode, arith value) { switch (value) { + case 0: + appendir( + new_ir1( + IR_FPTOLB, EM_pointersize, + new_ir0( + IR_GETFP, EM_pointersize + ) + ) + ); + break; + case 1: appendir( new_ir0( @@ -1185,6 +1222,15 @@ static void insn_ivalue(int opcode, arith value) { switch (value) { + case 0: + appendir( + new_ir1( + IR_SETFP, EM_pointersize, + pop(EM_pointersize) + ) + ); + break; + case 1: appendir( new_ir1( @@ -1204,27 +1250,19 @@ static void insn_ivalue(int opcode, arith value) case op_blm: { /* Input stack: ( src dest -- ) */ - /* Memmove stack: ( size src dest -- ) */ struct ir* dest = pop(EM_pointersize); struct ir* src = pop(EM_pointersize); + blockmove(dest, src, new_wordir(value)); + break; + } - push(new_wordir(value)); - push(src); - push(dest); - - materialise_stack(); - appendir( - new_ir1( - IR_CALL, 0, - new_labelir("memmove") - ) - ); - appendir( - new_ir1( - IR_STACKADJUST, EM_pointersize, - new_wordir(EM_pointersize*2 + EM_wordsize) - ) - ); + case op_bls: + { + /* Input stack: ( src dest size -- ) */ + struct ir* dest = pop(EM_pointersize); + struct ir* src = pop(EM_pointersize); + struct ir* size = pop(EM_wordsize); + blockmove(dest, src, size); break; } @@ -1426,7 +1464,7 @@ static void insn_lvalue(int opcode, const char* label, arith offset) case op_cal: assert(offset == 0); materialise_stack(); - lastcall = appendir( + appendir( new_ir1( IR_CALL, 0, new_labelir(label) @@ -1513,8 +1551,6 @@ void tb_procedure(void) for (i=0; iblocks.count; i++) generate_tree(current_proc->blocks.item[i]); - } /* vim: set sw=4 ts=4 expandtab : */ - diff --git a/util/mcgg/ir.dat b/util/mcgg/ir.dat index 1be1810fd..92fb5df0d 100644 --- a/util/mcgg/ir.dat +++ b/util/mcgg/ir.dat @@ -6,8 +6,7 @@ # ?: pull/push types from other ? parameters # Simple terminals -S ?=.. CONST # must be followed by float form -S ?=.. CONSTF +S ?=.. CONST V ?=.. REG S ?=?. NOP S I=.. LABEL @@ -95,7 +94,9 @@ S I=I. IFLT S I=I. IFLE # Procedures -S i=.. CALL +S i=I. CALL +S i=?. GETRET +S ?=i. SETRET # Flow control --- these never return V .=i. JUMP @@ -106,10 +107,11 @@ V .=.. RET # Special S ?=i. STACKADJUST -S ?=i. SETRET S i=.. GETFP +S ?=i. SETFP S i=.. GETSP S ?=i. SETSP S i=i. CHAINFP -S i=i. FPTOARGS +S i=i. FPTOAB +S i=i. FPTOLB -- 2.34.1