From 79a38ecc08d15f5c375cdf72f767440c788e4adb Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 13 Feb 2019 22:45:22 +0100 Subject: [PATCH] Instead of using parameterised rsts for stack access, add a huge swathe of automatically built helper tools. Star Trek goes up from 40243 to 40779 bytes, but should be a lot faster. --- mach/i80/libem/build.lua | 64 ++++++++++++- mach/i80/libem/faddr.h | 30 +++++++ mach/i80/libem/faddrn.h | 30 +++++++ mach/i80/libem/fload.h | 32 +++++++ mach/i80/libem/floadn.h | 38 ++++++++ mach/i80/libem/fstore.h | 32 +++++++ mach/i80/libem/fstoren.h | 39 ++++++++ mach/i80/libem/generate.sh | 4 + mach/i80/libem/rst.s | 90 ++++++------------- mach/i80/ncg/table | 179 ++++++++++++++++++------------------- plat/cpm/descr | 1 + 11 files changed, 384 insertions(+), 155 deletions(-) create mode 100644 mach/i80/libem/faddr.h create mode 100644 mach/i80/libem/faddrn.h create mode 100644 mach/i80/libem/fload.h create mode 100644 mach/i80/libem/floadn.h create mode 100644 mach/i80/libem/fstore.h create mode 100644 mach/i80/libem/fstoren.h create mode 100755 mach/i80/libem/generate.sh diff --git a/mach/i80/libem/build.lua b/mach/i80/libem/build.lua index ca5a13c65..45a32ba94 100644 --- a/mach/i80/libem/build.lua +++ b/mach/i80/libem/build.lua @@ -1,8 +1,68 @@ +local generated = {} + +definerule("generate", + { + body = { type="string" }, + offset = { type="object" }, + }, + function(e) + return normalrule { + name = e.name, + ins = { "./generate.sh", "./"..e.body }, + outleaves = { e.name..".s" }, + commands = { + "%{ins[1]} "..e.body.." "..e.offset.." 
> %{outs}" + } + } + end +) + +for i = 1, 128 do + generated[#generated+1] = generate { + name = "fload"..i, + body = "fload.h", + offset = i + } + + generated[#generated+1] = generate { + name = "floadn"..i, + body = "floadn.h", + offset = i + } + + generated[#generated+1] = generate { + name = "fstore"..i, + body = "fstore.h", + offset = i + } + + generated[#generated+1] = generate { + name = "fstoren"..i, + body = "fstoren.h", + offset = i + } + + generated[#generated+1] = generate { + name = "faddr"..i, + body = "faddr.h", + offset = i + } + + generated[#generated+1] = generate { + name = "faddrn"..i, + body = "faddrn.h", + offset = i + } +end + for _, plat in ipairs(vars.plats) do acklibrary { name = "lib_"..plat, - srcs = { "./*.s" }, - vars = { plat = plat }, + srcs = concat("./*.s", generated), + vars = { + plat = plat, + ["+ackcflags"] = {"-Imach/i80/libem"} + }, } end diff --git a/mach/i80/libem/faddr.h b/mach/i80/libem/faddr.h new file mode 100644 index 000000000..e88344080 --- /dev/null +++ b/mach/i80/libem/faddr.h @@ -0,0 +1,30 @@ +.sect .text +.sect .rom +.sect .data +.sect .bss +.sect .text + +! Computes the address of positive stack offset OFFSET into hl. + +#define PASTE(a, b) a ## b +#define LABEL(prefix, offset) PASTE(prefix, offset) + +.define LABEL(.faddr, OFFSET) +LABEL(.faddr, OFFSET): + #if OFFSET == 0 + mov l, c + mov h, b + #elif OFFSET == 1 + mov l, c + mov h, b + inx h + #elif OFFSET == 2 + mov l, c + mov h, b + inx h + inx h + #else + lxi h, OFFSET + dad b + #endif + ret diff --git a/mach/i80/libem/faddrn.h b/mach/i80/libem/faddrn.h new file mode 100644 index 000000000..5e5545273 --- /dev/null +++ b/mach/i80/libem/faddrn.h @@ -0,0 +1,30 @@ +.sect .text +.sect .rom +.sect .data +.sect .bss +.sect .text + +! Computes the address of negative stack offset OFFSET into hl. 
+ +#define PASTE(a, b) a ## b +#define LABEL(prefix, offset) PASTE(prefix, offset) + +.define LABEL(.faddrn, OFFSET) +LABEL(.faddrn, OFFSET): + #if OFFSET == 0 + mov l, c + mov h, b + #elif OFFSET == 1 + mov l, c + mov h, b + dcx h + #elif OFFSET == 2 + mov l, c + mov h, b + dcx h + dcx h + #else + lxi h, -OFFSET + dad b + #endif + ret diff --git a/mach/i80/libem/fload.h b/mach/i80/libem/fload.h new file mode 100644 index 000000000..5b0e02e5d --- /dev/null +++ b/mach/i80/libem/fload.h @@ -0,0 +1,32 @@ +.sect .text +.sect .rom +.sect .data +.sect .bss +.sect .text + +! Fetches the word at positive stack offset OFFSET into de. + +#define PASTE(a, b) a ## b +#define LABEL(prefix, offset) PASTE(prefix, offset) + +.define LABEL(.fload, OFFSET) +LABEL(.fload, OFFSET): + #if OFFSET == 0 + #error "0 shouldn't happen" + #elif OFFSET == 1 + mov l, c + mov h, b + inx h + #elif OFFSET == 2 + mov l, c + mov h, b + inx h + inx h + #else + lxi h, OFFSET + dad b + #endif + mov e, m + inx h + mov d, m + ret diff --git a/mach/i80/libem/floadn.h b/mach/i80/libem/floadn.h new file mode 100644 index 000000000..4a04b7137 --- /dev/null +++ b/mach/i80/libem/floadn.h @@ -0,0 +1,38 @@ +.sect .text +.sect .rom +.sect .data +.sect .bss +.sect .text + +! Fetches the word at negative stack offset OFFSET into de. + +#define PASTE(a, b) a ## b +#define LABEL(prefix, offset) PASTE(prefix, offset) + +.define LABEL(.floadn, OFFSET) +LABEL(.floadn, OFFSET): + #if OFFSET == 0 + #error "0 shouldn't happen" + #elif OFFSET == 1 + mov l, c + mov h, b + mov d, m + dcx h + mov e, m + ret + #elif OFFSET == 2 + mov l, c + mov h, b + dcx h + mov d, m + dcx h + mov e, m + ret + #else + lxi h, -OFFSET + dad b + mov e, m + inx h + mov d, m + ret + #endif diff --git a/mach/i80/libem/fstore.h b/mach/i80/libem/fstore.h new file mode 100644 index 000000000..2a9bce62c --- /dev/null +++ b/mach/i80/libem/fstore.h @@ -0,0 +1,32 @@ +.sect .text +.sect .rom +.sect .data +.sect .bss +.sect .text + +! 
Stores the word in de to positive stack offset OFFSET. + +#define PASTE(a, b) a ## b +#define LABEL(prefix, offset) PASTE(prefix, offset) + +.define LABEL(.fstore, OFFSET) +LABEL(.fstore, OFFSET): + #if OFFSET == 0 + #error "0 shouldn't happen" + #elif OFFSET == 1 + mov l, c + mov h, b + inx h + #elif OFFSET == 2 + mov l, c + mov h, b + inx h + inx h + #else + lxi h, OFFSET + dad b + #endif + mov m, e + inx h + mov m, d + ret diff --git a/mach/i80/libem/fstoren.h b/mach/i80/libem/fstoren.h new file mode 100644 index 000000000..a739825df --- /dev/null +++ b/mach/i80/libem/fstoren.h @@ -0,0 +1,39 @@ +.sect .text +.sect .rom +.sect .data +.sect .bss +.sect .text + +! Stores the word in de to negative stack offset OFFSET. + +#define PASTE(a, b) a ## b +#define LABEL(prefix, offset) PASTE(prefix, offset) + +.define LABEL(.fstoren, OFFSET) +LABEL(.fstoren, OFFSET): + #if OFFSET == 0 + #error "0 shouldn't happen" + #elif OFFSET == 1 + mov l, c + mov h, b + mov m, d + dcx h + mov m, e + ret + #elif OFFSET == 2 + mov l, c + mov h, b + dcx h + mov m, d + dcx h + mov m, e + ret + #else + lxi h, -OFFSET + dad b + mov m, e + inx h + mov m, d + ret + #endif + diff --git a/mach/i80/libem/generate.sh b/mach/i80/libem/generate.sh new file mode 100755 index 000000000..7d8315c9b --- /dev/null +++ b/mach/i80/libem/generate.sh @@ -0,0 +1,4 @@ +#!/bin/sh +echo "#" +echo "#define OFFSET $2" +echo "#include \"$1\"" diff --git a/mach/i80/libem/rst.s b/mach/i80/libem/rst.s index a5234bd67..ca4cce830 100644 --- a/mach/i80/libem/rst.s +++ b/mach/i80/libem/rst.s @@ -5,68 +5,36 @@ .sect .bss .sect .text +! Which restarts (rst vectors) we install are determined by statistical analysis of Star +! Trek. When changing these, make sure to update the i80 table to match. +! 97 call .floadn2 +! 41 call .floadn4 +! 34 call .fload4 +! 28 call .fstoren2 + .define .rst_init .rst_init: - mvi a, 0xc3 ! 
jmp - sta 0x08 - sta 0x10 - sta 0x18 - lxi h, rst1 - shld 0x09 - lxi h, rst2 - shld 0x11 - lxi h, rst3 - shld 0x19 - ret - - ! de = [bc+const1] (remember bc is the frame pointer) -rst1: - pop h + lxi h, .floadn2 + lxi d, 0x0008 + call copy + lxi h, .floadn4 + lxi d, 0x0010 + call copy + lxi h, .fload4 + lxi d, 0x0018 + call copy + lxi h, .fstoren2 + lxi d, 0x0020 + jmp copy + +! Copies eight bytes from HL to DE. +copy: + mvi c, 8 +.1: mov a, m + stax d inx h - push h - - mov l, a - ral - sbb a - mov h, a - - dad b - mov e, m - inx h - mov d, m - ret - - ! [bc+const1] = de (remember bc is the frame pointer) -rst2: - pop h - mov a, m - inx h - push h - - mov l, a - ral - sbb a - mov h, a - - dad b - mov m, e - inx h - mov m, d - ret - - ! hl = bc+const1 -rst3: - pop h - mov a, m - inx h - push h - - mov l, a - ral - sbb a - mov h, a - - dad b - ret - + inx d + dcr c + jnz .1 + ret \ No newline at end of file diff --git a/mach/i80/ncg/table b/mach/i80/ncg/table index 405a7bfac..4db43d791 100644 --- a/mach/i80/ncg/table +++ b/mach/i80/ncg/table @@ -15,6 +15,8 @@ EM_BSIZE = 4 SL=4 +STACKHELPERS=128 + PROPERTIES areg /* the a-register */ @@ -48,6 +50,7 @@ const1 = { INT num; } 1 num. const2 = { INT num; } 2 num. smallconst2 = { INT num; } 2 num. label = { ADDR off; } 2 off. +plabel = { ADDR off; INT param; } 2 off param. m = { } 2 cost(0,3) "m". SETS @@ -69,6 +72,7 @@ INSTRUCTIONS ana reg1:ro kills a:cc cost(1, 4). ani const1:ro kills a:cc cost(2, 7). Call "call" label:ro cost(3,17). + Call "call" plabel:ro cost(3,17). /* 'call' is a reserved word */ /* cc label:ro cost(3,14). */ /* cm label:ro cost(3,14). 
*/ @@ -253,22 +257,44 @@ pat ldc yields {const2, highw($1)} {const2, loww($1)} #ifdef USE_I80_RSTS - pat lol sfit($1, 8) - uses hlreg, areg, dereg + pat lol $1==0-2 + uses dereg, hlreg gen rst {const1, 1} - data1 {const1, $1} + yields de + + pat lol $1==0-4 + uses dereg, hlreg + gen + rst {const1, 2} + yields de + + pat lol $1==4 + uses dereg, hlreg + gen + rst {const1, 3} yields de #endif +pat lol ($1>0) && ($1<=STACKHELPERS) + uses dereg, hlreg + gen + Call {plabel, ".fload", $1} + yields de + +pat lol ($1<0) && ($1>=0-STACKHELPERS) + uses dereg, hlreg + gen + Call {plabel, ".floadn", 0-$1} + yields de + pat lol - uses hlreg={const2, $1}, dereg - gen - dad lb - mov e,{m} - inx hl - mov d,{m} - yields de + uses dereg + gen + dad lb + mov e, {m} + inx hl + mov d, {m} pat loe uses hlreg @@ -286,14 +312,17 @@ pat lof adp $1 loi 2 -#ifdef USE_I80_RSTS - pat lal sfit($1, 8) - uses dereg, hlreg, areg - gen - rst {const1, 3} - data1 {const1, $1} - yields hl -#endif +pat lal ($1>0) && ($1<=STACKHELPERS) + uses hlreg + gen + Call {plabel, ".faddr", $1} + yields hl + +pat lal ($1<0) && ($1>=0-STACKHELPERS) + uses hlreg + gen + Call {plabel, ".faddrn", 0-$1} + yields hl pat lal uses hlreg={const2,$1} @@ -457,22 +486,33 @@ pat stl lol $1==$2 with dereg yields de de leaving stl $1 #ifdef USE_I80_RSTS - pat stl sfit($1, 8) - with dereg - uses hlreg, areg - gen - rst {const1, 2} - data1 {const1, $1} + pat stl $1==0-2 + with dereg + uses hlreg + gen + rst {const1, 4} #endif +pat stl ($1>0) && ($1<=STACKHELPERS) + with dereg + uses hlreg + gen + Call {plabel, ".fstore", $1} + +pat stl ($1<0) && ($1>=0-STACKHELPERS) + with dereg + uses hlreg + gen + Call {plabel, ".fstoren", 0-$1} + pat stl with dereg - uses hlreg={const2, $1} - gen - dad lb - mov {m}, e - inx hl - mov {m}, d + uses hlreg={const2, $1} + gen + dad lb + mov {m}, e + inx hl + mov {m}, d pat ste loe $1==$2 with hlreg yields hl hl leaving ste $1 @@ -906,18 +946,25 @@ pat inc inx %1 yields %1 -#ifdef USE_I80_RSTS - pat 
inl sfit($1, 8) - uses hlreg, areg +pat inl ($1>0) && ($1