Add tree_loader (tokenizer), saves quite a lot more bytes, but work in progress
authorNick Downing <nick@ndcode.org>
Mon, 13 Jun 2022 16:56:17 +0000 (02:56 +1000)
committerNick Downing <nick@ndcode.org>
Tue, 14 Jun 2022 07:39:01 +0000 (17:39 +1000)
emu_65c02/emu_65c02.c
loader/Makefile
loader/tree_decode.py [new file with mode: 0755]
loader/tree_encode.py [new file with mode: 0755]
loader/tree_loader.asm [new file with mode: 0644]

index 6cb59ea..6e2f19a 100644 (file)
 #define RESET_VECTOR 0xfffc
 
 #define TRACE 0
+#define MEM_TRACE 0
 
 extern char **environ;
 
@@ -565,6 +566,12 @@ uint8_t mem_read(uint16_t addr, bool isDbg) {
       vrEmu6502SetX(cpu, start_at_mission);
 #endif
 
+#if MEM_TRACE
+    {
+      int pc = vrEmu6502GetPC(cpu);
+      fprintf(stderr, "pc=%04x addr=%05x rd=%02x\n", pc, addr, mem[addr]);
+    }
+#endif
     return mem[addr];
   }
 
@@ -731,6 +738,12 @@ void mem_write(uint16_t addr, uint8_t val) {
     else if (addr < 0xe000 && (lc_state & LC_BANK) == LC_BANK2)
       addr -= 0x1000;
     mem[addr] = val;
+#if MEM_TRACE
+    {
+      int pc = vrEmu6502GetPC(cpu);
+      fprintf(stderr, "pc=%04x addr=%05x wr=%02x\n", pc, addr, mem[addr]);
+    }
+#endif
 
 #if 0 // decimals
     if (addr >= 0xb0 && addr < 0xc0) {
index 5846375..cb7904c 100755 (executable)
@@ -11,6 +11,7 @@ HEX2BIN=hex2bin.py
 
 RLE_LOADER=0x800
 RLE2_LOADER=0x800
+TREE_LOADER=0x800
 HIRES_LOADER=0x2000
 
 .PHONY: all
@@ -18,6 +19,7 @@ all: \
 star_blazer.bin \
 star_blazer_dejunked0.bin \
 star_blazer_dejunked1.bin \
+star_blazer_tree_loader.bin \
 star_blazer_rle2_loader.bin \
 star_blazer_rle_loader.bin
 
@@ -30,6 +32,18 @@ star_blazer_dejunked0.bin: star_blazer.bin
 star_blazer_dejunked1.bin: star_blazer.bin
        ./dejunk.py $< $@ 0xff
 
+star_blazer_tree_loader.bin: tree_loader.bin star_blazer_hires_loader.bin
+       ./tree_encode.py ${TREE_LOADER} $^ $@
+
+tree_loader.bin: tree_loader.ihx
+       ${HEX2BIN} $< $@
+
+tree_loader.ihx: tree_loader.rel
+       ${ASLINK} -n -m -u -i -b text=${TREE_LOADER} $@ $^
+
+tree_loader.rel: tree_loader.asm
+       ${AS6500} -l -o $<
+
 star_blazer_rle2_loader.bin: rle2_loader.bin star_blazer_hires_loader.bin
        ./rle2_encode.py ${RLE2_LOADER} $^ $@
 
diff --git a/loader/tree_decode.py b/loader/tree_decode.py
new file mode 100755 (executable)
index 0000000..39ec7ee
--- /dev/null
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+
+import sys
+
+EXIT_SUCCESS = 0
+EXIT_FAILURE = 1
+
+if len(sys.argv) < 3:
+  print(f'usage: {sys.argv[0]:s} in.bin out.bin')
+  sys.exit(EXIT_FAILURE)
+in_bin = sys.argv[1]
+out_bin = sys.argv[2]
+
+with open(in_bin, 'rb') as fin:
+  data = list(fin.read())
+  hdr = data[:4]
+  tree = data[4:]
+load_addr = hdr[0] | (hdr[1] << 8)
+assert hdr[2] == len(tree) & 0xff
+assert hdr[3] == len(tree) >> 8
+
+assert tree[0x1a] == 0xa9 # lda #NN
+assert tree[0x1e] == 0xa9 # lda #NN
+src = tree[0x1b] | (tree[0x1f] << 8)
+
+assert tree[0x22] == 0xa9 # lda #NN
+assert tree[0x26] == 0xa9 # lda #NN
+dest = tree[0x23] + (tree[0x27] << 8)
+
+assert tree[0x2a] == 0xa9 # lda #NN
+assert tree[0x2e] == 0xa9 # lda #NN
+count = tree[0x2b] + (tree[0x2f] << 8)
+
+assert tree[0x32] == 0xa9 # lda #NN
+assert tree[0x34] & ~0x20 == 0x18 # clc
+bits = tree[0x33] | ((tree[0x34] & 0x20) << 3)
+
+assert tree[0x5c] == 0x4c # jmp NNNN
+start = tree[0x5d] | (tree[0x5e] << 8)
+
+assert src == load_addr + len(tree) - 0x300
+dest += 1
+bin = [0] * (dest - start)
+print(f'tree {len(tree):04x} bin {len(bin):04x}')
+
+def decode(_bytes, bits, j):
+  while bits & 0xff:
+    byte = _bytes.pop()
+    if bits & 0x100:
+      j = decode(
+        [tree[-0x300 + byte], tree[-0x200 + byte]],
+        0x40 |
+          ((tree[-0x100 + byte] & 1) << 7) |
+          ((tree[-0x100 + byte] & 0x80) << 1),
+        j
+      )
+    else:
+      j -= 1
+      assert j >= 0
+      bin[j] = byte
+    bits <<= 1
+  assert len(_bytes) == 0
+  return j
+
+padding = 0
+while (bits & (1 << padding)) == 0:
+  padding += 1
+n = 8 - padding
+count |= (-1 << 16)
+
+i = len(tree) - 0x300
+j = len(bin)
+while count < 0:
+  i -= n
+  assert i >= 0x86
+  j = decode(tree[i:i + n], bits, j)
+
+  i -= 1
+  assert i >= 0x86 - 1 # last one is garbage
+  bits = (tree[i] << 1) | 1
+  n = 8
+
+  count += 1
+assert i == 0x86 - 1 # last one is garbage
+assert j == 0
+
+load_addr = start
+load_size = len(bin)
+hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8]
+
+with open(out_bin, 'wb') as fout:
+  fout.write(bytes(hdr + bin))
diff --git a/loader/tree_encode.py b/loader/tree_encode.py
new file mode 100755 (executable)
index 0000000..d7a5359
--- /dev/null
@@ -0,0 +1,185 @@
+#!/usr/bin/env python3
+
+import numpy
+import sys
+from heapdict import heapdict
+
+EXIT_SUCCESS = 0
+EXIT_FAILURE = 1
+
+if len(sys.argv) < 5:
+  print(f'usage: {sys.argv[0]:s} load_addr tree_loader.bin in.bin out.bin')
+  sys.exit(EXIT_FAILURE)
+load_addr = int(sys.argv[1], 0)
+tree_loader_bin = sys.argv[2]
+in_bin = sys.argv[3]
+out_bin = sys.argv[4]
+
+with open(tree_loader_bin, 'rb') as fin:
+  tree_loader = list(fin.read())
+assert len(tree_loader) == 0x86
+
+with open(in_bin, 'rb') as fin:
+  data = list(fin.read())
+  hdr = data[:4]
+  bin = data[4:]
+start = hdr[0] | (hdr[1] << 8)
+assert hdr[2] == len(bin) & 0xff
+assert hdr[3] == len(bin) >> 8
+
+text = list(bin)
+
+freq = heapdict() #{}
+for i in range(len(text) - 1):
+  pair = tuple(text[i:i + 2])
+  if pair not in freq:
+    freq[pair] = 0
+  freq[pair] -= 1
+
+pairs = []
+for i in range(0x100, 0x200):
+  best, _ = freq.peekitem()
+  #_, best = min([(v, k) for k, v in freq.items()])
+  pairs.append(best)
+  j = 0
+  while True:
+    try:
+      j = text.index(best[0], j)
+    except ValueError:
+      break
+    if j + 1 < len(text) and text[j + 1] == best[1]:
+      if j >= 1:
+        pair = (text[j - 1], best[0])
+        freq[pair] += 1
+        pair = (pair[0], i)
+        if pair not in freq:
+          freq[pair] = 0
+        freq[pair] -= 1
+      freq[best] += 1
+      if j + 2 < len(text):
+        pair = (best[1], text[j + 2])
+        freq[pair] += 1
+        pair = (i, pair[1])
+        if pair not in freq:
+          freq[pair] = 0
+        freq[pair] -= 1
+      text[j:j + 2] = [i]
+    j += 1
+  assert freq[best] == 0
+  del freq[best]
+
+pairs = numpy.array(pairs, numpy.uint16)
+text = numpy.array(text, numpy.uint16)
+
+left = (pairs[:, 0] & 0xff).astype(numpy.uint8)
+right = (pairs[:, 1] & 0xff).astype(numpy.uint8)
+high = ((pairs[:, 0] >> 8) | ((pairs[:, 1] >> 1) & 0x80)).astype(numpy.uint8)
+
+n = text.shape[0]
+m = (n + 7) >> 3
+
+# make groups of 8, with padding to decode first
+text1 = numpy.zeros((m * 8,), numpy.uint16)
+text1[:n] = text
+text = text1.reshape((m, 8))
+
+# make groups of 9, as 8 * bits 0..7 then 1 * bit 8s to decode first
+text1 = numpy.zeros((m, 9), numpy.uint8)
+text1[:, :-1] = text & 0xff
+text1[:, 8] = numpy.bitwise_or.reduce(
+  (text >> 8) << numpy.arange(8, dtype = numpy.int32)[numpy.newaxis, :],
+  1
+)
+text = text1.reshape((m * 9,))
+
+# bits is preloaded so we can enter loop8 partway through a group,
+# simulate decoding the padding by appending sentinel then shifting
+count = -n
+padding = count & 7
+count = (count >> 3) & 0xffff
+bits = (((text[-1] << 1) | 1) << padding) & 0x1ff
+text = text[:-1 - padding]
+
+text = list(text)
+left = list(left)
+right = list(right)
+high = list(high)
+tree = tree_loader + text + left + right + high
+print('bin', f'{len(bin):04x}', 'tree', f'{len(tree):04x}')
+
+src = load_addr + len(tree_loader) + len(text)
+dest = start + len(bin)
+left0 = src
+left1 = dest
+right0 = src + 0x100
+right1 = dest + 0x100
+high0 = src + 0x200
+high1 = dest + 0x200
+dest -= 1
+
+assert tree[5] == 0xb9 # lda NNNN,y
+tree[6] = left0 & 0xff
+tree[7] = left0 >> 8
+assert tree[8] == 0x99 # sta NNNN,y
+tree[9] = left1 & 0xff
+tree[0xa] = left1 >> 8
+
+assert tree[0xb] == 0xb9 # lda NNNN,y
+tree[0xc] = right0 & 0xff
+tree[0xd] = right0 >> 8
+assert tree[0xe] == 0x99 # sta NNNN,y
+tree[0xf] = right1 & 0xff
+tree[0x10] = right1 >> 8
+
+assert tree[0x11] == 0xb9 # lda NNNN,y
+tree[0x12] = high0 & 0xff
+tree[0x13] = high0 >> 8
+assert tree[0x14] == 0x99 # sta NNNN,y
+tree[0x15] = high1 & 0xff
+tree[0x16] = high1 >> 8
+
+assert tree[0x1a] == 0xa9 # lda #NN
+tree[0x1b] = src & 0xff
+assert tree[0x1e] == 0xa9 # lda #NN
+tree[0x1f] = src >> 8
+
+assert tree[0x22] == 0xa9 # lda #NN
+tree[0x23] = dest & 0xff
+assert tree[0x26] == 0xa9 # lda #NN
+tree[0x27] = dest >> 8
+
+assert tree[0x2a] == 0xa9 # lda #NN
+tree[0x2b] = count & 0xff
+assert tree[0x2e] == 0xa9 # lda #NN
+tree[0x2f] = count >> 8
+
+assert tree[0x32] == 0xa9 # lda #NN
+tree[0x33] = bits & 0xff
+assert tree[0x34] == 0x18 # clc
+tree[0x34] |= (bits >> 3) & 0x20 # clc or sec
+
+assert tree[0x5c] == 0x4c # jmp NNNN
+tree[0x5d] = start & 0xff
+tree[0x5e] = start >> 8
+
+assert tree[0x61] == 0xbd # lda NNNN,x
+tree[0x62] = high1 & 0xff
+tree[0x63] = high1 >> 8
+
+assert tree[0x65] == 0xbd # lda NNNN,x
+tree[0x66] = right1 & 0xff
+tree[0x67] = right1 >> 8
+
+assert tree[0x6d] == 0xbd # lda NNNN,x
+tree[0x6e] = high1 & 0xff
+tree[0x6f] = high1 >> 8
+
+assert tree[0x71] == 0xbd # lda NNNN,x
+tree[0x72] = left1 & 0xff
+tree[0x73] = left1 >> 8
+
+load_size = len(tree)
+hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8]
+
+with open(out_bin, 'wb') as fout:
+  fout.write(bytes(hdr + tree))
diff --git a/loader/tree_loader.asm b/loader/tree_loader.asm
new file mode 100644 (file)
index 0000000..6e80f82
--- /dev/null
@@ -0,0 +1,119 @@
+       .r65c02
+
+       .area   zpage
+       .setdp
+
+       .ds     0x80
+src:   .ds     2                       ; address of last byte read
+dest:  .ds     2                       ; address of next byte to write
+count: .ds     2                       ; negative count of 8-token groups
+bits:  .ds     1                       ; buffers bit 8 of tokens in group
+
+       .area   text
+
+       ldx     #0xff
+       txs
+
+       ldy     #0
+0$:    lda     0,y                     ; left0 (above encoded data)
+       sta     0,y                     ; left1 (above decoded data)
+       lda     0,y                     ; right0 (above encoded data)
+       sta     0,y                     ; right1 (above decoded data)
+       lda     0,y                     ; high0 (above encoded data)
+       sta     0,y                     ; high1 (above decoded data)
+       iny
+       bne     0$
+
+       lda     #0
+       sta     src
+       lda     #0
+       sta     src + 1
+
+       lda     #0
+       sta     dest
+       lda     #0
+       sta     dest + 1
+
+       lda     #0
+       sta     count
+       lda     #0
+       sta     count + 1
+
+       lda     #0
+       clc
+loop8: sta     bits
+
+loop1: lda     src
+       bne     0$
+       dec     src + 1
+0$:    dec     src
+
+       lda     [src],y
+.if 1
+       jmp     expand
+expand_ret0:
+.else
+       jsr     expand
+.endif
+
+       ;clc
+       rol     bits
+       bne     loop1
+
+       lda     src
+       bne     1$
+       dec     src + 1
+1$:    dec     src
+
+       lda     [src],y
+       sec
+       rol     a
+
+       inc     count
+       bne     loop8
+       inc     count + 1
+       bne     loop8
+
+       jmp     0
+
+token: pha
+       tax
+
+       ; asxxxx inconsistency:
+       ; lda 0,x seems to default to lda *0,x whereas lda 0,y does not
+       lda     0xaaaa,x                ; high1 (above decoded data)
+       asl     a
+
+       lda     0xaaaa,x                ; right1 (above decoded data)
+.if 1
+       jmp     expand
+expand_ret1:
+.else
+       jsr     expand
+.endif
+
+       pla
+       tax
+
+       lda     0xaaaa,x                ; high1 (above decoded data)
+       lsr     a
+
+       lda     0xaaaa,x                ; left1 (above decoded data)
+expand:        ; a = token bits 0..7, cf = token bit 8
+       bcs     token
+
+       sta     [dest],y
+
+       lda     dest
+       bne     0$
+       dec     dest + 1
+0$:    dec     dest
+
+.if 1
+       tsx
+       inx
+       beq     expand_ret0
+       bne     expand_ret1
+.else
+       rts
+.endif