--- /dev/null
+#!/usr/bin/env python3
+
+import sys
+
+EXIT_SUCCESS = 0
+EXIT_FAILURE = 1
+
+if len(sys.argv) < 3:
+ print(f'usage: {sys.argv[0]:s} in.bin out.bin')
+ sys.exit(EXIT_FAILURE)
+in_bin = sys.argv[1]
+out_bin = sys.argv[2]
+
+with open(in_bin, 'rb') as fin:
+ data = list(fin.read())
+ hdr = data[:4]
+ tree = data[4:]
+load_addr = hdr[0] | (hdr[1] << 8)
+assert hdr[2] == len(tree) & 0xff
+assert hdr[3] == len(tree) >> 8
+
+assert tree[0x1a] == 0xa9 # lda #NN
+assert tree[0x1e] == 0xa9 # lda #NN
+src = tree[0x1b] | (tree[0x1f] << 8)
+
+assert tree[0x22] == 0xa9 # lda #NN
+assert tree[0x26] == 0xa9 # lda #NN
+dest = tree[0x23] + (tree[0x27] << 8)
+
+assert tree[0x2a] == 0xa9 # lda #NN
+assert tree[0x2e] == 0xa9 # lda #NN
+count = tree[0x2b] + (tree[0x2f] << 8)
+
+assert tree[0x32] == 0xa9 # lda #NN
+assert tree[0x34] & ~0x20 == 0x18 # clc
+bits = tree[0x33] | ((tree[0x34] & 0x20) << 3)
+
+assert tree[0x5c] == 0x4c # jmp NNNN
+start = tree[0x5d] | (tree[0x5e] << 8)
+
+assert src == load_addr + len(tree) - 0x300
+dest += 1
+bin = [0] * (dest - start)
+print(f'tree {len(tree):04x} bin {len(bin):04x}')
+
+def decode(_bytes, bits, j):
+ while bits & 0xff:
+ byte = _bytes.pop()
+ if bits & 0x100:
+ j = decode(
+ [tree[-0x300 + byte], tree[-0x200 + byte]],
+ 0x40 |
+ ((tree[-0x100 + byte] & 1) << 7) |
+ ((tree[-0x100 + byte] & 0x80) << 1),
+ j
+ )
+ else:
+ j -= 1
+ assert j >= 0
+ bin[j] = byte
+ bits <<= 1
+ assert len(_bytes) == 0
+ return j
+
+padding = 0
+while (bits & (1 << padding)) == 0:
+ padding += 1
+n = 8 - padding
+count |= (-1 << 16)
+
+i = len(tree) - 0x300
+j = len(bin)
+while count < 0:
+ i -= n
+ assert i >= 0x86
+ j = decode(tree[i:i + n], bits, j)
+
+ i -= 1
+ assert i >= 0x86 - 1 # last one is garbage
+ bits = (tree[i] << 1) | 1
+ n = 8
+
+ count += 1
+assert i == 0x86 - 1 # last one is garbage
+assert j == 0
+
+load_addr = start
+load_size = len(bin)
+hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8]
+
+with open(out_bin, 'wb') as fout:
+ fout.write(bytes(hdr + bin))
--- /dev/null
+#!/usr/bin/env python3
+
+import numpy
+import sys
+from heapdict import heapdict
+
+EXIT_SUCCESS = 0
+EXIT_FAILURE = 1
+
+if len(sys.argv) < 5:
+ print(f'usage: {sys.argv[0]:s} load_addr tree_loader.bin in.bin out.bin')
+ sys.exit(EXIT_FAILURE)
+load_addr = int(sys.argv[1], 0)
+tree_loader_bin = sys.argv[2]
+in_bin = sys.argv[3]
+out_bin = sys.argv[4]
+
+with open(tree_loader_bin, 'rb') as fin:
+ tree_loader = list(fin.read())
+assert len(tree_loader) == 0x86
+
+with open(in_bin, 'rb') as fin:
+ data = list(fin.read())
+ hdr = data[:4]
+ bin = data[4:]
+start = hdr[0] | (hdr[1] << 8)
+assert hdr[2] == len(bin) & 0xff
+assert hdr[3] == len(bin) >> 8
+
+text = list(bin)
+
+freq = heapdict() #{}
+for i in range(len(text) - 1):
+ pair = tuple(text[i:i + 2])
+ if pair not in freq:
+ freq[pair] = 0
+ freq[pair] -= 1
+
+pairs = []
+for i in range(0x100, 0x200):
+ best, _ = freq.peekitem()
+ #_, best = min([(v, k) for k, v in freq.items()])
+ pairs.append(best)
+ j = 0
+ while True:
+ try:
+ j = text.index(best[0], j)
+ except ValueError:
+ break
+ if j + 1 < len(text) and text[j + 1] == best[1]:
+ if j >= 1:
+ pair = (text[j - 1], best[0])
+ freq[pair] += 1
+ pair = (pair[0], i)
+ if pair not in freq:
+ freq[pair] = 0
+ freq[pair] -= 1
+ freq[best] += 1
+ if j + 2 < len(text):
+ pair = (best[1], text[j + 2])
+ freq[pair] += 1
+ pair = (i, pair[1])
+ if pair not in freq:
+ freq[pair] = 0
+ freq[pair] -= 1
+ text[j:j + 2] = [i]
+ j += 1
+ assert freq[best] == 0
+ del freq[best]
+
+pairs = numpy.array(pairs, numpy.uint16)
+text = numpy.array(text, numpy.uint16)
+
+left = (pairs[:, 0] & 0xff).astype(numpy.uint8)
+right = (pairs[:, 1] & 0xff).astype(numpy.uint8)
+high = ((pairs[:, 0] >> 8) | ((pairs[:, 1] >> 1) & 0x80)).astype(numpy.uint8)
+
+n = text.shape[0]
+m = (n + 7) >> 3
+
+# make groups of 8, with padding to decode first
+text1 = numpy.zeros((m * 8,), numpy.uint16)
+text1[:n] = text
+text = text1.reshape((m, 8))
+
+# make groups of 9, as 8 * bits 0..7 then 1 * bit 8s to decode first
+text1 = numpy.zeros((m, 9), numpy.uint8)
+text1[:, :-1] = text & 0xff
+text1[:, 8] = numpy.bitwise_or.reduce(
+ (text >> 8) << numpy.arange(8, dtype = numpy.int32)[numpy.newaxis, :],
+ 1
+)
+text = text1.reshape((m * 9,))
+
+# bits is preloaded so we can enter loop8 partway through a group,
+# simulate decoding the padding by appending sentinel then shifting
+count = -n
+padding = count & 7
+count = (count >> 3) & 0xffff
+bits = (((text[-1] << 1) | 1) << padding) & 0x1ff
+text = text[:-1 - padding]
+
+text = list(text)
+left = list(left)
+right = list(right)
+high = list(high)
+tree = tree_loader + text + left + right + high
+print('bin', f'{len(bin):04x}', 'tree', f'{len(tree):04x}')
+
+src = load_addr + len(tree_loader) + len(text)
+dest = start + len(bin)
+left0 = src
+left1 = dest
+right0 = src + 0x100
+right1 = dest + 0x100
+high0 = src + 0x200
+high1 = dest + 0x200
+dest -= 1
+
+assert tree[5] == 0xb9 # lda NNNN,y
+tree[6] = left0 & 0xff
+tree[7] = left0 >> 8
+assert tree[8] == 0x99 # sta NNNN,y
+tree[9] = left1 & 0xff
+tree[0xa] = left1 >> 8
+
+assert tree[0xb] == 0xb9 # lda NNNN,y
+tree[0xc] = right0 & 0xff
+tree[0xd] = right0 >> 8
+assert tree[0xe] == 0x99 # sta NNNN,y
+tree[0xf] = right1 & 0xff
+tree[0x10] = right1 >> 8
+
+assert tree[0x11] == 0xb9 # lda NNNN,y
+tree[0x12] = high0 & 0xff
+tree[0x13] = high0 >> 8
+assert tree[0x14] == 0x99 # sta NNNN,y
+tree[0x15] = high1 & 0xff
+tree[0x16] = high1 >> 8
+
+assert tree[0x1a] == 0xa9 # lda #NN
+tree[0x1b] = src & 0xff
+assert tree[0x1e] == 0xa9 # lda #NN
+tree[0x1f] = src >> 8
+
+assert tree[0x22] == 0xa9 # lda #NN
+tree[0x23] = dest & 0xff
+assert tree[0x26] == 0xa9 # lda #NN
+tree[0x27] = dest >> 8
+
+assert tree[0x2a] == 0xa9 # lda #NN
+tree[0x2b] = count & 0xff
+assert tree[0x2e] == 0xa9 # lda #NN
+tree[0x2f] = count >> 8
+
+assert tree[0x32] == 0xa9 # lda #NN
+tree[0x33] = bits & 0xff
+assert tree[0x34] == 0x18 # clc
+tree[0x34] |= (bits >> 3) & 0x20 # clc or sec
+
+assert tree[0x5c] == 0x4c # jmp NNNN
+tree[0x5d] = start & 0xff
+tree[0x5e] = start >> 8
+
+assert tree[0x61] == 0xbd # lda NNNN,x
+tree[0x62] = high1 & 0xff
+tree[0x63] = high1 >> 8
+
+assert tree[0x65] == 0xbd # lda NNNN,x
+tree[0x66] = right1 & 0xff
+tree[0x67] = right1 >> 8
+
+assert tree[0x6d] == 0xbd # lda NNNN,x
+tree[0x6e] = high1 & 0xff
+tree[0x6f] = high1 >> 8
+
+assert tree[0x71] == 0xbd # lda NNNN,x
+tree[0x72] = left1 & 0xff
+tree[0x73] = left1 >> 8
+
+load_size = len(tree)
+hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8]
+
+with open(out_bin, 'wb') as fout:
+ fout.write(bytes(hdr + tree))
--- /dev/null
+ .r65c02
+
+ .area zpage
+ .setdp
+
+ .ds 0x80
+src: .ds 2 ; address of last byte read
+dest: .ds 2 ; address of next byte to write
+count: .ds 2 ; negative count of 8-token groups
+bits: .ds 1 ; buffers bit 8 of tokens in group
+
+ .area text
+
+ ldx #0xff
+ txs
+
+ ldy #0
+0$: lda 0,y ; left0 (above encoded data)
+ sta 0,y ; left1 (above decoded data)
+ lda 0,y ; right0 (above encoded data)
+ sta 0,y ; right1 (above decoded data)
+ lda 0,y ; high0 (above encoded data)
+ sta 0,y ; high1 (above decoded data)
+ iny
+ bne 0$
+
+ lda #0
+ sta src
+ lda #0
+ sta src + 1
+
+ lda #0
+ sta dest
+ lda #0
+ sta dest + 1
+
+ lda #0
+ sta count
+ lda #0
+ sta count + 1
+
+ lda #0
+ clc
+loop8: sta bits
+
+loop1: lda src
+ bne 0$
+ dec src + 1
+0$: dec src
+
+ lda [src],y
+.if 1
+ jmp expand
+expand_ret0:
+.else
+ jsr expand
+.endif
+
+ ;clc
+ rol bits
+ bne loop1
+
+ lda src
+ bne 1$
+ dec src + 1
+1$: dec src
+
+ lda [src],y
+ sec
+ rol a
+
+ inc count
+ bne loop8
+ inc count + 1
+ bne loop8
+
+ jmp 0
+
+token: pha
+ tax
+
+ ; asxxxx inconsistency:
+ ; lda 0,x seems to default to lda *0,x whereas lda 0,y does not
+ lda 0xaaaa,x ; high1 (above decoded data)
+ asl a
+
+ lda 0xaaaa,x ; right1 (above decoded data)
+.if 1
+ jmp expand
+expand_ret1:
+.else
+ jsr expand
+.endif
+
+ pla
+ tax
+
+ lda 0xaaaa,x ; high1 (above decoded data)
+ lsr a
+
+ lda 0xaaaa,x ; left1 (above decoded data)
+expand: ; a = token bits 0..7, cf = token bit 8
+ bcs token
+
+ sta [dest],y
+
+ lda dest
+ bne 0$
+ dec dest + 1
+0$: dec dest
+
+.if 1
+ tsx
+ inx
+ beq expand_ret0
+ bne expand_ret1
+.else
+ rts
+.endif