# key byte is partitioned as follows
KEY_EOF = 0
-KEY_LIT = 1 # literal lengths 01..a7
+KEY_LIT = 1 # literal lengths 01..af
KEY_RUN = 0xa8 # run lengths 02..49
KEY_PAIR_RUN = 0xf0 # pair run lengths 03..12
N_KEYS = 0x100
data = list(fin.read())
hdr = data[:4]
rle2 = data[4:]
-load_addr = hdr[0] | (hdr[1] << 8)
-assert hdr[2] == len(rle2) & 0xff
-assert hdr[3] == len(rle2) >> 8
+load_addr0 = hdr[0] | (hdr[1] << 8)
+load_size0 = hdr[2] | (hdr[3] << 8)
+assert load_size0 == len(rle2)
assert rle2[0] == 0xa9 # lda #NN
assert rle2[4] == 0xa9 # lda #NN
src = rle2[1] | (rle2[5] << 8)
-assert src == load_addr + len(rle2) - 1
+assert src == load_addr0 + load_size0 - 1
assert rle2[8] == 0xa9 # lda #NN
assert rle2[0xc] == 0xa9 # lda #NN
dest = rle2[9] + (rle2[0xd] << 8)
assert rle2[0x6b] == 0x4c # jmp NNNN
-start = rle2[0x6c] | (rle2[0x6d] << 8)
+load_addr = rle2[0x6c] | (rle2[0x6d] << 8)
+load_size = dest + 1 - load_addr
-bin = [0] * (dest + 1 - start)
-print(f'rle2 {len(rle2):04x} bin {len(bin):04x}')
+print(f'rle2 {load_size0:04x} orig {load_size:04x}')
-i = len(rle2)
-j = len(bin)
+rle2 = rle2[0xa1:]
+bin = []
while True:
- i -= 1
- assert i >= 0xa1
- count = rle2[i]
+ count = rle2.pop()
if count == KEY_EOF:
break
if count < KEY_RUN:
- i -= count
- assert i >= 0xa1
-
- j -= count
- assert j >= 0
-
- bin[j:j + count] = rle2[i:i + count]
+ bin.extend(rle2[-1:-count - 1:-1])
+ del rle2[-count:]
elif count < KEY_PAIR_RUN:
count -= KEY_RUN - 2
-
- i -= 1
- assert i >= 0
-
- j -= count
- assert j >= 0
-
- bin[j:j + count] = rle2[i:i + 1] * count
+ bin.extend(rle2[-1:] * count)
+ del rle2[-1:]
else:
count -= KEY_PAIR_RUN - 3
+ bin.extend(rle2[-1:-3:-1] * (count >> 1) + rle2[-1:] * (count & 1))
+ del rle2[-2:]
+assert len(rle2) == 0
+assert len(bin) == load_size
- i -= 2
- assert i >= 0
-
- j -= count
- assert j >= 0
-
- bin[j:j + count] = (rle2[i:i + 2] * ((count + 1) >> 1))[-count:]
-assert i == 0xa1
-assert j == 0
-
-load_addr = start
-load_size = len(bin)
+bin = bin[::-1]
hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8]
with open(out_bin, 'wb') as fout:
data = list(fin.read())
hdr = data[:4]
bin = data[4:]
-start = hdr[0] | (hdr[1] << 8)
-assert hdr[2] == len(bin) & 0xff
-assert hdr[3] == len(bin) >> 8
+load_addr0 = hdr[0] | (hdr[1] << 8)
+load_size0 = hdr[2] | (hdr[3] << 8)
+assert load_size0 == len(bin)
-rle2 = [0] * (len(bin) + (len(bin) >> 7) + 2)
-
-i = len(bin)
-j = len(rle2)
-while i:
+rle2 = []
+while len(bin):
# look for a run
count = 1
while (
count < MAX_RUN and
- i - count - 1 >= 0 and
- bin[i - count - 1] == bin[i - 1]
+ len(bin) >= count + 1 and
+ bin[-count - 1] == bin[-1]
):
count += 1
# since we are in key mode we can output runs of at least 2
if count >= 2:
- i -= count
- assert i >= 0
-
- j -= 1
- assert j >= 0
- rle2[j] = KEY_RUN + count - 2
-
- j -= 1
- assert j >= 0
- rle2[j] = bin[i]
+ rle2.extend([KEY_RUN + count - 2, bin[-1]])
+ del bin[-count:]
else:
# look for a run of pairs
count = 2
while (
count < MAX_PAIR_RUN and
- i - count - 1 >= 0 and
- bin[i - count - 1] == bin[i - 1 - (count & 1)]
+ len(bin) >= count + 1 and
+ bin[-count - 1] == bin[-(count & 1) - 1]
):
count += 1
- # since we are in key mode we can output pair runs of at least 2
+ # since we are in key mode we can output pair runs of at least 3
if count >= 3:
- i -= count
- assert i >= 0
-
- j -= 1
- assert j >= 0
- rle2[j] = KEY_PAIR_RUN + count - 3
-
- j -= 2
- assert j >= 0
- rle2[j:j + 2] = bin[i + count - 2:i + count]
+ rle2.extend([KEY_PAIR_RUN + count - 3] + bin[-1:-3:-1])
+ del bin[-count:]
else:
# we will have to start literal mode
# stop with a run of a least 3 or pair run of at least 4
count = 1
while (
count < MAX_LIT and
- count < i and
+ count < len(bin) and
(
- i - count - 3 < 0 or
- bin[i - count - 2] != bin[i - count - 1] or
- bin[i - count - 3] != bin[i - count - 1]
+ len(bin) < count + 3 or
+ bin[-count - 2] != bin[-count - 1] or
+ bin[-count - 3] != bin[-count - 1]
)
and
(
- i - count - 4 < 0 or
- bin[i - count - 3] != bin[i - count - 1] or
- bin[i - count - 4] != bin[i - count - 2]
+ len(bin) < count + 4 or
+ bin[-count - 3] != bin[-count - 1] or
+ bin[-count - 4] != bin[-count - 2]
)
):
count += 1
# but do not stop in the middle of a pair
if (
count == MAX_LIT and
- count + 1 <= i and
- bin[i - count - 1] == bin[i - count]
+ len(bin) >= count + 1 and
+ bin[-count - 1] == bin[-count]
):
count -= 1
- i -= count
- assert i >= 0
+ rle2.extend([KEY_LIT + count - 1] + bin[-1:-count - 1:-1])
+ del bin[-count:]
+rle2.append(KEY_EOF)
- j -= 1
- assert j >= 0
- rle2[j] = KEY_LIT + count - 1
+rle2 = rle2_loader + rle2[::-1]
+load_size = len(rle2)
- j -= count
- assert j >= 0
- rle2[j:j + count] = bin[i:i + count]
-j -= 1
-assert j >= 0
-rle2[j] = KEY_EOF
+print(f'orig {load_size0:04x} rle2 {load_size:04x}')
-rle2 = rle2_loader + rle2[j:]
-print(f'bin {len(bin):04x} rle2 {len(rle2):04x}')
+src = load_addr + load_size - 1
+dest = load_addr0 + load_size0 - 1
-src = load_addr + len(rle2) - 1
assert rle2[0] == 0xa9 # lda #NN
rle2[1] = src & 0xff
assert rle2[4] == 0xa9 # lda #NN
rle2[5] = src >> 8
-dest = start + len(bin) - 1
assert rle2[8] == 0xa9 # lda #NN
rle2[9] = dest & 0xff
assert rle2[0xc] == 0xa9 # lda #NN
rle2[0xd] = dest >> 8
assert rle2[0x6b] == 0x4c # jmp NNNN
-rle2[0x6c] = start & 0xff
-rle2[0x6d] = start >> 8
+rle2[0x6c] = load_addr0 & 0xff
+rle2[0x6d] = load_addr0 >> 8
-load_size = len(rle2)
hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8]
with open(out_bin, 'wb') as fout:
data = list(fin.read())
hdr = data[:4]
rle = data[4:]
-load_addr = hdr[0] | (hdr[1] << 8)
-assert hdr[2] == len(rle) & 0xff
-assert hdr[3] == len(rle) >> 8
+load_addr0 = hdr[0] | (hdr[1] << 8)
+load_size0 = hdr[2] | (hdr[3] << 8)
+assert load_size0 == len(rle)
assert rle[0] == 0xa9 # lda #NN
assert rle[4] == 0xa9 # lda #NN
src = rle[1] | (rle[5] << 8)
-assert src == load_addr + len(rle) - 1
+assert src == load_addr0 + load_size0 - 1
assert rle[8] == 0xa9 # lda #NN
assert rle[0xc] == 0xa9 # lda #NN
dest = rle[9] + (rle[0xd] << 8)
assert rle[0x67] == 0x4c # jmp NNNN
-start = rle[0x68] | (rle[0x69] << 8)
+load_addr = rle[0x68] | (rle[0x69] << 8)
+load_size = dest + 1 - load_addr
-bin = [0] * (dest + 1 - start)
-print(f'rle {len(rle):04x} bin {len(bin):04x}')
+print(f'rle {load_size0:04x} orig {load_size:04x}')
-i = len(rle)
-j = len(bin)
+rle = rle[0x6a:]
+bin = []
while True:
- i -= 1
- assert i >= 0x6a
- count = rle[i]
+ count = rle.pop()
if count == KEY_EOF:
break
if count < KEY_RUN:
- i -= count
- assert i >= 0x6a
-
- j -= count
- assert j >= 0
-
- bin[j:j + count] = rle[i:i + count]
+ bin.extend(rle[-1:-count - 1:-1])
+ del rle[-count:]
else:
count -= KEY_RUN - 2
+ bin.extend(rle[-1:] * count)
+ del rle[-1:]
+assert len(rle) == 0
+assert len(bin) == load_size
- i -= 1
- assert i >= 0
-
- j -= count
- assert j >= 0
-
- bin[j:j + count] = rle[i:i + 1] * count
-assert i == 0x6a
-assert j == 0
-
-load_addr = start
-load_size = len(bin)
+bin = bin[::-1]
hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8]
with open(out_bin, 'wb') as fout:
data = list(fin.read())
hdr = data[:4]
bin = data[4:]
-start = hdr[0] | (hdr[1] << 8)
-assert hdr[2] == len(bin) & 0xff
-assert hdr[3] == len(bin) >> 8
+load_addr0 = hdr[0] | (hdr[1] << 8)
+load_size0 = hdr[2] | (hdr[3] << 8)
+assert load_size0 == len(bin)
-rle = [0] * (len(bin) + (len(bin) >> 7) + 2)
-
-i = len(bin)
-j = len(rle)
-while i:
+rle = []
+while len(bin):
# look for a run
count = 1
while (
count < MAX_RUN and
- i - count - 1 >= 0 and
- bin[i - count - 1] == bin[i - 1]
+ len(bin) >= count + 1 and
+ bin[-count - 1] == bin[-1]
):
count += 1
# since we are in key mode we can output runs of at least 2
if count >= 2:
- i -= count
- assert i >= 0
-
- j -= 1
- assert j >= 0
- rle[j] = KEY_RUN + count - 2
-
- j -= 1
- assert j >= 0
- rle[j] = bin[i]
+ rle.extend([KEY_RUN + count - 2, bin[-1]])
+ del bin[-count:]
else:
# we will have to start literal mode, stop with a run of at least 3
count = 1
while (
- count < MAX_LIT and count < i and (
- i - count - 3 < 0 or
- bin[i - count - 2] != bin[i - count - 1] or
- bin[i - count - 3] != bin[i - count - 1]
+ count < MAX_LIT and count < len(bin) and (
+ len(bin) < count + 3 or
+ bin[-count - 2] != bin[-count - 1] or
+ bin[-count - 3] != bin[-count - 1]
)
):
count += 1
# but do not stop in the middle of a pair
if (
count == MAX_LIT and
- count + 1 <= i and
- bin[i - count - 1] == bin[i - count]
+ len(bin) >= count + 1 and
+ bin[-count - 1] == bin[-count]
):
count -= 1
- i -= count
- assert i >= 0
+ rle.extend([KEY_LIT + count - 1] + bin[-1:-count - 1:-1])
+ del bin[-count:]
+rle.append(KEY_EOF)
- j -= 1
- assert j >= 0
- rle[j] = KEY_LIT + count - 1
+rle = rle_loader + rle[::-1]
+load_size = len(rle)
- j -= count
- assert j >= 0
- rle[j:j + count] = bin[i:i + count]
-j -= 1
-assert j >= 0
-rle[j] = KEY_EOF
+print(f'orig {load_size0:04x} rle {load_size:04x}')
-rle = rle_loader + rle[j:]
-print(f'bin {len(bin):04x} rle {len(rle):04x}')
+src = load_addr + load_size - 1
+dest = load_addr0 + load_size0 - 1
-src = load_addr + len(rle) - 1
assert rle[0] == 0xa9 # lda #NN
rle[1] = src & 0xff
assert rle[4] == 0xa9 # lda #NN
rle[5] = src >> 8
-dest = start + len(bin) - 1
assert rle[8] == 0xa9 # lda #NN
rle[9] = dest & 0xff
assert rle[0xc] == 0xa9 # lda #NN
rle[0xd] = dest >> 8
assert rle[0x67] == 0x4c # jmp NNNN
-rle[0x68] = start & 0xff
-rle[0x69] = start >> 8
+rle[0x68] = load_addr0 & 0xff
+rle[0x69] = load_addr0 >> 8
-load_size = len(rle)
hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8]
with open(out_bin, 'wb') as fout:
data = list(fin.read())
hdr = data[:4]
tree = data[4:]
-load_addr = hdr[0] | (hdr[1] << 8)
-assert hdr[2] == len(tree) & 0xff
-assert hdr[3] == len(tree) >> 8
+load_addr0 = hdr[0] | (hdr[1] << 8)
+load_size0 = hdr[2] | (hdr[3] << 8)
+assert load_size0 == len(tree)
assert tree[0x15] == 0xa9 # lda #NN
assert tree[0x19] == 0xa9 # lda #NN
bits = tree[0x2e] | ((tree[0x2f] & 0x20) << 3)
assert tree[0x60] == 0x4c # jmp NNNN
-start = tree[0x61] | (tree[0x62] << 8)
+load_addr = tree[0x61] | (tree[0x62] << 8)
assert tree[0x65] == 0xe9 # sbc #NN
break3 = tree[0x66]
assert tree[0x7a] == 0xc9 # cmp #NN
break2 = tree[0x7b]
-assert src == load_addr + len(tree) - 0x200
-dest += 1
-bin = [0] * (dest - start)
-print(f'tree {len(tree):04x} bin {len(bin):04x}')
+assert src == load_addr0 + load_size0 - 0x200
+left = tree[-0x200:-0x100]
+right = tree[-0x100:]
+tree = [0] + tree[0x91:-0x200] # 0 is sentinel for last dummy bits load
-def decode(_bytes, bits, j):
+bin = []
+def decode(_bytes, bits):
while bits & 0xff:
byte = _bytes.pop()
if bits & 0x100:
- j = decode(
- [tree[-0x200 + byte], tree[-0x100 + byte]],
+ decode(
+ [left[byte], right[byte]],
(
0x40
if byte < break1 else
0x1c0
if byte < break3 else
0xc0
- ),
- j
+ )
)
else:
- j -= 1
- assert j >= 0
- bin[j] = byte
+ bin.append(byte)
bits <<= 1
assert len(_bytes) == 0
- return j
padding = 0
while (bits & (1 << padding)) == 0:
padding += 1
n = 8 - padding
-count |= (-1 << 16)
-i = len(tree) - 0x200
-j = len(bin)
+count |= (-1 << 16)
while count < 0:
- i -= n
- assert i >= 0x91
- j = decode(tree[i:i + n], bits, j)
+ decode(tree[-n:], bits)
+ del tree[-n:]
- i -= 1
- assert i >= 0x91 - 1 # last one is garbage
- bits = (tree[i] << 1) | 1
+ bits = (tree.pop() << 1) | 1
n = 8
count += 1
-assert i == 0x91 - 1 # last one is garbage
-assert j == 0
+assert len(tree) == 0
+bin = bin[::-1]
-load_addr = start
load_size = len(bin)
+print(f'tree {load_size0:04x} orig {load_size:04x}')
+
+load_addr = dest + 1 - load_size
hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8]
with open(out_bin, 'wb') as fout:
data = list(fin.read())
hdr = data[:4]
bin = data[4:]
-start = hdr[0] | (hdr[1] << 8)
-assert hdr[2] == len(bin) & 0xff
-assert hdr[3] == len(bin) >> 8
+load_addr0 = hdr[0] | (hdr[1] << 8)
+load_size0 = hdr[2] | (hdr[3] << 8)
+assert load_size0 == len(bin)
-text = list(bin)
+bin = list(bin)
freq = heapdict() #{}
-for i in range(len(text) - 1):
- pair = tuple(text[i:i + 2])
+for i in range(len(bin) - 1):
+ pair = tuple(bin[i:i + 2])
if pair not in freq:
freq[pair] = 0
freq[pair] -= 1
j = 0
while True:
try:
- j = text.index(best[0], j)
+ j = bin.index(best[0], j)
except ValueError:
break
- if j + 1 < len(text) and text[j + 1] == best[1]:
+ if j + 1 < len(bin) and bin[j + 1] == best[1]:
if j >= 1:
- pair = (text[j - 1], best[0])
+ pair = (bin[j - 1], best[0])
freq[pair] += 1
pair = (pair[0], token)
if pair not in freq:
freq[pair] = 0
freq[pair] -= 1
freq[best] += 1
- if j + 2 < len(text):
- pair = (best[1], text[j + 2])
+ if j + 2 < len(bin):
+ pair = (best[1], bin[j + 2])
freq[pair] += 1
pair = (token, pair[1])
if pair not in freq:
freq[pair] = 0
freq[pair] -= 1
- text[j:j + 2] = [token]
+ bin[j:j + 2] = [token]
j += 1
assert freq[best] == 0
del freq[best]
xlate[tokens[i][0]] = 0x100 + i
pairs = [(xlate[left], xlate[right]) for _, (left, right) in tokens]
tokens = [token for token, _ in tokens]
-text = [xlate[i] for i in text]
+bin = [xlate[i] for i in bin]
# find the gray code transitions in the collapsed token space
break1 = bisect.bisect_left(tokens, 0x200)
break3 = bisect.bisect_left(tokens, 0x400)
pairs = numpy.array(pairs, numpy.uint16)
-text = numpy.array(text, numpy.uint16)
+bin = numpy.array(bin, numpy.uint16)
left = (pairs[:, 0] & 0xff).astype(numpy.uint8)
assert not numpy.any(pairs[:break2, 0] >> 8)
assert numpy.all(pairs[break1:break3, 1] >> 8)
assert not numpy.any(pairs[break3:, 1] >> 8)
-n = text.shape[0]
+n = bin.shape[0]
m = (n + 7) >> 3
# make groups of 8, with padding to decode first
-text1 = numpy.zeros((m * 8,), numpy.uint16)
-text1[:n] = text
-text = text1.reshape((m, 8))
+bin1 = numpy.zeros((m * 8,), numpy.uint16)
+bin1[:n] = bin
+bin = bin1.reshape((m, 8))
# make groups of 9, as 8 * bits 0..7 then 1 * bit 8s to decode first
-text1 = numpy.zeros((m, 9), numpy.uint8)
-text1[:, :-1] = text & 0xff
-text1[:, 8] = numpy.bitwise_or.reduce(
- (text >> 8) << numpy.arange(8, dtype = numpy.int32)[numpy.newaxis, :],
+bin1 = numpy.zeros((m, 9), numpy.uint8)
+bin1[:, :-1] = bin & 0xff
+bin1[:, 8] = numpy.bitwise_or.reduce(
+ (bin >> 8) << numpy.arange(8, dtype = numpy.int32)[numpy.newaxis, :],
1
)
-text = text1.reshape((m * 9,))
+bin = bin1.reshape((m * 9,))
# bits is preloaded so we can enter loop8 partway through a group,
# simulate decoding the padding by appending sentinel then shifting
count = -n
padding = count & 7
count = (count >> 3) & 0xffff
-bits = (((text[-1] << 1) | 1) << padding) & 0x1ff
-text = text[:-1 - padding]
+bits = (((bin[-1] << 1) | 1) << padding) & 0x1ff
+bin = bin[:-1 - padding]
-text = list(text)
+bin = list(bin)
left = list(left)
right = list(right)
-tree = tree_loader + text + left + right
-print('bin', f'{len(bin):04x}', 'tree', f'{len(tree):04x}')
+tree = tree_loader + bin + left + right
+load_size = len(tree)
+print('orig', f'{load_size0:04x}', 'tree', f'{load_size:04x}')
-src = load_addr + len(tree_loader) + len(text)
-dest = start + len(bin)
+src = load_addr + load_size - 0x200
+dest = load_addr0 + load_size0
left0 = src
left1 = dest
right0 = src + 0x100
right1 = dest + 0x100
-high0 = src + 0x200
-high1 = dest + 0x200
dest -= 1
assert tree[6] == 0xb9 # lda NNNN,y
tree[0x2f] |= (bits >> 3) & 0x20 # clc or sec
assert tree[0x60] == 0x4c # jmp NNNN
-tree[0x61] = start & 0xff
-tree[0x62] = start >> 8
+tree[0x61] = load_addr0 & 0xff
+tree[0x62] = load_addr0 >> 8
assert tree[0x65] == 0xe9 # sbc #NN
tree[0x66] = break3
tree[0x7d] = left1 & 0xff
tree[0x7e] = left1 >> 8
-load_size = len(tree)
hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8]
with open(out_bin, 'wb') as fout: