EXIT_FAILURE = 1
if len(sys.argv) < 4:
- print(f'usage: {sys.argv[0]:s} entry_point in.a2bin out.ihx')
+ print(f'usage: {sys.argv[0]:s} entry_point in.a2bin out.ihx <segments.txt')
sys.exit(EXIT_FAILURE)
entry_point = int(sys.argv[1], 0)
in_a2bin = sys.argv[2]
out_ihx = sys.argv[3]
+# read the optional list of half-open segments "[start, end)" from stdin,
+# one per line; "#" starts a comment and blank lines are ignored
+segments0 = []
+for lineno, line in enumerate(sys.stdin, 1):
+    line = line.partition('#')[0].strip()
+    if not line:
+        continue
+    fields = line[1:-1].split(',')
+    try:
+        # explicit checks rather than assert, which is stripped under -O
+        if line[0] != '[' or line[-1] != ')' or len(fields) != 2:
+            raise ValueError(line)
+        segments0.extend([int(field, 0) for field in fields])
+    except ValueError:
+        print(f'stdin line {lineno:d}: expected "[start, end)", got "{line:s}"')
+        sys.exit(EXIT_FAILURE)
+if segments0 != sorted(segments0):
+    # the boundary merge further down needs ascending, non-overlapping input
+    print('segments must be ascending and must not overlap')
+    sys.exit(EXIT_FAILURE)
+if not segments0:
+    # no restriction given: keep the whole 64 kbyte address space
+    segments0 = [0, 0x10000]
+
with open(in_a2bin, 'rb') as fin:
a2bin = list(fin.read())
hdr = a2bin[:4]
self.mem_used = mem_used
def __getitem__(self, addr):
- self.mem_used[addr] = False
+ #self.mem_used[addr] = False
return self.mem[addr]
def __setitem__(self, addr, data):
if mpu.sp != 0xff:
- print('warning: s = 0x{mpu.sp:02x}')
+ # f-string, so the actual stack pointer value is shown in the warning
+ print(f'warning: s = 0x{mpu.sp:02x}')
+# convert the per-address mem_used flags into a flat list of boundaries
+# [start0, end0, start1, end1, ...]: an odd list length means we are
+# currently inside a used run, so a boundary is recorded at every address
+# where the flag differs from that parity
+segments1 = []
+for i in range(0x10000):
+ if mem_used[i] != (len(segments1) & 1):
+ segments1.append(i)
+if len(segments1) & 1:
+ # the last used run extends to the end of memory, so close it
+ segments1.append(0x10000)
+
+# intersect segments
+# merge the two boundary lists (both assumed to be in ascending order);
+# i and j count the boundaries consumed from each list, so an odd count
+# means the current address is inside a segment of that list, and
+# (i & j & 1) is set only when it is inside both
+segments = []
+i = 0
+j = 0
+while i < len(segments0) and j < len(segments1):
+ if segments0[i] < segments1[j]:
+ addr = segments0[i]
+ i += 1
+ elif segments1[j] < segments0[i]:
+ addr = segments1[j]
+ j += 1
+ else:
+ addr = segments0[i]
+ i += 1
+ j += 1
+ # emit a boundary whenever the inside-both state changes
+ if (i & j & 1) != (len(segments) & 1):
+ segments.append(addr)
+assert (len(segments) & 1) == 0
+
intelhex = IntelHex()
intelhex.start_addr = {'EIP': entry_point}
-for i in range(0x10000):
- if mem_used[i]:
- intelhex[i] = mem[i]
+# report each surviving [addr0, addr1) range and store its bytes in the
+# hex file at the same address
+for i in range(0, len(segments), 2):
+ [addr0, addr1] = segments[i:i + 2]
+ print(f'[0x{addr0:04x}, 0x{addr1:04x})')
+ intelhex.frombytes(bytes(mem[addr0:addr1]), addr0)
intelhex.write_hex_file(out_ihx)
--- /dev/null
+#!/usr/bin/env python3
+
+import sys
+from intelhex import IntelHex
+
+EXIT_SUCCESS = 0
+EXIT_FAILURE = 1
+
+# short (8-bit) pointer
+DIST_BITS0 = 7
+LEN_BITS0 = 1
+
+# long (16-bit) pointer
+DIST_BITS1 = 10
+LEN_BITS1 = 6
+
+MAX_DIST = (1 << DIST_BITS1) # distance codes are 1..MAX_DIST
+MAX_LEN = (1 << LEN_BITS1) + 1 # length codes are 2..MAX_LEN
+
+if len(sys.argv) < 5:
+ print(f'usage: {sys.argv[0]:s} load_addr lzss_unpack.bin in.ihx out.a2bin')
+ sys.exit(EXIT_FAILURE)
+load_addr = int(sys.argv[1], 0)
+lzss_unpack_bin = sys.argv[2]
+in_ihx = sys.argv[3]
+out_a2bin = sys.argv[4]
+
+with open(lzss_unpack_bin, 'rb') as fin:
+ lzss_unpack = list(fin.read())
+
+def lzss_pack(dest, bin):
+ """Compress the byte list bin into a data block for the unpacker.
+
+ Returns a list of byte values: dest (low, high), the complemented
+ count of bit buffer refills (low, high), the initial bit buffer, and
+ then the LZSS stream. The packed stream is decoded again here and
+ asserted to reproduce bin before it is returned.
+ """
+ # heads maps a byte pair to the most recent position where it starts,
+ # links[k] is the previous position holding the same pair as position k
+ # (-1 = none), so together they form a chain of match candidates
+ heads = {}
+ links = [-1] * len(bin)
+ # list of (_len, dist) items; _len == 1 is a literal whose byte value
+ # is carried in dist, otherwise it is a copy of _len bytes from dist back
+ lzss = []
+ i = 0
+ while i < len(bin):
+ _len = 1
+ dist = bin[i]
+
+ if i + 1 < len(bin):
+ pair = bin[i], bin[i + 1]
+ j = heads.get(pair, -1)
+ # candidates are visited from nearest to farthest; one is accepted
+ # only if it matches at least one byte more than the best so far,
+ # and is then extended as far as it goes
+ while j != -1 and i - j <= MAX_DIST:
+ #assert bin[i:i + 2] == bin[j:j + 2]
+ if (
+ _len < MAX_LEN and
+ i + _len < len(bin) and
+ bin[i + 2:i + _len + 1] == bin[j + 2:j + _len + 1]
+ ):
+ _len += 1
+ while (
+ _len < MAX_LEN and
+ i + _len < len(bin) and
+ bin[i + _len] == bin[j + _len]
+ ):
+ _len += 1
+ dist = i - j
+ j = links[j]
+ lzss.append((_len, dist))
+
+ # register every position covered by this item in the pair chains
+ for j in range(_len):
+ if i + 1 < len(bin):
+ pair = bin[i], bin[i + 1]
+ links[i] = heads.get(pair, -1)
+ heads[pair] = i
+ i += 1
+
+ # checking
+ # (expand the item list again and compare with the input)
+ bin1 = []
+ lzss1 = lzss[::-1]
+ while len(lzss1):
+ _len, dist = lzss1.pop()
+ if _len == 1:
+ bin1.append(dist)
+ else:
+ for i in range(_len):
+ bin1.append(bin1[-dist])
+ assert bin == bin1
+
+ # construct the real output in reverse to how it will be decoded,
+ # this means we flush the bits at the right time for the decoder,
+ # and any partial bit buffer is decoded at start rather than end
+ lzss1 = []
+ # count = number of bit buffer bytes the decoder must fetch at the
+ # literal/pointer decision; bits = bit buffer with a sentinel 1 on top
+ count = 0
+ bits = 1
+ while len(lzss):
+ _len, dist = lzss.pop()
+ if _len == 1:
+ #print('a', dist)
+ lzss1.append(dist)
+ cf = 0
+ else:
+ # convert to 0-based codes (lengths start at 2, distances at 1)
+ _len -= 2
+ dist -= 1
+ if _len < (1 << LEN_BITS0) and dist < (1 << DIST_BITS0):
+ item = dist | (_len << DIST_BITS0)
+ #print('b', item)
+ lzss1.append(item)
+ cf = 0
+ elif _len < (1 << LEN_BITS1) and dist < (1 << DIST_BITS1):
+ item = dist | (_len << DIST_BITS1)
+ #print('c', item)
+ lzss1.extend([item >> 8, item & 0xff])
+ cf = 1
+ else:
+ assert False
+
+ # short/long selector bit; pushed before the literal/pointer bit
+ # because the stream is being built back to front
+ bits = (bits << 1) | cf
+ if bits & 0x100:
+ #print('d', bits)
+ lzss1.append(bits & 0xff)
+ bits = 1
+ # in this case we leave count alone (at decoding side we get
+ # another bit buffer for free without any increment or test)
+
+ cf = 1
+
+ # literal (0) / pointer (1) selector bit
+ bits = (bits << 1) | cf
+ if bits & 0x100:
+ #print('e', bits)
+ lzss1.append(bits & 0xff)
+ bits = 1
+ count += 1
+ lzss = lzss1[::-1]
+
+ # checking
+ # (decode the byte stream the same way the unpacker is expected to)
+ bin1 = []
+ lzss1 = lzss[::-1]
+ count1 = count
+ bits1 = bits
+ while True:
+ if bits1 == 1:
+ # only the sentinel is left: refill, or stop when count runs out
+ if count1 == 0:
+ break
+ count1 -= 1
+ bits1 = lzss1.pop() | 0x100
+ #print('e', bits1)
+ cf = bits1 & 1
+ bits1 >>= 1
+
+ if cf:
+ if bits1 == 1:
+ bits1 = lzss1.pop() | 0x100
+ #print('d', bits1)
+ cf = bits1 & 1
+ bits1 >>= 1
+
+ if cf:
+ item = lzss1[-1] | (lzss1[-2] << 8)
+ del lzss1[-2:]
+ #print('c', item)
+ dist = item & ((1 << DIST_BITS1) - 1)
+ _len = item >> DIST_BITS1
+ else:
+ item = lzss1.pop()
+ #print('b', item)
+ dist = item & ((1 << DIST_BITS0) - 1)
+ _len = item >> DIST_BITS0
+ _len += 2
+ dist += 1
+
+ for i in range(_len):
+ bin1.append(bin1[-dist])
+ else:
+ #print('a', lzss1[-1])
+ bin1.append(lzss1.pop())
+ assert len(lzss1) == 0
+ assert bin1 == bin
+
+ # optimization: provided the input is not null, the first byte
+ # has to be literal, so the loader can fall straight into the
+ # literal decoding routine (saves a jump to the official loop)
+ if bits == 1:
+ # the initial bit buffer is empty, so take the first refill byte
+ # from the stream now
+ assert count
+ count -= 1
+ bits = lzss.pop(0) | 0x100
+ # drop the literal/pointer bit of the first item (asserted to be literal)
+ assert (bits & 1) == 0
+ bits >>= 1
+
+ # prepend data block
+ count ^= 0xffff # inc/test is easier than test/dec
+ return [dest & 0xff, dest >> 8, count & 0xff, count >> 8, bits] + lzss
+
+intelhex = IntelHex(in_ihx)
+entry_point = intelhex.start_addr['EIP']
+segments = [j for i in intelhex.segments() for j in i]
+
+# zero page and stack are done last, after we finish with them,
+# and in 0x100-byte pieces so we can do them without zero page
+def intersect(segments, segment):
+    """Clip a flat [start, end, start, end, ...] list to one [start, end) range.
+
+    Returns a new flat list holding the non-empty overlap of each pair in
+    segments with segment, in the original order.
+    """
+    lo, hi = segment
+    clipped = []
+    for k in range(0, len(segments), 2):
+        start, end = segments[k:k + 2]
+        start = max(start, lo)
+        end = min(end, hi)
+        if start < end:
+            clipped += [start, end]
+    return clipped
+segments = (
+ intersect(segments, [0x200, 0x10000]) +
+ intersect(segments, [0, 0x100]) +
+ intersect(segments, [0x100, 0x200])
+)
+
+# segments longer than 0x100 bytes are LZSS-packed, shorter ones are kept
+# as raw data to be stored by generated loader code
+compressed = []
+uncompressed = []
+for i in range(0, len(segments), 2):
+ addr0 = segments[i]
+ addr1 = segments[i + 1]
+ data = list(intelhex.tobinstr(addr0, addr1 - 1))
+ if len(data) > 0x100:
+ compressed.append((addr0, addr1, lzss_pack(addr0, data)))
+ else:
+ uncompressed.append((addr0, addr1, data))
+
+# bin is the image placed at load_addr: a jmp to the loader, the data
+# blocks, the unpacker and finally the generated loader code
+bin = [0x4c, 0x00, 0x00] # jmp 0 (fixup: loader)
+loader = [
+ 0xd8, # cld
+ 0xa2, 0xff, # ldx #0xff
+ 0x9a, # txs
+]
+compressed_addr = load_addr + len(bin)
+for addr0, addr1, data in compressed:
+ addr2 = load_addr + len(bin)
+ bin.extend(data)
+ addr3 = load_addr + len(bin)
+ print(
+ f'[0x{addr0:04x}, 0x{addr1:04x}) -> [0x{addr2:04x}, 0x{addr3:04x}) {100. * (addr3 - addr2) / (addr1 - addr0):5.1f}%'
+ )
+ # src is only set up for the first block
+ # NOTE(review): presumably the unpacker leaves src at the start of the
+ # next block, which follows immediately in bin -- confirm
+ if addr2 == compressed_addr:
+ loader.extend(
+ [
+ 0xa9, compressed_addr & 0xff, # lda #<compressed_addr
+ 0x85, 0xf0, # sta src
+ 0xa9, compressed_addr >> 8, # lda #>compressed_addr
+ 0x85, 0xf1, # sta src + 1
+ ]
+ )
+ loader.extend([0x20, 0x00, 0x00]) # jsr 0 (fixup: lzss_unpack)
+for addr0, addr1, data in uncompressed:
+ count = addr1 - addr0
+ zpage = addr0 < 0x100
+ # runs of 8 bytes or more are stored in the image and moved by a copy
+ # loop, shorter runs are stored with one lda #imm / sta pair per byte
+ if count >= 8:
+ addr2 = load_addr + len(bin)
+ bin.extend(data)
+ addr3 = load_addr + len(bin)
+ print(
+ f'[0x{addr0:04x}, 0x{addr1:04x}) -> [0x{addr2:04x}, 0x{addr3:04x})'
+ )
+
+ # bias both end addresses by -0x100 so that x can count up from
+ # -count and the loop terminates when x wraps to zero
+ addr1 -= 0x100
+ addr3 -= 0x100
+ loader.extend(
+ [
+ 0xa2, -count & 0xff, # ldx #-count
+ 0xbd, addr3 & 0xff, (addr3 >> 8) & 0xff, # lda addr3,x
+ 0x95, addr1 & 0xff, # sta *addr1,x
+ 0xe8, # inx
+ 0xd0, 0xf8 # bne .-6
+ ]
+ if zpage else
+ [
+ 0xa2, -count & 0xff, # ldx #-count
+ 0xbd, addr3 & 0xff, (addr3 >> 8) & 0xff, # lda addr3,x
+ 0x9d, addr1 & 0xff, (addr1 >> 8) & 0xff, # sta addr1,x
+ 0xe8, # inx
+ 0xd0, 0xf7 # bne .-7
+ ]
+ )
+ else:
+ print(f'[0x{addr0:04x}, 0x{addr1:04x})')
+ for i in range(count):
+ loader.extend(
+ [
+ 0xa9, data[i], # lda #data
+ 0x85, addr0, # sta *addr0
+ ]
+ if zpage else
+ [
+ 0xa9, data[i], # lda #data
+ 0x8d, addr0 & 0xff, addr0 >> 8 # sta addr0
+ ]
+
+ )
+ addr0 += 1
+if len(compressed):
+ # the unpacker is placed after the data blocks; patch its address into
+ # the jsr instructions, which start at loader offset 12 (4 bytes of
+ # preamble + 8 bytes of src setup) and follow each other every 3 bytes
+ lzss_unpack_addr = load_addr + len(bin)
+ for i in range(len(compressed)):
+ assert loader[12 + i * 3] == 0x20 # jsr
+ loader[12 + i * 3 + 1] = lzss_unpack_addr & 0xff
+ loader[12 + i * 3 + 2] = lzss_unpack_addr >> 8
+ bin.extend(lzss_unpack)
+# the loader goes last; patch the jmp at the start of the image to reach it
+loader_addr = load_addr + len(bin)
+assert bin[0] == 0x4c # jmp
+bin[1] = loader_addr & 0xff
+bin[2] = loader_addr >> 8
+bin.extend(
+ loader + [0x4c, entry_point & 0xff, entry_point >> 8] # jmp entry_point
+)
+load_size = len(bin)
+
+# 4-byte file header: load address and size, low byte first
+hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8]
+with open(out_a2bin, 'wb') as fout:
+ fout.write(bytes(hdr + bin))
--- /dev/null
+ .r65c02
+
+ .area zpage
+ .setdp
+
+ .ds 0xf0
+src: .ds 2 ; address of last byte read
+dest: .ds 2 ; address of last byte written
+count: .ds 2 ; count of bit buffer refills to do
+bits: .ds 1 ; bit buffer (highest 1 = sentinel)
+dist: .ds 2 ; distance, or address of repeated data
+len: .ds 1 ; length
+
+ .area text
+
+ ; LZSS unpacker: expands the data block at src to the destination
+ ; address given in the block header, returns with rts when the count
+ ; of bit buffer refills is used up
+
+ ; enter with src = address of data block
+ ; 0 (word): destination address
+ ; 2 (word): count of bit buffer refills to do
+ ; 4 (byte): bit buffer (highest 1 = sentinel)
+ ; 5+: LZSS-compressed data
+ ; type of LZSS item depends on a bit from bit buffer:
+ ; 0: literal
+ ; 1: pointer
+ ; type of pointer depends on a bit from bit buffer:
+ ; 0: short pointer, lddddddd
+ ; 1: long pointer, lllllldd:dddddddd (LS byte first)
+
+ ; src += 5 - 0x100
+ clc
+ lda src
+ adc #5
+ sta src
+ bcs 0$
+ dec src + 1
+
+0$: ; copy data block
+ ; (the 5 header bytes go to dest, count and bits; y runs from -5 up
+ ; to 0 and is left at 0 for the indirect accesses that follow)
+ ldy #-5
+1$: lda [src],y
+ sta dest + 5 - 0x100,y
+ iny
+ bne 1$
+
+ ; src += 0x100
+ inc src + 1
+
+ ; cf must be 0 on reaching loop2, so that ror shifts a 0 into the
+ ; top of the bit buffer
+ clc
+ ; optimization: the first byte has to be literal
+ ;bcc loop2
+
+literal: ; copy one byte
+ lda [src],y
+ sta [dest],y
+
+ inc dest
+ bne loop1
+loop0: ; dest += 0x100 to remove offset used for pointer copy
+ ; then increment src for literal or last byte of pointer
+ inc dest + 1
+
+loop1: inc src
+ bne loop2
+ inc src + 1
+
+loop2: ; process LZSS item
+ ;clc
+ ror bits
+ bne literal_or_pointer
+
+ ; bit buffer exhausted
+ ; count refills of bit buffer
+ ; (done when the 16-bit count wraps around to zero)
+ inc count
+ bne 0$
+ inc count + 1
+ beq done
+
+0$: ; load one byte to bit buffer
+ lda [src],y
+ ;sec
+ ror a
+ sta bits
+
+ inc src
+ bne literal_or_pointer
+ inc src + 1
+
+literal_or_pointer:
+ ; cf=0 literal, cf=1 pointer
+ bcc literal
+
+ ; pointer
+ clc
+ ror bits
+ bne short_or_long_pointer
+
+ ; bit buffer exhausted
+ ; load one byte to bit buffer
+ lda [src],y
+ ;sec
+ ror a
+ sta bits
+
+ inc src
+ bne short_or_long_pointer
+ inc src + 1
+
+short_or_long_pointer:
+ ; cf=0 short pointer, cf=1 long pointer
+ bcs long_pointer
+
+ ; short pointer, lddddddd
+ ; take source byte, but don't increment yet
+ lda [src],y
+ tax
+ and #0x7f
+ sta dist
+ sty dist + 1
+
+ txa
+ asl a ; cf = len - 2
+ ; y is 0 here, so a = 0 and the branch is always taken; tya leaves
+ ; cf alone
+ tya
+ beq pointer
+
+long_pointer:
+ ; low of long pointer, dddddddd
+ ; take source byte
+ lda [src],y
+ sta dist
+
+ inc src
+ bne 0$
+ inc src + 1
+
+0$: ; high of long pointer, lllllldd
+ ; take source byte, but don't increment yet
+ lda [src],y
+ tax
+ and #3
+ sta dist + 1
+
+ txa
+ lsr a
+ lsr a
+
+ clc
+pointer: ; dist 0 based, a + cf = len 0 based, source needs increment
+ adc #2
+ sta len
+
+ ; dest += len - 0x100
+ ;clc
+ adc dest
+ sta dest
+ bcs 0$
+ dec dest + 1
+
+0$: ; dist = dest - dist - 1
+ ; (sbc with cf = 0 subtracts one extra, which gives the - 1)
+ clc
+ lda dest
+ sbc dist
+ sta dist
+ lda dest + 1
+ sbc dist + 1
+ sta dist + 1
+
+ ; y = -len
+ ;sec
+ tya
+ sbc len ; always overflows leaving cf = 0
+ tay
+
+ ; copy previous data
+1$: lda [dist],y
+ sta [dest],y
+ iny
+ bne 1$
+
+ ; dest += 0x100, src += 1, process LZSS item
+ ; (the loop above exits with y = 0, so this branch is always taken)
+ beq loop0
+
+done: rts
--- /dev/null
+# file was constructed from a memory dump taken after the copy-protected
+# loader ran; layout of the file once loaded at 0x0300:
+# [0x0300, 0x0a00) is not used except for a jump to crack loader at 0x3f00
+# [0x0a00, 0x2000) is program
+# [0x2000, 0x4000) is hi-res screen 0 which is reused by the crack loader:
+# [0x2000, 0x27ed) is end of program which is copied to 0x9600
+# [0x27ed, 0x3200) is junk copied after end of program at 0x9ded
+# [0x3200, 0x3c00) is junk copied to end of memory at 0xb600
+# [0x3c00, 0x3f00) is junk copied to start of memory at 0x0000, except:
+# [0x3c20, 0x3c22) is vec_start (duplicated in crack loader, kept anyway)
+# [0x3c22, 0x3c24) is vec_init_game
+# [0x3c24, 0x3c26) is vec_start_game
+# [0x3c28, 0x3c2a) is vec_calculate_object_shape
+# [0x3c4e, 0x3c50) is vec_draw_misc_from_table
+# [0x3e00, 0x3e02) is vec_restart
+# [0x3f00, 0x4000) is junk except the crack loader at [0x3f00, 0x3f79)
+# [0x4000, 0x8c08) is program
+# [0x8c08, 0x8e00) is junk which I think is from the shape editor he used
+# [0x8e00, 0x9580) is program
+# [0x9580, 0x9600) is padding to a page boundary
+[0x0022, 0x0024) # vec_init_game
+[0x0024, 0x0026) # vec_start_game
+[0x0028, 0x002a) # vec_calculate_object_shape
+[0x004e, 0x0050) # vec_draw_misc_from_table
+[0x0200, 0x0202) # vec_restart
+[0x0a00, 0x2000)
+[0x4000, 0x8c08)
+[0x8e00, 0x9ded)