--- /dev/null
+#!/usr/bin/env python3
+
+import bisect
+import py65.devices.mpu65c02
+import py65.disassembler
+import re
+import sys
+from intelhex import IntelHex
+
# Process exit statuses handed to sys.exit().
EXIT_SUCCESS, EXIT_FAILURE = 0, 1

# Area kinds, keyed by the spelling used in the 'areas' section of the
# address file.
AREA_INIT, AREA_UNINIT = 0, 1
area_types = dict(uninit=AREA_UNINIT, init=AREA_INIT)

# Item kinds, keyed by the spelling used in the 'items' section of the
# address file.  The numeric order matters: when items are merged the
# larger type value wins.
ITEM_BYTE, ITEM_WORD, ITEM_CODE, ITEM_CODE_IGN, ITEM_CODE_M1 = range(5)
item_types = dict(
    byte=ITEM_BYTE,
    word=ITEM_WORD,
    code=ITEM_CODE,
    code_ign=ITEM_CODE_IGN,
    code_m1=ITEM_CODE_M1,
)
+
# Command line: [--trace=trace.txt] addrs.txt in.ihx out.asm
# The optional switch must come first; it is removed from sys.argv so the
# three positional arguments always sit at indices 1..3.
trace_txt = None
if len(sys.argv) >= 2 and sys.argv[1].startswith('--trace='):
    trace_txt = sys.argv[1][len('--trace='):]
    del sys.argv[1]
if len(sys.argv) < 4:
    print(f'usage: {sys.argv[0]:s} [--trace=trace.txt] addrs.txt in.ihx out.asm')
    sys.exit(EXIT_FAILURE)
addrs_txt, in_ihx, out_asm = sys.argv[1:4]
+
# Index of each register within one trace record (see trace_regs).
TRACE_REG_A, TRACE_REG_X, TRACE_REG_Y, TRACE_REG_S, TRACE_REG_P = range(5)
trace_reg_names = list('axysp')
trace_reg_indices = {
    reg_name: reg_index
    for reg_index, reg_name in enumerate(trace_reg_names)
}

# Bit positions within the traced P register; bit 5 has no name.
TRACE_FLAG_C, TRACE_FLAG_Z, TRACE_FLAG_I, TRACE_FLAG_D, TRACE_FLAG_B = range(5)
TRACE_FLAG_V, TRACE_FLAG_N = 6, 7
trace_flag_names = ['c', 'z', 'i', 'd', 'b', None, 'v', 'n']

# Index of each bound within the six values recorded per register.
(
    TRACE_MIN_UNSIGNED,
    TRACE_MIN_SIGNED,
    TRACE_MIN_BITWISE,
    TRACE_MAX_UNSIGNED,
    TRACE_MAX_SIGNED,
    TRACE_MAX_BITWISE,
) = range(6)
+
# trace_nexts[pc] / trace_prevs[pc]: sets of addresses seen after / before pc.
# trace_regs[pc]: for the second address on a trace line, one list per
# register (order of trace_reg_names) holding six byte values indexed by
# the TRACE_MIN_* / TRACE_MAX_* constants.
trace_nexts = {}
trace_prevs = {}
trace_regs = {}
if trace_txt is not None:
    print('reading trace')
    with open(trace_txt) as fin:
        for line in fin:
            # fixed-column record: pc=XXXX,YYYY followed by five 16-column
            # register fields
            assert line[:3] == 'pc='
            pc0 = int(line[3:7], 16)
            assert line[7] == ','
            pc1 = int(line[8:12], 16)

            # hack so I don't have to redo the trace
            if trace_txt == 'star_blazer_trace.txt' and any(
                pc < 0xa00 or 0x2000 <= pc < 0x4000
                for pc in (pc0, pc1)
            ):
                continue

            trace_nexts.setdefault(pc0, set()).add(pc1)
            trace_prevs.setdefault(pc1, set()).add(pc0)

            bounds = []
            for reg in range(5):
                base = reg * 16 + 14
                assert line[base] == '='
                assert line[base + 7] == ','
                # three hex bytes, a comma, then three more hex bytes
                bounds.append(
                    [
                        int(line[start:start + 2], 16)
                        for start in (
                            base + 1,
                            base + 3,
                            base + 5,
                            base + 8,
                            base + 10,
                            base + 12,
                        )
                    ]
                )
            trace_regs[pc1] = bounds
+
class AreaInfo:
    """One entry of the 'areas' section: an output area of the assembly.

    Attributes:
        size: length of the area in bytes.
        name: area name written after the .area directive.
        type: AREA_INIT or AREA_UNINIT.
    """

    def __init__(self, size, name, _type):
        self.size, self.name, self.type = size, name, _type
+
class ItemInfo:
    """Properties of one memory item (a run of bytes with a common use).

    Attributes:
        size: length of the item in bytes.
        name: label text, possibly empty; a leading character below 'A'
            is a priority digit that is stripped on output.
        type: one of the ITEM_* constants.
        extra_types: pointed-to type(s), outermost first.
        local_label: local label number, or MAX_LOCALS for a global label.
        writeable: True if some instruction was seen writing the item.
        mate: pair of byte offsets (low, high) locating the two halves of
            a split word relative to this byte; (0, 0) means no mate.
    """

    def __init__(
        self,
        size,
        name,
        _type,
        extra_types,
        local_label,
        writeable,
        mate
    ):
        self.size, self.name, self.type = size, name, _type
        self.extra_types = extra_types
        self.local_label = local_label
        self.writeable = writeable
        self.mate = mate
+
# Parallel sorted lists: area_addr[k] / item_addr[k] is the start address of
# area_info[k] / item_info[k].  Entries must appear in the file in ascending,
# non-overlapping order.
print('reading addrs')
area_addr = []
area_info = []
item_addr = []
item_info = []
with open(addrs_txt) as fin:
    def get_line():
        """Return the next non-blank line split on commas, or [] at EOF.

        Everything from the first '#' on a line is discarded as a comment.
        """
        for raw in iter(fin.readline, ''):
            fields = raw.split('#', 1)[0].strip().split(',')
            if fields != ['']:
                #print('fields', fields)
                return fields
        return []

    fields = get_line()
    while fields:
        # a single-field line is a section header; its rows have >= 2 fields
        assert len(fields) == 1
        section = fields[0]
        print(section)
        fields = get_line()

        if section == 'areas':
            while len(fields) >= 2:
                assert len(fields) == 4
                addr = int(fields[0], 0)
                size = int(fields[1], 0)
                name = fields[2]
                _type = area_types[fields[3]]

                assert (
                    not area_addr or
                    addr >= area_addr[-1] + area_info[-1].size
                )
                area_addr.append(addr)
                area_info.append(AreaInfo(size, name, _type))

                fields = get_line()
        elif section == 'items':
            while len(fields) >= 2:
                assert len(fields) >= 4
                # columns 5..8 are optional; treat missing ones as empty
                columns = fields + [''] * (8 - len(fields))
                addr = int(columns[0], 0)
                size = int(columns[1], 0)
                name = columns[2]
                _type = item_types[columns[3]]
                extra_types = [item_types[word] for word in columns[4].split()]
                local_label = int(columns[5]) if columns[5] else 0
                writeable = columns[6] == 'true'
                if columns[7]:
                    mate = tuple(int(word, 0) for word in columns[7].split())
                    assert len(mate) == 2
                else:
                    mate = (0, 0)

                # enable this to see which address is out of order
                print(hex(addr))
                assert (
                    not item_addr or
                    addr >= item_addr[-1] + item_info[-1].size
                )
                item_addr.append(addr)
                item_info.append(
                    ItemInfo(
                        size,
                        name,
                        _type,
                        extra_types,
                        local_label,
                        writeable,
                        mate
                    )
                )
                fields = get_line()
        else:
            # unknown section, skip (for shape or object extractor)
            while len(fields) >= 2:
                fields = get_line()
+
print('reading ihx')
intelhex = IntelHex(in_ihx)
entry_point = intelhex.start_addr['EIP']
# flattened [start0, stop0, start1, stop1, ...]; stops are treated as
# exclusive below
segments = [bound for segment in intelhex.segments() for bound in segment]

# flat 64 KiB memory image, zero outside the loaded segments
mem = [0] * 0x10000
for addr0, addr1 in zip(segments[0::2], segments[1::2]):
    print(f'[{addr0:04x}, {addr1:04x})')
    mem[addr0:addr1] = list(intelhex.tobinstr(addr0, addr1 - 1))

# the MPU object only serves as the memory/opcode source for the disassembler
mpu = py65.devices.mpu65c02.MPU(mem, 0)
disassembler = py65.disassembler.Disassembler(mpu)
+
# local_refs is checked and cleared after each block of local labels (i.e.
# when a non-local label is defined) and anything referenced within that
# block, which is not also defined in the same block, is marked non-local
MAX_LOCALS = 10000  # local_label value meaning "not a local label"
local_defs = set()  # addresses of locals defined in the current block
local_refs = set()  # addresses of locals referenced in the current block


def flush_locals():
    """Promote locals referenced from outside their block to global labels.

    Sets the global ``modified`` flag whenever an item is promoted.  The
    caller is responsible for clearing local_defs / local_refs afterwards.
    """
    global modified

    # canonicalize addresses to item indices
    defined = set(map(get_item, local_defs))
    assert -1 not in defined
    referenced = set(map(get_item, local_refs))
    assert -1 not in referenced

    for index in referenced - defined:
        info = item_info[index]
        if info.local_label != MAX_LOCALS:
            info.local_label = MAX_LOCALS
            modified = True
+
item = 0          # output cursor into item_addr/item_info, kept valid by add_item
modified = False  # set whenever the item table changes during a pass
ignore = False    # while True, add_item only looks items up


# note: modifies local_defs
def add_item(
    addr,
    size,
    name,
    _type,
    extra_types,
    local_label = 0,
    writeable = False
):
    """Record that [addr, addr + size) is used as the described item.

    Every existing item overlapping the range is merged with the new one
    into a single item spanning all of them.  The merged item takes the
    maximum of each property; the name is only compared against an
    existing item that starts at or before addr.  A non-empty name gets
    the (possibly adjusted) start address appended as ``_xxxx``.

    Returns the index of the resulting item, or get_item(addr) without
    touching anything while the global ``ignore`` flag is set.
    """
    global item, modified

    if ignore:
        return get_item(addr)
    end = addr + size

    # first overlapping item: the one containing addr, else the next one up
    first = bisect.bisect_right(item_addr, addr)
    if first >= 1 and addr < item_addr[first - 1] + item_info[first - 1].size:
        first -= 1

    # one past the last overlapping item
    last = first
    while last < len(item_addr) and item_addr[last] < end:
        last += 1

    if first == last:
        # nothing overlaps, the new item stands alone
        if name:
            name = f'{name:s}_{addr:04x}'
    else:
        if addr >= item_addr[first]:
            # extend downwards to the existing start, keep the greater name
            addr = item_addr[first]
            if name:
                name = f'{name:s}_{addr:04x}'
            if item_info[first].name > name:
                name = item_info[first].name
        elif name:
            name = f'{name:s}_{addr:04x}'

        for old in item_info[first:last]:
            _type = max(_type, old.type)
            extra_types = max(extra_types, old.extra_types)
            local_label = max(local_label, old.local_label)
            writeable = max(writeable, old.writeable)

        end = max(end, item_addr[last - 1] + item_info[last - 1].size)

    info = ItemInfo(
        end - addr,
        name,
        _type,
        extra_types,
        local_label,
        writeable,
        (0, 0) # mate -- has to be re-detected after merging items
    )

    # code items should only ever be the opcode
    # if more than 1 byte then something has gone wrong
    # the stack trace lets us see what was being disassembled
    assert info.size == 1 or info.type < ITEM_CODE

    # nothing to do when exactly one item overlapped and it already matches
    # (mate is deliberately not compared)
    unchanged = (
        last == first + 1 and
        item_addr[first] == addr and
        item_info[first].size == info.size and
        item_info[first].name == info.name and
        item_info[first].type == info.type and
        item_info[first].extra_types == info.extra_types and
        item_info[first].local_label == info.local_label and
        item_info[first].writeable == info.writeable
    )
    if unchanged:
        return first

    item_addr[first:last] = [addr]
    item_info[first:last] = [info]

    # adjust cursor on behalf of calling routine
    if item >= last:
        item += first + 1 - last
    elif item > first:
        item = first

    # for newly defined local, say it was declared in this block
    # (prevents backward references to locals being made non-local)
    if info.local_label != MAX_LOCALS:
        local_defs.add(addr)

    modified = True
    return first
+
# seed the item table: the entry point is a global code label, and every
# address the trace saw executing is code
add_item(entry_point, 1, '', ITEM_CODE, [], MAX_LOCALS)
for pc1 in trace_nexts:
    add_item(pc1, 1, '', ITEM_CODE, [])
+
def get_item(dest):
    """Return the index of the item containing address dest, or -1."""
    # candidate is the last item starting at or below dest
    pos = bisect.bisect_right(item_addr, dest) - 1
    if pos >= 0 and dest < item_addr[pos] + item_info[pos].size:
        return pos
    return -1
+
# note: modifies local_refs
def get_label(dest, zpage = False, offset = 0):
    """Format address dest (plus offset) as an assembler operand.

    If dest lies inside a named item the result is that item's label
    (``N$`` for a local label) plus or minus the distance from the item
    start; otherwise it is a hex literal, 2 digits wide when zpage is
    true and 4 otherwise.  A zero-page operand is prefixed with ``*``.
    """
    prefix = '*' if zpage else ''
    index = get_item(dest)
    if index == -1 or not item_info[index].name:
        width = 2 if zpage else 4
        return f'{prefix}0x{dest + offset:0{width}x}'

    info = item_info[index]
    if info.local_label != MAX_LOCALS:
        local_refs.add(dest)
        label = f'{info.local_label:d}$'
    else:
        label = info.name
        if label[0] < 'A':
            label = label[1:] # strip numeric priority indicator

    # small displacements are printed in decimal, larger ones in hex
    offset += dest - item_addr[index]
    if offset < 0:
        distance = -offset
        label += f' - 0x{distance:x}' if distance > 10 else f' - {distance:d}'
    elif offset >= 1:
        label += f' + {offset:d}' if offset < 10 else f' + 0x{offset:x}'
    return prefix + label
+
def word_operand(addr, extra_types = [], mate = (0, 1)):
    """Return operand text for the 16-bit value stored around addr.

    mate gives the offsets of the low and high byte relative to addr.
    When the value is non-zero and extra_types names a pointed-to type,
    the target is registered as an item of that type and the operand is
    its label; otherwise the operand is a 4-digit hex literal.
    """
    dest = mem[addr + mate[0]] + (mem[addr + mate[1]] << 8)
    if not (dest and len(extra_types)):
        return f'0x{dest:04x}'

    # pointed-to type -> (target bias, item size, label prefix, item type)
    # ITEM_CODE_M1 pointers hold the target address minus one
    targets = {
        ITEM_BYTE: (0, 1, '1bvar', ITEM_BYTE),
        ITEM_WORD: (0, 2, '2wvar', ITEM_WORD),
        ITEM_CODE: (0, 1, '3loc', ITEM_CODE),
        ITEM_CODE_M1: (1, 1, '3loc', ITEM_CODE),
    }
    kind = extra_types[0]
    assert kind in targets
    bias, size, name, _type = targets[kind]
    add_item(dest + bias, size, name, _type, extra_types[1:])
    return get_label(dest + bias, False, -bias)
+
def byte_operand(addr, extra_types = [], mate = (0, 0)):
    """Return operand text for the byte at addr.

    With no mate the byte is a plain 2-digit hex literal.  Otherwise the
    byte is half of a split word: ``<`` plus the word operand when this is
    the low byte (mate[0] == 0), ``>`` when it is the high byte.
    """
    if mate == (0, 0):
        return f'0x{mem[addr]:02x}'
    if mate[0] == 0:
        selector = '<'
    elif mate[1] == 0:
        selector = '>'
    else:
        assert False
    return selector + word_operand(addr, extra_types, mate)
+
# indexed two-byte copy: lda src0,r / sta dst0 / lda src1,r / sta dst1
# (raw string: '\$' is a regex escape, not a Python string escape)
re_macro0 = re.compile(
    r'''lda \$([0-9a-f]+),([xy])
sta \$([0-9a-f]+)
lda \$([0-9a-f]+),([xy])
sta \$([0-9a-f]+)
'''
)


def disasm_code_macro0(addrs, match):
    """Infer word items from an indexed two-byte copy sequence.

    addrs holds the start addresses of the disassembled instructions and
    match is a re_macro0 match on their text.  Nothing happens unless both
    loads use the same index register and the stores go to consecutive
    addresses.  Then:

    * if the sources are consecutive too, the destination becomes a word
      and, when trace bounds exist for the first instruction, the source
      range becomes a word array;
    * otherwise, if the destination is already a known word and trace
      bounds exist, the two source ranges become mated low/high byte
      arrays.

    Sets the global ``modified`` flag when it assigns a mate.
    """
    global modified

    addr = addrs[0]
    addr0 = int(match.group(1), 16)
    reg0 = trace_reg_indices[match.group(2)]
    addr1 = int(match.group(3), 16)
    addr2 = int(match.group(4), 16)
    reg2 = trace_reg_indices[match.group(5)]
    addr3 = int(match.group(6), 16)
    if reg2 == reg0 and addr3 == addr1 + 1:
        if addr2 == addr0 + 1:
            # we can be confident source is word, make destination word
            i = add_item(addr1, 2, '2word', ITEM_WORD, [])
            if addr in trace_regs:
                # read before the next add_item call, which may shift indices
                extra_types = item_info[i].extra_types
                # word access should only access evens or odds per instruction
                assert (
                    (trace_regs[addr][reg0][TRACE_MIN_BITWISE] & 1) ==
                    (trace_regs[addr][reg0][TRACE_MAX_BITWISE] & 1)
                )
                min_unsigned = trace_regs[addr][reg0][TRACE_MIN_UNSIGNED]
                max_unsigned = trace_regs[addr][reg0][TRACE_MAX_UNSIGNED]
                add_item(
                    addr0 + min_unsigned,
                    max_unsigned + 1 - min_unsigned,
                    '9warr',
                    ITEM_WORD,
                    extra_types
                )
        else:
            # we can't be confident source is word, check destination is word
            i = get_item(addr1)
            if (
                i != -1 and
                item_info[i].type == ITEM_WORD and
                get_item(addr3) == i and
                addr in trace_regs
            ):
                # capture now: the add_item calls below can insert or merge
                # items, after which index i may refer to a different item
                extra_types = item_info[i].extra_types
                min_unsigned = trace_regs[addr][reg0][TRACE_MIN_UNSIGNED]
                max_unsigned = trace_regs[addr][reg0][TRACE_MAX_UNSIGNED]
                j = add_item(
                    addr0 + min_unsigned,
                    max_unsigned + 1 - min_unsigned,
                    '9larr',
                    ITEM_BYTE,
                    extra_types
                )
                if item_info[j].mate == (0, 0):
                    item_info[j].mate = (0, addr2 - addr0)
                    modified = True
                j = add_item(
                    addr2 + min_unsigned,
                    max_unsigned + 1 - min_unsigned,
                    '9harr',
                    ITEM_BYTE,
                    extra_types
                )
                if item_info[j].mate == (0, 0):
                    item_info[j].mate = (addr0 - addr2, 0)
                    modified = True
+
# push-and-return dispatch: lda hi,r / pha / lda lo,r / pha / [sec | ldy zz] / rts
# The optional group consumes its own trailing newline, so 'rts' has to
# follow ')?' on the same pattern line; with a newline in between the
# pattern would demand an empty line that the joined listing never contains.
re_macro1 = re.compile(
    r'''lda \$([0-9a-f]+),([xy])
pha
lda \$([0-9a-f]+),([xy])
pha
(sec
|ldy \$[0-9a-f]+
)?rts
'''
)


def disasm_code_macro1(addrs, match):
    """Infer a table of return-style code pointers from a push/push/rts.

    addrs holds the start addresses of the disassembled instructions and
    match is a re_macro1 match on their text.  When the first load reads
    one byte above the second with the same index register, and trace
    bounds exist for the first instruction, the traced index range of the
    table is registered as a word array of ITEM_CODE_M1 pointers.
    """
    addr = addrs[0]
    addr0 = int(match.group(1), 16)
    reg0 = trace_reg_indices[match.group(2)]
    addr1 = int(match.group(3), 16)
    reg1 = trace_reg_indices[match.group(4)]
    if addr0 == addr1 + 1 and reg1 == reg0 and addr in trace_regs:
        # word access should only access evens or odds per instruction
        assert (
            (trace_regs[addr][reg0][TRACE_MIN_BITWISE] & 1) ==
            (trace_regs[addr][reg0][TRACE_MAX_BITWISE] & 1)
        )
        min_unsigned = trace_regs[addr][reg0][TRACE_MIN_UNSIGNED]
        max_unsigned = trace_regs[addr][reg0][TRACE_MAX_UNSIGNED]
        add_item(
            addr1 + min_unsigned,
            max_unsigned + 1 - min_unsigned,
            '9warr',
            ITEM_WORD,
            [ITEM_CODE_M1]
        )
+
# immediate two-byte store: ldr #lo / str dst0 / ldr #hi / str dst1
# (raw string: '\$' is a regex escape, not a Python string escape)
re_macro2 = re.compile(
    r'''ld([axy]) #\$[0-9a-f]+
st([axy]) \$([0-9a-f]+)
ld([axy]) #\$[0-9a-f]+
st([axy]) \$([0-9a-f]+)
'''
)


def disasm_code_macro2(addrs, match):
    """Mate two immediate operands that together initialise a known word.

    addrs holds the start addresses of the disassembled instructions and
    match is a re_macro2 match on their text.  When each store uses the
    register just loaded, the stores hit consecutive addresses, and both
    destinations lie in one existing ITEM_WORD item, the immediate bytes
    of the first and third instruction become mated low/high byte items
    carrying the word's pointed-to types.

    Sets the global ``modified`` flag when it assigns a mate.
    """
    global modified

    reg0 = trace_reg_indices[match.group(1)]
    addr0 = addrs[0] + 1
    reg1 = trace_reg_indices[match.group(2)]
    addr1 = int(match.group(3), 16)
    reg2 = trace_reg_indices[match.group(4)]
    addr2 = addrs[2] + 1
    reg3 = trace_reg_indices[match.group(5)]
    addr3 = int(match.group(6), 16)
    if reg1 == reg0 and reg3 == reg2 and addr3 == addr1 + 1:
        # we can't be confident source is word, check destination is word
        i = get_item(addr1)
        if (
            i != -1 and
            item_info[i].type == ITEM_WORD and
            get_item(addr3) == i
        ):
            # capture now: the add_item calls below can insert or merge
            # items, after which index i may refer to a different item
            extra_types = item_info[i].extra_types
            j = add_item(
                addr0,
                1,
                '2lvar',
                ITEM_BYTE,
                extra_types
            )
            if item_info[j].mate == (0, 0):
                item_info[j].mate = (0, addr2 - addr0)
                modified = True
            j = add_item(
                addr2,
                1,
                '2hvar',
                ITEM_BYTE,
                extra_types
            )
            if item_info[j].mate == (0, 0):
                item_info[j].mate = (addr0 - addr2, 0)
                modified = True
+
# operand of the form $xx or $xxxx
# (raw string: '\$' is a regex escape, not a Python string escape)
re_direct = re.compile(r'\$([0-9a-f]+)$')


def disasm_code_direct(addr, n, instr, match):
    """Handle a direct (zero page / absolute / branch target) operand.

    addr and n are the instruction's address and length, instr its
    [mnemonic, operand] tokens and match a re_direct match on the operand.
    The target is registered as code (a global label for jsr) or as a byte
    variable, and for 3-byte instructions the operand bytes themselves are
    registered as a pointer.  instr[1] is replaced by the target's label.
    """
    zpage = len(match.group(1)) == 2
    dest = int(match.group(1), 16)
    if instr[0] in op_control_transfer:
        if instr[0] == 'jsr':
            add_item(dest, 1, '4sub', ITEM_CODE, [], MAX_LOCALS)
        else:
            add_item(dest, 1, '3loc', ITEM_CODE, [])

        # allow for self-modifying code
        if n == 3:
            add_item(addr + 1, 2, '6ptr', ITEM_WORD, [ITEM_CODE])
    else:
        add_item(dest, 1, '1bvar', ITEM_BYTE, [], 0, instr[0] in op_write)

        # allow for self-modifying code
        if n == 3:
            add_item(addr + 1, 2, '6ptr', ITEM_WORD, [ITEM_BYTE])
    instr[1] = get_label(dest, zpage)
+
# operand of the form $xx,x / $xxxx,x / $xx,y / $xxxx,y
# (raw string: '\$' is a regex escape, not a Python string escape)
re_direct_xy = re.compile(r'\$([0-9a-f]+),([xy])$')


def disasm_code_direct_xy(addr, n, instr, match):
    """Handle an indexed direct operand.

    addr and n are the instruction's address and length, instr its
    [mnemonic, operand] tokens and match a re_direct_xy match on the
    operand.  When trace bounds exist for the instruction, the bytes the
    index register was seen to reach are registered as a byte array and
    the operand is written relative to that array's label; without trace
    data the base address itself is looked up.  For 3-byte instructions
    the operand bytes are registered as a pointer.
    """
    zpage = len(match.group(1)) == 2
    dest = int(match.group(1), 16)
    reg = trace_reg_indices[match.group(2)]
    min_unsigned = 0
    if addr in trace_regs:
        min_unsigned = trace_regs[addr][reg][TRACE_MIN_UNSIGNED]
        max_unsigned = trace_regs[addr][reg][TRACE_MAX_UNSIGNED]
        add_item(
            dest + min_unsigned,
            max_unsigned + 1 - min_unsigned,
            '8barr',
            ITEM_BYTE,
            [],
            0,
            instr[0] in op_write
        )
    # label the lowest accessed byte and subtract the index back out
    instr[1] = f'{get_label(dest + min_unsigned, zpage, -min_unsigned):s},{trace_reg_names[reg]:s}'

    # allow for self-modifying code
    if n == 3:
        add_item(addr + 1, 2, '6ptr', ITEM_WORD, [ITEM_BYTE])
+
# operand of the form ($xx) or ($xxxx)
# (raw string: the backslashes are regex escapes, not Python string escapes)
re_indirect = re.compile(r'\(\$([0-9a-f]+)\)$')


def disasm_code_indirect(addr, n, instr, match):
    """Handle an indirect operand.

    addr and n are the instruction's address and length, instr its
    [mnemonic, operand] tokens and match a re_indirect match on the
    operand.  For jmp the target is registered as a code vector and the
    operand bytes as a pointer to it; for anything else the target is
    registered as a pointer to bytes.  instr[1] is replaced by the
    target's label in square brackets.
    """
    zpage = len(match.group(1)) == 2
    dest = int(match.group(1), 16)

    if instr[0] == 'jmp':
        add_item(dest, 2, '7vec', ITEM_WORD, [ITEM_CODE])

        # allow for self-modifying code
        assert n == 3
        add_item(addr + 1, 2, '6ptr', ITEM_WORD, [ITEM_WORD, ITEM_CODE])
    else:
        add_item(dest, 2, '6ptr', ITEM_WORD, [ITEM_BYTE])

        # allow for self-modifying code
        assert n != 3
    instr[1] = f'[{get_label(dest, zpage):s}]'
+
# operand of the form ($xx),y
# (raw string: the backslashes are regex escapes, not Python string escapes)
re_indirect_y = re.compile(r'\(\$([0-9a-f]+)\),y$')


def disasm_code_indirect_y(addr, n, instr, match):
    """Handle a zero-page indirect, Y-indexed operand.

    instr holds the [mnemonic, operand] tokens and match is a
    re_indirect_y match on the operand.  The zero-page word is registered
    as a pointer to bytes and instr[1] is replaced by ``[label],y``.
    addr and n are accepted for a uniform handler signature and unused.
    """
    assert len(match.group(1)) == 2
    dest = int(match.group(1), 16)
    add_item(dest, 2, '6ptr', ITEM_WORD, [ITEM_BYTE])
    instr[1] = f'[{get_label(dest, True):s}],y'
+
# operand of the form ($xx,x) or ($xxxx,x)
# (raw string: the backslashes are regex escapes, not Python string escapes)
re_x_indirect = re.compile(r'\(\$([0-9a-f]+),x\)$')


def disasm_code_x_indirect(addr, n, instr, match):
    """Handle an X-indexed indirect operand.

    addr is the instruction's address, instr its [mnemonic, operand]
    tokens and match a re_x_indirect match on the operand.  When trace
    bounds for X exist, the words X was seen to select are registered as
    an array of pointers to bytes.  instr[1] is replaced by
    ``[label,x]`` written relative to the lowest selected word.
    n is accepted for a uniform handler signature and unused.
    """
    #assert len(match.group(1)) == 2 -- doesn't handle 65c02 indexed jmp
    dest = int(match.group(1), 16)
    min_unsigned = 0
    if addr in trace_regs:
        # word access should only access evens or odds per instruction
        assert (
            (trace_regs[addr][TRACE_REG_X][TRACE_MIN_BITWISE] & 1) ==
            (trace_regs[addr][TRACE_REG_X][TRACE_MAX_BITWISE] & 1)
        )
        min_unsigned = trace_regs[addr][TRACE_REG_X][TRACE_MIN_UNSIGNED]
        max_unsigned = trace_regs[addr][TRACE_REG_X][TRACE_MAX_UNSIGNED]
        add_item(
            dest + min_unsigned,
            max_unsigned + 1 - min_unsigned,
            '9warr',
            ITEM_WORD,
            [ITEM_BYTE]
        )
    instr[1] = f'[{get_label(dest + min_unsigned, True, -min_unsigned):s},x]'
+
# operand of the form #$xx
# (raw string: '\$' is a regex escape, not a Python string escape)
re_immediate = re.compile(r'#\$([0-9a-f]+)$')


def disasm_code_immediate(addr, n, instr, match):
    """Handle an immediate operand.

    addr is the instruction's address, instr its [mnemonic, operand]
    tokens and match a re_immediate match on the operand.  If the operand
    byte has been registered as an ITEM_BYTE item it is formatted through
    byte_operand (so a mated byte shows as ``<label`` / ``>label``);
    otherwise it is a plain hex literal.  n is accepted for a uniform
    handler signature and unused.
    """
    assert len(match.group(1)) == 2

    dest = addr + 1
    i = get_item(dest)
    if i != -1 and item_info[i].type == ITEM_BYTE:
        operand = byte_operand(
            dest,
            item_info[i].extra_types,
            item_info[i].mate
        )
    else:
        operand = '0x' + match.group(1)
    instr[1] = '#' + operand
+
# mnemonics that store to or read-modify-write their memory operand;
# includes the 65C02 additions stz, trb and tsb since the target CPU is
# a 65C02
op_write = {
    'sta',
    'stx',
    'sty',
    'stz',
    'inc',
    'dec',
    'asl',
    'lsr',
    'rol',
    'ror',
    'trb',
    'tsb'
}
# mnemonics whose direct operand is a code address
op_control_transfer = {
    'jmp',
    'jsr',
    'bpl',
    'bmi',
    'bvc',
    'bvs',
    'bcc',
    'bcs',
    'bne',
    'beq',
    'bra'
}
# mnemonics after which execution never continues at the next instruction,
# so the following byte must not be assumed to be code; bra is the 65C02
# unconditional branch and rti returns like rts
op_no_fall_through = {'jmp', 'rts', 'rti', 'brk', 'bra'}
def disasm_code(addr):
    """Disassemble and analyse the instruction at addr.

    Looks ahead over six instructions so that multi-instruction idioms
    (re_macro0..2) can be recognised, then rewrites the first
    instruction's operand through the matching addressing-mode handler,
    registering any items the instruction implies.  Unless the mnemonic
    is in op_no_fall_through, the following address is registered as
    code.

    Returns (n, instr): the instruction length in bytes and its
    [mnemonic] or [mnemonic, operand] token list.  Sets the global
    ``modified`` flag if it renames an item.
    """
    global modified

    # disassemble the next 6 instructions
    starts = []
    texts = []
    cursor = addr
    for _ in range(6):
        starts.append(cursor)
        length, text = disassembler.instruction_at(cursor)
        texts.append(text.lower())
        cursor += length
        assert cursor < 0x10000

    # extract first instruction for detailed analysis
    n = starts[1] - starts[0]
    instr = texts[0].split()

    # convert instruction group to text for regex analysis
    listing = ''.join(text + '\n' for text in texts)

    # recognize common instruction sequences (first match wins)
    for pattern, handler in (
        (re_macro0, disasm_code_macro0),
        (re_macro1, disasm_code_macro1),
        (re_macro2, disasm_code_macro2),
    ):
        match = pattern.match(listing)
        if match is not None:
            handler(starts, match)
            break

    # enable this when code item with size > 1 causes abort
    #print(hex(addr), instr)

    # recognize and reformat addressing modes (first match wins)
    if len(instr) >= 2:
        for pattern, handler in (
            (re_direct, disasm_code_direct),
            (re_direct_xy, disasm_code_direct_xy),
            (re_indirect, disasm_code_indirect),
            (re_indirect_y, disasm_code_indirect_y),
            (re_x_indirect, disasm_code_x_indirect),
            (re_immediate, disasm_code_immediate),
        ):
            match = pattern.match(instr[1])
            if match is not None:
                handler(addr, n, instr, match)
                break
        else:
            # the only remaining operand form is the accumulator
            assert instr[1] == 'a'
    elif instr[0] == 'rts':
        # a label that lands on an rts gets an rts_xxxx name if that
        # outranks the name it already has
        i = get_item(addr)
        if i != -1:
            name = f'5rts_{addr:04x}'
            if len(item_info[i].name) and name > item_info[i].name:
                item_info[i].name = name
                # references already written in this pass used the old
                # name, so another pass is needed for consistent output
                modified = True

    if instr[0] not in op_no_fall_through:
        add_item(addr + n, 1, '', ITEM_CODE, [])

    return n, instr
+
+# a basic disassembler is provided without any analysis, for problem
+# instructions which would cause undesirable merging of memory items
+#def disasm_code_ign(addr):
+# n, instr = disassembler.instruction_at(addr)
+# instr = instr.lower().split()
+# instr[1] = instr[1].replace('(', '[').replace(')', ']').replace('$', '0x')
+# return n, instr
+
def disasm_word(addr, extra_types = [], mate = (0, 1)):
    """Return (2, ['.dw', operand]) for the word at addr."""
    operand = word_operand(addr, extra_types, mate)
    return 2, ['.dw', operand]
+
def disasm_byte(addr, extra_types = [], mate = (0, 0)):
    """Return (1, ['.db', operand]) for the byte at addr."""
    operand = byte_operand(addr, extra_types, mate)
    return 1, ['.db', operand]
+
def addr_chain(addr):
    """Return the comment text showing how the trace reached/left addr.

    Without trace information this is just the 4-digit hex address.
    Otherwise it is ``prevs -> addr -> nexts`` where each side is a
    comma-separated sorted list of traced neighbour addresses and is
    omitted (arrow kept) when the trace has none on that side.
    """
    has_prevs = addr in trace_prevs
    has_nexts = addr in trace_nexts
    if not (has_prevs or has_nexts):
        return f'{addr:04x}'

    before = ''
    if has_prevs:
        before = ','.join(f'{pc:04x}' for pc in sorted(trace_prevs[addr])) + ' '
    after = ''
    if has_nexts:
        after = ' ' + ','.join(f'{pc:04x}' for pc in sorted(trace_nexts[addr]))
    return f'{before:s}-> {addr:04x} ->{after:s}'
+
# mnemonics for which the traced stack pointer is shown in the comment
op_stack = {
    'jsr', 'rts', 'pha', 'pla', 'phx', 'plx', 'phy', 'ply', 'php', 'plp'
}

# mnemonics for which traced flag values are shown, with the flags to show
_carry_and_decimal = [TRACE_FLAG_C, TRACE_FLAG_D]
op_flags = {
    'adc': list(_carry_and_decimal),
    'sbc': list(_carry_and_decimal),
}
for _flag, _mnemonics in (
    (TRACE_FLAG_C, ('bcc', 'bcs')),
    (TRACE_FLAG_Z, ('bne', 'beq')),
    (TRACE_FLAG_V, ('bvc', 'bvs')),
    (TRACE_FLAG_N, ('bpl', 'bmi')),
):
    for _mnemonic in _mnemonics:
        op_flags[_mnemonic] = [_flag]
+
def _pad_to_comment_column(line, column):
    """Return line extended with tabs so a comment starts at column 40.

    column is the display column line currently ends at, assuming tab
    stops every 8 columns.  A line already at or past column 40 gets a
    single separating space instead.
    """
    if column >= 40:
        return line + ' '
    while column < 40:
        line += '\t'
        column = (column + 8) & ~7
    return line


# Rewrite the whole output file repeatedly until a pass completes without
# the item table changing: disassembling discovers new items, which in
# turn changes labels and how earlier bytes are rendered.
_pass = 0
modified = True
while modified:
    print('pass', _pass)
    modified = False

    with open(out_asm, 'w') as fout:
        # equates pass: named items lying in the gaps before, between and
        # after the areas are emitted as "label = address"
        need_blank = 0
        for area in range(len(area_addr) + 1):
            # find region between area - 1 and area
            area_start = (
                0
                if area == 0 else
                area_addr[area - 1] + area_info[area - 1].size
            )
            area_end = (
                area_addr[area]
                if area < len(area_addr) else
                0x10000
            )

            item = bisect.bisect_left(item_addr, area_start)
            while item < len(item_addr) and item_addr[item] < area_end:
                if len(item_info[item].name):
                    label = item_info[item].name
                    if label[0] < 'A':
                        label = label[1:] # strip numeric priority indicator
                    addr = item_addr[item]
                    line = f'{label:s} = 0x{addr:04x}'
                    comment = f'; {addr_chain(addr):s}'
                    line = _pad_to_comment_column(line, len(line))
                    fout.write(
                        f'{line:s}{comment:s}\n'
                    )
                    need_blank = 1
                item += 1
        fout.write('\n' * need_blank)

        # output pass: one .area per area, rendered item by item
        for area in range(len(area_addr)):
            area_end = area_addr[area] + area_info[area].size
            fout.write(f'\t.area\t{area_info[area].name:s}\n\n')
            need_blank = 0
            local_defs.clear()
            local_refs.clear()

            addr = area_addr[area]
            # item is the module-level cursor that add_item keeps valid
            # while the table changes underneath this loop
            item = bisect.bisect_left(item_addr, addr)
            while addr < area_end:
                comment = f'; {addr_chain(addr):s}'
                if item < len(item_addr) and addr >= item_addr[item]:
                    comment += ' rw' if item_info[item].writeable else ' r'

                    assert addr < item_addr[item] + item_info[item].size
                    if addr == item_addr[item] and len(item_info[item].name):
                        if item_info[item].local_label != MAX_LOCALS:
                            # locals are numbered in order of definition
                            # within the current block
                            assert len(local_defs) < MAX_LOCALS
                            if item_info[item].local_label != len(local_defs):
                                item_info[item].local_label = len(local_defs)
                                modified = True
                            local_defs.add(addr)
                            label = f'{item_info[item].local_label:d}$'
                        else:
                            # a global label ends the current block of locals
                            flush_locals()
                            local_defs.clear()
                            local_refs.clear()

                            label = item_info[item].name
                            if label[0] < 'A':
                                label = label[1:] # strip numeric priority indicator
                        if len(label) < 7:
                            # short label shares the line with the statement
                            fout.write(label + ':')
                            need_blank = 2
                        else:
                            fout.write(label + ':\n')
                            need_blank = 1

                    if area_info[area].type == AREA_UNINIT:
                        n = item_addr[item] + item_info[item].size - addr
                        instr = ['.ds', f'{n:d}' if n < 10 else f'0x{n:x}']
                    elif item_info[item].type in (ITEM_CODE, ITEM_CODE_IGN):
                        ignore = item_info[item].type == ITEM_CODE_IGN
                        n, instr = disasm_code(addr)
                        ignore = False

                        # use disassembly for reg display, even if falling back to byte
                        instr0 = instr

                        # for multi-byte instructions, only disassemble if the remaining
                        # bytes occupied by the instruction are not code and not writeable
                        # - if code then we are jumping into the middle of an instruction
                        # - if writeable then instruction is self-modifying, and we want
                        #   to output a specific label such as ptr_XXXX for modified part
                        # note: if opcode item is longer than 1 byte then it extends over
                        # some operand bytes -- do not treat these as code or writeable,
                        # it confuses disassembly for code that accesses itself blockwise
                        i = item + 1
                        while i < len(item_info) and item_addr[i] < addr + n:
                            if item_info[i].type == ITEM_CODE or item_info[i].writeable:
                                # put instruction as a comment instead, fall back to byte
                                comment += ' "{0:s}"'.format(' '.join(instr))
                                n, instr = disasm_byte(addr)
                                break
                            i += 1

                        # display relevant register bounds captured from trace file
                        if addr in trace_regs:
                            regs = []
                            flags = []
                            if instr0[0] == 'cmp':
                                regs.append(TRACE_REG_A)
                            if instr0[0] == 'cpx' or (len(instr0) >= 2 and ',x' in instr0[1]):
                                regs.append(TRACE_REG_X)
                            if instr0[0] == 'cpy' or (len(instr0) >= 2 and ',y' in instr0[1]):
                                regs.append(TRACE_REG_Y)
                            if instr0[0] in op_stack:
                                regs.append(TRACE_REG_S)
                            if instr0[0] in op_flags:
                                flags = op_flags[instr0[0]]
                            for reg in regs:
                                min_unsigned = trace_regs[addr][reg][TRACE_MIN_UNSIGNED]
                                max_unsigned = trace_regs[addr][reg][TRACE_MAX_UNSIGNED]
                                comment += f' {trace_reg_names[reg]:s}={min_unsigned:02x}'
                                if max_unsigned != min_unsigned:
                                    comment += f'..{max_unsigned:02x}'
                                    # lowest bit that differs between the two
                                    # bitwise bounds, shown when it is not bit 0
                                    min_bitwise = trace_regs[addr][reg][TRACE_MIN_BITWISE]
                                    max_bitwise = trace_regs[addr][reg][TRACE_MAX_BITWISE]
                                    incr = 1
                                    while (min_bitwise & incr) == (max_bitwise & incr):
                                        incr <<= 1
                                    if incr != 1:
                                        comment += f',{incr:02x}'
                            for flag in flags:
                                min_bitwise = (
                                    trace_regs[addr][TRACE_REG_P][TRACE_MIN_BITWISE] >> flag
                                ) & 1
                                max_bitwise = (
                                    trace_regs[addr][TRACE_REG_P][TRACE_MAX_BITWISE] >> flag
                                ) & 1
                                comment += f' {trace_flag_names[flag]:s}={min_bitwise:d}'
                                if max_bitwise != min_bitwise:
                                    comment += f'..{max_bitwise:d}'
                    elif item_info[item].type == ITEM_WORD:
                        if addr + 1 >= item_addr[item] + item_info[item].size:
                            # fall back to byte for last byte of odd-length item
                            n, instr = disasm_byte(
                                addr,
                                item_info[item].extra_types,
                                item_info[item].mate
                            )
                        else:
                            n, instr = disasm_word(
                                addr,
                                item_info[item].extra_types
                            )
                    elif item_info[item].type == ITEM_BYTE:
                        n, instr = disasm_byte(
                            addr,
                            item_info[item].extra_types,
                            item_info[item].mate
                        )
                    else:
                        assert False
                elif area_info[area].type == AREA_UNINIT:
                    # no information about item, reserve the gap up to the
                    # next item but never past the end of this area
                    next_item_addr = (
                        item_addr[item]
                        if item < len(item_addr) else
                        0x10000
                    )
                    n = min(next_item_addr, area_end) - addr
                    instr = ['.ds', f'{n:d}' if n < 10 else f'0x{n:x}']
                else:
                    # no information about item, fall back to byte
                    n, instr = disasm_byte(addr)

                if len(instr) == 1:
                    line = f'\t{instr[0]:s}'
                    x = 8 + len(instr[0])
                elif len(instr) == 2:
                    line = f'\t{instr[0]:s}\t{instr[1]:s}'
                    x = 16 + len(instr[1])
                else:
                    assert False
                fout.write(_pad_to_comment_column(line, x) + comment + '\n')
                need_blank = 1
                addr += n

                # step the cursor past every item that is now fully behind us
                while (
                    item < len(item_addr) and
                    addr >= item_addr[item] + item_info[item].size
                ):
                    item += 1

            fout.write('\n' * need_blank)
            flush_locals()
        fout.write(f'\t.end\t{get_label(entry_point):s}\n')

    _pass += 1