In a2_pack.py implement a mini linker based on end addresses of sections, change...
author Nick Downing <nick@ndcode.org>
Tue, 21 Jun 2022 03:34:09 +0000 (13:34 +1000)
committer Nick Downing <nick@ndcode.org>
Tue, 21 Jun 2022 03:39:13 +0000 (13:39 +1000)
loader/Makefile
loader/a2_pack.py
loader/lzss_unpack.asm

index d78daa3..c911c28 100755 (executable)
@@ -9,7 +9,7 @@ ASLINK=../asxv5pxx/asxmak/linux/exe/aslink
 #   pip3 install --user intelhex
 HEX2BIN=hex2bin.py
 
-LOAD_ADDR=0x5800
+LOAD_ADDR=0xa000
 LZSS_LOADER=0x800
 RECRACK_LOADER=0x9ded
 
index 49bd512..588bde6 100755 (executable)
@@ -18,9 +18,9 @@ MAX_DIST = (1 << DIST_BITS1) # distance codes are 1..MAX_DIST
 MAX_LEN = (1 << LEN_BITS1) + 1 # length codes are 2..MAX_LEN
 
 if len(sys.argv) < 5:
-  print(f'usage: {sys.argv[0]:s} load_addr lzss_unpack.bin in.ihx out.a2bin')
+  print(f'usage: {sys.argv[0]:s} end_addr lzss_unpack.bin in.ihx out.a2bin')
   sys.exit(EXIT_FAILURE)
-load_addr = int(sys.argv[1], 0)
+end_addr = int(sys.argv[1], 0)
 lzss_unpack_bin = sys.argv[2]
 in_ihx = sys.argv[3]
 out_a2bin = sys.argv[4]
@@ -66,16 +66,16 @@ def lzss_pack(dest, bin):
       i += 1
 
   # checking
-  bin1 = []
-  lzss1 = lzss[::-1]
-  while len(lzss1):
-    _len, dist = lzss1.pop()
-    if _len == 1:
-      bin1.append(dist)
-    else:
-      for i in range(_len):
-        bin1.append(bin1[-dist])
-  assert bin == bin1
+  #bin1 = []
+  #lzss1 = lzss[::-1]
+  #while len(lzss1):
+  #  _len, dist = lzss1.pop()
+  #  if _len == 1:
+  #    bin1.append(dist)
+  #  else:
+  #    for i in range(_len):
+  #      bin1.append(bin1[-dist])
+  #assert bin == bin1
 
   # construct the real output in reverse to how it will be decoded,
   # this means we flush the bits at the right time for the decoder,
@@ -124,48 +124,48 @@ def lzss_pack(dest, bin):
   lzss = lzss1[::-1]
 
   # checking
-  bin1 = []
-  lzss1 = lzss[::-1]
-  count1 = count
-  bits1 = bits
-  while True:
-    if bits1 == 1:
-      if count1 == 0:
-        break
-      count1 -= 1
-      bits1 = lzss1.pop() | 0x100
-      #print('e', bits1)
-    cf = bits1 & 1
-    bits1 >>= 1
-  
-    if cf:
-      if bits1 == 1:
-        bits1 = lzss1.pop() | 0x100
-        #print('d', bits1)
-      cf = bits1 & 1
-      bits1 >>= 1
-  
-      if cf:
-        item = lzss1[-1] | (lzss1[-2] << 8)
-        del lzss1[-2:]
-        #print('c', item)
-        dist = item & ((1 << DIST_BITS1) - 1)
-        _len = item >> DIST_BITS1
-      else: 
-        item = lzss1.pop()
-        #print('b', item)
-        dist = item & ((1 << DIST_BITS0) - 1)
-        _len = item >> DIST_BITS0
-      _len += 2
-      dist += 1
-  
-      for i in range(_len):
-        bin1.append(bin1[-dist])
-    else:
-      #print('a', lzss1[-1])
-      bin1.append(lzss1.pop())
-  assert len(lzss1) == 0
-  assert bin1 == bin
+  #bin1 = []
+  #lzss1 = lzss[::-1]
+  #count1 = count
+  #bits1 = bits
+  #while True:
+  #  if bits1 == 1:
+  #    if count1 == 0:
+  #      break
+  #    count1 -= 1
+  #    bits1 = lzss1.pop() | 0x100
+  #    #print('e', bits1)
+  #  cf = bits1 & 1
+  #  bits1 >>= 1
+  #
+  #  if cf:
+  #    if bits1 == 1:
+  #      bits1 = lzss1.pop() | 0x100
+  #      #print('d', bits1)
+  #    cf = bits1 & 1
+  #    bits1 >>= 1
+  #
+  #    if cf:
+  #      item = lzss1[-1] | (lzss1[-2] << 8)
+  #      del lzss1[-2:]
+  #      #print('c', item)
+  #      dist = item & ((1 << DIST_BITS1) - 1)
+  #      _len = item >> DIST_BITS1
+  #    else: 
+  #      item = lzss1.pop()
+  #      #print('b', item)
+  #      dist = item & ((1 << DIST_BITS0) - 1)
+  #      _len = item >> DIST_BITS0
+  #    _len += 2
+  #    dist += 1
+  #
+  #    for i in range(_len):
+  #      bin1.append(bin1[-dist])
+  #  else:
+  #    #print('a', lzss1[-1])
+  #    bin1.append(lzss1.pop())
+  #assert len(lzss1) == 0
+  #assert bin1 == bin
 
   # optimization: provided the input is not null, the first byte
   # has to be literal, so the loader can fall straight into the
@@ -205,93 +205,228 @@ segments = (
   intersect(segments, [0x100, 0x200])
 )
 
-bin = [0x4c, 0x00, 0x00] # jmp 0 (fixup: loader)
-loader = [
-  0xd8,                # cld
-  0xa2, 0xff,  # ldx #0xff
-  0x9a,                # txs
-]
-fixup_lzss_unpack = []
-for i in range(0, len(segments), 2):
+# sections are output to the a2bin file from top to bottom as follows:
+SECTION_UNPACKER = 0
+SECTION_LOADER = 1
+SECTION_PAYLOAD = 2
+N_SECTIONS = 3
+
+# fixup is a 4-tuple:
+#   (fixup type, fixup address, target section, target address)
+# both addresses are negative and relative to the end addr of the section
+FIXUP_TYPE_LO_BYTE = 0
+FIXUP_TYPE_HI_BYTE = 1
+FIXUP_TYPE_WORD = 2
+
+# each section has a data area, an end address and a list of fixups
+# the data is constructed from top to bottom, hence is reversed here
+# relocation is done after section lengths and end addresses known
+class Section:
+  def __init__(self, data, end_addr, fixups):
+    self.data = data
+    self.end_addr = end_addr
+    self.fixups = fixups
+sections = [Section([], 0, []) for i in range(N_SECTIONS)]
+
+# report is a 5-tuple:
+#   (report type, ihx start, ihx end, a2bin start, a2bin end)
+# for compressed the compression ratio will be printed
+# for direct poke the a2bin values are not used, otherwise they
+# are negative and relative to the end addr of the payload section
+# report is used to visually check for source/destination overlap
+REPORT_TYPE_DIRECT_POKE = 0
+REPORT_TYPE_UNCOMPRESSED = 1
+REPORT_TYPE_COMPRESSED = 2
+report = []
+
+# epilogue
+sections[SECTION_LOADER].data.extend(
+  [
+    0x4c, entry_point & 0xff, entry_point >> 8,        # jmp entry_point
+  ][::-1]
+)
+
+# segments
+for i in range(len(segments) - 2, -2, -2):
   addr0 = segments[i]
   addr1 = segments[i + 1]
   data = list(intelhex.tobinstr(addr0, addr1 - 1))
-  if len(data) > 0x100:
-    addr2 = load_addr + len(bin)
-    bin.extend(lzss_pack(addr0, data))
-    addr3 = load_addr + len(bin)
-    print(
-      f'[0x{addr0:04x}, 0x{addr1:04x}) -> [0x{addr2:04x}, 0x{addr3:04x}) {100. * (addr3 - addr2) / (addr1 - addr0):5.1f}%'
+
+  if len(data) <= 4:
+    report.append(
+      (REPORT_TYPE_DIRECT_POKE, addr0, addr1, 0, 0)
     )
 
-    addr2 += 5 - 0x100
-    loader.extend(
+    # use of zpage version is determined byte by byte
+    for i in data[::-1]:
+      addr1 -= 1
+      sections[SECTION_LOADER].data.extend(
+        [
+          0xa9, i,                             # lda #data
+          0x85, addr1,                         # sta *addr1
+        ][::-1]
+      if addr1 < 0x100 else
+        [
+          0xa9, i,                             # lda #data
+          0x8d, addr1 & 0xff, addr1 >> 8,      # sta addr1
+        ][::-1]
+      )
+  elif len(data) <= 0x100:
+    addr3 = -len(sections[SECTION_PAYLOAD].data)
+    sections[SECTION_PAYLOAD].data.extend(
+      data[::-1]
+    )
+    addr2 = -len(sections[SECTION_PAYLOAD].data)
+    report.append(
+      (REPORT_TYPE_UNCOMPRESSED, addr0, addr1, addr2, addr3)
+    )
+
+    # use of zpage version is determined in advance (if completely fits)
+    zpage = addr1 < 0x100
+
+    addr1 -= 0x100
+    addr3 -= 0x100
+    sections[SECTION_LOADER].data.extend(
+      [
+        0xa2, -len(data) & 0xff,                       # ldx #-count
+        0xbd, 0x00, 0x00,                              # lda addr3,x
+        0x95, addr1 & 0xff,                            # sta *addr1,x
+        0xe8,                                          # inx
+        0xd0, 0xf8                                     # bne .-6
+      ][::-1]
+    if zpage else
+      [
+        0xa2, -len(data) & 0xff,                       # ldx #-count
+        0xbd, 0x00, 0x00,                              # lda addr3,x
+        0x9d, addr1 & 0xff, (addr1 >> 8) & 0xff,       # sta addr1,x
+        0xe8,                                          # inx
+        0xd0, 0xf7                                     # bne .-7
+      ][::-1]
+    )
+    sections[SECTION_LOADER].fixups.extend(
       [
-        0xa9, addr2 & 0xff,    # lda #<addr2
-        0xa0, addr2 >> 8,      # ldy #>addr2
-        0x20, 0x00, 0x00,      # jsr 0 (fixup: lzss_unpack)
+        (
+          FIXUP_TYPE_WORD,
+          3 - len(sections[SECTION_LOADER].data),
+          SECTION_PAYLOAD,
+          addr3
+        ),
       ]
     )
-    fixup_lzss_unpack.append(len(loader) - 2)
   else:
-    count = addr1 - addr0
-    zpage = addr0 < 0x100
-    if count > 4:
-      addr2 = load_addr + len(bin)
-      bin.extend(data)
-      addr3 = load_addr + len(bin)
-      print(
-        f'[0x{addr0:04x}, 0x{addr1:04x}) -> [0x{addr2:04x}, 0x{addr3:04x})'
+    addr3 = -len(sections[SECTION_PAYLOAD].data)
+    sections[SECTION_PAYLOAD].data.extend(
+      lzss_pack(addr0, data)[::-1]
+    )
+    addr2 = -len(sections[SECTION_PAYLOAD].data)
+    report.append(
+      (REPORT_TYPE_COMPRESSED, addr0, addr1, addr2, addr3)
+    )
+
+    if len(sections[SECTION_UNPACKER].data) == 0:
+      sections[SECTION_UNPACKER].data.extend(
+        lzss_unpack[::-1]
       )
-  
-      addr1 -= 0x100
-      addr3 -= 0x100
-      loader.extend(
-        [
-          0xa2, -count & 0xff,                         # ldx #-count
-          0xbd, addr3 & 0xff, (addr3 >> 8) & 0xff,     # lda addr3,x
-          0x95, addr1 & 0xff,                          # sta *addr1,x
-          0xe8,                                                # inx
-          0xd0, 0xf8                                   # bne .-6
-        ]
-      if zpage else
-        [
-          0xa2, -count & 0xff,                         # ldx #-count
-          0xbd, addr3 & 0xff, (addr3 >> 8) & 0xff,     # lda addr3,x
-          0x9d, addr1 & 0xff, (addr1 >> 8) & 0xff,     # sta addr1,x
-          0xe8,                                                # inx
-          0xd0, 0xf7                                   # bne .-7
-        ]
+
+    addr2 += 5 - 0x100
+    sections[SECTION_LOADER].data.extend(
+      [
+        0xa9, 0x00,            # lda #<addr2
+        0xa0, 0x00,            # ldy #>addr2
+        0x20, 0x00, 0x00,      # jsr lzss_unpack
+      ][::-1]
+    )
+    sections[SECTION_LOADER].fixups.extend(
+      [
+        (
+          FIXUP_TYPE_LO_BYTE,
+          1 - len(sections[SECTION_LOADER].data),
+          SECTION_PAYLOAD,
+          addr2
+        ),
+        (
+          FIXUP_TYPE_HI_BYTE,
+          3 - len(sections[SECTION_LOADER].data),
+          SECTION_PAYLOAD,
+          addr2
+        ),
+        (
+          FIXUP_TYPE_WORD,
+          5 - len(sections[SECTION_LOADER].data),
+          SECTION_UNPACKER,
+          -len(sections[SECTION_UNPACKER].data)
+        ),
+      ]
+    )
+
+# prologue
+sections[SECTION_LOADER].data.extend(
+  [
+    0xd8,      # cld
+    0xa2, 0xff,        # ldx #0xff
+    0x9a,      # txs
+  ][::-1]
+)
+sections[SECTION_PAYLOAD].data.extend(
+  [
+    0x4c, 0x00, 0x00,  # jmp loader
+  ][::-1]
+)
+sections[SECTION_PAYLOAD].fixups.extend(
+  [
+    (
+      FIXUP_TYPE_WORD,
+      1 - len(sections[SECTION_PAYLOAD].data),
+      SECTION_LOADER,
+      -len(sections[SECTION_LOADER].data)
+    ),
+  ] 
+)
+
+# relocate
+load_addr = end_addr
+for i in range(N_SECTIONS):
+  sections[i].end_addr = load_addr
+  load_addr -= len(sections[i].data)
+load_size = end_addr - load_addr
+
+for report_type, addr0, addr1, addr2, addr3 in report[::-1]:
+  if report_type == REPORT_TYPE_DIRECT_POKE:
+    print(f'[0x{addr0:04x}, 0x{addr1:04x})')
+  else:
+    addr2 += sections[SECTION_PAYLOAD].end_addr
+    addr3 += sections[SECTION_PAYLOAD].end_addr
+    print(
+      f'[0x{addr0:04x}, 0x{addr1:04x}) -> [0x{addr2:04x}, 0x{addr3:04x})' + (
+        f'{100. * (addr3 - addr2) / (addr1 - addr0):6.1f}%'
+      if report_type == REPORT_TYPE_COMPRESSED else
+        ''
       )
+    )
+
+bin = []
+for i in range(N_SECTIONS):
+  for fixup_type, fixup_addr, section, addr in sections[i].fixups:
+    addr += sections[section].end_addr
+
+    # fixup_addr is in range -len(sections[i].data) .. -1
+    # -1 means last byte of section, but since the section data is
+    # reversed, a fixup at -1 would be done at data[0] and so forth
+    if fixup_type == FIXUP_TYPE_LO_BYTE:
+      assert sections[i].data[~fixup_addr] == 0
+      sections[i].data[~fixup_addr] = addr & 0xff
+    elif fixup_type == FIXUP_TYPE_HI_BYTE:
+      assert sections[i].data[~fixup_addr] == 0
+      sections[i].data[~fixup_addr] = addr >> 8
+    elif fixup_type == FIXUP_TYPE_WORD:
+      assert sections[i].data[~fixup_addr] == 0
+      sections[i].data[~fixup_addr] = addr & 0xff
+      assert sections[i].data[~fixup_addr - 1] == 0
+      sections[i].data[~fixup_addr - 1] = addr >> 8
     else:
-      print(f'[0x{addr0:04x}, 0x{addr1:04x})')
-      for i in range(count):
-        loader.extend(
-          [
-            0xa9, data[i],                     # lda #data
-            0x85, addr0,                       # sta *addr0
-          ]
-        if zpage else
-          [
-            0xa9, data[i],                     # lda #data
-            0x8d, addr0 & 0xff, addr0 >> 8     # sta addr0
-          ]
-        )
-        addr0 += 1
-loader.extend(
-  [0x4c, entry_point & 0xff, entry_point >> 8] # jmp entry_point
-)
-if len(fixup_lzss_unpack):
-  lzss_unpack_addr = load_addr + len(bin)
-  for i in fixup_lzss_unpack:
-    loader[i] = lzss_unpack_addr & 0xff
-    loader[i + 1] = lzss_unpack_addr >> 8
-  bin.extend(lzss_unpack)
-loader_addr = load_addr + len(bin)
-bin[1] = loader_addr & 0xff
-bin[2] = loader_addr >> 8
-bin.extend(loader)
-load_size = len(bin)
+      assert False
+  bin.extend(sections[i].data)
+bin = bin[::-1]
 
 hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8]
 with open(out_a2bin, 'wb') as fout:
index 262134a..3b37ebc 100644 (file)
@@ -4,8 +4,8 @@
        .setdp
 
        .ds     0xf0
-src:   .ds     2                       ; address of last byte read
-dest:  .ds     2                       ; address of last byte written
+src:   .ds     2                       ; address of next byte to read
+dest:  .ds     2                       ; address of next byte to write
 count: .ds     2                       ; count of bit buffer refills to do
 bits:  .ds     1                       ; bit buffer (highest 1 = sentinel)
 dist:  .ds     2                       ; distance, or address of repeated data