Unify a2_pack_*.py into a2_pack.py (--rev|--fwd)
author Nick Downing <nick@ndcode.org>
Tue, 21 Jun 2022 10:25:09 +0000 (20:25 +1000)
committer Nick Downing <nick@ndcode.org>
Tue, 21 Jun 2022 10:25:09 +0000 (20:25 +1000)
loader/Makefile
loader/a2_pack.py [new file with mode: 0755]
loader/a2_pack_fwd.py [deleted file]
loader/a2_pack_rev.py [deleted file]

index f5d2e95..0aee390 100755 (executable)
@@ -25,7 +25,7 @@ star_blazer_dejunked1.a2bin \
 star_blazer_recrack_lzss.a2bin
 
 star_blazer_pack_rev.a2bin: lzss_unpack_rev.bin star_blazer.ihx
-       ./a2_pack_rev.py ${LOAD_ADDR} $^ $@
+       ./a2_pack.py --rev ${LOAD_ADDR} $^ $@
 
 lzss_unpack_rev.bin: lzss_unpack_rev.ihx
        ${HEX2BIN} $< $@
@@ -37,7 +37,7 @@ lzss_unpack_rev.rel: lzss_unpack_rev.asm
        ${AS6500} -l -o $<
 
 star_blazer_pack_fwd.a2bin: lzss_unpack_fwd.bin star_blazer.ihx
-       ./a2_pack_fwd.py ${END_ADDR} $^ $@
+       ./a2_pack.py --fwd ${END_ADDR} $^ $@
 
 lzss_unpack_fwd.bin: lzss_unpack_fwd.ihx
        ${HEX2BIN} $< $@
diff --git a/loader/a2_pack.py b/loader/a2_pack.py
new file mode 100755 (executable)
index 0000000..483727a
--- /dev/null
@@ -0,0 +1,662 @@
+#!/usr/bin/env python3
+
+import sys
+from intelhex import IntelHex
+
+EXIT_SUCCESS = 0
+EXIT_FAILURE = 1 # passed to sys.exit() after the usage message is printed
+
+# short (8-bit) pointer: item = (dist - 1) | ((len - 2) << DIST_BITS0)
+DIST_BITS0 = 7
+LEN_BITS0 = 1
+
+# long (16-bit) pointer: item = (dist - 1) | ((len - 2) << DIST_BITS1)
+DIST_BITS1 = 10
+LEN_BITS1 = 6
+
+MAX_DIST = (1 << DIST_BITS1) # distance codes are 1..MAX_DIST
+MAX_LEN = (1 << LEN_BITS1) + 1 # length codes are 2..MAX_LEN
+
+# reverse decompressor: sits below the uncompressed data
+# the compressed vs uncompressed data can overlap like this:
+#   DDDCCCCCC          (D = decompressor, C = compressed data)
+#       \    \  
+#       UUUUUUUUUU     (U = uncompressed data)
+#   low addr ... high addr
+# this is generally preferred since DOS 3.3 may sit at 0x9600 above the
+# BLOADed image and then be overwritten by the decompressor, but we need
+# some space lower than the uncompressed data (e.g. if the game loads at
+# 0xa00, there'll be 0x200 bytes after text page 1 for the decompressor)
+
+# forward decompressor: sits above the uncompressed data, overlap like:
+#            CCCCCCDDD (C = compressed data, D = decompressor)
+#           /    /
+#       UUUUUUUUUU     (U = uncompressed data)
+#   low addr ... high addr
+# it is needed if we do not control the load address of the uncompressed
+# data and there is no room for the decompressor underneath it (e.g. if
+# the game loads at 0x800 which is immediately after text screen page 1),
+# but the restriction is that end of the decompressor can be no higher
+# than the start of DOS at 0x9600, in turn limiting the uncompressed size
+
+# leading --rev/--fwd switches are consumed in order, the last one wins
+rev = True
+while len(sys.argv) >= 2 and sys.argv[1] in ('--rev', '--fwd'):
+  rev = sys.argv[1] == '--rev'
+  del sys.argv[1]
+
+# four positional arguments must remain once the switches are gone
+if len(sys.argv) < 5:
+  print(
+    f'usage: {sys.argv[0]:s} [--rev|--fwd] (load|end)_addr lzss_unpack_(rev|fwd).bin in.ihx out.a2bin'
+  )
+  sys.exit(EXIT_FAILURE)
+
+# base 0: the address may be written in decimal or with a 0x/0o/0b prefix
+load_or_end_addr = int(sys.argv[1], 0)
+lzss_unpack_rev_or_fwd_bin, in_ihx, out_a2bin = sys.argv[2:5]
+
+# unpacker machine code, kept as a list of byte values so that it can
+# later be appended to a section's data list
+with open(lzss_unpack_rev_or_fwd_bin, 'rb') as fin:
+  lzss_unpack_rev_or_fwd = list(fin.read())
+
+def lzss_pack(dest, bin, rev): # returns packed stream plus 5-byte data block, as a list of byte values
+  if rev:
+    bin = bin[::-1] # makes it easier to construct LZSS items
+
+  heads = {} # (byte, next byte) pair -> most recent position where that pair starts
+  links = [-1] * len(bin) # position -> previous position of the same pair, -1 ends the chain
+  lzss = [] # items: (1, literal byte value) or (match length, match distance)
+  i = 0
+  while i < len(bin):
+    _len = 1 # start out as a literal, replaced below if a match is found
+    dist = bin[i]
+
+    if i + 1 < len(bin):
+      pair = bin[i], bin[i + 1]
+      j = heads.get(pair, -1)
+      while j != -1 and i - j <= MAX_DIST: # walk the candidates, nearest first
+        #assert bin[i:i + 2] == bin[j:j + 2]
+        if (
+          _len < MAX_LEN and
+            i + _len < len(bin) and
+            bin[i + 2:i + _len + 1] == bin[j + 2:j + _len + 1] # candidate must beat the best length so far
+        ):
+          _len += 1
+          while (
+            _len < MAX_LEN and
+              i + _len < len(bin) and
+              bin[i + _len] == bin[j + _len]
+          ):
+            _len += 1
+          dist = i - j
+        j = links[j]
+    lzss.append((_len, dist))
+
+    for j in range(_len): # index every position covered by the item just emitted
+      if i + 1 < len(bin):
+        pair = bin[i], bin[i + 1]
+        links[i] = heads.get(pair, -1)
+        heads[pair] = i
+      i += 1
+
+  # checking: expand the (len, dist) items again and compare with the input
+  bin1 = []
+  lzss1 = lzss[::-1]
+  while len(lzss1):
+    _len, dist = lzss1.pop()
+    if _len == 1:
+      bin1.append(dist)
+    else:
+      for i in range(_len):
+        bin1.append(bin1[-dist])
+  assert bin == bin1
+
+  # construct the real output in reverse to how it will be decoded,
+  # this means we flush the bits at the right time for the decoder,
+  # and any partial bit buffer is decoded at start rather than end
+  lzss1 = []
+  count = 0 # bit buffers flushed at the end of an item (the 'e' case below)
+  bits = 1 # control bit buffer, the leading 1 reaches 0x100 once 8 bits are in
+  while len(lzss):
+    _len, dist = lzss.pop() # last item first
+    if _len == 1:
+      #print('a', dist)
+      lzss1.append(dist)
+      cf = 0 # item type bit: literal
+    else:
+      _len -= 2
+      dist -= 1
+      if _len < (1 << LEN_BITS0) and dist < (1 << DIST_BITS0):
+        item = dist | (_len << DIST_BITS0)
+        #print('b', item)
+        lzss1.append(item)
+        cf = 0 # pointer size bit: short
+      elif _len < (1 << LEN_BITS1) and dist < (1 << DIST_BITS1):
+        item = dist | (_len << DIST_BITS1)
+        #print('c', item)
+        # keep the 16-bit words in little-endian order in memory,
+        # means swapping them if the output will be reversed later
+        lzss1.extend(
+          [item & 0xff, item >> 8]
+        if rev else
+          [item >> 8, item & 0xff]
+        )
+        cf = 1 # pointer size bit: long
+      else:
+        assert False
+
+      bits = (bits << 1) | cf
+      if bits & 0x100:
+        #print('d', bits)
+        lzss1.append(bits & 0xff)
+        bits = 1
+        # in this case we leave count alone (at decoding side we get
+        # another bit buffer for free without any increment or test)
+
+      cf = 1 # item type bit: pointer
+
+    bits = (bits << 1) | cf
+    if bits & 0x100:
+      #print('e', bits)
+      lzss1.append(bits & 0xff)
+      bits = 1
+      count += 1
+  lzss = lzss1
+
+  # checking: decode the byte stream again and compare with the input
+  bin1 = []
+  lzss1 = list(lzss)
+  count1 = count
+  bits1 = bits
+  while True:
+    if bits1 == 1: # only the marker is left, so the buffer is empty
+      if count1 == 0:
+        break
+      count1 -= 1
+      bits1 = lzss1.pop() | 0x100
+      #print('e', bits1)
+    cf = bits1 & 1 # item type bit
+    bits1 >>= 1
+
+    if cf:
+      if bits1 == 1:
+        bits1 = lzss1.pop() | 0x100 # mid-item refill, count1 is not touched
+        #print('d', bits1)
+      cf = bits1 & 1 # pointer size bit
+      bits1 >>= 1
+
+      if cf:
+        # keep the 16-bit words in little-endian order in memory,
+        # means swapping them if the output will be reversed later
+        item = (
+          lzss1[-2] | (lzss1[-1] << 8)
+        if rev else
+          lzss1[-1] | (lzss1[-2] << 8)
+        )
+        del lzss1[-2:]
+        #print('c', item)
+        dist = item & ((1 << DIST_BITS1) - 1)
+        _len = item >> DIST_BITS1
+      else: 
+        item = lzss1.pop()
+        #print('b', item)
+        dist = item & ((1 << DIST_BITS0) - 1)
+        _len = item >> DIST_BITS0
+      _len += 2
+      dist += 1
+
+      for i in range(_len):
+        bin1.append(bin1[-dist])
+    else:
+      #print('a', lzss1[-1])
+      bin1.append(lzss1.pop())
+  assert len(lzss1) == 0
+  assert bin1 == bin
+
+  # optimization: provided the input is not null, the first byte
+  # has to be literal, so the loader can fall straight into the
+  # literal decoding routine (saves a jump to the official loop)
+  if bits == 1:
+    assert count # fails for empty input, which has no first literal
+    count -= 1
+    bits = lzss.pop() | 0x100
+  assert (bits & 1) == 0
+  bits >>= 1 # drop the type bit of that first literal
+
+  # LZSS data has been reversed by stackwise encoding method
+  # change it back if needed and put the data block on correct end
+  count ^= 0xffff # inc/test is easier than test/dec
+  data_block = [dest & 0xff, dest >> 8, count & 0xff, count >> 8, bits] # dest and count little-endian, then the initial bit buffer
+  if rev:
+    # append data block
+    lzss.extend(data_block)
+  else:
+    # prepend data block
+    lzss.extend(data_block[::-1])
+    lzss = lzss[::-1]
+  return lzss
+
+intelhex = IntelHex(in_ihx)
+entry_point = intelhex.start_addr['EIP'] # target of the jmp that ends the loader
+segments = [j for i in intelhex.segments() for j in i] # flattened to [start0, end0, start1, end1, ...]
+
+# zero page and stack are done last, after we finish with them,
+# and in 0x100-byte pieces so we can do them without zero page
+def intersect(segments, segment):
+  # clip a flat [start0, end0, start1, end1, ...] list to the [lo, hi) window
+  lo, hi = segment
+  clipped = []
+  for k in range(0, len(segments), 2):
+    start, end = segments[k:k + 2]
+    # narrow the half-open range to the window, drop it if nothing is left
+    start = max(start, lo)
+    end = min(end, hi)
+    if end > start:
+      clipped.extend([start, end])
+  return clipped
+segments = (
+  intersect(segments, [0, 0x100]) + # zero page
+  intersect(segments, [0x100, 0x200]) + # stack page
+  intersect(segments, [0x200, 0x10000]) # everything above
+if rev else # the rev generator walks this list from its last pair to its first
+  intersect(segments, [0x200, 0x10000]) +
+  intersect(segments, [0, 0x100]) +
+  intersect(segments, [0x100, 0x200])
+)
+
+# fixup is a 4-tuple:
+#   (fixup flags, fixup address, target section, target address)
+# either address can be start relative or end relative
+# either lo byte, hi byte or both (lo byte first) can be fixed up
+FIXUP_FLAG_FIXUP_END_RELATIVE = 1 # fixup address counts from the end of its own section
+FIXUP_FLAG_TARGET_END_RELATIVE = 2 # target address counts from the end of the target section
+FIXUP_FLAG_LO_BYTE = 4 # store the low byte of the target at the fixup address
+FIXUP_FLAG_HI_BYTE = 8 # store the high byte (in the following byte if lo is also set)
+
+# each section has a data area, a load address and a list of fixups
+# relocation is done after section lengths and load addresses known
+class Section:
+  # plain record: list of byte values, address assigned at relocation
+  # time, and the fixups still to be applied to the byte values
+  def __init__(self, data, load_addr, fixups):
+    self.data, self.load_addr, self.fixups = data, load_addr, fixups
+section_lzss_unpack = Section([], 0, []) # unpacker code, stays empty unless a segment gets compressed
+section_loader = Section([], 0, []) # generated 6502 code that restores every segment
+section_payload = Section([], 0, []) # segment data, raw or LZSS packed (fwd: after a jmp to the loader)
+
+# sections are output to the a2bin file from bottom to top as follows:
+sections = (
+  [section_loader, section_lzss_unpack, section_payload]
+if rev else
+  [section_payload, section_loader, section_lzss_unpack]
+)
+
+# report is a 5-tuple:
+#   (report type, ihx start, ihx end, a2bin start, a2bin end)
+# for compressed the compression ratio will be printed
+# for direct poke the a2bin values are not used, otherwise they
+# are relative to the load or end address of the payload section
+# report is used to visually check for source/destination overlap
+REPORT_TYPE_DIRECT_POKE = 0 # segment of at most 4 bytes, stored by lda/sta pairs
+REPORT_TYPE_UNCOMPRESSED = 1 # segment of at most 0x100 bytes, copied by a loop
+REPORT_TYPE_COMPRESSED = 2 # larger segment, packed by lzss_pack()
+report = []
+
+if rev:
+  # prologue: clear decimal mode, reset the stack pointer to 0xff
+  section_loader.data.extend(
+    [
+      0xd8,    # cld
+      0xa2, 0xff,      # ldx #0xff
+      0x9a,    # txs
+    ]
+  )
+
+  # segments
+  # loader is constructed in order of execution (bottom to top)
+  # payload is constructed in order of unpacking (top to bottom)
+  for i in range(len(segments) - 2, -2, -2): # last (start, end) pair first
+    addr0 = segments[i]
+    addr1 = segments[i + 1]
+    data = list(intelhex.tobinstr(addr0, addr1 - 1))
+
+    if len(data) <= 4:
+      report.append(
+        (REPORT_TYPE_DIRECT_POKE, addr0, addr1, 0, 0)
+      )
+
+      # use of zpage version is determined byte by byte
+      for i in data[::-1]: # highest address first; reusing i does not disturb the outer range()
+        addr1 -= 1
+        section_loader.data.extend(
+          [
+            0xa9, i,                           # lda #data
+            0x85, addr1,                               # sta *addr1
+          ]
+        if addr1 < 0x100 else
+          [
+            0xa9, i,                           # lda #data
+            0x8d, addr1 & 0xff, addr1 >> 8,    # sta addr1
+          ]
+        )
+    elif len(data) <= 0x100:
+      addr3 = -len(section_payload.data) # end relative; payload is still stored reversed here
+      section_payload.data.extend(
+        data[::-1]
+      )
+      addr2 = -len(section_payload.data)
+      report.append(
+        (REPORT_TYPE_UNCOMPRESSED, addr0, addr1, addr2, addr3)
+      )
+
+      # use of zpage version is determined in advance (if completely fits)
+      zpage = addr1 <= 0x100
+
+      section_loader.fixups.extend(
+        [
+          (
+            FIXUP_FLAG_TARGET_END_RELATIVE |
+              FIXUP_FLAG_LO_BYTE |
+              FIXUP_FLAG_HI_BYTE,
+            len(section_loader.data) + 3, # operand of the lda emitted below
+            section_payload,
+            addr2
+          ),
+        ]
+      )
+      if len(data) == 0x100:
+        # for the full count we will copy forward (an exception)
+        section_loader.data.extend(
+          [
+            0xa2, 0x00,                                        # ldx #0
+            0xbd, 0x00, 0x00,                          # lda addr2,x
+            0x95, addr0 & 0xff,                                # sta *addr0,x
+            0xe8,                                              # inx
+            0xd0, 0xf8                                 # bne .-6
+          ]
+        if zpage else
+          [
+            0xa2, 0x00,                                        # ldx #0
+            0xbd, 0x00, 0x00,                          # lda addr2,x
+            0x9d, addr0 & 0xff, (addr0 >> 8) & 0xff,   # sta addr0,x
+            0xe8,                                              # inx
+            0xd0, 0xf7                                 # bne .-7
+          ]
+        )
+      else:
+        addr0 -= 1 # x counts down from count to 1, so both bases are biased by -1
+        addr2 -= 1
+        section_loader.data.extend(
+          [
+            0xa2, len(data),                           # ldx #count
+            0xbd, 0x00, 0x00,                          # lda addr2,x
+            0x95, addr0 & 0xff,                                # sta *addr0,x
+            0xca,                                              # dex
+            0xd0, 0xf8                                 # bne .-6
+          ]
+        if zpage else
+          [
+            0xa2, len(data),                           # ldx #count
+            0xbd, 0x00, 0x00,                          # lda addr2,x
+            0x9d, addr0 & 0xff, (addr0 >> 8) & 0xff,   # sta addr0,x
+            0xca,                                              # dex
+            0xd0, 0xf7                                 # bne .-7
+          ]
+        )
+    else:
+      addr3 = -len(section_payload.data)
+      section_payload.data.extend(
+        lzss_pack(addr1 - 1, data, True)[::-1] # dest is the last byte of the segment
+      )
+      addr2 = -len(section_payload.data)
+      report.append(
+        (REPORT_TYPE_COMPRESSED, addr0, addr1, addr2, addr3)
+      )
+
+      if len(section_lzss_unpack.data) == 0: # include the unpacker once, on first use
+        section_lzss_unpack.data.extend(
+          lzss_unpack_rev_or_fwd
+        )
+
+      addr3 -= 5 + 1 # back over the 5-byte data block plus one; presumably matches the unpacker's indexing - confirm in its source
+      section_loader.fixups.extend(
+        [
+          (
+            FIXUP_FLAG_TARGET_END_RELATIVE | FIXUP_FLAG_LO_BYTE,
+            len(section_loader.data) + 1, # operand of lda #<addr3
+            section_payload,
+            addr3
+          ),
+          (
+            FIXUP_FLAG_TARGET_END_RELATIVE | FIXUP_FLAG_HI_BYTE,
+            len(section_loader.data) + 3, # operand of ldy #>addr3
+            section_payload,
+            addr3
+          ),
+          (
+            FIXUP_FLAG_LO_BYTE | FIXUP_FLAG_HI_BYTE,
+            len(section_loader.data) + 5, # operand of the jsr
+            section_lzss_unpack,
+            0
+          ),
+        ]
+      )
+      section_loader.data.extend(
+        [
+          0xa9, 0x00,          # lda #<addr3
+          0xa0, 0x00,          # ldy #>addr3
+          0x20, 0x00, 0x00,    # jsr lzss_unpack_rev
+        ]
+      )
+
+  # epilogue: hand over to the entry point taken from the .ihx file
+  section_loader.data.extend(
+    [
+      0x4c, entry_point & 0xff, entry_point >> 8,      # jmp entry_point
+    ]
+  )
+
+  # sections that were constructed in reverse can now be made normal
+  section_payload.data = section_payload.data[::-1]
+
+  # relocate from bottom up
+  load_addr = load_or_end_addr # rev: the command line address is the load address
+  end_addr = load_addr
+  for section in sections:
+    section.load_addr = end_addr
+    end_addr += len(section.data)
+  load_size = end_addr - load_addr
+else:
+  # prologue: the image starts with a jmp over the payload to the loader
+  section_payload.fixups.extend(
+    [
+      (
+        FIXUP_FLAG_LO_BYTE | FIXUP_FLAG_HI_BYTE,
+        len(section_payload.data) + 1, # operand of the jmp emitted below
+        section_loader,
+        0
+      ),
+    ]
+  )
+  section_payload.data.extend(
+    [
+      0x4c, 0x00, 0x00,        # jmp loader
+    ]
+  )
+  section_loader.data.extend(
+    [
+      0xd8,    # cld
+      0xa2, 0xff,      # ldx #0xff
+      0x9a,    # txs
+    ]
+  )
+
+  # segments
+  # loader is constructed in order of execution (bottom to top)
+  # payload is constructed in order of unpacking (bottom to top)
+  for i in range(0, len(segments), 2):
+    addr0 = segments[i]
+    addr1 = segments[i + 1]
+    data = list(intelhex.tobinstr(addr0, addr1 - 1))
+
+    if len(data) <= 4:
+      report.append(
+        (REPORT_TYPE_DIRECT_POKE, addr0, addr1, 0, 0)
+      )
+
+      # use of zpage version is determined byte by byte
+      for i in data: # reusing i does not disturb the outer range()
+        section_loader.data.extend(
+          [
+            0xa9, i,                           # lda #data
+            0x85, addr0,                               # sta *addr0
+          ]
+        if addr0 < 0x100 else
+          [
+            0xa9, i,                           # lda #data
+            0x8d, addr0 & 0xff, addr0 >> 8,    # sta addr0
+          ]
+        )
+        addr0 += 1
+    elif len(data) <= 0x100:
+      addr2 = len(section_payload.data) # start relative
+      section_payload.data.extend(
+        data
+      )
+      addr3 = len(section_payload.data)
+      report.append(
+        (REPORT_TYPE_UNCOMPRESSED, addr0, addr1, addr2, addr3)
+      )
+
+      # use of zpage version is determined in advance (if completely fits)
+      zpage = addr1 <= 0x100
+
+      addr1 -= 0x100 # x counts up from -count (mod 0x100) to 0, so both bases are end - 0x100
+      addr3 -= 0x100
+      section_loader.fixups.extend(
+        [
+          (
+            FIXUP_FLAG_LO_BYTE | FIXUP_FLAG_HI_BYTE,
+            len(section_loader.data) + 3, # operand of the lda emitted below
+            section_payload,
+            addr3
+          ),
+        ]
+      )
+      section_loader.data.extend(
+        [
+          0xa2, -len(data) & 0xff,                     # ldx #-count
+          0xbd, 0x00, 0x00,                            # lda addr3,x
+          0x95, addr1 & 0xff,                          # sta *addr1,x
+          0xe8,                                                # inx
+          0xd0, 0xf8                                   # bne .-6
+        ]
+      if zpage else
+        [
+          0xa2, -len(data) & 0xff,                     # ldx #-count
+          0xbd, 0x00, 0x00,                            # lda addr3,x
+          0x9d, addr1 & 0xff, (addr1 >> 8) & 0xff,     # sta addr1,x
+          0xe8,                                                # inx
+          0xd0, 0xf7                                   # bne .-7
+        ]
+      )
+    else:
+      addr2 = len(section_payload.data)
+      section_payload.data.extend(
+        lzss_pack(addr0, data, False) # dest is the first byte of the segment
+      )
+      addr3 = len(section_payload.data)
+      report.append(
+        (REPORT_TYPE_COMPRESSED, addr0, addr1, addr2, addr3)
+      )
+
+      if len(section_lzss_unpack.data) == 0: # include the unpacker once, on first use
+        section_lzss_unpack.data.extend(
+          lzss_unpack_rev_or_fwd
+        )
+
+      addr2 += 5 - 0x100 # past the 5-byte data block, less 0x100; presumably matches the unpacker's indexing - confirm in its source
+      section_loader.fixups.extend(
+        [
+          (
+            FIXUP_FLAG_LO_BYTE,
+            len(section_loader.data) + 1, # operand of lda #<addr2
+            section_payload,
+            addr2
+          ),
+          (
+            FIXUP_FLAG_HI_BYTE,
+            len(section_loader.data) + 3, # operand of ldy #>addr2
+            section_payload,
+            addr2
+          ),
+          (
+            FIXUP_FLAG_LO_BYTE | FIXUP_FLAG_HI_BYTE,
+            len(section_loader.data) + 5, # operand of the jsr
+            section_lzss_unpack,
+            0
+          ),
+        ]
+      )
+      section_loader.data.extend(
+        [
+          0xa9, 0x00,          # lda #<addr2
+          0xa0, 0x00,          # ldy #>addr2
+          0x20, 0x00, 0x00,    # jsr lzss_unpack_fwd
+        ]
+      )
+
+  # epilogue: hand over to the entry point taken from the .ihx file
+  section_loader.data.extend(
+    [
+      0x4c, entry_point & 0xff, entry_point >> 8,      # jmp entry_point
+    ]
+  )
+
+  # relocate from top down
+  end_addr = load_or_end_addr # fwd: the command line address is the end address
+  load_addr = end_addr
+  for section in sections[::-1]:
+    load_addr -= len(section.data)
+    section.load_addr = load_addr
+  load_size = end_addr - load_addr
+
+for report_type, addr0, addr1, addr2, addr3 in report:
+  if report_type == REPORT_TYPE_DIRECT_POKE:
+    print(f'[0x{addr0:04x}, 0x{addr1:04x})')
+  else: # a2bin offsets: rev = payload end relative (<= 0), fwd = payload start relative
+    offset = section_payload.load_addr + (len(section_payload.data) if rev else 0)
+    addr2 += offset
+    addr3 += offset
+    print(
+      f'[0x{addr0:04x}, 0x{addr1:04x}) -> [0x{addr2:04x}, 0x{addr3:04x})' + (
+        f'{100. * (addr3 - addr2) / (addr1 - addr0):6.1f}%'
+      if report_type == REPORT_TYPE_COMPRESSED else
+        ''
+      )
+    )
+
+bin = [] # final image: every section's bytes, in output order
+for section in sections:
+  for fixup_flags, fixup_addr, target_section, target_addr in section.fixups:
+    if fixup_flags & FIXUP_FLAG_FIXUP_END_RELATIVE:
+      fixup_addr += len(section.data)
+    if fixup_flags & FIXUP_FLAG_TARGET_END_RELATIVE:
+      target_addr += len(target_section.data)
+
+    target_addr += target_section.load_addr # section relative -> absolute address
+    if fixup_flags & FIXUP_FLAG_LO_BYTE:
+      assert section.data[fixup_addr] == 0 # must still be the 0x00 placeholder
+      section.data[fixup_addr] = target_addr & 0xff
+      fixup_addr += 1 # a hi byte requested by the same fixup goes in the next byte
+    if fixup_flags & FIXUP_FLAG_HI_BYTE:
+      assert section.data[fixup_addr] == 0
+      section.data[fixup_addr] = target_addr >> 8
+      #fixup_addr += 1
+  bin.extend(section.data)
+
+hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8] # load address then length, both little-endian
+with open(out_a2bin, 'wb') as fout:
+  fout.write(bytes(hdr + bin))
diff --git a/loader/a2_pack_fwd.py b/loader/a2_pack_fwd.py
deleted file mode 100755 (executable)
index 07d4a92..0000000
+++ /dev/null
@@ -1,457 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-from intelhex import IntelHex
-
-EXIT_SUCCESS = 0
-EXIT_FAILURE = 1
-
-# short (8-bit) pointer
-DIST_BITS0 = 7
-LEN_BITS0 = 1
-
-# long (16-bit) pointer
-DIST_BITS1 = 10
-LEN_BITS1 = 6
-
-MAX_DIST = (1 << DIST_BITS1) # distance codes are 1..MAX_DIST
-MAX_LEN = (1 << LEN_BITS1) + 1 # length codes are 2..MAX_LEN
-
-if len(sys.argv) < 5:
-  print(
-    f'usage: {sys.argv[0]:s} end_addr lzss_unpack_fwd.bin in.ihx out.a2bin'
-  )
-  sys.exit(EXIT_FAILURE)
-end_addr = int(sys.argv[1], 0)
-lzss_unpack_fwd_bin = sys.argv[2]
-in_ihx = sys.argv[3]
-out_a2bin = sys.argv[4]
-
-with open(lzss_unpack_fwd_bin, 'rb') as fin:
-  lzss_unpack_fwd = list(fin.read())
-
-def lzss_pack(dest, bin, rev):
-  if rev:
-    bin = bin[::-1] # makes it easier to construct LZSS items
-
-  heads = {}
-  links = [-1] * len(bin)
-  lzss = []
-  i = 0
-  while i < len(bin):
-    _len = 1
-    dist = bin[i]
-
-    if i + 1 < len(bin):
-      pair = bin[i], bin[i + 1]
-      j = heads.get(pair, -1)
-      while j != -1 and i - j <= MAX_DIST:
-        #assert bin[i:i + 2] == bin[j:j + 2]
-        if (
-          _len < MAX_LEN and
-            i + _len < len(bin) and
-            bin[i + 2:i + _len + 1] == bin[j + 2:j + _len + 1]
-        ):
-          _len += 1
-          while (
-            _len < MAX_LEN and
-              i + _len < len(bin) and
-              bin[i + _len] == bin[j + _len]
-          ):
-            _len += 1
-          dist = i - j
-        j = links[j]
-    lzss.append((_len, dist))
-
-    for j in range(_len):
-      if i + 1 < len(bin):
-        pair = bin[i], bin[i + 1]
-        links[i] = heads.get(pair, -1)
-        heads[pair] = i
-      i += 1
-
-  # checking
-  bin1 = []
-  lzss1 = lzss[::-1]
-  while len(lzss1):
-    _len, dist = lzss1.pop()
-    if _len == 1:
-      bin1.append(dist)
-    else:
-      for i in range(_len):
-        bin1.append(bin1[-dist])
-  assert bin == bin1
-
-  # construct the real output in reverse to how it will be decoded,
-  # this means we flush the bits at the right time for the decoder,
-  # and any partial bit buffer is decoded at start rather than end
-  lzss1 = []
-  count = 0
-  bits = 1
-  while len(lzss):
-    _len, dist = lzss.pop()
-    if _len == 1:
-      #print('a', dist)
-      lzss1.append(dist)
-      cf = 0
-    else:
-      _len -= 2
-      dist -= 1
-      if _len < (1 << LEN_BITS0) and dist < (1 << DIST_BITS0):
-        item = dist | (_len << DIST_BITS0)
-        #print('b', item)
-        lzss1.append(item)
-        cf = 0
-      elif _len < (1 << LEN_BITS1) and dist < (1 << DIST_BITS1):
-        item = dist | (_len << DIST_BITS1)
-        #print('c', item)
-        # keep the 16-bit words in little-endian order in memory,
-        # means swapping them if the output will be reversed later
-        lzss1.extend(
-          [item & 0xff, item >> 8]
-        if rev else
-          [item >> 8, item & 0xff]
-        )
-        cf = 1
-      else:
-        assert False
-
-      bits = (bits << 1) | cf
-      if bits & 0x100:
-        #print('d', bits)
-        lzss1.append(bits & 0xff)
-        bits = 1
-        # in this case we leave count alone (at decoding side we get
-        # another bit buffer for free without any increment or test)
-
-      cf = 1
-
-    bits = (bits << 1) | cf
-    if bits & 0x100:
-      #print('e', bits)
-      lzss1.append(bits & 0xff)
-      bits = 1
-      count += 1
-  lzss = lzss1
-
-  # checking
-  bin1 = []
-  lzss1 = list(lzss)
-  count1 = count
-  bits1 = bits
-  while True:
-    if bits1 == 1:
-      if count1 == 0:
-        break
-      count1 -= 1
-      bits1 = lzss1.pop() | 0x100
-      #print('e', bits1)
-    cf = bits1 & 1
-    bits1 >>= 1
-  
-    if cf:
-      if bits1 == 1:
-        bits1 = lzss1.pop() | 0x100
-        #print('d', bits1)
-      cf = bits1 & 1
-      bits1 >>= 1
-  
-      if cf:
-        # keep the 16-bit words in little-endian order in memory,
-        # means swapping them if the output will be reversed later
-        item = (
-          lzss1[-2] | (lzss1[-1] << 8)
-        if rev else
-          lzss1[-1] | (lzss1[-2] << 8)
-        )
-        del lzss1[-2:]
-        #print('c', item)
-        dist = item & ((1 << DIST_BITS1) - 1)
-        _len = item >> DIST_BITS1
-      else: 
-        item = lzss1.pop()
-        #print('b', item)
-        dist = item & ((1 << DIST_BITS0) - 1)
-        _len = item >> DIST_BITS0
-      _len += 2
-      dist += 1
-  
-      for i in range(_len):
-        bin1.append(bin1[-dist])
-    else:
-      #print('a', lzss1[-1])
-      bin1.append(lzss1.pop())
-  assert len(lzss1) == 0
-  assert bin1 == bin
-
-  # optimization: provided the input is not null, the first byte
-  # has to be literal, so the loader can fall straight into the
-  # literal decoding routine (saves a jump to the official loop)
-  if bits == 1:
-    assert count
-    count -= 1
-    bits = lzss.pop() | 0x100
-  assert (bits & 1) == 0
-  bits >>= 1
-
-  # LZSS data has been reversed by stackwise encoding method
-  # change it back if needed and put the data block on correct end
-  count ^= 0xffff # inc/test is easier than test/dec
-  data_block = [dest & 0xff, dest >> 8, count & 0xff, count >> 8, bits]
-  if rev:
-    # append data block
-    lzss.extend(data_block)
-  else:
-    # prepend data block
-    lzss.extend(data_block[::-1])
-    lzss = lzss[::-1]
-  return lzss
-
-intelhex = IntelHex(in_ihx)
-entry_point = intelhex.start_addr['EIP']
-segments = [j for i in intelhex.segments() for j in i]
-
-# zero page and stack are done last, after we finish with them,
-# and in 0x100-byte pieces so we can do them without zero page
-def intersect(segments, segment):
-  [addr0, addr1] = segment
-  segments1 = []
-  for i in range(0, len(segments), 2):
-    [addr2, addr3] = segments[i:i + 2]
-    if addr2 < addr0:
-      addr2 = addr0
-    if addr3 > addr1:
-      addr3 = addr1
-    if addr3 > addr2:
-      segments1.extend([addr2, addr3])
-  return segments1
-segments = (
-  intersect(segments, [0x200, 0x10000]) +
-  intersect(segments, [0, 0x100]) +
-  intersect(segments, [0x100, 0x200])
-)
-
-# fixup is a 4-tuple:
-#   (fixup flags, fixup address, target section, target address)
-# either address can be start relative or end relative
-# either lo byte, hi byte or both (lo byte first) can be fixed up
-FIXUP_FLAG_FIXUP_END_RELATIVE = 1
-FIXUP_FLAG_TARGET_END_RELATIVE = 2
-FIXUP_FLAG_LO_BYTE = 4
-FIXUP_FLAG_HI_BYTE = 8
-
-# each section has a data area, a load address and a list of fixups
-# relocation is done after section lengths and load addresses known
-class Section:
-  def __init__(self, data, load_addr, fixups):
-    self.data = data
-    self.load_addr = load_addr
-    self.fixups = fixups
-section_lzss_unpack = Section([], 0, [])
-section_loader = Section([], 0, [])
-section_payload = Section([], 0, [])
-
-# sections are output to the a2bin file from bottom to top as follows:
-sections = [section_payload, section_loader, section_lzss_unpack]
-
-# report is a 5-tuple:
-#   (report type, ihx start, ihx end, a2bin start, a2bin end)
-# for compressed the compression ratio will be printed
-# for direct poke the a2bin values are not used, otherwise they
-# are negative and relative to the end address of the payload section
-# report is used to visually check for source/destination overlap
-REPORT_TYPE_DIRECT_POKE = 0
-REPORT_TYPE_UNCOMPRESSED = 1
-REPORT_TYPE_COMPRESSED = 2
-report = []
-
-# prologue
-section_payload.fixups.extend(
-  [
-    (
-      FIXUP_FLAG_LO_BYTE | FIXUP_FLAG_HI_BYTE,
-      len(section_payload.data) + 1,
-      section_loader,
-      0
-    ),
-  ]
-)
-section_payload.data.extend(
-  [
-    0x4c, 0x00, 0x00,  # jmp loader
-  ]
-)
-section_loader.data.extend(
-  [
-    0xd8,      # cld
-    0xa2, 0xff,        # ldx #0xff
-    0x9a,      # txs
-  ]
-)
-
-# segments
-# loader is constructed in order of execution (bottom to top)
-# payload is constructed in order of unpacking (bottom to top)
-for i in range(0, len(segments), 2):
-  addr0 = segments[i]
-  addr1 = segments[i + 1]
-  data = list(intelhex.tobinstr(addr0, addr1 - 1))
-
-  if len(data) <= 4:
-    report.append(
-      (REPORT_TYPE_DIRECT_POKE, addr0, addr1, 0, 0)
-    )
-
-    # use of zpage version is determined byte by byte
-    for i in data:
-      section_loader.data.extend(
-        [
-          0xa9, i,                             # lda #data
-          0x85, addr0,                         # sta *addr0
-        ]
-      if addr0 < 0x100 else
-        [
-          0xa9, i,                             # lda #data
-          0x8d, addr0 & 0xff, addr0 >> 8,      # sta addr0
-        ]
-      )
-      addr0 += 1
-  elif len(data) <= 0x100:
-    addr2 = len(section_payload.data)
-    section_payload.data.extend(
-      data
-    )
-    addr3 = len(section_payload.data)
-    report.append(
-      (REPORT_TYPE_UNCOMPRESSED, addr0, addr1, addr2, addr3)
-    )
-
-    # use of zpage version is determined in advance (if completely fits)
-    zpage = addr1 <= 0x100
-
-    addr1 -= 0x100
-    addr3 -= 0x100
-    section_loader.fixups.extend(
-      [
-        (
-          FIXUP_FLAG_LO_BYTE | FIXUP_FLAG_HI_BYTE,
-          len(section_loader.data) + 3,
-          section_payload,
-          addr3
-        ),
-      ]
-    )
-    section_loader.data.extend(
-      [
-        0xa2, -len(data) & 0xff,                       # ldx #-count
-        0xbd, 0x00, 0x00,                              # lda addr3,x
-        0x95, addr1 & 0xff,                            # sta *addr1,x
-        0xe8,                                          # inx
-        0xd0, 0xf8                                     # bne .-6
-      ]
-    if zpage else
-      [
-        0xa2, -len(data) & 0xff,                       # ldx #-count
-        0xbd, 0x00, 0x00,                              # lda addr3,x
-        0x9d, addr1 & 0xff, (addr1 >> 8) & 0xff,       # sta addr1,x
-        0xe8,                                          # inx
-        0xd0, 0xf7                                     # bne .-7
-      ]
-    )
-  else:
-    addr2 = len(section_payload.data)
-    section_payload.data.extend(
-      lzss_pack(addr0, data, False)
-    )
-    addr3 = len(section_payload.data)
-    report.append(
-      (REPORT_TYPE_COMPRESSED, addr0, addr1, addr2, addr3)
-    )
-
-    if len(section_lzss_unpack.data) == 0:
-      section_lzss_unpack.data.extend(
-        lzss_unpack_fwd
-      )
-
-    addr2 += 5 - 0x100
-    section_loader.fixups.extend(
-      [
-        (
-          FIXUP_FLAG_LO_BYTE,
-          len(section_loader.data) + 1,
-          section_payload,
-          addr2
-        ),
-        (
-          FIXUP_FLAG_HI_BYTE,
-          len(section_loader.data) + 3,
-          section_payload,
-          addr2
-        ),
-        (
-          FIXUP_FLAG_LO_BYTE | FIXUP_FLAG_HI_BYTE,
-          len(section_loader.data) + 5,
-          section_lzss_unpack,
-          0
-        ),
-      ]
-    )
-    section_loader.data.extend(
-      [
-        0xa9, 0x00,            # lda #<addr2
-        0xa0, 0x00,            # ldy #>addr2
-        0x20, 0x00, 0x00,      # jsr lzss_unpack_fwd
-      ]
-    )
-
-# epilogue
-section_loader.data.extend(
-  [
-    0x4c, entry_point & 0xff, entry_point >> 8,        # jmp entry_point
-  ]
-)
-
-# relocate from top down
-load_addr = end_addr
-for section in sections[::-1]:
-  load_addr -= len(section.data)
-  section.load_addr = load_addr
-load_size = end_addr - load_addr
-
-for report_type, addr0, addr1, addr2, addr3 in report:
-  if report_type == REPORT_TYPE_DIRECT_POKE:
-    print(f'[0x{addr0:04x}, 0x{addr1:04x})')
-  else:
-    offset = section_payload.load_addr
-    addr2 += offset
-    addr3 += offset
-    print(
-      f'[0x{addr0:04x}, 0x{addr1:04x}) -> [0x{addr2:04x}, 0x{addr3:04x})' + (
-        f'{100. * (addr3 - addr2) / (addr1 - addr0):6.1f}%'
-      if report_type == REPORT_TYPE_COMPRESSED else
-        ''
-      )
-    )
-
-bin = []
-for section in sections:
-  for fixup_flags, fixup_addr, target_section, target_addr in section.fixups:
-    if fixup_flags & FIXUP_FLAG_FIXUP_END_RELATIVE:
-      fixup_addr += len(section.data)
-    if fixup_flags & FIXUP_FLAG_TARGET_END_RELATIVE:
-      target_addr += len(target_section.data)
-
-    target_addr += target_section.load_addr
-    if fixup_flags & FIXUP_FLAG_LO_BYTE:
-      assert section.data[fixup_addr] == 0
-      section.data[fixup_addr] = target_addr & 0xff
-      fixup_addr += 1
-    if fixup_flags & FIXUP_FLAG_HI_BYTE:
-      assert section.data[fixup_addr] == 0
-      section.data[fixup_addr] = target_addr >> 8
-      #fixup_addr += 1
-  bin.extend(section.data)
-
-hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8]
-with open(out_a2bin, 'wb') as fout:
-  fout.write(bytes(hdr + bin))
diff --git a/loader/a2_pack_rev.py b/loader/a2_pack_rev.py
deleted file mode 100755 (executable)
index 806f397..0000000
+++ /dev/null
@@ -1,467 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-from intelhex import IntelHex
-
-EXIT_SUCCESS = 0
-EXIT_FAILURE = 1
-
-# LZSS pointer formats: an item holds the distance code in its low DIST_BITS
-# bits and the length code in the LEN_BITS above them
-# short (8-bit) pointer
-DIST_BITS0 = 7
-LEN_BITS0 = 1
-
-# long (16-bit) pointer
-DIST_BITS1 = 10
-LEN_BITS1 = 6
-
-MAX_DIST = (1 << DIST_BITS1) # distance codes are 1..MAX_DIST
-MAX_LEN = (1 << LEN_BITS1) + 1 # length codes are 2..MAX_LEN
-
-# command line: four positional arguments are required
-# load_addr is parsed with base 0, so a 0x prefix selects hexadecimal
-# the output image is laid out upwards starting at load_addr
-if len(sys.argv) < 5:
-  print(
-    f'usage: {sys.argv[0]:s} load_addr lzss_unpack_rev.bin in.ihx out.a2bin'
-  )
-  sys.exit(EXIT_FAILURE)
-load_addr = int(sys.argv[1], 0)
-lzss_unpack_rev_bin = sys.argv[2]
-in_ihx = sys.argv[3]
-out_a2bin = sys.argv[4]
-
-# the unpacker binary is kept as a list of byte values so that it can be
-# copied into a section as is
-with open(lzss_unpack_rev_bin, 'rb') as fin:
-  lzss_unpack_rev = list(fin.read())
-
-# compress a list of byte values into an LZSS stream plus its data block
-#   dest: 16-bit address, stored lo byte first in the data block
-#   bin: list of byte values (must not be empty, see the assert near the end)
-#   rev: true to compress the input back to front
-# returns a list of byte values; the last five entries (rev) or the first
-# five entries (not rev) are the data block
-#   [dest lo, dest hi, ~count lo, ~count hi, bits]
-# where count is the number of flag bytes counted by the encoder and bits
-# is the partially filled flag buffer the decoder starts with
-def lzss_pack(dest, bin, rev):
-  if rev:
-    bin = bin[::-1] # makes it easier to construct LZSS items
-
-  # greedy longest-match search using chains keyed on 2-byte pairs:
-  # heads maps a pair to the most recent position where it starts and
-  # links[i] gives the previous position with the same pair (-1 = none)
-  # each lzss item is (_len, dist): _len == 1 is a literal whose byte value
-  # is carried in dist, otherwise it means copy _len bytes from dist back
-  heads = {}
-  links = [-1] * len(bin)
-  lzss = []
-  i = 0
-  while i < len(bin):
-    # default to a literal, replaced below if a match is found
-    _len = 1
-    dist = bin[i]
-
-    if i + 1 < len(bin):
-      pair = bin[i], bin[i + 1]
-      j = heads.get(pair, -1)
-      # candidates are visited from nearest to farthest
-      while j != -1 and i - j <= MAX_DIST:
-        #assert bin[i:i + 2] == bin[j:j + 2]
-        # a candidate is only accepted if it beats the best so far by at
-        # least one byte: bytes 0..1 match because the pair matches, bytes
-        # 2.._len are compared here
-        if (
-          _len < MAX_LEN and
-            i + _len < len(bin) and
-            bin[i + 2:i + _len + 1] == bin[j + 2:j + _len + 1]
-        ):
-          _len += 1
-          # then extend the match as far as it goes
-          while (
-            _len < MAX_LEN and
-              i + _len < len(bin) and
-              bin[i + _len] == bin[j + _len]
-          ):
-            _len += 1
-          dist = i - j
-        j = links[j]
-    lzss.append((_len, dist))
-
-    # enter every position covered by this item into the chains
-    for j in range(_len):
-      if i + 1 < len(bin):
-        pair = bin[i], bin[i + 1]
-        links[i] = heads.get(pair, -1)
-        heads[pair] = i
-      i += 1
-
-  # checking
-  # (decode the items in order and compare with the input)
-  bin1 = []
-  lzss1 = lzss[::-1]
-  while len(lzss1):
-    _len, dist = lzss1.pop()
-    if _len == 1:
-      bin1.append(dist)
-    else:
-      for i in range(_len):
-        bin1.append(bin1[-dist])
-  assert bin == bin1
-
-  # construct the real output in reverse to how it will be decoded,
-  # this means we flush the bits at the right time for the decoder,
-  # and any partial bit buffer is decoded at start rather than end
-  # bits is a shift register holding flag bits below a marker 1 bit, once
-  # the marker reaches 0x100 eight flags are present and get flushed
-  # flags as seen by the decoder: 0 = literal, 1 then 0 = short pointer,
-  # 1 then 1 = long pointer (pushed here in the opposite order)
-  lzss1 = []
-  count = 0
-  bits = 1
-  while len(lzss):
-    _len, dist = lzss.pop()
-    if _len == 1:
-      #print('a', dist)
-      lzss1.append(dist)
-      cf = 0
-    else:
-      # convert to codes: lengths start at 2 and distances at 1
-      _len -= 2
-      dist -= 1
-      if _len < (1 << LEN_BITS0) and dist < (1 << DIST_BITS0):
-        item = dist | (_len << DIST_BITS0)
-        #print('b', item)
-        lzss1.append(item)
-        cf = 0
-      elif _len < (1 << LEN_BITS1) and dist < (1 << DIST_BITS1):
-        item = dist | (_len << DIST_BITS1)
-        #print('c', item)
-        # keep the 16-bit words in little-endian order in memory,
-        # means swapping them if the output will be reversed later
-        lzss1.extend(
-          [item & 0xff, item >> 8]
-        if rev else
-          [item >> 8, item & 0xff]
-        )
-        cf = 1
-      else:
-        assert False
-
-      # push the short/long flag
-      bits = (bits << 1) | cf
-      if bits & 0x100:
-        #print('d', bits)
-        lzss1.append(bits & 0xff)
-        bits = 1
-        # in this case we leave count alone (at decoding side we get
-        # another bit buffer for free without any increment or test)
-
-      # then the flag saying that this item is a pointer
-      cf = 1
-
-    # push the literal/pointer flag, only flushes here are counted
-    bits = (bits << 1) | cf
-    if bits & 0x100:
-      #print('e', bits)
-      lzss1.append(bits & 0xff)
-      bits = 1
-      count += 1
-  lzss = lzss1
-
-  # checking
-  # (decode the byte stream from its end, the same way round as it was
-  # built, and compare with the input)
-  bin1 = []
-  lzss1 = list(lzss)
-  count1 = count
-  bits1 = bits
-  while True:
-    # flag buffer empty: stop when all counted flag bytes have been used
-    if bits1 == 1:
-      if count1 == 0:
-        break
-      count1 -= 1
-      bits1 = lzss1.pop() | 0x100
-      #print('e', bits1)
-    cf = bits1 & 1
-    bits1 >>= 1
-  
-    if cf:
-      # pointer: the refill for the second flag is not counted
-      if bits1 == 1:
-        bits1 = lzss1.pop() | 0x100
-        #print('d', bits1)
-      cf = bits1 & 1
-      bits1 >>= 1
-  
-      if cf:
-        # keep the 16-bit words in little-endian order in memory,
-        # means swapping them if the output will be reversed later
-        item = (
-          lzss1[-2] | (lzss1[-1] << 8)
-        if rev else
-          lzss1[-1] | (lzss1[-2] << 8)
-        )
-        del lzss1[-2:]
-        #print('c', item)
-        dist = item & ((1 << DIST_BITS1) - 1)
-        _len = item >> DIST_BITS1
-      else: 
-        item = lzss1.pop()
-        #print('b', item)
-        dist = item & ((1 << DIST_BITS0) - 1)
-        _len = item >> DIST_BITS0
-      # convert codes back to the real length and distance
-      _len += 2
-      dist += 1
-  
-      for i in range(_len):
-        bin1.append(bin1[-dist])
-    else:
-      #print('a', lzss1[-1])
-      bin1.append(lzss1.pop())
-  assert len(lzss1) == 0
-  assert bin1 == bin
-
-  # optimization: provided the input is not null, the first byte
-  # has to be literal, so the loader can fall straight into the
-  # literal decoding routine (saves a jump to the official loop)
-  # the first flag is therefore consumed here, refilling the flag buffer
-  # from the stream first if it is empty
-  if bits == 1:
-    assert count
-    count -= 1
-    bits = lzss.pop() | 0x100
-  assert (bits & 1) == 0
-  bits >>= 1
-
-  # LZSS data has been reversed by stackwise encoding method
-  # change it back if needed and put the data block on correct end
-  count ^= 0xffff # inc/test is easier than test/dec
-  data_block = [dest & 0xff, dest >> 8, count & 0xff, count >> 8, bits]
-  if rev:
-    # append data block
-    lzss.extend(data_block)
-  else:
-    # prepend data block
-    lzss.extend(data_block[::-1])
-    lzss = lzss[::-1]
-  return lzss
-
-# read the program image: the entry point is taken from the EIP start
-# address of the hex file and the segment list is flattened to
-# [start0, end0, start1, end1, ...], the code further down treats each end
-# as exclusive (NOTE(review): confirm against IntelHex.segments())
-intelhex = IntelHex(in_ihx)
-entry_point = intelhex.start_addr['EIP']
-segments = [j for i in intelhex.segments() for j in i]
-
-# zero page and stack are done last, after we finish with them,
-# and in 0x100-byte pieces so we can do them without zero page
-def intersect(segments, segment):
-  # segments is a flat list [start0, end0, start1, end1, ...] and segment
-  # is a single [lo, hi] window, each range is clipped to the window and
-  # the ranges that are left non-empty are returned in the same flat form
-  lo, hi = segment
-  clipped = []
-  pos = 0
-  while pos < len(segments):
-    start, end = segments[pos:pos + 2]
-    pos += 2
-    start = max(start, lo)
-    end = min(end, hi)
-    if start < end:
-      clipped += [start, end]
-  return clipped
-# split the segments at the page 0 / page 1 / remainder boundaries, the
-# pieces are listed in that order
-segments = [
-  addr
-  for window in ([0, 0x100], [0x100, 0x200], [0x200, 0x10000])
-  for addr in intersect(segments, window)
-]
-
-# fixup is a 4-tuple:
-#   (fixup flags, fixup address, target section, target address)
-# either address can be start relative or end relative
-# either lo byte, hi byte or both (lo byte first) can be fixed up
-# the flags are single bits so that they can be combined with |
-FIXUP_FLAG_FIXUP_END_RELATIVE = 1 # fixup address counts from section end
-FIXUP_FLAG_TARGET_END_RELATIVE = 2 # target address counts from section end
-FIXUP_FLAG_LO_BYTE = 4 # store the lo byte of the resolved address
-FIXUP_FLAG_HI_BYTE = 8 # store the hi byte of the resolved address
-
-# each section has a data area, a load address and a list of fixups
-# relocation is done after section lengths and load addresses known
-class Section:
-  # data is the list of byte values making up the section, load_addr is
-  # the address the section is placed at and fixups is the list of fixup
-  # 4-tuples to apply to data, all three are stored without copying
-  def __init__(self, data, load_addr, fixups):
-    self.data, self.load_addr, self.fixups = data, load_addr, fixups
-# every section starts out empty at address 0, the real load addresses
-# are only assigned once all of the sections have been filled in
-section_lzss_unpack = Section([], 0, [])
-section_loader = Section([], 0, [])
-section_payload = Section([], 0, [])
-
-# sections are output to the a2bin file from bottom to top as follows:
-sections = [section_loader, section_lzss_unpack, section_payload]
-
-# report is a 5-tuple:
-#   (report type, ihx start, ihx end, a2bin start, a2bin end)
-# for compressed the compression ratio will be printed
-# for direct poke the a2bin values are not used, otherwise they
-# are relative to the load address of the payload section
-# report is used to visually check for source/destination overlap
-REPORT_TYPE_DIRECT_POKE = 0
-REPORT_TYPE_UNCOMPRESSED = 1
-REPORT_TYPE_COMPRESSED = 2
-# one report entry is appended for every segment that is processed
-report = []
-
-# prologue
-# put the processor into a known state before anything is copied:
-# decimal mode off and the stack pointer loaded with 0xff
-section_loader.data.extend(
-  [
-    0xd8,      # cld
-    0xa2, 0xff,        # ldx #0xff
-    0x9a,      # txs
-  ]
-)
-
-# segments
-# loader is constructed in order of execution (bottom to top)
-# payload is constructed in order of unpacking (top to bottom)
-# each segment [addr0, addr1) is handled in one of three ways:
-#   up to 4 bytes: poked directly with immediate loads
-#   up to 0x100 bytes: stored raw in the payload and copied by a loop
-#   anything longer: LZSS packed and unpacked by lzss_unpack_rev
-for i in range(len(segments) - 2, -2, -2):
-  addr0 = segments[i]
-  addr1 = segments[i + 1]
-  data = list(intelhex.tobinstr(addr0, addr1 - 1))
-
-  if len(data) <= 4:
-    report.append(
-      (REPORT_TYPE_DIRECT_POKE, addr0, addr1, 0, 0)
-    )
-
-    # use of zpage version is determined byte by byte
-    # (the byte gets its own name so that it does not shadow the index i)
-    for value in data[::-1]:
-      addr1 -= 1
-      section_loader.data.extend(
-        [
-          0xa9, value,                         # lda #data
-          0x85, addr1,                         # sta *addr1
-        ]
-      if addr1 < 0x100 else
-        [
-          0xa9, value,                         # lda #data
-          0x8d, addr1 & 0xff, addr1 >> 8,      # sta addr1
-        ]
-      )
-  elif len(data) <= 0x100:
-    # the payload is built back to front, so the offsets are negative and
-    # count from the end of the finished payload section
-    addr3 = -len(section_payload.data)
-    section_payload.data.extend(
-      data[::-1]
-    )
-    addr2 = -len(section_payload.data)
-    report.append(
-      (REPORT_TYPE_UNCOMPRESSED, addr0, addr1, addr2, addr3)
-    )
-
-    # use of zpage version is determined in advance (if completely fits)
-    zpage = addr1 <= 0x100
-
-    if len(data) == 0x100:
-      # for the full count we will copy forward (an exception)
-      # x runs 0..0xff and the loop ends when it wraps back to 0
-      x_init = 0x00
-      x_step = 0xe8                                    # inx
-    else:
-      # x runs from count down to 1, so the source and the destination
-      # base are both biased by one byte
-      # this has to be done before the fixup is recorded, since the fixup
-      # tuple holds the value of addr2 and does not see later changes
-      # (adjusting afterwards made the loop read one byte too high)
-      # NOTE(review): for a segment starting at address 0 the biased
-      # destination is 0xff and relies on zero page indexing wrapping
-      x_init = len(data)
-      x_step = 0xca                                    # dex
-      addr0 -= 1
-      addr2 -= 1
-
-    # the operand of the lda starts 3 bytes into the code emitted below
-    section_loader.fixups.extend(
-      [
-        (
-          FIXUP_FLAG_TARGET_END_RELATIVE |
-            FIXUP_FLAG_LO_BYTE |
-            FIXUP_FLAG_HI_BYTE,
-          len(section_loader.data) + 3,
-          section_payload,
-          addr2
-        ),
-      ]
-    )
-    section_loader.data.extend(
-      [
-        0xa2, x_init,                                  # ldx #count
-        0xbd, 0x00, 0x00,                              # lda addr2,x
-        0x95, addr0 & 0xff,                            # sta *addr0,x
-        x_step,                                        # inx or dex
-        0xd0, 0xf8                                     # bne .-6
-      ]
-    if zpage else
-      [
-        0xa2, x_init,                                  # ldx #count
-        0xbd, 0x00, 0x00,                              # lda addr2,x
-        0x9d, addr0 & 0xff, (addr0 >> 8) & 0xff,       # sta addr0,x
-        x_step,                                        # inx or dex
-        0xd0, 0xf7                                     # bne .-7
-      ]
-    )
-  else:
-    # dest is the address of the last byte of the segment
-    addr3 = -len(section_payload.data)
-    section_payload.data.extend(
-      lzss_pack(addr1 - 1, data, True)[::-1]
-    )
-    addr2 = -len(section_payload.data)
-    report.append(
-      (REPORT_TYPE_COMPRESSED, addr0, addr1, addr2, addr3)
-    )
-
-    # the unpacker is only included if something is actually compressed
-    if len(section_lzss_unpack.data) == 0:
-      section_lzss_unpack.data.extend(
-        lzss_unpack_rev
-      )
-
-    # the address handed to the unpacker is 6 bytes below the end of the
-    # compressed data (5 for the data block plus 1), adjusted here before
-    # the fixups that use it are recorded
-    addr3 -= 5 + 1
-    section_loader.fixups.extend(
-      [
-        (
-          FIXUP_FLAG_TARGET_END_RELATIVE | FIXUP_FLAG_LO_BYTE,
-          len(section_loader.data) + 1,
-          section_payload,
-          addr3
-        ),
-        (
-          FIXUP_FLAG_TARGET_END_RELATIVE | FIXUP_FLAG_HI_BYTE,
-          len(section_loader.data) + 3,
-          section_payload,
-          addr3
-        ),
-        (
-          FIXUP_FLAG_LO_BYTE | FIXUP_FLAG_HI_BYTE,
-          len(section_loader.data) + 5,
-          section_lzss_unpack,
-          0
-        ),
-      ]
-    )
-    section_loader.data.extend(
-      [
-        0xa9, 0x00,            # lda #<addr3
-        0xa0, 0x00,            # ldy #>addr3
-        0x20, 0x00, 0x00,      # jsr lzss_unpack_rev
-      ]
-    )
-
-# epilogue
-# the last thing the loader does is jump to the program's entry point
-section_loader.data.extend(
-  [
-    0x4c, entry_point & 0xff, entry_point >> 8,        # jmp entry_point
-  ]
-)
-
-# sections that were constructed in reverse can now be made normal
-section_payload.data = section_payload.data[::-1]
-
-# relocate from bottom up
-# the first section starts at load_addr and each later section is placed
-# immediately above the one before it
-end_addr = load_addr
-for section in sections:
-  section.load_addr = end_addr
-  end_addr += len(section.data)
-load_size = end_addr - load_addr
-
-# print one line per segment: the ihx address range and, unless the segment
-# was poked directly, the range it occupies in the output image
-for report_type, addr0, addr1, addr2, addr3 in report:
-  if report_type == REPORT_TYPE_DIRECT_POKE:
-    print(f'[0x{addr0:04x}, 0x{addr1:04x})')
-  else:
-    # here addr2 and addr3 are negative offsets from the end of the payload
-    offset = section_payload.load_addr + len(section_payload.data)
-    addr2 += offset
-    addr3 += offset
-    # for compressed segments append stored size as a percentage of original
-    print(
-      f'[0x{addr0:04x}, 0x{addr1:04x}) -> [0x{addr2:04x}, 0x{addr3:04x})' + (
-        f'{100. * (addr3 - addr2) / (addr1 - addr0):6.1f}%'
-      if report_type == REPORT_TYPE_COMPRESSED else
-        ''
-      )
-    )
-
-# apply the fixups now that all lengths and load addresses are known,
-# then concatenate the sections in output order
-bin = []
-for section in sections:
-  for fixup_flags, fixup_addr, target_section, target_addr in section.fixups:
-    # end relative values are offsets measured from the end of the section
-    if fixup_flags & FIXUP_FLAG_FIXUP_END_RELATIVE:
-      fixup_addr += len(section.data)
-    if fixup_flags & FIXUP_FLAG_TARGET_END_RELATIVE:
-      target_addr += len(target_section.data)
-
-    # convert the section relative target to an absolute address
-    target_addr += target_section.load_addr
-    # the bytes being patched must still hold their 0 placeholder
-    if fixup_flags & FIXUP_FLAG_LO_BYTE:
-      assert section.data[fixup_addr] == 0
-      section.data[fixup_addr] = target_addr & 0xff
-      fixup_addr += 1
-    if fixup_flags & FIXUP_FLAG_HI_BYTE:
-      assert section.data[fixup_addr] == 0
-      section.data[fixup_addr] = target_addr >> 8
-      #fixup_addr += 1
-  bin.extend(section.data)
-
-# the output file is a 4-byte header (load address then size, each with the
-# lo byte first) followed by the image itself
-hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8]
-with open(out_a2bin, 'wb') as fout:
-  fout.write(bytes(hdr + bin))