Optimizations to how the bit buffer is initialized in LZSS loader

author Nick Downing <nick@ndcode.org>

Fri, 17 Jun 2022 12:13:47 +0000 (22:13 +1000)

committer Nick Downing <nick@ndcode.org>

Fri, 17 Jun 2022 13:16:23 +0000 (23:16 +1000)
author Nick Downing <nick@ndcode.org>
Fri, 17 Jun 2022 12:13:47 +0000 (22:13 +1000)
committer Nick Downing <nick@ndcode.org>
Fri, 17 Jun 2022 13:16:23 +0000 (23:16 +1000)
diff --git a/loader/lzss_decode.py b/loader/lzss_decode.py

index 8a0c0b6..32d1793 100755 (executable)
--- a/loader/lzss_decode.py
+++ b/loader/lzss_decode.py
@@ -43,15 +43,30 @@ assert lzss[0x15] == 0xa9 # lda #NN
  count = lzss[0x12] + (lzss[0x16] << 8)
  
  assert lzss[0x19] == 0xa9 # lda #NN
-#assert lzss[0x1d] & ~0x20 == 0x18 # clc
-bits = lzss[0x1a] #| ((lzss[0x1d] & 0x20) << 3)
+assert lzss[0x1d] & ~0x20 == 0x18 # clc
+bits = lzss[0x1a] | ((lzss[0x1d] & 0x20) << 3)
  
-assert lzss[0x42] == 0x4c # jmp NNNN
-entry_point = lzss[0x43] | (lzss[0x44] << 8)
+assert lzss[0x40] == 0x4c # jmp NNNN
+entry_point = lzss[0x41] | (lzss[0x42] << 8)
  
  assert src == load_addr0 + load_size0
  count = ~count & 0xffff
  
+# undo optimization: initial 9-bit buffer via clc/sec
+if bits & 0x100:
+  # note the cf will be preserved during copy of first byte,
+  # so the byte we do not have to store is the second byte
+  lzss[-1:-1] = [bits & 0xff]
+  bits = 1
+  count += 1
+
+# undo optimization: the first byte has to be literal
+bits <<= 1
+if bits & 0x100:
+  lzss.append(bits & 0xff)
+  bits = 1
+  count += 1
+
  bin = []
  lzss = lzss
  count = count
@@ -92,7 +107,7 @@ while True:
    else:
      #print('a', lzss[-1])
      bin.append(lzss.pop())
-assert len(lzss) == 0xb6
+assert len(lzss) == 0xb4
  bin = bin[::-1]
  
  load_size = len(bin)
diff --git a/loader/lzss_encode.py b/loader/lzss_encode.py

index 2c0a552..5e4aeda 100755 (executable)
--- a/loader/lzss_encode.py
+++ b/loader/lzss_encode.py
@@ -29,7 +29,7 @@ out_bin = sys.argv[4]
  
  with open(lzss_loader_bin, 'rb') as fin:
    lzss_loader = list(fin.read())
-assert len(lzss_loader) == 0xb6
+assert len(lzss_loader) == 0xb4
  
  with open(in_bin, 'rb') as fin:
    data = list(fin.read())
@@ -140,8 +140,6 @@ for _len, dist in lzss:
      bits = 1
      count += 1
  lzss = lzss1
-load_size = len(lzss)
-print('orig', f'{load_size0:04x}', 'lzss', f'{load_size:04x}')
  
  # checking
  #bin1 = []
@@ -188,6 +186,29 @@ print('orig', f'{load_size0:04x}', 'lzss', f'{load_size:04x}')
  #bin1 = bin1[::-1]
  #assert bin == bin1
  
+# optimization: provided the input is not null, the first byte
+# has to be literal, so the loader can fall straight into the
+# literal decoding routine (saves a jump to the official loop)
+if bits == 1:
+  print('opt1')
+  assert count
+  count -= 1
+  bits = lzss.pop() | 0x100
+assert (bits & 1) == 0
+bits >>= 1
+
+# optimization: provide an initial 9-bit buffer via clc/sec
+if bits == 1:
+  print('opt2')
+  # note the cf will be preserved during copy of first byte,
+  # so the byte we do not have to store is the second byte
+  assert count
+  count -= 1
+  bits = lzss.pop(-2) | 0x100
+
+load_size = len(lzss)
+print('orig', f'{load_size0:04x}', 'lzss', f'{load_size:04x}')
+
  src = load_addr + load_size
  dest = load_addr0 + load_size0 - 1
  count = ~count & 0xffff # inc/test is easier than test/dec
@@ -208,13 +229,13 @@ assert lzss[0x15] == 0xa9 # lda #NN
  lzss[0x16] = count >> 8
  
  assert lzss[0x19] == 0xa9 # lda #NN
-lzss[0x1a] = bits #& 0xff
-#assert lzss[0x1b] == 0x18 # clc
-#lzss[0x1b] |= (bits >> 3) & 0x20 # clc or sec
+lzss[0x1a] = bits & 0xff
+assert lzss[0x1d] == 0x18 # clc
+lzss[0x1d] |= (bits >> 3) & 0x20 # clc or sec
  
-assert lzss[0x42] == 0x4c # jmp NNNN
-lzss[0x43] = entry_point & 0xff
-lzss[0x44] = entry_point >> 8
+assert lzss[0x40] == 0x4c # jmp NNNN
+lzss[0x41] = entry_point & 0xff
+lzss[0x42] = entry_point >> 8
  
  hdr = [load_addr & 0xff, load_addr >> 8, load_size & 0xff, load_size >> 8]
  with open(out_bin, 'wb') as fout:
diff --git a/loader/lzss_loader.asm b/loader/lzss_loader.asm

index 4586359..2c525e7 100644 (file)
--- a/loader/lzss_loader.asm
+++ b/loader/lzss_loader.asm
@@ -35,7 +35,8 @@ dist: .ds     2                       ; distance, or address of repeated data
         clc
  
         ldy     #0
-       beq     loop
+       ; optimization: the first byte has to be literal
+       ;beq    loop
  
  literal:
         lda     src
author	Nick Downing <nick@ndcode.org>
	Fri, 17 Jun 2022 12:13:47 +0000 (22:13 +1000)
committer	Nick Downing <nick@ndcode.org>
	Fri, 17 Jun 2022 13:16:23 +0000 (23:16 +1000)
loader/lzss_decode.py		patch \| blob \| history
loader/lzss_encode.py		patch \| blob \| history
loader/lzss_loader.asm		patch \| blob \| history