page0 = 1
page1 = 2
-page2 = 4
+page2 = 3
.area SM (abs,ovr)
page1_page0:
push de
- ld a,(bc)
- inc bc
- ld l,a
- dec h ; page 0
- jp (hl)
+ jr page1_dispatch0
page1_call:
push bc
ld h,page0
jp (hl)
-page1_stkst_l:
- call math_stkst_l
- jr page1_dispatch0
-
page1_ld_w:
ex de,hl
ld e,(hl)
jr page1_dispatch1
page1_and_w:
+ pop hl
call math_and_w
jr page1_dispatch1
jr page1_dispatch1
page1_or_w:
+ pop hl
call math_or_w
jr page1_dispatch1
jr page1_dispatch1
page1_xor_w:
+ pop hl
call math_xor_w
jr page1_dispatch1
call math_sl_w
jr page1_dispatch1
+page1_imm_slrev_l:
+ ex de,hl
+ call math_imm_l
+ call math_sl_l
+ jr page1_dispatch2
+
+page1_sl_l:
+ exx
+ pop hl
+ exx
+ pop hl
+ call math_sl_l0
+ jr page1_dispatch2
+
page1_imm_srrev_sw:
rst 8
.db 0x3e ; ld a,
call math_sr_sw
jr page1_dispatch1
+page1_imm_srrev_sl:
+ ex de,hl
+ call math_imm_l
+ call math_sr_sl
+ jr page1_dispatch2
+
+page1_sr_sl:
+ exx
+ pop hl
+ exx
+ pop hl
+ call math_sr_sl0
+ jr page1_dispatch2
+
page1_imm_srrev_uw:
rst 8
.db 0x3e ; ld a,
call math_sr_uw
jr page1_dispatch1
+page1_imm_srrev_ul:
+ ex de,hl
+ call math_imm_l
+ call math_sr_ul
+ jr page1_dispatch2
+
+page1_sr_ul:
+ exx
+ pop hl
+ exx
+ pop hl
+ call math_sr_ul0
+ jr page1_dispatch2
+
page1_imm_mul_uw:
call math_mul_imm_w
jr page1_dispatch1
page1_div_sw: ; signed word divide handler; note it calls the operand-reversed routine
pop hl ; hl = dividend taken from the stack (math_divrev_sw divides hl by de)
call math_divrev_sw ; header: de, hl = hl % de, hl / de
- ex de,hl
push hl ; push the quotient (the ex de,hl now lives inside the math routine)
jr page1_dispatch1
page1_divrev_sw: ; signed word divide handler, operands reversed; calls math_div_sw
pop hl ; hl = divisor taken from the stack (math_div_sw divides de by hl)
call math_div_sw ; header: de, hl = de % hl, de / hl
- ex de,hl
push hl ; push the quotient (the ex de,hl now lives inside the math routine)
jr page1_dispatch1
page1_div_uw: ; unsigned word divide handler; note it calls the operand-reversed routine
pop hl ; hl = dividend taken from the stack (math_divrev_uw divides hl by de)
call math_divrev_uw ; header: de, hl = hl % de, hl / de
- ex de,hl
push hl ; push the quotient (the ex de,hl now lives inside the math routine)
jr page1_dispatch1
page1_divrev_uw: ; unsigned word divide handler, operands reversed; calls math_div_uw
pop hl ; hl = divisor taken from the stack (math_div_uw divides de by hl)
call math_div_uw ; header: de, hl = de % hl, de / hl
- ex de,hl
push hl ; push the quotient (the ex de,hl now lives inside the math routine)
jr page1_dispatch1
.org page2 * 0x100
+page2_trace:
+ jp print_trace
+
+page2_page0: ; push de and hl', then dispatch the next opcode byte through page 0
+ push de ; presumably the high word of the cached long de:hl' -- confirm
+ exx
+ push hl ; hl' from the alternate register set
+ exx
+ jr page2_dispatch0
+
+page2_stkst_l: ; call math_stkst_l, then fall through into the page-0 dispatcher
+ call math_stkst_l
+page2_dispatch0: ; fetch the byte at (bc) and jump to address page0 * 0x100 + byte
+ ld a,(bc)
+ inc bc ; step bc past the fetched byte
+ ld l,a ; low byte of the target = fetched byte
+ ld h,page0 ; high byte of the target = page0
+ jp (hl)
+
+page2_st_l:
+ pop hl
+ call math_st_l
+ jr page2_dispatch0
+
+page2_imm_and_l:
+ call math_and_imm_l
+ jr page2_dispatch2
+
+page2_and_l:
+ exx
+ pop de
+ exx
+ pop hl
+ call math_and_l
+ jr page2_dispatch2
+
+page2_imm_or_l:
+ call math_or_imm_l
+ jr page2_dispatch2
+
+page2_or_l:
+ exx
+ pop de
+ exx
+ pop hl
+ call math_or_l
+ jr page2_dispatch2
+
+page2_imm_xor_l:
+ call math_xor_imm_l
+ jr page2_dispatch2
+
+page2_xor_l:
+ exx
+ pop de
+ exx
+ pop hl
+ call math_xor_l
+ jr page2_dispatch2
+
+page2_imm_add_l:
+ call math_add_imm_l
+ jr page2_dispatch2
+
+page2_add_l:
+ exx
+ pop de
+ exx
+ pop hl
+ call math_add_l
+ jr page2_dispatch2
+
+page2_imm_subrev_l:
+ call math_subrev_imm_l
+ jr page2_dispatch2
+
+page2_sub_l:
+ exx
+ pop de
+ exx
+ pop hl
+ call math_subrev_l
+ jr page2_dispatch2
+
+; use addition for page2_imm_sub_l
+
+page2_subrev_l:
+ exx
+ pop de
+ exx
+ pop hl
+ call math_sub_l
+ jr page2_dispatch2
+
+page2_imm_cmprev_sl:
+ call math_cmprev_imm_sl
+ jr page2_dispatch2
+
+page2_cmp_sl:
+ exx
+ pop de
+ exx
+ pop hl
+ call math_cmprev_sl
+ jr page2_dispatch2
+
+page2_imm_cmprev_ul:
+ call math_cmprev_imm_ul
+ jr page2_dispatch2
+
+page2_cmp_ul:
+ exx
+ pop de
+ exx
+ pop hl
+ call math_cmprev_ul
+ jr page2_dispatch2
+
+page2_imm_cmp_sl:
+ call math_cmp_imm_sl
+ jr page2_dispatch2
+
+page2_cmprev_sl:
+ exx
+ pop de
+ exx
+ pop hl
+ call math_cmp_sl
+ jr page2_dispatch2
+
+page2_imm_cmp_ul:
+ call math_cmp_imm_ul
+ jr page2_dispatch2
+
+page2_cmprev_ul:
+ exx
+ pop de
+ exx
+ pop hl
+ call math_cmp_ul
+ jr page2_dispatch2
+
+page2_imm_sl_l: ; call math_sl_imm_l, then fall through into the page-2 dispatcher
+ call math_sl_imm_l
+page2_dispatch2: ; fetch the byte at (bc) and jump to address page2 * 0x100 + byte
+ ld a,(bc)
+ inc bc ; step bc past the fetched byte
+ ld l,a ; low byte of the target = fetched byte
+ ld h,page2 ; high byte of the target = page2 (this page, see the .org above)
+ jp (hl)
+
+page2_slrev_l:
+ pop hl
+ call math_sl_l
+ jr page2_dispatch2
+
+page2_imm_sr_sl: ; arithmetic right shift of a long by an inline count byte
+ ld a,(bc) ; a = byte following the opcode
+ inc bc
+ ex de,hl ; math_sr_sl1 takes its high word in hl rather than de
+ call math_sr_sl1 ; per its header the count in a is already offset by 7
+ jr page2_dispatch2
+
+page2_srrev_sl:
+ pop hl
+ call math_sr_sl
+ jr page2_dispatch2
+
+page2_imm_sr_ul: ; logical right shift of a long by an inline count byte
+ ld a,(bc) ; a = byte following the opcode
+ inc bc
+ ex de,hl ; math_sr_ul1 takes its high word in hl rather than de
+ call math_sr_ul1 ; per its header the count in a is already offset by 7
+ jr page2_dispatch2
+
+page2_srrev_ul:
+ pop hl
+ call math_sr_ul
+ jr page2_dispatch2
+
+page2_imm_mul_ul:
+ call math_mul_imm_l
+ jr page2_dispatch2
+
+page2_mul_l:
+ exx
+ pop de
+ exx
+ pop hl
+ call math_mul_l
+ jr page2_dispatch2
+
+page2_imm_divrev_sl:
+ exx
+ ex de,hl
+ exx
+ call math_imm_l
+ jr div_sl_entry
+
+page2_div_sl: ; signed long divide: set up hl:hl' and de:de', then call math_div_sl0
+ exx
+ ex de,hl ; alternate set: swap de' and hl' (old hl' ends up in de')
+ pop hl ; hl' = first word popped from the stack
+ exx
+ pop hl ; hl = second word popped from the stack
+div_sl_entry: ; also entered by page2_imm_divrev_sl
+ call math_div_sl0 ; header: remainder in de:hl', quotient in hl:de'
+div_sl_done: ; shared exit, also reached from page2_imm_div_sl and page2_divrev_sl
+ push hl ; push hl (quotient high word per the header)
+ exx
+ push de ; push de' (quotient low word per the header)
+ exx
+ jr page2_dispatch2
+
+page2_imm_div_sl:
+ call math_div_imm_sl
+ jr div_sl_done
+
+page2_divrev_sl:
+ exx
+ pop de
+ exx
+ pop hl
+ call math_div_sl
+ jr div_sl_done
+
+page2_imm_divrev_ul:
+ exx
+ ex de,hl
+ exx
+ call math_imm_l
+ jr div_ul_entry
+
+page2_div_ul:
+ exx
+ ex de,hl
+ pop hl
+ exx
+ pop hl
+div_ul_entry:
+ call math_div_ul0
+div_ul_done:
+ push hl
+ exx
+ push de
+ exx
+ jr page2_dispatch2
+
+page2_imm_div_ul:
+ call math_div_imm_ul
+ jr div_ul_done
+
+page2_divrev_ul:
+ exx
+ pop de
+ exx
+ pop hl
+ call math_div_ul
+ jr div_ul_done
+
; conditionals
test_eq:
ld l,a
math_sl_l: ; de:hl' <<= l & 0x1f
ex de,hl
+math_sl_l0: ; hl:hl' <<= e & 0x1f, for slrev
; by 1
bit 0,e
jr z,1$
ld a,e
and 0x1f
add 7
-math_srrev_uw0: ; de = hl >> (a - 7), a in [7, 0x17), logical (a immediate)
+math_srrev_uw0: ; de = hl >> (a - 7), immediate a in [7, 0x17), logical
ld e,a
sub a
jr sr_w_entry
ld a,e
and 0xf
add 7
-math_srrev_sw0: ; de = hl >> (a - 7), a in [7, 0x17), arithmetic (a immediate)
+math_srrev_sw0: ; de = hl >> (a - 7), immediate a in [7, 0x17), arithmetic
ld e,a
ld a,h
rla
ret
math_sr_ul: ; de:hl' >>= l & 0x1f, logical
- ld a,l
+ ex de,hl
+math_sr_ul0: ; de:hl' = hl:hl' >> (e & 0x1f), logical
+ ld a,e
and 0x1f
add 7
-math_sr_ul0: ; de:hl' >>= a - 7, a in [7, 0x27), logical (for immediates)
- ld l,a
+math_sr_ul1: ; de:hl' = hl:hl' >> (a - 7), immediate a in [7, 0x27), logical
+ ld e,a
sub a
jr sr_l_entry
math_sr_sl: ; de:hl' >>= l & 0x1f, arithmetic
- ld a,l
- and 0x1f
+ ex de,hl ; value high word moves into hl, shift count into e
+math_sr_sl0: ; de:hl' = hl:hl' >> (e & 0x1f), arithmetic
+ ld a,e
+ and 0x1f ; long shift counts are 5 bits wide, matching math_sr_ul0 and the header
add 7 ; bias the count by 7, the form the immediate entry point expects
-math_sr_sl0: ; de:hl' >>= a - 7, a in [7, 0x27), arithmetic (for immediates)
- ld l,a
- ld a,d
+math_sr_sl1: ; de:hl' = hl:hl' >> (a - 7), immediate a in [7, 0x27), arithmetic
+ ld e,a ; e = biased shift count
+ ld a,h
rla ; carry = sign bit of the high byte h
sbc a,a ; a = 0x00 or 0xff, the sign fill; falls through into sr_l_entry
sr_l_entry:
- ex de,hl
; by -1
bit 0,e
jr nz,1$
adc hl,de
ret
+; word division
+
; use rst 8 then math_div_sw for math_div_imm_sw
-math_div_sw: ; de, hl = de / hl, de % hl, signed
+math_div_sw: ; de, hl = de % hl, de / hl, signed
ex de,hl
; use rst 8 then math_divrev_sw for math_divrev_imm_sw
-math_divrev_sw: ; de, hl = hl / de, hl % de, signed
+math_divrev_sw: ; de, hl = hl % de, hl / de, signed
push bc
ld a,h
or a
1$: ld d,b
ld e,a
pop bc
+ ex de,hl
ret
div_w_n:
ld d,b
ld e,a
pop bc
+ ex de,hl
ret
div_w_nn: ; negative dividend, negative divisor
ld d,b
ld e,a
pop bc
+ ex de,hl
ret
-math_div_imm_uw:
- rst 8
-math_div_uw: ; de, hl = de / hl, de % hl, unsigned
+; use rst 8 then math_div_uw for math_div_imm_uw
+math_div_uw: ; de, hl = de % hl, de / hl, unsigned
ex de,hl
; use rst 8 then math_divrev_uw for math_divrev_imm_uw
-math_divrev_uw: ; de, hl = hl / de, hl % de, unsigned
+math_divrev_uw: ; de, hl = hl % de, hl / de, unsigned
push bc
ld a,h
ld c,l
1$: ld d,b
ld e,a
pop bc
+ ex de,hl
ret
; non-restoring division routine
scf
ret
+; long division
+
+math_div_imm_sl: ; immediate form: call math_imm_l0, then fall into math_div_sl
+ call math_imm_l0
+math_div_sl: ; de:hl', hl:de' = de:hl' % hl:de', de:hl' / hl:de', signed
+ ex de,hl
+math_div_sl0: ; de:hl', hl:de' = hl:hl' % de:de', hl:hl' / de:de', signed
+ push bc ; NOTE(review): only hl/de are used below, never hl'/de' -- confirm the long case is complete
+ ld a,h
+ or a ; sign flag = sign of h (dividend high byte)
+ ld a,d
+ rla ; carry = sign of d (divisor high byte); rla leaves the sign flag alone
+ jp m,div_l_n ; negative dividend
+
+ ; positive dividend
+ ld a,h ; a = first dividend byte fed to the bit loop
+ ld c,l ; c = second dividend byte, kept for the second pass
+ ld hl,0 ; initial remainder; ld does not disturb the carry from rla
+ jr nc,div_l_pp ; positive dividend, positive divisor
+
+ ; positive dividend, negative divisor
+ call div_l_n1
+ ld b,a ; b = quotient byte from the first pass
+ ld a,c
+ call div_l_ncf ; second pass, entry chosen by the carry from the first
+ inc a
+ jr c,1$
+ sbc hl,de
+1$: ld d,b ; de = quotient (b:a)
+ ld e,a
+ pop bc
+ ex de,hl ; return de = remainder, hl = quotient
+ ret
+
+div_l_n: ; reached from math_div_sl0 with carry = sign of the divisor
+ ; negative dividend
+ dec hl ; reduces remainder by 1 (we inc later); 16-bit dec leaves carry intact
+ ld a,h ; a = first dividend byte fed to the bit loop
+ ld c,l ; c = second dividend byte, kept for the second pass
+ ld hl,-1 ; initial (negative) remainder
+ jr c,div_l_nn ; negative dividend, negative divisor
+
+ ; negative dividend, positive divisor
+ call div_l1 ; first pass enters on the "below" chain
+ ld b,a ; b = quotient byte from the first pass
+ ld a,c
+ call div_lcf ; second pass, entry chosen by the carry from the first
+ inc a
+ jr c,1$
+ sbc hl,de
+1$: inc hl ; get into range -divisor+1..0
+ ld d,b ; de = quotient (b:a)
+ ld e,a
+ pop bc ; matches the push bc at math_div_sl0
+ ex de,hl ; return de = remainder, hl = quotient
+ ret
+
+div_l_nn: ; negative dividend, negative divisor
+ call div_l_n0 ; first pass, carry-reversed variant for negative divisors
+ ld b,a ; b = quotient byte from the first pass
+ ld a,c
+ call div_l_ncf ; second pass, entry chosen by the carry from the first
+ jr nc,1$
+ add hl,de ; carry returned: apply the final remainder correction
+1$: inc hl ; get into range divisor+1..0
+ ld d,b ; de = quotient (b:a)
+ ld e,a
+ pop bc ; matches the push bc at math_div_sl0
+ ex de,hl ; return de = remainder, hl = quotient
+ ret
+
+math_div_imm_ul: ; immediate form: call math_imm_l0, then fall into math_div_ul
+ call math_imm_l0
+math_div_ul: ; de:hl', hl:de' = de:hl' % hl:de', de:hl' / hl:de', unsigned
+ ex de,hl
+math_div_ul0: ; de:hl', hl:de' = hl:hl' % de:de', hl:hl' / de:de', unsigned
+ push bc ; NOTE(review): only hl/de are used below, never hl'/de' -- confirm the long case is complete
+ ld a,h ; a = first dividend byte fed to the bit loop
+ ld c,l ; c = second dividend byte, kept for the second pass
+ ld hl,0 ; initial remainder
+div_l_pp: ; positive dividend, positive divisor
+ call div_l0 ; first pass enters on the "above" chain
+ ld b,a ; b = quotient byte from the first pass
+ ld a,c
+ call div_lcf ; second pass, entry chosen by the carry from the first
+ jr nc,1$
+ add hl,de ; carry returned: remainder is negative, add the divisor back
+1$: ld d,b ; de = quotient (b:a)
+ ld e,a
+ pop bc
+ ex de,hl ; return de = remainder, hl = quotient
+ ret
+
+; non-restoring division routine
+
+; de = divisor, hl:a = dividend with hl = previous remainder, a = next byte
+; enter at div0 (label div_l0) with positive remainder in hl, such that hl < de
+; enter at div1 (label div_l1) with negative remainder in hl, such that hl >= -de
+
+; div0/1 return a = 8-bit quotient as an odd number interpreted as -ff..ff,
+; by summing positive/negative place values, e.g. -80 +40 +20 -10 +8 -4 -2 +1
+
+; if entered at div0, there is a -80 and so quotient is in range -ff..-1
+; if entered at div1, there is a +80 and so quotient is in range 1..ff
+; falls out of loop after div01 with positive remainder, div11 with negative,
+; depending on this we should re-enter at div0 or div1, signalled by cf return
+
+; the successive quotient bytes can be concatenated into a full quotient,
+; but negative bytes require the next higher quotient byte to be decremented,
+; we know in advance if this will happen because the implied sign of the
+; quotient byte depends only on whether we entered at div0 or div1, hence,
+; before the div11 return we'll decrement to compensate for next negative byte
+
+; the decrement can also be seen as compensating for the extra add hl,de that
+; may be needed to make negative remainder positive before return to caller,
+; thus leaving quotient in a consistent state regardless of which exit taken,
+; remainder needs the add hl,de if cf=1 returned (equiv. return byte is even)
+
+; in the following code each sbc hl,de gets an inc a and each add hl,de gets
+; a dec a, guaranteeing the integrity of the division, the initial scf/rla is
+; needed to make the result 100 + -ff..ff or 1..1ff, so that the decrements
+; cannot borrow into the upcoming dividend bits also held in a, and there must
+; be another shift between the scf/rla and increment/decrement so that the scf
+; is implicitly in the 100s place, making the code awkward though it's correct
+
+; TODO: the code is now optimized to only inc/dec a when crossing zero, so the
+; analysis above (an inc/dec a on every step) is out of date and needs fixing
+
+div_lcf: ; re-entry point: carry set selects div_l1, carry clear falls into div_l0
+ jr c,div_l1
+div_l0: ; bit 0, above
+ scf
+ rla ; shift a left: a 1 enters at bit 0, the top dividend bit leaves in carry
+ adc hl,hl ; shift that dividend bit into the remainder
+ sbc hl,de ; "above" step: subtract the divisor
+ jr nc,div_l01 ; no borrow: stay on the "above" chain
+ dec a ; borrow: crossed zero, continue on the "below" chain
+div_l11: ; bit 1, below
+ add a,a ; next dividend bit into carry
+ adc hl,hl
+ add hl,de ; "below" step: add the divisor
+ jr nc,div_l12 ; no carry: stay on the "below" chain
+ inc a ; carry: crossed zero, continue on the "above" chain
+div_l02: ; bit 2, above
+ add a,a
+ adc hl,hl
+ sbc hl,de
+ jr nc,div_l03
+ dec a
+div_l13: ; bit 3, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr nc,div_l14
+ inc a
+div_l04: ; bit 4, above
+ add a,a
+ adc hl,hl
+ sbc hl,de
+ jr nc,div_l05
+ dec a
+div_l15: ; bit 5, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr nc,div_l16
+ inc a
+div_l06: ; bit 6, above
+ add a,a
+ adc hl,hl
+ sbc hl,de
+ jr nc,div_l07
+ dec a
+div_l17: ; bit 7, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr nc,div_l18
+ inc a
+div_l08: ; done, above
+ add a,a
+ dec a
+ or a ; return with carry clear, so div_lcf re-enters at div_l0
+ ret
+
+div_l1: ; bit 0, below
+ add a,a ; next dividend bit into carry
+ adc hl,hl ; shift that dividend bit into the remainder
+ add hl,de ; "below" step: add the divisor
+ jr nc,div_l11 ; no carry: stay on the "below" chain
+ inc a ; carry: crossed zero, continue on the "above" chain
+div_l01: ; bit 1, above
+ add a,a
+ adc hl,hl
+ sbc hl,de ; "above" step: subtract the divisor
+ jr nc,div_l02 ; no borrow: stay on the "above" chain
+ dec a ; borrow: crossed zero, continue on the "below" chain
+div_l12: ; bit 2, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr nc,div_l13
+ inc a
+div_l03: ; bit 3, above
+ add a,a
+ adc hl,hl
+ sbc hl,de
+ jr nc,div_l04
+ dec a
+div_l14: ; bit 4, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr nc,div_l15
+ inc a
+div_l05: ; bit 5, above
+ add a,a
+ adc hl,hl
+ sbc hl,de
+ jr nc,div_l06
+ dec a
+div_l16: ; bit 6, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr nc,div_l17
+ inc a
+div_l07: ; bit 7, above
+ add a,a
+ adc hl,hl
+ sbc hl,de
+ jr nc,div_l08
+ dec a
+div_l18: ; done, below
+ add a,a
+ ;inc a
+ ;dec a ; compensation
+ scf ; return with carry set, so div_lcf re-enters at div_l1
+ ret
+
+; divn0/1 (labels div_l_n0/div_l_n1) are the same as div0/1, but with the carry
+; test reversed after adding/subtracting the divisor; this is for negative
+; divisors, where we expect a carry (a carry means no zero crossing)
+
+; when the divisor is negated the remainder is also negated, so we expect to
+; subtract when the remainder is negative and vice versa; the carry must be
+; cleared (or a) after the adc hl,hl shift and before the sbc hl,de
+
+div_l_ncf: ; re-entry point: carry set selects div_l_n1, carry clear falls into div_l_n0
+ jr c,div_l_n1
+div_l_n0: ; bit 0, above
+ scf
+ rla ; shift a left: a 1 enters at bit 0, the top dividend bit leaves in carry
+ adc hl,hl ; shift that dividend bit into the remainder
+ or a ; clear the carry left by the shift before subtracting
+ sbc hl,de ; "above" step: subtract the divisor
+ jr c,div_l_n01 ; carry: stay on the "above" chain (test reversed vs div_l0)
+ dec a ; no carry: crossed zero, continue on the "below" chain
+div_l_n11: ; bit 1, below
+ add a,a ; next dividend bit into carry
+ adc hl,hl
+ add hl,de ; "below" step: add the divisor
+ jr c,div_l_n12 ; carry: stay on the "below" chain (test reversed vs div_l11)
+ inc a ; no carry: crossed zero, continue on the "above" chain
+div_l_n02: ; bit 2, above
+ add a,a
+ adc hl,hl
+ or a
+ sbc hl,de
+ jr c,div_l_n03
+ dec a
+div_l_n13: ; bit 3, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr c,div_l_n14
+ inc a
+div_l_n04: ; bit 4, above
+ add a,a
+ adc hl,hl
+ or a
+ sbc hl,de
+ jr c,div_l_n05
+ dec a
+div_l_n15: ; bit 5, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr c,div_l_n16
+ inc a
+div_l_n06: ; bit 6, above
+ add a,a
+ adc hl,hl
+ or a
+ sbc hl,de
+ jr c,div_l_n07
+ dec a
+div_l_n17: ; bit 7, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr c,div_l_n18
+ inc a
+div_l_n08: ; done, above
+ add a,a
+ dec a
+ or a ; return with carry clear, so div_l_ncf re-enters at div_l_n0
+ ret
+
+div_l_n1: ; bit 0, below
+ add a,a ; next dividend bit into carry
+ adc hl,hl ; shift that dividend bit into the remainder
+ add hl,de ; "below" step: add the divisor
+ jr c,div_l_n11 ; carry: stay on the "below" chain (test reversed vs div_l1)
+ inc a ; no carry: crossed zero, continue on the "above" chain
+div_l_n01: ; bit 1, above
+ add a,a
+ adc hl,hl
+ or a ; clear the carry left by the shift before subtracting
+ sbc hl,de ; "above" step: subtract the divisor
+ jr c,div_l_n02 ; carry: stay on the "above" chain (test reversed vs div_l01)
+ dec a ; no carry: crossed zero, continue on the "below" chain
+div_l_n12: ; bit 2, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr c,div_l_n13
+ inc a
+div_l_n03: ; bit 3, above
+ add a,a
+ adc hl,hl
+ or a
+ sbc hl,de
+ jr c,div_l_n04
+ dec a
+div_l_n14: ; bit 4, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr c,div_l_n15
+ inc a
+div_l_n05: ; bit 5, above
+ add a,a
+ adc hl,hl
+ or a
+ sbc hl,de
+ jr c,div_l_n06
+ dec a
+div_l_n16: ; bit 6, below
+ add a,a
+ adc hl,hl
+ add hl,de
+ jr c,div_l_n17
+ inc a
+div_l_n07: ; bit 7, above
+ add a,a
+ adc hl,hl
+ or a
+ sbc hl,de
+ jr c,div_l_n08
+ dec a
+div_l_n18: ; done, below
+ add a,a
+ ;inc a
+ ;dec a ; compensation
+ scf ; return with carry set, so div_l_ncf re-enters at div_l_n1
+ ret
+
+
; debugging
print_trace: ; print af, bc, de, hl, sp, (sp)