page0_jeq:
jr nz,not_taken
page0_imm_jmp:
- rst 8
+ rst 0x28
ld c,l
ld b,h
jr page0_dispatch0
jr page0_dispatch1
page0_imm_call:
- rst 8
+ rst 0x28
push bc
ld c,l
ld b,h
page0_ret:
pop bc
page0_stkadj:
- rst 8
+ rst 0x28
add hl,sp
ld sp,hl
page0_dispatch0:
jp (hl)
page0_stkptr:
- rst 8
+ rst 0x28
add hl,sp
ex de,hl
page0_dispatch1:
jr page1_dispatch1
page1_imm_add_w:
- rst 8
+ rst 0x28
.db 0x3e ; ld a,
page1_add_w:
pop hl
jr page1_dispatch1
page1_imm_subrev_w:
- rst 8
+ rst 0x28
.db 0x3e ; ld a,
page1_sub_w:
pop hl
jr page1_dispatch1
page1_imm_cmprev_sw:
- rst 8
+ rst 0x28
.db 0x3e ; ld a,
page1_cmp_sw:
pop hl
jr page1_dispatch0
page1_imm_cmp_sw:
- rst 8
+ rst 0x28
.db 0x3e ; ld a,
page1_cmprev_sw:
pop hl
jr page1_dispatch0
page1_imm_cmprev_uw:
- rst 8
+ rst 0x28
.db 0x3e ; ld a,
page1_cmp_uw:
pop hl
jr page1_dispatch0
page1_imm_cmp_uw:
- rst 8
+ rst 0x28
.db 0x3e ; ld a,
page1_cmprev_uw:
pop hl
jr page1_dispatch0
page1_imm_slrev_w:
- rst 8
+ rst 0x28
.db 0x3e ; ld a,
page1_sl_w:
pop hl
jp (hl)
page1_imm_sl_w:
- call math_sl_imm_w
- jr page1_dispatch1
-
+ ld a,(bc)
+ inc bc
+ ld l,a
+ .db 0x3e ; ld a,
page1_slrev_w:
pop hl
call math_sl_w
jr page1_dispatch2
page1_imm_srrev_sw:
- rst 8
+ rst 0x28
.db 0x3e ; ld a,
page1_sr_sw:
pop hl
jr page1_dispatch2
page1_imm_srrev_uw:
- rst 8
+ rst 0x28
.db 0x3e ; ld a,
page1_sr_uw:
pop hl
jr page1_dispatch1
page1_imm_divrev_sw:
- rst 8
+ rst 0x28
.db 0x3e ; ld a,
page1_div_sw:
pop hl
jr page1_dispatch1
page1_imm_div_sw:
- rst 8
+ rst 0x28
.db 0x3e ; ld a,
page1_divrev_sw:
pop hl
jr page1_dispatch1
page1_imm_divrev_uw:
- rst 8
+ rst 0x28
.db 0x3e ; ld a,
page1_div_uw:
pop hl
jr page1_dispatch1
page1_imm_div_uw:
- rst 8
+ rst 0x28
.db 0x3e ; ld a,
page1_divrev_uw:
pop hl
jr page2_dispatch2
page2_imm_add_l:
- call math_add_imm_l
- jr page2_dispatch2
-
+ rst 0x38
+ .db 0x3e ; ld a,
page2_add_l:
- exx
- pop de
- exx
- pop hl
+ rst 0x30
call math_add_l
jr page2_dispatch2
page2_imm_subrev_l:
- call math_subrev_imm_l
- jr page2_dispatch2
-
+ rst 0x38
+ .db 0x3e ; ld a,
page2_sub_l:
- exx
- pop de
- exx
- pop hl
+ rst 0x30
call math_subrev_l
jr page2_dispatch2
; use addition for page2_imm_sub_l
-
page2_subrev_l:
- exx
- pop de
- exx
- pop hl
+ rst 0x30
call math_sub_l
jr page2_dispatch2
page2_imm_cmprev_sl:
- call math_cmprev_imm_sl
- jr page2_dispatch2
-
+ rst 0x38
+ .db 0x3e ; ld a,
page2_cmp_sl:
- exx
- pop de
- exx
- pop hl
+ rst 0x30
call math_cmprev_sl
jr page2_dispatch2
page2_imm_cmprev_ul:
- call math_cmprev_imm_ul
- jr page2_dispatch2
-
+ rst 0x38
+ .db 0x3e ; ld a,
page2_cmp_ul:
- exx
- pop de
- exx
- pop hl
+ rst 0x30
call math_cmprev_ul
jr page2_dispatch2
page2_imm_cmp_sl:
- call math_cmp_imm_sl
- jr page2_dispatch2
-
+ rst 0x38
+ .db 0x3e ; ld a,
page2_cmprev_sl:
- exx
- pop de
- exx
- pop hl
+ rst 0x30
call math_cmp_sl
jr page2_dispatch2
page2_imm_cmp_ul:
- call math_cmp_imm_ul
- jr page2_dispatch2
-
+ rst 0x38
+ .db 0x3e ; ld a,
page2_cmprev_ul:
- exx
- pop de
- exx
- pop hl
+ rst 0x30
call math_cmp_ul
jr page2_dispatch2
-page2_imm_sl_l:
- call math_sl_imm_l
-page2_dispatch2:
; shift-left-long entry taking its count as an immediate: the byte at (bc) is
; loaded into l, then the 0x3e (ld a,n) opcode swallows the one-byte pop hl
; at page2_slrev_l so the stack is left untouched
; this entry sits among the page2_ handlers and replaces the old
; page2_imm_sl_l, so that name is defined again here; the page1_ spelling is
; kept as an alias so that any existing reference to it still assembles
+page1_imm_sl_l:
+page2_imm_sl_l:
ld a,(bc)
inc bc
ld l,a
- ld h,page2
- jp (hl)
-
+ .db 0x3e ; ld a,
page2_slrev_l:
pop hl
call math_sl_l
- jr page2_dispatch2
; fetch the next byte at (bc) and jump to that offset within page2
+page2_dispatch2:
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld h,page2
+ jp (hl)
page2_imm_sr_sl:
ld a,(bc)
; math package
+; lightweight routines
+
+; these can be duplicated without much cost, and will be called often, so the
+; calling convention is geared to what the interpreter needs (de:hl' and so on)
+
math_imm_l: ; immediate to de:hl'
ld a,(bc)
inc bc
ld d,a
ret
-math_imm_l0: ; immediate to hl:de'
- ld a,(bc)
- inc bc
- exx
- ld e,a
- exx
- ld a,(bc)
- inc bc
- exx
- ld d,a
- exx
-; use rst 8 for math_imm_w0
- ld a,(bc)
- inc bc
- ld l,a
- ld a,(bc)
- inc bc
- ld h,a
- ret
-
math_stkld_w: ; sp(imm_w) to de
ld a,(bc)
inc bc
exx
ret
-math_sl_imm_w: ; de <<= imm_b, imm_b in [0, 0x10)
- ld a,(bc)
- inc bc
- ld l,a
-math_sl_w: ; de <<= l & 0xf
+; heavyweight routines
+
+; these have unrolled loops and so there needs to be as much reuse as possible
+
+; for the multiply and divide the unrolled loops are placed in subroutines, so
+; there is some overhead to use them, but it allows the calling code to itself
+; be cheaply unrolled, so the amount left to do is determined by context not
+; by a counter; the tradeoff is do we count loops by push bc/pop bc/djnz or by
+; call/call/call, since both need a stack push/pop it should be about the same
+
+; for these routines the calling convention is geared to whatever the routine
+; needs to work most efficiently, this makes the usage more cumbersome as you
+; have to do something like ex de,hl/call/ex de,hl but it is done this way because
+; otherwise some callsites need to exchange registers into place on one side
+; of a call/ret boundary only for them to be immediately exchanged back again
+
+; as an exception to this we may provide an earlier entry point(s), before the
+; routine proper, which exchanges arguments into place (the caller must still
+; deal with exchanges afterwards), we do this because of callsites that are in
+; the interpreter pages and need to conserve code size, therefore the prefix
+; chosen need not occur more than once, and we'll use the longest such prefix
+
+; while some of these earlier entry points have a meaning, e.g. math_divrev_l
+; before math_div_l, many of them do not, so we will just number them instead
+; (this is because the prefix code can be a compromise between callers' needs)
+
+math_sl_w0: ; hl = de << (l & 0xf)
ex de,hl
-math_slrev_w: ; de = hl << (e & 0xf)
+math_sl_w: ; hl <<= e & 0xf
; by 1
bit 0,e
jr z,1$
add hl,hl
3$: ; by 8
bit 3,e
- ex de,hl
ret z
- ld d,e
- ld e,0
+ ld h,l
+ ld l,0
ret
-math_sl_imm_l: ; de:hl' << imm_b, imm_b in [0, 0x20)
- ld a,(bc)
- inc bc
- ld l,a
-math_sl_l: ; de:hl' <<= l & 0x1f
+math_sl_l0: ; hl:de' <<= e & 0x1f
+ exx
ex de,hl
-math_sl_l0: ; hl:hl' <<= e & 0x1f, for slrev
+ exx
+math_sl_l: ; hl:hl' <<= e & 0x1f
; by 1
bit 0,e
jr z,1$
ld l,a
4$: ; by 16
bit 4,e
- ex de,hl
ret z
exx
push hl
ld hl,0
exx
- pop de
+ pop hl
ret
-math_sr_uw: ; de >>= l & 0xf, logical
+math_sr_uw0: ; hl = de >> (l & 0xf), logical
ex de,hl
-math_srrev_uw: ; de = hl >> (e & 0xf), logical
+math_sr_uw: ; hl >>= e & 0xf, logical
ld a,e
; a word shift count is 0..0xf; masking with 0xf keeps a within the
; [7, 0x17) range that the math_sr_uw1 entry below is documented to accept
and 0xf
add 7
-math_srrev_uw0: ; de = hl >> (a - 7), immediate a in [7, 0x17), logical
+math_sr_uw1: ; hl >>= a - 7, immediate a in [7, 0x17), logical
ld e,a
sub a ; a = 0, carry clear (presumably the zero fill; sr_w_entry not shown here)
jr sr_w_entry
-math_sr_sw: ; de >>= l & 0xf, arithmetic
+math_sr_sw0: ; hl = de >> (l & 0xf), arithmetic
ex de,hl
-math_srrev_sw: ; de = hl >> (e & 0xf), arithmetic
+math_sr_sw: ; hl >>= e & 0xf, arithmetic
ld a,e
and 0xf
add 7
-math_srrev_sw0: ; de = hl >> (a - 7), immediate a in [7, 0x17), arithmetic
+math_sr_sw1: ; hl >>= a - 7, immediate a in [7, 0x17), arithmetic
ld e,a
ld a,h
rla
3$: ; by 8
bit 3,e
jr z,4$
- ld e,h
- ld d,a
+ ld l,h
+ ld h,a
ret
4$: ; by 16 (can't occur simultaneously with by 8)
bit 4,e
- ex de,hl
ret z
- ld e,a
+ ld l,a
rla
sbc a,a
- ld d,a
- ret
-
-math_mul_imm_l: ; de:hl' *= imm_l, big-endian imm_l
- exx
- ex de,hl
- sub a
- ld l,a
- ld h,a
- exx
- ld l,a
ld h,a
- ld a,(bc)
- inc bc
- call mul_l0
- ld a,(bc)
- inc bc
- call mul_l
- ld a,(bc)
- inc bc
- call mul_l
- ld a,(bc)
- inc bc
- call mul_l
- ex de,hl
ret
-math_sr_ul: ; de:hl' >>= l & 0x1f, logical
+math_sr_ul0: ; hl:de' >>= e & 0x1f, logical
+ exx
ex de,hl
-math_sr_ul0: ; de:hl' = hl:hl' >> (e & 0x1f), logical
+ exx
+math_sr_ul: ; hl:hl' >>= e & 0x1f, logical
ld a,e
and 0x1f
add 7
-math_sr_ul1: ; de:hl' = hl:hl' >> (a - 7), immediate a in [7, 0x27), logical
+math_sr_ul1: ; hl:hl' >>= a - 7, immediate a in [7, 0x27), logical
ld e,a
sub a
jr sr_l_entry
-math_sr_sl: ; de:hl' >>= l & 0x1f, arithmetic
+math_sr_sl0: ; hl:hl' = hl:de' >> (e & 0x1f), arithmetic
+ exx
ex de,hl
-math_sr_sl0: ; de:hl' = hl:hl' >> (e & 0x1f), arithmetic
+ exx
+math_sr_sl: ; hl:hl' >>= e & 0x1f, arithmetic
ld a,e
; a long shift count is 0..0x1f; masking with 0x1f gives the [7, 0x27)
; range that math_sr_sl1 accepts, so counts of 16..31 are not truncated
and 0x1f
add 7
-math_sr_sl1: ; de:hl' = hl:hl' >> (a - 7), immediate a in [7, 0x17), arithmetic
+math_sr_sl1: ; hl:hl' >>= a - 7, immediate a in [7, 0x27), arithmetic
ld e,a
ld a,h
rla
bit 4,e
jr z,5$
push hl
- ld e,a
+ ld l,a
rla
sbc a,a
- ld d,a
+ ld h,a
exx
pop hl
exx
ret
5$: ; by 32 (can't occur simultaneously with by 16)
bit 5,e
- ex de,hl
ret z
exx
ld l,a
sbc a,a
ld h,a
exx
- ld e,a
- ld d,a
+ ld l,a
+ ld h,a
ret
+; this routine is just an optimization, therefore use interpreter registers
math_mul_imm_w: ; de *= imm_w, big-endian imm_w
ld hl,0
ld a,(bc)
ex de,hl
ret
-math_mul_w: ; de *= hl
+math_mul_w: ; hl *= de
ld a,l
push af ; keep the low byte of hl for the second pass
ld a,h
call mul_w0 ; first pass uses the high byte of hl
pop af ; a = saved low byte of hl
- call mul_w
- ex de,hl
- ret
-
; the second pass is done by falling through into mul_w, whose ret returns
; to our caller; calling mul_w here as well would run that pass twice now
; that the ex de,hl/ret which used to follow the call has been removed
mul_w: ; bit 0
add hl,hl
mul_w0: rla
add hl,de
ret
-math_mul_l: ; de:hl' *= hl:de'
+; this routine is just an optimization, therefore use interpreter registers
+math_mul_imm_l: ; de:hl' *= imm_l, big-endian imm_l
+ exx
+ ex de,hl
+ sub a
+ ld l,a
+ ld h,a
+ exx
+ ld l,a
+ ld h,a
+ ld a,(bc)
+ inc bc
+ call mul_l0
+ ld a,(bc)
+ inc bc
+ call mul_l
+ ld a,(bc)
+ inc bc
+ call mul_l
+ ld a,(bc)
+ inc bc
+ call mul_l
ex de,hl
+ ret
+
+math_mul_l: ; hl:hl' *= de:de'
exx
ld a,l
push af
pop af
call mul_l
pop af
- call mul_l
- ex de,hl
- ret
-
mul_l: ; bit 0
exx
add hl,hl
adc hl,de
ret
-; word division
-
-; use rst 8 then math_div_sw for math_div_imm_sw
-math_div_sw: ; de, hl = de % hl, de / hl, signed
+math_div_sw0: ; hl, de = de % hl, de / hl, signed
ex de,hl
-; use rst 8 then math_divrev_sw for math_divrev_imm_sw
-math_divrev_sw: ; de, hl = hl % de, hl / de, signed
+math_div_sw: ; hl, de = hl % de, hl / de, signed
push bc
ld a,h
or a
1$: ld d,b
ld e,a
pop bc
- ex de,hl
ret
div_w_n:
ld d,b
ld e,a
pop bc
- ex de,hl
ret
div_w_nn: ; negative dividend, negative divisor
ld d,b
ld e,a
pop bc
- ex de,hl
ret
-; use rst 8 then math_div_uw for math_div_imm_uw
-math_div_uw: ; de, hl = de % hl, de / hl, unsigned
+math_div_uw0: ; hl, de = de % hl, de / hl, unsigned
ex de,hl
-; use rst 8 then math_divrev_uw for math_divrev_imm_uw
-math_divrev_uw: ; de, hl = hl % de, hl / de, unsigned
+math_div_uw: ; hl, de = hl % de, hl / de, unsigned
push bc
ld a,h
ld c,l
1$: ld d,b
ld e,a
pop bc
- ex de,hl
ret
; non-restoring division routine
scf
ret
-; long division
-
-math_div_imm_sl:
- call math_imm_l0
-math_div_sl: ; de:hl', hl:de' = de:hl' % hl:de', de:hl' / hl:de', signed
+math_div_sl0: ; hl:hl', de:de' = hl:de' % de:hl', hl:de' / de:hl', signed
+ exx
ex de,hl
-math_div_sl0: ; ; de:hl', hl:de' = hl:hl' % de:de', hl:hl' / de:de', signed
+ exx
+math_div_sl: ; ; hl:hl', de:de' = hl:hl' % de:de', hl:hl' / de:de', signed
push bc
ld a,h
or a
jp m,div_l_n ; positive dividend
; positive dividend
+ exx
+ ld c,l
+ ld b,h
+ ld hl,0
+ exx
ld a,h
ld c,l
ld hl,0
ld b,a
ld a,c
call div_l_ncf
+ ld c,a
+ exx
+ ld a,b
+ exx
+ call div_l_ncf
+ exx
+ ld b,a
+ ld a,c
+ exx
+ call div_l_ncf
inc a
- jr c,1$
+ jr c,div_l_p_done
+ exx
sbc hl,de
-1$: ld d,b
- ld e,a
- pop bc
- ex de,hl
- ret
+ exx
+ sbc hl,de
+ jr div_l_p_done
div_l_n:
; negative dividend
- dec hl ; reduces remainder by 1 (we inc later)
+ call dec_l ; reduces remainder by 1 (we inc later)
+ exx
+ ld c,l
+ ld b,h
+ ld hl,-1
+ exx
ld a,h
ld c,l
ld hl,-1
ld b,a
ld a,c
call div_lcf
+ ld c,a
+ exx
+ ld a,b
+ exx
+ call div_lcf
+ exx
+ ld b,a
+ ld a,c
+ exx
+ call div_lcf
inc a
- jr c,1$
+ jr c,div_l_n_done
+ exx
sbc hl,de
-1$: inc hl ; get into range -divisor+1..0
- ld d,b
- ld e,a
- pop bc
- ex de,hl
- ret
+ exx
+ sbc hl,de
+ jr div_l_n_done
div_l_nn: ; negative dividend, negative divisor
call div_l_n0
ld b,a
ld a,c
call div_l_ncf
- jr nc,1$
+ ld c,a
+ exx
+ ld a,b
+ exx
+ call div_l_ncf
+ exx
+ ld b,a
+ ld a,c
+ exx
+ call div_l_ncf
+ jr nc,div_l_n_done
+ exx
add hl,de
-1$: inc hl ; get into range divisor+1..0
- ld d,b
+ exx
+ adc hl,de
+div_l_n_done:
+ exx
ld e,a
+ ld d,b
+ exx
+ ld e,c
+ ld d,b
pop bc
- ex de,hl
- ret
-
-math_div_imm_ul:
- call math_imm_l0
-math_div_ul: ; de:hl', hl:de' = de:hl' % hl:de', de:hl' / hl:de', unsigned
- ex de,hl
-math_div_ul0: ; ; de:hl', hl:de' = hl:hl' % de:de', hl:hl' / de:de', unsigned
; 32-bit increment of hl:hl' (hl = high word, hl' = low word), clobbers a
+inc_l: ; get into range divisor+1..0
+ exx
+ inc hl ; low word first
+ ld a,l
+ or h ; zero only if the low word wrapped round to 0
+ exx
+ ret nz ; no wrap, high word is unchanged
+ inc hl ; propagate the carry into the high word
+ ret
+
+math_div_ul0: ; hl:hl', de:de' = hl:de' % de:hl', hl:de' / de:hl', unsigned
+ exx
+ ex de,hl
+ exx
+math_div_ul: ; ; hl:hl', de:de' = hl:hl' % de:de', hl:hl' / de:de', unsigned
push bc
+ exx
+ ld c,l
+ ld b,h
+ ld hl,0
+ exx
ld a,h
ld c,l
ld hl,0
ld b,a
ld a,c
call div_lcf
- jr nc,1$
+ ld c,a
+ exx
+ ld a,b
+ exx
+ call div_lcf
+ exx
+ ld b,a
+ ld a,c
+ exx
+ call div_lcf
+ jr nc,div_l_p_done
+ exx
add hl,de
-1$: ld d,b
+ exx
+ adc hl,de
+div_l_p_done:
+ exx
ld e,a
+ ld d,b
+ exx
+ ld e,c
+ ld d,b
pop bc
- ex de,hl
ret
-; non-restoring division routine
-
-; de = divisor, hl:a = dividend with hl = previous remainder, a = next byte
-; enter at div0 with positive remainder in hl, such that hl < de
-; enter at div1 with negative remainder in hl, such that hl >= -de
-
-; div0/1 return a = 8-bit quotient as an odd number interpreted as -ff..ff,
-; by summing positive/negative place values, e.g. -80 +40 +20 -10 +8 -4 -2 +1
-
-; if entered at div0, there is a -80 and so quotient is in range -ff..-1
-; if entered at div1, there is a +80 and so quotient is in range 1..ff
-; falls out of loop after div01 with positive remainder, div11 with negative,
-; depending on this we should re-enter at div0 or div1, signalled by cf return
-
-; the successive quotient bytes can be concatenated into a full quotient,
-; but negative bytes require the next higher quotient byte to be decremented,
-; we know in advance if this will happen because the implied sign of the
-; quotient byte depends only on whether we entered at div0 or div1, hence,
-; before the div11 return we'll decrement to compensate for next negative byte
-
-; the decrement can also be seen as compensating for the extra add hl,de that
-; may be needed to make negative remainder positive before return to caller,
-; thus leaving quotient in a consistent state regardless of which exit taken,
-; remainder needs the add hl,de if cf=1 returned (equiv. return byte is even)
-
-; in the following code each sbc hl,de gets an inc a and each add hl,de gets
-; a dec a, guaranteeing the integrity of the division, the initial scf/rla is
-; needed to make the result 100 + -ff..ff or 1..1ff, so that the decrements
-; cannot borrow into the upcoming dividend bits also held in a, and there must
-; be another shift between the scf/rla and increment/decrement so that the scf
-; is implicitly in the 100s place, making the code awkward though it's correct
; 32-bit decrement of hl:hl' (hl = high word, hl' = low word), clobbers a
+dec_l:
+ exx
+ ld a,l
+ or h ; test the low word for zero before it is decremented
+ dec hl ; 16-bit dec leaves the flags from the or untouched
+ exx
+ ret nz ; low word was nonzero, no borrow from the high word
+ dec hl ; low word wrapped from 0, borrow from the high word
+ ret
-; now optimized to only inc/dec a when doing zero-crossing, fix above analysis
+; non-restoring division routine
+; see earlier comments for the word version, this extends the concept to long
div_lcf:
jr c,div_l1
div_l0: ; bit 0, above
scf
rla
+ exx
+ adc hl,hl
+ exx
adc hl,hl
+ exx
+ sbc hl,de
+ exx
sbc hl,de
jr nc,div_l01
dec a
div_l11: ; bit 1, below
add a,a
+ exx
adc hl,hl
+ exx
+ adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr nc,div_l12
inc a
div_l02: ; bit 2, above
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
+ exx
+ sbc hl,de
+ exx
sbc hl,de
jr nc,div_l03
dec a
div_l13: ; bit 3, below
add a,a
+ exx
adc hl,hl
+ exx
+ adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr nc,div_l14
inc a
div_l04: ; bit 4, above
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
+ exx
+ sbc hl,de
+ exx
sbc hl,de
jr nc,div_l05
dec a
div_l15: ; bit 5, below
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr nc,div_l16
inc a
div_l06: ; bit 6, above
add a,a
+ exx
adc hl,hl
+ exx
+ adc hl,hl
+ exx
+ sbc hl,de
+ exx
sbc hl,de
jr nc,div_l07
dec a
div_l17: ; bit 7, below
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr nc,div_l18
inc a
div_l08: ; done, above
div_l1: ; bit 0, below
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr nc,div_l11
inc a
div_l01: ; bit 1, above
add a,a
+ exx
adc hl,hl
+ exx
+ adc hl,hl
+ exx
+ sbc hl,de
+ exx
sbc hl,de
jr nc,div_l02
dec a
div_l12: ; bit 2, below
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr nc,div_l13
inc a
div_l03: ; bit 3, above
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
+ exx
+ sbc hl,de
+ exx
sbc hl,de
jr nc,div_l04
dec a
div_l14: ; bit 4, below
add a,a
+ exx
adc hl,hl
+ exx
+ adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr nc,div_l15
inc a
div_l05: ; bit 5, above
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
+ exx
+ sbc hl,de
+ exx
sbc hl,de
jr nc,div_l06
dec a
div_l16: ; bit 6, below
add a,a
+ exx
adc hl,hl
+ exx
+ adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr nc,div_l17
inc a
div_l07: ; bit 7, above
add a,a
+ exx
adc hl,hl
+ exx
+ adc hl,hl
+ exx
+ sbc hl,de
+ exx
sbc hl,de
jr nc,div_l08
dec a
scf
ret
-; divn0/1 are the same as div0/1 but carry reversed after add/subtract divisor
-; this is for negative divisors where we expect carry (means no zero crossing)
-
-; when divisor negated, remainder also negated, so we expect to do subtraction
-; when remainder negative and vice versa, need to clear carry after add hl,hl
+; version for negative divisors
+; see earlier comments for the word version, this extends the concept to long
div_l_ncf:
jr c,div_l_n1
div_l_n0: ; bit 0, above
scf
rla
+ exx
+ adc hl,hl
+ exx
adc hl,hl
or a
+ exx
+ sbc hl,de
+ exx
sbc hl,de
jr c,div_l_n01
dec a
div_l_n11: ; bit 1, below
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr c,div_l_n12
inc a
div_l_n02: ; bit 2, above
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
or a
+ exx
+ sbc hl,de
+ exx
sbc hl,de
jr c,div_l_n03
dec a
div_l_n13: ; bit 3, below
add a,a
+ exx
adc hl,hl
+ exx
+ adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr c,div_l_n14
inc a
div_l_n04: ; bit 4, above
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
or a
+ exx
+ sbc hl,de
+ exx
sbc hl,de
jr c,div_l_n05
dec a
div_l_n15: ; bit 5, below
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr c,div_l_n16
inc a
div_l_n06: ; bit 6, above
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
or a
+ exx
+ sbc hl,de
+ exx
sbc hl,de
jr c,div_l_n07
dec a
div_l_n17: ; bit 7, below
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr c,div_l_n18
inc a
div_l_n08: ; done, above
div_l_n1: ; bit 0, below
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr c,div_l_n11
inc a
div_l_n01: ; bit 1, above
add a,a
+ exx
adc hl,hl
+ exx
+ adc hl,hl
+ exx
or a
sbc hl,de
+ exx
+ sbc hl,de
jr c,div_l_n02
dec a
div_l_n12: ; bit 2, below
add a,a
+ exx
adc hl,hl
+ exx
+ adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr c,div_l_n13
inc a
div_l_n03: ; bit 3, above
add a,a
+ exx
adc hl,hl
+ exx
+ adc hl,hl
+ exx
or a
sbc hl,de
+ exx
+ sbc hl,de
jr c,div_l_n04
dec a
div_l_n14: ; bit 4, below
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr c,div_l_n15
inc a
div_l_n05: ; bit 5, above
add a,a
+ exx
adc hl,hl
+ exx
+ adc hl,hl
+ exx
or a
sbc hl,de
+ exx
+ sbc hl,de
jr c,div_l_n06
dec a
div_l_n16: ; bit 6, below
add a,a
+ exx
adc hl,hl
+ exx
+ adc hl,hl
+ exx
add hl,de
+ exx
+ adc hl,de
jr c,div_l_n17
inc a
div_l_n07: ; bit 7, above
add a,a
+ exx
+ adc hl,hl
+ exx
adc hl,hl
+ exx
or a
sbc hl,de
+ exx
+ sbc hl,de
jr c,div_l_n08
dec a
div_l_n18: ; done, below
scf
ret
-
; debugging
print_trace: ; print af, bc, de, hl, sp, (sp)
ld hl,print_trace
ld (page0_trace + 1),hl
- ld hl,rst8
- ld de,8
- ld bc,7
+ ld hl,restarts
+ ld de,0x28
+ ld bc,restarts_end - restarts
ldir
ld bc,sm_start
.db <page0_esc
jp 0
-rst8: ; immediate to hl
; image of the restart handlers, copied by the ldir above to address 0x28;
; the .db 0 padding makes each of the first two handlers exactly 8 bytes so
; that the following one starts on the next restart address (0x30, 0x38)
+restarts:
+ ; rst 0x28, immediate to hl
+ ld a,(bc)
+ inc bc
+ ld l,a
+ ld a,(bc)
+ inc bc
+ ld h,a
+ ret
+ .db 0
+ ; rst 0x30, pop hl:de'
+ pop hl ; return address pushed by the rst
+ exx
+ pop de ; first stacked word to de'
+ exx
+ ex (sp),hl ; hl = next stacked word, return address goes back on the stack
+ ret
+ .db 0,0
+ ; rst 0x38, immediate to hl:de'
+ ld a,(bc)
+ inc bc
+ exx
+ ld e,a ; first immediate byte to e'
+ exx
+ ld a,(bc)
+ inc bc
+ exx
+ ld d,a ; second immediate byte to d'
+ exx
ld a,(bc)
inc bc
ld l,a
inc bc
ld h,a
ret
+restarts_end:
sm_main:
; create stack frame