-page0 = 2
-page1 = 4
-page2 = 6
+page0 = 4
+page1 = 6
+page2 = 8
.area SM (abs,ovr)
.org 0x100
+ ld hl,0x1234 ; self-test: unsigned word multiply, then divide the product back
+ ld de,0x56
+ call math_mul_uw0 ; hl:de = hl * de, with the old hl left in bc
+ call print_hlde ; show the product
+ call math_div_uw ; hl:de / bc -- NOTE(review): assumes print_hlde leaves hl, de, bc intact; confirm
+ call print_hlde ; show remainder:quotient
+
+ ld hl,0x6543
+ ld de,0x21
+ call math_mul_uw0
+ call print_hlde
+ call math_div_uw
+ call print_hlde
+
+ ld hl,0xb975
+ ld de,0x31
+ call math_mul_uw0
+ call print_hlde
+ call math_div_uw
+ call print_hlde
+
+ ld hl,0xdb97
+ ld de,0x531 ; multiplier with a nonzero high byte
+ call math_mul_uw0
+ call print_hlde
+ call math_div_uw
+ call print_hlde
+
+ ld hl,0x1234 ; self-test: signed word multiply, then divide the product back
+ ld de,0x56
+ call math_mul_sw0 ; hl:de = hl * de, signed, with the old hl left in bc
+ call print_hlde ; show the product
+ call math_div_sw ; hl:de / bc, signed -- NOTE(review): assumes print_hlde leaves hl, de, bc intact; confirm
+ call print_hlde ; show remainder:quotient
+
+ ld hl,0x6543
+ ld de,0x21
+ call math_mul_sw0
+ call print_hlde
+ call math_div_sw
+ call print_hlde
+
+ ld hl,0xb975 ; bit 15 is set, so this is negative when read as a signed word
+ ld de,0x31
+ call math_mul_sw0
+ call print_hlde
+ call math_div_sw
+ call print_hlde
+
+ ld hl,0xdb97 ; bit 15 is set here as well
+ ld de,0x531
+ call math_mul_sw0
+ call print_hlde
+ call math_div_sw
+ call print_hlde
+
+ ld hl,-0x1234 ; same four cases with hl negated
+ ld de,0x56
+ call math_mul_sw0
+ call print_hlde
+ call math_div_sw
+ call print_hlde
+
+ ld hl,-0x6543
+ ld de,0x21
+ call math_mul_sw0
+ call print_hlde
+ call math_div_sw
+ call print_hlde
+
+ ld hl,-0xb975
+ ld de,0x31
+ call math_mul_sw0
+ call print_hlde
+ call math_div_sw
+ call print_hlde
+
+ ld hl,-0xdb97
+ ld de,0x531
+ call math_mul_sw0
+ call print_hlde
+ call math_div_sw
+ call print_hlde
+
+ ld hl,0x1234 ; same four cases with de negated
+ ld de,-0x56
+ call math_mul_sw0
+ call print_hlde
+ call math_div_sw
+ call print_hlde
+
+ ld hl,0x6543
+ ld de,-0x21
+ call math_mul_sw0
+ call print_hlde
+ call math_div_sw
+ call print_hlde
+
+ ld hl,0xb975
+ ld de,-0x31
+ call math_mul_sw0
+ call print_hlde
+ call math_div_sw
+ call print_hlde
+
+ ld hl,0xdb97
+ ld de,-0x531
+ call math_mul_sw0
+ call print_hlde
+ call math_div_sw
+ call print_hlde
+
+ ld hl,-0x1234 ; same four cases with both operands negated
+ ld de,-0x56
+ call math_mul_sw0
+ call print_hlde
+ call math_div_sw
+ call print_hlde
+
+ ld hl,-0x6543
+ ld de,-0x21
+ call math_mul_sw0
+ call print_hlde
+ call math_div_sw
+ call print_hlde
+
+ ld hl,-0xb975
+ ld de,-0x31
+ call math_mul_sw0
+ call print_hlde
+ call math_div_sw
+ call print_hlde
+
+ ld hl,-0xdb97
+ ld de,-0x531
+ call math_mul_sw0
+ call print_hlde
+ call math_div_sw
+ call print_hlde
+
+ ld hl,0xdb97 ; self-test: unsigned long; low words are loaded into hl and de
+ ld de,0x7531
+ exx ; switch to the alternate set for the high words
+ ld hl,0x1357 ; hl':hl = 0x1357db97
+ ld de,0xfdb9 ; de':de = 0xfdb97531
+ exx
+ call math_mul_ul0 ; hl':hl:de':de = hl':hl * de':de, with the old hl':hl left in bc':bc
+ call print_hlhldede ; show the product
+ call math_div_ul ; divide by bc':bc -- NOTE(review): assumes print_hlhldede preserves all pairs; confirm
+ call print_hlhldede ; show remainder:quotient
+
+ ld hl,0xdb97 ; same operands through the signed long routines
+ ld de,0x7531
+ exx
+ ld hl,0x1357
+ ld de,0xfdb9
+ exx
+ call math_mul_sl0
+ call print_hlhldede
+ call math_div_sl
+ call print_hlhldede
+
+ ld hl,~0xdb97 ; first operand bitwise complemented (both words)
+ ld de,0x7531
+ exx
+ ld hl,~0x1357
+ ld de,0xfdb9
+ exx
+ call math_mul_sl0
+ call print_hlhldede
+ call math_div_sl
+ call print_hlhldede
+
+ ld hl,0xdb97
+ ld de,~0x7531 ; second operand bitwise complemented (both words)
+ exx
+ ld hl,0x1357
+ ld de,~0xfdb9
+ exx
+ call math_mul_sl0
+ call print_hlhldede
+ call math_div_sl
+ call print_hlhldede
+
+ ld hl,~0xdb97 ; both operands bitwise complemented
+ ld de,~0x7531
+ exx
+ ld hl,~0x1357
+ ld de,~0xfdb9
+ exx
+ call math_mul_sl0
+ call print_hlhldede
+ call math_div_sl
+ call print_hlhldede
+
ld hl,restarts
ld de,0x28
ld bc,restarts_end - restarts
page1_mul_w:
pop hl
push bc
- call math_mul_w0
+ call math_smul_w0
pop bc
jr mul_w_done
rst 0x28
.db 0x3e ; ld a,
page1_div_sw:
- pop hl
- call math_div_sw
+ pop hl
+ push bc
+ call math_sdiv_sw0
jr div_w_done
page1_imm_div_sw:
.db 0x3e ; ld a,
page1_divrev_sw:
pop hl
- call math_div_sw0
+ push bc
+ call math_sdiv_sw
jr div_w_done
page1_imm_divrev_uw:
.db 0x3e ; ld a,
page1_div_uw:
pop hl
- call math_div_uw
+ push bc
+ call math_sdiv_uw0
jr div_w_done
page1_imm_div_uw:
.db 0x3e ; ld a,
page1_divrev_uw:
pop hl
- call math_div_uw0
+ push bc
+ call math_sdiv_uw
jr div_w_done
; page 1 to 2
jp (hl)
div_w_done:
+ pop bc
push de
mul_w_done:
ex de,hl
mul_l_entry:
ex de,hl
push bc
- call math_mul_l0
+ call math_smul_l0
pop bc
jr mul_l_done
page2_imm_divrev_sl:
rst 0x38
;.db 0x3e ; ld a,
- call math_div_sl0
+ ex de,hl
+ push bc
+ call math_sdiv_sl
jr div_l_done
page2_div_sl:
;rst 0x30
exx
pop de
exx
- call math_div_sl0
+ ex de,hl
+ push bc
+ call math_sdiv_sl
jr div_l_done
page2_imm_div_sl:
rst 0x38
;.db 0x3e ; ld a,
- ex de,hl
- call math_div_sl
+ push bc
+ call math_sdiv_sl0
jr div_l_done
page2_divrev_sl:
;rst 0x30
exx
pop de
exx
- ex de,hl
- call math_div_sl
+ push bc
+ call math_sdiv_sl0
jr div_l_done
page2_imm_divrev_ul:
rst 0x38
;.db 0x3e ; ld a,
- call math_div_ul0
+ ex de,hl
+ push bc
+ call math_div_ul1
jr div_l_done
page2_div_ul:
;rst 0x30
exx
pop de
exx
- call math_div_ul0
+ ex de,hl
+ push bc
+ call math_div_ul1
jr div_l_done
page2_imm_div_ul:
rst 0x38
;.db 0x3e ; ld a,
- ex de,hl
- call math_div_ul
+ push bc
+ call math_div_ul0
jr div_l_done
page2_divrev_ul:
;rst 0x30
exx
pop de
exx
- ex de,hl
- call math_div_ul
+ push bc
+ call math_div_ul0
div_l_done:
+ pop bc
exx
push de
exx
exx
ret
-math_mul_w0: ; hl *= de
+; smul: short multiplication
+; 16 * 16 to 16 bit product (word)
+; 32 * 32 to 32 bit product (long)
+; mul: long multiplication
+; 16 + 16 * 16 to 32 bit product (word)
+; 32 + 32 * 32 to 64 bit product (long)
+
+; smul is implemented in a cheaper way that uses only left shifts, since
+; left shifts are cheaper on the z80; this works for smul because there
+; is no need to propagate carries into a high result word
+
+; mul can initialize the product with some nonzero value, which smul
+; cannot because it only shifts zeros in from the left; using this
+; ability, the long multiplication reverses the long division
+; (initialize product with a remainder, then add in quotient * divisor)
+
+math_smul_w0: ; hl *= de
ld c,l
ld b,h
- ld hl,0
-math_mul_w: ; hl += bc * de
+math_smul_w: ; hl = bc * de
ld a,d
- call mul_w0
+ call smul_w0
ld a,e
-mul_w: ; bit 0
+smul_w: ; bit 0
add hl,hl
-mul_w0: rla
+smul_w0: rla
jr nc,1$
add hl,bc
1$: ; bit 1
add hl,bc
ret
-math_mul_l0: ; hl':hl *= de':de
+math_smul_l0: ; hl':hl *= de':de
ld c,l
ld b,h
- ld hl,0
exx
ld c,l
ld b,h
- ld hl,0
exx
-math_mul_l: ; hl':hl += de':de * bc':bc
+math_smul_l: ; hl':hl = de':de * bc':bc
exx
ld a,d
exx
- call mul_l0
+ call smul_l0
exx
ld a,e
exx
- call mul_l
+ call smul_l
ld a,d
- call mul_l
+ call smul_l
ld a,e
-mul_l: ; bit 0
+smul_l: ; bit 0
add hl,hl
exx
adc hl,hl
exx
-mul_l0: rla
+smul_l0: rla
jr nc,1$
add hl,bc
exx
exx
ret
-math_div_sw0: ; hl, de = de % hl, de / hl, signed
- ex de,hl
-math_div_sw: ; hl, de = hl % de, hl / de, signed
- push bc
- ld a,h
- or a
- ld a,d
- rla
- jp m,div_w_n ; positive dividend
-
- ; positive dividend
- ld a,h
+math_mul_sw0: ; hl:de = hl * de, signed
 ld c,l
+ ld b,h ; bc = multiplicand (old hl); hl is then zeroed as the accumulator
 ld hl,0
- jr nc,div_w_pp ; positive dividend, positive divisor
-
- ; positive dividend, negative divisor
- call div_w_n1
- ld b,a
- ld a,c
- call div_w_ncf
- inc a
- jr c,1$
+math_mul_sw: ; hl:de = hl + bc * de, signed
+ ld a,b ; fetch the sign bit of the multiplicand...
+ rla ; cf will be preserved through to the last rra below
+ ld a,e ; low multiplier byte first
+ call mul_uw ; 8 unsigned shift-and-add steps; a returns the byte shifted out of hl
+ push af ; keep that low product byte until the end
+ ld a,d ; high multiplier byte
+ call mul_uw1 ; do only 7 bits, get sign of d into cf
+ jr nc,1$ ; d non-negative: nothing to subtract
+ or a ; clear cf so sbc is a plain subtract
+ sbc hl,bc ; top bit of d carries negative weight
+1$: rr h ; the 8th shift, done here instead of inside mul_uw1
+ rr l
+ rra ; a = second product byte; cf = the multiplicand sign carried through
+ jr nc,2$ ; bc non-negative: no correction
+ or a ; clear cf so sbc is a plain subtract
 sbc hl,de
-1$: ld d,b
+2$: ld d,a ; d = second product byte
+ pop af ; recover the low product byte
 ld e,a
- pop bc
 ret
-div_w_n:
- ; negative dividend
- dec hl ; reduces remainder by 1 (we inc later)
- ld a,h
+math_mul_uw0: ; hl:de = hl * de, unsigned
 ld c,l
- ld hl,-1
- jr c,div_w_nn ; negative dividend, negative divisor
-
- ; negative dividend, positive divisor
- call div_w1
- ld b,a
- ld a,c
- call div_wcf
- inc a
- jr c,1$
- sbc hl,de
-1$: inc hl ; get into range -divisor+1..0
- ld d,b
+ ld b,h ; bc = multiplicand (old hl)
+ ld hl,0 ; start from a zero accumulator
+math_mul_uw: ; hl:de = hl + bc * de, unsigned
+ ld a,e ; low multiplier byte first
+ call mul_uw ; 8 shift-and-add steps; a returns the byte shifted out of hl
 ld e,a
- pop bc
+ ld a,d ; then the high multiplier byte
+ call mul_uw
+ ld d,a ; de = low word of the product, hl = high word
 ret
-div_w_nn: ; negative dividend, negative divisor
- call div_w_n0
- ld b,a
- ld a,c
- call div_w_ncf
+mul_uw: rra ; in: hl = accumulator, bc = multiplicand, a = multiplier byte; each rra moves the next multiplier bit into cf
+ ; bit 0
 jr nc,1$
- add hl,de
-1$: inc hl ; get into range divisor+1..0
- ld d,b
- ld e,a
- pop bc
+ add hl,bc ; multiplier bit set: add the multiplicand (cf = carry out)
+1$: rr h ; shift hl right, taking that carry in at the top
+ rr l ; the bit leaving l lands in a on the next rra
+mul_uw1: ; entry that skips the first step (math_mul_sw uses it for its top byte)
+ rra
+ ; bit 1
+ jr nc,2$
+ add hl,bc
+2$: rr h
+ rr l
+ rra
+ ; bit 2
+ jr nc,3$
+ add hl,bc
+3$: rr h
+ rr l
+ rra
+ ; bit 3
+ jr nc,4$
+ add hl,bc
+4$: rr h
+ rr l
+ rra
+ ; bit 4
+ jr nc,5$
+ add hl,bc
+5$: rr h
+ rr l
+ rra
+ ; bit 5
+ jr nc,6$
+ add hl,bc
+6$: rr h
+ rr l
+ rra
+ ; bit 6
+ jr nc,7$
+ add hl,bc
+7$: rr h
+ rr l
+ rra
+ ; bit 7
+ jr nc,8$
+ add hl,bc
+8$: rr h
+ rr l
+ rra ; ninth rotate: a now holds the 8 bits shifted out of hl
 ret
-math_div_uw0: ; hl, de = de % hl, de / hl, unsigned
- ex de,hl
-math_div_uw: ; hl, de = hl % de, hl / de, unsigned
- push bc
- ld a,h
+math_mul_sl0: ; hl':hl:de':de = hl':hl * de':de, signed
 ld c,l
- ld hl,0
-div_w_pp: ; positive dividend, positive divisor
- call div_w0
- ld b,a
- ld a,c
- call div_wcf
+ ld b,h ; bc = low word of the multiplicand
+ sub a ; a = 0, used to clear both accumulator words
+ ld l,a
+ ld h,a
+ exx
+ ld c,l ; bc' = high word of the multiplicand
+ ld b,h
+ ld l,a
+ ld h,a
+ exx ; hard to optimize this
+math_mul_sl: ; hl':hl:de':de = hl':hl + bc':bc * de':de, signed
+ exx
+ ld a,b ; b' holds the sign bit of the multiplicand
+ rla ; cf will be preserved through to the last rra below
+ exx
+ ld a,e ; multiplier byte 0
+ call mul_ul
+ push af ; product byte 0
+ ld a,d ; multiplier byte 1
+ call mul_ul
+ push af ; product byte 1
+ exx
+ ld a,e ; multiplier byte 2 (e')
+ exx
+ call mul_ul
+ push af ; product byte 2
+ exx
+ ld a,d ; multiplier byte 3 (d')
+ exx
+ call mul_ul1 ; do only 7 bits, get sign of d into cf
 jr nc,1$
- add hl,de
-1$: ld d,b
+ or a ; clear cf so the first sbc is a plain subtract
+ sbc hl,bc ; multiplier negative: its top bit carries negative weight
+ exx
+ sbc hl,bc
+ exx ; hard to optimize this
+1$: exx
+ rr h ; the 8th shift, done here instead of inside mul_ul1
+ rr l
+ exx
+ rr h
+ rr l
+ rra ; a = product byte 3; cf = the multiplicand sign carried through
+ jr nc,2$ ; multiplicand non-negative: no correction
+ or a ; clear cf so the first sbc is a plain subtract
+ sbc hl,de
+ exx
+ sbc hl,de
+ exx ; hard to optimize this
+2$: exx
+ ld d,a ; d' = product byte 3
+ pop af
+ ld e,a ; e' = product byte 2
+ exx
+ pop de ; d = product byte 1 (the a half of the pushed af); e is overwritten next
+ pop af
 ld e,a
- pop bc
 ret
-; non-restoring division routine
-
-; de = divisor, hl:a = dividend with hl = previous remainder, a = next byte
-; enter at div0 with positive remainder in hl, such that hl < de
-; enter at div1 with negative remainder in hl, such that hl >= -de
-
-; div0/1 return a = 8-bit quotient as an odd number interpreted as -ff..ff,
-; by summing positive/negative place values, e.g. -80 +40 +20 -10 +8 -4 -2 +1
-
-; if entered at div0, there is a -80 and so quotient is in range -ff..-1
-; if entered at div1, there is a +80 and so quotient is in range 1..ff
-; falls out of loop after div01 with positive remainder, div11 with negative,
-; depending on this we should re-enter at div0 or div1, signalled by cf return
-
-; the successive quotient bytes can be concatenated into a full quotient,
-; but negative bytes require the next higher quotient byte to be decremented,
-; we know in advance if this will happen because the implied sign of the
-; quotient byte depends only on whether we entered at div0 or div1, hence,
-; before the div11 return we'll decrement to compensate for next negative byte
+math_mul_ul0: ; hl':hl:de':de = hl':hl * de':de, unsigned
+ ld c,l ; bc = low word of the multiplicand
+ ld b,h
+ sub a ; a = 0, used to clear both accumulator words
+ ld l,a
+ ld h,a
+ exx
+ ld c,l ; bc' = high word of the multiplicand
+ ld b,h
+ ld l,a
+ ld h,a
+ exx
+math_mul_ul: ; hl':hl:de':de = hl':hl + bc':bc * de':de, unsigned
+ ld a,e ; multiplier byte 0
+ call mul_ul ; 8 shift-and-add steps; a returns the byte shifted out of hl':hl
+ ld e,a ; product byte 0 replaces the multiplier byte just consumed
+ ld a,d ; multiplier byte 1
+ call mul_ul
+ ld d,a
+ exx
+ ld a,e ; multiplier byte 2 (e')
+ exx
+ call mul_ul
+ exx
+ ld e,a
+ ld a,d ; multiplier byte 3 (d')
+ exx
+ call mul_ul
+ exx
+ ld d,a
+ exx
 ret
-; the decrement can also be seen as compensating for the extra add hl,de that
-; may be needed to make negative remainder positive before return to caller,
+mul_ul: rra ; in: hl':hl = accumulator, bc':bc = multiplicand, a = multiplier byte; each rra moves the next multiplier bit into cf
+ ; bit 0
+ jr nc,1$
+ add hl,bc ; multiplier bit set: 32-bit add of the multiplicand
+ exx
+ adc hl,bc
+ exx ; optimize this
+1$: exx
+ rr h ; shift hl':hl right, taking the carry from the add in at the top
+ rr l
+ exx
+ rr h
+ rr l ; the bit leaving l lands in a on the next rra
+mul_ul1: ; entry that skips the first step (math_mul_sl uses it for its top byte)
+ rra
+ ; bit 1
+ jr nc,2$
+ add hl,bc
+ exx
+ adc hl,bc
+ exx ; optimize this
+2$: exx
+ rr h
+ rr l
+ exx
+ rr h
+ rr l
+ rra
+ ; bit 2
+ jr nc,3$
+ add hl,bc
+ exx
+ adc hl,bc
+ exx ; optimize this
+3$: exx
+ rr h
+ rr l
+ exx
+ rr h
+ rr l
+ rra
+ ; bit 3
+ jr nc,4$
+ add hl,bc
+ exx
+ adc hl,bc
+ exx ; optimize this
+4$: exx
+ rr h
+ rr l
+ exx
+ rr h
+ rr l
+ rra
+ ; bit 4
+ jr nc,5$
+ add hl,bc
+ exx
+ adc hl,bc
+ exx ; optimize this
+5$: exx
+ rr h
+ rr l
+ exx
+ rr h
+ rr l
+ rra
+ ; bit 5
+ jr nc,6$
+ add hl,bc
+ exx
+ adc hl,bc
+ exx ; optimize this
+6$: exx
+ rr h
+ rr l
+ exx
+ rr h
+ rr l
+ rra
+ ; bit 6
+ jr nc,7$
+ add hl,bc
+ exx
+ adc hl,bc
+ exx ; optimize this
+7$: exx
+ rr h
+ rr l
+ exx
+ rr h
+ rr l
+ rra
+ ; bit 7
+ jr nc,8$
+ add hl,bc
+ exx
+ adc hl,bc
+ exx ; optimize this
+8$: exx
+ rr h
+ rr l
+ exx
+ rr h
+ rr l
+ rra ; ninth rotate: a now holds the 8 bits shifted out of hl':hl
+ ret
+
+; sdiv: short division
+; 16 / 16 to 16 bit quotient, 16 bit remainder (word)
+; 32 / 32 to 32 bit quotient, 32 bit remainder (long)
+; div: long division
+; 32 / 16 to 16 bit quotient, 16 bit remainder (word)
+; 64 / 32 to 32 bit quotient, 32 bit remainder (long)
+
+; sdiv is implemented as sign/zero extension then div
+
+math_sdiv_sw0: ; hl, de = hl % de, hl / de, signed
+ ex de,hl
+math_sdiv_sw: ; hl, de = de % hl, de / hl, signed
+ ld c,l ; bc = divisor
+ ld b,h
+ ld a,d
+ rla ; cf = sign of the dividend
+ sbc a,a ; a = 0xff if negative, else 0
+ ld l,a ; hl = sign extension, making hl:de the 32-bit dividend
+ ld h,a
+math_div_sw: ; hl, de = hl:de % bc, hl:de / bc, signed
+ ld a,h
+ or a ; sign flag = sign of the dividend
+ ld a,b
+ rla ; cf = sign of the divisor (the sign flag from "or a" is still intact)
+ jp m,div_w_n ; jump for a negative dividend; fall through for a positive one
+
+ ; positive dividend
+ ld a,d ; first dividend byte to feed the bit loop
+ jr nc,div_w_pp ; positive dividend, positive divisor
+
+ ; positive dividend, negative divisor
+ call div_w_n1
+ ld d,a ; high quotient byte
+ ld a,e ; second dividend byte
+ call div_w_ncf
+ inc a
+ ld e,a ; low quotient byte
+ ret c ; cf set: remainder needs no final correction
+ sbc hl,bc ; cf is clear here, so this is a plain subtract of the divisor
+ ret
+
+div_w_n:
+ ; negative dividend
+ dec de ; reduces remainder by 1 (we inc later) -- NOTE(review): no borrow into hl when de == 0, unlike div_l_n; confirm callers never pass that
+ ld a,d ; first dividend byte; cf still holds the divisor sign (dec de leaves flags alone)
+ jr c,div_w_nn ; negative dividend, negative divisor
+
+ ; negative dividend, positive divisor
+ call div_w1
+ ld d,a ; high quotient byte
+ ld a,e ; second dividend byte
+ call div_wcf
+ inc a
+ ld e,a ; low quotient byte
+ inc hl ; get into range -divisor+1..0
+ ret c ; cf set: remainder needs no final correction
+ sbc hl,bc ; cf is clear here, so this is a plain subtract of the divisor
+ ret
+
+div_w_nn: ; negative dividend, negative divisor
+ call div_w_n0
+ ld d,a ; high quotient byte
+ ld a,e ; second dividend byte
+ call div_w_ncf
+ ld e,a ; low quotient byte
+ inc hl ; get into range divisor+1..0
+ ret nc ; cf clear: remainder needs no final correction
+ add hl,bc
+ ret
+
+math_sdiv_uw0: ; hl, de = hl % de, hl / de, unsigned
+ ex de,hl
+math_sdiv_uw: ; hl, de = de % hl, de / hl, unsigned
+ ld c,l ; bc = divisor
+ ld b,h
+ ld hl,0 ; zero-extend: hl:de is the 32-bit dividend
+math_div_uw: ; hl, de = hl:de % bc, hl:de / bc, unsigned
+ ld a,d ; first dividend byte to feed the bit loop
+div_w_pp: ; positive dividend, positive divisor
+ call div_w0
+ ld d,a ; high quotient byte
+ ld a,e ; second dividend byte
+ call div_wcf
+ ld e,a ; low quotient byte
+ ret nc ; cf clear: remainder is already non-negative
+ add hl,bc ; otherwise add the divisor back once
+ ret
+
+; non-restoring division routine
+
+; bc = divisor, hl:a = dividend with hl = previous remainder, a = next byte
+; enter at div0 with positive remainder in hl, such that hl < bc
+; enter at div1 with negative remainder in hl, such that hl >= -bc
+
+; div0/1 return a = 8-bit quotient as an odd number interpreted as -ff..ff,
+; by summing positive/negative place values, e.g. -80 +40 +20 -10 +8 -4 -2 +1
+
+; if entered at div0, there is a -80 and so quotient is in range -ff..-1
+; if entered at div1, there is a +80 and so quotient is in range 1..ff
+; falls out of loop after div01 with positive remainder, div11 with negative,
+; depending on this we should re-enter at div0 or div1, signalled by cf return
+
+; the successive quotient bytes can be concatenated into a full quotient,
+; but negative bytes require the next higher quotient byte to be decremented,
+; we know in advance if this will happen because the implied sign of the
+; quotient byte depends only on whether we entered at div0 or div1, hence,
+; before the div11 return we'll decrement to compensate for next negative byte
+
+; the decrement can also be seen as compensating for the extra add hl,bc that
+; may be needed to make negative remainder positive before return to caller,
; thus leaving quotient in a consistent state regardless of which exit taken,
; remainder needs the add hl,de if cf=1 returned (equiv. return byte is even)
; now optimized to only inc/dec a when doing zero-crossing, fix above analysis
+.if 1 ; temporary
+div_wcf: ; continue with the next dividend byte: cf from the previous byte picks the entry
+ jr c,div_w1
+div_w0: ; above
+ push de ; d is borrowed as the loop counter
+ ld d,8 ; one pass per bit of a
+ scf
+ rla ; top dividend bit into cf, a 1 into bit 0 of a
+div_w00:
+ adc hl,hl ; shift the dividend bit into the remainder
+ jr c,div_w01
+ sbc hl,bc ; cf is clear here, so this is a plain subtract of the divisor
+ jr nc,div_w02
+ dec a
+ ;jr div_w12
+ add a,a ; next dividend bit into cf
+ dec d ; dec leaves cf alone for the adc at the loop top
+ jr nz,div_w10 ; the subtract borrowed: carry on in the "below" loop
+ ;inc a
+ ;dec a ; compensation
+ scf ; tell the caller to re-enter at (or correct for) the "below" state
+ pop de
+ ret
+div_w01:
+ or a ; clear cf so sbc is a plain subtract
+ sbc hl,bc
+div_w02:
+ add a,a
+ dec d
+ jr nz,div_w00
+ dec a
+ or a ; return cf = 0: finished in the "above" state
+ pop de
+ ret
+
+div_w1: ; below
+ push de ; d is borrowed as the loop counter
+ ld d,8
+ add a,a ; top dividend bit into cf, a 0 into bit 0 of a
+div_w10:
+ adc hl,hl
+ jr nc,div_w11
+ add hl,bc
+ jr nc,div_w12
+ inc a
+ ;jr div_w02
+ add a,a
+ dec d
+ jr nz,div_w00 ; switch to the "above" loop
+ dec a
+ or a ; return cf = 0: finished in the "above" state
+ pop de
+ ret
+div_w11:
+ add hl,bc
+div_w12:
+ add a,a
+ dec d
+ jr nz,div_w10
+ ;inc a
+ ;dec a ; compensation
+ scf ; return cf = 1: finished in the "below" state
+ pop de
+ ret
+
+div_w_ncf: ; same scheme as div_wcf with the carry tests inverted (used on the negative-divisor paths)
+ jr c,div_w_n1
+div_w_n0: ; above
+ push de ; d is borrowed as the loop counter
+ ld d,8
+ scf
+ rla
+div_w_n00:
+ adc hl,hl
+ jr nc,div_w_n01
+ or a ; clear cf so sbc is a plain subtract
+ sbc hl,bc
+ jr c,div_w_n02
+ dec a
+ ;jr div_w_n12
+ add a,a
+ dec d
+ jr nz,div_w_n10
+ ;inc a
+ ;dec a ; compensation
+ scf
+ pop de
+ ret
+div_w_n01:
+ sbc hl,bc ; cf is clear here (we arrived via jr nc)
+div_w_n02:
+ add a,a
+ dec d
+ jr nz,div_w_n00
+ dec a
+ or a
+ pop de
+ ret
+
+div_w_n1: ; below
+ push de ; d is borrowed as the loop counter
+ ld d,8
+ add a,a
+div_w_n10:
+ adc hl,hl
+ jr c,div_w_n11
+ add hl,bc
+ jr c,div_w_n12
+ inc a
+ ;jr div_w_n02
+ add a,a
+ dec d
+ jr nz,div_w_n00
+ dec a
+ or a
+ pop de
+ ret
+div_w_n11:
+ add hl,bc
+div_w_n12:
+ add a,a
+ dec d
+ jr nz,div_w_n10
+ ;inc a
+ ;dec a ; compensation
+ scf
+ pop de
+ ret
+.else
div_wcf:
jr c,div_w1
div_w0: ; bit 0, above
scf
rla
adc hl,hl
- sbc hl,de
+ sbc hl,bc
jr nc,div_w01
dec a
div_w11: ; bit 1, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr nc,div_w12
inc a
div_w02: ; bit 2, above
add a,a
adc hl,hl
- sbc hl,de
+ sbc hl,bc
jr nc,div_w03
dec a
div_w13: ; bit 3, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr nc,div_w14
inc a
div_w04: ; bit 4, above
add a,a
adc hl,hl
- sbc hl,de
+ sbc hl,bc
jr nc,div_w05
dec a
div_w15: ; bit 5, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr nc,div_w16
inc a
div_w06: ; bit 6, above
add a,a
adc hl,hl
- sbc hl,de
+ sbc hl,bc
jr nc,div_w07
dec a
div_w17: ; bit 7, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr nc,div_w18
inc a
div_w08: ; done, above
div_w1: ; bit 0, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr nc,div_w11
inc a
div_w01: ; bit 1, above
add a,a
adc hl,hl
- sbc hl,de
+ sbc hl,bc
jr nc,div_w02
dec a
div_w12: ; bit 2, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr nc,div_w13
inc a
div_w03: ; bit 3, above
add a,a
adc hl,hl
- sbc hl,de
+ sbc hl,bc
jr nc,div_w04
dec a
div_w14: ; bit 4, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr nc,div_w15
inc a
div_w05: ; bit 5, above
add a,a
adc hl,hl
- sbc hl,de
+ sbc hl,bc
jr nc,div_w06
dec a
div_w16: ; bit 6, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr nc,div_w17
inc a
div_w07: ; bit 7, above
add a,a
adc hl,hl
- sbc hl,de
+ sbc hl,bc
jr nc,div_w08
dec a
div_w18: ; done, below
rla
adc hl,hl
or a
- sbc hl,de
+ sbc hl,bc
jr c,div_w_n01
dec a
div_w_n11: ; bit 1, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr c,div_w_n12
inc a
div_w_n02: ; bit 2, above
add a,a
adc hl,hl
or a
- sbc hl,de
+ sbc hl,bc
jr c,div_w_n03
dec a
div_w_n13: ; bit 3, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr c,div_w_n14
inc a
div_w_n04: ; bit 4, above
add a,a
adc hl,hl
or a
- sbc hl,de
+ sbc hl,bc
jr c,div_w_n05
dec a
div_w_n15: ; bit 5, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr c,div_w_n16
inc a
div_w_n06: ; bit 6, above
add a,a
adc hl,hl
or a
- sbc hl,de
+ sbc hl,bc
jr c,div_w_n07
dec a
div_w_n17: ; bit 7, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr c,div_w_n18
inc a
div_w_n08: ; done, above
div_w_n1: ; bit 0, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr c,div_w_n11
inc a
div_w_n01: ; bit 1, above
add a,a
adc hl,hl
or a
- sbc hl,de
+ sbc hl,bc
jr c,div_w_n02
dec a
div_w_n12: ; bit 2, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr c,div_w_n13
inc a
div_w_n03: ; bit 3, above
add a,a
adc hl,hl
or a
- sbc hl,de
+ sbc hl,bc
jr c,div_w_n04
dec a
div_w_n14: ; bit 4, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr c,div_w_n15
inc a
div_w_n05: ; bit 5, above
add a,a
adc hl,hl
or a
- sbc hl,de
+ sbc hl,bc
jr c,div_w_n06
dec a
div_w_n16: ; bit 6, below
add a,a
adc hl,hl
- add hl,de
+ add hl,bc
jr c,div_w_n17
inc a
div_w_n07: ; bit 7, above
add a,a
adc hl,hl
or a
- sbc hl,de
+ sbc hl,bc
jr c,div_w_n08
dec a
div_w_n18: ; done, below
;dec a ; compensation
scf
ret
+.endif
-math_div_sl0: ; hl':hl, de':de = de':hl % hl':de, de':hl / hl':de, signed
+math_sdiv_sl0: ; hl':hl, de':de = hl':de % de':hl, hl':de / de':hl, signed
 exx
 ex de,hl
 exx
-math_div_sl: ; ; hl':hl, de':de = hl':hl % de':de, hl':hl / de':de, signed
- push bc
+math_sdiv_sl: ; hl':hl, de':de = de':de % hl':hl, de':de / hl':hl, signed
 exx
- ld a,h
- or a
+ ld c,l ; bc' = high word of the divisor
+ ld b,h
 ld a,d
 rla
+ sbc a,a ; a = 0xff if the dividend de':de is negative, else 0 (sub a,a here always gave 0 and lost the sign)
+ ld l,a ; hl' = sign extension of the dividend
+ ld h,a
 exx
- jp m,div_l_n ; positive dividend
-
- ; positive dividend
 ld c,l
 ld b,h
- ld hl,0
+ ld l,a ; hl = sign extension as well; execution continues into math_div_sl
+ ld h,a
+math_div_sl:
+ ; hl':hl, de':de =
+ ; hl':hl:de':de % bc':bc, hl':hl:de':de / bc':bc, signed
 exx
 ld a,h
- ld c,l
- ld hl,0
+ or a ; sign flag = sign of the dividend (top byte h')
+ jp m,div_l_n ; jump for a negative dividend; fall through for a positive one
+
+ ; positive dividend
+ ld a,b
+ rla ; cf = sign of the divisor (top byte b')
+ ld a,d ; first dividend byte (d') to feed the bit loop
 exx
 jr nc,div_l_pp ; positive dividend, positive divisor
 ; positive dividend, negative divisor
 call div_l_n1
 exx
- ld b,a
- ld a,c
+ ld d,a ; quotient byte 3 into d'
+ ld a,e ; next dividend byte (e')
 exx
 call div_l_ncf
 exx
- ld c,a
+ ld e,a ; quotient byte 2 into e'
 exx
- ld a,b
+ ld a,d ; next dividend byte (d)
 call div_l_ncf
- ld b,a
- ld a,c
+ ld d,a ; quotient byte 1
+ ld a,e ; last dividend byte (e)
 call div_l_ncf
 inc a
- jr c,div_l_p_done
- sbc hl,de
+ ld e,a ; quotient byte 0
+ ret c ; cf set: remainder needs no final correction
+ sbc hl,bc ; cf is clear here, so this starts a plain 32-bit subtract of the divisor
 exx
- sbc hl,de
+ sbc hl,bc
 exx
- jr div_l_p_done
+ ret
-div_l_n:
- ; negative dividend
- call dec_l ; reduces remainder by 1 (we inc later)
- ld c,l
- ld b,h
- ld hl,-1
+div_l_n: ; negative dividend
 exx
- ld a,h
- ld c,l
- ld hl,-1
+ ld a,e
+ or d ; zero flag set if the low word de is 0, i.e. the decrement must borrow
+ dec de ; reduces remainder by 1 (we inc later)
+ exx
+ jr nz,1$ ; dec de left the flags from "or d" untouched
+ dec de ; propagate the borrow into de'
+1$: ld a,b
+ rla ; cf = sign of the divisor (top byte b')
+ ld a,d ; first dividend byte (d') to feed the bit loop
 exx
 jr c,div_l_nn ; negative dividend, negative divisor
 ; negative dividend, positive divisor
 call div_l1
 exx
- ld b,a
- ld a,c
+ ld d,a ; quotient byte 3 into d'
+ ld a,e ; next dividend byte (e')
 exx
 call div_lcf
 exx
- ld c,a
+ ld e,a ; quotient byte 2 into e'
 exx
- ld a,b
+ ld a,d ; next dividend byte (d)
 call div_lcf
- ld b,a
- ld a,c
+ ld d,a ; quotient byte 1
+ ld a,e ; last dividend byte (e)
 call div_lcf
 inc a
- jr c,div_l_n_done
- sbc hl,de
+ ld e,a ; quotient byte 0
+ jr c,2$ ; cf set: remainder needs no final correction
+ sbc hl,bc ; cf is clear here, so this starts a plain 32-bit subtract of the divisor
+ exx
+ sbc hl,bc
+ exx
+2$: inc hl ; get into range -divisor+1..0
+ ld a,l
+ or h
+ ret nz ; low word did not wrap to 0: done
+ exx
+ inc hl ; carry the increment into hl'
+ exx
+ ret
+
+math_div_ul0: ; hl':hl, de':de = hl':de % de':hl, hl':de / de':hl, unsigned
+ exx
+ ex de,hl
+ exx
+math_div_ul1: ; hl':hl, de':de = de':de % hl':hl, de':de / hl':hl, unsigned
+ ld c,l ; bc = low word of the divisor
+ ld b,h
+ sub a ; a = 0, used to zero-extend the dividend
+ ld l,a
+ ld h,a
+ exx
+ ld c,l ; bc' = high word of the divisor
+ ld b,h
+ ld l,a
+ ld h,a
+ exx
+math_div_ul:
+ ; hl':hl, de':de =
+ ; hl':hl:de':de % bc':bc, hl':hl:de':de / bc':bc, unsigned
+ exx
+ ld a,d ; first dividend byte (d') to feed the bit loop
+ exx
+div_l_pp: ; positive dividend, positive divisor
+ call div_l0
+ exx
+ ld d,a ; quotient byte 3 into d'
+ ld a,e ; next dividend byte (e')
+ exx
+ call div_lcf
+ exx
+ ld e,a ; quotient byte 2 into e'
+ exx
+ ld a,d ; next dividend byte (d)
+ call div_lcf
+ ld d,a ; quotient byte 1
+ ld a,e ; last dividend byte (e)
+ call div_lcf
+ ld e,a ; quotient byte 0
+ ret nc ; cf clear: remainder is already non-negative
+ add hl,bc ; otherwise add the divisor back once (32-bit)
+ exx
+ adc hl,bc
+ exx
+ ret
+
+div_l_nn: ; negative dividend, negative divisor
+ call div_l_n0
+ exx
+ ld d,a ; quotient byte 3 into d'
+ ld a,e ; next dividend byte (e')
+ exx
+ call div_l_ncf
+ exx
+ ld e,a ; quotient byte 2 into e'
+ exx
+ ld a,d ; next dividend byte (d)
+ call div_l_ncf
+ ld d,a ; quotient byte 1
+ ld a,e ; last dividend byte (e)
+ call div_l_ncf
+ ld e,a ; quotient byte 0
+ jr nc,1$ ; cf clear: remainder needs no final correction
+ add hl,bc ; 32-bit add of the divisor
 exx
- sbc hl,de
+ adc hl,bc
+ exx
+1$: inc hl ; get into range divisor+1..0
+ ld a,l
+ or h
+ ret nz ; low word did not wrap to 0: done
+ exx
+ inc hl ; carry the increment into hl'
 exx
- jr div_l_n_done
+ ret
+
+; non-restoring division routine
+; see earlier comments for the word version, this extends the concept to long
-math_div_ul0: ; hl:hl', de:de' = hl:de' % de:hl', hl:de' / de:hl', unsigned
+.if 1 ; temporary
+div_lcf: ; continue with the next dividend byte: cf from the previous byte picks the entry
+ jr c,div_l1
+div_l0: ; above
+ push de ; d is borrowed as the loop counter
+ ld d,8 ; one pass per bit of a
+ scf
+ rla ; top dividend bit into cf, a 1 into bit 0 of a
+div_l00:
+ adc hl,hl ; shift the dividend bit into the 32-bit remainder hl':hl
 exx
- ex de,hl
+ adc hl,hl
 exx
-math_div_ul: ; ; hl:hl', de:de' = hl:hl' % de:de', hl:hl' / de:de', unsigned
- push bc
- ld c,l
- ld b,h
- ld hl,0
+ jr c,div_l01
+ sbc hl,bc ; cf is clear here, so this starts a plain 32-bit subtract of the divisor
 exx
- ld a,h
- ld c,l
- ld hl,0
+ sbc hl,bc
 exx
-div_l_pp: ; positive dividend, positive divisor
- call div_l0
+ jr nc,div_l02
+ dec a
+ ;jr div_l12
+ add a,a ; next dividend bit into cf
+ dec d ; dec leaves cf alone for the adc at the loop top
+ jr nz,div_l10 ; the subtract borrowed: carry on in the "below" loop
+ ;inc a
+ ;dec a ; compensation
+ scf ; return cf = 1: finished in the "below" state
+ pop de
+ ret
+div_l01:
+ or a ; clear cf so the first sbc is a plain subtract
+ sbc hl,bc
 exx
- ld b,a
- ld a,c
+ sbc hl,bc
 exx
- call div_lcf
+div_l02:
+ add a,a
+ dec d
+ jr nz,div_l00
+ dec a
+ or a ; return cf = 0: finished in the "above" state
+ pop de
+ ret
+
+div_l1: ; below
+ push de ; d is borrowed as the loop counter
+ ld d,8
+ add a,a ; top dividend bit into cf, a 0 into bit 0 of a
+div_l10:
+ adc hl,hl
 exx
- ld c,a
+ adc hl,hl
 exx
- ld a,b
- call div_lcf
- ld b,a
- ld a,c
- call div_lcf
- jr nc,div_l_p_done
- add hl,de
+ jr nc,div_l11
+ add hl,bc ; 32-bit add of the divisor
 exx
- adc hl,de
+ adc hl,bc
 exx
-div_l_p_done:
- ld e,a
- ld d,b
+ jr nc,div_l12
+ inc a
+ ;jr div_l02
+ add a,a
+ dec d
+ jr nz,div_l00 ; switch to the "above" loop
+ dec a
+ or a ; return cf = 0: finished in the "above" state
+ pop de
+ ret
+div_l11:
+ add hl,bc
 exx
- ld e,c
- ld d,b
+ adc hl,bc
 exx
- pop bc
+div_l12:
+ add a,a
+ dec d
+ jr nz,div_l10
+ ;inc a
+ ;dec a ; compensation
+ scf ; return cf = 1: finished in the "below" state
+ pop de
 ret
-div_l_nn: ; negative dividend, negative divisor
- call div_l_n0
+div_l_ncf: ; same scheme as div_lcf with the carry tests inverted (used on the negative-divisor paths)
+ jr c,div_l_n1
+div_l_n0: ; above
+ push de ; d is borrowed as the loop counter
+ ld d,8
+ scf
+ rla
+div_l_n00:
+ adc hl,hl
 exx
- ld b,a
- ld a,c
+ adc hl,hl
 exx
- call div_l_ncf
+ jr nc,div_l_n01
+ or a ; clear cf so the first sbc is a plain subtract
+ sbc hl,bc
 exx
- ld c,a
+ sbc hl,bc
 exx
- ld a,b
- call div_l_ncf
- ld b,a
- ld a,c
- call div_l_ncf
- jr nc,div_l_n_done
- add hl,de
+ jr c,div_l_n02
+ dec a
+ ;jr div_l_n12
+ add a,a
+ dec d
+ jr nz,div_l_n10
+ ;inc a
+ ;dec a ; compensation
+ scf
+ pop de
+ ret
+div_l_n01:
+ sbc hl,bc ; cf is clear here (we arrived via jr nc)
 exx
- adc hl,de
+ sbc hl,bc
 exx
-div_l_n_done:
- ld e,a
- ld d,b
+div_l_n02:
+ add a,a
+ dec d
+ jr nz,div_l_n00
+ dec a
+ or a
+ pop de
+ ret
+
+div_l_n1: ; below
+ push de ; d is borrowed as the loop counter
+ ld d,8
+ add a,a
+div_l_n10:
+ adc hl,hl
 exx
- ld e,c
- ld d,b
+ adc hl,hl
 exx
- pop bc
-inc_l: ; get into range divisor+1..0
- inc hl
- ld a,l
- or h
- ret nz
+ jr c,div_l_n11
+ add hl,bc
 exx
- inc hl
+ adc hl,bc
 exx
+ jr c,div_l_n12
+ inc a
+ ;jr div_l_n02
+ add a,a
+ dec d
+ jr nz,div_l_n00
+ dec a
+ or a
+ pop de
 ret
-
-dec_l:
- ld a,l
- or h
- dec hl
- ret nz
+div_l_n11:
+ add hl,bc
 exx
- dec hl
+ adc hl,bc
 exx
+div_l_n12:
+ add a,a
+ dec d
+ jr nz,div_l_n10
+ ;inc a
+ ;dec a ; compensation
+ scf
+ pop de
 ret
-
-; non-restoring division routine
-; see earlier comments for the word version, this extends the concept to long
-
+.else
; changed all jr to jp, revisit this
div_lcf:
exx
adc hl,hl
exx
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp nc,div_l01
dec a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp nc,div_l12
inc a
exx
adc hl,hl
exx
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp nc,div_l03
dec a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp nc,div_l14
inc a
exx
adc hl,hl
exx
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp nc,div_l05
dec a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp nc,div_l16
inc a
exx
adc hl,hl
exx
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp nc,div_l07
dec a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp nc,div_l18
inc a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp nc,div_l11
inc a
exx
adc hl,hl
exx
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp nc,div_l02
dec a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp nc,div_l13
inc a
exx
adc hl,hl
exx
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp nc,div_l04
dec a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp nc,div_l15
inc a
exx
adc hl,hl
exx
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp nc,div_l06
dec a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp nc,div_l17
inc a
exx
adc hl,hl
exx
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp nc,div_l08
dec a
adc hl,hl
exx
or a
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp c,div_l_n01
dec a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp c,div_l_n12
inc a
adc hl,hl
exx
or a
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp c,div_l_n03
dec a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp c,div_l_n14
inc a
adc hl,hl
exx
or a
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp c,div_l_n05
dec a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp c,div_l_n16
inc a
adc hl,hl
or a
exx
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp c,div_l_n07
dec a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp c,div_l_n18
inc a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp c,div_l_n11
inc a
adc hl,hl
exx
or a
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp c,div_l_n02
dec a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp c,div_l_n13
inc a
adc hl,hl
exx
or a
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp c,div_l_n04
dec a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp c,div_l_n15
inc a
adc hl,hl
exx
or a
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp c,div_l_n06
dec a
exx
adc hl,hl
exx
- add hl,de
+ add hl,bc
exx
- adc hl,de
+ adc hl,bc
exx
jp c,div_l_n17
inc a
adc hl,hl
exx
or a
- sbc hl,de
+ sbc hl,bc
exx
- sbc hl,de
+ sbc hl,bc
exx
jp c,div_l_n08
dec a
;dec a ; compensation
scf
ret
+.endif
; debugging
+print_hlde: ; debug helper: print_word, ':', print_word on the swapped pair, then CR LF
+ call print_word ; presumably prints hl -- print_word is defined elsewhere; confirm
+ ld a,':
+ call print_char
+ ex de,hl ; bring de into hl for the second print_word
+ call print_word
+ ex de,hl ; swap back again
+ ld a,0xd ; carriage return
+ call print_char
+ ld a,0xa ; line feed
+ jp print_char ; tail call: print_char's ret returns to our caller
+
+print_hlhldede: ; debug helper: print_word for hl', hl, then ':', then de', de, then CR LF
+ exx ; alternate set first: the high word hl'
+ call print_word ; presumably prints hl -- print_word is defined elsewhere; confirm
+ exx
+ call print_word ; low word hl
+ ld a,':
+ call print_char
+ exx
+ ex de,hl ; bring de' into hl' for print_word
+ call print_word
+ ex de,hl ; swap back again
+ exx
+ ex de,hl ; same for the main-set de
+ call print_word
+ ex de,hl
+ ld a,0xd ; carriage return
+ call print_char
+ ld a,0xa ; line feed
+ jp print_char ; tail call: print_char's ret returns to our caller
+
print_trace: ; print af, bc, hl':de, de':hl, (sp+2):(sp), sp
call print_trace2
ld a,(bc)