-page0 = 2
-page1 = 4
-page2 = 6
+page0 = 3
+page1 = 5
+page2 = 7
.area SM (abs,ovr)
.org 0x100
+ ; self-test of the 16 x 16 -> 32 bit multiplies: each case loads hl
+ ; and de, calls the routine, then prints the hl:de result on its own
+ ; line with print_hlde
+ ;
+ ; unsigned entry point
+ ld hl,0x1234
+ ld de,0x56
+ call math_mul_uw0
+ call print_hlde
+
+ ld hl,0x6543
+ ld de,0x21
+ call math_mul_uw0
+ call print_hlde
+
+ ld hl,0xb975
+ ld de,0x31
+ call math_mul_uw0
+ call print_hlde
+
+ ld hl,0xdb97
+ ld de,0x531
+ call math_mul_uw0
+ call print_hlde
+
+ ; signed entry point, same operands as above; note that 0xb975 and
+ ; 0xdb97 have bit 15 set, so as 16 bit signed values they are negative
+ ld hl,0x1234
+ ld de,0x56
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,0x6543
+ ld de,0x21
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,0xb975
+ ld de,0x31
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,0xdb97
+ ld de,0x531
+ call math_mul_sw0
+ call print_hlde
+
+ ; signed entry point, hl negated; -0xb975 and -0xdb97 wrap to 0x468b
+ ; and 0x2469, which are positive as 16 bit signed values
+ ld hl,-0x1234
+ ld de,0x56
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,-0x6543
+ ld de,0x21
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,-0xb975
+ ld de,0x31
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,-0xdb97
+ ld de,0x531
+ call math_mul_sw0
+ call print_hlde
+
+ ; signed entry point, de negated
+ ld hl,0x1234
+ ld de,-0x56
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,0x6543
+ ld de,-0x21
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,0xb975
+ ld de,-0x31
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,0xdb97
+ ld de,-0x531
+ call math_mul_sw0
+ call print_hlde
+
+ ; signed entry point, both hl and de negated
+ ld hl,-0x1234
+ ld de,-0x56
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,-0x6543
+ ld de,-0x21
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,-0xb975
+ ld de,-0x31
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,-0xdb97
+ ld de,-0x531
+ call math_mul_sw0
+ call print_hlde
+
ld hl,restarts
ld de,0x28
ld bc,restarts_end - restarts
+; word multiply stub: pops hl from the stack and multiplies it by de
+; (math_smul_w0: hl *= de); bc is saved around the call because the
+; multiply routine overwrites it with a copy of hl; continues at
+; mul_w_done, which is outside this excerpt
page1_mul_w:
pop hl
push bc
- call math_mul_w0
+ call math_smul_w0
pop bc
jr mul_w_done
+; signed word divide stub: pops hl from the stack and calls
+; math_sdiv_sw0 (hl, de = hl % de, hl / de); bc is pushed but not popped
+; here, presumably div_w_done restores it -- TODO confirm
page1_div_sw:
pop hl
push bc
- call math_div_sw0
+ call math_sdiv_sw0
jr div_w_done
+; signed word divide stub with the operands the other way around: pops
+; hl from the stack and calls math_sdiv_sw (hl, de = de % hl, de / hl);
+; bc is pushed but not popped here, presumably div_w_done restores it
+; -- TODO confirm
page1_imm_div_sw:
page1_divrev_sw:
pop hl
push bc
- call math_div_sw1
+ call math_sdiv_sw
jr div_w_done
+; unsigned word divide stub: pops hl from the stack and calls
+; math_sdiv_uw0 (hl, de = hl % de, hl / de); bc is pushed but not popped
+; here, presumably div_w_done restores it -- TODO confirm
page1_imm_divrev_uw:
page1_div_uw:
pop hl
push bc
- call math_div_uw0
+ call math_sdiv_uw0
jr div_w_done
+; unsigned word divide stub with the operands the other way around:
+; pops hl from the stack and calls math_sdiv_uw (hl, de = de % hl,
+; de / hl); bc is pushed but not popped here, presumably div_w_done
+; restores it -- TODO confirm
page1_imm_div_uw:
page1_divrev_uw:
pop hl
push bc
- call math_div_uw1
+ call math_sdiv_uw
jr div_w_done
; page 1 to 2
+; long multiply stub: exchanges de and hl, then calls math_smul_l0
+; (hl':hl *= de':de); only the main-bank bc is saved around the call,
+; although the multiply routine also overwrites bc' -- NOTE(review):
+; confirm nothing live is kept in bc' at this point
mul_l_entry:
ex de,hl
push bc
- call math_mul_l0
+ call math_smul_l0
pop bc
jr mul_l_done
exx
ret
-math_mul_w0: ; hl *= de
+; smul: short multiplication
+; 16 * 16 to 16 bit product (word)
+; 32 * 32 to 32 bit product (long)
+; mul: long multiplication
+; 16 + 16 * 16 to 32 bit product (word)
+; 32 + 32 * 32 to 64 bit product (long)
+
+; smul is implemented more efficiently, using only left shifts, which are
+; cheaper than right shifts on the z80; this works for smul because the
+; high half of the product is discarded, so no carries need propagating
+
+; mul can initialize the product with some nonzero value, which smul
+; cannot because its accumulator is only ever shifted left; using this
+; ability, long multiplication reverses long division (initialize the
+; product with a remainder, then add in quotient * divisor)
+
+; short word multiply, low 16 bits of the product only
+; in:  hl, de = operands (math_smul_w0) or bc, de = operands (math_smul_w)
+; out: hl = product, bc = multiplicand, a and flags clobbered
+;
+; hl is deliberately not cleared: the high multiplier byte enters the
+; unrolled loop at smul_w, whose leading add hl,hl makes 16 left shifts
+; in total, so every incoming bit of hl is shifted out of the result;
+; entering at smul_w0 instead would skip that first shift and leave bit 0
+; of the incoming hl in bit 15 of the product (1 * 1 would give 0x8001),
+; given one shift per multiplier bit in the unrolled body
+math_smul_w0: ; hl *= de
ld c,l
ld b,h
- ld hl,0
-math_mul_w: ; hl += bc * de
+math_smul_w: ; hl = bc * de
ld a,d
- call mul_w0
+ call smul_w
ld a,e
-mul_w: ; bit 0
+smul_w: ; bit 0
add hl,hl
-mul_w0: rla
+smul_w0: rla
jr nc,1$
add hl,bc
1$: ; bit 1
add hl,bc
ret
-math_mul_l0: ; hl':hl *= de':de
+; short long multiply, low 32 bits of the product only
+; in:  hl':hl, de':de = operands (math_smul_l0) or
+;      bc':bc, de':de = operands (math_smul_l)
+; out: hl':hl = product, bc':bc = multiplicand, a and flags clobbered
+;
+; hl':hl is deliberately not cleared: all four multiplier bytes enter the
+; unrolled loop at smul_l, whose leading shift makes 32 left shifts in
+; total, so every incoming bit of hl':hl is shifted out of the result;
+; entering at smul_l0 for the first byte would skip one shift and leave
+; bit 0 of the incoming hl in bit 31 of the product, given one shift per
+; multiplier bit in the unrolled body
+math_smul_l0: ; hl':hl *= de':de
ld c,l
ld b,h
- ld hl,0
exx
ld c,l
ld b,h
- ld hl,0
exx
-math_mul_l: ; hl':hl += de':de * bc':bc
+math_smul_l: ; hl':hl = de':de * bc':bc
exx
ld a,d
exx
- call mul_l0
+ call smul_l
exx
ld a,e
exx
- call mul_l
+ call smul_l
ld a,d
- call mul_l
+ call smul_l
ld a,e
-mul_l: ; bit 0
+smul_l: ; bit 0
add hl,hl
exx
adc hl,hl
exx
-mul_l0: rla
+smul_l0: rla
jr nc,1$
add hl,bc
exx
exx
ret
-math_div_sw0: ; hl, de = hl % de, hl / de, signed
+; long word multiply, 16 x 16 -> 32 bit product with optional 16 bit addend
+; in:  hl, de = operands (math_mul_?w0 entries), or
+;      hl = addend, bc = multiplicand, de = multiplier (math_mul_?w entries)
+; out: hl:de = result (hl = high word), bc = multiplicand,
+;      a and flags clobbered
+;
+; the multiplier is consumed least significant bit first, one byte per
+; call of mul_uw; the running sum lives in hl and is rotated right one
+; place per bit, its low bits ending up in the byte that replaces the
+; multiplier byte
+
+math_mul_uw0: ; hl:de = hl * de, unsigned
+ ld c,l
+ ld b,h ; bc = multiplicand
+ ld hl,0 ; no addend
+math_mul_uw: ; hl:de = hl + bc * de, unsigned
+ ld a,e
+ call mul_uw ; multiplier bits 0..7 in, result bits 0..7 out
+ ld e,a
+ ld a,d
+ call mul_uw ; multiplier bits 8..15 in, result bits 8..15 out
+ ld d,a
+ ret
+
+; the signed variants reuse the unsigned core and then correct the high
+; word: reading a 16 bit pattern as signed instead of unsigned subtracts
+; 0x10000 when bit 15 is set, so modulo 2^32
+;   signed(bc) * signed(de) = bc * de - (bc < 0 ? de << 16 : 0)
+;                                     - (de < 0 ? bc << 16 : 0)
+; and a negative addend, sign extended to 32 bits, subtracts a further
+; 1 << 16; the low word in de is already correct
+; uses 4 bytes of stack on top of the call to math_mul_uw
+
+math_mul_sw0: ; hl:de = hl * de, signed
+ ld c,l
+ ld b,h ; bc = multiplicand
+ ld hl,0 ; no addend
+math_mul_sw: ; hl:de = hl + bc * de, signed
+ push hl ; addend, its sign is needed at the very end
+ push de ; multiplier, needed for the correction
+ call math_mul_uw ; hl:de = unsigned result, bc untouched
+ ex (sp),hl ; hl = multiplier, (sp) = unsigned high word
+ ld a,h ; bit 7 of a = sign of the multiplier
+ bit 7,b
+ jr nz,1$ ; multiplicand negative: correction starts as multiplier
+ ld hl,0 ; otherwise it starts as zero (a and flags unaffected)
+1$: rla ; carry = sign of the multiplier
+ jr nc,2$
+ add hl,bc ; multiplier negative: correction += multiplicand
+2$: ex de,hl ; de = correction, hl = low word
+ ex (sp),hl ; hl = unsigned high word, (sp) = low word
+ or a ; clear carry for sbc
+ sbc hl,de ; hl = corrected high word
+ pop de ; de = low word
+ pop af ; a = high byte of the original addend
+ rla ; carry = sign of the addend
+ ret nc
+ dec hl ; negative addend: take 1 off the high word
+ ret
+
+; multiply step for one multiplier byte
+; in:  a = multiplier byte, hl = running sum, bc = multiplicand
+; out: a = next 8 result bits, hl = running sum shifted right 8 places
+; each step: if the multiplier bit in carry is set, add bc to hl; rotate
+; the 17 bit sum carry:hl right by one (carry is clear when the add was
+; skipped, since jr nc was taken); the final rra moves the bit shifted
+; out of l into a and the next multiplier bit into carry
+mul_uw: rra
+ ; bit 0
+ jr nc,1$
+ add hl,bc
+1$: rr h
+ rr l
+ rra
+ ; bit 1
+ jr nc,2$
+ add hl,bc
+2$: rr h
+ rr l
+ rra
+ ; bit 2
+ jr nc,3$
+ add hl,bc
+3$: rr h
+ rr l
+ rra
+ ; bit 3
+ jr nc,4$
+ add hl,bc
+4$: rr h
+ rr l
+ rra
+ ; bit 4
+ jr nc,5$
+ add hl,bc
+5$: rr h
+ rr l
+ rra
+ ; bit 5
+ jr nc,6$
+ add hl,bc
+6$: rr h
+ rr l
+ rra
+ ; bit 6
+ jr nc,7$
+ add hl,bc
+7$: rr h
+ rr l
+ rra
+ ; bit 7
+ jr nc,8$
+ add hl,bc
+8$: rr h
+ rr l
+ rra
+ ret
+
+; sdiv: short division
+; 16 / 16 to 16 bit quotient, 16 bit remainder (word)
+; 32 / 32 to 32 bit quotient, 32 bit remainder (long)
+; div: long division
+; 32 / 16 to 16 bit quotient, 16 bit remainder (word)
+; 64 / 32 to 32 bit quotient, 32 bit remainder (long)
+
+; sdiv is implemented as sign/zero extension then div
+
+math_sdiv_sw0: ; hl, de = hl % de, hl / de, signed
ex de,hl
-math_div_sw1: ; hl, de = de % hl, de / hl, signed
+math_sdiv_sw: ; hl, de = de % hl, de / hl, signed
ld c,l
ld b,h
ld a,d
add hl,bc
ret
-math_div_uw0: ; hl, de = hl % de, hl / de, unsigned
+math_sdiv_uw0: ; hl, de = hl % de, hl / de, unsigned
ex de,hl
-math_div_uw1: ; hl, de = de % hl, de / hl, unsigned
+math_sdiv_uw: ; hl, de = de % hl, de / hl, unsigned
ld c,l
ld b,h
ld hl,0
; debugging
+; debug helper: print the register pair values hl and de as "hl:de"
+; followed by CR LF; hl and de are exchanged and exchanged back around
+; the second print_word, so the routine itself leaves them in place
+; (whether print_word / print_char preserve them is not visible here);
+; a is overwritten
+print_hlde:
+ call print_word ; first word, presumably print_word prints hl
+ ld a,':
+ call print_char ; separator
+ ex de,hl ; bring de into hl for the second word
+ call print_word
+ ex de,hl ; put hl and de back
+ ld a,0xd
+ call print_char ; carriage return
+ ld a,0xa
+ jp print_char ; line feed; tail call, print_char returns to our caller
+
+
print_trace: ; print af, bc, hl':de, de':hl, (sp+2):(sp), sp
call print_trace2
ld a,(bc)