/bin
cordic
div
+mul
+mul2
+# Default target: build every C program listed below.
+# 'all' never produces a file of that name, so it is declared phony; without
+# this a stray file called 'all' in the directory would make the target look
+# up to date and nothing would be rebuilt.
+.PHONY: all
+all: div mul mul2 cordic
+
div: div.c
gcc -o $@ $^
+# Each program is a single translation unit compiled straight to an executable.
+mul: mul.c
+ gcc -o $@ $^
+
+mul2: mul2.c
+ gcc -o $@ $^
+
cordic: cordic.c
gcc -o $@ $^
--- /dev/null
+#include <stdio.h>
+
+/*
+ * Print a table of signed 16 x 16 bit products as 8 hex digits each.
+ * The rows come in groups of four: operands as written, operands with the
+ * sign-extended spelling of the two large values, then the same with the
+ * left operand negated, the right operand negated, and both negated.
+ */
+int main(void) {
+ static const struct {
+  int lhs;
+  int rhs;
+ } cases[] = {
+  /* operands as plain literals */
+  { 0x1234, 0x56 },
+  { 0x6543, 0x21 },
+  { 0xb975, 0x31 },
+  { 0xdb97, 0x531 },
+  /* large values written as sign-extended words */
+  { 0x1234, 0x56 },
+  { 0x6543, 0x21 },
+  { (int)0xffffb975, 0x31 },
+  { (int)0xffffdb97, 0x531 },
+  /* left operand negated */
+  { -0x1234, 0x56 },
+  { -0x6543, 0x21 },
+  { -(int)0xffffb975, 0x31 },
+  { -(int)0xffffdb97, 0x531 },
+  /* right operand negated */
+  { 0x1234, -0x56 },
+  { 0x6543, -0x21 },
+  { (int)0xffffb975, -0x31 },
+  { (int)0xffffdb97, -0x531 },
+  /* both operands negated */
+  { -0x1234, -0x56 },
+  { -0x6543, -0x21 },
+  { -(int)0xffffb975, -0x31 },
+  { -(int)0xffffdb97, -0x531 },
+ };
+ unsigned int row;
+
+ for (row = 0; row < sizeof cases / sizeof cases[0]; ++row)
+  printf("%08x\n", cases[row].lhs * cases[row].rhs);
+
+ return 0;
+}
--- /dev/null
+#include <stdio.h>
+
+/*
+ * Unsigned 16 x 16 -> 32 bit product.
+ *
+ * Multiplying two unsigned short values directly promotes both to signed
+ * int, so any product above INT_MAX (e.g. 0xedcc * 0xffaa) is signed
+ * overflow, which is undefined behaviour.  Widening one operand to
+ * unsigned int first makes the multiplication well defined; the result is
+ * also the type that the "%08x" conversion expects.
+ */
+static unsigned int mul_uw(unsigned short x, unsigned short y) {
+ return (unsigned int)x * y;
+}
+
+/*
+ * Print a table of unsigned 16 x 16 bit products as 8 hex digits each.
+ * Every operand is truncated to unsigned short before multiplying, so the
+ * negated operands are the two's-complement words of the signed values.
+ */
+int main(void) {
+ /* operands as plain literals */
+ printf("%08x\n", mul_uw((unsigned short)0x1234, (unsigned short)0x56));
+ printf("%08x\n", mul_uw((unsigned short)0x6543, (unsigned short)0x21));
+ printf("%08x\n", mul_uw((unsigned short)0xb975, (unsigned short)0x31));
+ printf("%08x\n", mul_uw((unsigned short)0xdb97, (unsigned short)0x531));
+
+ /* large values written as sign-extended words */
+ printf("%08x\n", mul_uw((unsigned short)0x1234, (unsigned short)0x56));
+ printf("%08x\n", mul_uw((unsigned short)0x6543, (unsigned short)0x21));
+ printf("%08x\n", mul_uw((unsigned short)(int)0xffffb975, (unsigned short)0x31));
+ printf("%08x\n", mul_uw((unsigned short)(int)0xffffdb97, (unsigned short)0x531));
+
+ /* left operand negated */
+ printf("%08x\n", mul_uw((unsigned short)-0x1234, (unsigned short)0x56));
+ printf("%08x\n", mul_uw((unsigned short)-0x6543, (unsigned short)0x21));
+ printf("%08x\n", mul_uw((unsigned short)-(int)0xffffb975, (unsigned short)0x31));
+ printf("%08x\n", mul_uw((unsigned short)-(int)0xffffdb97, (unsigned short)0x531));
+
+ /* right operand negated */
+ printf("%08x\n", mul_uw((unsigned short)0x1234, (unsigned short)-0x56));
+ printf("%08x\n", mul_uw((unsigned short)0x6543, (unsigned short)-0x21));
+ printf("%08x\n", mul_uw((unsigned short)(int)0xffffb975, (unsigned short)-0x31));
+ printf("%08x\n", mul_uw((unsigned short)(int)0xffffdb97, (unsigned short)-0x531));
+
+ /* both operands negated */
+ printf("%08x\n", mul_uw((unsigned short)-0x1234, (unsigned short)-0x56));
+ printf("%08x\n", mul_uw((unsigned short)-0x6543, (unsigned short)-0x21));
+ printf("%08x\n", mul_uw((unsigned short)-(int)0xffffb975, (unsigned short)-0x31));
+ printf("%08x\n", mul_uw((unsigned short)-(int)0xffffdb97, (unsigned short)-0x531));
+
+ return 0;
+}
-page0 = 2
-page1 = 4
-page2 = 6
+page0 = 3
+page1 = 5
+page2 = 7
.area SM (abs,ovr)
.org 0x100
+ ld hl,0x1234 ; --- unsigned cases: multiplicand in hl, multiplier in de ---
+ ld de,0x56
+ call math_mul_uw0 ; hl:de = hl * de, unsigned
+ call print_hlde ; print the result as hl:de followed by CR LF
+
+ ld hl,0x6543
+ ld de,0x21
+ call math_mul_uw0
+ call print_hlde
+
+ ld hl,0xb975
+ ld de,0x31
+ call math_mul_uw0
+ call print_hlde
+
+ ld hl,0xdb97
+ ld de,0x531
+ call math_mul_uw0
+ call print_hlde
+
+ ld hl,0x1234 ; --- signed entry point, same operand words as the unsigned cases ---
+ ld de,0x56
+ call math_mul_sw0 ; hl:de = hl * de, signed
+ call print_hlde
+
+ ld hl,0x6543
+ ld de,0x21
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,0xb975 ; bit 15 is set, so this word is negative when read as signed
+ ld de,0x31
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,0xdb97 ; bit 15 is set, so this word is negative when read as signed
+ ld de,0x531
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,-0x1234 ; --- signed, multiplicand negated ---
+ ld de,0x56
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,-0x6543
+ ld de,0x21
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,-0xb975
+ ld de,0x31
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,-0xdb97
+ ld de,0x531
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,0x1234 ; --- signed, multiplier negated ---
+ ld de,-0x56
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,0x6543
+ ld de,-0x21
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,0xb975
+ ld de,-0x31
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,0xdb97
+ ld de,-0x531
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,-0x1234 ; --- signed, both operands negated ---
+ ld de,-0x56
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,-0x6543
+ ld de,-0x21
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,-0xb975
+ ld de,-0x31
+ call math_mul_sw0
+ call print_hlde
+
+ ld hl,-0xdb97
+ ld de,-0x531
+ call math_mul_sw0
+ call print_hlde
+
ld hl,restarts
ld de,0x28
ld bc,restarts_end - restarts
page1_mul_w:
pop hl
push bc
- call math_mul_w0
+ call math_smul_w0
pop bc
jr mul_w_done
page1_div_sw:
pop hl
push bc
- call math_div_sw0
+ call math_sdiv_sw0
jr div_w_done
page1_imm_div_sw:
page1_divrev_sw:
pop hl
push bc
- call math_div_sw1
+ call math_sdiv_sw
jr div_w_done
page1_imm_divrev_uw:
page1_div_uw:
pop hl
push bc
- call math_div_uw0
+ call math_sdiv_uw0
jr div_w_done
page1_imm_div_uw:
page1_divrev_uw:
pop hl
push bc
- call math_div_uw1
+ call math_sdiv_uw
jr div_w_done
; page 1 to 2
mul_l_entry:
ex de,hl
push bc
- call math_mul_l0
+ call math_smul_l0
pop bc
jr mul_l_done
exx
ret
-math_mul_w0: ; hl *= de
+; smul: short multiplication
+; 16 * 16 to 16 bit product (word)
+; 32 * 32 to 32 bit product (long)
+; mul: long multiplication
+; 16 + 16 * 16 to 32 bit product (word)
+; 32 + 32 * 32 to 64 bit product (long)
+
+; smul is implemented in a more optimal way that uses only left shifts,
+; because left shifts are cheaper on the z80. This works for smul because
+; there is no need to propagate carries into a high result.
+
+; mul can initialize the product with a nonzero value. smul cannot, because
+; its left shifts only ever shift zeros in. Using this ability, the long
+; multiplication reverses the long division: initialize the product with a
+; remainder, then add in quotient * divisor.
+
+math_smul_w0: ; hl *= de
ld c,l
ld b,h
- ld hl,0
-math_mul_w: ; hl += bc * de
+math_smul_w: ; hl = bc * de
ld a,d
- call mul_w0
+ call smul_w0
ld a,e
-mul_w: ; bit 0
+smul_w: ; bit 0
add hl,hl
-mul_w0: rla
+smul_w0: rla
jr nc,1$
add hl,bc
1$: ; bit 1
add hl,bc
ret
-math_mul_l0: ; hl':hl *= de':de
+math_smul_l0: ; hl':hl *= de':de
ld c,l
ld b,h
- ld hl,0
exx
ld c,l
ld b,h
- ld hl,0
exx
-math_mul_l: ; hl':hl += de':de * bc':bc
+math_smul_l: ; hl':hl = de':de * bc':bc
exx
ld a,d
exx
- call mul_l0
+ call smul_l0
exx
ld a,e
exx
- call mul_l
+ call smul_l
ld a,d
- call mul_l
+ call smul_l
ld a,e
-mul_l: ; bit 0
+smul_l: ; bit 0
add hl,hl
exx
adc hl,hl
exx
-mul_l0: rla
+smul_l0: rla
jr nc,1$
add hl,bc
exx
exx
ret
-math_div_sw0: ; hl, de = hl % de, hl / de, signed
+; Long multiplication, word size: 16 + 16 * 16 -> 32 bit result.
+;   math_mul_uw0 / math_mul_sw0: multiplicand in hl, multiplier in de, addend 0
+;   math_mul_uw  / math_mul_sw : multiplicand in bc, multiplier in de, addend in hl
+; out: hl = high word, de = low word of the result; bc = multiplicand
+; clobbers: a, flags
+;
+; The signed entry points used to be plain aliases of the unsigned ones, which
+; returned the wrong high word whenever an operand was negative (for example
+; 0xb975 * 0x31 gave 0023:7f65 instead of fff2:7f65). They now run the
+; unsigned multiply and then correct the high word.
+
+math_mul_uw0: ; hl:de = hl * de, unsigned
+ ld c,l
+ ld b,h
+ ld hl,0
+math_mul_uw: ; hl:de = hl + bc * de, unsigned
+ ld a,e
+ call mul_uw ; consume the low multiplier byte, a = result bits 0..7
+ ld e,a
+ ld a,d
+ call mul_uw ; consume the high multiplier byte, a = result bits 8..15
+ ld d,a
+ ret
+
+math_mul_sw0: ; hl:de = hl * de, signed
+ ld c,l
+ ld b,h
+ ld hl,0
+math_mul_sw: ; hl:de = hl + bc * de, signed
+ ; Reading a 16-bit word x as signed gives x - 0x10000 when bit 15 is set.
+ ; Expanding the product shows that the low word is the same as in the
+ ; unsigned case and the high word must be reduced by
+ ;   multiplier   if the multiplicand is negative,
+ ;   multiplicand if the multiplier is negative,
+ ;   1            if the addend is negative (the addend is taken to be a
+ ;                signed word here, e.g. a signed remainder -- confirm
+ ;                against the callers of math_mul_sw).
+ ; Uses two more words of stack than the unsigned routine.
+ push hl ; keep the addend, its sign is needed at the end
+ push de ; keep the multiplier, de is overwritten by the result
+ call math_mul_uw ; hl:de = unsigned result, bc untouched
+ ex (sp),hl ; hl = multiplier, stack top = unsigned high word
+ ld a,h ; a bit 7 = sign of the multiplier
+ bit 7,b ; multiplicand negative?
+ jr nz,1$ ; yes: the correction starts out as the multiplier
+ ld hl,0 ; no: the correction starts out as zero
+1$: rla ; carry = sign of the multiplier
+ jr nc,2$
+ add hl,bc ; multiplier negative: add the multiplicand
+2$: ex de,hl ; de = correction, hl = low word
+ ex (sp),hl ; hl = unsigned high word, stack top = low word
+ or a ; clear carry for sbc
+ sbc hl,de ; high word -= correction
+ pop de ; de = low word
+ pop af ; a = high byte of the addend
+ rla ; carry = sign of the addend
+ ret nc
+ dec hl ; negative addend: borrow one from the high word
+ ret
+mul_uw: rra ; in: a = multiplier byte, hl = partial sum, bc = multiplicand; carry = multiplier bit 0
+ ; bit 0: add the multiplicand when this multiplier bit is set
+ jr nc,1$ ; bit clear: skip the add, carry stays 0 for the shift below
+ add hl,bc ; carry = overflow out of the 16-bit add
+1$: rr h ; shift carry:hl right one place, the add's carry enters h bit 7
+ rr l ; the bit shifted out of l goes to carry
+ rra ; that bit enters a bit 7; the next multiplier bit drops into carry
+ ; bit 1: same step for each remaining bit
+ jr nc,2$
+ add hl,bc
+2$: rr h
+ rr l
+ rra
+ ; bit 2
+ jr nc,3$
+ add hl,bc
+3$: rr h
+ rr l
+ rra
+ ; bit 3
+ jr nc,4$
+ add hl,bc
+4$: rr h
+ rr l
+ rra
+ ; bit 4
+ jr nc,5$
+ add hl,bc
+5$: rr h
+ rr l
+ rra
+ ; bit 5
+ jr nc,6$
+ add hl,bc
+6$: rr h
+ rr l
+ rra
+ ; bit 6
+ jr nc,7$
+ add hl,bc
+7$: rr h
+ rr l
+ rra
+ ; bit 7
+ jr nc,8$
+ add hl,bc
+8$: rr h
+ rr l
+ rra ; ninth rra: the last result bit enters a, the carry that came in on the first rra leaves
+ ret ; out: a = the 8 bits shifted out of l, hl = updated partial sum, bc unchanged
+
+; sdiv: short division
+; 16 / 16 to 16 bit quotient, 16 bit remainder (word)
+; 32 / 32 to 32 bit quotient, 32 bit remainder (long)
+; div: long division
+; 32 / 16 to 16 bit quotient, 16 bit remainder (word)
+; 64 / 32 to 32 bit quotient, 32 bit remainder (long)
+
+; sdiv is implemented as sign/zero extension then div
+
+math_sdiv_sw0: ; hl, de = hl % de, hl / de, signed
ex de,hl
-math_div_sw1: ; hl, de = de % hl, de / hl, signed
+math_sdiv_sw: ; hl, de = de % hl, de / hl, signed
ld c,l
ld b,h
ld a,d
add hl,bc
ret
-math_div_uw0: ; hl, de = hl % de, hl / de, unsigned
+math_sdiv_uw0: ; hl, de = hl % de, hl / de, unsigned
ex de,hl
-math_div_uw1: ; hl, de = de % hl, de / hl, unsigned
+math_sdiv_uw: ; hl, de = de % hl, de / hl, unsigned
ld c,l
ld b,h
ld hl,0
; debugging
+print_hlde: ; print two words separated by ':' and followed by CR LF
+ call print_word ; first word -- presumably print_word prints hl; it is defined outside this view
+ ld a,': ; separator character
+ call print_char
+ ex de,hl ; bring the former de into hl for the second print_word call
+ call print_word
+ ex de,hl ; swap back; NOTE(review): restores hl/de only if print_word and print_char leave them alone -- confirm
+ ld a,0xd ; carriage return
+ call print_char
+ ld a,0xa ; line feed
+ jp print_char ; tail call: print_char's ret returns to our caller
+
print_trace: ; print af, bc, hl':de, de':hl, (sp+2):(sp), sp
call print_trace2
ld a,(bc)