Correct unsigned 16x16 to 32 bit multiply, need to add corrections for signed version
author Nick Downing <nick.downing@lifx.co>
Mon, 24 Jun 2019 05:08:20 +0000 (15:08 +1000)
committer Nick Downing <nick.downing@lifx.co>
Mon, 24 Jun 2019 05:08:54 +0000 (15:08 +1000)
.gitignore
Makefile
mul.c [new file with mode: 0644]
mul2.c [new file with mode: 0644]
sm3.asm

index dcdb971..5ac53fc 100644 (file)
@@ -8,3 +8,5 @@
 /bin
 cordic
 div
+mul
+mul2
index 0790d34..f7856c1 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,13 @@
+all: div mul mul2 cordic
+
 div: div.c
        gcc -o $@ $^
 
+mul: mul.c
+       gcc -o $@ $^
+
+mul2: mul2.c
+       gcc -o $@ $^
+
 cordic: cordic.c
        gcc -o $@ $^
diff --git a/mul.c b/mul.c
new file mode 100644 (file)
index 0000000..c77001b
--- /dev/null
+++ b/mul.c
@@ -0,0 +1,30 @@
+#include <stdio.h>
+
+/* Print the int product a * b as zero-padded hexadecimal (at least 8 digits). */
+static void show(int a, int b) {
+  printf("%08x\n", a * b);
+}
+
+/*
+ * Emit reference products for the multiply test cases: the same four
+ * operand pairs are printed five times with different sign variations.
+ * One line of output per case, in the order listed below.
+ */
+int main(void) {
+  /* operands as plain positive constants */
+  show(0x1234, 0x56);
+  show(0x6543, 0x21);
+  show(0xb975, 0x31);
+  show(0xdb97, 0x531);
+
+  /* last two left operands written as 0xffffXXXX cast to int
+     (negative values where int is 32 bits wide) */
+  show(0x1234, 0x56);
+  show(0x6543, 0x21);
+  show((int)0xffffb975, 0x31);
+  show((int)0xffffdb97, 0x531);
+
+  /* left operand negated */
+  show(-0x1234, 0x56);
+  show(-0x6543, 0x21);
+  show(-(int)0xffffb975, 0x31);
+  show(-(int)0xffffdb97, 0x531);
+
+  /* right operand negated */
+  show(0x1234, -0x56);
+  show(0x6543, -0x21);
+  show((int)0xffffb975, -0x31);
+  show((int)0xffffdb97, -0x531);
+
+  /* both operands negated */
+  show(-0x1234, -0x56);
+  show(-0x6543, -0x21);
+  show(-(int)0xffffb975, -0x31);
+  show(-(int)0xffffdb97, -0x531);
+
+  return 0;
+}
diff --git a/mul2.c b/mul2.c
new file mode 100644 (file)
index 0000000..ca9f5ad
--- /dev/null
+++ b/mul2.c
@@ -0,0 +1,30 @@
+#include <stdio.h>
+
+/*
+ * Print the unsigned 16 x 16 -> 32 bit product of a and b as eight
+ * zero-padded hex digits.
+ *
+ * The operands are widened to unsigned long before multiplying.  Writing
+ * (unsigned short)x * (unsigned short)y directly promotes both operands
+ * to int, and a product above INT_MAX (for example 0xedcc * 0xffaa)
+ * overflows a 32-bit int, which is undefined behaviour.  unsigned long is
+ * at least 32 bits wide, so every 16 x 16 product is represented exactly.
+ */
+static void show(unsigned short a, unsigned short b) {
+  printf("%08lx\n", (unsigned long)a * (unsigned long)b);
+}
+
+/*
+ * Emit unsigned reference products: every operand is first truncated to
+ * 16 bits (unsigned short), then the full 32-bit product is printed.
+ * One line of output per case, in the order listed below.
+ */
+int main(void) {
+  /* operands as plain positive constants */
+  show((unsigned short)0x1234, (unsigned short)0x56);
+  show((unsigned short)0x6543, (unsigned short)0x21);
+  show((unsigned short)0xb975, (unsigned short)0x31);
+  show((unsigned short)0xdb97, (unsigned short)0x531);
+
+  /* last two left operands written as 0xffffXXXX cast to int;
+     truncation to 16 bits gives the same bit patterns as above */
+  show((unsigned short)0x1234, (unsigned short)0x56);
+  show((unsigned short)0x6543, (unsigned short)0x21);
+  show((unsigned short)(int)0xffffb975, (unsigned short)0x31);
+  show((unsigned short)(int)0xffffdb97, (unsigned short)0x531);
+
+  /* left operand negated before truncation */
+  show((unsigned short)-0x1234, (unsigned short)0x56);
+  show((unsigned short)-0x6543, (unsigned short)0x21);
+  show((unsigned short)-(int)0xffffb975, (unsigned short)0x31);
+  show((unsigned short)-(int)0xffffdb97, (unsigned short)0x531);
+
+  /* right operand negated before truncation */
+  show((unsigned short)0x1234, (unsigned short)-0x56);
+  show((unsigned short)0x6543, (unsigned short)-0x21);
+  show((unsigned short)(int)0xffffb975, (unsigned short)-0x31);
+  show((unsigned short)(int)0xffffdb97, (unsigned short)-0x531);
+
+  /* both operands negated before truncation */
+  show((unsigned short)-0x1234, (unsigned short)-0x56);
+  show((unsigned short)-0x6543, (unsigned short)-0x21);
+  show((unsigned short)-(int)0xffffb975, (unsigned short)-0x31);
+  show((unsigned short)-(int)0xffffdb97, (unsigned short)-0x531);
+
+  return 0;
+}
diff --git a/sm3.asm b/sm3.asm
index 204cf2c..986ff44 100644 (file)
--- a/sm3.asm
+++ b/sm3.asm
-page0  =       2
-page1  =       4
-page2  =       6
+page0  =       3
+page1  =       5
+page2  =       7
 
        .area   SM (abs,ovr)
 
        .org    0x100
 
+       ; multiply self-test: load each operand pair into hl and de, call
+       ; the multiply entry point, then print the hl:de result on its own
+       ; line with print_hlde; the operand pairs mirror the ones used in
+       ; mul.c / mul2.c
+
+       ; unsigned entry point, operands as written
+       ld      hl,0x1234
+       ld      de,0x56
+       call    math_mul_uw0
+       call    print_hlde
+
+       ld      hl,0x6543
+       ld      de,0x21
+       call    math_mul_uw0
+       call    print_hlde
+
+       ld      hl,0xb975
+       ld      de,0x31
+       call    math_mul_uw0
+       call    print_hlde
+
+       ld      hl,0xdb97
+       ld      de,0x531
+       call    math_mul_uw0
+       call    print_hlde
+
+       ; signed entry point, same operands as written
+       ld      hl,0x1234
+       ld      de,0x56
+       call    math_mul_sw0
+       call    print_hlde
+
+       ld      hl,0x6543
+       ld      de,0x21
+       call    math_mul_sw0
+       call    print_hlde
+
+       ld      hl,0xb975
+       ld      de,0x31
+       call    math_mul_sw0
+       call    print_hlde
+
+       ld      hl,0xdb97
+       ld      de,0x531
+       call    math_mul_sw0
+       call    print_hlde
+
+       ; signed entry point, hl operand negated
+       ld      hl,-0x1234
+       ld      de,0x56
+       call    math_mul_sw0
+       call    print_hlde
+
+       ld      hl,-0x6543
+       ld      de,0x21
+       call    math_mul_sw0
+       call    print_hlde
+
+       ld      hl,-0xb975
+       ld      de,0x31
+       call    math_mul_sw0
+       call    print_hlde
+
+       ld      hl,-0xdb97
+       ld      de,0x531
+       call    math_mul_sw0
+       call    print_hlde
+
+       ; signed entry point, de operand negated
+       ld      hl,0x1234
+       ld      de,-0x56
+       call    math_mul_sw0
+       call    print_hlde
+
+       ld      hl,0x6543
+       ld      de,-0x21
+       call    math_mul_sw0
+       call    print_hlde
+
+       ld      hl,0xb975
+       ld      de,-0x31
+       call    math_mul_sw0
+       call    print_hlde
+
+       ld      hl,0xdb97
+       ld      de,-0x531
+       call    math_mul_sw0
+       call    print_hlde
+
+       ; signed entry point, both operands negated
+       ld      hl,-0x1234
+       ld      de,-0x56
+       call    math_mul_sw0
+       call    print_hlde
+
+       ld      hl,-0x6543
+       ld      de,-0x21
+       call    math_mul_sw0
+       call    print_hlde
+
+       ld      hl,-0xb975
+       ld      de,-0x31
+       call    math_mul_sw0
+       call    print_hlde
+
+       ld      hl,-0xdb97
+       ld      de,-0x531
+       call    math_mul_sw0
+       call    print_hlde
+
+       ; end of self-test: execution falls through into the code below
+
        ld      hl,restarts
        ld      de,0x28
        ld      bc,restarts_end - restarts
@@ -402,7 +502,7 @@ page1_imm_mul_uw:
 page1_mul_w:
        pop     hl
        push    bc
-       call    math_mul_w0
+       call    math_smul_w0
        pop     bc
        jr      mul_w_done
 
@@ -412,7 +512,7 @@ page1_imm_divrev_sw:
 page1_div_sw:
        pop     hl      
        push    bc
-       call    math_div_sw0
+       call    math_sdiv_sw0
        jr      div_w_done
 
 page1_imm_div_sw:
@@ -421,7 +521,7 @@ page1_imm_div_sw:
 page1_divrev_sw:
        pop     hl
        push    bc
-       call    math_div_sw1
+       call    math_sdiv_sw
        jr      div_w_done
 
 page1_imm_divrev_uw:
@@ -430,7 +530,7 @@ page1_imm_divrev_uw:
 page1_div_uw:
        pop     hl
        push    bc
-       call    math_div_uw0
+       call    math_sdiv_uw0
        jr      div_w_done
 
 page1_imm_div_uw:
@@ -439,7 +539,7 @@ page1_imm_div_uw:
 page1_divrev_uw:
        pop     hl
        push    bc
-       call    math_div_uw1
+       call    math_sdiv_uw
        jr      div_w_done
 
 ; page 1 to 2
@@ -723,7 +823,7 @@ page2_mul_l:
 mul_l_entry:
        ex      de,hl
        push    bc
-       call    math_mul_l0
+       call    math_smul_l0
        pop     bc
        jr      mul_l_done
 
@@ -1640,17 +1740,32 @@ sr_l_entry:
        exx
        ret
 
-math_mul_w0: ; hl *= de
+; smul: short multiplication
+; 16 * 16 to 16 bit product (word)
+; 32 * 32 to 32 bit product (long)
+; mul: long multiplication
+; 16 + 16 * 16 to 32 bit product (word)
+; 32 + 32 * 32 to 64 bit product (long)
+
+; smul is implemented in a more efficient way that uses only left shifts,
+; since left shifts are cheaper on the z80. This works for smul because the
+; high half of the product is not kept, so there is no need to worry about
+; propagating carries into the high result.
+
+; mul has the ability to initialize the product with some nonzero value,
+; which smul doesn't have because its left shifts only bring zeros into
+; the product. Using this ability, the long multiplication reverses the
+; long division (initialize the product with a remainder, then add in
+; quotient * divisor).
+
+; short word multiply, 16 * 16 to 16 bit product
+; math_smul_w0 copies hl into bc, then multiplies bc by de into hl; the
+; multiplier is consumed high byte (d) first, one bit per unrolled step,
+; each step shifting hl left and adding bc when the multiplier bit is set
+; NOTE(review): this change drops the old "ld hl,0", so hl still holds the
+; multiplicand when the shift/add chain starts. The first call enters at
+; smul_w0, after the "add hl,hl" of bit 0; if the unrolled steps not shown
+; here shift once per remaining bit, hl is shifted left only 15 times and
+; bit 0 of its initial value would end up in bit 15 of the product.
+; The same removal is made in math_smul_l0.
+; TODO confirm whether hl must still be cleared on entry.
+math_smul_w0: ; hl *= de
        ld      c,l
        ld      b,h
-       ld      hl,0
-math_mul_w: ; hl += bc * de
+math_smul_w: ; hl = bc * de
        ld      a,d
-       call    mul_w0
+       call    smul_w0
        ld      a,e
-mul_w: ; bit 0
+smul_w:        ; bit 0
        add     hl,hl
-mul_w0:        rla
+smul_w0:       rla
        jr      nc,1$
        add     hl,bc
 1$:    ; bit 1
@@ -1690,33 +1805,31 @@ mul_w0: rla
        add     hl,bc
        ret
 
-math_mul_l0: ; hl':hl *= de':de
+math_smul_l0: ; hl':hl *= de':de
        ld      c,l
        ld      b,h
-       ld      hl,0
        exx
        ld      c,l
        ld      b,h
-       ld      hl,0
        exx
-math_mul_l: ; hl':hl += de':de * bc':bc
+math_smul_l: ; hl':hl = de':de * bc':bc
        exx
        ld      a,d
        exx
-       call    mul_l0
+       call    smul_l0
        exx
        ld      a,e
        exx
-       call    mul_l
+       call    smul_l
        ld      a,d
-       call    mul_l
+       call    smul_l
        ld      a,e
-mul_l: ; bit 0
+smul_l:        ; bit 0
        add     hl,hl
        exx
        adc     hl,hl
        exx
-mul_l0:        rla
+smul_l0:       rla
        jr      nc,1$
        add     hl,bc
        exx
@@ -1801,9 +1914,83 @@ mul_l0:  rla
        exx
        ret
 
-math_div_sw0: ; hl, de = hl % de, hl / de, signed
+; long word multiply, 16 * 16 to 32 bit product
+; math_mul_uw0 / math_mul_sw0:
+;   in:  hl = multiplicand, de = multiplier
+; math_mul_uw / math_mul_sw:
+;   in:  hl = value added to the product, bc = multiplicand, de = multiplier
+; out:  hl:de = 32 bit result, high word in hl, low word in de
+; uses: a, flags; the *0 entries also overwrite bc with the multiplicand
+; NOTE(review): the signed labels are currently plain aliases of the
+; unsigned ones (same address), so no sign correction is applied yet and
+; the high word is the unsigned one even when an operand is negative
+math_mul_uw0: ; hl:de = hl * de, unsigned
+math_mul_sw0: ; hl:de = hl * de, signed
+       ld      c,l
+       ld      b,h
+       ld      hl,0
+math_mul_uw: ; hl:de = hl + bc * de, unsigned
+math_mul_sw: ; hl:de = hl + bc * de, signed
+       ld      a,e             ; low multiplier byte first
+       call    mul_uw
+       ld      e,a             ; result bits 0..7
+       ld      a,d             ; then the high multiplier byte
+       call    mul_uw
+       ld      d,a             ; result bits 8..15, hl = result bits 16..31
+       ret
+
+; multiply step for one multiplier byte, fully unrolled
+; in:   a = multiplier byte, hl = running high word, bc = multiplicand
+; out:  a = the 8 result bits shifted out of hl, hl = new running high word
+; for each multiplier bit, lowest first: add bc into hl if the bit is set,
+; then shift carry:hl right one place (carry is the carry out of the add,
+; or clear when the add was skipped); the bit shifted out of l enters a at
+; the top while the next multiplier bit leaves a at the bottom into carry
+mul_uw: rra                    ; carry = multiplier bit 0
+       ; bit 0
+       jr      nc,1$           ; bit clear: skip the add, carry stays 0
+       add     hl,bc           ; bit set: carry = carry out of bit 15
+1$:    rr      h               ; carry -> h bit 7
+       rr      l               ; h bit 0 -> l bit 7, l bit 0 -> carry
+       rra                     ; result bit -> a bit 7, next bit -> carry
+       ; bit 1
+       jr      nc,2$
+       add     hl,bc
+2$:    rr      h
+       rr      l
+       rra
+       ; bit 2
+       jr      nc,3$
+       add     hl,bc
+3$:    rr      h
+       rr      l
+       rra
+       ; bit 3
+       jr      nc,4$
+       add     hl,bc
+4$:    rr      h
+       rr      l
+       rra
+       ; bit 4
+       jr      nc,5$
+       add     hl,bc
+5$:    rr      h
+       rr      l
+       rra
+       ; bit 5
+       jr      nc,6$
+       add     hl,bc
+6$:    rr      h
+       rr      l
+       rra
+       ; bit 6
+       jr      nc,7$
+       add     hl,bc
+7$:    rr      h
+       rr      l
+       rra
+       ; bit 7
+       jr      nc,8$
+       add     hl,bc
+8$:    rr      h
+       rr      l
+       rra                     ; last result bit in; a = 8 result bits
+       ret
+
+; sdiv: short division
+; 16 / 16 to 16 bit quotient, 16 bit remainder (word)
+; 32 / 32 to 32 bit quotient, 32 bit remainder (long)
+; div: long division
+; 32 / 16 to 16 bit quotient, 16 bit remainder (word)
+; 64 / 32 to 32 bit quotient, 32 bit remainder (long)
+
+; sdiv is implemented as sign/zero extension then div
+
+math_sdiv_sw0: ; hl, de = hl % de, hl / de, signed
        ex      de,hl
-math_div_sw1: ; hl, de = de % hl, de / hl, signed
+math_sdiv_sw: ; hl, de = de % hl, de / hl, signed
        ld      c,l
        ld      b,h
        ld      a,d
@@ -1862,9 +2049,9 @@ div_w_nn: ; negative dividend, negative divisor
        add     hl,bc
        ret
 
-math_div_uw0: ; hl, de = hl % de, hl / de, unsigned
+math_sdiv_uw0: ; hl, de = hl % de, hl / de, unsigned
        ex      de,hl
-math_div_uw1: ; hl, de = de % hl, de / hl, unsigned
+math_sdiv_uw: ; hl, de = de % hl, de / hl, unsigned
        ld      c,l
        ld      b,h
        ld      hl,0
@@ -2764,6 +2951,18 @@ div_l_n18: ; done, below
 
 ; debugging
 
+; print_hlde: debugging helper for the multiply tests; makes one
+; print_word call, prints ':', makes a second print_word call with hl and
+; de exchanged, exchanges them back, then prints CR and LF
+; uses: a, plus whatever print_word and print_char use
+; NOTE(review): presumably print_word prints the word in hl and both
+; helpers preserve hl and de -- confirm against their definitions, which
+; are outside this view
+print_hlde:
+       call    print_word
+       ld      a,':            ; a = ':' (character constant)
+       call    print_char
+       ex      de,hl           ; swap so the second print_word sees de
+       call    print_word
+       ex      de,hl           ; swap back
+       ld      a,0xd           ; carriage return
+       call    print_char
+       ld      a,0xa           ; line feed
+       jp      print_char      ; jump, not call: print_char returns for us
+
+
 print_trace: ; print af, bc, hl':de, de':hl, (sp+2):(sp), sp
        call    print_trace2
        ld      a,(bc)