Add division test and fix multiply bug; a bug remains for unsigned divisors with the high bit set
author Nick Downing <nick@ndcode.org>
Tue, 25 Jun 2019 10:34:18 +0000 (20:34 +1000)
committer Nick Downing <nick@ndcode.org>
Tue, 25 Jun 2019 10:34:18 +0000 (20:34 +1000)
.gitignore
Makefile
mul.c
mul2.c [deleted file]
sm3.asm

index 5ac53fc..84f7b90 100644 (file)
@@ -9,4 +9,3 @@
 cordic
 div
 mul
-mul2
index f7856c1..0abb477 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-all: div mul mul2 cordic
+all: div mul cordic
 
 div: div.c
        gcc -o $@ $^
@@ -6,8 +6,5 @@ div: div.c
 mul: mul.c
        gcc -o $@ $^
 
-mul2: mul2.c
-       gcc -o $@ $^
-
 cordic: cordic.c
        gcc -o $@ $^
diff --git a/mul.c b/mul.c
index c77001b..a141a3b 100644 (file)
--- a/mul.c
+++ b/mul.c
@@ -1,30 +1,36 @@
 #include <stdio.h>
 
 int main(void) {
-  printf("%08x\n", 0x1234 * 0x56);
-  printf("%08x\n", 0x6543 * 0x21);
-  printf("%08x\n", 0xb975 * 0x31);
-  printf("%08x\n", 0xdb97 * 0x531);
+  printf("%08x\n", 0x00001234 * 0x00000056); /* group 1: 16-bit values, zero-padded to 32-bit literals */
+  printf("%08x\n", 0x00006543 * 0x00000021);
+  printf("%08x\n", 0x0000b975 * 0x00000031);
+  printf("%08x\n", 0x0000db97 * 0x00000531);
 
-  printf("%08x\n", 0x1234 * 0x56);
-  printf("%08x\n", 0x6543 * 0x21);
-  printf("%08x\n", (int)0xffffb975 * 0x31);
-  printf("%08x\n", (int)0xffffdb97 * 0x531);
+  printf("%08x\n", 0x00001234 * 0x00000056);
+  printf("%08x\n", 0x00006543 * 0x00000021);
+  printf("%08x\n", (int)0xffffb975 * 0x00000031); /* 0xb975 sign-extended to 32 bits, then multiplied as int */
+  printf("%08x\n", (int)0xffffdb97 * 0x00000531);
 
-  printf("%08x\n", -0x1234 * 0x56);
-  printf("%08x\n", -0x6543 * 0x21);
-  printf("%08x\n", -(int)0xffffb975 * 0x31);
-  printf("%08x\n", -(int)0xffffdb97 * 0x531);
+  printf("%08x\n", -0x00001234 * 0x00000056);
+  printf("%08x\n", -0x00006543 * 0x00000021);
+  printf("%08x\n", -(int)0xffffb975 * 0x00000031);
+  printf("%08x\n", -(int)0xffffdb97 * 0x00000531);
 
-  printf("%08x\n", 0x1234 * -0x56);
-  printf("%08x\n", 0x6543 * -0x21);
-  printf("%08x\n", (int)0xffffb975 * -0x31);
-  printf("%08x\n", (int)0xffffdb97 * -0x531);
+  printf("%08x\n", 0x00001234 * -0x00000056);
+  printf("%08x\n", 0x00006543 * -0x00000021);
+  printf("%08x\n", (int)0xffffb975 * -0x00000031);
+  printf("%08x\n", (int)0xffffdb97 * -0x00000531);
 
-  printf("%08x\n", -0x1234 * -0x56);
-  printf("%08x\n", -0x6543 * -0x21);
-  printf("%08x\n", -(int)0xffffb975 * -0x31);
-  printf("%08x\n", -(int)0xffffdb97 * -0x531);
+  printf("%08x\n", -0x00001234 * -0x00000056);
+  printf("%08x\n", -0x00006543 * -0x00000021);
+  printf("%08x\n", -(int)0xffffb975 * -0x00000031);
+  printf("%08x\n", -(int)0xffffdb97 * -0x00000531);
+
+  printf("%016lx\n", 0x000000001357db97L * 0x00000000fdb97531L); /* 0xfdb97531 zero-extended to 64 bits; NOTE(review): %016lx presumes a 64-bit long - confirm target ABI */
+  printf("%016lx\n", 0x000000001357db97L * 0xfffffffffdb97531L); /* 0xfdb97531 sign-extended to 64 bits */
+  printf("%016lx\n", ~0x000000001357db97L * 0xfffffffffdb97531L); /* ~ is one's complement (not negation) of the first operand */
+  printf("%016lx\n", 0x000000001357db97L * ~0xfffffffffdb97531L); /* one's complement of the second operand */
+  printf("%016lx\n", ~0x000000001357db97L * ~0xfffffffffdb97531L); /* both operands complemented */
 
   return 0;
 }
diff --git a/mul2.c b/mul2.c
deleted file mode 100644 (file)
index ca9f5ad..0000000
--- a/mul2.c
+++ /dev/null
@@ -1,30 +0,0 @@
-#include <stdio.h>
-
-int main(void) {
-  printf("%08x\n", (unsigned short)0x1234 * (unsigned short)0x56);
-  printf("%08x\n", (unsigned short)0x6543 * (unsigned short)0x21);
-  printf("%08x\n", (unsigned short)0xb975 * (unsigned short)0x31);
-  printf("%08x\n", (unsigned short)0xdb97 * (unsigned short)0x531);
-
-  printf("%08x\n", (unsigned short)0x1234 * (unsigned short)0x56);
-  printf("%08x\n", (unsigned short)0x6543 * (unsigned short)0x21);
-  printf("%08x\n", (unsigned short)(int)0xffffb975 * (unsigned short)0x31);
-  printf("%08x\n", (unsigned short)(int)0xffffdb97 * (unsigned short)0x531);
-
-  printf("%08x\n", (unsigned short)-0x1234 * (unsigned short)0x56);
-  printf("%08x\n", (unsigned short)-0x6543 * (unsigned short)0x21);
-  printf("%08x\n", (unsigned short)-(int)0xffffb975 * (unsigned short)0x31);
-  printf("%08x\n", (unsigned short)-(int)0xffffdb97 * (unsigned short)0x531);
-
-  printf("%08x\n", (unsigned short)0x1234 * (unsigned short)-0x56);
-  printf("%08x\n", (unsigned short)0x6543 * (unsigned short)-0x21);
-  printf("%08x\n", (unsigned short)(int)0xffffb975 * (unsigned short)-0x31);
-  printf("%08x\n", (unsigned short)(int)0xffffdb97 * (unsigned short)-0x531);
-
-  printf("%08x\n", (unsigned short)-0x1234 * (unsigned short)-0x56);
-  printf("%08x\n", (unsigned short)-0x6543 * (unsigned short)-0x21);
-  printf("%08x\n", (unsigned short)-(int)0xffffb975 * (unsigned short)-0x31);
-  printf("%08x\n", (unsigned short)-(int)0xffffdb97 * (unsigned short)-0x531);
-
-  return 0;
-}
diff --git a/sm3.asm b/sm3.asm
index 22bd08f..584108b 100644 (file)
--- a/sm3.asm
+++ b/sm3.asm
@@ -1,6 +1,6 @@
-page0  =       3
-page1  =       5
-page2  =       7
+page0  =       4
+page1  =       6
+page2  =       8
 
        .area   SM (abs,ovr)
 
@@ -10,101 +10,196 @@ page2     =       7
        ld      de,0x56
        call    math_mul_uw0
        call    print_hlde
+       call    math_div_uw
+       call    print_hlde
 
        ld      hl,0x6543
        ld      de,0x21
        call    math_mul_uw0
        call    print_hlde
+       call    math_div_uw
+       call    print_hlde
 
        ld      hl,0xb975
        ld      de,0x31
        call    math_mul_uw0
        call    print_hlde
+       call    math_div_uw
+       call    print_hlde
 
        ld      hl,0xdb97
        ld      de,0x531
        call    math_mul_uw0
        call    print_hlde
+       call    math_div_uw
+       call    print_hlde
 
        ld      hl,0x1234
        ld      de,0x56
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
 
        ld      hl,0x6543
        ld      de,0x21
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
 
        ld      hl,0xb975
        ld      de,0x31
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
 
        ld      hl,0xdb97
        ld      de,0x531
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
 
        ld      hl,-0x1234
        ld      de,0x56
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
 
        ld      hl,-0x6543
        ld      de,0x21
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
 
        ld      hl,-0xb975
        ld      de,0x31
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
 
        ld      hl,-0xdb97
        ld      de,0x531
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
 
        ld      hl,0x1234
        ld      de,-0x56
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
 
        ld      hl,0x6543
        ld      de,-0x21
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
 
        ld      hl,0xb975
        ld      de,-0x31
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
 
        ld      hl,0xdb97
        ld      de,-0x531
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
 
        ld      hl,-0x1234
        ld      de,-0x56
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
 
        ld      hl,-0x6543
        ld      de,-0x21
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
 
        ld      hl,-0xb975
        ld      de,-0x31
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
 
        ld      hl,-0xdb97
        ld      de,-0x531
        call    math_mul_sw0
        call    print_hlde
+       call    math_div_sw
+       call    print_hlde
+
+       ld      hl,0xdb97
+       ld      de,0x7531
+       exx
+       ld      hl,0x1357
+       ld      de,0xfdb9
+       exx
+       call    math_mul_ul0
+       call    print_hlhldede
+       call    math_div_ul
+       call    print_hlhldede
+
+       ld      hl,0xdb97
+       ld      de,0x7531
+       exx
+       ld      hl,0x1357
+       ld      de,0xfdb9
+       exx
+       call    math_mul_sl0
+       call    print_hlhldede
+       call    math_div_sl
+       call    print_hlhldede
+
+       ld      hl,~0xdb97
+       ld      de,0x7531
+       exx
+       ld      hl,~0x1357
+       ld      de,0xfdb9
+       exx
+       call    math_mul_sl0
+       call    print_hlhldede
+       call    math_div_sl
+       call    print_hlhldede
+
+       ld      hl,0xdb97
+       ld      de,~0x7531
+       exx
+       ld      hl,0x1357
+       ld      de,~0xfdb9
+       exx
+       call    math_mul_sl0
+       call    print_hlhldede
+       call    math_div_sl
+       call    print_hlhldede
+
+       ld      hl,~0xdb97
+       ld      de,~0x7531
+       exx
+       ld      hl,~0x1357
+       ld      de,~0xfdb9
+       exx
+       call    math_mul_sl0
+       call    print_hlhldede
+       call    math_div_sl
+       call    print_hlhldede
 
        ld      hl,restarts
        ld      de,0x28
@@ -832,7 +927,7 @@ page2_imm_divrev_sl:
        ;.db    0x3e ; ld a,
        ex      de,hl
        push    bc
-       call    math_div_sl1
+       call    math_sdiv_sl
        jr      div_l_done
 page2_div_sl:
        ;rst    0x30
@@ -842,14 +937,14 @@ page2_div_sl:
        exx
        ex      de,hl
        push    bc
-       call    math_div_sl1
+       call    math_sdiv_sl
        jr      div_l_done
 
 page2_imm_div_sl:
        rst     0x38
        ;.db    0x3e ; ld a,
        push    bc
-       call    math_div_sl0
+       call    math_sdiv_sl0
        jr      div_l_done
 page2_divrev_sl:
        ;rst    0x30
@@ -858,7 +953,7 @@ page2_divrev_sl:
        pop     de
        exx
        push    bc
-       call    math_div_sl0
+       call    math_sdiv_sl0
        jr      div_l_done
 
 page2_imm_divrev_ul:
@@ -1947,6 +2042,7 @@ math_mul_uw0: ; hl:de = hl * de, unsigned
 math_mul_uw: ; hl:de = hl + bc * de, unsigned
        ld      a,e
        call    mul_uw
+       ld      e,a
        ld      a,d
        call    mul_uw
        ld      d,a
@@ -2004,6 +2100,208 @@ mul_uw1:
        rra
        ret
 
+math_mul_sl0: ; hl':hl:de':de = hl':hl * de':de, signed
+       ld      c,l             ; bc = low word of multiplicand
+       ld      b,h
+       sub     a               ; a = 0
+       ld      l,a             ; hl = 0 (accumulator, low word)
+       ld      h,a
+       exx
+       ld      c,l             ; bc' = high word of multiplicand
+       ld      b,h
+       ld      l,a             ; hl' = 0 (accumulator, high word); a is not affected by exx
+       ld      h,a
+       exx ; hard to optimize this
+math_mul_sl: ; hl':hl:de':de = hl':hl + bc':bc * de':de, signed
+       exx
+       ld      a,b             ; a = top byte of multiplicand (b')
+       rla ; cf will be preserved through to the last rra below
+       exx
+       ld      a,e             ; multiplier byte 0
+       call    mul_ul          ; a = product byte 0; cf comes back as it went in
+       push    af              ; save product byte 0 (push does not alter cf)
+       ld      a,d             ; multiplier byte 1
+       call    mul_ul
+       push    af              ; save product byte 1
+       exx
+       ld      a,e             ; multiplier byte 2 (e')
+       exx
+       call    mul_ul
+       push    af              ; save product byte 2
+       exx
+       ld      a,d             ; multiplier byte 3 (d')
+       exx
+       call    mul_ul1 ; do only 7 bits, get sign of d into cf
+       jr      nc,1$           ; multiplier top bit clear: nothing to subtract
+       or      a               ; clear cf so sbc acts as a plain subtract
+       sbc     hl,bc           ; top multiplier bit set: hl':hl -= bc':bc
+       exx
+       sbc     hl,bc           ; high word, with borrow from the low word
+       exx ; hard to optimize this
+1$:    exx
+       rr      h               ; last right shift of hl':hl; cf (0 or the borrow) enters at the top
+       rr      l
+       exx
+       rr      h
+       rr      l
+       rra                     ; shifted-out bit -> a bit 7; multiplicand top bit (from rla above) -> cf
+       jr      nc,2$           ; multiplicand top bit clear: nothing to subtract
+       or      a               ; clear cf so sbc acts as a plain subtract
+       sbc     hl,de           ; multiplicand top bit set: hl':hl -= de':de
+       exx
+       sbc     hl,de           ; high word, with borrow from the low word
+       exx ; hard to optimize this
+2$:    exx
+       ld      d,a             ; d' = product byte 3
+       pop     af
+       ld      e,a             ; e' = product byte 2
+       exx
+       pop     de              ; d = product byte 1 (e gets the saved flags, overwritten below)
+       pop     af
+       ld      e,a             ; e = product byte 0
+       ret
+
+math_mul_ul0: ; hl':hl:de':de = hl':hl * de':de, unsigned
+       ld      c,l             ; bc = low word of multiplicand
+       ld      b,h
+       sub     a               ; a = 0
+       ld      l,a             ; hl = 0 (accumulator, low word)
+       ld      h,a
+       exx
+       ld      c,l             ; bc' = high word of multiplicand
+       ld      b,h
+       ld      l,a             ; hl' = 0 (accumulator, high word); a is not affected by exx
+       ld      h,a
+       exx
+math_mul_ul: ; hl':hl:de':de = hl':hl + bc':bc * de':de, unsigned
+       ld      a,e             ; multiplier byte 0
+       call    mul_ul          ; a = 8 product bits shifted out of hl':hl
+       ld      e,a             ; e = product byte 0
+       ld      a,d             ; multiplier byte 1
+       call    mul_ul
+       ld      d,a             ; d = product byte 1
+       exx
+       ld      a,e             ; multiplier byte 2 (e')
+       exx
+       call    mul_ul          ; mul_ul expects the main register set on entry
+       exx
+       ld      e,a             ; e' = product byte 2
+       ld      a,d             ; multiplier byte 3 (d')
+       exx
+       call    mul_ul
+       exx
+       ld      d,a             ; d' = product byte 3
+       exx
+       ret
+
+mul_ul: rra                    ; in: a = multiplier byte, bc':bc = multiplicand, hl':hl = accumulator; cf -> a bit 7, multiplier bit 0 -> cf
+       ; bit 0
+       jr      nc,1$           ; bit clear: skip the add (cf = 0 goes into the shift)
+       add     hl,bc           ; bit set: hl':hl += bc':bc
+       exx
+       adc     hl,bc           ; high word, with carry from the low word
+       exx ; optimize this
+1$:    exx
+       rr      h               ; shift cf:hl':hl right by one bit
+       rr      l
+       exx
+       rr      h
+       rr      l               ; bit shifted out of l -> cf
+mul_ul1:                       ; second entry point: math_mul_sl calls here to run only 7 steps
+       rra                     ; cf -> a bit 7, next multiplier bit -> cf
+       ; bit 1
+       jr      nc,2$
+       add     hl,bc
+       exx
+       adc     hl,bc
+       exx ; optimize this
+2$:    exx
+       rr      h
+       rr      l
+       exx
+       rr      h
+       rr      l
+       rra
+       ; bit 2
+       jr      nc,3$
+       add     hl,bc
+       exx
+       adc     hl,bc
+       exx ; optimize this
+3$:    exx
+       rr      h
+       rr      l
+       exx
+       rr      h
+       rr      l
+       rra
+       ; bit 3
+       jr      nc,4$
+       add     hl,bc
+       exx
+       adc     hl,bc
+       exx ; optimize this
+4$:    exx
+       rr      h
+       rr      l
+       exx
+       rr      h
+       rr      l
+       rra
+       ; bit 4
+       jr      nc,5$
+       add     hl,bc
+       exx
+       adc     hl,bc
+       exx ; optimize this
+5$:    exx
+       rr      h
+       rr      l
+       exx
+       rr      h
+       rr      l
+       rra
+       ; bit 5
+       jr      nc,6$
+       add     hl,bc
+       exx
+       adc     hl,bc
+       exx ; optimize this
+6$:    exx
+       rr      h
+       rr      l
+       exx
+       rr      h
+       rr      l
+       rra
+       ; bit 6 
+       jr      nc,7$
+       add     hl,bc
+       exx
+       adc     hl,bc
+       exx ; optimize this
+7$:    exx
+       rr      h
+       rr      l
+       exx
+       rr      h
+       rr      l
+       rra
+       ; bit 7
+       jr      nc,8$
+       add     hl,bc
+       exx
+       adc     hl,bc
+       exx ; optimize this
+8$:    exx
+       rr      h
+       rr      l
+       exx
+       rr      h
+       rr      l
+       rra                     ; ninth rra from mul_ul: a = 8 shifted-out product bits, cf = cf on entry
+       ret
+
 ; sdiv: short division
 ; 16 / 16 to 16 bit quotient, 16 bit remainder (word)
 ; 32 / 32 to 32 bit quotient, 32 bit remainder (long)
@@ -2081,6 +2379,17 @@ math_sdiv_uw: ; hl, de = de % hl, de / hl, unsigned
        ld      b,h
        ld      hl,0
 math_div_uw: ; hl, de = hl:de % bc, hl:de / bc, unsigned
+ ld a,'X
+ call print_char
+ call print_word
+ ex de,hl
+ call print_word
+ ex de,hl
+ push hl
+ ld l,c
+ ld h,b
+ call print_word
+ pop hl
        ld      a,d
 div_w_pp: ; positive dividend, positive divisor
        call    div_w0
@@ -2234,7 +2543,7 @@ div_w07: ; bit 7, above
 div_w18: ; done, below
        add     a,a
        ;inc    a
-       ;bcc    a                       ; compensation
+       ;dec    a                       ; compensation
        scf
        ret
 
@@ -2360,15 +2669,15 @@ div_w_n07: ; bit 7, above
 div_w_n18: ; done, below
        add     a,a
        ;inc    a
-       ;bcc    a                       ; compensation
+       ;dec    a                       ; compensation
        scf
        ret
 
-math_div_sl0: ; hl':hl, de':de = hl':de % de':hl, hl':de / de':hl, signed
+math_sdiv_sl0: ; hl':hl, de':de = hl':de % de':hl, hl':de / de':hl, signed
        exx
        ex      de,hl
        exx
-math_div_sl1: ; ; hl':hl, de':de = de':de % hl':hl, de':de / hl':hl, signed
+math_sdiv_sl: ; ; hl':hl, de':de = de':de % hl':hl, de':de / hl':hl, signed
        exx
        ld      c,l
        ld      b,h
@@ -2482,11 +2791,11 @@ math_div_ul1: ; ; hl':hl, de':de = de':de % hl':hl, de':de / hl':hl, unsigned
        ld      b,h
        ld      l,a
        ld      h,a
-       ;exx
-;math_div_ul:
+       exx
+math_div_ul:
        ; hl':hl, de':de =
        ;   hl':hl:de':de % bc':bc, hl':hl:de':de / bc':bc, unsigned
-       ;exx
+       exx
        ld      a,d
        exx
 div_l_pp: ; positive dividend, positive divisor
@@ -2988,6 +3297,26 @@ print_hlde:
        ld      a,0xa
        jp      print_char
 
+print_hlhldede:                ; print hl', hl, ':', de', de, then CR LF
+       exx
+       call    print_word      ; hl' (presumably print_word prints hl and preserves registers - confirm)
+       exx
+       call    print_word      ; hl
+       ld      a,':
+       call    print_char
+       exx
+       ex      de,hl           ; bring de' into hl for print_word
+       call    print_word
+       ex      de,hl           ; swap back
+       exx
+       ex      de,hl           ; bring de into hl for print_word
+       call    print_word
+       ex      de,hl           ; swap back
+       ld      a,0xd           ; CR
+       call    print_char
+       ld      a,0xa           ; LF
+       jp      print_char      ; tail call: print_char's ret returns to our caller
+
 print_trace: ; print af, bc, hl':de, de':hl, (sp+2):(sp), sp
        call    print_trace2
        ld      a,(bc)