Temporarily de-optimize the inner divide routine as must add another branch
author Nick Downing <nick@ndcode.org>
Tue, 25 Jun 2019 11:05:55 +0000 (21:05 +1000)
committer Nick Downing <nick@ndcode.org>
Tue, 25 Jun 2019 11:05:55 +0000 (21:05 +1000)
sm3.asm

diff --git a/sm3.asm b/sm3.asm
index 584108b..c5d4ae5 100644 (file)
--- a/sm3.asm
+++ b/sm3.asm
@@ -2437,6 +2437,47 @@ div_w_pp: ; positive dividend, positive divisor
 
 div_wcf:
        jr      c,div_w1                ; entry carry set: take the "below" path, else fall into "above"
+.if 1 ; temporary: loop form, d counts 8 passes and de is saved/restored around it
+div_w0: ; above
+       push    de                      ; d is borrowed as the pass counter
+       ld      d,8                     ; 8 passes through the loop
+       scf
+       rla                             ; a = (a << 1) | 1, carry = old bit 7 of a
+div_w00:
+       adc     hl,hl                   ; hl = hl * 2 + carry (bit shifted out of a)
+       sbc     hl,bc                   ; hl -= bc + carry left by adc; NOTE(review): div_w_n00 clears carry first, presumably it is always clear here -- confirm
+       jr      nc,div_w01              ; no borrow: stay on the "above" path
+       dec     a                       ; borrow: a -= 1 (carry untouched), then continue on the "below" path
+       jr      div_w11                 ; joins the "below" loop at its shift/count step
+div_w01:
+       add     a,a                     ; a <<= 1, carry = old bit 7 of a
+       dec     d                       ; count the pass; dec leaves carry intact for adc
+       jr      nz,div_w00
+       dec     a                       ; exit adjustment on the "above" path only; NOTE(review): rationale not visible here
+       or      a                       ; clear carry: "above" exit returns with carry clear
+       pop     de
+       ret
+
+div_w1: ; below
+       push    de                      ; d is borrowed as the pass counter
+       ld      d,8                     ; 8 passes through the loop
+       add     a,a                     ; a <<= 1 with bit 0 = 0 (div_w0 shifts in a 1), carry = old bit 7
+div_w10:
+       adc     hl,hl                   ; hl = hl * 2 + carry (bit shifted out of a)
+       add     hl,bc                   ; hl += bc; add ignores the incoming carry
+       jr      nc,div_w11              ; no carry out: stay on the "below" path
+       inc     a                       ; carry out: a += 1 (carry untouched), then continue on the "above" path
+       jr      div_w01                 ; joins the "above" loop at its shift/count step
+div_w11:
+       add     a,a                     ; a <<= 1, carry = old bit 7 of a
+       dec     d                       ; count the pass; dec leaves carry intact for adc
+       jr      nz,div_w10
+       ;inc    a
+       ;dec    a                       ; compensation
+       scf                             ; "below" exit returns with carry set
+       pop     de
+       ret
+.else
 div_w0: ; bit 0, above
        scf
        rla
@@ -2546,15 +2587,58 @@ div_w18: ; done, below
        ;dec    a                       ; compensation
        scf
        ret
+.endif
 
 ; divn0/1 are the same as div0/1 but carry reversed after add/subtract divisor
 ; this is for negative divisors where we expect carry (means no zero crossing)
 
-; when divisor negated, remainbcr also negated, so we expect to do subtraction
-; when remainbcr negative and vice versa, need to clear carry after add hl,hl
+; when divisor negated, remainder also negated, so we expect to do subtraction
+; when remainder negative and vice versa, need to clear carry after add hl,hl
 
 div_w_ncf:
        jr      c,div_w_n1              ; entry carry set: take the "below" path, else fall into "above"
+.if 1 ; temporary: loop form, d counts 8 passes and de is saved/restored around it
+div_w_n0: ; above
+       push    de                      ; d is borrowed as the pass counter
+       ld      d,8                     ; 8 passes through the loop
+       scf
+       rla                             ; a = (a << 1) | 1, carry = old bit 7 of a
+div_w_n00:
+       adc     hl,hl                   ; hl = hl * 2 + carry (bit shifted out of a)
+       or      a                       ; clear the carry out of the shift so sbc subtracts bc only
+       sbc     hl,bc                   ; hl -= bc, carry = borrow
+       jr      c,div_w_n01             ; borrow: stay on the "above" path (sense reversed vs div_w00)
+       dec     a                       ; no borrow: a -= 1 (carry untouched), then continue on the "below" path
+       jr      div_w_n11               ; joins the "below" loop at its shift/count step
+div_w_n01:
+       add     a,a                     ; a <<= 1, carry = old bit 7 of a
+       dec     d                       ; count the pass; dec leaves carry intact for adc
+       jr      nz,div_w_n00
+       dec     a                       ; exit adjustment on the "above" path only; NOTE(review): rationale not visible here
+       or      a                       ; clear carry: "above" exit returns with carry clear
+       pop     de
+       ret
+
+div_w_n1: ; below
+       push    de                      ; d is borrowed as the pass counter
+       ld      d,8                     ; 8 passes through the loop
+       add     a,a                     ; a <<= 1 with bit 0 = 0 (div_w_n0 shifts in a 1), carry = old bit 7
+div_w_n10:
+       adc     hl,hl                   ; hl = hl * 2 + carry (bit shifted out of a)
+       add     hl,bc                   ; hl += bc; add ignores the incoming carry
+       jr      c,div_w_n11             ; carry out: stay on the "below" path (sense reversed vs div_w10)
+       inc     a                       ; no carry out: a += 1 (carry untouched), then continue on the "above" path
+       jr      div_w_n01               ; joins the "above" loop at its shift/count step
+div_w_n11:
+       add     a,a                     ; a <<= 1, carry = old bit 7 of a
+       dec     d                       ; count the pass; dec leaves carry intact for adc
+       jr      nz,div_w_n10
+       ;inc    a
+       ;dec    a                       ; compensation
+       scf                             ; "below" exit returns with carry set
+       pop     de
+       ret
+.else
 div_w_n0: ; bit 0, above
        scf
        rla
@@ -2672,6 +2756,7 @@ div_w_n18: ; done, below
        ;dec    a                       ; compensation
        scf
        ret
+.endif
 
 math_sdiv_sl0: ; hl':hl, de':de = hl':de % de':hl, hl':de / de':hl, signed
        exx