Unroll slightly to avoid an inner loop jump to loop end on zero crossing [master]
author Nick Downing <nick@ndcode.org>
Tue, 25 Jun 2019 11:43:29 +0000 (21:43 +1000)
committer Nick Downing <nick@ndcode.org>
Tue, 25 Jun 2019 11:43:29 +0000 (21:43 +1000)
sm3.asm

diff --git a/sm3.asm b/sm3.asm
index f8866db..0a622b7 100644 (file)
--- a/sm3.asm
+++ b/sm3.asm
@@ -2438,7 +2438,15 @@ div_w00:
        sbc     hl,bc
        jr      nc,div_w02
        dec     a
-       jr      div_w12
+       ;jr     div_w12
+       add     a,a
+       dec     d
+       jr      nz,div_w10
+       ;inc    a
+       ;dec    a                       ; compensation
+       scf
+       pop     de
+       ret
 div_w01:
        or      a
        sbc     hl,bc
@@ -2461,7 +2469,14 @@ div_w10:
        add     hl,bc
        jr      nc,div_w12
        inc     a
-       jr      div_w02
+       ;jr     div_w02
+       add     a,a
+       dec     d
+       jr      nz,div_w00
+       dec     a
+       or      a
+       pop     de
+       ret
 div_w11:
        add     hl,bc
 div_w12:
@@ -2488,7 +2503,15 @@ div_w_n00:
        sbc     hl,bc
        jr      c,div_w_n02
        dec     a
-       jr      div_w_n12
+       ;jr     div_w_n12
+       add     a,a
+       dec     d
+       jr      nz,div_w_n10
+       ;inc    a
+       ;dec    a                       ; compensation
+       scf
+       pop     de
+       ret
 div_w_n01:
        sbc     hl,bc
 div_w_n02:
@@ -2510,7 +2533,14 @@ div_w_n10:
        add     hl,bc
        jr      c,div_w_n12
        inc     a
-       jr      div_w_n02
+       ;jr     div_w_n02
+       add     a,a
+       dec     d
+       jr      nz,div_w_n00
+       dec     a
+       or      a
+       pop     de
+       ret
 div_w_n11:
        add     hl,bc
 div_w_n12:
@@ -2962,7 +2992,15 @@ div_l00:
        exx
        jr      nc,div_l02
        dec     a
-       jr      div_l12
+       ;jr     div_l12
+       add     a,a
+       dec     d
+       jr      nz,div_l10
+       ;inc    a
+       ;dec    a                       ; compensation
+       scf
+       pop     de
+       ret
 div_l01:
        or      a
        sbc     hl,bc
@@ -2994,7 +3032,14 @@ div_l10:
        exx
        jr      nc,div_l12
        inc     a
-       jr      div_l02
+       ;jr     div_l02
+       add     a,a
+       dec     d
+       jr      nz,div_l00
+       dec     a
+       or      a
+       pop     de
+       ret
 div_l11:
        add     hl,bc
        exx
@@ -3030,7 +3075,15 @@ div_l_n00:
        exx
        jr      c,div_l_n02
        dec     a
-       jr      div_l_n12
+       ;jr     div_l_n12
+       add     a,a
+       dec     d
+       jr      nz,div_l_n10
+       ;inc    a
+       ;dec    a                       ; compensation
+       scf
+       pop     de
+       ret
 div_l_n01:
        sbc     hl,bc
        exx
@@ -3061,7 +3114,14 @@ div_l_n10:
        exx
        jr      c,div_l_n12
        inc     a
-       jr      div_l_n02
+       ;jr     div_l_n02
+       add     a,a
+       dec     d
+       jr      nz,div_l_n00
+       dec     a
+       or      a
+       pop     de
+       ret
 div_l_n11:
        add     hl,bc
        exx