Rewrite fef 8 in powerpc assembly.
author George Koehler <xkernigh@netscape.net>
Thu, 29 Sep 2016 19:52:54 +0000 (15:52 -0400)
committer George Koehler <xkernigh@netscape.net>
Thu, 29 Sep 2016 19:52:54 +0000 (15:52 -0400)
In EM, fef splits a float into exponent and fraction.  The old C code,
given an infinite float, got stuck in an infinite loop.  The new
assembly code doesn't loop; it extracts the IEEE exponent.

mach/powerpc/libem/build.lua
mach/powerpc/libem/fef8.c [deleted file]
mach/powerpc/libem/fef8.s [new file with mode: 0644]
mach/powerpc/ncg/table

diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua
index d17adcd..56278aa 100644 (file)
@@ -3,7 +3,6 @@ for _, plat in ipairs(vars.plats) do
                name = "lib_"..plat,
                srcs = {
                        "./*.s",
-                       "./*.c"
                },
                vars = { plat = plat },
                deps = {
diff --git a/mach/powerpc/libem/fef8.c b/mach/powerpc/libem/fef8.c
deleted file mode 100644 (file)
index 244d0fa..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * $Source$
- * $State$
- * $Revision$
- */
-
-/* no headers allowed! */
-
-/* Given a double, calculates the mantissa and exponent.
- * 
- * This function is intended to be called internally by the code generator,
- * so the calling convention is odd.
- */
-
-int __fef8(double* fp)
-{
-       double f = *fp;
-       int exponent, sign;
-
-       if (f == 0.0)
-               return 0;
-               
-       if (f < 0.0)
-       {
-               sign = -1;
-               f = -f;
-       }
-       else
-               sign = 0;
-       
-       exponent = 0;
-       while (f >= 1.0)
-       {
-               f /= 2.0;
-               exponent++;
-       }
-       
-       while (f < 0.5)
-       {
-               f *= 2.0;
-               exponent--;
-       }
-       
-       *fp = (sign) ? -f : f;
-       return exponent;
-}
diff --git a/mach/powerpc/libem/fef8.s b/mach/powerpc/libem/fef8.s
new file mode 100644 (file)
index 0000000..fc72b04
--- /dev/null
@@ -0,0 +1,58 @@
+#include "powerpc.h"
+
+.sect .text
+
+! Split a double-precision float into fraction and exponent, like
+! frexp(3) in C.  On entry:
+!  r3 = float, high word (bits 0..31)
+!  r4 = float, low word (bits 32..63)
+! Yields (zero, infinity and NaN come back unchanged in r3:r4):
+!  r3 = fraction, high word (bits 0..31)
+!  r4 = fraction, low word (bits 32..63)
+!  r5 = exponent (0 if float is zero, 1025 if infinity or NaN)
+! Kills: cr0 f0 f1 r6 r7 (denormals also use 8 bytes below sp)
+
+.define .fef8
+.fef8:
+       ! IEEE double-precision format:
+       !   sign  exponent  fraction
+       !   0     1..11     12..63
+       rlwinm r6, r3, 12, 21, 31       ! r6 = IEEE exponent (r3 bits 1..11)
+       addis r7, r0, 0x7ff0            ! r7 = 0x7ff00000, exponent mask
+       addi r5, r6, -1022              ! r5 = true exponent for a fraction in [0.5, 1)
+       cmpi cr0, 0, r6, 2047           ! IEEE exponent all ones?
+       bclr IFTRUE, EQ, 0              ! return if infinity or NaN, r5 = 1025
+       cmpi cr0, 0, r6, 0              ! IEEE exponent zero?
+       bc IFFALSE, EQ, 1f              ! jump if normalized number
+
+       ! Got denormalized number or zero, probably zero.
+       rlwinm r6, r3, 0, 12, 31        ! r6 = high fraction (r3 bits 12..31)
+       addi r5, r0, 0                  ! r5 = true exponent = 0
+       or. r6, r6, r4                  ! r6 = high|low fraction
+       bclr IFTRUE, EQ, 0              ! return if zero
+
+       ! Got denormalized number, not zero.
+       stwu r4, -4(sp)                 ! push low word
+       stwu r3, -4(sp)                 ! push high word, sp now points at the double
+       li32 r6, _2_64                  ! r6 = address of 2**64 (li32 presumably from powerpc.h)
+       lfd f0, 0(sp)                   ! f0 = the denormalized number
+       lfd f1, 0(r6)                   ! f1 = 2**64
+       fmul f0, f0, f1                 ! multiply it by 2**64, making it normalized
+       stfd f0, 0(sp)                  ! store the product over the pushed double
+       lwz r3, 0(sp)                   ! r3 = product, high word
+       lwz r4, 4(sp)                   ! r4 = product, low word
+       rlwinm r6, r3, 12, 21, 31       ! r6 = IEEE exponent of the product
+       addi sp, sp, 8                  ! pop the 8 bytes pushed above
+       addi r5, r6, -1022 - 64         ! r5 = true exponent, undoing the 2**64
+1:
+       ! Put fraction in [0.5, 1) or (-1, -0.5] by setting its
+       ! exponent to true 0, IEEE 1022.
+       andc r3, r3, r7                 ! clear old exponent
+       oris r3, r3, 1022 << 4          ! set new exponent
+       bclr ALWAYS, 0, 0               ! return
+
+.sect .rom
+_2_64:
+       ! (double) 2**64
+       .data4 0x43f00000               ! high word: IEEE exponent 0x43f = 1023 + 64
+       .data4 0x00000000               ! low word: fraction is zero
diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table
index a5585be..93b7722 100644 (file)
@@ -2180,13 +2180,11 @@ PATTERNS
                                bl {LABEL, ".cuf8"}
                                
        pat fef $1==INT64                  /* Split double */
-               with FREG
+               with GPR3 GPR4
+                       kills FPR0, FPR1, GPR6, GPR7
                        gen
-                               addi SP, SP, {CONST, 0-8}
-                               stfd %1, {GPRINDIRECT, SP, 0}
-                               stwu SP, {GPRINDIRECT, SP, 0-4}
-                               bl {LABEL, "___fef8"}
-                               stw R3, {GPRINDIRECT, SP, 0}
+                               bl {LABEL, ".fef8"}
+                       yields R4 R3 R5
                                
        pat fif $1==INT64                  /* Multiply and split double (?) */
                with STACK