From 6ae415d48b44da1dd8e9d2fe6333bb1bdb85c2cf Mon Sep 17 00:00:00 2001
From: George Koehler
Date: Thu, 29 Sep 2016 15:52:54 -0400
Subject: [PATCH] Rewrite fef 8 in powerpc assembly.

In EM, fef splits a float into exponent and fraction. The old C code,
given an infinite float, got stuck in an infinite loop. The new
assembly code doesn't loop; it extracts the IEEE exponent.
---
 mach/powerpc/libem/build.lua |  1 -
 mach/powerpc/libem/fef8.c    | 46 ----------------------------
 mach/powerpc/libem/fef8.s    | 58 ++++++++++++++++++++++++++++++++++++
 mach/powerpc/ncg/table       | 10 +++----
 4 files changed, 62 insertions(+), 53 deletions(-)
 delete mode 100644 mach/powerpc/libem/fef8.c
 create mode 100644 mach/powerpc/libem/fef8.s

diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua
index d17adcd92..56278aa55 100644
--- a/mach/powerpc/libem/build.lua
+++ b/mach/powerpc/libem/build.lua
@@ -3,7 +3,6 @@ for _, plat in ipairs(vars.plats) do
 		name = "lib_"..plat,
 		srcs = {
 			"./*.s",
-			"./*.c"
 		},
 		vars = { plat = plat },
 		deps = {
diff --git a/mach/powerpc/libem/fef8.c b/mach/powerpc/libem/fef8.c
deleted file mode 100644
index 244d0fac8..000000000
--- a/mach/powerpc/libem/fef8.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * $Source$
- * $State$
- * $Revision$
- */
-
-/* no headers allowed! */
-
-/* Given a double, calculates the mantissa and exponent.
- *
- * This function is intended to be called internally by the code generator,
- * so the calling convention is odd.
- */
-
-int __fef8(double* fp)
-{
-	double f = *fp;
-	int exponent, sign;
-
-	if (f == 0.0)
-		return 0;
-
-	if (f < 0.0)
-	{
-		sign = -1;
-		f = -f;
-	}
-	else
-		sign = 0;
-
-	exponent = 0;
-	while (f >= 1.0)
-	{
-		f /= 2.0;
-		exponent++;
-	}
-
-	while (f < 0.5)
-	{
-		f *= 2.0;
-		exponent--;
-	}
-
-	*fp = (sign) ? -f : f;
-	return exponent;
-}
diff --git a/mach/powerpc/libem/fef8.s b/mach/powerpc/libem/fef8.s
new file mode 100644
index 000000000..fc72b04f2
--- /dev/null
+++ b/mach/powerpc/libem/fef8.s
@@ -0,0 +1,58 @@
+#include "powerpc.h"
+
+.sect .text
+
+! Split a double-precision float into fraction and exponent, like
+! frexp(3) in C. On entry:
+!  r3 = float, high word (bits 0..31)
+!  r4 = float, low word (bits 32..63)
+! Yields:
+!  r3 = fraction, high word (bits 0..31)
+!  r4 = fraction, low word (bits 32..63)
+!  r5 = exponent
+! Kills: cr0 f0 f1 r6 r7
+
+.define .fef8
+.fef8:
+	! IEEE double-precision format:
+	!   sign  exponent  fraction
+	!   0     1..11     12..63
+	rlwinm r6, r3, 12, 21, 31	! r6 = IEEE exponent
+	addis r7, r0, 0x7ff0		! r7 = exponent mask
+	addi r5, r6, -1022		! r5 = true exponent
+	cmpi cr0, 0, r6, 2047		! all 11 exponent bits set?
+	bclr IFTRUE, EQ, 0		! return if infinity or NaN (r3, r4 unchanged)
+	cmpi cr0, 0, r6, 0		! IEEE exponent zero?
+	bc IFFALSE, EQ, 1f		! jump if normalized number
+
+	! Got denormalized number or zero, probably zero.
+	rlwinm r6, r3, 0, 12, 31	! r6 = high word of fraction
+	addi r5, r0, 0			! r5 = true exponent = 0
+	or. r6, r6, r4			! r6 = high|low fraction
+	bclr IFTRUE, EQ, 0		! return if zero (r3, r4 unchanged)
+
+	! Got denormalized number, not zero.
+	stwu r4, -4(sp)			! push low word
+	stwu r3, -4(sp)			! push high word
+	li32 r6, _2_64			! r6 = address of _2_64
+	lfd f0, 0(sp)			! f0 = the float just pushed
+	lfd f1, 0(r6)			! f1 = 2**64
+	fmul f0, f0, f1			! multiply it by 2**64
+	stfd f0, 0(sp)			! store product over the pushed float
+	lwz r3, 0(sp)			! r3 = product, high word
+	lwz r4, 4(sp)			! r4 = product, low word
+	rlwinm r6, r3, 12, 21, 31	! r6 = IEEE exponent of product
+	addi sp, sp, 8			! pop the two words pushed above
+	addi r5, r6, -1022 - 64		! r5 = true exponent, less the 2**64 scaling
+1:
+	! Put fraction in [0.5, 1) or (-1, -0.5] by setting its
+	! exponent to true 0, IEEE 1022.
+	andc r3, r3, r7			! clear old exponent
+	oris r3, r3, 1022 << 4		! set new exponent
+	bclr ALWAYS, 0, 0		! return
+
+.sect .rom
+_2_64:
+	! (double) 2**64
+	.data4 0x43f00000
+	.data4 0x00000000
diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table
index a5585bed7..93b7722ee 100644
--- a/mach/powerpc/ncg/table
+++ b/mach/powerpc/ncg/table
@@ -2180,13 +2180,11 @@ PATTERNS
 				bl {LABEL, ".cuf8"}
 
 	pat fef $1==INT64                  /* Split double */
-		with FREG
+		with GPR3 GPR4
+			kills FPR0, FPR1, GPR6, GPR7
 			gen
-				addi SP, SP, {CONST, 0-8}
-				stfd %1, {GPRINDIRECT, SP, 0}
-				stwu SP, {GPRINDIRECT, SP, 0-4}
-				bl {LABEL, "___fef8"}
-				stw R3, {GPRINDIRECT, SP, 0}
+				bl {LABEL, ".fef8"}
+			yields R4 R3 R5
 
 	pat fif $1==INT64                  /* Multiply and split double (?) */
 		with STACK
-- 
2.34.1