From: George Koehler <xkernigh@netscape.net>
Date: Mon, 17 Oct 2016 04:39:59 +0000 (-0400)
Subject: Rewrite .fif8 to avoid powerpc64 fctid
X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=f33b30ed3c185bd784b595ec2cc4d1ce8b29d897;p=ack.git

Rewrite .fif8 to avoid powerpc64 fctid

This fixes the SIGILL (illegal instruction) in startrek when firing
phasers.  The 32-bit processors in my PowerPC Mac and in QEMU don't
have fctid, a 64-bit instruction.

I got the idea from mach/proto/fp/fif8.c to extract the exponent,
clear some bits to get an integer, then subtract the integer from
the original value to get the fraction.
---

diff --git a/mach/powerpc/libem/fif8.s b/mach/powerpc/libem/fif8.s
index 052c38cf2..a26c77830 100644
--- a/mach/powerpc/libem/fif8.s
+++ b/mach/powerpc/libem/fif8.s
@@ -1,38 +1,71 @@
-#
-! $Source$
-! $State$
-! $Revision$
-
 #include "powerpc.h"
-	
+
 .sect .text
 
-! Multiplies two floats, and returns the fraction and integer.
+! Multiplies two double-precision floats, then splits the product into
+! integer and fraction, like modf(3) in C.  On entry:
+!  f1 = float
+!  f2 = other float
+! Yields:
+!  f1 = fraction
+!  f2 = integer
+! Kills: cr0 f1 f2 r3 r4 r5 r6
 
 .define .fif8
 .fif8:
-	lfd f0, 8(sp)
-	lfd f1, 0(sp)
-	fmul f0, f0, f1
-	fabs f1, f0              ! f0 = result
-	
-	! The following chunk does f1 = floor(f1). See page 158 of the book.
-	
-	mtfsfi cr7, 3              ! set rounding mode to -inf.
-	mtfsb0 23
-	fctid f2, f1
-	fcfid f2, f2
-	mcrfs cr7, cr5
-	bc IFFALSE, 31, toobig
-	fmr f1, f2
-toobig:
-
-	fabs f2, f1              ! f2 = fabs(f1)
-	fsub f2, f2, f1
-	stfd f2, 8(sp)
-	
-	fneg f2, f1
-	fsel f2, f0, f1, f2
-	stfd f2, 0(sp)
-	
+	fmul f1, f1, f2
+	stfdu f1, -8(sp)		! push f1 = product
+	lwz r3, 0(sp)			! r3 = high word
+	lwz r4, 4(sp)			! r4 = low word
+
+	! IEEE double-precision format:
+	!   sign  exponent  fraction
+	!   0     1..11     12..63
+	! Subtract 1023 from the IEEE exponent.  If the result is from
+	! 0 to 51, then the IEEE fraction has that many integer bits.
+	! (IEEE has an implicit 1 before its fraction.  If the IEEE
+	! fraction has 0 integer bits, we still have an integer.)
+	rlwinm r5, r3, 12, 21, 31	! r5 = IEEE exponent
+	addic. r5, r5, -1023		! r5 = nr of integer bits
+	bc IFTRUE, LT, no_int
+	cmpi cr0, 0, r5, 21
+	bc IFTRUE, LT, small_int
+	cmpi cr0, 0, r5, 52
+	bc IFTRUE, LT, big_int
+
+	! f1 is an integer without fraction.  Jump to calculate fraction
+	! f1 = f1 - f2.  It will be zero (or NaN if f1 is infinite or NaN).
+	fmr f2, f1
+	b subtract
+
+no_int:
+	! f1 is a fraction without integer.
+	fsub f2, f1, f1			! integer = zero
+	b done
+
+small_int:
+	! f1 has r5 = 0 to 20 integer bits in the IEEE fraction.
+	! High word has 20 - r5 fraction bits.
+	addi r6, r0, 20
+	subf r6, r5, r6
+	srw r3, r3, r6
+	addi r4, r0, 0			! clear low word
+	slw r3, r3, r6			! clear fraction in high word
+	b move_int
+
+big_int:
+	! f1 has r5 = 21 to 51 integer bits in the IEEE fraction.
+	! Low word has 52 - r5 fraction bits.
+	addi r6, r0, 52
+	subf r6, r5, r6
+	srw r4, r4, r6
+	slw r4, r4, r6			! clear fraction in low word
+move_int:
+	stw r3, 0(sp)
+	stw r4, 4(sp)
+	lfd f2, 0(sp)			! f2 = integer
+subtract:
+	fsub f1, f1, f2			! fraction = value - integer
+done:
+	addi sp, sp, 8			! restore stack pointer
 	bclr ALWAYS, 0, 0
diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table
index 4a99c9d61..08ddd7d2e 100644
--- a/mach/powerpc/ncg/table
+++ b/mach/powerpc/ncg/table
@@ -2141,16 +2141,17 @@ PATTERNS
 		with STACK
 			gen
 				bl {LABEL, ".cuf8"}
-				
-	pat fef $1==INT64                  /* Split double */
+
+	pat fef $1==INT64                  /* Split exponent, fraction */
 		with GPR3 GPR4
 			kills FPR0, FPR1, GPR6, GPR7
 			gen
 				bl {LABEL, ".fef8"}
 			yields R4 R3 R5
-				
-	pat fif $1==INT64                  /* Multiply and split double (?) */
-		with STACK
+
+	pat fif $1==INT64                  /* Multiply then split integer, fraction */
+		with FPR1 FPR2
+			kills FPR1, FPR2, GPR3, GPR4, GPR5, GPR6
 			gen
 				bl {LABEL, ".fif8"}
-				
+			yields F1 F2