From: ceriel
Date: Wed, 10 Aug 1988 10:07:53 +0000 (+0000)
Subject: Added version for machines with proper 4-byte operations
X-Git-Tag: release-5-5~2940
X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=bb46f5218c46f3a63ba973c01ebe9b3134a030d4;p=ack.git

Added version for machines with proper 4-byte operations
---

diff --git a/mach/proto/fp/div_ext.c b/mach/proto/fp/div_ext.c
index 447fdd164..db38b833b 100644
--- a/mach/proto/fp/div_ext.c
+++ b/mach/proto/fp/div_ext.c
@@ -17,19 +17,28 @@
 	November 15, 1984

 	This is a routine to do the work.
-	It is based on the partial products method
+	There are two versions:
+	One is based on the partial products method and makes no use of
+	machine instructions to divide (hardware dividers).
+	The other is used when USE_DIVIDE is defined. It is much faster on
+	machines with fast 4-byte operations.
 */
 /********************************************************/

 div_ext(e1,e2)
 EXTEND	*e1,*e2;
 {
-	short	count;
 	short	error = 0;
 	unsigned long	result[2];
 	register unsigned long	*lp;
+#ifndef USE_DIVIDE
+	short	count;
+#else
+	unsigned short	u[9], v[5];
+	register int	j;
+	register unsigned short	*u_p = u;
+#endif

 	if ((e2->m1 | e2->m2) == 0) {
 		/*
@@ -44,6 +53,7 @@ EXTEND *e1,*e2;
 		e1->exp = 0;	/* make sure */
 		return;
 	}
+#ifndef USE_DIVIDE
 	/*
 	 * numbers are right shifted one bit to make sure
 	 * that m1 is guaranteed to be larger if its
@@ -53,6 +63,7 @@
 	b64_rsft(&e2->m1);	/* 64 bit shift right */
 	e1->exp++;
 	e2->exp++;
+#endif
 	/* check for underflow, divide by zero, etc */
 	e1->sign ^= e2->sign;
 	e1->exp -= e2->exp;
@@ -76,15 +87,14 @@ EXTEND *e1,*e2;
 		return;
 	}

+#ifndef USE_DIVIDE
 	/* do division of mantissas */
 	/* uses partial product method */

 	/* init control variables */

 	count = 64;
-	lp = result;	/* result[0] == high word */
-			/* result[0] == low word */
-	*lp++ = 0L;	/* high word */
-	*lp-- = 0L;	/* low word */
+	result[0] = 0L;
+	result[1] = 0L;

 	/* partial product division loop */
@@ -146,6 +156,95 @@ EXTEND *e1,*e2;
 			*lp <<= count;
 		}
 	}
+#else	/* USE_DIVIDE */
+
+	u[4] = (e1->m2 & 1) << 15;
+	b64_rsft(&(e1->m1));
+	u[0] = e1->m1 >> 16;
+	u[1] = e1->m1;
+	u[2] = e1->m2 >> 16;
+	u[3] = e1->m2;
+	u[5] = 0; u[6] = 0; u[7] = 0;
+	v[1] = e2->m1 >> 16;
+	v[2] = e2->m1;
+	v[3] = e2->m2 >> 16;
+	v[4] = e2->m2;
+	result[0] = 0;
+	result[1] = 0;
+	lp = result;
+
+	/*
+	 * Use an algorithm of Knuth (The Art of Computer Programming,
+	 * Seminumerical Algorithms) to divide u by v. u and v are both
+	 * seen as numbers with base 65536.
+	 */
+	for (j = 0; j <= 3; j++, u_p++) {
+		unsigned long q_est, temp;
+
+		if (j == 2) lp++;
+		if (u_p[0] == 0 && u_p[1] < v[1]) continue;
+		temp = ((unsigned long)u_p[0] << 16) + u_p[1];
+		if (u_p[0] >= v[1]) {
+			q_est = 0x0000FFFFL;
+		}
+		else {
+			q_est = temp / v[1];
+		}
+		temp -= q_est * v[1];
+		while (temp < 0x10000 && v[2]*q_est > ((temp<<16)+u_p[2])) {
+			q_est--;
+			temp += v[1];
+		}
+		/*	Now, according to Knuth, we have an estimate of the
+			quotient digit that is either correct or one too big,
+			but almost always correct.
+		*/
+		if (q_est != 0) {
+			int i;
+			unsigned long k = 0;
+			int borrow = 0;
+
+			for (i = 4; i > 0; i--) {
+				unsigned long tmp = q_est * v[i] + k + borrow;
+				unsigned short md = tmp;
+
+				borrow = (md > u_p[i]);
+				u_p[i] -= md;
+				k = tmp >> 16;
+			}
+			k += borrow;
+			borrow = u_p[0] < k;
+			u_p[0] -= k;
+
+			if (borrow) {
+				/* This does not happen often; the estimate
+				   was one too big; correct it by adding v
+				   back once.
+				*/
+				*lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16);
+				borrow = 0;
+				for (i = 4; i > 0; i--) {
+					unsigned long tmp
+						= v[i]+(unsigned long)u_p[i]+borrow;
+
+					u_p[i] = tmp;
+					borrow = tmp >> 16;
+				}
+				u_p[0] += borrow;
+			}
+			else	*lp |= (j & 1) ? q_est : (q_est<<16);
+		}
+	}
+#ifdef EXCEPTION_INEXACT
+	u_p = &u[0];
+	for (j = 7; j >= 0; j--) {
+		if (*u_p++) {
+			error = 1;
+			break;
+		}
+	}
+#endif
+#endif	/* USE_DIVIDE */
+
 #ifdef EXCEPTION_INEXACT
 	if (error) {
 		/*
@@ -161,5 +260,6 @@ EXTEND *e1,*e2;
 #endif
 	e1->m1 = result[0];
 	e1->m2 = result[1];
+	nrm_ext(e1);
 }
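
For experimenting with the USE_DIVIDE path outside the ACK tree, here is a
minimal standalone sketch of the same base-65536 division (Knuth's
estimate-and-correct scheme, Algorithm D). It is not ACK code: the
fixed-width types, the name div_b65536 and the test mantissas in main are
illustrative only, and the reference check assumes a compiler that provides
unsigned __int128 (gcc or clang). Given two normalized 64-bit mantissas
(top bit set), it computes floor(m1 * 2^63 / m2), the value the patch
leaves in result[0]/result[1] before nrm_ext() renormalizes it.

#include <stdio.h>
#include <stdint.h>

static uint64_t div_b65536(uint64_t m1, uint64_t m2)
{
	uint16_t u[8], v[5], q[4];
	int i, j;

	/* Split into base-65536 digits; shift the dividend right one
	   bit (guard bit into u[4]) so its leading window is < v. */
	u[4] = (uint16_t)((m1 & 1) << 15);
	m1 >>= 1;
	u[0] = (uint16_t)(m1 >> 48);
	u[1] = (uint16_t)(m1 >> 32);
	u[2] = (uint16_t)(m1 >> 16);
	u[3] = (uint16_t)m1;
	u[5] = u[6] = u[7] = 0;
	v[1] = (uint16_t)(m2 >> 48);
	v[2] = (uint16_t)(m2 >> 32);
	v[3] = (uint16_t)(m2 >> 16);
	v[4] = (uint16_t)m2;

	for (j = 0; j <= 3; j++) {
		uint16_t *u_p = &u[j];	/* 5-digit window u[j..j+4] */
		uint32_t q_est, temp, k = 0;
		int borrow = 0;

		q[j] = 0;
		if (u_p[0] == 0 && u_p[1] < v[1])
			continue;	/* quotient digit is 0 */

		/* Estimate the digit from the two leading digits, then
		   refine; per Knuth it is then off by at most one. */
		temp = ((uint32_t)u_p[0] << 16) + u_p[1];
		q_est = u_p[0] >= v[1] ? 0xFFFF : temp / v[1];
		temp -= q_est * v[1];
		while (temp < 0x10000 &&
		       (uint32_t)v[2] * q_est > (temp << 16) + u_p[2]) {
			q_est--;
			temp += v[1];
		}

		/* Fused multiply-and-subtract of q_est * v. */
		for (i = 4; i > 0; i--) {
			uint32_t t = q_est * v[i] + k + borrow;
			uint16_t md = (uint16_t)t;

			borrow = md > u_p[i];
			u_p[i] -= md;
			k = t >> 16;
		}
		k += borrow;
		borrow = u_p[0] < k;
		u_p[0] -= (uint16_t)k;

		if (borrow) {	/* estimate was one too big: add v back */
			q_est--;
			borrow = 0;
			for (i = 4; i > 0; i--) {
				uint32_t t = v[i] + (uint32_t)u_p[i] + borrow;

				u_p[i] = (uint16_t)t;
				borrow = t >> 16;
			}
			u_p[0] += borrow;
		}
		q[j] = (uint16_t)q_est;
	}
	return (uint64_t)q[0] << 48 | (uint64_t)q[1] << 32
	     | (uint64_t)q[2] << 16 | q[3];
}

int main(void)
{
	uint64_t m1 = 0xC90FDAA22168C235ULL;	/* both with bit 63 set */
	uint64_t m2 = 0xB504F333F9DE6484ULL;
	uint64_t got = div_b65536(m1, m2);
	uint64_t want = (uint64_t)(((unsigned __int128)m1 << 63) / m2);

	printf("got %016llx, want %016llx\n",
	       (unsigned long long)got, (unsigned long long)want);
	return got != want;
}

The one-bit pre-shift of the dividend is what makes the scheme work without
a separate normalization pass: the divisor's top bit is already set, so
every quotient digit fits in 16 bits and a single add-back per digit
suffices.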
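
The partial products branch (#ifndef USE_DIVIDE) is mostly outside the
hunks above, but the idea it implements is plain restoring division: 64
shift-and-subtract passes, one quotient bit each, which is why it loses to
the four digit-sized steps of the Knuth version on machines with fast
4-byte arithmetic. A rough sketch of that idea, under the same assumptions
as above (illustrative names, <stdint.h>, unsigned __int128 for the check,
divisor with its top bit set):

#include <stdio.h>
#include <stdint.h>

/* floor(n * 2^63 / d), one quotient bit per pass; d must have bit 63
   set so that the initial partial remainder n >> 1 is less than d. */
static uint64_t div_shift_sub(uint64_t n, uint64_t d)
{
	uint64_t q = 0;
	uint64_t r = n >> 1;	/* top 63 dividend bits, already < d */
	uint64_t in = n & 1;	/* guard bit, then zero bits */
	int count;

	for (count = 64; count > 0; count--) {
		uint64_t top = r >> 63;	/* 65th bit of the shifted remainder */

		r = (r << 1) | in;
		in = 0;
		q <<= 1;
		if (top || r >= d) {	/* subtract one partial product */
			r -= d;
			q |= 1;
		}
	}
	return q;
}

int main(void)
{
	uint64_t m1 = 0xC90FDAA22168C235ULL;
	uint64_t m2 = 0xB504F333F9DE6484ULL;
	uint64_t got = div_shift_sub(m1, m2);
	uint64_t want = (uint64_t)(((unsigned __int128)m1 << 63) / m2);

	printf("got %016llx, want %016llx\n",
	       (unsigned long long)got, (unsigned long long)want);
	return got != want;
}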