mach/proto/fp/div_ext.c

   1 /*
   2   (c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands.
   3   See the copyright notice in the ACK home directory, in the file "Copyright".
   4 */
   5
   6 /* $Id: div_ext.c,v 1.11 1994/06/24 13:31:48 ceriel Exp $ */
   7
   8 /*
   9         DIVIDE EXTENDED FORMAT
  10 */
  11
  12 #include "FP_bias.h"
  13 #include "FP_trap.h"
  14 #include "FP_types.h"
  15
  16 /*
  17         November 15, 1984
  18
  19         This is a routine to do the work.
  20         There are two versions:
  21         One is based on the partial products method
  22         and makes no use possible machine instructions
  23         to divide (hardware dividers).
  24         The other is used when USE_DIVIDE is defined. It is much faster on
  25         machines with fast 4 byte operations.
  26 */
  27 /********************************************************/
  28
  29 void
  30 div_ext(e1,e2)
  31 EXTEND  *e1,*e2;
  32 {
  33         short   error = 0;
  34         B64             result;
  35         register        unsigned long   *lp;
  36 #ifndef USE_DIVIDE
  37         short   count;
  38 #else
  39         unsigned short u[9], v[5];
  40         register int j;
  41         register unsigned short *u_p = u;
  42         int maxv = 4;
  43 #endif
  44
  45         if ((e2->m1 | e2->m2) == 0) {
  46                 /*
  47                  * Exception 8.2 - Divide by zero
  48                  */
  49                 trap(EFDIVZ);
  50                 e1->m1 = e1->m2 = 0L;
  51                 e1->exp = EXT_MAX;
  52                 return;
  53         }
  54         if ((e1->m1 | e1->m2) == 0) {   /* 0 / anything == 0 */
  55                 e1->exp = 0;    /* make sure */
  56                 return;
  57         }
  58 #ifndef USE_DIVIDE
  59         /*
  60          * numbers are right shifted one bit to make sure
  61          * that m1 is quaranteed to be larger if its
  62          * maximum bit is set
  63          */
  64         b64_rsft(&e1->mantissa);        /* 64 bit shift right */
  65         b64_rsft(&e2->mantissa);        /* 64 bit shift right */
  66         e1->exp++;
  67         e2->exp++;
  68 #endif
  69         /*      check for underflow, divide by zero, etc        */
  70         e1->sign ^= e2->sign;
  71         e1->exp -= e2->exp;
  72
  73 #ifndef USE_DIVIDE
  74                 /* do division of mantissas     */
  75                 /* uses partial product method  */
  76                 /* init control variables       */
  77
  78         count = 64;
  79         result.h_32 = 0L;
  80         result.l_32 = 0L;
  81
  82                 /* partial product division loop */
  83
  84         while (count--) {
  85                 /* first left shift result 1 bit        */
  86                 /* this is ALWAYS done                  */
  87
  88                 b64_lsft(&result);
  89
  90                 /* compare dividend and divisor         */
  91                 /* if dividend >= divisor add a bit     */
  92                 /* and subtract divisior from dividend  */
  93
  94                 if ( (e1->m1 < e2->m1) ||
  95                         ((e1->m1 == e2->m1) && (e1->m2 < e2->m2) ))
  96                         ;       /* null statement */
  97                                 /* i.e., don't add or subtract */
  98                 else    {
  99                         result.l_32++;  /* ADD  */
 100                         if (e2->m2 > e1->m2)
 101                                 e1->m1 -= 1;    /* carry in */
 102                         e1->m1 -= e2->m1;       /* do SUBTRACTION */
 103                         e1->m2 -= e2->m2;       /*    SUBTRACTION */
 104                 }
 105
 106                 /*      shift dividend left one bit OR  */
 107                 /*      IF it equals ZERO we can break out      */
 108                 /*      of the loop, but still must shift       */
 109                 /*      the quotient the remaining count bits   */
 110                 /* NB   save the results of this test in error  */
 111                 /*      if not zero, then the result is inexact. */
 112                 /*      this would be reported in IEEE standard */
 113
 114                 /*      lp points to dividend                   */
 115                 lp = &e1->m1;
 116
 117                 error = ((*lp | *(lp+1)) != 0L) ? 1 : 0;
 118                 if (error)      {       /* more work */
 119                         /*      assume max bit == 0 (see above) */
 120                         b64_lsft(&e1->mantissa);
 121                         continue;
 122                 }
 123                 else
 124                         break;  /* leave loop   */
 125         }       /* end of divide by subtraction loop    */
 126
 127         if (count > 0)  {
 128                 lp = &result.h_32;
 129                 if (count > 31) {       /* move to higher word */
 130                         *lp = *(lp+1);
 131                         count -= 32;
 132                         *(lp+1) = 0L;   /* clear low word       */
 133                 }
 134                 if (*lp)
 135                         *lp <<= count;  /* shift rest of way    */
 136                 lp++;   /*  == &result.l_32     */
 137                 if (*lp) {
 138                         result.h_32 |= (*lp >> 32-count);
 139                         *lp <<= count;
 140                 }
 141         }
 142 #else /* USE_DIVIDE */
 143
 144         u[4] = (e1->m2 & 1) << 15;
 145         b64_rsft(&(e1->mantissa));
 146         u[0] = e1->m1 >> 16;
 147         u[1] = e1->m1;
 148         u[2] = e1->m2 >> 16;
 149         u[3] = e1->m2;
 150         u[5] = 0; u[6] = 0; u[7] = 0;
 151         v[1] = e2->m1 >> 16;
 152         v[2] = e2->m1;
 153         v[3] = e2->m2 >> 16;
 154         v[4] = e2->m2;
 155         while (! v[maxv]) maxv--;
 156         result.h_32 = 0;
 157         result.l_32 = 0;
 158         lp = &result.h_32;
 159
 160         /*
 161          * Use an algorithm of Knuth (The art of programming, Seminumerical
 162          * algorithms), to divide u by v. u and v are both seen as numbers
 163          * with base 65536.
 164          */
 165         for (j = 0; j <= 3; j++, u_p++) {
 166                 unsigned long q_est, temp;
 167
 168                 if (j == 2) lp++;
 169                 if (u_p[0] == 0 && u_p[1] < v[1]) continue;
 170                 temp = ((unsigned long)u_p[0] << 16) + u_p[1];
 171                 if (u_p[0] >= v[1]) {
 172                         q_est = 0x0000FFFFL;
 173                 }
 174                 else {
 175                         q_est = temp / v[1];
 176                 }
 177                 temp -= q_est * v[1];
 178                 while (temp < 0x10000 && v[2]*q_est > ((temp<<16)+u_p[2])) {
 179                         q_est--;
 180                         temp += v[1];
 181                 }
 182                 /*      Now, according to Knuth, we have an estimate of the
 183                         quotient, that is either correct or one too big, but
 184                         almost always correct.
 185                 */
 186                 if (q_est != 0)  {
 187                         int i;
 188                         unsigned long k = 0;
 189                         int borrow = 0;
 190
 191                         for (i = maxv; i > 0; i--) {
 192                                 unsigned long tmp = q_est * v[i] + k + borrow;
 193                                 unsigned short md = tmp;
 194
 195                                 borrow = (md > u_p[i]);
 196                                 u_p[i] -= md;
 197                                 k = tmp >> 16;
 198                         }
 199                         k += borrow;
 200                         borrow = u_p[0] < k;
 201                         u_p[0] -= k;
 202
 203                         if (borrow) {
 204                                 /* So, this does not happen often; the estimate
 205                                    was one too big; correct this
 206                                 */
 207                                 *lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16);
 208                                 borrow = 0;
 209                                 for (i = maxv; i > 0; i--) {
 210                                         unsigned long tmp
 211                                             = v[i]+(unsigned long)u_p[i]+borrow;
 212
 213                                         u_p[i] = tmp;
 214                                         borrow = tmp >> 16;
 215                                 }
 216                                 u_p[0] += borrow;
 217                         }
 218                         else *lp |= (j & 1) ? q_est : (q_est<<16);
 219                 }
 220         }
 221 #ifdef  EXCEPTION_INEXACT
 222         u_p = &u[0];
 223         for (j = 7; j >= 0; j--) {
 224                 if (*u_p++) {
 225                         error = 1;
 226                         break;
 227                 }
 228         }
 229 #endif
 230 #endif
 231
 232 #ifdef  EXCEPTION_INEXACT
 233         if (error)      {
 234                 /*
 235                  * report here exception 8.5 - Inexact
 236                  * from Draft 8.0 of IEEE P754:
 237                  * In the absence of an invalid operation exception,
 238                  * if the rounded result of an operation is not exact or if
 239                  * it overflows without a trap, then the inexact exception
 240                  * shall be assigned. The rounded or overflowed result
 241                  * shall be delivered to the destination.
 242                  */
 243                 INEXACT();
 244 #endif
 245         e1->mantissa = result;
 246
 247         nrm_ext(e1);
 248         if (e1->exp < EXT_MIN)  {
 249                 /*
 250                  * Exception 8.4 - Underflow
 251                  */
 252                 trap(EFUNFL);   /* underflow */
 253                 e1->exp = EXT_MIN;
 254                 e1->m1 = e1->m2 = 0L;
 255                 return;
 256         }
 257         if (e1->exp >= EXT_MAX) {
 258                 /*
 259                  * Exception 8.3 - Overflow
 260                  */
 261                 trap(EFOVFL);   /* overflow */
 262                 e1->exp = EXT_MAX;
 263                 e1->m1 = e1->m2 = 0L;
 264                 return;
 265         }
 266 }