speeded up again :-)
authorceriel <none@none>
Thu, 11 Aug 1988 14:50:18 +0000 (14:50 +0000)
committerceriel <none@none>
Thu, 11 Aug 1988 14:50:18 +0000 (14:50 +0000)
mach/proto/fp/FP.script
mach/proto/fp/adder.c
mach/proto/fp/compact.c
mach/proto/fp/div_ext.c
mach/proto/fp/mul_ext.c

index 38213b1..a9fa236 100644 (file)
@@ -34,7 +34,6 @@ g/_sub_ext/s//.sub_ext/g
 g/_zrf_ext/s//.zrf_ext/g
 g/_compact/s//.compact/g
 g/_extend/s//.extend/g
-g/_b32_add/s//.b32_add/g
 g/_b64_add/s//.b64_add/g
 g/_b64_sft/s//.b64_sft/g
 g/_b64_rsft/s//.b64_rsft/g
index af53965..2a9afbf 100644 (file)
@@ -32,10 +32,12 @@ register    B64     *e1,*e2;
                                int     carry;
 
                        /* add higher pair of 32 bits */
-       overflow = b32_add(&e1->h_32,&e2->h_32);
+       overflow = ((unsigned long) 0xFFFFFFFF - e1->h_32 < e2->h_32);
+       e1->h_32 += e2->h_32;
 
                        /* add lower pair of 32 bits */
-       carry =    b32_add(&e1->l_32,&e2->l_32);
+       carry = ((unsigned long) 0xFFFFFFFF - e1->l_32 < e2->l_32);
+       e1->l_32 += e2->l_32;
 # ifdef        EXT_DEBUG
        printf("\t\t\t\t\tb64_add: overflow (%d); internal carry(%d)\n",
                                        overflow,carry);
@@ -46,17 +48,3 @@ register     B64     *e1,*e2;
        else
                return(overflow);       /* return status from higher add */
 }
-
-       /*
-        *      add 32 bits (unsigned longs)
-        *      and return the carry status
-        */
-
-b32_add(e1,e2)
-register       unsigned long   *e1,*e2;
-{
-       int     carry = ((unsigned long) 0xFFFFFFFF - *e1 < *e2);
-
-       *e1 += *e2;
-       return carry;
-}
index 186d41c..ffc42f2 100644 (file)
@@ -1,4 +1,4 @@
-e
+/*
   (c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands.
   See the copyright notice in the ACK home directory, in the file "Copyright".
 */
index db38b83..c58be17 100644 (file)
@@ -38,6 +38,7 @@ EXTEND        *e1,*e2;
                        unsigned short u[9], v[5];
                        register int j;
                        register unsigned short *u_p = u;
+                       int maxv = 4;
 #endif
 
        if ((e2->m1 | e2->m2) == 0) {
@@ -169,6 +170,7 @@ EXTEND      *e1,*e2;
        v[2] = e2->m1;
        v[3] = e2->m2 >> 16;
        v[4] = e2->m2;
+       while (! v[maxv]) maxv--;
        result[0] = 0;
        result[1] = 0;
        lp = result;
@@ -204,7 +206,7 @@ EXTEND      *e1,*e2;
                        unsigned long k = 0;
                        int borrow = 0;
 
-                       for (i = 4; i > 0; i--) {
+                       for (i = maxv; i > 0; i--) {
                                unsigned long tmp = q_est * v[i] + k + borrow;
                                unsigned short md = tmp;
 
@@ -222,7 +224,7 @@ EXTEND      *e1,*e2;
                                */
                                *lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16);
                                borrow = 0;
-                               for (i = 4; i > 0; i--) {
+                               for (i = maxv; i > 0; i--) {
                                        unsigned long tmp 
                                            = v[i]+(unsigned long)u_p[i]+borrow;
                                        
index a04a9b3..3d30ffc 100644 (file)
 mul_ext(e1,e2)
 EXTEND *e1,*e2;
 {
-       register int    k,i,j;          /* loop control */
-       long  unsigned  *reg[7];
-       long  unsigned  tmp[4];
+       register int    i,j;            /* loop control */
        short unsigned  mp[4];  /* multiplier */
        short unsigned  mc[4];  /* multipcand */
-       B64     low64,tmp64;    /* 64 bit storage       */
+       short unsigned  result[8];      /* result */
+       B64             tmp64;
+       register unsigned short *pres;
 
        /* first save the sign (XOR)                    */
 
@@ -88,48 +88,33 @@ infinity:   e1->m1 = e1->m2 =0L;
        mc[1] = (unsigned short) e2->m1;
        mc[2] = e2->m2 >> 16;
        mc[3] = (unsigned short) e2->m2;
-       /*
-        *      assign pointers
-        */
-       reg[0] = &e1->m1;       /* the answer goes here */
-       reg[1] = &tmp[1];
-       reg[2] = &e1->m2;       /* and here     */
-       reg[3] = &tmp[2];
-       reg[4] = &low64.h_32;
-       reg[5] = &tmp[3];
-       reg[6] = &low64.l_32;
-
-       /*
-        *      zero registers
-        */
-       for(i=7;i--;)
-               *reg[i] = 0;
-
+       for (i = 8; i--;) {
+               result[i] = 0;
+       }
        /*
         *      fill registers with their components
         */
-       for(i=4;i--;) if (mp[i])
-               for(j=4;j--;) if (mc[j]) {
-                       k = i+j;
-                       tmp[0] = (long)mp[i] * (long)mc[j];
-                       if (b32_add(reg[k],tmp))        {
-                               for(tmp[0] = 0x10000L;k>0;)
-                                       if (b32_add(reg[--k],tmp) == 0)
-                                               break;
-                       }
+       for(i=4, pres = &result[4];i--;pres--) if (mp[i]) {
+               unsigned short k = 0;
+               unsigned long mpi = mp[i];
+               for(j=4;j--;) {
+                       unsigned long tmp = (unsigned long)pres[j] + k;
+                       if (mc[j]) tmp += mpi * mc[j];
+                       pres[j] = tmp;
+                       k = tmp >> 16;
                }
+               pres[-1] = k;
+       }
        
        /*
         *      combine the registers to a total
         */
-       tmp64.h_32 = (*reg[1]>>16);
-       tmp64.l_32 = (*reg[1]<<16) + (*reg[3]>>16);
-       b64_add((B64 *)&e1->m1,&tmp64);
-       tmp64.l_32 = *reg[5]<<16;
-       tmp64.h_32 = (*reg[5]>>16) + (*reg[3]<<16);
-       if (b64_add(&low64,&tmp64))
+       e1->m1 = ((unsigned long)(result[0]) << 16) + result[1];
+       e1->m2 = ((unsigned long)(result[2]) << 16) + result[3];
+       if (result[4] & 0x8000) {
                if (++e1->m2 == 0)
                        e1->m1++;
+       }
 
        nrm_ext(e1);
 }