From: ceriel Date: Thu, 11 Aug 1988 14:50:18 +0000 (+0000) Subject: speeded up again :-) X-Git-Tag: release-5-5~2929 X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=5aa128ea62efdd6e9ede5d508430a73f9d8333e7;p=ack.git speeded up again :-) --- diff --git a/mach/proto/fp/FP.script b/mach/proto/fp/FP.script index 38213b181..a9fa23645 100644 --- a/mach/proto/fp/FP.script +++ b/mach/proto/fp/FP.script @@ -34,7 +34,6 @@ g/_sub_ext/s//.sub_ext/g g/_zrf_ext/s//.zrf_ext/g g/_compact/s//.compact/g g/_extend/s//.extend/g -g/_b32_add/s//.b32_add/g g/_b64_add/s//.b64_add/g g/_b64_sft/s//.b64_sft/g g/_b64_rsft/s//.b64_rsft/g diff --git a/mach/proto/fp/adder.c b/mach/proto/fp/adder.c index af5396546..2a9afbfb2 100644 --- a/mach/proto/fp/adder.c +++ b/mach/proto/fp/adder.c @@ -32,10 +32,12 @@ register B64 *e1,*e2; int carry; /* add higher pair of 32 bits */ - overflow = b32_add(&e1->h_32,&e2->h_32); + overflow = ((unsigned long) 0xFFFFFFFF - e1->h_32 < e2->h_32); + e1->h_32 += e2->h_32; /* add lower pair of 32 bits */ - carry = b32_add(&e1->l_32,&e2->l_32); + carry = ((unsigned long) 0xFFFFFFFF - e1->l_32 < e2->l_32); + e1->l_32 += e2->l_32; # ifdef EXT_DEBUG printf("\t\t\t\t\tb64_add: overflow (%d); internal carry(%d)\n", overflow,carry); @@ -46,17 +48,3 @@ register B64 *e1,*e2; else return(overflow); /* return status from higher add */ } - - /* - * add 32 bits (unsigned longs) - * and return the carry status - */ - -b32_add(e1,e2) -register unsigned long *e1,*e2; -{ - int carry = ((unsigned long) 0xFFFFFFFF - *e1 < *e2); - - *e1 += *e2; - return carry; -} diff --git a/mach/proto/fp/compact.c b/mach/proto/fp/compact.c index 186d41cfa..ffc42f29e 100644 --- a/mach/proto/fp/compact.c +++ b/mach/proto/fp/compact.c @@ -1,4 +1,4 @@ -e +/* (c) copyright 1988 by the Vrije Universiteit, Amsterdam, The Netherlands. See the copyright notice in the ACK home directory, in the file "Copyright". */ diff --git a/mach/proto/fp/div_ext.c b/mach/proto/fp/div_ext.c index db38b833b..c58be1781 100644 --- a/mach/proto/fp/div_ext.c +++ b/mach/proto/fp/div_ext.c @@ -38,6 +38,7 @@ EXTEND *e1,*e2; unsigned short u[9], v[5]; register int j; register unsigned short *u_p = u; + int maxv = 4; #endif if ((e2->m1 | e2->m2) == 0) { @@ -169,6 +170,7 @@ EXTEND *e1,*e2; v[2] = e2->m1; v[3] = e2->m2 >> 16; v[4] = e2->m2; + while (! v[maxv]) maxv--; result[0] = 0; result[1] = 0; lp = result; @@ -204,7 +206,7 @@ EXTEND *e1,*e2; unsigned long k = 0; int borrow = 0; - for (i = 4; i > 0; i--) { + for (i = maxv; i > 0; i--) { unsigned long tmp = q_est * v[i] + k + borrow; unsigned short md = tmp; @@ -222,7 +224,7 @@ EXTEND *e1,*e2; */ *lp |= (j & 1) ? (q_est - 1) : ((q_est-1)<<16); borrow = 0; - for (i = 4; i > 0; i--) { + for (i = maxv; i > 0; i--) { unsigned long tmp = v[i]+(unsigned long)u_p[i]+borrow; diff --git a/mach/proto/fp/mul_ext.c b/mach/proto/fp/mul_ext.c index a04a9b379..3d30ffcd1 100644 --- a/mach/proto/fp/mul_ext.c +++ b/mach/proto/fp/mul_ext.c @@ -17,12 +17,12 @@ mul_ext(e1,e2) EXTEND *e1,*e2; { - register int k,i,j; /* loop control */ - long unsigned *reg[7]; - long unsigned tmp[4]; + register int i,j; /* loop control */ short unsigned mp[4]; /* multiplier */ short unsigned mc[4]; /* multipcand */ - B64 low64,tmp64; /* 64 bit storage */ + short unsigned result[8]; /* result */ + B64 tmp64; + register unsigned short *pres; /* first save the sign (XOR) */ @@ -88,48 +88,33 @@ infinity: e1->m1 = e1->m2 =0L; mc[1] = (unsigned short) e2->m1; mc[2] = e2->m2 >> 16; mc[3] = (unsigned short) e2->m2; - /* - * assign pointers - */ - reg[0] = &e1->m1; /* the answer goes here */ - reg[1] = &tmp[1]; - reg[2] = &e1->m2; /* and here */ - reg[3] = &tmp[2]; - reg[4] = &low64.h_32; - reg[5] = &tmp[3]; - reg[6] = &low64.l_32; - - /* - * zero registers - */ - for(i=7;i--;) - *reg[i] = 0; - + for (i = 8; i--;) { + result[i] = 0; + } /* * fill registers with their components */ - for(i=4;i--;) if (mp[i]) - for(j=4;j--;) if (mc[j]) { - k = i+j; - tmp[0] = (long)mp[i] * (long)mc[j]; - if (b32_add(reg[k],tmp)) { - for(tmp[0] = 0x10000L;k>0;) - if (b32_add(reg[--k],tmp) == 0) - break; - } + for(i=4, pres = &result[4];i--;pres--) if (mp[i]) { + unsigned short k = 0; + unsigned long mpi = mp[i]; + for(j=4;j--;) { + unsigned long tmp = (unsigned long)pres[j] + k; + if (mc[j]) tmp += mpi * mc[j]; + pres[j] = tmp; + k = tmp >> 16; } + pres[-1] = k; + } /* * combine the registers to a total */ - tmp64.h_32 = (*reg[1]>>16); - tmp64.l_32 = (*reg[1]<<16) + (*reg[3]>>16); - b64_add((B64 *)&e1->m1,&tmp64); - tmp64.l_32 = *reg[5]<<16; - tmp64.h_32 = (*reg[5]>>16) + (*reg[3]<<16); - if (b64_add(&low64,&tmp64)) + e1->m1 = ((unsigned long)(result[0]) << 16) + result[1]; + e1->m2 = ((unsigned long)(result[2]) << 16) + result[3]; + if (result[4] & 0x8000) { if (++e1->m2 == 0) e1->m1++; + } nrm_ext(e1); }