#include "misc.h"
#include <em_arith.h>
-flt_arith2flt(n, e)
+flt_arith2flt(n, e, uns)
register arith n;
register flt_arith *e;
{
*/
register int i;
- if (n < 0) {
+ if (!uns && n < 0) {
e->flt_sign = 1;
n = -n;
}
return;
}
e->flt_exp = 63;
- if (n < 0) {
- /* n = MINARITH */
- n = 0x40000000;
- while ((n << 1) > 0) n <<= 1;
- e->flt_exp++;
- }
+
for (i = 64; i > 0 && n != 0; i--) {
flt_b64_sft(&(e->flt_mantissa),1);
e->m1 |= (n & 1) << 31;
- n >>= 1;
+ n = (n >> 1) & ~(0x80 << 8*(sizeof(arith)-1));
}
if (i > 0) {
.B int flt_status;
.PP
.B #include <em_arith.h>
-.B flt_arith2flt(n, e)
+.B flt_arith2flt(n, e, uns)
.B arith n;
.B flt_arith *e;
+.B int uns;
.PP
-.B arith flt_flt2arith(e)
+.B arith flt_flt2arith(e, uns)
.B flt_arith *e;
+.B int uns;
.PP
.B flt_b64_sft(m, n)
.B struct flt_mantissa *m;
.I n
to the floating point format used in this package and returns the result
in
-.IR e .
+.IR e . If the
+.I uns
+flag is set, the number
+.I n
+is regarded as unsigned.
.PP
.B flt_flt2arith
truncates the number indicated by
.I e
to the largest integer value smaller than or equal to the number indicated by
.IR e .
-It returns this value.
+It returns this value. If the
+.I uns
+flag is set, the result is to be regarded as unsigned.
.PP
Before each operation, the
.I flt_status
#include <em_arith.h>
arith
-flt_flt2arith(e)
+flt_flt2arith(e, uns)
register flt_arith *e;
{
/* Convert the flt_arith "e" to an arith.
*/
arith n;
struct flt_mantissa a;
+ register int i;
+ if (uns) uns = 1;
flt_status = 0;
+ if (e->flt_sign && uns) {
+ flt_status = FLT_UNFL;
+ return 0;
+ }
if (e->flt_exp < 0) {
/* absolute value of result < 1.
Return value only depends on sign:
return -e->flt_sign;
}
- if (e->flt_exp > (8*sizeof(arith)-2)) {
+ if (e->flt_exp > 8*sizeof(arith)-2 + uns) {
/* probably overflow, but there is one exception:
*/
- if (e->flt_sign) {
- n = 0x80;
- while (n << 8) n <<= 8;
- if (e->flt_exp == 8*sizeof(arith)-1 &&
- e->m2 == 0 &&
- e->m1 == 0x80000000) {
- /* No overflow in this case */
- }
- else flt_status = FLT_OVFL;
- return n;
+ if (e->flt_sign &&
+ e->flt_exp == 8*sizeof(arith)-1 &&
+ e->m2 == 0 &&
+ e->m1 == 0x80000000) {
+ /* No overflow in this case */
+ flt_status = 0;
}
- n = 0x7F;
- while ((n << 8) > 0) {
- n <<= 8;
- n |= 0xFF;
+ else {
+ flt_status = FLT_OVFL;
+ e->flt_exp = 8*sizeof(arith)-2 + uns + e->flt_sign;
+ if (e->flt_sign) {
+ e->m1 = 0x80000000;
+ e->m2 = 0;
+ }
+ else {
+ e->m1 = 0xFFFFFFFF;
+ e->m2 = 0xFFFFFFFF;
+ }
}
- return n;
}
a = e->flt_mantissa;
flt_b64_sft(&a, 63-e->flt_exp);
n = a.flt_l_32 | ((a.flt_h_32 << 16) << 16);
/* not << 32; this could be an undefined operation */
- return n;
+ return e->flt_sign ? -n : n;
}
(*decpt)++; /* because now value in [1.0, 10.0) */
}
while (p <= pe) {
- if (e->flt_exp >= 0) {
+ if (e->flt_exp >= 0 && e->m1 != 0) {
flt_arith x;
x.m2 = 0; x.flt_exp = e->flt_exp;
x.m1 = (e->m1 >> 1) & 0x7FFFFFFF;
x.m1 = x.m1>>(30-e->flt_exp);
*p++ = (x.m1) + '0';
- x.m1 = x.m1 << (31-e->flt_exp);
- flt_add(e, &x, e);
+ if (x.m1) {
+ x.m1 = x.m1 << (31-e->flt_exp);
+ flt_add(e, &x, e);
+ }
}
else *p++ = '0';
flt_mul(e, &s10pow[1], e);