1 .TH FLT_ARITH 3 "$Revision: 1.9 $"
4 flt_arith \- high precision floating point arithmetic
7 .B #include <flt_arith.h>
12 long flt_h_32; /* high order 32 bits of mantissa */
13 long flt_l_32; /* low order 32 bits of mantissa */
17 short flt_sign; /* 0 for positive, 1 for negative */
18 short flt_exp; /* between -16384 and 16384 */
19 struct flt_mantissa flt_mantissa; /* normalized, in [1,2). */
22 extern int flt_status;
31 .B void flt_add(e1, e2, e3)
32 .B flt_arith *e1, *e2, *e3;
34 .B void flt_mul(e1, e2, e3)
35 .B flt_arith *e1, *e2, *e3;
37 .B void flt_sub(e1, e2, e3)
38 .B flt_arith *e1, *e2, *e3;
40 .B void flt_div(e1, e2, e3)
41 .B flt_arith *e1, *e2, *e3;
46 .B void flt_modf(e1, intpart, fractpart)
47 .B flt_arith *e1, *intpart, *fractpart;
49 .B int flt_cmp(e1, e2)
50 .B flt_arith *e1, *e2;
52 .B void flt_str2flt(s, e)
56 .B void flt_flt2str(e, buf, bufsize)
63 .B #include <em_arith.h>
64 .B void flt_arith2flt(n, e, uns)
69 .B arith flt_flt2arith(e, uns)
73 .B void flt_b64_sft(m, n)
74 .B struct flt_mantissa *m;
77 This set of routines emulates high-precision floating point arithmetic.
78 It is intended primarily for compilers that need to evaluate
79 floating point expressions at compile-time. It could be argued that this
80 should be done in the floating point arithmetic of the target machine,
81 but EM does not define its floating point arithmetic.
84 adds the numbers indicated by
88 and stores the result indirectly through
92 multiplies the numbers indicated by
96 and stores the result indirectly through
100 subtracts the number indicated by
102 from the one indicated by
104 and stores the result indirectly through
108 divides the number indicated by
110 by the one indicated by
112 and stores the result indirectly through
116 negates the number indicated by
118 and stores the result indirectly through
122 splits the number indicated by
124 into an integer part and a fraction part, and stores the integer part through
126 and the fraction part through
128 So, adding the numbers indicated by
132 results (in the absence of rounding error) in the number
135 Also, the absolute value of the number indicated by
137 is less than or equal to the absolute value of the number indicated by
139 The absolute value of the number indicated by
144 compares the numbers indicated by
162 converts the string indicated by
164 to a floating point number, and stores this number through
166 The string should contain a floating point constant, which consists of
167 an integer part, a decimal point, a fraction part, an \f(CWe\fP or an
168 \f(CWE\fP, and an optionally signed integer exponent. The integer and
169 fraction parts both consist of a sequence of digits. They may not both be
170 missing. The decimal point, the \f(CWe\fP and the exponent may be
174 converts the number indicated by
176 into a string, in a scientific notation acceptable for EM. The result is
181 characters are stored.
182 The maximum length needed is available in the constant FLT_STRLEN.
187 to the floating point format used in this package and returns the result
191 flag is set, the number
193 is regarded as unsigned.
196 truncates the number indicated by
198 to the largest integer value smaller than or equal to the number indicated by
200 It returns this value. If the
202 flag is set, the result is to be regarded as unsigned.
204 Before each operation, the
206 variable is reset to 0. After an operation, it can be checked for one
207 of the following values:
210 an overflow occurred. The result is a large value with the correct sign.
211 This can occur with the routines
221 an underflow occurred. The result is 0.
222 This can occur with the routines
231 divide by 0. The result is a large value with the sign of the dividend.
232 This can only occur with the routine
236 indicates that the string did not represent a floating point number. The
238 This can only occur with the routine
242 indicates that the buffer is too small. The contents of the buffer are
243 undefined. This can only occur with the routine
251 bits left or right, depending on the sign of
255 is negative, it is a left shift; if
257 is positive, it is a right shift.
259 ~em/modules/h/flt_arith.h
261 ~em/modules/h/em_arith.h
263 ~em/modules/lib/libflt.a