modules/src/flt_arith/flt_arith.3

   1 .TH FLT_ARITH 3 "$Revision: 1.9 $"
   2 .ad
   3 .SH NAME
   4 flt_arith \- high precision floating point arithmetic
   5 .SH SYNOPSIS
   6 .nf
   7 .B #include <flt_arith.h>
   8 .PP
   9 .if t .ta 3m 13m 22m
  10 .if n .ta 5m 25m 40m
  11 struct flt_mantissa {
  12         long    flt_h_32;       /* high order 32 bits of mantissa */
  13         long    flt_l_32;       /* low order 32 bits of mantissa */
  14 };
  15
  16 typedef struct {
  17         short   flt_sign;       /* 0 for positive, 1 for negative */
  18         short   flt_exp;        /* between -16384 and 16384 */
  19         struct flt_mantissa     flt_mantissa;   /* normalized, in [1,2). */
  20 } flt_arith;
  21
  22 extern int      flt_status;
  23 #define FLT_OVFL        001
  24 #define FLT_UNFL        002
  25 #define FLT_DIV0        004
  26 #define FLT_NOFLT       010
  27 #define FLT_BTSM        020
  28
  29 #define FLT_STRLEN      32
  30 .PP
  31 .B void flt_add(e1, e2, e3)
  32 .B flt_arith *e1, *e2, *e3;
  33 .PP
  34 .B void flt_mul(e1, e2, e3)
  35 .B flt_arith *e1, *e2, *e3;
  36 .PP
  37 .B void flt_sub(e1, e2, e3)
  38 .B flt_arith *e1, *e2, *e3;
  39 .PP
  40 .B void flt_div(e1, e2, e3)
  41 .B flt_arith *e1, *e2, *e3;
  42 .PP
  43 .B void flt_umin(e)
  44 .B flt_arith *e;
  45 .PP
  46 .B void flt_modf(e1, intpart, fractpart)
  47 .B flt_arith *e1, *intpart, *fractpart;
  48 .PP
  49 .B int flt_cmp(e1, e2)
  50 .B flt_arith *e1, *e2;
  51 .PP
  52 .B void flt_str2flt(s, e)
  53 .B char *s;
  54 .B flt_arith *e;
  55 .PP
  56 .B void flt_flt2str(e, buf, bufsize)
  57 .B flt_arith *e;
  58 .B char *buf;
  59 .B int bufsize;
  60 .PP
  61 .B int flt_status;
  62 .PP
  63 .B #include <em_arith.h>
  64 .B void flt_arith2flt(n, e, uns)
  65 .B arith n;
  66 .B flt_arith *e;
  67 .B int uns;
  68 .PP
  69 .B arith flt_flt2arith(e, uns)
  70 .B flt_arith *e;
  71 .B int uns;
  72 .PP
  73 .B void flt_b64_sft(m, n)
  74 .B struct flt_mantissa *m;
  75 .B int n;
  76 .SH DESCRIPTION
  77 This set of routines emulates floating point arithmetic with high
  78 precision. It is intended primarily for compilers that need to evaluate
  79 floating point expressions at compile-time. It could be argued that this
  80 should be done in the floating point arithmetic of the target machine,
  81 but EM does not define its floating point arithmetic.
  82 .PP
  83 .B flt_add
  84 adds the numbers indicated by
  85 .I e1
  86 and
  87 .I e2
  88 and stores the result indirectly through
  89 .IR e3 .
  90 .PP
  91 .B flt_mul
  92 multiplies the numbers indicated by
  93 .I e1
  94 and
  95 .I e2
  96 and stores the result indirectly through
  97 .IR e3 .
  98 .PP
  99 .B flt_sub
 100 subtracts the number indicated by
 101 .I e2
 102 from the one indicated by
 103 .I e1
 104 and stores the result indirectly through
 105 .IR e3 .
 106 .PP
 107 .B flt_div
 108 divides the number indicated by
 109 .I e1
 110 by the one indicated by
 111 .I e2
 112 and stores the result indirectly through
 113 .IR e3 .
 114 .PP
 115 .B flt_umin
 116 negates the number indicated by
 117 .I e
 118 and stores the result indirectly through
 119 .IR e .
 120 .PP
 121 .B flt_modf
 122 splits the number indicated by
 123 .I e1
 124 into an integer part and a fraction part, and stores the integer part through
 125 .I intpart
 126 and the fraction part through
 127 .IR fractpart .
 128 So, adding the numbers indicated by
 129 .I intpart
 130 and
 131 .I fractpart
 132 results (in the absence of rounding error) in the number
 133 indicated by
 134 .IR e1 .
 135 Also, the absolute value of the number indicated by
 136 .I intpart
 137 is less than or equal to the absolute value of the number indicated by
 138 .IR e1 .
 139 The absolute value of the number indicated by
 140 .I fractpart
 141 is less than 1.
 142 .PP
 143 .B flt_cmp
 144 compares the numbers indicated by
 145 .I e1
 146 and
 147 .I e2
 148 and returns -1 if
 149 .I e1
 150 <
 151 .IR e2 ,
 152 0 if
 153 .I e1
 154 =
 155 .IR e2 ,
 156 and 1 if
 157 .I e1
 158 >
 159 .IR e2 .
 160 .PP
 161 .B flt_str2flt
 162 converts the string indicated by
 163 .I s
 164 to a floating point number, and stores this number through
 165 .IR e .
 166 The string should contain a floating point constant, which consists of
 167 an integer part, a decimal point, a fraction part, an \f(CWe\fP or an
 168 \f(CWE\fP, and an optionally signed integer exponent. The integer and
 169 fraction parts both consist of a sequence of digits. They may not both be
 170 missing. The decimal point, the \f(CWe\fP and the exponent may be
 171 missing.
 172 .PP
 173 .B flt_flt2str
 174 converts the number indicated by
 175 .I e
 176 into a string, in a scientific notation acceptable for EM. The result is
 177 stored in
 178 .IR buf .
 179 At most
 180 .I bufsize
 181 characters are stored.
 182 The maximum length needed is available in the constant FLT_STRLEN.
 183 .PP
 184 .B flt_arith2flt
 185 converts the number
 186 .I n
 187 to the floating point format used in this package and stores the result through
 188 .IR e .
 189 If the
 190 .I uns
 191 flag is set, the number
 192 .I n
 193 is regarded as unsigned.
 194 .PP
 195 .B flt_flt2arith
 196 truncates the number indicated by
 197 .I e
 198 to the largest integer value smaller than or equal to the number indicated by
 199 .IR e .
 200 It returns this value. If the
 201 .I uns
 202 flag is set, the result is to be regarded as unsigned.
 203 .PP
 204 Before each operation, the
 205 .I flt_status
 206 variable is reset to 0. After an operation, it can be checked for one
 207 of the following values:
 208 .IP FLT_OVFL
 209 .br
 210 an overflow occurred. The result is a large value with the correct sign.
 211 This can occur with the routines
 212 .IR flt_add ,
 213 .IR flt_sub ,
 214 .IR flt_div ,
 215 .IR flt_mul ,
 216 .IR flt_flt2arith ,
 217 and
 218 .IR flt_str2flt .
 219 .IP FLT_UNFL
 220 .br
 221 an underflow occurred. The result is 0.
 222 This can occur with the routines
 223 .IR flt_div ,
 224 .IR flt_mul ,
 225 .IR flt_sub ,
 226 .IR flt_add ,
 227 and
 228 .IR flt_str2flt .
 229 .IP FLT_DIV0
 230 .br
 231 divide by 0. The result is a large value with the sign of the dividend.
 232 This can only occur with the routine
 233 .IR flt_div .
 234 .IP FLT_NOFLT
 235 .br
 236 indicates that the string did not represent a floating point number. The
 237 result is 0.
 238 This can only occur with the routine
 239 .IR flt_str2flt .
 240 .IP FLT_BTSM
 241 .br
 242 indicates that the buffer is too small. The contents of the buffer are
 243 undefined. This can only occur with the routine
 244 .IR flt_flt2str .
 245 .PP
 246 The routine
 247 .I flt_b64_sft
 248 shifts the mantissa
 249 .I m
 250 .I |n|
 251 bits left or right, depending on the sign of
 252 .IR n .
 253 If
 254 .I n
 255 is negative, it is a left shift; if
 256 .I n
 257 is positive, it is a right shift.
 258 .SH FILES
 259 ~em/modules/h/flt_arith.h
 260 .br
 261 ~em/modules/h/em_arith.h
 262 .br
 263 ~em/modules/lib/libflt.a