From: Alan Cox Date: Tue, 21 Jun 2016 17:45:34 +0000 (+0100) Subject: libm: the backbone of the FP maths support X-Git-Url: https://git.ndcode.org/public/gitweb.cgi?a=commitdiff_plain;h=b2f21eeec3f6a2059ec57de80f8568c5816bdda0;p=FUZIX.git libm: the backbone of the FP maths support Only actually using the single precision paths in the current platforms as the SDCC Z80 port only supports "float", so we define double as float and #define over the methods. Anyone coming to this with a bigger box (can we do real doubles on 6809?) will need to do a fair chunk of further work. --- diff --git a/Library/libs/API.list b/Library/libs/API.list index 66f4a79a..e7abb2f2 100644 --- a/Library/libs/API.list +++ b/Library/libs/API.list @@ -99,7 +99,7 @@ fstat CODE ftell CODE ftw https://opensource.apple.com/source/Libc/Libc-825.26/gen/nftw.c ? fwrite CODE -gamma +gamma OBSOLETE (added lgamma, tgamma) getc CODE getchar CODE getcwd CODE @@ -142,9 +142,12 @@ ispunct CODE isspace CODE isupper CODE isxdigit CODE -j0 ULY -j1 ULY -jn ULY +j0 CODE +j0f CODE +j1 CODE +j1f CODE +jn CODE +jnf CODE jrand48 CODE kill CODE l64a CODE @@ -155,8 +158,9 @@ link CODE localtime CODE lockf NO (flock) log CODE | Z80 -logf ULY -log10 ULY +logf CODE +log10 CODE +log10f CODE longjmp CODE lrand48 CODE lsearch CODE @@ -173,8 +177,8 @@ memcpy CODE memset CODE mknod CODE mktemp -modf ULY -modff Z80 | ULY +modf CODE +modff Z80 | CODE monitor mount CODE mrand48 CODE @@ -200,6 +204,8 @@ rand CODE read CODE realloc CODE rewind CODE +round ; MUSL ones unsuitable licence +roundf ; scanf CODE seed48 CODE setbuf CODE @@ -213,8 +219,10 @@ setutent CODE setvbuf CODE sgetl signal CODE (but kernel bits to do) -sin ULY -sinh ULY +sin CODE +sinf CODE | Z80 +sinh CODE +sinhf CODE sleep CODE sprintf CODE sputl @@ -242,8 +250,10 @@ strtol CODE swab CODE sync CODE system CODE -tan ULY -tanh ULY +tan CODE +tanf CODE | Z80 +tanh CODE +tanhf CODE tdelete tempnam tfind @@ -273,6 +283,6 @@ vfprintf CODE vsprintf CODE wait CODE write CODE -y0 ULY (in j0) -y1 ULY (in j1) -yn ULY (in jn) +y0 CODE +y1 CODE +yn CODE diff --git a/Library/libs/Float-TODO b/Library/libs/Float-TODO new file mode 100644 index 00000000..f64e6c08 --- /dev/null +++ b/Library/libs/Float-TODO @@ -0,0 +1,13 @@ +exp2() +exp2f() +round() +roundf() + +Sort out exceptions within this code and the Z80 SDCC intrinsics + +Figure out which bits to pull from SDCC for Z80 builds + +Clean up properly to get a libm/libc divided correctly + +Make the _libm versions of stdio do #define #include to make +Makefiles tidier diff --git a/Library/libs/Makefile b/Library/libs/Makefile index 667632eb..a2f59874 100644 --- a/Library/libs/Makefile +++ b/Library/libs/Makefile @@ -69,9 +69,17 @@ SRC_HARD += regexp.c #SRC_C += memchr.c memcmp.c memcpy.c memset.c SRC_C += acosf.c acoshf.c asinf.c asinhf.c atan2f.c atanf.c atanhf.c -SRC_C += ceilf.c expf.c -SRC_C += fabsf.c floorf.c fmodf.c frexpf.c hypotf.c -SRC_C += scalbnf.c scalbinf.c sqrtf.c +SRC_C += cbrtf.c ceilf.c copysignf.c erff.c expf.c expm1f.c +SRC_C += fabsf.c fdimf.c floorf.c fmaxf.c fminf.c fmodf.c frexpf.c +SRC_C += hypotf.c ilogbf.c j0f.c j1f.c jnf.c +SRC_C += ldexpf.c lgammaf.c lgammaf_r.c logf.c log2f.c log10f.c logbf.c +SRC_C += lrintf.c lroundf.c +SRC_C += modff.c nearbyintf.c nextafterf.c powf.c +SRC_C += remainderf.c remquof.c rintf.c +SRC_C += scalbnf.c scalbinf.c +SRC_C += sinf.c sincosf.c sinhf.c +SRC_C += sqrtf.c tgammaf.c +SRC_C += __expo2f.c __float_bits.c __fpclassifyf.c __log1pf.c __signgam.c SRC_CT += termcap.c diff --git a/Library/libs/__cos.c b/Library/libs/__cos.c new file mode 100644 index 00000000..b473d1a3 --- /dev/null +++ b/Library/libs/__cos.c @@ -0,0 +1,72 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_cos.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * __cos( x, y ) + * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * + * Algorithm + * 1. Since cos(-x) = cos(x), we need only to consider positive x. + * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. + * 3. cos(x) is approximated by a polynomial of degree 14 on + * [0,pi/4] + * 4 14 + * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x + * where the remez error is + * + * | 2 4 6 8 10 12 14 | -58 + * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 + * | | + * + * 4 6 8 10 12 14 + * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then + * cos(x) ~ 1 - x*x/2 + r + * since cos(x+y) ~ cos(x) - sin(x)*y + * ~ cos(x) - x*y, + * a correction term is necessary in cos(x) and hence + * cos(x+y) = 1 - (x*x/2 - (r - x*y)) + * For better accuracy, rearrange to + * cos(x+y) ~ w + (tmp + (r-x*y)) + * where w = 1 - x*x/2 and tmp is a tiny correction term + * (1 - x*x/2 == w + tmp exactly in infinite precision). + * The exactness of w + tmp in infinite precision depends on w + * and tmp having the same precision as x. If they have extra + * precision due to compiler bugs, then the extra precision is + * only good provided it is retained in all terms of the final + * expression for cos(). Retention happens in all cases tested + * under FreeBSD, so don't pessimize things by forcibly clipping + * any extra precision in w. + */ + +#include +#include "libm.h" + +static const double +C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ +C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ +C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ +C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ +C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ +C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ + +double __cos(double x, double y) +{ + double hz,z,r,w; + + z = x*x; + w = z*z; + r = z*(C1+z*(C2+z*C3)) + w*w*(C4+z*(C5+z*C6)); + hz = 0.5*z; + w = 1.0-hz; + return w + (((1.0-w)-hz) + (z*r-x*y)); +} diff --git a/Library/libs/__cosdf.c b/Library/libs/__cosdf.c new file mode 100644 index 00000000..9bba33e3 --- /dev/null +++ b/Library/libs/__cosdf.c @@ -0,0 +1,36 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_cosf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Debugged and optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include +#include "libm.h" + +/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */ +static const double +C0 = -0x1ffffffd0c5e81.0p-54, /* -0.499999997251031003120 */ +C1 = 0x155553e1053a42.0p-57, /* 0.0416666233237390631894 */ +C2 = -0x16c087e80f1e27.0p-62, /* -0.00138867637746099294692 */ +C3 = 0x199342e0ee5069.0p-68; /* 0.0000243904487962774090654 */ + +float __cosdf(double x) +{ + double r, w, z; + + /* Try to optimize for parallel evaluation as in __tandf.c. */ + z = x*x; + w = z*z; + r = C2+z*C3; + return ((1.0+z*C0) + w*C1) + (w*z)*r; +} diff --git a/Library/libs/__double_bits.c b/Library/libs/__double_bits.c new file mode 100644 index 00000000..7ade0a80 --- /dev/null +++ b/Library/libs/__double_bits.c @@ -0,0 +1,12 @@ +#include + +unsigned long __double_bits(double p) +{ + union { + double d; + unsigned long bits; + } conv; + + conv.d = p; + return conv.bits; +} diff --git a/Library/libs/__expo2.c b/Library/libs/__expo2.c new file mode 100644 index 00000000..6fd615d6 --- /dev/null +++ b/Library/libs/__expo2.c @@ -0,0 +1,19 @@ +/* From MUSL */ + +#include +#include "libm.h" + +/* k is such that k*ln2 has minimal relative error and x - kln2 > log(DBL_MIN) */ +static const int k = 2043; +static const double kln2 = 0x1.62066151add8bp+10; + +/* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */ +double __expo2(double x) +{ + double scale; + + /* note that k is odd and scale*scale overflows */ + INSERT_WORDS(scale, (uint32_t)(0x3ff + k/2) << 20, 0); + /* exp(x - k ln2) * 2**(k-1) */ + return exp(x - kln2) * scale * scale; +} diff --git a/Library/libs/__expo2f.c b/Library/libs/__expo2f.c new file mode 100644 index 00000000..6be0eb6c --- /dev/null +++ b/Library/libs/__expo2f.c @@ -0,0 +1,19 @@ +/* From MUSL */ + +#include +#include "libm.h" + +/* k is such that k*ln2 has minimal relative error and x - kln2 > log(FLT_MIN) */ +static const int k = 235; +static const float kln2 = 0x1.45c778p+7f; + +/* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ +float __expo2f(float x) +{ + float scale; + + /* note that k is odd and scale*scale overflows */ + SET_FLOAT_WORD(scale, (uint32_t)(0x7f + k/2) << 23); + /* exp(x - k ln2) * 2**(k-1) */ + return expf(x - kln2) * scale * scale; +} diff --git a/Library/libs/__float_bits.c b/Library/libs/__float_bits.c new file mode 100644 index 00000000..99588a06 --- /dev/null +++ b/Library/libs/__float_bits.c @@ -0,0 +1,12 @@ +#include + +unsigned int __float_bits(float p) +{ + union { + float f; + unsigned int bits; + } conv; + + conv.f = p; + return conv.bits; +} diff --git a/Library/libs/__fpclassify.c b/Library/libs/__fpclassify.c new file mode 100644 index 00000000..7c08ef3b --- /dev/null +++ b/Library/libs/__fpclassify.c @@ -0,0 +1,11 @@ +#include +#include "libm.h" + +int __fpclassify(double x) +{ + union dshape u = { x }; + int e = u.bits>>52 & 0x7ff; + if (!e) return u.bits<<1 ? FP_SUBNORMAL : FP_ZERO; + if (e==0x7ff) return u.bits<<12 ? FP_NAN : FP_INFINITE; + return FP_NORMAL; +} diff --git a/Library/libs/__fpclassifyf.c b/Library/libs/__fpclassifyf.c new file mode 100644 index 00000000..33781bdf --- /dev/null +++ b/Library/libs/__fpclassifyf.c @@ -0,0 +1,11 @@ +#include +#include "libm.h" + +int __fpclassifyf(float x) +{ + union fshape u = { x }; + int e = u.bits>>23 & 0xff; + if (!e) return u.bits<<1 ? FP_SUBNORMAL : FP_ZERO; + if (e==0xff) return u.bits<<9 ? FP_NAN : FP_INFINITE; + return FP_NORMAL; +} diff --git a/Library/libs/__log1p.c b/Library/libs/__log1p.c new file mode 100644 index 00000000..53b67ee7 --- /dev/null +++ b/Library/libs/__log1p.c @@ -0,0 +1,97 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_log.h */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * __log1p(f): + * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)]. + * + * The following describes the overall strategy for computing + * logarithms in base e. The argument reduction and adding the final + * term of the polynomial are done by the caller for increased accuracy + * when different bases are used. + * + * Method : + * 1. Argument Reduction: find k and f such that + * x = 2^k * (1+f), + * where sqrt(2)/2 < 1+f < sqrt(2) . + * + * 2. Approximation of log(1+f). + * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) + * = 2s + 2/3 s**3 + 2/5 s**5 + ....., + * = 2s + s*R + * We use a special Reme algorithm on [0,0.1716] to generate + * a polynomial of degree 14 to approximate R The maximum error + * of this polynomial approximation is bounded by 2**-58.45. In + * other words, + * 2 4 6 8 10 12 14 + * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s + * (the values of Lg1 to Lg7 are listed in the program) + * and + * | 2 14 | -58.45 + * | Lg1*s +...+Lg7*s - R(z) | <= 2 + * | | + * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. + * In order to guarantee error in log below 1ulp, we compute log + * by + * log(1+f) = f - s*(f - R) (if f is not too large) + * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) + * + * 3. Finally, log(x) = k*ln2 + log(1+f). + * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) + * Here ln2 is split into two floating point number: + * ln2_hi + ln2_lo, + * where n*ln2_hi is always exact for |n| < 2000. + * + * Special cases: + * log(x) is NaN with signal if x < 0 (including -INF) ; + * log(+INF) is +INF; log(0) is -INF with signal; + * log(NaN) is that NaN with no signal. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + */ + +#include +#include "libm.h" + +static const double +Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ +Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ +Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ +Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ +Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ +Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ +Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ + +/* + * We always inline __log1p(), since doing so produces a + * substantial performance improvement (~40% on amd64). + */ +double __log1p(double f) +{ + double hfsq,s,z,R,w,t1,t2; + + s = f/(2.0+f); + z = s*s; + w = z*z; + t1= w*(Lg2+w*(Lg4+w*Lg6)); + t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); + R = t2+t1; + hfsq = 0.5*f*f; + return s*(hfsq+R); +} diff --git a/Library/libs/__log1pf.c b/Library/libs/__log1pf.c new file mode 100644 index 00000000..1efcac33 --- /dev/null +++ b/Library/libs/__log1pf.c @@ -0,0 +1,38 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_logf.h */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * See comments in __log1p.c + */ + +#include +#include "libm.h" + +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +static const float +Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */ +Lg2 = 0xccce13.0p-25, /* 0.40000972152 */ +Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */ +Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */ + +float __log1pf(float f) +{ + float hfsq,s,z,R,w,t1,t2; + + s = f/(2.0f + f); + z = s*s; + w = z*z; + t1 = w*(Lg2+w*Lg4); + t2 = z*(Lg1+w*Lg3); + R = t2+t1; + hfsq = 0.5f * f * f; + return s*(hfsq+R); +} diff --git a/Library/libs/__rem_pio2.c b/Library/libs/__rem_pio2.c new file mode 100644 index 00000000..5dfa2d24 --- /dev/null +++ b/Library/libs/__rem_pio2.c @@ -0,0 +1,176 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + * Optimized by Bruce D. Evans. + */ +/* __rem_pio2(x,y) + * + * return the remainder of x rem pi/2 in y[0]+y[1] + * use __rem_pio2_large() for large x + */ + +#include +#include "libm.h" + +/* + * invpio2: 53 bits of 2/pi + * pio2_1: first 33 bit of pi/2 + * pio2_1t: pi/2 - pio2_1 + * pio2_2: second 33 bit of pi/2 + * pio2_2t: pi/2 - (pio2_1+pio2_2) + * pio2_3: third 33 bit of pi/2 + * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) + */ +static const double +two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ +invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ +pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */ +pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */ +pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */ +pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */ +pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */ +pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ + +/* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ +int __rem_pio2(double x, double *y) +{ + double z,w,t,r,fn; + double tx[3],ty[2]; + int32_t e0,i,j,nx,n,ix,hx; + uint32_t low; + + GET_HIGH_WORD(hx,x); + ix = hx & 0x7fffffff; + if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */ + if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */ + goto medium; /* cancellation -- use medium case */ + if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */ + if (hx > 0) { + z = x - pio2_1; /* one round good to 85 bits */ + y[0] = z - pio2_1t; + y[1] = (z-y[0]) - pio2_1t; + return 1; + } else { + z = x + pio2_1; + y[0] = z + pio2_1t; + y[1] = (z-y[0]) + pio2_1t; + return -1; + } + } else { + if (hx > 0) { + z = x - 2*pio2_1; + y[0] = z - 2*pio2_1t; + y[1] = (z-y[0]) - 2*pio2_1t; + return 2; + } else { + z = x + 2*pio2_1; + y[0] = z + 2*pio2_1t; + y[1] = (z-y[0]) + 2*pio2_1t; + return -2; + } + } + } + if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */ + if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */ + if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */ + goto medium; + if (hx > 0) { + z = x - 3*pio2_1; + y[0] = z - 3*pio2_1t; + y[1] = (z-y[0]) - 3*pio2_1t; + return 3; + } else { + z = x + 3*pio2_1; + y[0] = z + 3*pio2_1t; + y[1] = (z-y[0]) + 3*pio2_1t; + return -3; + } + } else { + if (ix == 0x401921fb) /* |x| ~= 4pi/2 */ + goto medium; + if (hx > 0) { + z = x - 4*pio2_1; + y[0] = z - 4*pio2_1t; + y[1] = (z-y[0]) - 4*pio2_1t; + return 4; + } else { + z = x + 4*pio2_1; + y[0] = z + 4*pio2_1t; + y[1] = (z-y[0]) + 4*pio2_1t; + return -4; + } + } + } + if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */ + uint32_t high; +medium: + /* Use a specialized rint() to get fn. Assume round-to-nearest. */ + STRICT_ASSIGN(double, fn, x*invpio2 + 0x1.8p52); + fn = fn - 0x1.8p52; +// FIXME +#ifdef HAVE_EFFICIENT_IRINT + n = irint(fn); +#else + n = (int32_t)fn; +#endif + r = x - fn*pio2_1; + w = fn*pio2_1t; /* 1st round, good to 85 bits */ + j = ix>>20; + y[0] = r - w; + GET_HIGH_WORD(high,y[0]); + i = j - ((high>>20)&0x7ff); + if (i > 16) { /* 2nd round, good to 118 bits */ + t = r; + w = fn*pio2_2; + r = t - w; + w = fn*pio2_2t - ((t-r)-w); + y[0] = r - w; + GET_HIGH_WORD(high,y[0]); + i = j - ((high>>20)&0x7ff); + if (i > 49) { /* 3rd round, good to 151 bits, covers all cases */ + t = r; + w = fn*pio2_3; + r = t - w; + w = fn*pio2_3t - ((t-r)-w); + y[0] = r - w; + } + } + y[1] = (r-y[0]) - w; + return n; + } + /* + * all other (large) arguments + */ + if (ix >= 0x7ff00000) { /* x is inf or NaN */ + y[0] = y[1] = x - x; + return 0; + } + /* set z = scalbn(|x|,ilogb(x)-23) */ + GET_LOW_WORD(low,x); + e0 = (ix>>20) - 1046; /* e0 = ilogb(z)-23; */ + INSERT_WORDS(z, ix - ((int32_t)(e0<<20)), low); + for (i=0; i<2; i++) { + tx[i] = (double)((int32_t)(z)); + z = (z-tx[i])*two24; + } + tx[2] = z; + nx = 3; + while (tx[nx-1] == 0.0) nx--; /* skip zero term */ + n = __rem_pio2_large(tx,ty,e0,nx,1); + if (hx < 0) { + y[0] = -ty[0]; + y[1] = -ty[1]; + return -n; + } + y[0] = ty[0]; + y[1] = ty[1]; + return n; +} diff --git a/Library/libs/__rem_pio2_large.c b/Library/libs/__rem_pio2_large.c new file mode 100644 index 00000000..6790d661 --- /dev/null +++ b/Library/libs/__rem_pio2_large.c @@ -0,0 +1,446 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_rem_pio2.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * __rem_pio2_large(x,y,e0,nx,prec) + * double x[],y[]; int e0,nx,prec; + * + * __rem_pio2_large return the last three digits of N with + * y = x - N*pi/2 + * so that |y| < pi/2. + * + * The method is to compute the integer (mod 8) and fraction parts of + * (2/pi)*x without doing the full multiplication. In general we + * skip the part of the product that are known to be a huge integer ( + * more accurately, = 0 mod 8 ). Thus the number of operations are + * independent of the exponent of the input. + * + * (2/pi) is represented by an array of 24-bit integers in ipio2[]. + * + * Input parameters: + * x[] The input value (must be positive) is broken into nx + * pieces of 24-bit integers in double precision format. + * x[i] will be the i-th 24 bit of x. The scaled exponent + * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 + * match x's up to 24 bits. + * + * Example of breaking a double positive z into x[0]+x[1]+x[2]: + * e0 = ilogb(z)-23 + * z = scalbn(z,-e0) + * for i = 0,1,2 + * x[i] = floor(z) + * z = (z-x[i])*2**24 + * + * + * y[] ouput result in an array of double precision numbers. + * The dimension of y[] is: + * 24-bit precision 1 + * 53-bit precision 2 + * 64-bit precision 2 + * 113-bit precision 3 + * The actual value is the sum of them. Thus for 113-bit + * precison, one may have to do something like: + * + * long double t,w,r_head, r_tail; + * t = (long double)y[2] + (long double)y[1]; + * w = (long double)y[0]; + * r_head = t+w; + * r_tail = w - (r_head - t); + * + * e0 The exponent of x[0]. Must be <= 16360 or you need to + * expand the ipio2 table. + * + * nx dimension of x[] + * + * prec an integer indicating the precision: + * 0 24 bits (single) + * 1 53 bits (double) + * 2 64 bits (extended) + * 3 113 bits (quad) + * + * External function: + * double scalbn(), floor(); + * + * + * Here is the description of some local variables: + * + * jk jk+1 is the initial number of terms of ipio2[] needed + * in the computation. The minimum and recommended value + * for jk is 3,4,4,6 for single, double, extended, and quad. + * jk+1 must be 2 larger than you might expect so that our + * recomputation test works. (Up to 24 bits in the integer + * part (the 24 bits of it that we compute) and 23 bits in + * the fraction part may be lost to cancelation before we + * recompute.) + * + * jz local integer variable indicating the number of + * terms of ipio2[] used. + * + * jx nx - 1 + * + * jv index for pointing to the suitable ipio2[] for the + * computation. In general, we want + * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 + * is an integer. Thus + * e0-3-24*jv >= 0 or (e0-3)/24 >= jv + * Hence jv = max(0,(e0-3)/24). + * + * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. + * + * q[] double array with integral value, representing the + * 24-bits chunk of the product of x and 2/pi. + * + * q0 the corresponding exponent of q[0]. Note that the + * exponent for q[i] would be q0-24*i. + * + * PIo2[] double precision array, obtained by cutting pi/2 + * into 24 bits chunks. + * + * f[] ipio2[] in floating point + * + * iq[] integer array by breaking up q[] in 24-bits chunk. + * + * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] + * + * ih integer. If >0 it indicates q[] is >= 0.5, hence + * it also indicates the *sign* of the result. + * + */ +/* + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + */ + +#include +#include "libm.h" + +static const int init_jk[] = {3,4,4,6}; /* initial value for jk */ + +/* + * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi + * + * integer array, contains the (24*i)-th to (24*i+23)-th + * bit of 2/pi after binary point. The corresponding + * floating value is + * + * ipio2[i] * 2^(-24(i+1)). + * + * NB: This table must have at least (e0-3)/24 + jk terms. + * For quad precision (e0 <= 16360, jk = 6), this is 686. + */ +static const int32_t ipio2[] = { +0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, +0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, +0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, +0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, +0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, +0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, +0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, +0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, +0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, +0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, +0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, + +#if LDBL_MAX_EXP > 1024 +0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6, +0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, +0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35, +0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30, +0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C, +0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4, +0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770, +0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7, +0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19, +0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522, +0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16, +0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6, +0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E, +0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48, +0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3, +0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF, +0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55, +0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612, +0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929, +0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC, +0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B, +0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C, +0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4, +0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB, +0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC, +0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C, +0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F, +0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5, +0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437, +0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B, +0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA, +0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD, +0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3, +0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3, +0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717, +0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F, +0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61, +0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB, +0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51, +0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0, +0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C, +0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6, +0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC, +0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED, +0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328, +0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D, +0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0, +0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B, +0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4, +0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3, +0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F, +0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD, +0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B, +0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4, +0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761, +0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31, +0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30, +0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262, +0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E, +0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1, +0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C, +0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4, +0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08, +0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196, +0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9, +0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4, +0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC, +0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C, +0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0, +0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C, +0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0, +0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC, +0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22, +0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893, +0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7, +0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5, +0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F, +0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4, +0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF, +0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B, +0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2, +0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138, +0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E, +0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569, +0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34, +0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9, +0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D, +0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F, +0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855, +0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569, +0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B, +0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE, +0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41, +0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49, +0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F, +0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110, +0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8, +0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365, +0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A, +0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270, +0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5, +0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616, +0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B, +0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0, +#endif +}; + +static const double PIo2[] = { + 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */ + 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */ + 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */ + 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */ + 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */ + 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */ + 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */ + 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ +}; + +static const double +two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ +twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */ + +int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec) +{ + int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; + double z,fw,f[20],fq[20],q[20]; + + /* initialize jk*/ + jk = init_jk[prec]; + jp = jk; + + /* determine jx,jv,q0, note that 3>q0 */ + jx = nx-1; + jv = (e0-3)/24; if(jv<0) jv=0; + q0 = e0-24*(jv+1); + + /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ + j = jv-jx; m = jx+jk; + for (i=0; i<=m; i++,j++) + f[i] = j<0 ? 0.0 : (double)ipio2[j]; + + /* compute q[0],q[1],...q[jk] */ + for (i=0; i<=jk; i++) { + for (j=0,fw=0.0; j<=jx; j++) + fw += x[j]*f[jx+i-j]; + q[i] = fw; + } + + jz = jk; +recompute: + /* distill q[] into iq[] reversingly */ + for (i=0,j=jz,z=q[jz]; j>0; i++,j--) { + fw = (double)((int32_t)(twon24* z)); + iq[i] = (int32_t)(z-two24*fw); + z = q[j-1]+fw; + } + + /* compute n */ + z = scalbn(z,q0); /* actual value of z */ + z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ + n = (int32_t)z; + z -= (double)n; + ih = 0; + if (q0 > 0) { /* need iq[jz-1] to determine n */ + i = iq[jz-1]>>(24-q0); n += i; + iq[jz-1] -= i<<(24-q0); + ih = iq[jz-1]>>(23-q0); + } + else if (q0 == 0) ih = iq[jz-1]>>23; + else if (z >= 0.5) ih = 2; + + if (ih > 0) { /* q > 0.5 */ + n += 1; carry = 0; + for (i=0; i 0) { /* rare case: chance is 1 in 12 */ + switch(q0) { + case 1: + iq[jz-1] &= 0x7fffff; break; + case 2: + iq[jz-1] &= 0x3fffff; break; + } + } + if (ih == 2) { + z = 1.0 - z; + if (carry != 0) + z -= scalbn(1.0,q0); + } + } + + /* check if recomputation is needed */ + if (z == 0.0) { + j = 0; + for (i=jz-1; i>=jk; i--) j |= iq[i]; + if (j == 0) { /* need recomputation */ + for (k=1; iq[jk-k]==0; k++); /* k = no. of terms needed */ + + for (i=jz+1; i<=jz+k; i++) { /* add q[jz+1] to q[jz+k] */ + f[jx+i] = (double)ipio2[jv+i]; + for (j=0,fw=0.0; j<=jx; j++) + fw += x[j]*f[jx+i-j]; + q[i] = fw; + } + jz += k; + goto recompute; + } + } + + /* chop off zero terms */ + if (z == 0.0) { + jz -= 1; + q0 -= 24; + while (iq[jz] == 0) { + jz--; + q0 -= 24; + } + } else { /* break z into 24-bit if necessary */ + z = scalbn(z,-q0); + if (z >= two24) { + fw = (double)((int32_t)(twon24*z)); + iq[jz] = (int32_t)(z-two24*fw); + jz += 1; + q0 += 24; + iq[jz] = (int32_t)fw; + } else + iq[jz] = (int32_t)z; + } + + /* convert integer "bit" chunk to floating-point value */ + fw = scalbn(1.0,q0); + for (i=jz; i>=0; i--) { + q[i] = fw*(double)iq[i]; + fw *= twon24; + } + + /* compute PIo2[0,...,jp]*q[jz,...,0] */ + for(i=jz; i>=0; i--) { + for (fw=0.0,k=0; k<=jp && k<=jz-i; k++) + fw += PIo2[k]*q[i+k]; + fq[jz-i] = fw; + } + + /* compress fq[] into y[] */ + switch(prec) { + case 0: + fw = 0.0; + for (i=jz; i>=0; i--) + fw += fq[i]; + y[0] = ih==0 ? fw : -fw; + break; + case 1: + case 2: + fw = 0.0; + for (i=jz; i>=0; i--) + fw += fq[i]; + STRICT_ASSIGN(double,fw,fw); + y[0] = ih==0 ? fw : -fw; + fw = fq[0]-fw; + for (i=1; i<=jz; i++) + fw += fq[i]; + y[1] = ih==0 ? fw : -fw; + break; + case 3: /* painful */ + for (i=jz; i>0; i--) { + fw = fq[i-1]+fq[i]; + fq[i] += fq[i-1]-fw; + fq[i-1] = fw; + } + for (i=jz; i>1; i--) { + fw = fq[i-1]+fq[i]; + fq[i] += fq[i-1]-fw; + fq[i-1] = fw; + } + for (fw=0.0,i=jz; i>=2; i--) + fw += fq[i]; + if (ih==0) { + y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; + } else { + y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; + } + } + return n&7; +} diff --git a/Library/libs/__signgam.c b/Library/libs/__signgam.c new file mode 100644 index 00000000..8f1d85ef --- /dev/null +++ b/Library/libs/__signgam.c @@ -0,0 +1 @@ +int signgam; diff --git a/Library/libs/__sin.c b/Library/libs/__sin.c new file mode 100644 index 00000000..9aead04b --- /dev/null +++ b/Library/libs/__sin.c @@ -0,0 +1,64 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_sin.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* __sin( x, y, iy) + * kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). + * + * Algorithm + * 1. Since sin(-x) = -sin(x), we need only to consider positive x. + * 2. Callers must return sin(-0) = -0 without calling here since our + * odd polynomial is not evaluated in a way that preserves -0. + * Callers may do the optimization sin(x) ~ x for tiny x. + * 3. sin(x) is approximated by a polynomial of degree 13 on + * [0,pi/4] + * 3 13 + * sin(x) ~ x + S1*x + ... + S6*x + * where + * + * |sin(x) 2 4 6 8 10 12 | -58 + * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 + * | x | + * + * 4. sin(x+y) = sin(x) + sin'(x')*y + * ~ sin(x) + (1-x*x/2)*y + * For better accuracy, let + * 3 2 2 2 2 + * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) + * then 3 2 + * sin(x) = x + (S1*x + (x *(r-y/2)+y)) + */ + +#include "libm.h" + +static const double +S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ +S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ +S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ +S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ +S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ +S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ + +double __sin(double x, double y, int iy) +{ + double z,r,v,w; + + z = x*x; + w = z*z; + r = S2 + z*(S3 + z*S4) + z*w*(S5 + z*S6); + v = z*x; + if (iy == 0) + return x + v*(S1 + z*r); + else + return x - ((z*(0.5*y - v*r) - y) - v*S1); +} diff --git a/Library/libs/__sindf.c b/Library/libs/__sindf.c new file mode 100644 index 00000000..83c0d7a5 --- /dev/null +++ b/Library/libs/__sindf.c @@ -0,0 +1,36 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include "libm.h" + +/* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). */ +static const double +S1 = -0x15555554cbac77.0p-55, /* -0.166666666416265235595 */ +S2 = 0x111110896efbb2.0p-59, /* 0.0083333293858894631756 */ +S3 = -0x1a00f9e2cae774.0p-65, /* -0.000198393348360966317347 */ +S4 = 0x16cd878c3b46a7.0p-71; /* 0.0000027183114939898219064 */ + +float __sindf(double x) +{ + double r, s, w, z; + + /* Try to optimize for parallel evaluation as in __tandf.c. */ + z = x*x; + w = z*z; + r = S3 + z*S4; + s = z*x; + return (x + s*(S1 + z*S2)) + s*w*r; +} diff --git a/Library/libs/__tan.c b/Library/libs/__tan.c new file mode 100644 index 00000000..01e3fe48 --- /dev/null +++ b/Library/libs/__tan.c @@ -0,0 +1,118 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_tan.c */ +/* + * ==================================================== + * Copyright 2004 Sun Microsystems, Inc. All Rights Reserved. + * + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* __tan( x, y, k ) + * kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * Input k indicates whether tan (if k = 1) or -1/tan (if k = -1) is returned. + * + * Algorithm + * 1. Since tan(-x) = -tan(x), we need only to consider positive x. + * 2. Callers must return tan(-0) = -0 without calling here since our + * odd polynomial is not evaluated in a way that preserves -0. + * Callers may do the optimization tan(x) ~ x for tiny x. + * 3. tan(x) is approximated by a odd polynomial of degree 27 on + * [0,0.67434] + * 3 27 + * tan(x) ~ x + T1*x + ... + T13*x + * where + * + * |tan(x) 2 4 26 | -59.2 + * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2 + * | x | + * + * Note: tan(x+y) = tan(x) + tan'(x)*y + * ~ tan(x) + (1+x*x)*y + * Therefore, for better accuracy in computing tan(x+y), let + * 3 2 2 2 2 + * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13)))) + * then + * 3 2 + * tan(x+y) = x + (T1*x + (x *(r+y)+y)) + * + * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then + * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y)) + * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y))) + */ + +#include "libm.h" + +static const double T[] = { + 3.33333333333334091986e-01, /* 3FD55555, 55555563 */ + 1.33333333333201242699e-01, /* 3FC11111, 1110FE7A */ + 5.39682539762260521377e-02, /* 3FABA1BA, 1BB341FE */ + 2.18694882948595424599e-02, /* 3F9664F4, 8406D637 */ + 8.86323982359930005737e-03, /* 3F8226E3, E96E8493 */ + 3.59207910759131235356e-03, /* 3F6D6D22, C9560328 */ + 1.45620945432529025516e-03, /* 3F57DBC8, FEE08315 */ + 5.88041240820264096874e-04, /* 3F4344D8, F2F26501 */ + 2.46463134818469906812e-04, /* 3F3026F7, 1A8D1068 */ + 7.81794442939557092300e-05, /* 3F147E88, A03792A6 */ + 7.14072491382608190305e-05, /* 3F12B80F, 32F0A7E9 */ + -1.85586374855275456654e-05, /* BEF375CB, DB605373 */ + 2.59073051863633712884e-05, /* 3EFB2A70, 74BF7AD4 */ +}, +pio4 = 7.85398163397448278999e-01, /* 3FE921FB, 54442D18 */ +pio4lo = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */ + +double __tan(double x, double y, int iy) +{ + double z, r, v, w, s, sign; + int32_t ix, hx; + + GET_HIGH_WORD(hx,x); + ix = hx & 0x7fffffff; /* high word of |x| */ + if (ix >= 0x3FE59428) { /* |x| >= 0.6744 */ + if (hx < 0) { + x = -x; + y = -y; + } + z = pio4 - x; + w = pio4lo - y; + x = z + w; + y = 0.0; + } + z = x * x; + w = z * z; + /* + * Break x^5*(T[1]+x^2*T[2]+...) into + * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) + + * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12])) + */ + r = T[1] + w*(T[3] + w*(T[5] + w*(T[7] + w*(T[9] + w*T[11])))); + v = z*(T[2] + w*(T[4] + w*(T[6] + w*(T[8] + w*(T[10] + w*T[12]))))); + s = z * x; + r = y + z * (s * (r + v) + y); + r += T[0] * s; + w = x + r; + if (ix >= 0x3FE59428) { + v = iy; + sign = 1 - ((hx >> 30) & 2); + return sign * (v - 2.0 * (x - (w * w / (w + v) - r))); + } + if (iy == 1) + return w; + else { + /* + * if allow error up to 2 ulp, simply return + * -1.0 / (x+r) here + */ + /* compute -1.0 / (x+r) accurately */ + double a, t; + z = w; + SET_LOW_WORD(z,0); + v = r - (z - x); /* z+v = r+x */ + t = a = -1.0 / w; /* a = -1.0/w */ + SET_LOW_WORD(t,0); + s = 1.0 + t * z; + return t + a * (s + t * v); + } +} diff --git a/Library/libs/__tandf.c b/Library/libs/__tandf.c new file mode 100644 index 00000000..36a8214e --- /dev/null +++ b/Library/libs/__tandf.c @@ -0,0 +1,55 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/k_tanf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright 2004 Sun Microsystems, Inc. All Rights Reserved. + * + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include "libm.h" + +/* |tan(x)/x - t(x)| < 2**-25.5 (~[-2e-08, 2e-08]). */ +static const double T[] = { + 0x15554d3418c99f.0p-54, /* 0.333331395030791399758 */ + 0x1112fd38999f72.0p-55, /* 0.133392002712976742718 */ + 0x1b54c91d865afe.0p-57, /* 0.0533812378445670393523 */ + 0x191df3908c33ce.0p-58, /* 0.0245283181166547278873 */ + 0x185dadfcecf44e.0p-61, /* 0.00297435743359967304927 */ + 0x1362b9bf971bcd.0p-59, /* 0.00946564784943673166728 */ +}; + +float __tandf(double x, int iy) +{ + double z,r,w,s,t,u; + + z = x*x; + /* + * Split up the polynomial into small independent terms to give + * opportunities for parallel evaluation. The chosen splitting is + * micro-optimized for Athlons (XP, X64). It costs 2 multiplications + * relative to Horner's method on sequential machines. + * + * We add the small terms from lowest degree up for efficiency on + * non-sequential machines (the lowest degree terms tend to be ready + * earlier). Apart from this, we don't care about order of + * operations, and don't need to to care since we have precision to + * spare. However, the chosen splitting is good for accuracy too, + * and would give results as accurate as Horner's method if the + * small terms were added from highest degree down. + */ + r = T[4] + z*T[5]; + t = T[2] + z*T[3]; + w = z*z; + s = z*x; + u = T[0] + z*T[1]; + r = (x + s*u) + (s*w)*(t + w*r); + if(iy==1) return r; + else return -1.0/r; +} diff --git a/Library/libs/cbrt.c b/Library/libs/cbrt.c new file mode 100644 index 00000000..3a310f38 --- /dev/null +++ b/Library/libs/cbrt.c @@ -0,0 +1,106 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + * Optimized by Bruce D. Evans. + */ +/* cbrt(x) + * Return cube root of x + */ + +#include +#include "libm.h" + +static const uint32_t +B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */ +B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */ + +/* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */ +static const double +P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */ +P1 = -1.88497979543377169875, /* 0xbffe28e0, 0x92f02420 */ +P2 = 1.621429720105354466140, /* 0x3ff9f160, 0x4a49d6c2 */ +P3 = -0.758397934778766047437, /* 0xbfe844cb, 0xbee751d9 */ +P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */ + +double cbrt(double x) +{ + int32_t hx; + union dshape u; + double r,s,t=0.0,w; + uint32_t sign; + uint32_t high,low; + + EXTRACT_WORDS(hx, low, x); + sign = hx & 0x80000000; + hx ^= sign; + if (hx >= 0x7ff00000) /* cbrt(NaN,INF) is itself */ + return x+x; + + /* + * Rough cbrt to 5 bits: + * cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3) + * where e is integral and >= 0, m is real and in [0, 1), and "/" and + * "%" are integer division and modulus with rounding towards minus + * infinity. The RHS is always >= the LHS and has a maximum relative + * error of about 1 in 16. Adding a bias of -0.03306235651 to the + * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE + * floating point representation, for finite positive normal values, + * ordinary integer divison of the value in bits magically gives + * almost exactly the RHS of the above provided we first subtract the + * exponent bias (1023 for doubles) and later add it back. We do the + * subtraction virtually to keep e >= 0 so that ordinary integer + * division rounds towards minus infinity; this is also efficient. + */ + if (hx < 0x00100000) { /* zero or subnormal? */ + if ((hx|low) == 0) + return x; /* cbrt(0) is itself */ + SET_HIGH_WORD(t, 0x43500000); /* set t = 2**54 */ + t *= x; + GET_HIGH_WORD(high, t); + INSERT_WORDS(t, sign|((high&0x7fffffff)/3+B2), 0); + } else + INSERT_WORDS(t, sign|(hx/3+B1), 0); + + /* + * New cbrt to 23 bits: + * cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x) + * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r) + * to within 2**-23.5 when |r - 1| < 1/10. The rough approximation + * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this + * gives us bounds for r = t**3/x. + * + * Try to optimize for parallel evaluation as in k_tanf.c. + */ + r = (t*t)*(t/x); + t = t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4)); + + /* + * Round t away from zero to 23 bits (sloppily except for ensuring that + * the result is larger in magnitude than cbrt(x) but not much more than + * 2 23-bit ulps larger). With rounding towards zero, the error bound + * would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps + * in the rounded t, the infinite-precision error in the Newton + * approximation barely affects third digit in the final error + * 0.667; the error in the rounded t can be up to about 3 23-bit ulps + * before the final error is larger than 0.667 ulps. + */ + u.value = t; + u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL; + t = u.value; + + /* one step Newton iteration to 53 bits with error < 0.667 ulps */ + s = t*t; /* t*t is exact */ + r = x/s; /* error <= 0.5 ulps; |r| < |t| */ + w = t+t; /* t+t is exact */ + r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */ + t = t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */ + return t; +} diff --git a/Library/libs/cbrtf.c b/Library/libs/cbrtf.c new file mode 100644 index 00000000..5807c849 --- /dev/null +++ b/Library/libs/cbrtf.c @@ -0,0 +1,70 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Debugged and optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* cbrtf(x) + * Return cube root of x + */ + +#include +#include "libm.h" + +static const unsigned +B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */ +B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */ + +float cbrtf(float x) +{ + double r,T; + float t; + int32_t hx; + uint32_t sign; + uint32_t high; + + GET_FLOAT_WORD(hx, x); + sign = hx & 0x80000000; + hx ^= sign; + if (hx >= 0x7f800000) /* cbrt(NaN,INF) is itself */ + return x + x; + + /* rough cbrt to 5 bits */ + if (hx < 0x00800000) { /* zero or subnormal? */ + if (hx == 0) + return x; /* cbrt(+-0) is itself */ + SET_FLOAT_WORD(t, 0x4b800000); /* set t = 2**24 */ + t *= x; + GET_FLOAT_WORD(high, t); + SET_FLOAT_WORD(t, sign|((high&0x7fffffff)/3+B2)); + } else + SET_FLOAT_WORD(t, sign|(hx/3+B1)); + + /* + * First step Newton iteration (solving t*t-x/t == 0) to 16 bits. In + * double precision so that its terms can be arranged for efficiency + * without causing overflow or underflow. + */ + T = t; + r = T*T*T; + T = T*((double)x+x+r)/(x+r+r); + + /* + * Second step Newton iteration to 47 bits. In double precision for + * efficiency and accuracy. + */ + r = T*T*T; + T = T*((double)x+x+r)/(x+r+r); + + /* rounding to 24 bits is perfect in round-to-nearest mode */ + return T; +} diff --git a/Library/libs/copysign.c b/Library/libs/copysign.c new file mode 100644 index 00000000..28dc1723 --- /dev/null +++ b/Library/libs/copysign.c @@ -0,0 +1,14 @@ +/* From MUSL */ + +#include +#include "libm.h" + +double copysign(double x, double y) { + union dshape ux, uy; + + ux.value = x; + uy.value = y; + ux.bits &= (uint64_t)-1>>1; + ux.bits |= uy.bits & (uint64_t)1<<63; + return ux.value; +} diff --git a/Library/libs/copysignf.c b/Library/libs/copysignf.c new file mode 100644 index 00000000..46bfe83e --- /dev/null +++ b/Library/libs/copysignf.c @@ -0,0 +1,14 @@ +/* From MUSL */ + +#include +#include "libm.h" + +float copysignf(float x, float y) { + union fshape ux, uy; + + ux.value = x; + uy.value = y; + ux.bits &= (uint32_t)-1>>1; + ux.bits |= uy.bits & (uint32_t)1<<31; + return ux.value; +} diff --git a/Library/libs/erf.c b/Library/libs/erf.c new file mode 100644 index 00000000..a3808db6 --- /dev/null +++ b/Library/libs/erf.c @@ -0,0 +1,304 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_erf.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* double erf(double x) + * double erfc(double x) + * x + * 2 |\ + * erf(x) = --------- | exp(-t*t)dt + * sqrt(pi) \| + * 0 + * + * erfc(x) = 1-erf(x) + * Note that + * erf(-x) = -erf(x) + * erfc(-x) = 2 - erfc(x) + * + * Method: + * 1. For |x| in [0, 0.84375] + * erf(x) = x + x*R(x^2) + * erfc(x) = 1 - erf(x) if x in [-.84375,0.25] + * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375] + * where R = P/Q where P is an odd poly of degree 8 and + * Q is an odd poly of degree 10. + * -57.90 + * | R - (erf(x)-x)/x | <= 2 + * + * + * Remark. The formula is derived by noting + * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....) + * and that + * 2/sqrt(pi) = 1.128379167095512573896158903121545171688 + * is close to one. The interval is chosen because the fix + * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is + * near 0.6174), and by some experiment, 0.84375 is chosen to + * guarantee the error is less than one ulp for erf. + * + * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and + * c = 0.84506291151 rounded to single (24 bits) + * erf(x) = sign(x) * (c + P1(s)/Q1(s)) + * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0 + * 1+(c+P1(s)/Q1(s)) if x < 0 + * |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06 + * Remark: here we use the taylor series expansion at x=1. + * erf(1+s) = erf(1) + s*Poly(s) + * = 0.845.. + P1(s)/Q1(s) + * That is, we use rational approximation to approximate + * erf(1+s) - (c = (single)0.84506291151) + * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25] + * where + * P1(s) = degree 6 poly in s + * Q1(s) = degree 6 poly in s + * + * 3. For x in [1.25,1/0.35(~2.857143)], + * erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1) + * erf(x) = 1 - erfc(x) + * where + * R1(z) = degree 7 poly in z, (z=1/x^2) + * S1(z) = degree 8 poly in z + * + * 4. For x in [1/0.35,28] + * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0 + * = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6 x >= 28 + * erf(x) = sign(x) *(1 - tiny) (raise inexact) + * erfc(x) = tiny*tiny (raise underflow) if x > 0 + * = 2 - tiny if x<0 + * + * 7. Special case: + * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1, + * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2, + * erfc/erf(NaN) is NaN + */ + +#include +#include "libm.h" + +static const double +tiny = 1e-300, +/* c = (float)0.84506291151 */ +erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */ +/* + * Coefficients for approximation to erf on [0,0.84375] + */ +efx = 1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */ +efx8 = 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */ +pp0 = 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */ +pp1 = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */ +pp2 = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */ +pp3 = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */ +pp4 = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */ +qq1 = 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */ +qq2 = 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */ +qq3 = 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */ +qq4 = 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */ +qq5 = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */ +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +pa0 = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */ +pa1 = 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */ +pa2 = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */ +pa3 = 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */ +pa4 = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */ +pa5 = 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */ +pa6 = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */ +qa1 = 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */ +qa2 = 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */ +qa3 = 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */ +qa4 = 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */ +qa5 = 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */ +qa6 = 1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */ +/* + * Coefficients for approximation to erfc in [1.25,1/0.35] + */ +ra0 = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */ +ra1 = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */ +ra2 = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */ +ra3 = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */ +ra4 = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */ +ra5 = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */ +ra6 = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */ +ra7 = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */ +sa1 = 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */ +sa2 = 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */ +sa3 = 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */ +sa4 = 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */ +sa5 = 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */ +sa6 = 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */ +sa7 = 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */ +sa8 = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */ +/* + * Coefficients for approximation to erfc in [1/.35,28] + */ +rb0 = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */ +rb1 = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */ +rb2 = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */ +rb3 = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */ +rb4 = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */ +rb5 = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */ +rb6 = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */ +sb1 = 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */ +sb2 = 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */ +sb3 = 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */ +sb4 = 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */ +sb5 = 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */ +sb6 = 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */ +sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */ + +double erf(double x) +{ + int32_t hx,ix,i; + double R,S,P,Q,s,y,z,r; + + GET_HIGH_WORD(hx, x); + ix = hx & 0x7fffffff; + if (ix >= 0x7ff00000) { + /* erf(nan)=nan, erf(+-inf)=+-1 */ + i = ((uint32_t)hx>>31)<<1; + return (double)(1-i) + 1.0/x; + } + if (ix < 0x3feb0000) { /* |x|<0.84375 */ + if (ix < 0x3e300000) { /* |x|<2**-28 */ + if (ix < 0x00800000) + /* avoid underflow */ + return 0.125*(8.0*x + efx8*x); + return x + efx*x; + } + z = x*x; + r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); + s = 1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); + y = r/s; + return x + x*y; + } + if (ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */ + s = fabs(x)-1.0; + P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); + Q = 1.0+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); + if (hx >= 0) + return erx + P/Q; + return -erx - P/Q; + } + if (ix >= 0x40180000) { /* inf > |x| >= 6 */ + if (hx >= 0) + return 1.0 - tiny; + return tiny - 1.0; + } + x = fabs(x); + s = 1.0/(x*x); + if (ix < 0x4006DB6E) { /* |x| < 1/0.35 */ + R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*( + ra5+s*(ra6+s*ra7)))))); + S = 1.0+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*( + sa5+s*(sa6+s*(sa7+s*sa8))))))); + } else { /* |x| >= 1/0.35 */ + R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*( + rb5+s*rb6))))); + S = 1.0+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*( + sb5+s*(sb6+s*sb7)))))); + } + z = x; + SET_LOW_WORD(z,0); + r = exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S); + if (hx >= 0) + return 1.0 - r/x; + return r/x - 1.0; +} + +double erfc(double x) +{ + int32_t hx,ix; + double R,S,P,Q,s,y,z,r; + + GET_HIGH_WORD(hx, x); + ix = hx & 0x7fffffff; + if (ix >= 0x7ff00000) { + /* erfc(nan)=nan, erfc(+-inf)=0,2 */ + return (double)(((uint32_t)hx>>31)<<1) + 1.0/x; + } + if (ix < 0x3feb0000) { /* |x| < 0.84375 */ + if (ix < 0x3c700000) /* |x| < 2**-56 */ + return 1.0 - x; + z = x*x; + r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); + s = 1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); + y = r/s; + if (hx < 0x3fd00000) { /* x < 1/4 */ + return 1.0 - (x+x*y); + } else { + r = x*y; + r += x - 0.5; + return 0.5 - r ; + } + } + if (ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */ + s = fabs(x)-1.0; + P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); + Q = 1.0+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); + if (hx >= 0) { + z = 1.0-erx; + return z - P/Q; + } else { + z = erx+P/Q; + return 1.0 + z; + } + } + if (ix < 0x403c0000) { /* |x| < 28 */ + x = fabs(x); + s = 1.0/(x*x); + if (ix < 0x4006DB6D) { /* |x| < 1/.35 ~ 2.857143*/ + R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*( + ra5+s*(ra6+s*ra7)))))); + S = 1.0+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*( + sa5+s*(sa6+s*(sa7+s*sa8))))))); + } else { /* |x| >= 1/.35 ~ 2.857143 */ + if (hx < 0 && ix >= 0x40180000) /* x < -6 */ + return 2.0 - tiny; + R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*( + rb5+s*rb6))))); + S = 1.0+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*( + sb5+s*(sb6+s*sb7)))))); + } + z = x; + SET_LOW_WORD(z, 0); + r = exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S); + if (hx > 0) + return r/x; + return 2.0 - r/x; + } + if (hx > 0) + return tiny*tiny; + return 2.0 - tiny; +} diff --git a/Library/libs/erff.c b/Library/libs/erff.c new file mode 100644 index 00000000..6240f297 --- /dev/null +++ b/Library/libs/erff.c @@ -0,0 +1,215 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_erff.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include +#include "libm.h" + +static const float +tiny = 1e-30, +/* c = (subfloat)0.84506291151 */ +erx = 8.4506291151e-01, /* 0x3f58560b */ +/* + * Coefficients for approximation to erf on [0,0.84375] + */ +efx = 1.2837916613e-01, /* 0x3e0375d4 */ +efx8 = 1.0270333290e+00, /* 0x3f8375d4 */ +pp0 = 1.2837916613e-01, /* 0x3e0375d4 */ +pp1 = -3.2504209876e-01, /* 0xbea66beb */ +pp2 = -2.8481749818e-02, /* 0xbce9528f */ +pp3 = -5.7702702470e-03, /* 0xbbbd1489 */ +pp4 = -2.3763017452e-05, /* 0xb7c756b1 */ +qq1 = 3.9791721106e-01, /* 0x3ecbbbce */ +qq2 = 6.5022252500e-02, /* 0x3d852a63 */ +qq3 = 5.0813062117e-03, /* 0x3ba68116 */ +qq4 = 1.3249473704e-04, /* 0x390aee49 */ +qq5 = -3.9602282413e-06, /* 0xb684e21a */ +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +pa0 = -2.3621185683e-03, /* 0xbb1acdc6 */ +pa1 = 4.1485610604e-01, /* 0x3ed46805 */ +pa2 = -3.7220788002e-01, /* 0xbebe9208 */ +pa3 = 3.1834661961e-01, /* 0x3ea2fe54 */ +pa4 = -1.1089469492e-01, /* 0xbde31cc2 */ +pa5 = 3.5478305072e-02, /* 0x3d1151b3 */ +pa6 = -2.1663755178e-03, /* 0xbb0df9c0 */ +qa1 = 1.0642088205e-01, /* 0x3dd9f331 */ +qa2 = 5.4039794207e-01, /* 0x3f0a5785 */ +qa3 = 7.1828655899e-02, /* 0x3d931ae7 */ +qa4 = 1.2617121637e-01, /* 0x3e013307 */ +qa5 = 1.3637083583e-02, /* 0x3c5f6e13 */ +qa6 = 1.1984500103e-02, /* 0x3c445aa3 */ +/* + * Coefficients for approximation to erfc in [1.25,1/0.35] + */ +ra0 = -9.8649440333e-03, /* 0xbc21a093 */ +ra1 = -6.9385856390e-01, /* 0xbf31a0b7 */ +ra2 = -1.0558626175e+01, /* 0xc128f022 */ +ra3 = -6.2375331879e+01, /* 0xc2798057 */ +ra4 = -1.6239666748e+02, /* 0xc322658c */ +ra5 = -1.8460508728e+02, /* 0xc3389ae7 */ +ra6 = -8.1287437439e+01, /* 0xc2a2932b */ +ra7 = -9.8143291473e+00, /* 0xc11d077e */ +sa1 = 1.9651271820e+01, /* 0x419d35ce */ +sa2 = 1.3765776062e+02, /* 0x4309a863 */ +sa3 = 4.3456588745e+02, /* 0x43d9486f */ +sa4 = 6.4538726807e+02, /* 0x442158c9 */ +sa5 = 4.2900814819e+02, /* 0x43d6810b */ +sa6 = 1.0863500214e+02, /* 0x42d9451f */ +sa7 = 6.5702495575e+00, /* 0x40d23f7c */ +sa8 = -6.0424413532e-02, /* 0xbd777f97 */ +/* + * Coefficients for approximation to erfc in [1/.35,28] + */ +rb0 = -9.8649431020e-03, /* 0xbc21a092 */ +rb1 = -7.9928326607e-01, /* 0xbf4c9dd4 */ +rb2 = -1.7757955551e+01, /* 0xc18e104b */ +rb3 = -1.6063638306e+02, /* 0xc320a2ea */ +rb4 = -6.3756646729e+02, /* 0xc41f6441 */ +rb5 = -1.0250950928e+03, /* 0xc480230b */ +rb6 = -4.8351919556e+02, /* 0xc3f1c275 */ +sb1 = 3.0338060379e+01, /* 0x41f2b459 */ +sb2 = 3.2579251099e+02, /* 0x43a2e571 */ +sb3 = 1.5367296143e+03, /* 0x44c01759 */ +sb4 = 3.1998581543e+03, /* 0x4547fdbb */ +sb5 = 2.5530502930e+03, /* 0x451f90ce */ +sb6 = 4.7452853394e+02, /* 0x43ed43a7 */ +sb7 = -2.2440952301e+01; /* 0xc1b38712 */ + +float erff(float x) +{ + int32_t hx,ix,i; + float R,S,P,Q,s,y,z,r; + + GET_FLOAT_WORD(hx, x); + ix = hx & 0x7fffffff; + if (ix >= 0x7f800000) { + /* erf(nan)=nan, erf(+-inf)=+-1 */ + i = ((uint32_t)hx>>31)<<1; + return (float)(1-i)+1.0f/x; + } + if (ix < 0x3f580000) { /* |x| < 0.84375 */ + if (ix < 0x31800000) { /* |x| < 2**-28 */ + if (ix < 0x04000000) + /*avoid underflow */ + return 0.125f*(8.0f*x + efx8*x); + return x + efx*x; + } + z = x*x; + r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); + s = 1.0f+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); + y = r/s; + return x + x*y; + } + if (ix < 0x3fa00000) { /* 0.84375 <= |x| < 1.25 */ + s = fabsf(x)-1.0f; + P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); + Q = 1.0f+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); + if (hx >= 0) + return erx + P/Q; + return -erx - P/Q; + } + if (ix >= 0x40c00000) { /* inf > |x| >= 6 */ + if (hx >= 0) + return 1.0f - tiny; + return tiny - 1.0f; + } + x = fabsf(x); + s = 1.0f/(x*x); + if (ix < 0x4036DB6E) { /* |x| < 1/0.35 */ + R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*( + ra5+s*(ra6+s*ra7)))))); + S = 1.0f+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*( + sa5+s*(sa6+s*(sa7+s*sa8))))))); + } else { /* |x| >= 1/0.35 */ + R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*( + rb5+s*rb6))))); + S = 1.0f+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*( + sb5+s*(sb6+s*sb7)))))); + } + GET_FLOAT_WORD(ix, x); + SET_FLOAT_WORD(z, ix&0xfffff000); + r = expf(-z*z - 0.5625f) * expf((z-x)*(z+x) + R/S); + if (hx >= 0) + return 1.0f - r/x; + return r/x - 1.0f; +} + +float erfcf(float x) +{ + int32_t hx,ix; + float R,S,P,Q,s,y,z,r; + + GET_FLOAT_WORD(hx, x); + ix = hx & 0x7fffffff; + if (ix >= 0x7f800000) { + /* erfc(nan)=nan, erfc(+-inf)=0,2 */ + return (float)(((uint32_t)hx>>31)<<1) + 1.0f/x; + } + + if (ix < 0x3f580000) { /* |x| < 0.84375 */ + if (ix < 0x23800000) /* |x| < 2**-56 */ + return 1.0f - x; + z = x*x; + r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); + s = 1.0f+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); + y = r/s; + if (hx < 0x3e800000) { /* x<1/4 */ + return 1.0f - (x+x*y); + } else { + r = x*y; + r += (x-0.5f); + return 0.5f - r ; + } + } + if (ix < 0x3fa00000) { /* 0.84375 <= |x| < 1.25 */ + s = fabsf(x)-1.0f; + P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); + Q = 1.0f+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); + if(hx >= 0) { + z = 1.0f - erx; + return z - P/Q; + } else { + z = erx + P/Q; + return 1.0f + z; + } + } + if (ix < 0x41e00000) { /* |x| < 28 */ + x = fabsf(x); + s = 1.0f/(x*x); + if (ix < 0x4036DB6D) { /* |x| < 1/.35 ~ 2.857143*/ + R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*( + ra5+s*(ra6+s*ra7)))))); + S = 1.0f+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*( + sa5+s*(sa6+s*(sa7+s*sa8))))))); + } else { /* |x| >= 1/.35 ~ 2.857143 */ + if (hx < 0 && ix >= 0x40c00000) /* x < -6 */ + return 2.0f-tiny; + R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*( + rb5+s*rb6))))); + S = 1.0f+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*( + sb5+s*(sb6+s*sb7)))))); + } + GET_FLOAT_WORD(ix, x); + SET_FLOAT_WORD(z, ix&0xfffff000); + r = expf(-z*z - 0.5625f) * expf((z-x)*(z+x) + R/S); + if (hx > 0) + return r/x; + return 2.0f - r/x; + } + if (hx > 0) + return tiny*tiny; + return 2.0f - tiny; +} diff --git a/Library/libs/expm1.c b/Library/libs/expm1.c new file mode 100644 index 00000000..42344060 --- /dev/null +++ b/Library/libs/expm1.c @@ -0,0 +1,220 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_expm1.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* expm1(x) + * Returns exp(x)-1, the exponential of x minus 1. + * + * Method + * 1. Argument reduction: + * Given x, find r and integer k such that + * + * x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658 + * + * Here a correction term c will be computed to compensate + * the error in r when rounded to a floating-point number. + * + * 2. Approximating expm1(r) by a special rational function on + * the interval [0,0.34658]: + * Since + * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ... + * we define R1(r*r) by + * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r) + * That is, + * R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r) + * = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r)) + * = 1 - r^2/60 + r^4/2520 - r^6/100800 + ... + * We use a special Reme algorithm on [0,0.347] to generate + * a polynomial of degree 5 in r*r to approximate R1. The + * maximum error of this polynomial approximation is bounded + * by 2**-61. In other words, + * R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5 + * where Q1 = -1.6666666666666567384E-2, + * Q2 = 3.9682539681370365873E-4, + * Q3 = -9.9206344733435987357E-6, + * Q4 = 2.5051361420808517002E-7, + * Q5 = -6.2843505682382617102E-9; + * z = r*r, + * with error bounded by + * | 5 | -61 + * | 1.0+Q1*z+...+Q5*z - R1(z) | <= 2 + * | | + * + * expm1(r) = exp(r)-1 is then computed by the following + * specific way which minimize the accumulation rounding error: + * 2 3 + * r r [ 3 - (R1 + R1*r/2) ] + * expm1(r) = r + --- + --- * [--------------------] + * 2 2 [ 6 - r*(3 - R1*r/2) ] + * + * To compensate the error in the argument reduction, we use + * expm1(r+c) = expm1(r) + c + expm1(r)*c + * ~ expm1(r) + c + r*c + * Thus c+r*c will be added in as the correction terms for + * expm1(r+c). Now rearrange the term to avoid optimization + * screw up: + * ( 2 2 ) + * ({ ( r [ R1 - (3 - R1*r/2) ] ) } r ) + * expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- ) + * ({ ( 2 [ 6 - r*(3 - R1*r/2) ] ) } 2 ) + * ( ) + * + * = r - E + * 3. Scale back to obtain expm1(x): + * From step 1, we have + * expm1(x) = either 2^k*[expm1(r)+1] - 1 + * = or 2^k*[expm1(r) + (1-2^-k)] + * 4. Implementation notes: + * (A). To save one multiplication, we scale the coefficient Qi + * to Qi*2^i, and replace z by (x^2)/2. + * (B). To achieve maximum accuracy, we compute expm1(x) by + * (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf) + * (ii) if k=0, return r-E + * (iii) if k=-1, return 0.5*(r-E)-0.5 + * (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E) + * else return 1.0+2.0*(r-E); + * (v) if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1) + * (vi) if k <= 20, return 2^k((1-2^-k)-(E-r)), else + * (vii) return 2^k(1-((E+2^-k)-r)) + * + * Special cases: + * expm1(INF) is INF, expm1(NaN) is NaN; + * expm1(-INF) is -1, and + * for finite argument, only expm1(0)=0 is exact. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Misc. info. + * For IEEE double + * if x > 7.09782712893383973096e+02 then expm1(x) overflow + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + */ + +#include +#include "libm.h" + +static const double +huge = 1.0e+300, +tiny = 1.0e-300, +o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */ +ln2_hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */ +ln2_lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */ +invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */ +/* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs = x*x/2: */ +Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */ +Q2 = 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */ +Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */ +Q4 = 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */ +Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ + +double expm1(double x) +{ + double y,hi,lo,c,t,e,hxs,hfx,r1,twopk; + int32_t k,xsb; + uint32_t hx; + + GET_HIGH_WORD(hx, x); + xsb = hx&0x80000000; /* sign bit of x */ + hx &= 0x7fffffff; /* high word of |x| */ + + /* filter out huge and non-finite argument */ + if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */ + if (hx >= 0x40862E42) { /* if |x|>=709.78... */ + if (hx >= 0x7ff00000) { + uint32_t low; + + GET_LOW_WORD(low, x); + if (((hx&0xfffff)|low) != 0) /* NaN */ + return x+x; + return xsb==0 ? x : -1.0; /* exp(+-inf)={inf,-1} */ + } + if(x > o_threshold) + return huge*huge; /* overflow */ + } + if (xsb != 0) { /* x < -56*ln2, return -1.0 with inexact */ + /* raise inexact */ + if(x+tiny<0.0) + return tiny-1.0; /* return -1 */ + } + } + + /* argument reduction */ + if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */ + if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */ + if (xsb == 0) { + hi = x - ln2_hi; + lo = ln2_lo; + k = 1; + } else { + hi = x + ln2_hi; + lo = -ln2_lo; + k = -1; + } + } else { + k = invln2*x + (xsb==0 ? 0.5 : -0.5); + t = k; + hi = x - t*ln2_hi; /* t*ln2_hi is exact here */ + lo = t*ln2_lo; + } + STRICT_ASSIGN(double, x, hi - lo); + c = (hi-x)-lo; + } else if (hx < 0x3c900000) { /* |x| < 2**-54, return x */ + /* raise inexact flags when x != 0 */ + t = huge+x; + return x - (t-(huge+x)); + } else + k = 0; + + /* x is now in primary range */ + hfx = 0.5*x; + hxs = x*hfx; + r1 = 1.0+hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5)))); + t = 3.0-r1*hfx; + e = hxs*((r1-t)/(6.0 - x*t)); + if (k == 0) /* c is 0 */ + return x - (x*e-hxs); + INSERT_WORDS(twopk, 0x3ff00000+(k<<20), 0); /* 2^k */ + e = x*(e-c) - c; + e -= hxs; + if (k == -1) + return 0.5*(x-e) - 0.5; + if (k == 1) { + if (x < -0.25) + return -2.0*(e-(x+0.5)); + return 1.0+2.0*(x-e); + } + if (k <= -2 || k > 56) { /* suffice to return exp(x)-1 */ + y = 1.0 - (e-x); + if (k == 1024) + y = y*2.0*0x1p1023; + else + y = y*twopk; + return y - 1.0; + } + t = 1.0; + if (k < 20) { + SET_HIGH_WORD(t, 0x3ff00000 - (0x200000>>k)); /* t=1-2^-k */ + y = t-(e-x); + y = y*twopk; + } else { + SET_HIGH_WORD(t, ((0x3ff-k)<<20)); /* 2^-k */ + y = x-(e+t); + y += 1.0; + y = y*twopk; + } + return y; +} diff --git a/Library/libs/expm1f.c b/Library/libs/expm1f.c new file mode 100644 index 00000000..8995f3a1 --- /dev/null +++ b/Library/libs/expm1f.c @@ -0,0 +1,125 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_expm1f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include +#include "libm.h" + +static const float +huge = 1.0e+30, +tiny = 1.0e-30, +o_threshold = 8.8721679688e+01, /* 0x42b17180 */ +ln2_hi = 6.9313812256e-01, /* 0x3f317180 */ +ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */ +invln2 = 1.4426950216e+00, /* 0x3fb8aa3b */ +/* + * Domain [-0.34568, 0.34568], range ~[-6.694e-10, 6.696e-10]: + * |6 / x * (1 + 2 * (1 / (exp(x) - 1) - 1 / x)) - q(x)| < 2**-30.04 + * Scaled coefficients: Qn_here = 2**n * Qn_for_q (see s_expm1.c): + */ +Q1 = -3.3333212137e-2, /* -0x888868.0p-28 */ +Q2 = 1.5807170421e-3; /* 0xcf3010.0p-33 */ + +float expm1f(float x) +{ + float y,hi,lo,c,t,e,hxs,hfx,r1,twopk; + int32_t k,xsb; + uint32_t hx; + + GET_FLOAT_WORD(hx, x); + xsb = hx&0x80000000; /* sign bit of x */ + hx &= 0x7fffffff; /* high word of |x| */ + + /* filter out huge and non-finite argument */ + if (hx >= 0x4195b844) { /* if |x|>=27*ln2 */ + if (hx >= 0x42b17218) { /* if |x|>=88.721... */ + if (hx > 0x7f800000) /* NaN */ + return x+x; + if (hx == 0x7f800000) /* exp(+-inf)={inf,-1} */ + return xsb==0 ? x : -1.0; + if (x > o_threshold) + return huge*huge; /* overflow */ + } + if (xsb != 0) { /* x < -27*ln2 */ + /* raise inexact */ + if (x+tiny < 0.0f) + return tiny-1.0f; /* return -1 */ + } + } + + /* argument reduction */ + if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */ + if (hx < 0x3F851592) { /* and |x| < 1.5 ln2 */ + if (xsb == 0) { + hi = x - ln2_hi; + lo = ln2_lo; + k = 1; + } else { + hi = x + ln2_hi; + lo = -ln2_lo; + k = -1; + } + } else { + k = invln2*x + (xsb==0 ? 0.5f : -0.5f); + t = k; + hi = x - t*ln2_hi; /* t*ln2_hi is exact here */ + lo = t*ln2_lo; + } + x = (float)(hi - lo); + c = (hi-x)-lo; + } else if (hx < 0x33000000) { /* when |x|<2**-25, return x */ + t = huge+x; /* return x with inexact flags when x!=0 */ + return x - (t-(huge+x)); + } else + k = 0; + + /* x is now in primary range */ + hfx = 0.5f*x; + hxs = x*hfx; + r1 = 1.0f+hxs*(Q1+hxs*Q2); + t = 3.0f - r1*hfx; + e = hxs*((r1-t)/(6.0f - x*t)); + if (k == 0) /* c is 0 */ + return x - (x*e-hxs); + SET_FLOAT_WORD(twopk, 0x3f800000+(k<<23)); /* 2^k */ + e = x*(e-c) - c; + e -= hxs; + if (k == -1) + return 0.5f*(x-e) - 0.5f; + if (k == 1) { + if (x < -0.25f) + return -2.0f*(e-(x+0.5f)); + return 1.0f + 2.0f*(x-e); + } + if (k <= -2 || k > 56) { /* suffice to return exp(x)-1 */ + y = 1.0f - (e - x); + if (k == 128) + y = y*2.0f*0x1p127f; + else + y = y*twopk; + return y - 1.0f; + } + t = 1.0f; + if (k < 23) { + SET_FLOAT_WORD(t, 0x3f800000 - (0x1000000>>k)); /* t=1-2^-k */ + y = t - (e - x); + y = y*twopk; + } else { + SET_FLOAT_WORD(t, (0x7f-k)<<23); /* 2^-k */ + y = x - (e + t); + y += 1.0f; + y = y*twopk; + } + return y; +} diff --git a/Library/libs/fdim.c b/Library/libs/fdim.c new file mode 100644 index 00000000..f09e2fad --- /dev/null +++ b/Library/libs/fdim.c @@ -0,0 +1,12 @@ +/* From MUSL */ + +#include + +double fdim(double x, double y) +{ + if (isnan(x)) + return x; + if (isnan(y)) + return y; + return x > y ? x - y : 0; +} diff --git a/Library/libs/fdimf.c b/Library/libs/fdimf.c new file mode 100644 index 00000000..f132c6f9 --- /dev/null +++ b/Library/libs/fdimf.c @@ -0,0 +1,12 @@ +/* From MUSL */ + +#include + +float fdimf(float x, float y) +{ + if (isnan(x)) + return x; + if (isnan(y)) + return y; + return x > y ? x - y : 0; +} diff --git a/Library/libs/fmax.c b/Library/libs/fmax.c new file mode 100644 index 00000000..9b357f0c --- /dev/null +++ b/Library/libs/fmax.c @@ -0,0 +1,15 @@ +/* From MUSL */ + +#include + +double fmax(double x, double y) +{ + if (isnan(x)) + return y; + if (isnan(y)) + return x; + /* handle signed zeros, see C99 Annex F.9.9.2 */ + if (signbit(x) != signbit(y)) + return signbit(x) ? y : x; + return x < y ? y : x; +} diff --git a/Library/libs/fmaxf.c b/Library/libs/fmaxf.c new file mode 100644 index 00000000..a17da67b --- /dev/null +++ b/Library/libs/fmaxf.c @@ -0,0 +1,15 @@ +/* From MUSL */ + +#include + +float fmaxf(float x, float y) +{ + if (isnan(x)) + return y; + if (isnan(y)) + return x; + /* handle signed zeroes, see C99 Annex F.9.9.2 */ + if (signbit(x) != signbit(y)) + return signbit(x) ? y : x; + return x < y ? y : x; +} diff --git a/Library/libs/fmin.c b/Library/libs/fmin.c new file mode 100644 index 00000000..1bb5c818 --- /dev/null +++ b/Library/libs/fmin.c @@ -0,0 +1,15 @@ +/* From MUSL */ + +#include + +double fmin(double x, double y) +{ + if (isnan(x)) + return y; + if (isnan(y)) + return x; + /* handle signed zeros, see C99 Annex F.9.9.2 */ + if (signbit(x) != signbit(y)) + return signbit(x) ? x : y; + return x < y ? x : y; +} diff --git a/Library/libs/fminf.c b/Library/libs/fminf.c new file mode 100644 index 00000000..3940992e --- /dev/null +++ b/Library/libs/fminf.c @@ -0,0 +1,15 @@ +/* From MUSL */ + +#include + +float fminf(float x, float y) +{ + if (isnan(x)) + return y; + if (isnan(y)) + return x; + /* handle signed zeros, see C99 Annex F.9.9.2 */ + if (signbit(x) != signbit(y)) + return signbit(x) ? x : y; + return x < y ? x : y; +} diff --git a/Library/libs/ilogb.c b/Library/libs/ilogb.c new file mode 100644 index 00000000..1bbc8b07 --- /dev/null +++ b/Library/libs/ilogb.c @@ -0,0 +1,27 @@ +/* From MUSL */ + +#include +#include +#include "libm.h" + +int ilogb(double x) +{ + union dshape u = {x}; + int e = u.bits>>52 & 0x7ff; + + if (!e) { + u.bits <<= 12; + if (u.bits == 0) { + FORCE_EVAL(0/0.0f); + return FP_ILOGB0; + } + /* subnormal x */ + for (e = -0x3ff; u.bits < (uint64_t)1<<63; e--, u.bits<<=1); + return e; + } + if (e == 0x7ff) { + FORCE_EVAL(0/0.0f); + return u.bits<<12 ? FP_ILOGBNAN : INT_MAX; + } + return e - 0x3ff; +} diff --git a/Library/libs/ilogbf.c b/Library/libs/ilogbf.c new file mode 100644 index 00000000..0e845969 --- /dev/null +++ b/Library/libs/ilogbf.c @@ -0,0 +1,27 @@ +/* From MUSL */ + +#include +#include +#include "libm.h" + +int ilogbf(float x) +{ + union fshape u = {x}; + int e = u.bits>>23 & 0xff; + + if (!e) { + u.bits <<= 9; + if (u.bits == 0) { +/*FIXME FORCE_EVAL(0/0.0f); */ + return FP_ILOGB0; + } + /* subnormal x */ + for (e = -0x7f; u.bits < (uint32_t)1<<31; e--, u.bits<<=1); + return e; + } + if (e == 0xff) { +/*FIXME FORCE_EVAL(0/0.0f); */ + return u.bits<<9 ? FP_ILOGBNAN : INT_MAX; + } + return e - 0x7f; +} diff --git a/Library/libs/j0.c b/Library/libs/j0.c new file mode 100644 index 00000000..f470ed06 --- /dev/null +++ b/Library/libs/j0.c @@ -0,0 +1,387 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_j0.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* j0(x), y0(x) + * Bessel function of the first and second kinds of order zero. + * Method -- j0(x): + * 1. For tiny x, we use j0(x) = 1 - x^2/4 + x^4/64 - ... + * 2. Reduce x to |x| since j0(x)=j0(-x), and + * for x in (0,2) + * j0(x) = 1-z/4+ z^2*R0/S0, where z = x*x; + * (precision: |j0-1+z/4-z^2R0/S0 |<2**-63.67 ) + * for x in (2,inf) + * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) + * where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) + * as follow: + * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + * = 1/sqrt(2) * (cos(x) + sin(x)) + * sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * (To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one.) + * + * 3 Special cases + * j0(nan)= nan + * j0(0) = 1 + * j0(inf) = 0 + * + * Method -- y0(x): + * 1. For x<2. + * Since + * y0(x) = 2/pi*(j0(x)*(ln(x/2)+Euler) + x^2/4 - ...) + * therefore y0(x)-2/pi*j0(x)*ln(x) is an even function. + * We use the following function to approximate y0, + * y0(x) = U(z)/V(z) + (2/pi)*(j0(x)*ln(x)), z= x^2 + * where + * U(z) = u00 + u01*z + ... + u06*z^6 + * V(z) = 1 + v01*z + ... + v04*z^4 + * with absolute approximation error bounded by 2**-72. + * Note: For tiny x, U/V = u0 and j0(x)~1, hence + * y0(tiny) = u0 + (2/pi)*ln(tiny), (choose tiny<2**-27) + * 2. For x>=2. + * y0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)+q0(x)*sin(x0)) + * where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) + * by the method mentioned above. + * 3. Special cases: y0(0)=-inf, y0(x<0)=NaN, y0(inf)=0. + */ + +#include +#include "libm.h" + +static double pzero(double), qzero(double); + +static const double +huge = 1e300, +invsqrtpi = 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */ +tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ +/* R0/S0 on [0, 2.00] */ +R02 = 1.56249999999999947958e-02, /* 0x3F8FFFFF, 0xFFFFFFFD */ +R03 = -1.89979294238854721751e-04, /* 0xBF28E6A5, 0xB61AC6E9 */ +R04 = 1.82954049532700665670e-06, /* 0x3EBEB1D1, 0x0C503919 */ +R05 = -4.61832688532103189199e-09, /* 0xBE33D5E7, 0x73D63FCE */ +S01 = 1.56191029464890010492e-02, /* 0x3F8FFCE8, 0x82C8C2A4 */ +S02 = 1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */ +S03 = 5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */ +S04 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */ + +double j0(double x) +{ + double z, s,c,ss,cc,r,u,v; + int32_t hx,ix; + + GET_HIGH_WORD(hx, x); + ix = hx & 0x7fffffff; + if (ix >= 0x7ff00000) + return 1.0/(x*x); + x = fabs(x); + if (ix >= 0x40000000) { /* |x| >= 2.0 */ + s = sin(x); + c = cos(x); + ss = s-c; + cc = s+c; + if (ix < 0x7fe00000) { /* make sure x+x does not overflow */ + z = -cos(x+x); + if (s*c < 0.0) + cc = z/ss; + else + ss = z/cc; + } + /* + * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) + * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) + */ + if (ix > 0x48000000) + z = (invsqrtpi*cc)/sqrt(x); + else { + u = pzero(x); + v = qzero(x); + z = invsqrtpi*(u*cc-v*ss)/sqrt(x); + } + return z; + } + if (ix < 0x3f200000) { /* |x| < 2**-13 */ + /* raise inexact if x != 0 */ + if (huge+x > 1.0) { + if (ix < 0x3e400000) /* |x| < 2**-27 */ + return 1.0; + return 1.0 - 0.25*x*x; + } + } + z = x*x; + r = z*(R02+z*(R03+z*(R04+z*R05))); + s = 1.0+z*(S01+z*(S02+z*(S03+z*S04))); + if (ix < 0x3FF00000) { /* |x| < 1.00 */ + return 1.0 + z*(-0.25+(r/s)); + } else { + u = 0.5*x; + return (1.0+u)*(1.0-u) + z*(r/s); + } +} + +static const double +u00 = -7.38042951086872317523e-02, /* 0xBFB2E4D6, 0x99CBD01F */ +u01 = 1.76666452509181115538e-01, /* 0x3FC69D01, 0x9DE9E3FC */ +u02 = -1.38185671945596898896e-02, /* 0xBF8C4CE8, 0xB16CFA97 */ +u03 = 3.47453432093683650238e-04, /* 0x3F36C54D, 0x20B29B6B */ +u04 = -3.81407053724364161125e-06, /* 0xBECFFEA7, 0x73D25CAD */ +u05 = 1.95590137035022920206e-08, /* 0x3E550057, 0x3B4EABD4 */ +u06 = -3.98205194132103398453e-11, /* 0xBDC5E43D, 0x693FB3C8 */ +v01 = 1.27304834834123699328e-02, /* 0x3F8A1270, 0x91C9C71A */ +v02 = 7.60068627350353253702e-05, /* 0x3F13ECBB, 0xF578C6C1 */ +v03 = 2.59150851840457805467e-07, /* 0x3E91642D, 0x7FF202FD */ +v04 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */ + +double y0(double x) +{ + double z,s,c,ss,cc,u,v; + int32_t hx,ix,lx; + + EXTRACT_WORDS(hx, lx, x); + ix = 0x7fffffff & hx; + /* Y0(NaN) is NaN, y0(-inf) is Nan, y0(inf) is 0 */ + if (ix >= 0x7ff00000) + return 1.0/(x+x*x); + if ((ix|lx) == 0) + return -1.0/0.0; + if (hx < 0) + return 0.0/0.0; + if (ix >= 0x40000000) { /* |x| >= 2.0 */ + /* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0)) + * where x0 = x-pi/4 + * Better formula: + * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + * = 1/sqrt(2) * (sin(x) + cos(x)) + * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. + */ + s = sin(x); + c = cos(x); + ss = s-c; + cc = s+c; + /* + * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) + * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) + */ + if (ix < 0x7fe00000) { /* make sure x+x does not overflow */ + z = -cos(x+x); + if (s*c < 0.0) + cc = z/ss; + else + ss = z/cc; + } + if (ix > 0x48000000) + z = (invsqrtpi*ss)/sqrt(x); + else { + u = pzero(x); + v = qzero(x); + z = invsqrtpi*(u*ss+v*cc)/sqrt(x); + } + return z; + } + if (ix <= 0x3e400000) { /* x < 2**-27 */ + return u00 + tpi*log(x); + } + z = x*x; + u = u00+z*(u01+z*(u02+z*(u03+z*(u04+z*(u05+z*u06))))); + v = 1.0+z*(v01+z*(v02+z*(v03+z*v04))); + return u/v + tpi*(j0(x)*log(x)); +} + +/* The asymptotic expansions of pzero is + * 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x. + * For x >= 2, We approximate pzero by + * pzero(x) = 1 + (R/S) + * where R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10 + * S = 1 + pS0*s^2 + ... + pS4*s^10 + * and + * | pzero(x)-1-R/S | <= 2 ** ( -60.26) + */ +static const double pR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + -7.03124999999900357484e-02, /* 0xBFB1FFFF, 0xFFFFFD32 */ + -8.08167041275349795626e+00, /* 0xC02029D0, 0xB44FA779 */ + -2.57063105679704847262e+02, /* 0xC0701102, 0x7B19E863 */ + -2.48521641009428822144e+03, /* 0xC0A36A6E, 0xCD4DCAFC */ + -5.25304380490729545272e+03, /* 0xC0B4850B, 0x36CC643D */ +}; +static const double pS8[5] = { + 1.16534364619668181717e+02, /* 0x405D2233, 0x07A96751 */ + 3.83374475364121826715e+03, /* 0x40ADF37D, 0x50596938 */ + 4.05978572648472545552e+04, /* 0x40E3D2BB, 0x6EB6B05F */ + 1.16752972564375915681e+05, /* 0x40FC810F, 0x8F9FA9BD */ + 4.76277284146730962675e+04, /* 0x40E74177, 0x4F2C49DC */ +}; + +static const double pR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -1.14125464691894502584e-11, /* 0xBDA918B1, 0x47E495CC */ + -7.03124940873599280078e-02, /* 0xBFB1FFFF, 0xE69AFBC6 */ + -4.15961064470587782438e+00, /* 0xC010A370, 0xF90C6BBF */ + -6.76747652265167261021e+01, /* 0xC050EB2F, 0x5A7D1783 */ + -3.31231299649172967747e+02, /* 0xC074B3B3, 0x6742CC63 */ + -3.46433388365604912451e+02, /* 0xC075A6EF, 0x28A38BD7 */ +}; +static const double pS5[5] = { + 6.07539382692300335975e+01, /* 0x404E6081, 0x0C98C5DE */ + 1.05125230595704579173e+03, /* 0x40906D02, 0x5C7E2864 */ + 5.97897094333855784498e+03, /* 0x40B75AF8, 0x8FBE1D60 */ + 9.62544514357774460223e+03, /* 0x40C2CCB8, 0xFA76FA38 */ + 2.40605815922939109441e+03, /* 0x40A2CC1D, 0xC70BE864 */ +}; + +static const double pR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + -2.54704601771951915620e-09, /* 0xBE25E103, 0x6FE1AA86 */ + -7.03119616381481654654e-02, /* 0xBFB1FFF6, 0xF7C0E24B */ + -2.40903221549529611423e+00, /* 0xC00345B2, 0xAEA48074 */ + -2.19659774734883086467e+01, /* 0xC035F74A, 0x4CB94E14 */ + -5.80791704701737572236e+01, /* 0xC04D0A22, 0x420A1A45 */ + -3.14479470594888503854e+01, /* 0xC03F72AC, 0xA892D80F */ +}; +static const double pS3[5] = { + 3.58560338055209726349e+01, /* 0x4041ED92, 0x84077DD3 */ + 3.61513983050303863820e+02, /* 0x40769839, 0x464A7C0E */ + 1.19360783792111533330e+03, /* 0x4092A66E, 0x6D1061D6 */ + 1.12799679856907414432e+03, /* 0x40919FFC, 0xB8C39B7E */ + 1.73580930813335754692e+02, /* 0x4065B296, 0xFC379081 */ +}; + +static const double pR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */ + -8.87534333032526411254e-08, /* 0xBE77D316, 0xE927026D */ + -7.03030995483624743247e-02, /* 0xBFB1FF62, 0x495E1E42 */ + -1.45073846780952986357e+00, /* 0xBFF73639, 0x8A24A843 */ + -7.63569613823527770791e+00, /* 0xC01E8AF3, 0xEDAFA7F3 */ + -1.11931668860356747786e+01, /* 0xC02662E6, 0xC5246303 */ + -3.23364579351335335033e+00, /* 0xC009DE81, 0xAF8FE70F */ +}; +static const double pS2[5] = { + 2.22202997532088808441e+01, /* 0x40363865, 0x908B5959 */ + 1.36206794218215208048e+02, /* 0x4061069E, 0x0EE8878F */ + 2.70470278658083486789e+02, /* 0x4070E786, 0x42EA079B */ + 1.53875394208320329881e+02, /* 0x40633C03, 0x3AB6FAFF */ + 1.46576176948256193810e+01, /* 0x402D50B3, 0x44391809 */ +}; + +static double pzero(double x) +{ + const double *p,*q; + double z,r,s; + int32_t ix; + + GET_HIGH_WORD(ix, x); + ix &= 0x7fffffff; + if (ix >= 0x40200000){p = pR8; q = pS8;} + else if (ix >= 0x40122E8B){p = pR5; q = pS5;} + else if (ix >= 0x4006DB6D){p = pR3; q = pS3;} + else if (ix >= 0x40000000){p = pR2; q = pS2;} + z = 1.0/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); + return 1.0 + r/s; +} + + +/* For x >= 8, the asymptotic expansions of qzero is + * -1/8 s + 75/1024 s^3 - ..., where s = 1/x. + * We approximate pzero by + * qzero(x) = s*(-1.25 + (R/S)) + * where R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10 + * S = 1 + qS0*s^2 + ... + qS5*s^12 + * and + * | qzero(x)/s +1.25-R/S | <= 2 ** ( -61.22) + */ +static const double qR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + 7.32421874999935051953e-02, /* 0x3FB2BFFF, 0xFFFFFE2C */ + 1.17682064682252693899e+01, /* 0x40278952, 0x5BB334D6 */ + 5.57673380256401856059e+02, /* 0x40816D63, 0x15301825 */ + 8.85919720756468632317e+03, /* 0x40C14D99, 0x3E18F46D */ + 3.70146267776887834771e+04, /* 0x40E212D4, 0x0E901566 */ +}; +static const double qS8[6] = { + 1.63776026895689824414e+02, /* 0x406478D5, 0x365B39BC */ + 8.09834494656449805916e+03, /* 0x40BFA258, 0x4E6B0563 */ + 1.42538291419120476348e+05, /* 0x41016652, 0x54D38C3F */ + 8.03309257119514397345e+05, /* 0x412883DA, 0x83A52B43 */ + 8.40501579819060512818e+05, /* 0x4129A66B, 0x28DE0B3D */ + -3.43899293537866615225e+05, /* 0xC114FD6D, 0x2C9530C5 */ +}; + +static const double qR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.84085963594515531381e-11, /* 0x3DB43D8F, 0x29CC8CD9 */ + 7.32421766612684765896e-02, /* 0x3FB2BFFF, 0xD172B04C */ + 5.83563508962056953777e+00, /* 0x401757B0, 0xB9953DD3 */ + 1.35111577286449829671e+02, /* 0x4060E392, 0x0A8788E9 */ + 1.02724376596164097464e+03, /* 0x40900CF9, 0x9DC8C481 */ + 1.98997785864605384631e+03, /* 0x409F17E9, 0x53C6E3A6 */ +}; +static const double qS5[6] = { + 8.27766102236537761883e+01, /* 0x4054B1B3, 0xFB5E1543 */ + 2.07781416421392987104e+03, /* 0x40A03BA0, 0xDA21C0CE */ + 1.88472887785718085070e+04, /* 0x40D267D2, 0x7B591E6D */ + 5.67511122894947329769e+04, /* 0x40EBB5E3, 0x97E02372 */ + 3.59767538425114471465e+04, /* 0x40E19118, 0x1F7A54A0 */ + -5.35434275601944773371e+03, /* 0xC0B4EA57, 0xBEDBC609 */ +}; + +static const double qR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + 4.37741014089738620906e-09, /* 0x3E32CD03, 0x6ADECB82 */ + 7.32411180042911447163e-02, /* 0x3FB2BFEE, 0x0E8D0842 */ + 3.34423137516170720929e+00, /* 0x400AC0FC, 0x61149CF5 */ + 4.26218440745412650017e+01, /* 0x40454F98, 0x962DAEDD */ + 1.70808091340565596283e+02, /* 0x406559DB, 0xE25EFD1F */ + 1.66733948696651168575e+02, /* 0x4064D77C, 0x81FA21E0 */ +}; +static const double qS3[6] = { + 4.87588729724587182091e+01, /* 0x40486122, 0xBFE343A6 */ + 7.09689221056606015736e+02, /* 0x40862D83, 0x86544EB3 */ + 3.70414822620111362994e+03, /* 0x40ACF04B, 0xE44DFC63 */ + 6.46042516752568917582e+03, /* 0x40B93C6C, 0xD7C76A28 */ + 2.51633368920368957333e+03, /* 0x40A3A8AA, 0xD94FB1C0 */ + -1.49247451836156386662e+02, /* 0xC062A7EB, 0x201CF40F */ +}; + +static const double qR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.50444444886983272379e-07, /* 0x3E84313B, 0x54F76BDB */ + 7.32234265963079278272e-02, /* 0x3FB2BEC5, 0x3E883E34 */ + 1.99819174093815998816e+00, /* 0x3FFFF897, 0xE727779C */ + 1.44956029347885735348e+01, /* 0x402CFDBF, 0xAAF96FE5 */ + 3.16662317504781540833e+01, /* 0x403FAA8E, 0x29FBDC4A */ + 1.62527075710929267416e+01, /* 0x403040B1, 0x71814BB4 */ +}; +static const double qS2[6] = { + 3.03655848355219184498e+01, /* 0x403E5D96, 0xF7C07AED */ + 2.69348118608049844624e+02, /* 0x4070D591, 0xE4D14B40 */ + 8.44783757595320139444e+02, /* 0x408A6645, 0x22B3BF22 */ + 8.82935845112488550512e+02, /* 0x408B977C, 0x9C5CC214 */ + 2.12666388511798828631e+02, /* 0x406A9553, 0x0E001365 */ + -5.31095493882666946917e+00, /* 0xC0153E6A, 0xF8B32931 */ +}; + +static double qzero(double x) +{ + const double *p,*q; + double s,r,z; + int32_t ix; + + GET_HIGH_WORD(ix, x); + ix &= 0x7fffffff; + if (ix >= 0x40200000){p = qR8; q = qS8;} + else if (ix >= 0x40122E8B){p = qR5; q = qS5;} + else if (ix >= 0x4006DB6D){p = qR3; q = qS3;} + else if (ix >= 0x40000000){p = qR2; q = qS2;} + z = 1.0/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); + return (-.125 + r/s)/x; +} diff --git a/Library/libs/j0f.c b/Library/libs/j0f.c new file mode 100644 index 00000000..8a2c511c --- /dev/null +++ b/Library/libs/j0f.c @@ -0,0 +1,345 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_j0f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include +#include "libm.h" + +static float pzerof(float), qzerof(float); + +static const float +huge = 1e30, +invsqrtpi = 5.6418961287e-01, /* 0x3f106ebb */ +tpi = 6.3661974669e-01, /* 0x3f22f983 */ +/* R0/S0 on [0, 2.00] */ +R02 = 1.5625000000e-02, /* 0x3c800000 */ +R03 = -1.8997929874e-04, /* 0xb947352e */ +R04 = 1.8295404516e-06, /* 0x35f58e88 */ +R05 = -4.6183270541e-09, /* 0xb19eaf3c */ +S01 = 1.5619102865e-02, /* 0x3c7fe744 */ +S02 = 1.1692678527e-04, /* 0x38f53697 */ +S03 = 5.1354652442e-07, /* 0x3509daa6 */ +S04 = 1.1661400734e-09; /* 0x30a045e8 */ + +float j0f(float x) +{ + float z, s,c,ss,cc,r,u,v; + int32_t hx,ix; + + GET_FLOAT_WORD(hx, x); + ix = hx & 0x7fffffff; + if (ix >= 0x7f800000) + return 1.0f/(x*x); + x = fabsf(x); + if (ix >= 0x40000000) { /* |x| >= 2.0 */ + s = sinf(x); + c = cosf(x); + ss = s-c; + cc = s+c; + if (ix < 0x7f000000) { /* make sure x+x does not overflow */ + z = -cosf(x+x); + if (s*c < 0.0f) + cc = z/ss; + else + ss = z/cc; + } + /* + * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) + * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) + */ + if (ix > 0x80000000) + z = (invsqrtpi*cc)/sqrtf(x); + else { + u = pzerof(x); + v = qzerof(x); + z = invsqrtpi*(u*cc-v*ss)/sqrtf(x); + } + return z; + } + if (ix < 0x39000000) { /* |x| < 2**-13 */ + /* raise inexact if x != 0 */ + if (huge+x > 1.0f) { + if (ix < 0x32000000) /* |x| < 2**-27 */ + return 1.0f; + return 1.0f - 0.25f*x*x; + } + } + z = x*x; + r = z*(R02+z*(R03+z*(R04+z*R05))); + s = 1.0f+z*(S01+z*(S02+z*(S03+z*S04))); + if(ix < 0x3F800000) { /* |x| < 1.00 */ + return 1.0f + z*(-0.25f + (r/s)); + } else { + u = 0.5f*x; + return (1.0f+u)*(1.0f-u) + z*(r/s); + } +} + +static const float +u00 = -7.3804296553e-02, /* 0xbd9726b5 */ +u01 = 1.7666645348e-01, /* 0x3e34e80d */ +u02 = -1.3818567619e-02, /* 0xbc626746 */ +u03 = 3.4745343146e-04, /* 0x39b62a69 */ +u04 = -3.8140706238e-06, /* 0xb67ff53c */ +u05 = 1.9559013964e-08, /* 0x32a802ba */ +u06 = -3.9820518410e-11, /* 0xae2f21eb */ +v01 = 1.2730483897e-02, /* 0x3c509385 */ +v02 = 7.6006865129e-05, /* 0x389f65e0 */ +v03 = 2.5915085189e-07, /* 0x348b216c */ +v04 = 4.4111031494e-10; /* 0x2ff280c2 */ + +float y0f(float x) +{ + float z,s,c,ss,cc,u,v; + int32_t hx,ix; + + GET_FLOAT_WORD(hx, x); + ix = 0x7fffffff & hx; + /* Y0(NaN) is NaN, y0(-inf) is Nan, y0(inf) is 0 */ + if (ix >= 0x7f800000) + return 1.0f/(x+x*x); + if (ix == 0) + return -__FINFINITY;/*-1.0f/0.0f;*/ + if (hx < 0) + return __sNaN;/*0.0f/0.0f;*/ + if (ix >= 0x40000000) { /* |x| >= 2.0 */ + /* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0)) + * where x0 = x-pi/4 + * Better formula: + * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + * = 1/sqrt(2) * (sin(x) + cos(x)) + * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. + */ + s = sinf(x); + c = cosf(x); + ss = s-c; + cc = s+c; + /* + * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) + * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) + */ + if (ix < 0x7f000000) { /* make sure x+x not overflow */ + z = -cosf(x+x); + if (s*c < 0.0f) + cc = z/ss; + else + ss = z/cc; + } + if (ix > 0x80000000) + z = (invsqrtpi*ss)/sqrtf(x); + else { + u = pzerof(x); + v = qzerof(x); + z = invsqrtpi*(u*ss+v*cc)/sqrtf(x); + } + return z; + } + if (ix <= 0x32000000) { /* x < 2**-27 */ + return u00 + tpi*logf(x); + } + z = x*x; + u = u00+z*(u01+z*(u02+z*(u03+z*(u04+z*(u05+z*u06))))); + v = 1.0f+z*(v01+z*(v02+z*(v03+z*v04))); + return u/v + tpi*(j0f(x)*logf(x)); +} + +/* The asymptotic expansions of pzero is + * 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x. + * For x >= 2, We approximate pzero by + * pzero(x) = 1 + (R/S) + * where R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10 + * S = 1 + pS0*s^2 + ... + pS4*s^10 + * and + * | pzero(x)-1-R/S | <= 2 ** ( -60.26) + */ +static const float pR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + -7.0312500000e-02, /* 0xbd900000 */ + -8.0816707611e+00, /* 0xc1014e86 */ + -2.5706311035e+02, /* 0xc3808814 */ + -2.4852163086e+03, /* 0xc51b5376 */ + -5.2530439453e+03, /* 0xc5a4285a */ +}; +static const float pS8[5] = { + 1.1653436279e+02, /* 0x42e91198 */ + 3.8337448730e+03, /* 0x456f9beb */ + 4.0597855469e+04, /* 0x471e95db */ + 1.1675296875e+05, /* 0x47e4087c */ + 4.7627726562e+04, /* 0x473a0bba */ +}; +static const float pR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -1.1412546255e-11, /* 0xad48c58a */ + -7.0312492549e-02, /* 0xbd8fffff */ + -4.1596107483e+00, /* 0xc0851b88 */ + -6.7674766541e+01, /* 0xc287597b */ + -3.3123129272e+02, /* 0xc3a59d9b */ + -3.4643338013e+02, /* 0xc3ad3779 */ +}; +static const float pS5[5] = { + 6.0753936768e+01, /* 0x42730408 */ + 1.0512523193e+03, /* 0x44836813 */ + 5.9789707031e+03, /* 0x45bad7c4 */ + 9.6254453125e+03, /* 0x461665c8 */ + 2.4060581055e+03, /* 0x451660ee */ +}; + +static const float pR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + -2.5470459075e-09, /* 0xb12f081b */ + -7.0311963558e-02, /* 0xbd8fffb8 */ + -2.4090321064e+00, /* 0xc01a2d95 */ + -2.1965976715e+01, /* 0xc1afba52 */ + -5.8079170227e+01, /* 0xc2685112 */ + -3.1447946548e+01, /* 0xc1fb9565 */ +}; +static const float pS3[5] = { + 3.5856033325e+01, /* 0x420f6c94 */ + 3.6151397705e+02, /* 0x43b4c1ca */ + 1.1936077881e+03, /* 0x44953373 */ + 1.1279968262e+03, /* 0x448cffe6 */ + 1.7358093262e+02, /* 0x432d94b8 */ +}; + +static const float pR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */ + -8.8753431271e-08, /* 0xb3be98b7 */ + -7.0303097367e-02, /* 0xbd8ffb12 */ + -1.4507384300e+00, /* 0xbfb9b1cc */ + -7.6356959343e+00, /* 0xc0f4579f */ + -1.1193166733e+01, /* 0xc1331736 */ + -3.2336456776e+00, /* 0xc04ef40d */ +}; +static const float pS2[5] = { + 2.2220300674e+01, /* 0x41b1c32d */ + 1.3620678711e+02, /* 0x430834f0 */ + 2.7047027588e+02, /* 0x43873c32 */ + 1.5387539673e+02, /* 0x4319e01a */ + 1.4657617569e+01, /* 0x416a859a */ +}; + +static float pzerof(float x) +{ + const float *p,*q; + float z,r,s; + int32_t ix; + + GET_FLOAT_WORD(ix, x); + ix &= 0x7fffffff; + if (ix >= 0x41000000){p = pR8; q = pS8;} + else if (ix >= 0x40f71c58){p = pR5; q = pS5;} + else if (ix >= 0x4036db68){p = pR3; q = pS3;} + else if (ix >= 0x40000000){p = pR2; q = pS2;} + z = 1.0f/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0f+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); + return 1.0f + r/s; +} + + +/* For x >= 8, the asymptotic expansions of qzero is + * -1/8 s + 75/1024 s^3 - ..., where s = 1/x. + * We approximate pzero by + * qzero(x) = s*(-1.25 + (R/S)) + * where R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10 + * S = 1 + qS0*s^2 + ... + qS5*s^12 + * and + * | qzero(x)/s +1.25-R/S | <= 2 ** ( -61.22) + */ +static const float qR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + 7.3242187500e-02, /* 0x3d960000 */ + 1.1768206596e+01, /* 0x413c4a93 */ + 5.5767340088e+02, /* 0x440b6b19 */ + 8.8591972656e+03, /* 0x460a6cca */ + 3.7014625000e+04, /* 0x471096a0 */ +}; +static const float qS8[6] = { + 1.6377603149e+02, /* 0x4323c6aa */ + 8.0983447266e+03, /* 0x45fd12c2 */ + 1.4253829688e+05, /* 0x480b3293 */ + 8.0330925000e+05, /* 0x49441ed4 */ + 8.4050156250e+05, /* 0x494d3359 */ + -3.4389928125e+05, /* 0xc8a7eb69 */ +}; + +static const float qR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.8408595828e-11, /* 0x2da1ec79 */ + 7.3242180049e-02, /* 0x3d95ffff */ + 5.8356351852e+00, /* 0x40babd86 */ + 1.3511157227e+02, /* 0x43071c90 */ + 1.0272437744e+03, /* 0x448067cd */ + 1.9899779053e+03, /* 0x44f8bf4b */ +}; +static const float qS5[6] = { + 8.2776611328e+01, /* 0x42a58da0 */ + 2.0778142090e+03, /* 0x4501dd07 */ + 1.8847289062e+04, /* 0x46933e94 */ + 5.6751113281e+04, /* 0x475daf1d */ + 3.5976753906e+04, /* 0x470c88c1 */ + -5.3543427734e+03, /* 0xc5a752be */ +}; + +static const float qR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ + 4.3774099900e-09, /* 0x3196681b */ + 7.3241114616e-02, /* 0x3d95ff70 */ + 3.3442313671e+00, /* 0x405607e3 */ + 4.2621845245e+01, /* 0x422a7cc5 */ + 1.7080809021e+02, /* 0x432acedf */ + 1.6673394775e+02, /* 0x4326bbe4 */ +}; +static const float qS3[6] = { + 4.8758872986e+01, /* 0x42430916 */ + 7.0968920898e+02, /* 0x44316c1c */ + 3.7041481934e+03, /* 0x4567825f */ + 6.4604252930e+03, /* 0x45c9e367 */ + 2.5163337402e+03, /* 0x451d4557 */ + -1.4924745178e+02, /* 0xc3153f59 */ +}; + +static const float qR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.5044444979e-07, /* 0x342189db */ + 7.3223426938e-02, /* 0x3d95f62a */ + 1.9981917143e+00, /* 0x3fffc4bf */ + 1.4495602608e+01, /* 0x4167edfd */ + 3.1666231155e+01, /* 0x41fd5471 */ + 1.6252708435e+01, /* 0x4182058c */ +}; +static const float qS2[6] = { + 3.0365585327e+01, /* 0x41f2ecb8 */ + 2.6934811401e+02, /* 0x4386ac8f */ + 8.4478375244e+02, /* 0x44533229 */ + 8.8293585205e+02, /* 0x445cbbe5 */ + 2.1266638184e+02, /* 0x4354aa98 */ + -5.3109550476e+00, /* 0xc0a9f358 */ +}; + +static float qzerof(float x) +{ + const float *p,*q; + float s,r,z; + int32_t ix; + + GET_FLOAT_WORD(ix, x); + ix &= 0x7fffffff; + if (ix >= 0x41000000){p = qR8; q = qS8;} + else if (ix >= 0x40f71c58){p = qR5; q = qS5;} + else if (ix >= 0x4036db68){p = qR3; q = qS3;} + else if (ix >= 0x40000000){p = qR2; q = qS2;} + z = 1.0f/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0f+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); + return (-.125f + r/s)/x; +} diff --git a/Library/libs/j1.c b/Library/libs/j1.c new file mode 100644 index 00000000..6eec7583 --- /dev/null +++ b/Library/libs/j1.c @@ -0,0 +1,383 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_j1.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* j1(x), y1(x) + * Bessel function of the first and second kinds of order zero. + * Method -- j1(x): + * 1. For tiny x, we use j1(x) = x/2 - x^3/16 + x^5/384 - ... + * 2. Reduce x to |x| since j1(x)=-j1(-x), and + * for x in (0,2) + * j1(x) = x/2 + x*z*R0/S0, where z = x*x; + * (precision: |j1/x - 1/2 - R0/S0 |<2**-61.51 ) + * for x in (2,inf) + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x1)-q1(x)*sin(x1)) + * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) + * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) + * as follow: + * cos(x1) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x1) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (sin(x) + cos(x)) + * (To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one.) + * + * 3 Special cases + * j1(nan)= nan + * j1(0) = 0 + * j1(inf) = 0 + * + * Method -- y1(x): + * 1. screen out x<=0 cases: y1(0)=-inf, y1(x<0)=NaN + * 2. For x<2. + * Since + * y1(x) = 2/pi*(j1(x)*(ln(x/2)+Euler)-1/x-x/2+5/64*x^3-...) + * therefore y1(x)-2/pi*j1(x)*ln(x)-1/x is an odd function. + * We use the following function to approximate y1, + * y1(x) = x*U(z)/V(z) + (2/pi)*(j1(x)*ln(x)-1/x), z= x^2 + * where for x in [0,2] (abs err less than 2**-65.89) + * U(z) = U0[0] + U0[1]*z + ... + U0[4]*z^4 + * V(z) = 1 + v0[0]*z + ... + v0[4]*z^5 + * Note: For tiny x, 1/x dominate y1 and hence + * y1(tiny) = -2/pi/tiny, (choose tiny<2**-54) + * 3. For x>=2. + * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) + * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) + * by method mentioned above. + */ + +#include +#include "libm.h" + +static double pone(double), qone(double); + +static const double +huge = 1e300, +invsqrtpi = 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */ +tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ +/* R0/S0 on [0,2] */ +r00 = -6.25000000000000000000e-02, /* 0xBFB00000, 0x00000000 */ +r01 = 1.40705666955189706048e-03, /* 0x3F570D9F, 0x98472C61 */ +r02 = -1.59955631084035597520e-05, /* 0xBEF0C5C6, 0xBA169668 */ +r03 = 4.96727999609584448412e-08, /* 0x3E6AAAFA, 0x46CA0BD9 */ +s01 = 1.91537599538363460805e-02, /* 0x3F939D0B, 0x12637E53 */ +s02 = 1.85946785588630915560e-04, /* 0x3F285F56, 0xB9CDF664 */ +s03 = 1.17718464042623683263e-06, /* 0x3EB3BFF8, 0x333F8498 */ +s04 = 5.04636257076217042715e-09, /* 0x3E35AC88, 0xC97DFF2C */ +s05 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */ + +double j1(double x) +{ + double z,s,c,ss,cc,r,u,v,y; + int32_t hx,ix; + + GET_HIGH_WORD(hx, x); + ix = hx & 0x7fffffff; + if (ix >= 0x7ff00000) + return 1.0/x; + y = fabs(x); + if (ix >= 0x40000000) { /* |x| >= 2.0 */ + s = sin(y); + c = cos(y); + ss = -s-c; + cc = s-c; + if (ix < 0x7fe00000) { /* make sure y+y not overflow */ + z = cos(y+y); + if (s*c > 0.0) + cc = z/ss; + else + ss = z/cc; + } + /* + * j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x) + * y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x) + */ + if (ix > 0x48000000) + z = (invsqrtpi*cc)/sqrt(y); + else { + u = pone(y); + v = qone(y); + z = invsqrtpi*(u*cc-v*ss)/sqrt(y); + } + if (hx < 0) + return -z; + else + return z; + } + if (ix < 0x3e400000) { /* |x| < 2**-27 */ + /* raise inexact if x!=0 */ + if (huge+x > 1.0) + return 0.5*x; + } + z = x*x; + r = z*(r00+z*(r01+z*(r02+z*r03))); + s = 1.0+z*(s01+z*(s02+z*(s03+z*(s04+z*s05)))); + r *= x; + return x*0.5 + r/s; +} + +static const double U0[5] = { + -1.96057090646238940668e-01, /* 0xBFC91866, 0x143CBC8A */ + 5.04438716639811282616e-02, /* 0x3FA9D3C7, 0x76292CD1 */ + -1.91256895875763547298e-03, /* 0xBF5F55E5, 0x4844F50F */ + 2.35252600561610495928e-05, /* 0x3EF8AB03, 0x8FA6B88E */ + -9.19099158039878874504e-08, /* 0xBE78AC00, 0x569105B8 */ +}; +static const double V0[5] = { + 1.99167318236649903973e-02, /* 0x3F94650D, 0x3F4DA9F0 */ + 2.02552581025135171496e-04, /* 0x3F2A8C89, 0x6C257764 */ + 1.35608801097516229404e-06, /* 0x3EB6C05A, 0x894E8CA6 */ + 6.22741452364621501295e-09, /* 0x3E3ABF1D, 0x5BA69A86 */ + 1.66559246207992079114e-11, /* 0x3DB25039, 0xDACA772A */ +}; + + +double y1(double x) +{ + double z,s,c,ss,cc,u,v; + int32_t hx,ix,lx; + + EXTRACT_WORDS(hx, lx, x); + ix = 0x7fffffff & hx; + /* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */ + if (ix >= 0x7ff00000) + return 1.0/(x+x*x); + if ((ix|lx) == 0) + return -1.0/0.0; + if (hx < 0) + return 0.0/0.0; + if (ix >= 0x40000000) { /* |x| >= 2.0 */ + s = sin(x); + c = cos(x); + ss = -s-c; + cc = s-c; + if (ix < 0x7fe00000) { /* make sure x+x not overflow */ + z = cos(x+x); + if (s*c > 0.0) + cc = z/ss; + else + ss = z/cc; + } + /* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0)) + * where x0 = x-3pi/4 + * Better formula: + * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (cos(x) + sin(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. + */ + if (ix > 0x48000000) + z = (invsqrtpi*ss)/sqrt(x); + else { + u = pone(x); + v = qone(x); + z = invsqrtpi*(u*ss+v*cc)/sqrt(x); + } + return z; + } + if (ix <= 0x3c900000) /* x < 2**-54 */ + return -tpi/x; + z = x*x; + u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4]))); + v = 1.0+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4])))); + return x*(u/v) + tpi*(j1(x)*log(x)-1.0/x); +} + +/* For x >= 8, the asymptotic expansions of pone is + * 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x. + * We approximate pone by + * pone(x) = 1 + (R/S) + * where R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10 + * S = 1 + ps0*s^2 + ... + ps4*s^10 + * and + * | pone(x)-1-R/S | <= 2 ** ( -60.06) + */ + +static const double pr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + 1.17187499999988647970e-01, /* 0x3FBDFFFF, 0xFFFFFCCE */ + 1.32394806593073575129e+01, /* 0x402A7A9D, 0x357F7FCE */ + 4.12051854307378562225e+02, /* 0x4079C0D4, 0x652EA590 */ + 3.87474538913960532227e+03, /* 0x40AE457D, 0xA3A532CC */ + 7.91447954031891731574e+03, /* 0x40BEEA7A, 0xC32782DD */ +}; +static const double ps8[5] = { + 1.14207370375678408436e+02, /* 0x405C8D45, 0x8E656CAC */ + 3.65093083420853463394e+03, /* 0x40AC85DC, 0x964D274F */ + 3.69562060269033463555e+04, /* 0x40E20B86, 0x97C5BB7F */ + 9.76027935934950801311e+04, /* 0x40F7D42C, 0xB28F17BB */ + 3.08042720627888811578e+04, /* 0x40DE1511, 0x697A0B2D */ +}; + +static const double pr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.31990519556243522749e-11, /* 0x3DAD0667, 0xDAE1CA7D */ + 1.17187493190614097638e-01, /* 0x3FBDFFFF, 0xE2C10043 */ + 6.80275127868432871736e+00, /* 0x401B3604, 0x6E6315E3 */ + 1.08308182990189109773e+02, /* 0x405B13B9, 0x452602ED */ + 5.17636139533199752805e+02, /* 0x40802D16, 0xD052D649 */ + 5.28715201363337541807e+02, /* 0x408085B8, 0xBB7E0CB7 */ +}; +static const double ps5[5] = { + 5.92805987221131331921e+01, /* 0x404DA3EA, 0xA8AF633D */ + 9.91401418733614377743e+02, /* 0x408EFB36, 0x1B066701 */ + 5.35326695291487976647e+03, /* 0x40B4E944, 0x5706B6FB */ + 7.84469031749551231769e+03, /* 0x40BEA4B0, 0xB8A5BB15 */ + 1.50404688810361062679e+03, /* 0x40978030, 0x036F5E51 */ +}; + +static const double pr3[6] = { + 3.02503916137373618024e-09, /* 0x3E29FC21, 0xA7AD9EDD */ + 1.17186865567253592491e-01, /* 0x3FBDFFF5, 0x5B21D17B */ + 3.93297750033315640650e+00, /* 0x400F76BC, 0xE85EAD8A */ + 3.51194035591636932736e+01, /* 0x40418F48, 0x9DA6D129 */ + 9.10550110750781271918e+01, /* 0x4056C385, 0x4D2C1837 */ + 4.85590685197364919645e+01, /* 0x4048478F, 0x8EA83EE5 */ +}; +static const double ps3[5] = { + 3.47913095001251519989e+01, /* 0x40416549, 0xA134069C */ + 3.36762458747825746741e+02, /* 0x40750C33, 0x07F1A75F */ + 1.04687139975775130551e+03, /* 0x40905B7C, 0x5037D523 */ + 8.90811346398256432622e+02, /* 0x408BD67D, 0xA32E31E9 */ + 1.03787932439639277504e+02, /* 0x4059F26D, 0x7C2EED53 */ +}; + +static const double pr2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.07710830106873743082e-07, /* 0x3E7CE9D4, 0xF65544F4 */ + 1.17176219462683348094e-01, /* 0x3FBDFF42, 0xBE760D83 */ + 2.36851496667608785174e+00, /* 0x4002F2B7, 0xF98FAEC0 */ + 1.22426109148261232917e+01, /* 0x40287C37, 0x7F71A964 */ + 1.76939711271687727390e+01, /* 0x4031B1A8, 0x177F8EE2 */ + 5.07352312588818499250e+00, /* 0x40144B49, 0xA574C1FE */ +}; +static const double ps2[5] = { + 2.14364859363821409488e+01, /* 0x40356FBD, 0x8AD5ECDC */ + 1.25290227168402751090e+02, /* 0x405F5293, 0x14F92CD5 */ + 2.32276469057162813669e+02, /* 0x406D08D8, 0xD5A2DBD9 */ + 1.17679373287147100768e+02, /* 0x405D6B7A, 0xDA1884A9 */ + 8.36463893371618283368e+00, /* 0x4020BAB1, 0xF44E5192 */ +}; + +static double pone(double x) +{ + const double *p,*q; + double z,r,s; + int32_t ix; + + GET_HIGH_WORD(ix, x); + ix &= 0x7fffffff; + if (ix >= 0x40200000){p = pr8; q = ps8;} + else if (ix >= 0x40122E8B){p = pr5; q = ps5;} + else if (ix >= 0x4006DB6D){p = pr3; q = ps3;} + else if (ix >= 0x40000000){p = pr2; q = ps2;} + z = 1.0/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); + return 1.0+ r/s; +} + +/* For x >= 8, the asymptotic expansions of qone is + * 3/8 s - 105/1024 s^3 - ..., where s = 1/x. + * We approximate pone by + * qone(x) = s*(0.375 + (R/S)) + * where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10 + * S = 1 + qs1*s^2 + ... + qs6*s^12 + * and + * | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13) + */ + +static const double qr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */ + 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ + -1.02539062499992714161e-01, /* 0xBFBA3FFF, 0xFFFFFDF3 */ + -1.62717534544589987888e+01, /* 0xC0304591, 0xA26779F7 */ + -7.59601722513950107896e+02, /* 0xC087BCD0, 0x53E4B576 */ + -1.18498066702429587167e+04, /* 0xC0C724E7, 0x40F87415 */ + -4.84385124285750353010e+04, /* 0xC0E7A6D0, 0x65D09C6A */ +}; +static const double qs8[6] = { + 1.61395369700722909556e+02, /* 0x40642CA6, 0xDE5BCDE5 */ + 7.82538599923348465381e+03, /* 0x40BE9162, 0xD0D88419 */ + 1.33875336287249578163e+05, /* 0x4100579A, 0xB0B75E98 */ + 7.19657723683240939863e+05, /* 0x4125F653, 0x72869C19 */ + 6.66601232617776375264e+05, /* 0x412457D2, 0x7719AD5C */ + -2.94490264303834643215e+05, /* 0xC111F969, 0x0EA5AA18 */ +}; + +static const double qr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -2.08979931141764104297e-11, /* 0xBDB6FA43, 0x1AA1A098 */ + -1.02539050241375426231e-01, /* 0xBFBA3FFF, 0xCB597FEF */ + -8.05644828123936029840e+00, /* 0xC0201CE6, 0xCA03AD4B */ + -1.83669607474888380239e+02, /* 0xC066F56D, 0x6CA7B9B0 */ + -1.37319376065508163265e+03, /* 0xC09574C6, 0x6931734F */ + -2.61244440453215656817e+03, /* 0xC0A468E3, 0x88FDA79D */ +}; +static const double qs5[6] = { + 8.12765501384335777857e+01, /* 0x405451B2, 0xFF5A11B2 */ + 1.99179873460485964642e+03, /* 0x409F1F31, 0xE77BF839 */ + 1.74684851924908907677e+04, /* 0x40D10F1F, 0x0D64CE29 */ + 4.98514270910352279316e+04, /* 0x40E8576D, 0xAABAD197 */ + 2.79480751638918118260e+04, /* 0x40DB4B04, 0xCF7C364B */ + -4.71918354795128470869e+03, /* 0xC0B26F2E, 0xFCFFA004 */ +}; + +static const double qr3[6] = { + -5.07831226461766561369e-09, /* 0xBE35CFA9, 0xD38FC84F */ + -1.02537829820837089745e-01, /* 0xBFBA3FEB, 0x51AEED54 */ + -4.61011581139473403113e+00, /* 0xC01270C2, 0x3302D9FF */ + -5.78472216562783643212e+01, /* 0xC04CEC71, 0xC25D16DA */ + -2.28244540737631695038e+02, /* 0xC06C87D3, 0x4718D55F */ + -2.19210128478909325622e+02, /* 0xC06B66B9, 0x5F5C1BF6 */ +}; +static const double qs3[6] = { + 4.76651550323729509273e+01, /* 0x4047D523, 0xCCD367E4 */ + 6.73865112676699709482e+02, /* 0x40850EEB, 0xC031EE3E */ + 3.38015286679526343505e+03, /* 0x40AA684E, 0x448E7C9A */ + 5.54772909720722782367e+03, /* 0x40B5ABBA, 0xA61D54A6 */ + 1.90311919338810798763e+03, /* 0x409DBC7A, 0x0DD4DF4B */ + -1.35201191444307340817e+02, /* 0xC060E670, 0x290A311F */ +}; + +static const double qr2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */ + -1.78381727510958865572e-07, /* 0xBE87F126, 0x44C626D2 */ + -1.02517042607985553460e-01, /* 0xBFBA3E8E, 0x9148B010 */ + -2.75220568278187460720e+00, /* 0xC0060484, 0x69BB4EDA */ + -1.96636162643703720221e+01, /* 0xC033A9E2, 0xC168907F */ + -4.23253133372830490089e+01, /* 0xC04529A3, 0xDE104AAA */ + -2.13719211703704061733e+01, /* 0xC0355F36, 0x39CF6E52 */ +}; +static const double qs2[6] = { + 2.95333629060523854548e+01, /* 0x403D888A, 0x78AE64FF */ + 2.52981549982190529136e+02, /* 0x406F9F68, 0xDB821CBA */ + 7.57502834868645436472e+02, /* 0x4087AC05, 0xCE49A0F7 */ + 7.39393205320467245656e+02, /* 0x40871B25, 0x48D4C029 */ + 1.55949003336666123687e+02, /* 0x40637E5E, 0x3C3ED8D4 */ + -4.95949898822628210127e+00, /* 0xC013D686, 0xE71BE86B */ +}; + +static double qone(double x) +{ + const double *p,*q; + double s,r,z; + int32_t ix; + + GET_HIGH_WORD(ix, x); + ix &= 0x7fffffff; + if (ix >= 0x40200000){p = qr8; q = qs8;} + else if (ix >= 0x40122E8B){p = qr5; q = qs5;} + else if (ix >= 0x4006DB6D){p = qr3; q = qs3;} + else if (ix >= 0x40000000){p = qr2; q = qs2;} + z = 1.0/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); + return (.375 + r/s)/x; +} diff --git a/Library/libs/j1f.c b/Library/libs/j1f.c new file mode 100644 index 00000000..83d374e4 --- /dev/null +++ b/Library/libs/j1f.c @@ -0,0 +1,340 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_j1f.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include +#include "libm.h" + +static float ponef(float), qonef(float); + +static const float +huge = 1e30, +invsqrtpi = 5.6418961287e-01, /* 0x3f106ebb */ +tpi = 6.3661974669e-01, /* 0x3f22f983 */ +/* R0/S0 on [0,2] */ +r00 = -6.2500000000e-02, /* 0xbd800000 */ +r01 = 1.4070566976e-03, /* 0x3ab86cfd */ +r02 = -1.5995563444e-05, /* 0xb7862e36 */ +r03 = 4.9672799207e-08, /* 0x335557d2 */ +s01 = 1.9153760746e-02, /* 0x3c9ce859 */ +s02 = 1.8594678841e-04, /* 0x3942fab6 */ +s03 = 1.1771846857e-06, /* 0x359dffc2 */ +s04 = 5.0463624390e-09, /* 0x31ad6446 */ +s05 = 1.2354227016e-11; /* 0x2d59567e */ + +float j1f(float x) +{ + float z,s,c,ss,cc,r,u,v,y; + int32_t hx,ix; + + GET_FLOAT_WORD(hx, x); + ix = hx & 0x7fffffff; + if (ix >= 0x7f800000) + return 1.0f/x; + y = fabsf(x); + if (ix >= 0x40000000) { /* |x| >= 2.0 */ + s = sinf(y); + c = cosf(y); + ss = -s-c; + cc = s-c; + if (ix < 0x7f000000) { /* make sure y+y not overflow */ + z = cosf(y+y); + if (s*c > 0.0f) + cc = z/ss; + else + ss = z/cc; + } + /* + * j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x) + * y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x) + */ + if (ix > 0x80000000) + z = (invsqrtpi*cc)/sqrtf(y); + else { + u = ponef(y); + v = qonef(y); + z = invsqrtpi*(u*cc-v*ss)/sqrtf(y); + } + if (hx < 0) + return -z; + return z; + } + if (ix < 0x32000000) { /* |x| < 2**-27 */ + /* raise inexact if x!=0 */ + if (huge+x > 1.0f) + return 0.5f*x; + } + z = x*x; + r = z*(r00+z*(r01+z*(r02+z*r03))); + s = 1.0f+z*(s01+z*(s02+z*(s03+z*(s04+z*s05)))); + r *= x; + return 0.5f*x + r/s; +} + +static const float U0[5] = { + -1.9605709612e-01, /* 0xbe48c331 */ + 5.0443872809e-02, /* 0x3d4e9e3c */ + -1.9125689287e-03, /* 0xbafaaf2a */ + 2.3525259166e-05, /* 0x37c5581c */ + -9.1909917899e-08, /* 0xb3c56003 */ +}; +static const float V0[5] = { + 1.9916731864e-02, /* 0x3ca3286a */ + 2.0255257550e-04, /* 0x3954644b */ + 1.3560879779e-06, /* 0x35b602d4 */ + 6.2274145840e-09, /* 0x31d5f8eb */ + 1.6655924903e-11, /* 0x2d9281cf */ +}; + +float y1f(float x) +{ + float z,s,c,ss,cc,u,v; + int32_t hx,ix; + + GET_FLOAT_WORD(hx, x); + ix = 0x7fffffff & hx; + /* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */ + if (ix >= 0x7f800000) + return 1.0f/(x+x*x); + if (ix == 0) + return -__FINFINITY;/*-1.0f/0.0f;*/ + if (hx < 0) + return __sNaN;/*0.0f/0.0f;*/ + if (ix >= 0x40000000) { /* |x| >= 2.0 */ + s = sinf(x); + c = cosf(x); + ss = -s-c; + cc = s-c; + if (ix < 0x7f000000) { /* make sure x+x not overflow */ + z = cosf(x+x); + if (s*c > 0.0f) + cc = z/ss; + else + ss = z/cc; + } + /* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0)) + * where x0 = x-3pi/4 + * Better formula: + * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (cos(x) + sin(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. + */ + if (ix > 0x48000000) + z = (invsqrtpi*ss)/sqrtf(x); + else { + u = ponef(x); + v = qonef(x); + z = invsqrtpi*(u*ss+v*cc)/sqrtf(x); + } + return z; + } + if (ix <= 0x24800000) /* x < 2**-54 */ + return -tpi/x; + z = x*x; + u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4]))); + v = 1.0f+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4])))); + return x*(u/v) + tpi*(j1f(x)*logf(x)-1.0f/x); +} + +/* For x >= 8, the asymptotic expansions of pone is + * 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x. + * We approximate pone by + * pone(x) = 1 + (R/S) + * where R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10 + * S = 1 + ps0*s^2 + ... + ps4*s^10 + * and + * | pone(x)-1-R/S | <= 2 ** ( -60.06) + */ + +static const float pr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + 1.1718750000e-01, /* 0x3df00000 */ + 1.3239480972e+01, /* 0x4153d4ea */ + 4.1205184937e+02, /* 0x43ce06a3 */ + 3.8747453613e+03, /* 0x45722bed */ + 7.9144794922e+03, /* 0x45f753d6 */ +}; +static const float ps8[5] = { + 1.1420736694e+02, /* 0x42e46a2c */ + 3.6509309082e+03, /* 0x45642ee5 */ + 3.6956207031e+04, /* 0x47105c35 */ + 9.7602796875e+04, /* 0x47bea166 */ + 3.0804271484e+04, /* 0x46f0a88b */ +}; + +static const float pr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */ + 1.3199052094e-11, /* 0x2d68333f */ + 1.1718749255e-01, /* 0x3defffff */ + 6.8027510643e+00, /* 0x40d9b023 */ + 1.0830818176e+02, /* 0x42d89dca */ + 5.1763616943e+02, /* 0x440168b7 */ + 5.2871520996e+02, /* 0x44042dc6 */ +}; +static const float ps5[5] = { + 5.9280597687e+01, /* 0x426d1f55 */ + 9.9140142822e+02, /* 0x4477d9b1 */ + 5.3532670898e+03, /* 0x45a74a23 */ + 7.8446904297e+03, /* 0x45f52586 */ + 1.5040468750e+03, /* 0x44bc0180 */ +}; + +static const float pr3[6] = { + 3.0250391081e-09, /* 0x314fe10d */ + 1.1718686670e-01, /* 0x3defffab */ + 3.9329774380e+00, /* 0x407bb5e7 */ + 3.5119403839e+01, /* 0x420c7a45 */ + 9.1055007935e+01, /* 0x42b61c2a */ + 4.8559066772e+01, /* 0x42423c7c */ +}; +static const float ps3[5] = { + 3.4791309357e+01, /* 0x420b2a4d */ + 3.3676245117e+02, /* 0x43a86198 */ + 1.0468714600e+03, /* 0x4482dbe3 */ + 8.9081134033e+02, /* 0x445eb3ed */ + 1.0378793335e+02, /* 0x42cf936c */ +}; + +static const float pr2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */ + 1.0771083225e-07, /* 0x33e74ea8 */ + 1.1717621982e-01, /* 0x3deffa16 */ + 2.3685150146e+00, /* 0x401795c0 */ + 1.2242610931e+01, /* 0x4143e1bc */ + 1.7693971634e+01, /* 0x418d8d41 */ + 5.0735230446e+00, /* 0x40a25a4d */ +}; +static const float ps2[5] = { + 2.1436485291e+01, /* 0x41ab7dec */ + 1.2529022980e+02, /* 0x42fa9499 */ + 2.3227647400e+02, /* 0x436846c7 */ + 1.1767937469e+02, /* 0x42eb5bd7 */ + 8.3646392822e+00, /* 0x4105d590 */ +}; + +static float ponef(float x) +{ + const float *p,*q; + float z,r,s; + int32_t ix; + + GET_FLOAT_WORD(ix, x); + ix &= 0x7fffffff; + if (ix >= 0x41000000){p = pr8; q = ps8;} + else if (ix >= 0x40f71c58){p = pr5; q = ps5;} + else if (ix >= 0x4036db68){p = pr3; q = ps3;} + else if (ix >= 0x40000000){p = pr2; q = ps2;} + z = 1.0f/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0f+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); + return 1.0f + r/s; +} + +/* For x >= 8, the asymptotic expansions of qone is + * 3/8 s - 105/1024 s^3 - ..., where s = 1/x. + * We approximate pone by + * qone(x) = s*(0.375 + (R/S)) + * where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10 + * S = 1 + qs1*s^2 + ... + qs6*s^12 + * and + * | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13) + */ + +static const float qr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */ + 0.0000000000e+00, /* 0x00000000 */ + -1.0253906250e-01, /* 0xbdd20000 */ + -1.6271753311e+01, /* 0xc1822c8d */ + -7.5960174561e+02, /* 0xc43de683 */ + -1.1849806641e+04, /* 0xc639273a */ + -4.8438511719e+04, /* 0xc73d3683 */ +}; +static const float qs8[6] = { + 1.6139537048e+02, /* 0x43216537 */ + 7.8253862305e+03, /* 0x45f48b17 */ + 1.3387534375e+05, /* 0x4802bcd6 */ + 7.1965775000e+05, /* 0x492fb29c */ + 6.6660125000e+05, /* 0x4922be94 */ + -2.9449025000e+05, /* 0xc88fcb48 */ +}; + +static const float qr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */ + -2.0897993405e-11, /* 0xadb7d219 */ + -1.0253904760e-01, /* 0xbdd1fffe */ + -8.0564479828e+00, /* 0xc100e736 */ + -1.8366960144e+02, /* 0xc337ab6b */ + -1.3731937256e+03, /* 0xc4aba633 */ + -2.6124443359e+03, /* 0xc523471c */ +}; +static const float qs5[6] = { + 8.1276550293e+01, /* 0x42a28d98 */ + 1.9917987061e+03, /* 0x44f8f98f */ + 1.7468484375e+04, /* 0x468878f8 */ + 4.9851425781e+04, /* 0x4742bb6d */ + 2.7948074219e+04, /* 0x46da5826 */ + -4.7191835938e+03, /* 0xc5937978 */ +}; + +static const float qr3[6] = { + -5.0783124372e-09, /* 0xb1ae7d4f */ + -1.0253783315e-01, /* 0xbdd1ff5b */ + -4.6101160049e+00, /* 0xc0938612 */ + -5.7847221375e+01, /* 0xc267638e */ + -2.2824453735e+02, /* 0xc3643e9a */ + -2.1921012878e+02, /* 0xc35b35cb */ +}; +static const float qs3[6] = { + 4.7665153503e+01, /* 0x423ea91e */ + 6.7386511230e+02, /* 0x4428775e */ + 3.3801528320e+03, /* 0x45534272 */ + 5.5477290039e+03, /* 0x45ad5dd5 */ + 1.9031191406e+03, /* 0x44ede3d0 */ + -1.3520118713e+02, /* 0xc3073381 */ +}; + +static const float qr2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */ + -1.7838172539e-07, /* 0xb43f8932 */ + -1.0251704603e-01, /* 0xbdd1f475 */ + -2.7522056103e+00, /* 0xc0302423 */ + -1.9663616180e+01, /* 0xc19d4f16 */ + -4.2325313568e+01, /* 0xc2294d1f */ + -2.1371921539e+01, /* 0xc1aaf9b2 */ +}; +static const float qs2[6] = { + 2.9533363342e+01, /* 0x41ec4454 */ + 2.5298155212e+02, /* 0x437cfb47 */ + 7.5750280762e+02, /* 0x443d602e */ + 7.3939318848e+02, /* 0x4438d92a */ + 1.5594900513e+02, /* 0x431bf2f2 */ + -4.9594988823e+00, /* 0xc09eb437 */ +}; + +static float qonef(float x) +{ + const float *p,*q; + float s,r,z; + int32_t ix; + + GET_FLOAT_WORD(ix, x); + ix &= 0x7fffffff; + if (ix >= 0x40200000){p = qr8; q = qs8;} + else if (ix >= 0x40f71c58){p = qr5; q = qs5;} + else if (ix >= 0x4036db68){p = qr3; q = qs3;} + else if (ix >= 0x40000000){p = qr2; q = qs2;} + z = 1.0f/(x*x); + r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); + s = 1.0f+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); + return (.375f + r/s)/x; +} diff --git a/Library/libs/jn.c b/Library/libs/jn.c new file mode 100644 index 00000000..9cc27676 --- /dev/null +++ b/Library/libs/jn.c @@ -0,0 +1,278 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_jn.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * jn(n, x), yn(n, x) + * floating point Bessel's function of the 1st and 2nd kind + * of order n + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. + * Note 2. About jn(n,x), yn(n,x) + * For n=0, j0(x) is called, + * for n=1, j1(x) is called, + * for nx, a continued fraction approximation to + * j(n,x)/j(n-1,x) is evaluated and then backward + * recursion is used starting from a supposed value + * for j(n,x). The resulting value of j(0,x) is + * compared with the actual value to correct the + * supposed value of j(n,x). + * + * yn(n,x) is similar in all respects, except + * that forward recursion is used for all + * values of n>1. + * + */ + +#include +#include "libm.h" + +static const double invsqrtpi = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ + +double jn(int n, double x) +{ + int32_t i,hx,ix,lx,sgn; + double a, b, temp, di; + double z, w; + + /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) + * Thus, J(-n,x) = J(n,-x) + */ + EXTRACT_WORDS(hx, lx, x); + ix = 0x7fffffff & hx; + /* if J(n,NaN) is NaN */ + if ((ix|((uint32_t)(lx|-lx))>>31) > 0x7ff00000) + return x+x; + if (n < 0) { + n = -n; + x = -x; + hx ^= 0x80000000; + } + if (n == 0) return j0(x); + if (n == 1) return j1(x); + + sgn = (n&1)&(hx>>31); /* even n -- 0, odd n -- sign(x) */ + x = fabs(x); + if ((ix|lx) == 0 || ix >= 0x7ff00000) /* if x is 0 or inf */ + b = 0.0; + else if ((double)n <= x) { + /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ + if (ix >= 0x52D00000) { /* x > 2**302 */ + /* (x >> n**2) + * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Let s=sin(x), c=cos(x), + * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + * + * n sin(xn)*sqt2 cos(xn)*sqt2 + * ---------------------------------- + * 0 s-c c+s + * 1 -s-c -c+s + * 2 -s+c -c-s + * 3 s+c c-s + */ + switch(n&3) { + case 0: temp = cos(x)+sin(x); break; + case 1: temp = -cos(x)+sin(x); break; + case 2: temp = -cos(x)-sin(x); break; + case 3: temp = cos(x)-sin(x); break; + } + b = invsqrtpi*temp/sqrt(x); + } else { + a = j0(x); + b = j1(x); + for (i=1; i 33) /* underflow */ + b = 0.0; + else { + temp = x*0.5; + b = temp; + for (a=1.0,i=2; i<=n; i++) { + a *= (double)i; /* a = n! */ + b *= temp; /* b = (x/2)^n */ + } + b = b/a; + } + } else { + /* use backward recurrence */ + /* x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... + * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + double t,v; + double q0,q1,h,tmp; + int32_t k,m; + + w = (n+n)/(double)x; h = 2.0/(double)x; + q0 = w; + z = w+h; + q1 = w*z - 1.0; + k = 1; + while (q1 < 1.0e9) { + k += 1; + z += h; + tmp = z*q1 - q0; + q0 = q1; + q1 = tmp; + } + m = n+n; + for (t=0.0, i = 2*(n+k); i>=m; i -= 2) + t = 1.0/(i/x-t); + a = t; + b = 1.0; + /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * Hence, if n*(log(2n/x)) > ... + * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = n; + v = 2.0/x; + tmp = tmp*log(fabs(v*tmp)); + if (tmp < 7.09782712893383973096e+02) { + for (i=n-1,di=(double)(i+i); i>0; i--) { + temp = b; + b *= di; + b = b/x - a; + a = temp; + di -= 2.0; + } + } else { + for (i=n-1,di=(double)(i+i); i>0; i--) { + temp = b; + b *= di; + b = b/x - a; + a = temp; + di -= 2.0; + /* scale b to avoid spurious overflow */ + if (b > 1e100) { + a /= b; + t /= b; + b = 1.0; + } + } + } + z = j0(x); + w = j1(x); + if (fabs(z) >= fabs(w)) + b = t*z/b; + else + b = t*w/a; + } + } + if (sgn==1) return -b; + return b; +} + + + +double yn(int n, double x) +{ + int32_t i,hx,ix,lx; + int32_t sign; + double a, b, temp; + + EXTRACT_WORDS(hx, lx, x); + ix = 0x7fffffff & hx; + /* if Y(n,NaN) is NaN */ + if ((ix|((uint32_t)(lx|-lx))>>31) > 0x7ff00000) + return x+x; + if ((ix|lx) == 0) + return -1.0/0.0; + if (hx < 0) + return 0.0/0.0; + sign = 1; + if (n < 0) { + n = -n; + sign = 1 - ((n&1)<<1); + } + if (n == 0) + return y0(x); + if (n == 1) + return sign*y1(x); + if (ix == 0x7ff00000) + return 0.0; + if (ix >= 0x52D00000) { /* x > 2**302 */ + /* (x >> n**2) + * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + * Let s=sin(x), c=cos(x), + * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + * + * n sin(xn)*sqt2 cos(xn)*sqt2 + * ---------------------------------- + * 0 s-c c+s + * 1 -s-c -c+s + * 2 -s+c -c-s + * 3 s+c c-s + */ + switch(n&3) { + case 0: temp = sin(x)-cos(x); break; + case 1: temp = -sin(x)-cos(x); break; + case 2: temp = -sin(x)+cos(x); break; + case 3: temp = sin(x)+cos(x); break; + } + b = invsqrtpi*temp/sqrt(x); + } else { + uint32_t high; + a = y0(x); + b = y1(x); + /* quit if b is -inf */ + GET_HIGH_WORD(high, b); + for (i=1; i 0) return b; + return -b; +} diff --git a/Library/libs/jnf.c b/Library/libs/jnf.c new file mode 100644 index 00000000..d687c0d8 --- /dev/null +++ b/Library/libs/jnf.c @@ -0,0 +1,208 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_jnf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include +#include "libm.h" + +float jnf(int n, float x) +{ + int32_t i,hx,ix, sgn; + float a, b, temp, di; + float z, w; + + /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) + * Thus, J(-n,x) = J(n,-x) + */ + GET_FLOAT_WORD(hx, x); + ix = 0x7fffffff & hx; + /* if J(n,NaN) is NaN */ + if (ix > 0x7f800000) + return x+x; + if (n < 0) { + n = -n; + x = -x; + hx ^= 0x80000000; + } + if (n == 0) return j0f(x); + if (n == 1) return j1f(x); + + sgn = (n&1)&(hx>>31); /* even n -- 0, odd n -- sign(x) */ + x = fabsf(x); + if (ix == 0 || ix >= 0x7f800000) /* if x is 0 or inf */ + b = 0.0f; + else if((float)n <= x) { + /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ + a = j0f(x); + b = j1f(x); + for (i=1; i 33) /* underflow */ + b = 0.0f; + else { + temp = 0.5f * x; + b = temp; + for (a=1.0f,i=2; i<=n; i++) { + a *= (float)i; /* a = n! */ + b *= temp; /* b = (x/2)^n */ + } + b = b/a; + } + } else { + /* use backward recurrence */ + /* x x^2 x^2 + * J(n,x)/J(n-1,x) = ---- ------ ------ ..... + * 2n - 2(n+1) - 2(n+2) + * + * 1 1 1 + * (for large x) = ---- ------ ------ ..... + * 2n 2(n+1) 2(n+2) + * -- - ------ - ------ - + * x x x + * + * Let w = 2n/x and h=2/x, then the above quotient + * is equal to the continued fraction: + * 1 + * = ----------------------- + * 1 + * w - ----------------- + * 1 + * w+h - --------- + * w+2h - ... + * + * To determine how many terms needed, let + * Q(0) = w, Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + * When Q(k) > 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + float t,v; + float q0,q1,h,tmp; + int32_t k,m; + + w = (n+n)/x; + h = 2.0f/x; + z = w+h; + q0 = w; + q1 = w*z - 1.0f; + k = 1; + while (q1 < 1.0e9f) { + k += 1; + z += h; + tmp = z*q1 - q0; + q0 = q1; + q1 = tmp; + } + m = n+n; + for (t=0.0f, i = 2*(n+k); i>=m; i -= 2) + t = 1.0f/(i/x-t); + a = t; + b = 1.0f; + /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * Hence, if n*(log(2n/x)) > ... + * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = n; + v = 2.0f/x; + tmp = tmp*logf(fabsf(v*tmp)); + if (tmp < 88.721679688f) { + for (i=n-1,di=(float)(i+i); i>0; i--) { + temp = b; + b *= di; + b = b/x - a; + a = temp; + di -= 2.0f; + } + } else { + for (i=n-1,di=(float)(i+i); i>0; i--){ + temp = b; + b *= di; + b = b/x - a; + a = temp; + di -= 2.0f; + /* scale b to avoid spurious overflow */ + if (b > 1e10f) { + a /= b; + t /= b; + b = 1.0f; + } + } + } + z = j0f(x); + w = j1f(x); + if (fabsf(z) >= fabsf(w)) + b = t*z/b; + else + b = t*w/a; + } + } + if (sgn == 1) return -b; + return b; +} + +float ynf(int n, float x) +{ + int32_t i,hx,ix,ib; + int32_t sign; + float a, b, temp; + + GET_FLOAT_WORD(hx, x); + ix = 0x7fffffff & hx; + /* if Y(n,NaN) is NaN */ + if (ix > 0x7f800000) + return x+x; + if (ix == 0) + return -__FINFINITY;/*1.0f/0.0f;*/ + if (hx < 0) + return __sNaN;/*0.0f/0.0f;*/ + sign = 1; + if (n < 0) { + n = -n; + sign = 1 - ((n&1)<<1); + } + if (n == 0) + return y0f(x); + if (n == 1) + return sign*y1f(x); + if (ix == 0x7f800000) + return 0.0f; + + a = y0f(x); + b = y1f(x); + /* quit if b is -inf */ + GET_FLOAT_WORD(ib,b); + for (i = 1; i < n && ib != 0xff800000; i++){ + temp = b; + b = ((float)(i+i)/x)*b - a; + GET_FLOAT_WORD(ib, b); + a = temp; + } + if (sign > 0) + return b; + return -b; +} diff --git a/Library/libs/ldexp.c b/Library/libs/ldexp.c new file mode 100644 index 00000000..fed1a6b6 --- /dev/null +++ b/Library/libs/ldexp.c @@ -0,0 +1,8 @@ +/* From MUSL */ + +#include + +double ldexp(double x, int n) +{ + return scalbn(x, n); +} diff --git a/Library/libs/ldexpf.c b/Library/libs/ldexpf.c new file mode 100644 index 00000000..3ef58a7c --- /dev/null +++ b/Library/libs/ldexpf.c @@ -0,0 +1,8 @@ +/* From MUSL */ + +#include + +float ldexpf(float x, int n) +{ + return scalbnf(x, n); +} diff --git a/Library/libs/lgamma.c b/Library/libs/lgamma.c new file mode 100644 index 00000000..f42b29e3 --- /dev/null +++ b/Library/libs/lgamma.c @@ -0,0 +1,8 @@ +/* From MUSL */ + +#include + +double lgamma(double x) +{ + return lgamma_r(x, &signgam); +} diff --git a/Library/libs/lgamma_r.c b/Library/libs/lgamma_r.c new file mode 100644 index 00000000..a75ff259 --- /dev/null +++ b/Library/libs/lgamma_r.c @@ -0,0 +1,313 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_lgamma_r.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + */ +/* lgamma_r(x, signgamp) + * Reentrant version of the logarithm of the Gamma function + * with user provide pointer for the sign of Gamma(x). + * + * Method: + * 1. Argument Reduction for 0 < x <= 8 + * Since gamma(1+s)=s*gamma(s), for x in [0,8], we may + * reduce x to a number in [1.5,2.5] by + * lgamma(1+s) = log(s) + lgamma(s) + * for example, + * lgamma(7.3) = log(6.3) + lgamma(6.3) + * = log(6.3*5.3) + lgamma(5.3) + * = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3) + * 2. Polynomial approximation of lgamma around its + * minimun ymin=1.461632144968362245 to maintain monotonicity. + * On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use + * Let z = x-ymin; + * lgamma(x) = -1.214862905358496078218 + z^2*poly(z) + * where + * poly(z) is a 14 degree polynomial. + * 2. Rational approximation in the primary interval [2,3] + * We use the following approximation: + * s = x-2.0; + * lgamma(x) = 0.5*s + s*P(s)/Q(s) + * with accuracy + * |P/Q - (lgamma(x)-0.5s)| < 2**-61.71 + * Our algorithms are based on the following observation + * + * zeta(2)-1 2 zeta(3)-1 3 + * lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ... + * 2 3 + * + * where Euler = 0.5771... is the Euler constant, which is very + * close to 0.5. + * + * 3. For x>=8, we have + * lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+.... + * (better formula: + * lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...) + * Let z = 1/x, then we approximation + * f(z) = lgamma(x) - (x-0.5)(log(x)-1) + * by + * 3 5 11 + * w = w0 + w1*z + w2*z + w3*z + ... + w6*z + * where + * |w - f(z)| < 2**-58.74 + * + * 4. For negative x, since (G is gamma function) + * -x*G(-x)*G(x) = pi/sin(pi*x), + * we have + * G(x) = pi/(sin(pi*x)*(-x)*G(-x)) + * since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0 + * Hence, for x<0, signgam = sign(sin(pi*x)) and + * lgamma(x) = log(|Gamma(x)|) + * = log(pi/(|x*sin(pi*x)|)) - lgamma(-x); + * Note: one should avoid compute pi*(-x) directly in the + * computation of sin(pi*(-x)). + * + * 5. Special Cases + * lgamma(2+s) ~ s*(1-Euler) for tiny s + * lgamma(1) = lgamma(2) = 0 + * lgamma(x) ~ -log(|x|) for tiny x + * lgamma(0) = lgamma(neg.integer) = inf and raise divide-by-zero + * lgamma(inf) = inf + * lgamma(-inf) = inf (bug for bug compatible with C99!?) + * + */ + +#include +#include "libm.h" + +static const double +two52= 4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */ +pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */ +a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */ +a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */ +a2 = 6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */ +a3 = 2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */ +a4 = 7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */ +a5 = 2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */ +a6 = 1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */ +a7 = 5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */ +a8 = 2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */ +a9 = 1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */ +a10 = 2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */ +a11 = 4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */ +tc = 1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */ +tf = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */ +/* tt = -(tail of tf) */ +tt = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */ +t0 = 4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */ +t1 = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */ +t2 = 6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */ +t3 = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */ +t4 = 1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */ +t5 = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */ +t6 = 6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */ +t7 = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */ +t8 = 2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */ +t9 = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */ +t10 = 8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */ +t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */ +t12 = 3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */ +t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */ +t14 = 3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */ +u0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */ +u1 = 6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */ +u2 = 1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */ +u3 = 9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */ +u4 = 2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */ +u5 = 1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */ +v1 = 2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */ +v2 = 2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */ +v3 = 7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */ +v4 = 1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */ +v5 = 3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */ +s0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */ +s1 = 2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */ +s2 = 3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */ +s3 = 1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */ +s4 = 2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */ +s5 = 1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */ +s6 = 3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */ +r1 = 1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */ +r2 = 7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */ +r3 = 1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */ +r4 = 1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */ +r5 = 7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */ +r6 = 7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */ +w0 = 4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */ +w1 = 8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */ +w2 = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */ +w3 = 7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */ +w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */ +w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */ +w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ + +static double sin_pi(double x) +{ + double y,z; + int n,ix; + + GET_HIGH_WORD(ix, x); + ix &= 0x7fffffff; + + if (ix < 0x3fd00000) + return __sin(pi*x, 0.0, 0); + + y = -x; /* negative x is assumed */ + + /* + * argument reduction, make sure inexact flag not raised if input + * is an integer + */ + z = floor(y); + if (z != y) { /* inexact anyway */ + y *= 0.5; + y = 2.0*(y - floor(y)); /* y = |x| mod 2.0 */ + n = (int)(y*4.0); + } else { + if (ix >= 0x43400000) { + y = 0.0; /* y must be even */ + n = 0; + } else { + if (ix < 0x43300000) + z = y + two52; /* exact */ + GET_LOW_WORD(n, z); + n &= 1; + y = n; + n <<= 2; + } + } + switch (n) { + case 0: y = __sin(pi*y, 0.0, 0); break; + case 1: + case 2: y = __cos(pi*(0.5-y), 0.0); break; + case 3: + case 4: y = __sin(pi*(1.0-y), 0.0, 0); break; + case 5: + case 6: y = -__cos(pi*(y-1.5), 0.0); break; + default: y = __sin(pi*(y-2.0), 0.0, 0); break; + } + return -y; +} + + +double lgamma_r(double x, int *signgamp) +{ + double t,y,z,nadj,p,p1,p2,p3,q,r,w; + int32_t hx; + int i,lx,ix; + + EXTRACT_WORDS(hx, lx, x); + + /* purge off +-inf, NaN, +-0, tiny and negative arguments */ + *signgamp = 1; + ix = hx & 0x7fffffff; + if (ix >= 0x7ff00000) + return x*x; + if ((ix|lx) == 0) + return 1.0/0.0; + if (ix < 0x3b900000) { /* |x|<2**-70, return -log(|x|) */ + if(hx < 0) { + *signgamp = -1; + return -log(-x); + } + return -log(x); + } + if (hx < 0) { + if (ix >= 0x43300000) /* |x|>=2**52, must be -integer */ + return 1.0/0.0; + t = sin_pi(x); + if (t == 0.0) /* -integer */ + return 1.0/0.0; + nadj = log(pi/fabs(t*x)); + if (t < 0.0) + *signgamp = -1; + x = -x; + } + + /* purge off 1 and 2 */ + if (((ix - 0x3ff00000)|lx) == 0 || ((ix - 0x40000000)|lx) == 0) + r = 0; + /* for x < 2.0 */ + else if (ix < 0x40000000) { + if (ix <= 0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */ + r = -log(x); + if (ix >= 0x3FE76944) { + y = 1.0 - x; + i = 0; + } else if (ix >= 0x3FCDA661) { + y = x - (tc-1.0); + i = 1; + } else { + y = x; + i = 2; + } + } else { + r = 0.0; + if (ix >= 0x3FFBB4C3) { /* [1.7316,2] */ + y = 2.0 - x; + i = 0; + } else if(ix >= 0x3FF3B4C4) { /* [1.23,1.73] */ + y = x - tc; + i = 1; + } else { + y = x - 1.0; + i = 2; + } + } + switch (i) { + case 0: + z = y*y; + p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10)))); + p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11))))); + p = y*p1+p2; + r += (p-0.5*y); + break; + case 1: + z = y*y; + w = z*y; + p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))); /* parallel comp */ + p2 = t1+w*(t4+w*(t7+w*(t10+w*t13))); + p3 = t2+w*(t5+w*(t8+w*(t11+w*t14))); + p = z*p1-(tt-w*(p2+y*p3)); + r += tf + p; + break; + case 2: + p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5))))); + p2 = 1.0+y*(v1+y*(v2+y*(v3+y*(v4+y*v5)))); + r += -0.5*y + p1/p2; + } + } else if (ix < 0x40200000) { /* x < 8.0 */ + i = (int)x; + y = x - (double)i; + p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))); + q = 1.0+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6))))); + r = 0.5*y+p/q; + z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ + switch (i) { + case 7: z *= y + 6.0; /* FALLTHRU */ + case 6: z *= y + 5.0; /* FALLTHRU */ + case 5: z *= y + 4.0; /* FALLTHRU */ + case 4: z *= y + 3.0; /* FALLTHRU */ + case 3: z *= y + 2.0; /* FALLTHRU */ + r += log(z); + break; + } + } else if (ix < 0x43900000) { /* 8.0 <= x < 2**58 */ + t = log(x); + z = 1.0/x; + y = z*z; + w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6))))); + r = (x-0.5)*(t-1.0)+w; + } else /* 2**58 <= x <= inf */ + r = x*(log(x)-1.0); + if (hx < 0) + r = nadj - r; + return r; +} + diff --git a/Library/libs/lgammaf.c b/Library/libs/lgammaf.c new file mode 100644 index 00000000..1e85c041 --- /dev/null +++ b/Library/libs/lgammaf.c @@ -0,0 +1,6 @@ +#include + +float lgammaf(float x) +{ + return lgammaf_r(x, &signgam); +} diff --git a/Library/libs/lgammaf_r.c b/Library/libs/lgammaf_r.c new file mode 100644 index 00000000..9a436f0c --- /dev/null +++ b/Library/libs/lgammaf_r.c @@ -0,0 +1,248 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_lgammaf_r.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include +#include "libm.h" + +static const float +two23= 8.3886080000e+06, /* 0x4b000000 */ +pi = 3.1415927410e+00, /* 0x40490fdb */ +a0 = 7.7215664089e-02, /* 0x3d9e233f */ +a1 = 3.2246702909e-01, /* 0x3ea51a66 */ +a2 = 6.7352302372e-02, /* 0x3d89f001 */ +a3 = 2.0580807701e-02, /* 0x3ca89915 */ +a4 = 7.3855509982e-03, /* 0x3bf2027e */ +a5 = 2.8905137442e-03, /* 0x3b3d6ec6 */ +a6 = 1.1927076848e-03, /* 0x3a9c54a1 */ +a7 = 5.1006977446e-04, /* 0x3a05b634 */ +a8 = 2.2086278477e-04, /* 0x39679767 */ +a9 = 1.0801156895e-04, /* 0x38e28445 */ +a10 = 2.5214456400e-05, /* 0x37d383a2 */ +a11 = 4.4864096708e-05, /* 0x383c2c75 */ +tc = 1.4616321325e+00, /* 0x3fbb16c3 */ +tf = -1.2148628384e-01, /* 0xbdf8cdcd */ +/* tt = -(tail of tf) */ +tt = 6.6971006518e-09, /* 0x31e61c52 */ +t0 = 4.8383611441e-01, /* 0x3ef7b95e */ +t1 = -1.4758771658e-01, /* 0xbe17213c */ +t2 = 6.4624942839e-02, /* 0x3d845a15 */ +t3 = -3.2788541168e-02, /* 0xbd064d47 */ +t4 = 1.7970675603e-02, /* 0x3c93373d */ +t5 = -1.0314224288e-02, /* 0xbc28fcfe */ +t6 = 6.1005386524e-03, /* 0x3bc7e707 */ +t7 = -3.6845202558e-03, /* 0xbb7177fe */ +t8 = 2.2596477065e-03, /* 0x3b141699 */ +t9 = -1.4034647029e-03, /* 0xbab7f476 */ +t10 = 8.8108185446e-04, /* 0x3a66f867 */ +t11 = -5.3859531181e-04, /* 0xba0d3085 */ +t12 = 3.1563205994e-04, /* 0x39a57b6b */ +t13 = -3.1275415677e-04, /* 0xb9a3f927 */ +t14 = 3.3552918467e-04, /* 0x39afe9f7 */ +u0 = -7.7215664089e-02, /* 0xbd9e233f */ +u1 = 6.3282704353e-01, /* 0x3f2200f4 */ +u2 = 1.4549225569e+00, /* 0x3fba3ae7 */ +u3 = 9.7771751881e-01, /* 0x3f7a4bb2 */ +u4 = 2.2896373272e-01, /* 0x3e6a7578 */ +u5 = 1.3381091878e-02, /* 0x3c5b3c5e */ +v1 = 2.4559779167e+00, /* 0x401d2ebe */ +v2 = 2.1284897327e+00, /* 0x4008392d */ +v3 = 7.6928514242e-01, /* 0x3f44efdf */ +v4 = 1.0422264785e-01, /* 0x3dd572af */ +v5 = 3.2170924824e-03, /* 0x3b52d5db */ +s0 = -7.7215664089e-02, /* 0xbd9e233f */ +s1 = 2.1498242021e-01, /* 0x3e5c245a */ +s2 = 3.2577878237e-01, /* 0x3ea6cc7a */ +s3 = 1.4635047317e-01, /* 0x3e15dce6 */ +s4 = 2.6642270386e-02, /* 0x3cda40e4 */ +s5 = 1.8402845599e-03, /* 0x3af135b4 */ +s6 = 3.1947532989e-05, /* 0x3805ff67 */ +r1 = 1.3920053244e+00, /* 0x3fb22d3b */ +r2 = 7.2193557024e-01, /* 0x3f38d0c5 */ +r3 = 1.7193385959e-01, /* 0x3e300f6e */ +r4 = 1.8645919859e-02, /* 0x3c98bf54 */ +r5 = 7.7794247773e-04, /* 0x3a4beed6 */ +r6 = 7.3266842264e-06, /* 0x36f5d7bd */ +w0 = 4.1893854737e-01, /* 0x3ed67f1d */ +w1 = 8.3333335817e-02, /* 0x3daaaaab */ +w2 = -2.7777778450e-03, /* 0xbb360b61 */ +w3 = 7.9365057172e-04, /* 0x3a500cfd */ +w4 = -5.9518753551e-04, /* 0xba1c065c */ +w5 = 8.3633989561e-04, /* 0x3a5b3dd2 */ +w6 = -1.6309292987e-03; /* 0xbad5c4e8 */ + +static float sin_pif(float x) +{ + float y,z; + int n,ix; + + GET_FLOAT_WORD(ix, x); + ix &= 0x7fffffff; + + if(ix < 0x3e800000) + return __sindf(pi*x); + + y = -x; /* negative x is assumed */ + + /* + * argument reduction, make sure inexact flag not raised if input + * is an integer + */ + z = floorf(y); + if (z != y) { /* inexact anyway */ + y *= 0.5f; + y = 2.0f*(y - floorf(y)); /* y = |x| mod 2.0 */ + n = (int)(y*4.0f); + } else { + if (ix >= 0x4b800000) { + y = 0.0f; /* y must be even */ + n = 0; + } else { + if (ix < 0x4b000000) + z = y + two23; /* exact */ + GET_FLOAT_WORD(n, z); + n &= 1; + y = n; + n <<= 2; + } + } + switch (n) { + case 0: y = __sindf(pi*y); break; + case 1: + case 2: y = __cosdf(pi*(0.5f - y)); break; + case 3: + case 4: y = __sindf(pi*(1.0f - y)); break; + case 5: + case 6: y = -__cosdf(pi*(y - 1.5f)); break; + default: y = __sindf(pi*(y - 2.0f)); break; + } + return -y; +} + + +float lgammaf_r(float x, int *signgamp) +{ + float t,y,z,nadj,p,p1,p2,p3,q,r,w; + int32_t hx; + int i,ix; + + GET_FLOAT_WORD(hx, x); + + /* purge off +-inf, NaN, +-0, tiny and negative arguments */ + *signgamp = 1; + ix = hx & 0x7fffffff; + if (ix >= 0x7f800000) + return x*x; + if (ix == 0) + return __FINFINITY;/*1.0f/0.0f;*/ + if (ix < 0x35000000) { /* |x| < 2**-21, return -log(|x|) */ + if (hx < 0) { + *signgamp = -1; + return -logf(-x); + } + return -logf(x); + } + if (hx < 0) { + if (ix >= 0x4b000000) /* |x| >= 2**23, must be -integer */ + return __FINFINITY;/* 1.0f/0.0f;*/ + t = sin_pif(x); + if (t == 0.0f) /* -integer */ + return __FINFINITY;/*1.0f/0.0f;*/ + nadj = logf(pi/fabsf(t*x)); + if (t < 0.0f) + *signgamp = -1; + x = -x; + } + + /* purge off 1 and 2 */ + if (ix == 0x3f800000 || ix == 0x40000000) + r = 0; + /* for x < 2.0 */ + else if (ix < 0x40000000) { + if (ix <= 0x3f666666) { /* lgamma(x) = lgamma(x+1)-log(x) */ + r = -logf(x); + if (ix >= 0x3f3b4a20) { + y = 1.0f - x; + i = 0; + } else if (ix >= 0x3e6d3308) { + y = x - (tc-1.0f); + i = 1; + } else { + y = x; + i = 2; + } + } else { + r = 0.0f; + if (ix >= 0x3fdda618) { /* [1.7316,2] */ + y = 2.0f - x; + i = 0; + } else if (ix >= 0x3F9da620) { /* [1.23,1.73] */ + y = x - tc; + i = 1; + } else { + y = x - 1.0f; + i = 2; + } + } + switch(i) { + case 0: + z = y*y; + p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10)))); + p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11))))); + p = y*p1+p2; + r += p - 0.5f*y; + break; + case 1: + z = y*y; + w = z*y; + p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))); /* parallel comp */ + p2 = t1+w*(t4+w*(t7+w*(t10+w*t13))); + p3 = t2+w*(t5+w*(t8+w*(t11+w*t14))); + p = z*p1-(tt-w*(p2+y*p3)); + r += (tf + p); + break; + case 2: + p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5))))); + p2 = 1.0f+y*(v1+y*(v2+y*(v3+y*(v4+y*v5)))); + r += -0.5f*y + p1/p2; + } + } else if (ix < 0x41000000) { /* x < 8.0 */ + i = (int)x; + y = x - (float)i; + p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))); + q = 1.0f+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6))))); + r = 0.5f*y+p/q; + z = 1.0f; /* lgamma(1+s) = log(s) + lgamma(s) */ + switch (i) { + case 7: z *= y + 6.0f; /* FALLTHRU */ + case 6: z *= y + 5.0f; /* FALLTHRU */ + case 5: z *= y + 4.0f; /* FALLTHRU */ + case 4: z *= y + 3.0f; /* FALLTHRU */ + case 3: z *= y + 2.0f; /* FALLTHRU */ + r += logf(z); + break; + } + } else if (ix < 0x5c800000) { /* 8.0 <= x < 2**58 */ + t = logf(x); + z = 1.0f/x; + y = z*z; + w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6))))); + r = (x-0.5f)*(t-1.0f)+w; + } else /* 2**58 <= x <= inf */ + r = x*(logf(x)-1.0f); + if (hx < 0) + r = nadj - r; + return r; +} + diff --git a/Library/libs/libm.h b/Library/libs/libm.h index b42eed52..b5684080 100644 --- a/Library/libs/libm.h +++ b/Library/libs/libm.h @@ -134,4 +134,7 @@ float __cosdf(double); float __tandf(double,int); float __expo2f(float); +double __log1p(double); +float __log1pf(float); + #endif diff --git a/Library/libs/log10.c b/Library/libs/log10.c new file mode 100644 index 00000000..ca4bd864 --- /dev/null +++ b/Library/libs/log10.c @@ -0,0 +1,82 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * Return the base 10 logarithm of x. See e_log.c and k_log.h for most + * comments. + * + * log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2) + * in not-quite-routine extra precision. + */ + +#include +#include "libm.h" + +static const double +two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ +ivln10hi = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */ +ivln10lo = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */ +log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ +log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ + +double log10(double x) +{ + double f,hfsq,hi,lo,r,val_hi,val_lo,w,y,y2; + int32_t i,k,hx; + uint32_t lx; + + EXTRACT_WORDS(hx, lx, x); + + k = 0; + if (hx < 0x00100000) { /* x < 2**-1022 */ + if (((hx&0x7fffffff)|lx) == 0) + return -two54/0.0; /* log(+-0)=-inf */ + if (hx<0) + return (x-x)/0.0; /* log(-#) = NaN */ + /* subnormal number, scale up x */ + k -= 54; + x *= two54; + GET_HIGH_WORD(hx, x); + } + if (hx >= 0x7ff00000) + return x+x; + if (hx == 0x3ff00000 && lx == 0) + return 0.0; /* log(1) = +0 */ + k += (hx>>20) - 1023; + hx &= 0x000fffff; + i = (hx+0x95f64)&0x100000; + SET_HIGH_WORD(x, hx|(i^0x3ff00000)); /* normalize x or x/2 */ + k += i>>20; + y = (double)k; + f = x - 1.0; + hfsq = 0.5*f*f; + r = __log1p(f); + + /* See log2.c for details. */ + hi = f - hfsq; + SET_LOW_WORD(hi, 0); + lo = (f - hi) - hfsq + r; + val_hi = hi*ivln10hi; + y2 = y*log10_2hi; + val_lo = y*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi; + + /* + * Extra precision in for adding y*log10_2hi is not strictly needed + * since there is no very large cancellation near x = sqrt(2) or + * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs + * with some parallelism and it reduces the error for many args. + */ + w = y2 + val_hi; + val_lo += (y2 - w) + val_hi; + val_hi = w; + + return val_lo + val_hi; +} diff --git a/Library/libs/log10f.c b/Library/libs/log10f.c new file mode 100644 index 00000000..e3ae1cc7 --- /dev/null +++ b/Library/libs/log10f.c @@ -0,0 +1,69 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log10f.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * See comments in log10.c. + */ + +#include +#include "libm.h" + +static const float +two25 = 3.3554432000e+07, /* 0x4c000000 */ +ivln10hi = 4.3432617188e-01, /* 0x3ede6000 */ +ivln10lo = -3.1689971365e-05, /* 0xb804ead9 */ +log10_2hi = 3.0102920532e-01, /* 0x3e9a2080 */ +log10_2lo = 7.9034151668e-07; /* 0x355427db */ + +float log10f(float x) +{ + float f,hfsq,hi,lo,r,y; + int32_t i,k,hx; + + GET_FLOAT_WORD(hx, x); + + k = 0; + if (hx < 0x00800000) { /* x < 2**-126 */ + if ((hx&0x7fffffff) == 0) + return -two25/0.0f; /* log(+-0)=-inf */ + if (hx < 0) + return __sNaN;/*(x-x)/0.0f;*/ /* log(-#) = NaN */ + /* subnormal number, scale up x */ + k -= 25; + x *= two25; + GET_FLOAT_WORD(hx, x); + } + if (hx >= 0x7f800000) + return x+x; + if (hx == 0x3f800000) + return 0.0f; /* log(1) = +0 */ + k += (hx>>23) - 127; + hx &= 0x007fffff; + i = (hx+(0x4afb0d))&0x800000; + SET_FLOAT_WORD(x, hx|(i^0x3f800000)); /* normalize x or x/2 */ + k += i>>23; + y = (float)k; + f = x - 1.0f; + hfsq = 0.5f * f * f; + r = __log1pf(f); + +// FIXME +// /* See log2f.c and log2.c for details. */ +// if (sizeof(float_t) > sizeof(float)) +// return (r - hfsq + f) * ((float_t)ivln10lo + ivln10hi) + +// y * ((float_t)log10_2lo + log10_2hi); + hi = f - hfsq; + GET_FLOAT_WORD(hx, hi); + SET_FLOAT_WORD(hi, hx&0xfffff000); + lo = (f - hi) - hfsq + r; + return y*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi + + hi*ivln10hi + y*log10_2hi; +} diff --git a/Library/libs/log2.c b/Library/libs/log2.c new file mode 100644 index 00000000..361bb48d --- /dev/null +++ b/Library/libs/log2.c @@ -0,0 +1,105 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log2.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * Return the base 2 logarithm of x. See log.c and __log1p.h for most + * comments. + * + * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel, + * then does the combining and scaling steps + * log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k + * in not-quite-routine extra precision. + */ + +#include +#include "libm.h" + +static const double +two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ +ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */ +ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */ + +double log2(double x) +{ + double f,hfsq,hi,lo,r,val_hi,val_lo,w,y; + int32_t i,k,hx; + uint32_t lx; + + EXTRACT_WORDS(hx, lx, x); + + k = 0; + if (hx < 0x00100000) { /* x < 2**-1022 */ + if (((hx&0x7fffffff)|lx) == 0) + return -two54/0.0; /* log(+-0)=-inf */ + if (hx < 0) + return (x-x)/0.0; /* log(-#) = NaN */ + /* subnormal number, scale up x */ + k -= 54; + x *= two54; + GET_HIGH_WORD(hx, x); + } + if (hx >= 0x7ff00000) + return x+x; + if (hx == 0x3ff00000 && lx == 0) + return 0.0; /* log(1) = +0 */ + k += (hx>>20) - 1023; + hx &= 0x000fffff; + i = (hx+0x95f64) & 0x100000; + SET_HIGH_WORD(x, hx|(i^0x3ff00000)); /* normalize x or x/2 */ + k += i>>20; + y = (double)k; + f = x - 1.0; + hfsq = 0.5*f*f; + r = __log1p(f); + + /* + * f-hfsq must (for args near 1) be evaluated in extra precision + * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2). + * This is fairly efficient since f-hfsq only depends on f, so can + * be evaluated in parallel with R. Not combining hfsq with R also + * keeps R small (though not as small as a true `lo' term would be), + * so that extra precision is not needed for terms involving R. + * + * Compiler bugs involving extra precision used to break Dekker's + * theorem for spitting f-hfsq as hi+lo, unless double_t was used + * or the multi-precision calculations were avoided when double_t + * has extra precision. These problems are now automatically + * avoided as a side effect of the optimization of combining the + * Dekker splitting step with the clear-low-bits step. + * + * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra + * precision to avoid a very large cancellation when x is very near + * these values. Unlike the above cancellations, this problem is + * specific to base 2. It is strange that adding +-1 is so much + * harder than adding +-ln2 or +-log10_2. + * + * This uses Dekker's theorem to normalize y+val_hi, so the + * compiler bugs are back in some configurations, sigh. And I + * don't want to used double_t to avoid them, since that gives a + * pessimization and the support for avoiding the pessimization + * is not yet available. + * + * The multi-precision calculations for the multiplications are + * routine. + */ + hi = f - hfsq; + SET_LOW_WORD(hi, 0); + lo = (f - hi) - hfsq + r; + val_hi = hi*ivln2hi; + val_lo = (lo+hi)*ivln2lo + lo*ivln2hi; + + /* spadd(val_hi, val_lo, y), except for not using double_t: */ + w = y + val_hi; + val_lo += (y - w) + val_hi; + val_hi = w; + + return val_lo + val_hi; +} diff --git a/Library/libs/log2f.c b/Library/libs/log2f.c new file mode 100644 index 00000000..5ae83dec --- /dev/null +++ b/Library/libs/log2f.c @@ -0,0 +1,79 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log2f.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * See comments in log2.c. + */ + +#include +#include "libm.h" + +static const float +two25 = 3.3554432000e+07, /* 0x4c000000 */ +ivln2hi = 1.4428710938e+00, /* 0x3fb8b000 */ +ivln2lo = -1.7605285393e-04; /* 0xb9389ad4 */ + +float log2f(float x) +{ + float f,hfsq,hi,lo,r,y; + int32_t i,k,hx; + + GET_FLOAT_WORD(hx, x); + + k = 0; + if (hx < 0x00800000) { /* x < 2**-126 */ + if ((hx&0x7fffffff) == 0) + return -two25/0.0f; /* log(+-0)=-inf */ + if (hx < 0) + return __sNaN;/*(x-x)/0.0f;*/ /* log(-#) = NaN */ + /* subnormal number, scale up x */ + k -= 25; + x *= two25; + GET_FLOAT_WORD(hx, x); + } + if (hx >= 0x7f800000) + return x+x; + if (hx == 0x3f800000) + return 0.0f; /* log(1) = +0 */ + k += (hx>>23) - 127; + hx &= 0x007fffff; + i = (hx+(0x4afb0d))&0x800000; + SET_FLOAT_WORD(x, hx|(i^0x3f800000)); /* normalize x or x/2 */ + k += i>>23; + y = (float)k; + f = x - 1.0f; + hfsq = 0.5f * f * f; + r = __log1pf(f); + + /* + * We no longer need to avoid falling into the multi-precision + * calculations due to compiler bugs breaking Dekker's theorem. + * Keep avoiding this as an optimization. See log2.c for more + * details (some details are here only because the optimization + * is not yet available in double precision). + * + * Another compiler bug turned up. With gcc on i386, + * (ivln2lo + ivln2hi) would be evaluated in float precision + * despite runtime evaluations using double precision. So we + * must cast one of its terms to float_t. This makes the whole + * expression have type float_t, so return is forced to waste + * time clobbering its extra precision. + */ +// FIXME +// if (sizeof(float_t) > sizeof(float)) +// return (r - hfsq + f) * ((float_t)ivln2lo + ivln2hi) + y; + + hi = f - hfsq; + GET_FLOAT_WORD(hx,hi); + SET_FLOAT_WORD(hi,hx&0xfffff000); + lo = (f - hi) - hfsq + r; + return (lo+hi)*ivln2lo + lo*ivln2hi + hi*ivln2hi + y; +} diff --git a/Library/libs/logb.c b/Library/libs/logb.c new file mode 100644 index 00000000..6100b858 --- /dev/null +++ b/Library/libs/logb.c @@ -0,0 +1,18 @@ +#include +#include "libm.h" + +/* +special cases: + logb(+-0) = -inf, and raise divbyzero + logb(+-inf) = +inf + logb(nan) = nan +*/ + +double logb(double x) +{ + if (!isfinite(x)) + return x * x; + if (x == 0) + return -1/(x+0); + return ilogb(x); +} diff --git a/Library/libs/logbf.c b/Library/libs/logbf.c new file mode 100644 index 00000000..950d3569 --- /dev/null +++ b/Library/libs/logbf.c @@ -0,0 +1,10 @@ +#include "libm.h" + +float logbf(float x) +{ + if (!isfinite(x)) + return x * x; + if (x == 0) + return -1/(x+0); + return ilogbf(x); +} diff --git a/Library/libs/logf.c b/Library/libs/logf.c new file mode 100644 index 00000000..b2403f12 --- /dev/null +++ b/Library/libs/logf.c @@ -0,0 +1,88 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_logf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include +#include "libm.h" + +static const float +ln2_hi = 6.9313812256e-01, /* 0x3f317180 */ +ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */ +two25 = 3.355443200e+07, /* 0x4c000000 */ +/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ +Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */ +Lg2 = 0xccce13.0p-25, /* 0.40000972152 */ +Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */ +Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */ + +float logf(float x) +{ + float hfsq,f,s,z,R,w,t1,t2,dk; + int32_t k,ix,i,j; + + GET_FLOAT_WORD(ix, x); + + k = 0; + if (ix < 0x00800000) { /* x < 2**-126 */ + if ((ix & 0x7fffffff) == 0) + return -two25/0.0f; /* log(+-0)=-inf */ + if (ix < 0) + return __sNaN; /*(x-x)/0.0f;*/ /* log(-#) = NaN */ + /* subnormal number, scale up x */ + k -= 25; + x *= two25; + GET_FLOAT_WORD(ix, x); + } + if (ix >= 0x7f800000) + return x+x; + k += (ix>>23) - 127; + ix &= 0x007fffff; + i = (ix + (0x95f64<<3)) & 0x800000; + SET_FLOAT_WORD(x, ix|(i^0x3f800000)); /* normalize x or x/2 */ + k += i>>23; + f = x - 1.0f; + if ((0x007fffff & (0x8000 + ix)) < 0xc000) { /* -2**-9 <= f < 2**-9 */ + if (f == 0.0f) { + if (k == 0) + return 0.0f; + dk = (float)k; + return dk*ln2_hi + dk*ln2_lo; + } + R = f*f*(0.5f - 0.33333333333333333f*f); + if (k == 0) + return f-R; + dk = (float)k; + return dk*ln2_hi - ((R-dk*ln2_lo)-f); + } + s = f/(2.0f + f); + dk = (float)k; + z = s*s; + i = ix-(0x6147a<<3); + w = z*z; + j = (0x6b851<<3)-ix; + t1= w*(Lg2+w*Lg4); + t2= z*(Lg1+w*Lg3); + i |= j; + R = t2 + t1; + if (i > 0) { + hfsq = 0.5f * f * f; + if (k == 0) + return f - (hfsq-s*(hfsq+R)); + return dk*ln2_hi - ((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f); + } else { + if (k == 0) + return f - s*(f-R); + return dk*ln2_hi - ((s*(f-R)-dk*ln2_lo)-f); + } +} diff --git a/Library/libs/lrint.c b/Library/libs/lrint.c new file mode 100644 index 00000000..63825b9e --- /dev/null +++ b/Library/libs/lrint.c @@ -0,0 +1,49 @@ +/* From MUSL */ + +#include +#include +#include +#include "libm.h" + +/* +If the result cannot be represented (overflow, nan), then +lrint raises the invalid exception. + +Otherwise if the input was not an integer then the inexact +exception is raised. + +C99 is a bit vague about whether inexact exception is +allowed to be raised when invalid is raised. +(F.9 explicitly allows spurious inexact exceptions, F.9.6.5 +does not make it clear if that rule applies to lrint, but +IEEE 754r 7.8 seems to forbid spurious inexact exception in +the ineger conversion functions) + +So we try to make sure that no spurious inexact exception is +raised in case of an overflow. + +If the bit size of long > precision of double, then there +cannot be inexact rounding in case the result overflows, +otherwise LONG_MAX and LONG_MIN can be represented exactly +as a double. +*/ + +#if LONG_MAX < 1U<<53 && defined(FE_INEXACT) +long lrint(double x) +{ + #pragma STDC FENV_ACCESS ON + int e; + + e = fetestexcept(FE_INEXACT); + x = rint(x); + if (!e && (x > LONG_MAX || x < LONG_MIN)) + feclearexcept(FE_INEXACT); + /* conversion */ + return x; +} +#else +long lrint(double x) +{ + return rint(x); +} +#endif diff --git a/Library/libs/lrintf.c b/Library/libs/lrintf.c new file mode 100644 index 00000000..75110678 --- /dev/null +++ b/Library/libs/lrintf.c @@ -0,0 +1,10 @@ +/* From MUSL */ + +#include + +/* uses LONG_MAX > 2^24, see comments in lrint.c */ + +long lrintf(float x) +{ + return rintf(x); +} diff --git a/Library/libs/lround.c b/Library/libs/lround.c new file mode 100644 index 00000000..5208bc4d --- /dev/null +++ b/Library/libs/lround.c @@ -0,0 +1,8 @@ +/* From MUSL */ + +#include + +long lround(double x) +{ + return round(x); +} diff --git a/Library/libs/lroundf.c b/Library/libs/lroundf.c new file mode 100644 index 00000000..0d437de9 --- /dev/null +++ b/Library/libs/lroundf.c @@ -0,0 +1,8 @@ +/* From MUSL */ + +#include + +long lroundf(float x) +{ + return roundf(x); +} diff --git a/Library/libs/modf.c b/Library/libs/modf.c new file mode 100644 index 00000000..1128aedc --- /dev/null +++ b/Library/libs/modf.c @@ -0,0 +1,40 @@ +/* From MUSL */ + +#include +#include "libm.h" + +double modf(double x, double *iptr) +{ + union {double x; uint64_t n;} u = {x}; + uint64_t mask; + int e; + + e = (int)(u.n>>52 & 0x7ff) - 0x3ff; + + /* no fractional part */ + if (e >= 52) { + *iptr = x; + if (e == 0x400 && u.n<<12 != 0) /* nan */ + return x; + u.n &= (uint64_t)1<<63; + return u.x; + } + + /* no integral part*/ + if (e < 0) { + u.n &= (uint64_t)1<<63; + *iptr = u.x; + return x; + } + + mask = (uint64_t)-1>>12 >> e; + if ((u.n & mask) == 0) { + *iptr = x; + u.n &= (uint64_t)1<<63; + return u.x; + } + u.n &= ~mask; + *iptr = u.x; + STRICT_ASSIGN(double, x, x - *iptr); + return x; +} diff --git a/Library/libs/modff.c b/Library/libs/modff.c new file mode 100644 index 00000000..dd0a3530 --- /dev/null +++ b/Library/libs/modff.c @@ -0,0 +1,40 @@ +/* From MUSL */ + +#include +#include "libm.h" + +float modff(float x, float *iptr) +{ + union {float x; uint32_t n;} u = {x}; + uint32_t mask; + int e; + + e = (int)(u.n>>23 & 0xff) - 0x7f; + + /* no fractional part */ + if (e >= 23) { + *iptr = x; + if (e == 0x80 && u.n<<9 != 0) { /* nan */ + return x; + } + u.n &= 0x80000000; + return u.x; + } + /* no integral part */ + if (e < 0) { + u.n &= 0x80000000; + *iptr = u.x; + return x; + } + + mask = 0x007fffff>>e; + if ((u.n & mask) == 0) { + *iptr = x; + u.n &= 0x80000000; + return u.x; + } + u.n &= ~mask; + *iptr = u.x; + x = (float)(x - *iptr); + return x; +} diff --git a/Library/libs/nearbyint.c b/Library/libs/nearbyint.c new file mode 100644 index 00000000..bd1bc48a --- /dev/null +++ b/Library/libs/nearbyint.c @@ -0,0 +1,23 @@ +/* From MUSL */ + +/*#include */ +#include + +/* nearbyint is the same as rint, but it must not raise the inexact exception */ + +/* FIXME: exceptions to be done */ +double nearbyint(double x) +{ +#ifdef FE_INEXACT + #pragma STDC FENV_ACCESS ON + int e; + + e = fetestexcept(FE_INEXACT); +#endif + x = rint(x); +#ifdef FE_INEXACT + if (!e) + feclearexcept(FE_INEXACT); +#endif + return x; +} diff --git a/Library/libs/nearbyintf.c b/Library/libs/nearbyintf.c new file mode 100644 index 00000000..671d46b6 --- /dev/null +++ b/Library/libs/nearbyintf.c @@ -0,0 +1,22 @@ +/* From MUSL */ + +/*#include */ +#include + +/* FIXCME: exceptions to be done */ + +float nearbyintf(float x) +{ +#ifdef FE_INEXACT + #pragma STDC FENV_ACCESS ON + int e; + + e = fetestexcept(FE_INEXACT); +#endif + x = rintf(x); +#ifdef FE_INEXACT + if (!e) + feclearexcept(FE_INEXACT); +#endif + return x; +} diff --git a/Library/libs/nextafter.c b/Library/libs/nextafter.c new file mode 100644 index 00000000..26f6bdef --- /dev/null +++ b/Library/libs/nextafter.c @@ -0,0 +1,38 @@ +/* From MUSL */ + +#include +#include "libm.h" + +#define N_SIGN ((uint64_t)1<<63) + +double nextafter(double x, double y) +{ + union dshape ux, uy; + uint64_t ax, ay; + int e; + + if (isnan(x) || isnan(y)) + return x + y; + ux.value = x; + uy.value = y; + if (ux.bits == uy.bits) + return y; + ax = ux.bits & ~N_SIGN; + ay = uy.bits & ~N_SIGN; + if (ax == 0) { + if (ay == 0) + return y; + ux.bits = (uy.bits & N_SIGN) | 1; + } else if (ax > ay || ((ux.bits ^ uy.bits) & N_SIGN)) + ux.bits--; + else + ux.bits++; + e = ux.bits >> 52 & 0x7ff; + /* raise overflow if ux.value is infinite and x is finite */ + if (e == 0x7ff) + FORCE_EVAL(x+x); + /* raise underflow if ux.value is subnormal or zero */ + if (e == 0) + FORCE_EVAL(x*x + ux.value*ux.value); + return ux.value; +} diff --git a/Library/libs/nextafterf.c b/Library/libs/nextafterf.c new file mode 100644 index 00000000..08d58f4a --- /dev/null +++ b/Library/libs/nextafterf.c @@ -0,0 +1,39 @@ +/* From MUSL */ + +#include +#include "libm.h" + +#define N_SIGN 0x80000000 + +float nextafterf(float x, float y) +{ + union fshape ux, uy; + uint32_t ax, ay, e; + + if (isnan(x) || isnan(y)) + return x + y; + ux.value = x; + uy.value = y; + if (ux.bits == uy.bits) + return y; + ax = ux.bits & ~N_SIGN; + ay = uy.bits & ~N_SIGN; + if (ax == 0) { + if (ay == 0) + return y; + ux.bits = (uy.bits & N_SIGN) | 1; + } else if (ax > ay || ((ux.bits ^ uy.bits) & N_SIGN)) + ux.bits--; + else + ux.bits++; + e = ux.bits & 0x7f800000; + /* raise overflow if ux.value is infinite and x is finite */ + if (e == 0x7f800000) { + volatile float dummy = x + x; + } + /* raise underflow if ux.value is subnormal or zero */ + if (e == 0) { + volatile float dummy = x*x + ux.value*ux.value; + } + return ux.value; +} diff --git a/Library/libs/pow.c b/Library/libs/pow.c new file mode 100644 index 00000000..a74c4f64 --- /dev/null +++ b/Library/libs/pow.c @@ -0,0 +1,317 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_pow.c */ +/* + * ==================================================== + * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. + * + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* pow(x,y) return x**y + * + * n + * Method: Let x = 2 * (1+f) + * 1. Compute and return log2(x) in two pieces: + * log2(x) = w1 + w2, + * where w1 has 53-24 = 29 bit trailing zeros. + * 2. Perform y*log2(x) = n+y' by simulating muti-precision + * arithmetic, where |y'|<=0.5. + * 3. Return x**y = 2**n*exp(y'*log2) + * + * Special cases: + * 1. (anything) ** 0 is 1 + * 2. 1 ** (anything) is 1 + * 3. (anything except 1) ** NAN is NAN + * 4. NAN ** (anything except 0) is NAN + * 5. +-(|x| > 1) ** +INF is +INF + * 6. +-(|x| > 1) ** -INF is +0 + * 7. +-(|x| < 1) ** +INF is +0 + * 8. +-(|x| < 1) ** -INF is +INF + * 9. -1 ** +-INF is 1 + * 10. +0 ** (+anything except 0, NAN) is +0 + * 11. -0 ** (+anything except 0, NAN, odd integer) is +0 + * 12. +0 ** (-anything except 0, NAN) is +INF, raise divbyzero + * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF, raise divbyzero + * 14. -0 ** (+odd integer) is -0 + * 15. -0 ** (-odd integer) is -INF, raise divbyzero + * 16. +INF ** (+anything except 0,NAN) is +INF + * 17. +INF ** (-anything except 0,NAN) is +0 + * 18. -INF ** (+odd integer) is -INF + * 19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer) + * 20. (anything) ** 1 is (anything) + * 21. (anything) ** -1 is 1/(anything) + * 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) + * 23. (-anything except 0 and inf) ** (non-integer) is NAN + * + * Accuracy: + * pow(x,y) returns x**y nearly rounded. In particular + * pow(integer,integer) + * always returns the correct integer provided it is + * representable. + * + * Constants : + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + */ + +#include +#include "libm.h" + +static const double +bp[] = {1.0, 1.5,}, +dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */ +dp_l[] = { 0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */ +two53 = 9007199254740992.0, /* 0x43400000, 0x00000000 */ +huge = 1.0e300, +tiny = 1.0e-300, +/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */ +L1 = 5.99999999999994648725e-01, /* 0x3FE33333, 0x33333303 */ +L2 = 4.28571428578550184252e-01, /* 0x3FDB6DB6, 0xDB6FABFF */ +L3 = 3.33333329818377432918e-01, /* 0x3FD55555, 0x518F264D */ +L4 = 2.72728123808534006489e-01, /* 0x3FD17460, 0xA91D4101 */ +L5 = 2.30660745775561754067e-01, /* 0x3FCD864A, 0x93C9DB65 */ +L6 = 2.06975017800338417784e-01, /* 0x3FCA7E28, 0x4A454EEF */ +P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */ +P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */ +P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */ +P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */ +P5 = 4.13813679705723846039e-08, /* 0x3E663769, 0x72BEA4D0 */ +lg2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */ +lg2_h = 6.93147182464599609375e-01, /* 0x3FE62E43, 0x00000000 */ +lg2_l = -1.90465429995776804525e-09, /* 0xBE205C61, 0x0CA86C39 */ +ovt = 8.0085662595372944372e-017, /* -(1024-log2(ovfl+.5ulp)) */ +cp = 9.61796693925975554329e-01, /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */ +cp_h = 9.61796700954437255859e-01, /* 0x3FEEC709, 0xE0000000 =(float)cp */ +cp_l = -7.02846165095275826516e-09, /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h*/ +ivln2 = 1.44269504088896338700e+00, /* 0x3FF71547, 0x652B82FE =1/ln2 */ +ivln2_h = 1.44269502162933349609e+00, /* 0x3FF71547, 0x60000000 =24b 1/ln2*/ +ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/ + +double pow(double x, double y) +{ + double z,ax,z_h,z_l,p_h,p_l; + double y1,t1,t2,r,s,t,u,v,w; + int32_t i,j,k,yisint,n; + int32_t hx,hy,ix,iy; + uint32_t lx,ly; + + EXTRACT_WORDS(hx, lx, x); + EXTRACT_WORDS(hy, ly, y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + + /* x**0 = 1, even if x is NaN */ + if ((iy|ly) == 0) + return 1.0; + /* 1**y = 1, even if y is NaN */ + if (hx == 0x3ff00000 && lx == 0) + return 1.0; + /* NaN if either arg is NaN */ + if (ix > 0x7ff00000 || (ix == 0x7ff00000 && lx != 0) || + iy > 0x7ff00000 || (iy == 0x7ff00000 && ly != 0)) + return x + y; + + /* determine if y is an odd int when x < 0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... y is an even int + */ + yisint = 0; + if (hx < 0) { + if (iy >= 0x43400000) + yisint = 2; /* even integer y */ + else if (iy >= 0x3ff00000) { + k = (iy>>20) - 0x3ff; /* exponent */ + if (k > 20) { + j = ly>>(52-k); + if ((j<<(52-k)) == ly) + yisint = 2 - (j&1); + } else if (ly == 0) { + j = iy>>(20-k); + if ((j<<(20-k)) == iy) + yisint = 2 - (j&1); + } + } + } + + /* special value of y */ + if (ly == 0) { + if (iy == 0x7ff00000) { /* y is +-inf */ + if (((ix-0x3ff00000)|lx) == 0) /* (-1)**+-inf is 1 */ + return 1.0; + else if (ix >= 0x3ff00000) /* (|x|>1)**+-inf = inf,0 */ + return hy >= 0 ? y : 0.0; + else /* (|x|<1)**+-inf = 0,inf */ + return hy >= 0 ? 0.0 : -y; + } + if (iy == 0x3ff00000) /* y is +-1 */ + return hy >= 0 ? x : 1.0/x; + if (hy == 0x40000000) /* y is 2 */ + return x*x; + if (hy == 0x3fe00000) { /* y is 0.5 */ + if (hx >= 0) /* x >= +0 */ + return sqrt(x); + } + } + + ax = fabs(x); + /* special value of x */ + if (lx == 0) { + if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000) { /* x is +-0,+-inf,+-1 */ + z = ax; + if (hy < 0) /* z = (1/|x|) */ + z = 1.0/z; + if (hx < 0) { + if (((ix-0x3ff00000)|yisint) == 0) { + z = (z-z)/(z-z); /* (-1)**non-int is NaN */ + } else if (yisint == 1) + z = -z; /* (x<0)**odd = -(|x|**odd) */ + } + return z; + } + } + + s = 1.0; /* sign of result */ + if (hx < 0) { + if (yisint == 0) /* (x<0)**(non-int) is NaN */ + return (x-x)/(x-x); + if (yisint == 1) /* (x<0)**(odd int) */ + s = -1.0; + } + + /* |y| is huge */ + if (iy > 0x41e00000) { /* if |y| > 2**31 */ + if (iy > 0x43f00000) { /* if |y| > 2**64, must o/uflow */ + if (ix <= 0x3fefffff) + return hy < 0 ? huge*huge : tiny*tiny; + if (ix >= 0x3ff00000) + return hy > 0 ? huge*huge : tiny*tiny; + } + /* over/underflow if x is not close to one */ + if (ix < 0x3fefffff) + return hy < 0 ? s*huge*huge : s*tiny*tiny; + if (ix > 0x3ff00000) + return hy > 0 ? s*huge*huge : s*tiny*tiny; + /* now |1-x| is tiny <= 2**-20, suffice to compute + log(x) by x-x^2/2+x^3/3-x^4/4 */ + t = ax - 1.0; /* t has 20 trailing zeros */ + w = (t*t)*(0.5 - t*(0.3333333333333333333333-t*0.25)); + u = ivln2_h*t; /* ivln2_h has 21 sig. bits */ + v = t*ivln2_l - w*ivln2; + t1 = u + v; + SET_LOW_WORD(t1, 0); + t2 = v - (t1-u); + } else { + double ss,s2,s_h,s_l,t_h,t_l; + n = 0; + /* take care subnormal number */ + if (ix < 0x00100000) { + ax *= two53; + n -= 53; + GET_HIGH_WORD(ix,ax); + } + n += ((ix)>>20) - 0x3ff; + j = ix & 0x000fffff; + /* determine interval */ + ix = j | 0x3ff00000; /* normalize ix */ + if (j <= 0x3988E) /* |x|>1)|0x20000000) + 0x00080000 + (k<<18)); + t_l = ax - (t_h-bp[k]); + s_l = v*((u-s_h*t_h)-s_h*t_l); + /* compute log(ax) */ + s2 = ss*ss; + r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6))))); + r += s_l*(s_h+ss); + s2 = s_h*s_h; + t_h = 3.0 + s2 + r; + SET_LOW_WORD(t_h, 0); + t_l = r - ((t_h-3.0)-s2); + /* u+v = ss*(1+...) */ + u = s_h*t_h; + v = s_l*t_h + t_l*ss; + /* 2/(3log2)*(ss+...) */ + p_h = u + v; + SET_LOW_WORD(p_h, 0); + p_l = v - (p_h-u); + z_h = cp_h*p_h; /* cp_h+cp_l = 2/(3*log2) */ + z_l = cp_l*p_h+p_l*cp + dp_l[k]; + /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */ + t = (double)n; + t1 = ((z_h + z_l) + dp_h[k]) + t; + SET_LOW_WORD(t1, 0); + t2 = z_l - (((t1 - t) - dp_h[k]) - z_h); + } + + /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ + y1 = y; + SET_LOW_WORD(y1, 0); + p_l = (y-y1)*t1 + y*t2; + p_h = y1*t1; + z = p_l + p_h; + EXTRACT_WORDS(j, i, z); + if (j >= 0x40900000) { /* z >= 1024 */ + if (((j-0x40900000)|i) != 0) /* if z > 1024 */ + return s*huge*huge; /* overflow */ + if (p_l + ovt > z - p_h) + return s*huge*huge; /* overflow */ + } else if ((j&0x7fffffff) >= 0x4090cc00) { /* z <= -1075 */ // FIXME: instead of abs(j) use unsigned j + if (((j-0xc090cc00)|i) != 0) /* z < -1075 */ + return s*tiny*tiny; /* underflow */ + if (p_l <= z - p_h) + return s*tiny*tiny; /* underflow */ + } + /* + * compute 2**(p_h+p_l) + */ + i = j & 0x7fffffff; + k = (i>>20) - 0x3ff; + n = 0; + if (i > 0x3fe00000) { /* if |z| > 0.5, set n = [z+0.5] */ + n = j + (0x00100000>>(k+1)); + k = ((n&0x7fffffff)>>20) - 0x3ff; /* new k for n */ + t = 0.0; + SET_HIGH_WORD(t, n & ~(0x000fffff>>k)); + n = ((n&0x000fffff)|0x00100000)>>(20-k); + if (j < 0) + n = -n; + p_h -= t; + } + t = p_l + p_h; + SET_LOW_WORD(t, 0); + u = t*lg2_h; + v = (p_l-(t-p_h))*lg2 + t*lg2_l; + z = u + v; + w = v - (z-u); + t = z*z; + t1 = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); + r = (z*t1)/(t1-2.0) - (w + z*w); + z = 1.0 - (r-z); + GET_HIGH_WORD(j, z); + j += n<<20; + if ((j>>20) <= 0) /* subnormal output */ + z = scalbn(z,n); + else + SET_HIGH_WORD(z, j); + return s*z; +} diff --git a/Library/libs/powf.c b/Library/libs/powf.c new file mode 100644 index 00000000..7790f6a9 --- /dev/null +++ b/Library/libs/powf.c @@ -0,0 +1,260 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_powf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include +#include "libm.h" + +static const float +bp[] = {1.0, 1.5,}, +dp_h[] = { 0.0, 5.84960938e-01,}, /* 0x3f15c000 */ +dp_l[] = { 0.0, 1.56322085e-06,}, /* 0x35d1cfdc */ +two24 = 16777216.0, /* 0x4b800000 */ +huge = 1.0e30, +tiny = 1.0e-30, +/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */ +L1 = 6.0000002384e-01, /* 0x3f19999a */ +L2 = 4.2857143283e-01, /* 0x3edb6db7 */ +L3 = 3.3333334327e-01, /* 0x3eaaaaab */ +L4 = 2.7272811532e-01, /* 0x3e8ba305 */ +L5 = 2.3066075146e-01, /* 0x3e6c3255 */ +L6 = 2.0697501302e-01, /* 0x3e53f142 */ +P1 = 1.6666667163e-01, /* 0x3e2aaaab */ +P2 = -2.7777778450e-03, /* 0xbb360b61 */ +P3 = 6.6137559770e-05, /* 0x388ab355 */ +P4 = -1.6533901999e-06, /* 0xb5ddea0e */ +P5 = 4.1381369442e-08, /* 0x3331bb4c */ +lg2 = 6.9314718246e-01, /* 0x3f317218 */ +lg2_h = 6.93145752e-01, /* 0x3f317200 */ +lg2_l = 1.42860654e-06, /* 0x35bfbe8c */ +ovt = 4.2995665694e-08, /* -(128-log2(ovfl+.5ulp)) */ +cp = 9.6179670095e-01, /* 0x3f76384f =2/(3ln2) */ +cp_h = 9.6191406250e-01, /* 0x3f764000 =12b cp */ +cp_l = -1.1736857402e-04, /* 0xb8f623c6 =tail of cp_h */ +ivln2 = 1.4426950216e+00, /* 0x3fb8aa3b =1/ln2 */ +ivln2_h = 1.4426879883e+00, /* 0x3fb8aa00 =16b 1/ln2*/ +ivln2_l = 7.0526075433e-06; /* 0x36eca570 =1/ln2 tail*/ + +float powf(float x, float y) +{ + float z,ax,z_h,z_l,p_h,p_l; + float y1,t1,t2,r,s,sn,t,u,v,w; + int32_t i,j,k,yisint,n; + int32_t hx,hy,ix,iy,is; + + GET_FLOAT_WORD(hx, x); + GET_FLOAT_WORD(hy, y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + + /* x**0 = 1, even if x is NaN */ + if (iy == 0) + return 1.0f; + /* 1**y = 1, even if y is NaN */ + if (hx == 0x3f800000) + return 1.0f; + /* NaN if either arg is NaN */ + if (ix > 0x7f800000 || iy > 0x7f800000) + return x + y; + + /* determine if y is an odd int when x < 0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... y is an even int + */ + yisint = 0; + if (hx < 0) { + if (iy >= 0x4b800000) + yisint = 2; /* even integer y */ + else if (iy >= 0x3f800000) { + k = (iy>>23) - 0x7f; /* exponent */ + j = iy>>(23-k); + if ((j<<(23-k)) == iy) + yisint = 2 - (j & 1); + } + } + + /* special value of y */ + if (iy == 0x7f800000) { /* y is +-inf */ + if (ix == 0x3f800000) /* (-1)**+-inf is 1 */ + return 1.0f; + else if (ix > 0x3f800000) /* (|x|>1)**+-inf = inf,0 */ + return hy >= 0 ? y : 0.0f; + else /* (|x|<1)**+-inf = 0,inf */ + return hy >= 0 ? 0.0f: -y; + } + if (iy == 0x3f800000) /* y is +-1 */ + return hy >= 0 ? x : 1.0f/x; + if (hy == 0x40000000) /* y is 2 */ + return x*x; + if (hy == 0x3f000000) { /* y is 0.5 */ + if (hx >= 0) /* x >= +0 */ + return sqrtf(x); + } + + ax = fabsf(x); + /* special value of x */ + if (ix == 0x7f800000 || ix == 0 || ix == 0x3f800000) { /* x is +-0,+-inf,+-1 */ + z = ax; + if (hy < 0) /* z = (1/|x|) */ + z = 1.0f/z; + if (hx < 0) { + if (((ix-0x3f800000)|yisint) == 0) { + z = __sNaN;/* (z-z)/(z-z);*/ /* (-1)**non-int is NaN */ + } else if (yisint == 1) + z = -z; /* (x<0)**odd = -(|x|**odd) */ + } + return z; + } + + sn = 1.0f; /* sign of result */ + if (hx < 0) { + if (yisint == 0) /* (x<0)**(non-int) is NaN */ + return __sNaN; /*(x-x)/(x-x);*/ + if (yisint == 1) /* (x<0)**(odd int) */ + sn = -1.0f; + } + + /* |y| is huge */ + if (iy > 0x4d000000) { /* if |y| > 2**27 */ + /* over/underflow if x is not close to one */ + if (ix < 0x3f7ffff8) + return hy < 0 ? sn*huge*huge : sn*tiny*tiny; + if (ix > 0x3f800007) + return hy > 0 ? sn*huge*huge : sn*tiny*tiny; + /* now |1-x| is tiny <= 2**-20, suffice to compute + log(x) by x-x^2/2+x^3/3-x^4/4 */ + t = ax - 1; /* t has 20 trailing zeros */ + w = (t*t)*(0.5f - t*(0.333333333333f - t*0.25f)); + u = ivln2_h*t; /* ivln2_h has 16 sig. bits */ + v = t*ivln2_l - w*ivln2; + t1 = u + v; + GET_FLOAT_WORD(is, t1); + SET_FLOAT_WORD(t1, is & 0xfffff000); + t2 = v - (t1-u); + } else { + float s2,s_h,s_l,t_h,t_l; + n = 0; + /* take care subnormal number */ + if (ix < 0x00800000) { + ax *= two24; + n -= 24; + GET_FLOAT_WORD(ix, ax); + } + n += ((ix)>>23) - 0x7f; + j = ix & 0x007fffff; + /* determine interval */ + ix = j | 0x3f800000; /* normalize ix */ + if (j <= 0x1cc471) /* |x|>1) & 0xfffff000) | 0x20000000; + SET_FLOAT_WORD(t_h, is + 0x00400000 + (k<<21)); + t_l = ax - (t_h - bp[k]); + s_l = v*((u - s_h*t_h) - s_h*t_l); + /* compute log(ax) */ + s2 = s*s; + r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6))))); + r += s_l*(s_h+s); + s2 = s_h*s_h; + t_h = 3.0f + s2 + r; + GET_FLOAT_WORD(is, t_h); + SET_FLOAT_WORD(t_h, is & 0xfffff000); + t_l = r - ((t_h - 3.0f) - s2); + /* u+v = s*(1+...) */ + u = s_h*t_h; + v = s_l*t_h + t_l*s; + /* 2/(3log2)*(s+...) */ + p_h = u + v; + GET_FLOAT_WORD(is, p_h); + SET_FLOAT_WORD(p_h, is & 0xfffff000); + p_l = v - (p_h - u); + z_h = cp_h*p_h; /* cp_h+cp_l = 2/(3*log2) */ + z_l = cp_l*p_h + p_l*cp+dp_l[k]; + /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */ + t = (float)n; + t1 = (((z_h + z_l) + dp_h[k]) + t); + GET_FLOAT_WORD(is, t1); + SET_FLOAT_WORD(t1, is & 0xfffff000); + t2 = z_l - (((t1 - t) - dp_h[k]) - z_h); + } + + /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ + GET_FLOAT_WORD(is, y); + SET_FLOAT_WORD(y1, is & 0xfffff000); + p_l = (y-y1)*t1 + y*t2; + p_h = y1*t1; + z = p_l + p_h; + GET_FLOAT_WORD(j, z); + if (j > 0x43000000) /* if z > 128 */ + return sn*huge*huge; /* overflow */ + else if (j == 0x43000000) { /* if z == 128 */ + if (p_l + ovt > z - p_h) + return sn*huge*huge; /* overflow */ + } else if ((j&0x7fffffff) > 0x43160000) /* z < -150 */ // FIXME: check should be (uint32_t)j > 0xc3160000 + return sn*tiny*tiny; /* underflow */ + else if (j == 0xc3160000) { /* z == -150 */ + if (p_l <= z-p_h) + return sn*tiny*tiny; /* underflow */ + } + /* + * compute 2**(p_h+p_l) + */ + i = j & 0x7fffffff; + k = (i>>23) - 0x7f; + n = 0; + if (i > 0x3f000000) { /* if |z| > 0.5, set n = [z+0.5] */ + n = j + (0x00800000>>(k+1)); + k = ((n&0x7fffffff)>>23) - 0x7f; /* new k for n */ + SET_FLOAT_WORD(t, n & ~(0x007fffff>>k)); + n = ((n&0x007fffff)|0x00800000)>>(23-k); + if (j < 0) + n = -n; + p_h -= t; + } + t = p_l + p_h; + GET_FLOAT_WORD(is, t); + SET_FLOAT_WORD(t, is & 0xffff8000); + u = t*lg2_h; + v = (p_l-(t-p_h))*lg2 + t*lg2_l; + z = u + v; + w = v - (z - u); + t = z*z; + t1 = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); + r = (z*t1)/(t1-2.0f) - (w+z*w); + z = 1.0f - (r - z); + GET_FLOAT_WORD(j, z); + j += n<<23; + if ((j>>23) <= 0) /* subnormal output */ + z = scalbnf(z, n); + else + SET_FLOAT_WORD(z, j); + return sn*z; +} diff --git a/Library/libs/remainder.c b/Library/libs/remainder.c new file mode 100644 index 00000000..de1cb0c7 --- /dev/null +++ b/Library/libs/remainder.c @@ -0,0 +1,67 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_remainder.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* remainder(x,p) + * Return : + * returns x REM p = x - [x/p]*p as if in infinite + * precise arithmetic, where [x/p] is the (infinite bit) + * integer nearest x/p (in half way case choose the even one). + * Method : + * Based on fmod() return x-[x/p]chopped*p exactlp. + */ + +#include +#include "libm.h" + +double remainder(double x, double p) +{ + int32_t hx,hp; + uint32_t sx,lx,lp; + double p_half; + + EXTRACT_WORDS(hx, lx, x); + EXTRACT_WORDS(hp, lp, p); + sx = hx & 0x80000000; + hp &= 0x7fffffff; + hx &= 0x7fffffff; + + /* purge off exception values */ + if ((hp|lp) == 0 || /* p = 0 */ + hx >= 0x7ff00000 || /* x not finite */ + (hp >= 0x7ff00000 && (hp-0x7ff00000 | lp) != 0)) /* p is NaN */ + return (x*p)/(x*p); + + if (hp <= 0x7fdfffff) + x = fmod(x, p+p); /* now x < 2p */ + if (((hx-hp)|(lx-lp)) == 0) + return 0.0*x; + x = fabs(x); + p = fabs(p); + if (hp < 0x00200000) { + if (x + x > p) { + x -= p; + if (x + x >= p) + x -= p; + } + } else { + p_half = 0.5*p; + if (x > p_half) { + x -= p; + if (x >= p_half) + x -= p; + } + } + GET_HIGH_WORD(hx, x); + if ((hx&0x7fffffff) == 0) + hx = 0; + SET_HIGH_WORD(x, hx^sx); + return x; +} diff --git a/Library/libs/remainderf.c b/Library/libs/remainderf.c new file mode 100644 index 00000000..4ef8ba81 --- /dev/null +++ b/Library/libs/remainderf.c @@ -0,0 +1,60 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_remainderf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include +#include "libm.h" + +float remainderf(float x, float p) +{ + int32_t hx,hp; + uint32_t sx; + float p_half; + + GET_FLOAT_WORD(hx, x); + GET_FLOAT_WORD(hp, p); + sx = hx & 0x80000000; + hp &= 0x7fffffff; + hx &= 0x7fffffff; + + /* purge off exception values */ + if (hp == 0 || hx >= 0x7f800000 || hp > 0x7f800000) /* p = 0, x not finite, p is NaN */ + return (x*p)/(x*p); + + if (hp <= 0x7effffff) + x = fmodf(x, p + p); /* now x < 2p */ + if (hx - hp == 0) + return 0.0f*x; + x = fabsf(x); + p = fabsf(p); + if (hp < 0x01000000) { + if (x + x > p) { + x -= p; + if (x + x >= p) + x -= p; + } + } else { + p_half = 0.5f*p; + if (x > p_half) { + x -= p; + if (x >= p_half) + x -= p; + } + } + GET_FLOAT_WORD(hx, x); + if ((hx & 0x7fffffff) == 0) + hx = 0; + SET_FLOAT_WORD(x, hx ^ sx); + return x; +} diff --git a/Library/libs/remquo.c b/Library/libs/remquo.c new file mode 100644 index 00000000..fc0bf6c1 --- /dev/null +++ b/Library/libs/remquo.c @@ -0,0 +1,172 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_remquo.c */ +/*- + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * Return the IEEE remainder and set *quo to the last n bits of the + * quotient, rounded to the nearest integer. We choose n=31 because + * we wind up computing all the integer bits of the quotient anyway as + * a side-effect of computing the remainder by the shift and subtract + * method. In practice, this is far more bits than are needed to use + * remquo in reduction algorithms. + */ + +#include +#include "libm.h" + +static const double Zero[] = {0.0, -0.0,}; + +double remquo(double x, double y, int *quo) +{ + int32_t n,hx,hy,hz,ix,iy,sx,i; + uint32_t lx,ly,lz,q,sxy; + + EXTRACT_WORDS(hx, lx, x); + EXTRACT_WORDS(hy, ly, y); + sxy = (hx ^ hy) & 0x80000000; + sx = hx & 0x80000000; /* sign of x */ + hx ^= sx; /* |x| */ + hy &= 0x7fffffff; /* |y| */ + + /* purge off exception values */ + if ((hy|ly) == 0 || hx >= 0x7ff00000 || /* y = 0, or x not finite */ + (hy|((ly|-ly)>>31)) > 0x7ff00000) /* or y is NaN */ + return (x*y)/(x*y); + if (hx <= hy) { + if (hx < hy || lx < ly) { /* |x| < |y| return x or x-y */ + q = 0; + goto fixup; + } + if (lx == ly) { /* |x| = |y| return x*0 */ + *quo = sxy ? -1 : 1; + return Zero[(uint32_t)sx>>31]; + } + } + + // FIXME: use ilogb? + + /* determine ix = ilogb(x) */ + if (hx < 0x00100000) { /* subnormal x */ + if (hx == 0) { + for (ix = -1043, i=lx; i>0; i<<=1) ix--; + } else { + for (ix = -1022, i=hx<<11; i>0; i<<=1) ix--; + } + } else + ix = (hx>>20) - 1023; + + /* determine iy = ilogb(y) */ + if (hy < 0x00100000) { /* subnormal y */ + if (hy == 0) { + for (iy = -1043, i=ly; i>0; i<<=1) iy--; + } else { + for (iy = -1022, i=hy<<11; i>0; i<<=1) iy--; + } + } else + iy = (hy>>20) - 1023; + + /* set up {hx,lx}, {hy,ly} and align y to x */ + if (ix >= -1022) + hx = 0x00100000|(0x000fffff&hx); + else { /* subnormal x, shift x to normal */ + n = -1022 - ix; + if (n <= 31) { + hx = (hx<>(32-n)); + lx <<= n; + } else { + hx = lx<<(n-32); + lx = 0; + } + } + if (iy >= -1022) + hy = 0x00100000|(0x000fffff&hy); + else { /* subnormal y, shift y to normal */ + n = -1022 - iy; + if (n <= 31) { + hy = (hy<>(32-n)); + ly <<= n; + } else { + hy = ly<<(n-32); + ly = 0; + } + } + + /* fix point fmod */ + n = ix - iy; + q = 0; + while (n--) { + hz = hx - hy; + lz = lx - ly; + if (lx < ly) + hz--; + if (hz < 0) { + hx = hx + hx + (lx>>31); + lx = lx + lx; + } else { + hx = hz + hz + (lz>>31); + lx = lz + lz; + q++; + } + q <<= 1; + } + hz = hx - hy; + lz = lx - ly; + if (lx < ly) + hz--; + if (hz >= 0) { + hx = hz; + lx = lz; + q++; + } + + /* convert back to floating value and restore the sign */ + if ((hx|lx) == 0) { /* return sign(x)*0 */ + q &= 0x7fffffff; + *quo = sxy ? -q : q; + return Zero[(uint32_t)sx>>31]; + } + while (hx < 0x00100000) { /* normalize x */ + hx = hx + hx + (lx>>31); + lx = lx + lx; + iy--; + } + if (iy >= -1022) { /* normalize output */ + hx = (hx-0x00100000)|((iy+1023)<<20); + } else { /* subnormal output */ + n = -1022 - iy; + if (n <= 20) { + lx = (lx>>n)|((uint32_t)hx<<(32-n)); + hx >>= n; + } else if (n <= 31) { + lx = (hx<<(32-n))|(lx>>n); + hx = 0; + } else { + lx = hx>>(n-32); + hx = 0; + } + } +fixup: + INSERT_WORDS(x, hx, lx); + y = fabs(y); + if (y < 0x1p-1021) { + if (x + x > y || (x + x == y && (q & 1))) { + q++; + x -= y; + } + } else if (x > 0.5*y || (x == 0.5*y && (q & 1))) { + q++; + x -= y; + } + GET_HIGH_WORD(hx, x); + SET_HIGH_WORD(x, hx ^ sx); + q &= 0x7fffffff; + *quo = sxy ? -q : q; + return x; +} diff --git a/Library/libs/remquof.c b/Library/libs/remquof.c new file mode 100644 index 00000000..cb2967c5 --- /dev/null +++ b/Library/libs/remquof.c @@ -0,0 +1,127 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_remquof.c */ +/*- + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * Return the IEEE remainder and set *quo to the last n bits of the + * quotient, rounded to the nearest integer. We choose n=31 because + * we wind up computing all the integer bits of the quotient anyway as + * a side-effect of computing the remainder by the shift and subtract + * method. In practice, this is far more bits than are needed to use + * remquo in reduction algorithms. + */ + +#include +#include "libm.h" + +static const float Zero[] = {0.0, -0.0,}; + +float remquof(float x, float y, int *quo) +{ + int32_t n,hx,hy,hz,ix,iy,sx,i; + uint32_t q,sxy; + + GET_FLOAT_WORD(hx, x); + GET_FLOAT_WORD(hy, y); + sxy = (hx ^ hy) & 0x80000000; + sx = hx & 0x80000000; /* sign of x */ + hx ^= sx; /* |x| */ + hy &= 0x7fffffff; /* |y| */ + + /* purge off exception values */ + if (hy == 0 || hx >= 0x7f800000 || hy > 0x7f800000) /* y=0,NaN;or x not finite */ + return (x*y)/(x*y); + if (hx < hy) { /* |x| < |y| return x or x-y */ + q = 0; + goto fixup; + } else if(hx==hy) { /* |x| = |y| return x*0*/ + *quo = sxy ? -1 : 1; + return Zero[(uint32_t)sx>>31]; + } + + /* determine ix = ilogb(x) */ + if (hx < 0x00800000) { /* subnormal x */ + for (ix = -126, i=hx<<8; i>0; i<<=1) ix--; + } else + ix = (hx>>23) - 127; + + /* determine iy = ilogb(y) */ + if (hy < 0x00800000) { /* subnormal y */ + for (iy = -126, i=hy<<8; i>0; i<<=1) iy--; + } else + iy = (hy>>23) - 127; + + /* set up {hx,lx}, {hy,ly} and align y to x */ + if (ix >= -126) + hx = 0x00800000|(0x007fffff&hx); + else { /* subnormal x, shift x to normal */ + n = -126 - ix; + hx <<= n; + } + if (iy >= -126) + hy = 0x00800000|(0x007fffff&hy); + else { /* subnormal y, shift y to normal */ + n = -126 - iy; + hy <<= n; + } + + /* fix point fmod */ + n = ix - iy; + q = 0; + while (n--) { + hz = hx - hy; + if (hz < 0) + hx = hx << 1; + else { + hx = hz << 1; + q++; + } + q <<= 1; + } + hz = hx - hy; + if (hz >= 0) { + hx = hz; + q++; + } + + /* convert back to floating value and restore the sign */ + if (hx == 0) { /* return sign(x)*0 */ + q &= 0x7fffffff; + *quo = sxy ? -q : q; + return Zero[(uint32_t)sx>>31]; + } + while (hx < 0x00800000) { /* normalize x */ + hx <<= 1; + iy--; + } + if (iy >= -126) { /* normalize output */ + hx = (hx-0x00800000)|((iy+127)<<23); + } else { /* subnormal output */ + n = -126 - iy; + hx >>= n; + } +fixup: + SET_FLOAT_WORD(x,hx); + y = fabsf(y); + if (y < 0x1p-125f) { + if (x + x > y || (x + x == y && (q & 1))) { + q++; + x -= y; + } + } else if (x > 0.5f*y || (x == 0.5f*y && (q & 1))) { + q++; + x -= y; + } + GET_FLOAT_WORD(hx, x); + SET_FLOAT_WORD(x, hx ^ sx); + q &= 0x7fffffff; + *quo = sxy ? -q : q; + return x; +} diff --git a/Library/libs/rint.c b/Library/libs/rint.c new file mode 100644 index 00000000..8196f669 --- /dev/null +++ b/Library/libs/rint.c @@ -0,0 +1,91 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_rint.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * rint(x) + * Return x rounded to integral value according to the prevailing + * rounding mode. + * Method: + * Using floating addition. + * Exception: + * Inexact flag raised if x not equal to rint(x). + */ + +#include +#include "libm.h" + +static const double +TWO52[2] = { + 4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */ + -4.50359962737049600000e+15, /* 0xC3300000, 0x00000000 */ +}; + +double rint(double x) +{ + int32_t i0,j0,sx; + uint32_t i,i1; + double w,t; + + EXTRACT_WORDS(i0, i1, x); + // FIXME: signed shift + sx = (i0>>31) & 1; + j0 = ((i0>>20)&0x7ff) - 0x3ff; + if (j0 < 20) { + if (j0 < 0) { + if (((i0&0x7fffffff)|i1) == 0) + return x; + i1 |= i0 & 0x0fffff; + i0 &= 0xfffe0000; + i0 |= ((i1|-i1)>>12) & 0x80000; + SET_HIGH_WORD(x, i0); + w = (double)(TWO52[sx] + x); + t = w - TWO52[sx]; + GET_HIGH_WORD(i0, t); + SET_HIGH_WORD(t, (i0&0x7fffffff)|(sx<<31)); + return t; + } else { + i = 0x000fffff>>j0; + if (((i0&i)|i1) == 0) + return x; /* x is integral */ + i >>= 1; + if (((i0&i)|i1) != 0) { + /* + * Some bit is set after the 0.5 bit. To avoid the + * possibility of errors from double rounding in + * w = TWO52[sx]+x, adjust the 0.25 bit to a lower + * guard bit. We do this for all j0<=51. The + * adjustment is trickiest for j0==18 and j0==19 + * since then it spans the word boundary. + */ + if (j0 == 19) + i1 = 0x40000000; + else if (j0 == 18) + i1 = 0x80000000; + else + i0 = (i0 & ~i)|(0x20000>>j0); + } + } + } else if (j0 > 51) { + if (j0 == 0x400) + return x+x; /* inf or NaN */ + return x; /* x is integral */ + } else { + i = (uint32_t)0xffffffff>>(j0-20); + if ((i1&i) == 0) + return x; /* x is integral */ + i >>= 1; + if ((i1&i) != 0) + i1 = (i1 & ~i)|(0x40000000>>(j0-20)); + } + INSERT_WORDS(x, i0, i1); + w = (double)(TWO52[sx] + x); + return w - TWO52[sx]; +} diff --git a/Library/libs/rintf.c b/Library/libs/rintf.c new file mode 100644 index 00000000..6df2841c --- /dev/null +++ b/Library/libs/rintf.c @@ -0,0 +1,49 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_rintf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include +#include "libm.h" + +static const float +TWO23[2] = { + 8.3886080000e+06, /* 0x4b000000 */ + -8.3886080000e+06, /* 0xcb000000 */ +}; + +float rintf(float x) +{ + int32_t i0,j0,sx; + float w,t; + + GET_FLOAT_WORD(i0, x); + sx = (i0>>31) & 1; + j0 = ((i0>>23)&0xff) - 0x7f; + if (j0 < 23) { + if (j0 < 0) { + if ((i0&0x7fffffff) == 0) + return x; + w = (float)(TWO23[sx] + x); + t = w - TWO23[sx]; + GET_FLOAT_WORD(i0, t); + SET_FLOAT_WORD(t, (i0&0x7fffffff)|(sx<<31)); + return t; + } + w = (float)(TWO23[sx] + x); + return w - TWO23[sx]; + } + if (j0 == 0x80) + return x+x; /* inf or NaN */ + return x; /* x is integral */ +} diff --git a/Library/libs/sin.c b/Library/libs/sin.c new file mode 100644 index 00000000..77935828 --- /dev/null +++ b/Library/libs/sin.c @@ -0,0 +1,78 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* sin(x) + * Return sine function of x. + * + * kernel function: + * __sin ... sine function on [-pi/4,pi/4] + * __cos ... cose function on [-pi/4,pi/4] + * __rem_pio2 ... argument reduction routine + * + * Method. + * Let S,C and T denote the sin, cos and tan respectively on + * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 + * in [-pi/4 , +pi/4], and let n = k mod 4. + * We have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C T + * 1 C -S -1/T + * 2 -S -C T + * 3 -C S -1/T + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * TRIG(x) returns trig(x) nearly rounded + */ + +#include +#include "libm.h" + +double sin(double x) +{ + double y[2], z=0.0; + int32_t n, ix; + + /* High word of x. */ + GET_HIGH_WORD(ix, x); + + /* |x| ~< pi/4 */ + ix &= 0x7fffffff; + if (ix <= 0x3fe921fb) { + if (ix < 0x3e500000) { /* |x| < 2**-26 */ + /* raise inexact if x != 0 */ + if ((int)x == 0) + return x; + } + return __sin(x, z, 0); + } + + /* sin(Inf or NaN) is NaN */ + if (ix >= 0x7ff00000) + return x - x; + + /* argument reduction needed */ + n = __rem_pio2(x, y); + switch (n&3) { + case 0: return __sin(y[0], y[1], 1); + case 1: return __cos(y[0], y[1]); + case 2: return -__sin(y[0], y[1], 1); + default: + return -__cos(y[0], y[1]); + } +} diff --git a/Library/libs/sincos.c b/Library/libs/sincos.c new file mode 100644 index 00000000..e527097e --- /dev/null +++ b/Library/libs/sincos.c @@ -0,0 +1,69 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include +#include "libm.h" + +void sincos(double x, double *sin, double *cos) +{ + double y[2], s, c; + uint32_t n, ix; + + GET_HIGH_WORD(ix, x); + ix &= 0x7fffffff; + + /* |x| ~< pi/4 */ + if (ix <= 0x3fe921fb) { + /* if |x| < 2**-27 * sqrt(2) */ + if (ix < 0x3e46a09e) { + /* raise inexact if x != 0 */ + if ((int)x == 0) { + *sin = x; + *cos = 1.0; + } + return; + } + *sin = __sin(x, 0.0, 0); + *cos = __cos(x, 0.0); + return; + } + + /* sincos(Inf or NaN) is NaN */ + if (ix >= 0x7ff00000) { + *sin = *cos = x - x; + return; + } + + /* argument reduction needed */ + n = __rem_pio2(x, y); + s = __sin(y[0], y[1], 1); + c = __cos(y[0], y[1]); + switch (n&3) { + case 0: + *sin = s; + *cos = c; + break; + case 1: + *sin = c; + *cos = -s; + break; + case 2: + *sin = -s; + *cos = -c; + break; + case 3: + default: + *sin = -c; + *cos = s; + break; + } +} diff --git a/Library/libs/sincosf.c b/Library/libs/sincosf.c new file mode 100644 index 00000000..e3e418cd --- /dev/null +++ b/Library/libs/sincosf.c @@ -0,0 +1,114 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include +#include "libm.h" + +/* Small multiples of pi/2 rounded to double precision. */ +static const double +s1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */ +s2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */ +s3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */ +s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */ + +void sincosf(float x, float *sin, float *cos) +{ + double y, s, c; + uint32_t n, hx, ix; + + GET_FLOAT_WORD(hx, x); + ix = hx & 0x7fffffff; + + /* |x| ~<= pi/4 */ + if (ix <= 0x3f490fda) { + /* |x| < 2**-12 */ + if (ix < 0x39800000) { + /* raise inexact if x != 0 */ + if((int)x == 0) { + *sin = x; + *cos = 1.0f; + } + return; + } + *sin = __sindf(x); + *cos = __cosdf(x); + return; + } + + /* |x| ~<= 5*pi/4 */ + if (ix <= 0x407b53d1) { + if (ix <= 0x4016cbe3) { /* |x| ~<= 3pi/4 */ + if (hx < 0x80000000) { + *sin = __cosdf(x - s1pio2); + *cos = __sindf(s1pio2 - x); + } else { + *sin = -__cosdf(x + s1pio2); + *cos = __sindf(x + s1pio2); + } + return; + } + *sin = __sindf(hx < 0x80000000 ? s2pio2 - x : -s2pio2 - x); + *cos = -__cosdf(hx < 0x80000000 ? x - s2pio2 : x + s2pio2); + return; + } + + /* |x| ~<= 9*pi/4 */ + if (ix <= 0x40e231d5) { + if (ix <= 0x40afeddf) { /* |x| ~<= 7*pi/4 */ + if (hx < 0x80000000) { + *sin = -__cosdf(x - s3pio2); + *cos = __sindf(x - s3pio2); + } else { + *sin = __cosdf(x + s3pio2); + *cos = __sindf(-s3pio2 - x); + } + return; + } + *sin = __sindf(hx < 0x80000000 ? x - s4pio2 : x + s4pio2); + *cos = __cosdf(hx < 0x80000000 ? x - s4pio2 : x + s4pio2); + return; + } + + /* sin(Inf or NaN) is NaN */ + if (ix >= 0x7f800000) { + *sin = *cos = x - x; + return; + } + + /* general argument reduction needed */ + n = __rem_pio2f(x, &y); + s = __sindf(y); + c = __cosdf(y); + switch (n&3) { + case 0: + *sin = s; + *cos = c; + break; + case 1: + *sin = c; + *cos = -s; + break; + case 2: + *sin = -s; + *cos = -c; + break; + case 3: + default: + *sin = -c; + *cos = s; + break; + } +} diff --git a/Library/libs/sinf.c b/Library/libs/sinf.c new file mode 100644 index 00000000..567cce13 --- /dev/null +++ b/Library/libs/sinf.c @@ -0,0 +1,74 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include +#include "libm.h" + +/* Small multiples of pi/2 rounded to double precision. */ +static const double +s1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */ +s2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */ +s3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */ +s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */ + +float sinf(float x) +{ + double y; + int32_t n, hx, ix; + + GET_FLOAT_WORD(hx, x); + ix = hx & 0x7fffffff; + + if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */ + if (ix < 0x39800000) /* |x| < 2**-12 */ + /* raise inexact if x != 0 */ + if((int)x == 0) + return x; + return __sindf(x); + } + if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */ + if (ix <= 0x4016cbe3) { /* |x| ~<= 3pi/4 */ + if (hx > 0) + return __cosdf(x - s1pio2); + else + return -__cosdf(x + s1pio2); + } + return __sindf(hx > 0 ? s2pio2 - x : -s2pio2 - x); + } + if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */ + if (ix <= 0x40afeddf) { /* |x| ~<= 7*pi/4 */ + if (hx > 0) + return -__cosdf(x - s3pio2); + else + return __cosdf(x + s3pio2); + } + return __sindf(hx > 0 ? x - s4pio2 : x + s4pio2); + } + + /* sin(Inf or NaN) is NaN */ + if (ix >= 0x7f800000) + return x - x; + + /* general argument reduction needed */ + n = __rem_pio2f(x, &y); + switch (n&3) { + case 0: return __sindf(y); + case 1: return __cosdf(y); + case 2: return __sindf(-y); + default: + return -__cosdf(y); + } +} diff --git a/Library/libs/sinh.c b/Library/libs/sinh.c new file mode 100644 index 00000000..327fb26c --- /dev/null +++ b/Library/libs/sinh.c @@ -0,0 +1,72 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_sinh.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* sinh(x) + * Method : + * mathematically sinh(x) if defined to be (exp(x)-exp(-x))/2 + * 1. Replace x by |x| (sinh(-x) = -sinh(x)). + * 2. + * E + E/(E+1) + * 0 <= x <= 22 : sinh(x) := --------------, E=expm1(x) + * 2 + * + * 22 <= x <= lnovft : sinh(x) := exp(x)/2 + * lnovft <= x <= ln2ovft: sinh(x) := exp(x/2)/2 * exp(x/2) + * ln2ovft < x : sinh(x) := x*shuge (overflow) + * + * Special cases: + * sinh(x) is |x| if x is +INF, -INF, or NaN. + * only sinh(0)=0 is exact for finite x. + */ + +#include +#include "libm.h" + +static const double huge = 1.0e307; + +double sinh(double x) +{ + double t, h; + int32_t ix, jx; + + /* High word of |x|. */ + GET_HIGH_WORD(jx, x); + ix = jx & 0x7fffffff; + + /* x is INF or NaN */ + if (ix >= 0x7ff00000) + return x + x; + + h = 0.5; + if (jx < 0) h = -h; + /* |x| in [0,22], return sign(x)*0.5*(E+E/(E+1))) */ + if (ix < 0x40360000) { /* |x|<22 */ + if (ix < 0x3e300000) /* |x|<2**-28 */ + /* raise inexact, return x */ + if (huge+x > 1.0) + return x; + t = expm1(fabs(x)); + if (ix < 0x3ff00000) + return h*(2.0*t - t*t/(t+1.0)); + return h*(t + t/(t+1.0)); + } + + /* |x| in [22, log(maxdouble)] return 0.5*exp(|x|) */ + if (ix < 0x40862E42) + return h*exp(fabs(x)); + + /* |x| in [log(maxdouble), overflowthresold] */ + if (ix <= 0x408633CE) + return h * 2.0 * __expo2(fabs(x)); /* h is for sign only */ + + /* |x| > overflowthresold, sinh(x) overflow */ + return x*huge; +} diff --git a/Library/libs/sinhf.c b/Library/libs/sinhf.c new file mode 100644 index 00000000..19da0841 --- /dev/null +++ b/Library/libs/sinhf.c @@ -0,0 +1,58 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_sinhf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include "math.h" +#include "libm.h" + +static const float huge = 1.0e37; + +float sinhf(float x) +{ + float t, h; + int32_t ix, jx; + + GET_FLOAT_WORD(jx, x); + ix = jx & 0x7fffffff; + + /* x is INF or NaN */ + if (ix >= 0x7f800000) + return x + x; + + h = 0.5; + if (jx < 0) + h = -h; + /* |x| in [0,9], return sign(x)*0.5*(E+E/(E+1))) */ + if (ix < 0x41100000) { /* |x|<9 */ + if (ix < 0x39800000) /* |x|<2**-12 */ + /* raise inexact, return x */ + if (huge+x > 1.0f) + return x; + t = expm1f(fabsf(x)); + if (ix < 0x3f800000) + return h*(2.0f*t - t*t/(t+1.0f)); + return h*(t + t/(t+1.0f)); + } + + /* |x| in [9, logf(maxfloat)] return 0.5*exp(|x|) */ + if (ix < 0x42b17217) + return h*expf(fabsf(x)); + + /* |x| in [logf(maxfloat), overflowthresold] */ + if (ix <= 0x42b2d4fc) + return h * 2.0f * __expo2f(fabsf(x)); /* h is for sign only */ + + /* |x| > overflowthresold, sinh(x) overflow */ + return x*huge; +} diff --git a/Library/libs/tgamma.c b/Library/libs/tgamma.c new file mode 100644 index 00000000..4ace4060 --- /dev/null +++ b/Library/libs/tgamma.c @@ -0,0 +1,16 @@ +/* From MUSL */ + +#include + +// FIXME: use lanczos approximation + +double tgamma(double x) +{ + int sign; + double y; + + y = exp(lgamma_r(x, &sign)); + if (sign < 0) + y = -y; + return y; +} diff --git a/Library/libs/tgammaf.c b/Library/libs/tgammaf.c new file mode 100644 index 00000000..5f22c372 --- /dev/null +++ b/Library/libs/tgammaf.c @@ -0,0 +1,16 @@ +/* From MUSL */ + +#include + +// FIXME: use lanczos approximation + +float tgammaf(float x) +{ + int sign; + float y; + + y = exp(lgammaf_r(x, &sign)); + if (sign < 0) + y = -y; + return y; +} diff --git a/Library/libs/trunc.c b/Library/libs/trunc.c new file mode 100644 index 00000000..e3f4fbd4 --- /dev/null +++ b/Library/libs/trunc.c @@ -0,0 +1,64 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_trunc.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * trunc(x) + * Return x rounded toward 0 to integral value + * Method: + * Bit twiddling. + * Exception: + * Inexact flag raised if x not equal to trunc(x). + */ + +#include +#include "libm.h" + +static const double huge = 1.0e300; + +double trunc(double x) +{ + int32_t i0,i1,j0; + uint32_t i; + + EXTRACT_WORDS(i0, i1, x); + j0 = ((i0>>20)&0x7ff) - 0x3ff; + if (j0 < 20) { + if (j0 < 0) { /* |x|<1, return 0*sign(x) */ + /* raise inexact if x != 0 */ + if (huge+x > 0.0) { + i0 &= 0x80000000U; + i1 = 0; + } + } else { + i = 0x000fffff>>j0; + if (((i0&i)|i1) == 0) + return x; /* x is integral */ + /* raise inexact */ + if (huge+x > 0.0) { + i0 &= ~i; + i1 = 0; + } + } + } else if (j0 > 51) { + if (j0 == 0x400) + return x + x; /* inf or NaN */ + return x; /* x is integral */ + } else { + i = (uint32_t)0xffffffff>>(j0-20); + if ((i1&i) == 0) + return x; /* x is integral */ + /* raise inexact */ + if (huge+x > 0.0) + i1 &= ~i; + } + INSERT_WORDS(x, i0, i1); + return x; +} diff --git a/Library/libs/truncf.c b/Library/libs/truncf.c new file mode 100644 index 00000000..da2cdd7f --- /dev/null +++ b/Library/libs/truncf.c @@ -0,0 +1,53 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_truncf.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * truncf(x) + * Return x rounded toward 0 to integral value + * Method: + * Bit twiddling. + * Exception: + * Inexact flag raised if x not equal to truncf(x). + */ + +#include +#include "libm.h" + +static const float huge = 1.0e30f; + +float truncf(float x) +{ + int32_t i0,j0; + uint32_t i; + + GET_FLOAT_WORD(i0, x); + j0 = ((i0>>23)&0xff) - 0x7f; + if (j0 < 23) { + if (j0 < 0) { /* |x|<1, return 0*sign(x) */ + /* raise inexact if x != 0 */ + if (huge+x > 0.0f) + i0 &= 0x80000000; + } else { + i = 0x007fffff>>j0; + if ((i0&i) == 0) + return x; /* x is integral */ + /* raise inexact */ + if (huge+x > 0.0f) + i0 &= ~i; + } + } else { + if (j0 == 0x80) + return x + x; /* inf or NaN */ + return x; /* x is integral */ + } + SET_FLOAT_WORD(x, i0); + return x; +}