Commit 438553e9 authored by Alexandre Julliard's avatar Alexandre Julliard

libs: Import the math library from upstream musl 1.2.3.

parent 35f9091b
......@@ -725,6 +725,8 @@ TIFF_PE_LIBS
TIFF_PE_CFLAGS
PNG_PE_LIBS
PNG_PE_CFLAGS
MUSL_PE_LIBS
MUSL_PE_CFLAGS
MPG123_PE_LIBS
MPG123_PE_CFLAGS
LDAP_PE_LIBS
......@@ -1577,6 +1579,7 @@ enable_lcms2
enable_ldap
enable_mfuuid
enable_mpg123
enable_musl
enable_png
enable_strmbase
enable_strmiids
......@@ -1734,6 +1737,8 @@ LDAP_PE_CFLAGS
LDAP_PE_LIBS
MPG123_PE_CFLAGS
MPG123_PE_LIBS
MUSL_PE_CFLAGS
MUSL_PE_LIBS
PNG_PE_CFLAGS
PNG_PE_LIBS
TIFF_PE_CFLAGS
......@@ -2526,6 +2531,10 @@ Some influential environment variables:
version
MPG123_PE_LIBS
Linker flags for the PE mpg123, overriding the bundled version
MUSL_PE_CFLAGS
C compiler flags for the PE musl, overriding the bundled version
MUSL_PE_LIBS
Linker flags for the PE musl, overriding the bundled version
PNG_PE_CFLAGS
C compiler flags for the PE png, overriding the bundled version
PNG_PE_LIBS Linker flags for the PE png, overriding the bundled version
......@@ -13161,6 +13170,21 @@ fi
printf "%s\n" "$as_me:${as_lineno-$LINENO}: mpg123 cflags: $MPG123_PE_CFLAGS" >&5
printf "%s\n" "$as_me:${as_lineno-$LINENO}: mpg123 libs: $MPG123_PE_LIBS" >&5
if ${MUSL_PE_LIBS:+false} :
then :
MUSL_PE_LIBS=musl
if ${MUSL_PE_CFLAGS:+false} :
then :
MUSL_PE_CFLAGS=
else $as_nop
enable_musl=no
fi
else $as_nop
enable_musl=no
fi
printf "%s\n" "$as_me:${as_lineno-$LINENO}: musl cflags: $MUSL_PE_CFLAGS" >&5
printf "%s\n" "$as_me:${as_lineno-$LINENO}: musl libs: $MUSL_PE_LIBS" >&5
if ${PNG_PE_LIBS:+false} :
then :
PNG_PE_LIBS="png \$(ZLIB_PE_LIBS)"
......@@ -21976,6 +22000,7 @@ wine_fn_config_makefile libs/lcms2 enable_lcms2
wine_fn_config_makefile libs/ldap enable_ldap
wine_fn_config_makefile libs/mfuuid enable_mfuuid
wine_fn_config_makefile libs/mpg123 enable_mpg123
wine_fn_config_makefile libs/musl enable_musl
wine_fn_config_makefile libs/png enable_png
wine_fn_config_makefile libs/strmbase enable_strmbase
wine_fn_config_makefile libs/strmiids enable_strmiids
......@@ -23128,6 +23153,8 @@ LDAP_PE_CFLAGS = $LDAP_PE_CFLAGS
LDAP_PE_LIBS = $LDAP_PE_LIBS
MPG123_PE_CFLAGS = $MPG123_PE_CFLAGS
MPG123_PE_LIBS = $MPG123_PE_LIBS
MUSL_PE_CFLAGS = $MUSL_PE_CFLAGS
MUSL_PE_LIBS = $MUSL_PE_LIBS
PNG_PE_CFLAGS = $PNG_PE_CFLAGS
PNG_PE_LIBS = $PNG_PE_LIBS
TIFF_PE_CFLAGS = $TIFF_PE_CFLAGS
......
......@@ -1121,6 +1121,7 @@ WINE_EXTLIB_FLAGS(JXR, jxr, jxr, "-I\$(top_srcdir)/libs/jxr/jxrgluelib -I\$(top_
WINE_EXTLIB_FLAGS(LCMS2, lcms2, lcms2, "-I\$(top_srcdir)/libs/lcms2/include")
WINE_EXTLIB_FLAGS(LDAP, ldap, ldap, "-I\$(top_srcdir)/libs/ldap/include")
WINE_EXTLIB_FLAGS(MPG123, mpg123, mpg123, "-I\$(top_srcdir)/libs/mpg123/src/libmpg123")
WINE_EXTLIB_FLAGS(MUSL, musl, musl)
WINE_EXTLIB_FLAGS(PNG, png, "png \$(ZLIB_PE_LIBS)", "-I\$(top_srcdir)/libs/png")
WINE_EXTLIB_FLAGS(TIFF, tiff, "tiff \$(ZLIB_PE_LIBS)", "-I\$(top_srcdir)/libs/tiff/libtiff")
WINE_EXTLIB_FLAGS(VKD3D, vkd3d, vkd3d, "-I\$(top_srcdir)/libs/vkd3d/include")
......@@ -3284,6 +3285,7 @@ WINE_CONFIG_MAKEFILE(libs/lcms2)
WINE_CONFIG_MAKEFILE(libs/ldap)
WINE_CONFIG_MAKEFILE(libs/mfuuid)
WINE_CONFIG_MAKEFILE(libs/mpg123)
WINE_CONFIG_MAKEFILE(libs/musl)
WINE_CONFIG_MAKEFILE(libs/png)
WINE_CONFIG_MAKEFILE(libs/strmbase)
WINE_CONFIG_MAKEFILE(libs/strmiids)
......
......@@ -136,7 +136,7 @@ _ACRTIMP int __cdecl _fpclass(double);
_ACRTIMP double __cdecl nextafter(double, double);
#ifndef __i386__
#if !defined(__i386__) || defined(_NO_CRT_MATH_INLINE)
_ACRTIMP float __cdecl sinf(float);
_ACRTIMP float __cdecl cosf(float);
......@@ -206,13 +206,13 @@ static inline int _fpclassf(float x)
#endif
#if !defined(__i386__) && !defined(__x86_64__) && (_MSVCR_VER == 0 || _MSVCR_VER >= 110)
#if (!defined(__i386__) && !defined(__x86_64__) && (_MSVCR_VER == 0 || _MSVCR_VER >= 110)) || defined(_NO_CRT_MATH_INLINE)
_ACRTIMP float __cdecl fabsf(float);
#else
static inline float fabsf(float x) { return fabs(x); }
#endif
#if !defined(__i386__) || _MSVCR_VER>=120
#if !defined(__i386__) || _MSVCR_VER>=120 || defined(_NO_CRT_MATH_INLINE)
_ACRTIMP float __cdecl _chgsignf(float);
_ACRTIMP float __cdecl _copysignf(float, float);
......
musl as a whole is licensed under the following standard MIT license:
----------------------------------------------------------------------
Copyright © 2005-2020 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------
Authors/contributors include:
A. Wilcox
Ada Worcester
Alex Dowad
Alex Suykov
Alexander Monakov
Andre McCurdy
Andrew Kelley
Anthony G. Basile
Aric Belsito
Arvid Picciani
Bartosz Brachaczek
Benjamin Peterson
Bobby Bingham
Boris Brezillon
Brent Cook
Chris Spiegel
Clément Vasseur
Daniel Micay
Daniel Sabogal
Daurnimator
David Carlier
David Edelsohn
Denys Vlasenko
Dmitry Ivanov
Dmitry V. Levin
Drew DeVault
Emil Renner Berthing
Fangrui Song
Felix Fietkau
Felix Janda
Gianluca Anzolin
Hauke Mehrtens
He X
Hiltjo Posthuma
Isaac Dunham
Jaydeep Patil
Jens Gustedt
Jeremy Huntwork
Jo-Philipp Wich
Joakim Sindholt
John Spencer
Julien Ramseier
Justin Cormack
Kaarle Ritvanen
Khem Raj
Kylie McClain
Leah Neukirchen
Luca Barbato
Luka Perkov
M Farkas-Dyck (Strake)
Mahesh Bodapati
Markus Wichmann
Masanori Ogino
Michael Clark
Michael Forney
Mikhail Kremnyov
Natanael Copa
Nicholas J. Kain
orc
Pascal Cuoq
Patrick Oppenlander
Petr Hosek
Petr Skocik
Pierre Carrier
Reini Urban
Rich Felker
Richard Pennington
Ryan Fairfax
Samuel Holland
Segev Finer
Shiz
sin
Solar Designer
Stefan Kristiansson
Stefan O'Rear
Szabolcs Nagy
Timo Teräs
Trutz Behn
Valentin Ochs
Will Dietz
William Haddon
William Pitcock
Portions of this software are derived from third-party works licensed
under terms compatible with the above MIT license:
The TRE regular expression implementation (src/regex/reg* and
src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed
under a 2-clause BSD license (license text in the source files). The
included version has been heavily modified by Rich Felker in 2012, in
the interests of size, simplicity, and namespace cleanliness.
Much of the math library code (src/math/* and src/complex/*) is
Copyright © 1993,2004 Sun Microsystems or
Copyright © 2003-2011 David Schultz or
Copyright © 2003-2009 Steven G. Kargl or
Copyright © 2003-2009 Bruce D. Evans or
Copyright © 2008 Stephen L. Moshier or
Copyright © 2017-2018 Arm Limited
and labelled as such in comments in the individual source files. All
have been licensed under extremely permissive terms.
The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
The Android Open Source Project and is licensed under a two-clause BSD
license. It was taken from Bionic libc, used on Android.
The AArch64 memcpy and memset code (src/string/aarch64/*) are
Copyright © 1999-2019, Arm Limited.
The implementation of DES for crypt (src/crypt/crypt_des.c) is
Copyright © 1994 David Burren. It is licensed under a BSD license.
The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was
originally written by Solar Designer and placed into the public
domain. The code also comes with a fallback permissive license for use
in jurisdictions that may not recognize the public domain.
The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
Valentin Ochs and is licensed under an MIT-style license.
The x86_64 port was written by Nicholas J. Kain and is licensed under
the standard MIT terms.
The mips and microblaze ports were originally written by Richard
Pennington for use in the ellcc project. The original code was adapted
by Rich Felker for build system and code conventions during upstream
integration. It is licensed under the standard MIT terms.
The mips64 port was contributed by Imagination Technologies and is
licensed under the standard MIT terms.
The powerpc port was also originally written by Richard Pennington,
and later supplemented and integrated by John Spencer. It is licensed
under the standard MIT terms.
All other files which have no copyright comments are original works
produced specifically for use as part of this library, written either
by Rich Felker, the main author of the library, or by one or more
contibutors listed above. Details on authorship of individual files
can be found in the git version control history of the project. The
omission of copyright and license comments in each file is in the
interest of source tree size.
In addition, permission is hereby granted for all public header files
(include/* and arch/*/bits/*) and crt files intended to be linked into
applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit
the copyright notice and permission notice otherwise required by the
license, and to use these files without any requirement of
attribution. These files include substantial contributions from:
Bobby Bingham
John Spencer
Nicholas J. Kain
Rich Felker
Richard Pennington
Stefan Kristiansson
Szabolcs Nagy
all of whom have explicitly granted such permission.
This file previously contained text expressing a belief that most of
the files covered by the above exception were sufficiently trivial not
to be subject to copyright, resulting in confusion over whether it
negated the permissions granted in the license. In the spirit of
permissive licensing, and of not having licensing issues being an
obstacle to adoption, that text has been removed.
EXTLIB = libmusl.a
EXTRAINCL = -I$(srcdir)/src/internal -I$(srcdir)/arch/generic
EXTRADEFS = -D_ACRTIMP= -D_NO_CRT_MATH_INLINE
SOURCES = \
src/math/__cos.c \
src/math/__cosdf.c \
src/math/__expo2.c \
src/math/__expo2f.c \
src/math/__fpclassify.c \
src/math/__fpclassifyf.c \
src/math/__math_divzero.c \
src/math/__math_divzerof.c \
src/math/__math_invalid.c \
src/math/__math_invalidf.c \
src/math/__rem_pio2.c \
src/math/__rem_pio2_large.c \
src/math/__rem_pio2f.c \
src/math/__sin.c \
src/math/__sindf.c \
src/math/__tan.c \
src/math/__tandf.c \
src/math/acos.c \
src/math/acosf.c \
src/math/acosh.c \
src/math/acoshf.c \
src/math/asin.c \
src/math/asinf.c \
src/math/asinh.c \
src/math/asinhf.c \
src/math/atan.c \
src/math/atan2.c \
src/math/atan2f.c \
src/math/atanf.c \
src/math/atanh.c \
src/math/atanhf.c \
src/math/cbrt.c \
src/math/cbrtf.c \
src/math/ceil.c \
src/math/ceilf.c \
src/math/copysign.c \
src/math/copysignf.c \
src/math/cos.c \
src/math/cosf.c \
src/math/cosh.c \
src/math/coshf.c \
src/math/erf.c \
src/math/erff.c \
src/math/exp.c \
src/math/exp2.c \
src/math/exp2f.c \
src/math/exp2f_data.c \
src/math/exp_data.c \
src/math/expf.c \
src/math/expm1.c \
src/math/expm1f.c \
src/math/fabs.c \
src/math/fabsf.c \
src/math/fdim.c \
src/math/fdimf.c \
src/math/floor.c \
src/math/floorf.c \
src/math/fma.c \
src/math/fmaf.c \
src/math/fmax.c \
src/math/fmaxf.c \
src/math/fmin.c \
src/math/fminf.c \
src/math/fmod.c \
src/math/fmodf.c \
src/math/frexp.c \
src/math/frexpf.c \
src/math/hypot.c \
src/math/hypotf.c \
src/math/ilogb.c \
src/math/ilogbf.c \
src/math/j0.c \
src/math/j1.c \
src/math/jn.c \
src/math/ldexp.c \
src/math/lgamma.c \
src/math/lgamma_r.c \
src/math/lgammaf.c \
src/math/lgammaf_r.c \
src/math/log.c \
src/math/log10.c \
src/math/log10f.c \
src/math/log1p.c \
src/math/log1pf.c \
src/math/log2.c \
src/math/log2_data.c \
src/math/log2f.c \
src/math/log2f_data.c \
src/math/log_data.c \
src/math/logb.c \
src/math/logbf.c \
src/math/logf.c \
src/math/logf_data.c \
src/math/modf.c \
src/math/modff.c \
src/math/nan.c \
src/math/nanf.c \
src/math/nextafter.c \
src/math/nextafterf.c \
src/math/nexttoward.c \
src/math/nexttowardf.c \
src/math/pow.c \
src/math/pow_data.c \
src/math/powf.c \
src/math/powf_data.c \
src/math/remainder.c \
src/math/remainderf.c \
src/math/remquo.c \
src/math/remquof.c \
src/math/rint.c \
src/math/rintf.c \
src/math/round.c \
src/math/roundf.c \
src/math/scalbn.c \
src/math/scalbnf.c \
src/math/signgam.c \
src/math/sin.c \
src/math/sincos.c \
src/math/sincosf.c \
src/math/sinf.c \
src/math/sinh.c \
src/math/sinhf.c \
src/math/sqrt.c \
src/math/sqrt_data.c \
src/math/sqrtf.c \
src/math/tan.c \
src/math/tanf.c \
src/math/tanh.c \
src/math/tanhf.c \
src/math/tgamma.c \
src/math/tgammaf.c \
src/math/trunc.c \
src/math/truncf.c
#ifndef FEATURES_H
#define FEATURES_H
#define weak
#define hidden
#define weak_alias(old, new)
#endif
#ifndef _LIBM_H
#define _LIBM_H
#include <stdint.h>
#include <float.h>
#include <math.h>
#include <errno.h>
#include <features.h>
typedef float float_t;
typedef double double_t;
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
union ldshape {
long double f;
struct {
uint64_t m;
uint16_t se;
} i;
};
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
/* This is the m68k variant of 80-bit long double, and this definition only works
* on archs where the alignment requirement of uint64_t is <= 4. */
union ldshape {
long double f;
struct {
uint16_t se;
uint16_t pad;
uint64_t m;
} i;
};
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
union ldshape {
long double f;
struct {
uint64_t lo;
uint32_t mid;
uint16_t top;
uint16_t se;
} i;
struct {
uint64_t lo;
uint64_t hi;
} i2;
};
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
union ldshape {
long double f;
struct {
uint16_t se;
uint16_t top;
uint32_t mid;
uint64_t lo;
} i;
struct {
uint64_t hi;
uint64_t lo;
} i2;
};
#else
#error Unsupported long double representation
#endif
/* Support non-nearest rounding mode. */
#define WANT_ROUNDING 1
/* Support signaling NaNs. */
#define WANT_SNAN 0
#if WANT_SNAN
#error SNaN is unsupported
#else
#define issignalingf_inline(x) 0
#define issignaling_inline(x) 0
#endif
#ifndef TOINT_INTRINSICS
#define TOINT_INTRINSICS 0
#endif
#if TOINT_INTRINSICS
/* Round x to nearest int in all rounding modes, ties have to be rounded
consistently with converttoint so the results match. If the result
would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
static double_t roundtoint(double_t);
/* Convert x to nearest int in all rounding modes, ties have to be rounded
consistently with roundtoint. If the result is not representible in an
int32_t then the semantics is unspecified. */
static int32_t converttoint(double_t);
#endif
/* Helps static branch prediction so hot path can be better optimized. */
#ifdef __GNUC__
#define predict_true(x) __builtin_expect(!!(x), 1)
#define predict_false(x) __builtin_expect(x, 0)
#else
#define predict_true(x) (x)
#define predict_false(x) (x)
#endif
/* Evaluate an expression as the specified type. With standard excess
precision handling a type cast or assignment is enough (with
-ffloat-store an assignment is required, in old compilers argument
passing and return statement may not drop excess precision). */
static inline float eval_as_float(float x)
{
float y = x;
return y;
}
static inline double eval_as_double(double x)
{
double y = x;
return y;
}
/* fp_barrier returns its input, but limits code transformations
as if it had a side-effect (e.g. observable io) and returned
an arbitrary value. */
#ifndef fp_barrierf
#define fp_barrierf fp_barrierf
static inline float fp_barrierf(float x)
{
volatile float y = x;
return y;
}
#endif
#ifndef fp_barrier
#define fp_barrier fp_barrier
static inline double fp_barrier(double x)
{
volatile double y = x;
return y;
}
#endif
#ifndef fp_barrierl
#define fp_barrierl fp_barrierl
static inline long double fp_barrierl(long double x)
{
volatile long double y = x;
return y;
}
#endif
/* fp_force_eval ensures that the input value is computed when that's
otherwise unused. To prevent the constant folding of the input
expression, an additional fp_barrier may be needed or a compilation
mode that does so (e.g. -frounding-math in gcc). Then it can be
used to evaluate an expression for its fenv side-effects only. */
#ifndef fp_force_evalf
#define fp_force_evalf fp_force_evalf
static inline void fp_force_evalf(float x)
{
volatile float y;
y = x;
}
#endif
#ifndef fp_force_eval
#define fp_force_eval fp_force_eval
static inline void fp_force_eval(double x)
{
volatile double y;
y = x;
}
#endif
#ifndef fp_force_evall
#define fp_force_evall fp_force_evall
static inline void fp_force_evall(long double x)
{
volatile long double y;
y = x;
}
#endif
#define FORCE_EVAL(x) do { \
if (sizeof(x) == sizeof(float)) { \
fp_force_evalf(x); \
} else if (sizeof(x) == sizeof(double)) { \
fp_force_eval(x); \
} else { \
fp_force_evall(x); \
} \
} while(0)
#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
#define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i
#define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f
#define EXTRACT_WORDS(hi,lo,d) \
do { \
uint64_t __u = asuint64(d); \
(hi) = __u >> 32; \
(lo) = (uint32_t)__u; \
} while (0)
#define GET_HIGH_WORD(hi,d) \
do { \
(hi) = asuint64(d) >> 32; \
} while (0)
#define GET_LOW_WORD(lo,d) \
do { \
(lo) = (uint32_t)asuint64(d); \
} while (0)
#define INSERT_WORDS(d,hi,lo) \
do { \
(d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
} while (0)
#define SET_HIGH_WORD(d,hi) \
INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
#define SET_LOW_WORD(d,lo) \
INSERT_WORDS(d, asuint64(d)>>32, lo)
#define GET_FLOAT_WORD(w,d) \
do { \
(w) = asuint(d); \
} while (0)
#define SET_FLOAT_WORD(d,w) \
do { \
(d) = asfloat(w); \
} while (0)
hidden int __rem_pio2_large(double*,double*,int,int,int);
hidden int __rem_pio2(double,double*);
hidden double __sin(double,double,int);
hidden double __cos(double,double);
hidden double __tan(double,double,int);
hidden double __expo2(double,double);
hidden int __rem_pio2f(float,double*);
hidden float __sindf(double);
hidden float __cosdf(double);
hidden float __tandf(double,int);
hidden float __expo2f(float,float);
hidden int __rem_pio2l(long double, long double *);
hidden long double __sinl(long double, long double, int);
hidden long double __cosl(long double, long double);
hidden long double __tanl(long double, long double, int);
hidden long double __polevll(long double, const long double *, int);
hidden long double __p1evll(long double, const long double *, int);
extern int __signgam;
hidden double __lgamma_r(double, int *);
hidden float __lgammaf_r(float, int *);
/* error handling functions */
hidden float __math_xflowf(uint32_t, float);
hidden float __math_uflowf(uint32_t);
hidden float __math_oflowf(uint32_t);
hidden float __math_divzerof(uint32_t);
hidden float __math_invalidf(float);
hidden double __math_xflow(uint32_t, double);
hidden double __math_uflow(uint32_t);
hidden double __math_oflow(uint32_t);
hidden double __math_divzero(uint32_t);
hidden double __math_invalid(double);
#if LDBL_MANT_DIG != DBL_MANT_DIG
hidden long double __math_invalidl(long double);
#endif
#endif
/* origin: FreeBSD /usr/src/lib/msun/src/k_cos.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* __cos( x, y )
* kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
* Input x is assumed to be bounded by ~pi/4 in magnitude.
* Input y is the tail of x.
*
* Algorithm
* 1. Since cos(-x) = cos(x), we need only to consider positive x.
* 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
* 3. cos(x) is approximated by a polynomial of degree 14 on
* [0,pi/4]
* 4 14
* cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
* where the remez error is
*
* | 2 4 6 8 10 12 14 | -58
* |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
* | |
*
* 4 6 8 10 12 14
* 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
* cos(x) ~ 1 - x*x/2 + r
* since cos(x+y) ~ cos(x) - sin(x)*y
* ~ cos(x) - x*y,
* a correction term is necessary in cos(x) and hence
* cos(x+y) = 1 - (x*x/2 - (r - x*y))
* For better accuracy, rearrange to
* cos(x+y) ~ w + (tmp + (r-x*y))
* where w = 1 - x*x/2 and tmp is a tiny correction term
* (1 - x*x/2 == w + tmp exactly in infinite precision).
* The exactness of w + tmp in infinite precision depends on w
* and tmp having the same precision as x. If they have extra
* precision due to compiler bugs, then the extra precision is
* only good provided it is retained in all terms of the final
* expression for cos(). Retention happens in all cases tested
* under FreeBSD, so don't pessimize things by forcibly clipping
* any extra precision in w.
*/
#include "libm.h"
static const double
C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
double __cos(double x, double y)
{
double_t hz,z,r,w;
z = x*x;
w = z*z;
r = z*(C1+z*(C2+z*C3)) + w*w*(C4+z*(C5+z*C6));
hz = 0.5*z;
w = 1.0-hz;
return w + (((1.0-w)-hz) + (z*r-x*y));
}
/* origin: FreeBSD /usr/src/lib/msun/src/k_cosf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Debugged and optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */
static const double
C0 = -0x1ffffffd0c5e81.0p-54, /* -0.499999997251031003120 */
C1 = 0x155553e1053a42.0p-57, /* 0.0416666233237390631894 */
C2 = -0x16c087e80f1e27.0p-62, /* -0.00138867637746099294692 */
C3 = 0x199342e0ee5069.0p-68; /* 0.0000243904487962774090654 */
float __cosdf(double x)
{
double_t r, w, z;
/* Try to optimize for parallel evaluation as in __tandf.c. */
z = x*x;
w = z*z;
r = C2+z*C3;
return ((1.0+z*C0) + w*C1) + (w*z)*r;
}
#include "libm.h"
/* k is such that k*ln2 has minimal relative error and x - kln2 > log(DBL_MIN) */
static const int k = 2043;
static const double kln2 = 0x1.62066151add8bp+10;
/* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */
double __expo2(double x, double sign)
{
double scale;
/* note that k is odd and scale*scale overflows */
INSERT_WORDS(scale, (uint32_t)(0x3ff + k/2) << 20, 0);
/* exp(x - k ln2) * 2**(k-1) */
/* in directed rounding correct sign before rounding or overflow is important */
return exp(x - kln2) * (sign * scale) * scale;
}
#include "libm.h"
/* k is such that k*ln2 has minimal relative error and x - kln2 > log(FLT_MIN) */
static const int k = 235;
static const float kln2 = 0x1.45c778p+7f;
/* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */
float __expo2f(float x, float sign)
{
float scale;
/* note that k is odd and scale*scale overflows */
SET_FLOAT_WORD(scale, (uint32_t)(0x7f + k/2) << 23);
/* exp(x - k ln2) * 2**(k-1) */
/* in directed rounding correct sign before rounding or overflow is important */
return expf(x - kln2) * (sign * scale) * scale;
}
#include <math.h>
#include <stdint.h>
short __cdecl _dclass(double x)
{
union {double f; uint64_t i;} u = {x};
int e = u.i>>52 & 0x7ff;
if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO;
if (e==0x7ff) return u.i<<12 ? FP_NAN : FP_INFINITE;
return FP_NORMAL;
}
#include <math.h>
#include <stdint.h>
short __cdecl _fdclass(float x)
{
union {float f; uint32_t i;} u = {x};
int e = u.i>>23 & 0xff;
if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO;
if (e==0xff) return u.i<<9 ? FP_NAN : FP_INFINITE;
return FP_NORMAL;
}
#include "libm.h"
double __math_divzero(uint32_t sign)
{
return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
}
#include "libm.h"
float __math_divzerof(uint32_t sign)
{
return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f;
}
#include "libm.h"
double __math_invalid(double x)
{
return (x - x) / (x - x);
}
#include "libm.h"
float __math_invalidf(float x)
{
return (x - x) / (x - x);
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
* Optimized by Bruce D. Evans.
*/
/* __rem_pio2(x,y)
*
* return the remainder of x rem pi/2 in y[0]+y[1]
* use __rem_pio2_large() for large x
*/
#include "libm.h"
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
/*
* invpio2: 53 bits of 2/pi
* pio2_1: first 33 bit of pi/2
* pio2_1t: pi/2 - pio2_1
* pio2_2: second 33 bit of pi/2
* pio2_2t: pi/2 - (pio2_1+pio2_2)
* pio2_3: third 33 bit of pi/2
* pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
*/
static const double
toint = 1.5/EPS,
pio4 = 0x1.921fb54442d18p-1,
invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
/* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
int __rem_pio2(double x, double *y)
{
union {double f; uint64_t i;} u = {x};
double_t z,w,t,r,fn;
double tx[3],ty[2];
uint32_t ix;
int sign, n, ex, ey, i;
sign = u.i>>63;
ix = u.i>>32 & 0x7fffffff;
if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */
if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */
goto medium; /* cancellation -- use medium case */
if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */
if (!sign) {
z = x - pio2_1; /* one round good to 85 bits */
y[0] = z - pio2_1t;
y[1] = (z-y[0]) - pio2_1t;
return 1;
} else {
z = x + pio2_1;
y[0] = z + pio2_1t;
y[1] = (z-y[0]) + pio2_1t;
return -1;
}
} else {
if (!sign) {
z = x - 2*pio2_1;
y[0] = z - 2*pio2_1t;
y[1] = (z-y[0]) - 2*pio2_1t;
return 2;
} else {
z = x + 2*pio2_1;
y[0] = z + 2*pio2_1t;
y[1] = (z-y[0]) + 2*pio2_1t;
return -2;
}
}
}
if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */
if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */
if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */
goto medium;
if (!sign) {
z = x - 3*pio2_1;
y[0] = z - 3*pio2_1t;
y[1] = (z-y[0]) - 3*pio2_1t;
return 3;
} else {
z = x + 3*pio2_1;
y[0] = z + 3*pio2_1t;
y[1] = (z-y[0]) + 3*pio2_1t;
return -3;
}
} else {
if (ix == 0x401921fb) /* |x| ~= 4pi/2 */
goto medium;
if (!sign) {
z = x - 4*pio2_1;
y[0] = z - 4*pio2_1t;
y[1] = (z-y[0]) - 4*pio2_1t;
return 4;
} else {
z = x + 4*pio2_1;
y[0] = z + 4*pio2_1t;
y[1] = (z-y[0]) + 4*pio2_1t;
return -4;
}
}
}
if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
medium:
/* rint(x/(pi/2)) */
fn = (double_t)x*invpio2 + toint - toint;
n = (int32_t)fn;
r = x - fn*pio2_1;
w = fn*pio2_1t; /* 1st round, good to 85 bits */
/* Matters with directed rounding. */
if (predict_false(r - w < -pio4)) {
n--;
fn--;
r = x - fn*pio2_1;
w = fn*pio2_1t;
} else if (predict_false(r - w > pio4)) {
n++;
fn++;
r = x - fn*pio2_1;
w = fn*pio2_1t;
}
y[0] = r - w;
u.f = y[0];
ey = u.i>>52 & 0x7ff;
ex = ix>>20;
if (ex - ey > 16) { /* 2nd round, good to 118 bits */
t = r;
w = fn*pio2_2;
r = t - w;
w = fn*pio2_2t - ((t-r)-w);
y[0] = r - w;
u.f = y[0];
ey = u.i>>52 & 0x7ff;
if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */
t = r;
w = fn*pio2_3;
r = t - w;
w = fn*pio2_3t - ((t-r)-w);
y[0] = r - w;
}
}
y[1] = (r - y[0]) - w;
return n;
}
/*
* all other (large) arguments
*/
if (ix >= 0x7ff00000) { /* x is inf or NaN */
y[0] = y[1] = x - x;
return 0;
}
/* set z = scalbn(|x|,-ilogb(x)+23) */
u.f = x;
u.i &= (uint64_t)-1>>12;
u.i |= (uint64_t)(0x3ff + 23)<<52;
z = u.f;
for (i=0; i < 2; i++) {
tx[i] = (double)(int32_t)z;
z = (z-tx[i])*0x1p24;
}
tx[i] = z;
/* skip zero terms, first term is non-zero */
while (tx[i] == 0.0)
i--;
n = __rem_pio2_large(tx,ty,(int)(ix>>20)-(0x3ff+23),i+1,1);
if (sign) {
y[0] = -ty[0];
y[1] = -ty[1];
return -n;
}
y[0] = ty[0];
y[1] = ty[1];
return n;
}
/* origin: FreeBSD /usr/src/lib/msun/src/k_rem_pio2.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* __rem_pio2_large(x,y,e0,nx,prec)
* double x[],y[]; int e0,nx,prec;
*
* __rem_pio2_large return the last three digits of N with
* y = x - N*pi/2
* so that |y| < pi/2.
*
* The method is to compute the integer (mod 8) and fraction parts of
* (2/pi)*x without doing the full multiplication. In general we
* skip the part of the product that are known to be a huge integer (
* more accurately, = 0 mod 8 ). Thus the number of operations are
* independent of the exponent of the input.
*
* (2/pi) is represented by an array of 24-bit integers in ipio2[].
*
* Input parameters:
* x[] The input value (must be positive) is broken into nx
* pieces of 24-bit integers in double precision format.
* x[i] will be the i-th 24 bit of x. The scaled exponent
* of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
* match x's up to 24 bits.
*
* Example of breaking a double positive z into x[0]+x[1]+x[2]:
* e0 = ilogb(z)-23
* z = scalbn(z,-e0)
* for i = 0,1,2
* x[i] = floor(z)
* z = (z-x[i])*2**24
*
*
* y[] ouput result in an array of double precision numbers.
* The dimension of y[] is:
* 24-bit precision 1
* 53-bit precision 2
* 64-bit precision 2
* 113-bit precision 3
* The actual value is the sum of them. Thus for 113-bit
* precison, one may have to do something like:
*
* long double t,w,r_head, r_tail;
* t = (long double)y[2] + (long double)y[1];
* w = (long double)y[0];
* r_head = t+w;
* r_tail = w - (r_head - t);
*
* e0 The exponent of x[0]. Must be <= 16360 or you need to
* expand the ipio2 table.
*
* nx dimension of x[]
*
* prec an integer indicating the precision:
* 0 24 bits (single)
* 1 53 bits (double)
* 2 64 bits (extended)
* 3 113 bits (quad)
*
* External function:
* double scalbn(), floor();
*
*
* Here is the description of some local variables:
*
* jk jk+1 is the initial number of terms of ipio2[] needed
* in the computation. The minimum and recommended value
* for jk is 3,4,4,6 for single, double, extended, and quad.
* jk+1 must be 2 larger than you might expect so that our
* recomputation test works. (Up to 24 bits in the integer
* part (the 24 bits of it that we compute) and 23 bits in
* the fraction part may be lost to cancelation before we
* recompute.)
*
* jz local integer variable indicating the number of
* terms of ipio2[] used.
*
* jx nx - 1
*
* jv index for pointing to the suitable ipio2[] for the
* computation. In general, we want
* ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
* is an integer. Thus
* e0-3-24*jv >= 0 or (e0-3)/24 >= jv
* Hence jv = max(0,(e0-3)/24).
*
* jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
*
* q[] double array with integral value, representing the
* 24-bits chunk of the product of x and 2/pi.
*
* q0 the corresponding exponent of q[0]. Note that the
* exponent for q[i] would be q0-24*i.
*
* PIo2[] double precision array, obtained by cutting pi/2
* into 24 bits chunks.
*
* f[] ipio2[] in floating point
*
* iq[] integer array by breaking up q[] in 24-bits chunk.
*
* fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
*
* ih integer. If >0 it indicates q[] is >= 0.5, hence
* it also indicates the *sign* of the result.
*
*/
/*
* Constants:
* The hexadecimal values are the intended ones for the following
* constants. The decimal values may be used, provided that the
* compiler will convert from decimal to binary accurately enough
* to produce the hexadecimal values shown.
*/
#include "libm.h"
static const int init_jk[] = {3,4,4,6}; /* initial value for jk */
/*
* Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
*
* integer array, contains the (24*i)-th to (24*i+23)-th
* bit of 2/pi after binary point. The corresponding
* floating value is
*
* ipio2[i] * 2^(-24(i+1)).
*
* NB: This table must have at least (e0-3)/24 + jk terms.
* For quad precision (e0 <= 16360, jk = 6), this is 686.
*/
static const int32_t ipio2[] = {
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
#if LDBL_MAX_EXP > 1024
0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6,
0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2,
0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35,
0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30,
0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C,
0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4,
0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770,
0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7,
0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19,
0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522,
0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16,
0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6,
0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E,
0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48,
0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3,
0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF,
0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55,
0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612,
0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929,
0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC,
0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B,
0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C,
0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4,
0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB,
0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC,
0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C,
0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F,
0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5,
0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437,
0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B,
0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA,
0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD,
0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3,
0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3,
0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717,
0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F,
0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61,
0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB,
0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51,
0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0,
0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C,
0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6,
0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC,
0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED,
0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328,
0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D,
0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0,
0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B,
0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4,
0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3,
0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F,
0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD,
0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B,
0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4,
0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761,
0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31,
0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30,
0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262,
0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E,
0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1,
0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C,
0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4,
0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08,
0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196,
0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9,
0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4,
0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC,
0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C,
0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0,
0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C,
0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0,
0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC,
0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22,
0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893,
0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7,
0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5,
0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F,
0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4,
0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF,
0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B,
0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2,
0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138,
0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E,
0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569,
0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34,
0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9,
0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D,
0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F,
0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855,
0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569,
0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B,
0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE,
0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41,
0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49,
0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F,
0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110,
0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8,
0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365,
0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A,
0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270,
0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5,
0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616,
0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B,
0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0,
#endif
};
static const double PIo2[] = {
1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
};
int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
{
int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
double z,fw,f[20],fq[20],q[20];
/* initialize jk*/
jk = init_jk[prec];
jp = jk;
/* determine jx,jv,q0, note that 3>q0 */
jx = nx-1;
jv = (e0-3)/24; if(jv<0) jv=0;
q0 = e0-24*(jv+1);
/* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
j = jv-jx; m = jx+jk;
for (i=0; i<=m; i++,j++)
f[i] = j<0 ? 0.0 : (double)ipio2[j];
/* compute q[0],q[1],...q[jk] */
for (i=0; i<=jk; i++) {
for (j=0,fw=0.0; j<=jx; j++)
fw += x[j]*f[jx+i-j];
q[i] = fw;
}
jz = jk;
recompute:
/* distill q[] into iq[] reversingly */
for (i=0,j=jz,z=q[jz]; j>0; i++,j--) {
fw = (double)(int32_t)(0x1p-24*z);
iq[i] = (int32_t)(z - 0x1p24*fw);
z = q[j-1]+fw;
}
/* compute n */
z = scalbn(z,q0); /* actual value of z */
z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
n = (int32_t)z;
z -= (double)n;
ih = 0;
if (q0 > 0) { /* need iq[jz-1] to determine n */
i = iq[jz-1]>>(24-q0); n += i;
iq[jz-1] -= i<<(24-q0);
ih = iq[jz-1]>>(23-q0);
}
else if (q0 == 0) ih = iq[jz-1]>>23;
else if (z >= 0.5) ih = 2;
if (ih > 0) { /* q > 0.5 */
n += 1; carry = 0;
for (i=0; i<jz; i++) { /* compute 1-q */
j = iq[i];
if (carry == 0) {
if (j != 0) {
carry = 1;
iq[i] = 0x1000000 - j;
}
} else
iq[i] = 0xffffff - j;
}
if (q0 > 0) { /* rare case: chance is 1 in 12 */
switch(q0) {
case 1:
iq[jz-1] &= 0x7fffff; break;
case 2:
iq[jz-1] &= 0x3fffff; break;
}
}
if (ih == 2) {
z = 1.0 - z;
if (carry != 0)
z -= scalbn(1.0,q0);
}
}
/* check if recomputation is needed */
if (z == 0.0) {
j = 0;
for (i=jz-1; i>=jk; i--) j |= iq[i];
if (j == 0) { /* need recomputation */
for (k=1; iq[jk-k]==0; k++); /* k = no. of terms needed */
for (i=jz+1; i<=jz+k; i++) { /* add q[jz+1] to q[jz+k] */
f[jx+i] = (double)ipio2[jv+i];
for (j=0,fw=0.0; j<=jx; j++)
fw += x[j]*f[jx+i-j];
q[i] = fw;
}
jz += k;
goto recompute;
}
}
/* chop off zero terms */
if (z == 0.0) {
jz -= 1;
q0 -= 24;
while (iq[jz] == 0) {
jz--;
q0 -= 24;
}
} else { /* break z into 24-bit if necessary */
z = scalbn(z,-q0);
if (z >= 0x1p24) {
fw = (double)(int32_t)(0x1p-24*z);
iq[jz] = (int32_t)(z - 0x1p24*fw);
jz += 1;
q0 += 24;
iq[jz] = (int32_t)fw;
} else
iq[jz] = (int32_t)z;
}
/* convert integer "bit" chunk to floating-point value */
fw = scalbn(1.0,q0);
for (i=jz; i>=0; i--) {
q[i] = fw*(double)iq[i];
fw *= 0x1p-24;
}
/* compute PIo2[0,...,jp]*q[jz,...,0] */
for(i=jz; i>=0; i--) {
for (fw=0.0,k=0; k<=jp && k<=jz-i; k++)
fw += PIo2[k]*q[i+k];
fq[jz-i] = fw;
}
/* compress fq[] into y[] */
switch(prec) {
case 0:
fw = 0.0;
for (i=jz; i>=0; i--)
fw += fq[i];
y[0] = ih==0 ? fw : -fw;
break;
case 1:
case 2:
fw = 0.0;
for (i=jz; i>=0; i--)
fw += fq[i];
// TODO: drop excess precision here once double_t is used
fw = (double)fw;
y[0] = ih==0 ? fw : -fw;
fw = fq[0]-fw;
for (i=1; i<=jz; i++)
fw += fq[i];
y[1] = ih==0 ? fw : -fw;
break;
case 3: /* painful */
for (i=jz; i>0; i--) {
fw = fq[i-1]+fq[i];
fq[i] += fq[i-1]-fw;
fq[i-1] = fw;
}
for (i=jz; i>1; i--) {
fw = fq[i-1]+fq[i];
fq[i] += fq[i-1]-fw;
fq[i-1] = fw;
}
for (fw=0.0,i=jz; i>=2; i--)
fw += fq[i];
if (ih==0) {
y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
} else {
y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
}
}
return n&7;
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2f.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Debugged and optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* __rem_pio2f(x,y)
*
* return the remainder of x rem pi/2 in *y
* use double precision for everything except passing x
* use __rem_pio2_large() for large x
*/
#include "libm.h"
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
/*
* invpio2: 53 bits of 2/pi
* pio2_1: first 25 bits of pi/2
* pio2_1t: pi/2 - pio2_1
*/
static const double
toint = 1.5/EPS,
pio4 = 0x1.921fb6p-1,
invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
pio2_1 = 1.57079631090164184570e+00, /* 0x3FF921FB, 0x50000000 */
pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */
int __rem_pio2f(float x, double *y)
{
union {float f; uint32_t i;} u = {x};
double tx[1],ty[1];
double_t fn;
uint32_t ix;
int n, sign, e0;
ix = u.i & 0x7fffffff;
/* 25+53 bit pi is good enough for medium size */
if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
/* Use a specialized rint() to get fn. */
fn = (double_t)x*invpio2 + toint - toint;
n = (int32_t)fn;
*y = x - fn*pio2_1 - fn*pio2_1t;
/* Matters with directed rounding. */
if (predict_false(*y < -pio4)) {
n--;
fn--;
*y = x - fn*pio2_1 - fn*pio2_1t;
} else if (predict_false(*y > pio4)) {
n++;
fn++;
*y = x - fn*pio2_1 - fn*pio2_1t;
}
return n;
}
if(ix>=0x7f800000) { /* x is inf or NaN */
*y = x-x;
return 0;
}
/* scale x into [2^23, 2^24-1] */
sign = u.i>>31;
e0 = (ix>>23) - (0x7f+23); /* e0 = ilogb(|x|)-23, positive */
u.i = ix - (e0<<23);
tx[0] = u.f;
n = __rem_pio2_large(tx,ty,e0,1,0);
if (sign) {
*y = -ty[0];
return -n;
}
*y = ty[0];
return n;
}
/* origin: FreeBSD /usr/src/lib/msun/src/k_sin.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* __sin( x, y, iy)
* kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
* Input x is assumed to be bounded by ~pi/4 in magnitude.
* Input y is the tail of x.
* Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
*
* Algorithm
* 1. Since sin(-x) = -sin(x), we need only to consider positive x.
* 2. Callers must return sin(-0) = -0 without calling here since our
* odd polynomial is not evaluated in a way that preserves -0.
* Callers may do the optimization sin(x) ~ x for tiny x.
* 3. sin(x) is approximated by a polynomial of degree 13 on
* [0,pi/4]
* 3 13
* sin(x) ~ x + S1*x + ... + S6*x
* where
*
* |sin(x) 2 4 6 8 10 12 | -58
* |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
* | x |
*
* 4. sin(x+y) = sin(x) + sin'(x')*y
* ~ sin(x) + (1-x*x/2)*y
* For better accuracy, let
* 3 2 2 2 2
* r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
* then 3 2
* sin(x) = x + (S1*x + (x *(r-y/2)+y))
*/
#include "libm.h"
static const double
S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
double __sin(double x, double y, int iy)
{
double_t z,r,v,w;
z = x*x;
w = z*z;
r = S2 + z*(S3 + z*S4) + z*w*(S5 + z*S6);
v = z*x;
if (iy == 0)
return x + v*(S1 + z*r);
else
return x - ((z*(0.5*y - v*r) - y) - v*S1);
}
/* origin: FreeBSD /usr/src/lib/msun/src/k_sinf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
/* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). */
static const double
S1 = -0x15555554cbac77.0p-55, /* -0.166666666416265235595 */
S2 = 0x111110896efbb2.0p-59, /* 0.0083333293858894631756 */
S3 = -0x1a00f9e2cae774.0p-65, /* -0.000198393348360966317347 */
S4 = 0x16cd878c3b46a7.0p-71; /* 0.0000027183114939898219064 */
float __sindf(double x)
{
double_t r, s, w, z;
/* Try to optimize for parallel evaluation as in __tandf.c. */
z = x*x;
w = z*z;
r = S3 + z*S4;
s = z*x;
return (x + s*(S1 + z*S2)) + s*w*r;
}
/* origin: FreeBSD /usr/src/lib/msun/src/k_tan.c */
/*
* ====================================================
* Copyright 2004 Sun Microsystems, Inc. All Rights Reserved.
*
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* __tan( x, y, k )
* kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
* Input x is assumed to be bounded by ~pi/4 in magnitude.
* Input y is the tail of x.
* Input odd indicates whether tan (if odd = 0) or -1/tan (if odd = 1) is returned.
*
* Algorithm
* 1. Since tan(-x) = -tan(x), we need only to consider positive x.
* 2. Callers must return tan(-0) = -0 without calling here since our
* odd polynomial is not evaluated in a way that preserves -0.
* Callers may do the optimization tan(x) ~ x for tiny x.
* 3. tan(x) is approximated by a odd polynomial of degree 27 on
* [0,0.67434]
* 3 27
* tan(x) ~ x + T1*x + ... + T13*x
* where
*
* |tan(x) 2 4 26 | -59.2
* |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
* | x |
*
* Note: tan(x+y) = tan(x) + tan'(x)*y
* ~ tan(x) + (1+x*x)*y
* Therefore, for better accuracy in computing tan(x+y), let
* 3 2 2 2 2
* r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
* then
* 3 2
* tan(x+y) = x + (T1*x + (x *(r+y)+y))
*
* 4. For x in [0.67434,pi/4], let y = pi/4 - x, then
* tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
* = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
*/
#include "libm.h"
static const double T[] = {
3.33333333333334091986e-01, /* 3FD55555, 55555563 */
1.33333333333201242699e-01, /* 3FC11111, 1110FE7A */
5.39682539762260521377e-02, /* 3FABA1BA, 1BB341FE */
2.18694882948595424599e-02, /* 3F9664F4, 8406D637 */
8.86323982359930005737e-03, /* 3F8226E3, E96E8493 */
3.59207910759131235356e-03, /* 3F6D6D22, C9560328 */
1.45620945432529025516e-03, /* 3F57DBC8, FEE08315 */
5.88041240820264096874e-04, /* 3F4344D8, F2F26501 */
2.46463134818469906812e-04, /* 3F3026F7, 1A8D1068 */
7.81794442939557092300e-05, /* 3F147E88, A03792A6 */
7.14072491382608190305e-05, /* 3F12B80F, 32F0A7E9 */
-1.85586374855275456654e-05, /* BEF375CB, DB605373 */
2.59073051863633712884e-05, /* 3EFB2A70, 74BF7AD4 */
},
pio4 = 7.85398163397448278999e-01, /* 3FE921FB, 54442D18 */
pio4lo = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */
double __tan(double x, double y, int odd)
{
double_t z, r, v, w, s, a;
double w0, a0;
uint32_t hx;
int big, sign;
GET_HIGH_WORD(hx,x);
big = (hx&0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */
if (big) {
sign = hx>>31;
if (sign) {
x = -x;
y = -y;
}
x = (pio4 - x) + (pio4lo - y);
y = 0.0;
}
z = x * x;
w = z * z;
/*
* Break x^5*(T[1]+x^2*T[2]+...) into
* x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
* x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
*/
r = T[1] + w*(T[3] + w*(T[5] + w*(T[7] + w*(T[9] + w*T[11]))));
v = z*(T[2] + w*(T[4] + w*(T[6] + w*(T[8] + w*(T[10] + w*T[12])))));
s = z * x;
r = y + z*(s*(r + v) + y) + s*T[0];
w = x + r;
if (big) {
s = 1 - 2*odd;
v = s - 2.0 * (x + (r - w*w/(w + s)));
return sign ? -v : v;
}
if (!odd)
return w;
/* -1.0/(x+r) has up to 2ulp error, so compute it accurately */
w0 = w;
SET_LOW_WORD(w0, 0);
v = r - (w0 - x); /* w0+v = r+x */
a0 = a = -1.0 / w;
SET_LOW_WORD(a0, 0);
return a0 + a*(1.0 + a0*w0 + a0*v);
}
/* origin: FreeBSD /usr/src/lib/msun/src/k_tanf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright 2004 Sun Microsystems, Inc. All Rights Reserved.
*
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
/* |tan(x)/x - t(x)| < 2**-25.5 (~[-2e-08, 2e-08]). */
static const double T[] = {
0x15554d3418c99f.0p-54, /* 0.333331395030791399758 */
0x1112fd38999f72.0p-55, /* 0.133392002712976742718 */
0x1b54c91d865afe.0p-57, /* 0.0533812378445670393523 */
0x191df3908c33ce.0p-58, /* 0.0245283181166547278873 */
0x185dadfcecf44e.0p-61, /* 0.00297435743359967304927 */
0x1362b9bf971bcd.0p-59, /* 0.00946564784943673166728 */
};
float __tandf(double x, int odd)
{
double_t z,r,w,s,t,u;
z = x*x;
/*
* Split up the polynomial into small independent terms to give
* opportunities for parallel evaluation. The chosen splitting is
* micro-optimized for Athlons (XP, X64). It costs 2 multiplications
* relative to Horner's method on sequential machines.
*
* We add the small terms from lowest degree up for efficiency on
* non-sequential machines (the lowest degree terms tend to be ready
* earlier). Apart from this, we don't care about order of
* operations, and don't need to to care since we have precision to
* spare. However, the chosen splitting is good for accuracy too,
* and would give results as accurate as Horner's method if the
* small terms were added from highest degree down.
*/
r = T[4] + z*T[5];
t = T[2] + z*T[3];
w = z*z;
s = z*x;
u = T[0] + z*T[1];
r = (x + s*u) + (s*w)*(t + w*r);
return odd ? -1.0/r : r;
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_acos.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* acos(x)
* Method :
* acos(x) = pi/2 - asin(x)
* acos(-x) = pi/2 + asin(x)
* For |x|<=0.5
* acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c)
* For x>0.5
* acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2)))
* = 2asin(sqrt((1-x)/2))
* = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z)
* = 2f + (2c + 2s*z*R(z))
* where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term
* for f so that f+c ~ sqrt(z).
* For x<-0.5
* acos(x) = pi - 2asin(sqrt((1-|x|)/2))
* = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z)
*
* Special cases:
* if x is NaN, return x itself;
* if |x|>1, return NaN with invalid signal.
*
* Function needed: sqrt
*/
#include "libm.h"
static const double
pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
static double R(double z)
{
double_t p, q;
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
return p/q;
}
double __cdecl acos(double x)
{
double z,w,s,c,df;
uint32_t hx,ix;
GET_HIGH_WORD(hx, x);
ix = hx & 0x7fffffff;
/* |x| >= 1 or nan */
if (ix >= 0x3ff00000) {
uint32_t lx;
GET_LOW_WORD(lx,x);
if ((ix-0x3ff00000 | lx) == 0) {
/* acos(1)=0, acos(-1)=pi */
if (hx >> 31)
return 2*pio2_hi + 0x1p-120f;
return 0;
}
return 0/(x-x);
}
/* |x| < 0.5 */
if (ix < 0x3fe00000) {
if (ix <= 0x3c600000) /* |x| < 2**-57 */
return pio2_hi + 0x1p-120f;
return pio2_hi - (x - (pio2_lo-x*R(x*x)));
}
/* x < -0.5 */
if (hx >> 31) {
z = (1.0+x)*0.5;
s = sqrt(z);
w = R(z)*s-pio2_lo;
return 2*(pio2_hi - (s+w));
}
/* x > 0.5 */
z = (1.0-x)*0.5;
s = sqrt(z);
df = s;
SET_LOW_WORD(df,0);
c = (z-df*df)/(s+df);
w = R(z)*s+c;
return 2*(df+w);
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_acosf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
static const float
pio2_hi = 1.5707962513e+00, /* 0x3fc90fda */
pio2_lo = 7.5497894159e-08, /* 0x33a22168 */
pS0 = 1.6666586697e-01,
pS1 = -4.2743422091e-02,
pS2 = -8.6563630030e-03,
qS1 = -7.0662963390e-01;
static float R(float z)
{
float_t p, q;
p = z*(pS0+z*(pS1+z*pS2));
q = 1.0f+z*qS1;
return p/q;
}
float __cdecl acosf(float x)
{
float z,w,s,c,df;
uint32_t hx,ix;
GET_FLOAT_WORD(hx, x);
ix = hx & 0x7fffffff;
/* |x| >= 1 or nan */
if (ix >= 0x3f800000) {
if (ix == 0x3f800000) {
if (hx >> 31)
return 2*pio2_hi + 0x1p-120f;
return 0;
}
return 0/(x-x);
}
/* |x| < 0.5 */
if (ix < 0x3f000000) {
if (ix <= 0x32800000) /* |x| < 2**-26 */
return pio2_hi + 0x1p-120f;
return pio2_hi - (x - (pio2_lo-x*R(x*x)));
}
/* x < -0.5 */
if (hx >> 31) {
z = (1+x)*0.5f;
s = sqrtf(z);
w = R(z)*s-pio2_lo;
return 2*(pio2_hi - (s+w));
}
/* x > 0.5 */
z = (1-x)*0.5f;
s = sqrtf(z);
GET_FLOAT_WORD(hx,s);
SET_FLOAT_WORD(df,hx&0xfffff000);
c = (z-df*df)/(s+df);
w = R(z)*s+c;
return 2*(df+w);
}
#include "libm.h"
#if FLT_EVAL_METHOD==2
#undef sqrt
#define sqrt sqrtl
#endif
/* acosh(x) = log(x + sqrt(x*x-1)) */
double __cdecl acosh(double x)
{
union {double f; uint64_t i;} u = {.f = x};
unsigned e = u.i >> 52 & 0x7ff;
/* x < 1 domain error is handled in the called functions */
if (e < 0x3ff + 1)
/* |x| < 2, up to 2ulp error in [1,1.125] */
return log1p(x-1 + sqrt((x-1)*(x-1)+2*(x-1)));
if (e < 0x3ff + 26)
/* |x| < 0x1p26 */
return log(2*x - 1/(x+sqrt(x*x-1)));
/* |x| >= 0x1p26 or nan */
return log(x) + 0.693147180559945309417232121458176568;
}
#include "libm.h"
#if FLT_EVAL_METHOD==2
#undef sqrtf
#define sqrtf sqrtl
#elif FLT_EVAL_METHOD==1
#undef sqrtf
#define sqrtf sqrt
#endif
/* acosh(x) = log(x + sqrt(x*x-1)) */
float __cdecl acoshf(float x)
{
union {float f; uint32_t i;} u = {x};
uint32_t a = u.i & 0x7fffffff;
if (a < 0x3f800000+(1<<23))
/* |x| < 2, invalid if x < 1 */
/* up to 2ulp error in [1,1.125] */
return log1pf(x-1 + sqrtf((x-1)*(x-1)+2*(x-1)));
if (u.i < 0x3f800000+(12<<23))
/* 2 <= x < 0x1p12 */
return logf(2*x - 1/(x+sqrtf(x*x-1)));
/* x >= 0x1p12 or x <= -2 or nan */
return logf(x) + 0.693147180559945309417232121458176568f;
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_asin.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* asin(x)
* Method :
* Since asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ...
* we approximate asin(x) on [0,0.5] by
* asin(x) = x + x*x^2*R(x^2)
* where
* R(x^2) is a rational approximation of (asin(x)-x)/x^3
* and its remez error is bounded by
* |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75)
*
* For x in [0.5,1]
* asin(x) = pi/2-2*asin(sqrt((1-x)/2))
* Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2;
* then for x>0.98
* asin(x) = pi/2 - 2*(s+s*z*R(z))
* = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo)
* For x<=0.98, let pio4_hi = pio2_hi/2, then
* f = hi part of s;
* c = sqrt(z) - f = (z-f*f)/(s+f) ...f+c=sqrt(z)
* and
* asin(x) = pi/2 - 2*(s+s*z*R(z))
* = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo)
* = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c))
*
* Special cases:
* if x is NaN, return x itself;
* if |x|>1, return NaN with invalid signal.
*
*/
#include "libm.h"
static const double
pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
/* coefficients for R(x^2) */
pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
static double R(double z)
{
double_t p, q;
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
return p/q;
}
double __cdecl asin(double x)
{
double z,r,s;
uint32_t hx,ix;
GET_HIGH_WORD(hx, x);
ix = hx & 0x7fffffff;
/* |x| >= 1 or nan */
if (ix >= 0x3ff00000) {
uint32_t lx;
GET_LOW_WORD(lx, x);
if ((ix-0x3ff00000 | lx) == 0)
/* asin(1) = +-pi/2 with inexact */
return x*pio2_hi + 0x1p-120f;
return 0/(x-x);
}
/* |x| < 0.5 */
if (ix < 0x3fe00000) {
/* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
if (ix < 0x3e500000 && ix >= 0x00100000)
return x;
return x + x*R(x*x);
}
/* 1 > |x| >= 0.5 */
z = (1 - fabs(x))*0.5;
s = sqrt(z);
r = R(z);
if (ix >= 0x3fef3333) { /* if |x| > 0.975 */
x = pio2_hi-(2*(s+s*r)-pio2_lo);
} else {
double f,c;
/* f+c = sqrt(z) */
f = s;
SET_LOW_WORD(f,0);
c = (z-f*f)/(s+f);
x = 0.5*pio2_hi - (2*s*r - (pio2_lo-2*c) - (0.5*pio2_hi-2*f));
}
if (hx >> 31)
return -x;
return x;
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_asinf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
static const double
pio2 = 1.570796326794896558e+00;
static const float
/* coefficients for R(x^2) */
pS0 = 1.6666586697e-01,
pS1 = -4.2743422091e-02,
pS2 = -8.6563630030e-03,
qS1 = -7.0662963390e-01;
static float R(float z)
{
float_t p, q;
p = z*(pS0+z*(pS1+z*pS2));
q = 1.0f+z*qS1;
return p/q;
}
float __cdecl asinf(float x)
{
double s;
float z;
uint32_t hx,ix;
GET_FLOAT_WORD(hx, x);
ix = hx & 0x7fffffff;
if (ix >= 0x3f800000) { /* |x| >= 1 */
if (ix == 0x3f800000) /* |x| == 1 */
return x*pio2 + 0x1p-120f; /* asin(+-1) = +-pi/2 with inexact */
return 0/(x-x); /* asin(|x|>1) is NaN */
}
if (ix < 0x3f000000) { /* |x| < 0.5 */
/* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
if (ix < 0x39800000 && ix >= 0x00800000)
return x;
return x + x*R(x*x);
}
/* 1 > |x| >= 0.5 */
z = (1 - fabsf(x))*0.5f;
s = sqrt(z);
x = pio2 - 2*(s+s*R(z));
if (hx >> 31)
return -x;
return x;
}
#include "libm.h"
/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */
double __cdecl asinh(double x)
{
union {double f; uint64_t i;} u = {.f = x};
unsigned e = u.i >> 52 & 0x7ff;
unsigned s = u.i >> 63;
/* |x| */
u.i &= (uint64_t)-1/2;
x = u.f;
if (e >= 0x3ff + 26) {
/* |x| >= 0x1p26 or inf or nan */
x = log(x) + 0.693147180559945309417232121458176568;
} else if (e >= 0x3ff + 1) {
/* |x| >= 2 */
x = log(2*x + 1/(sqrt(x*x+1)+x));
} else if (e >= 0x3ff - 26) {
/* |x| >= 0x1p-26, up to 1.6ulp error in [0.125,0.5] */
x = log1p(x + x*x/(sqrt(x*x+1)+1));
} else {
/* |x| < 0x1p-26, raise inexact if x != 0 */
FORCE_EVAL(x + 0x1p120f);
}
return s ? -x : x;
}
#include "libm.h"
/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */
float __cdecl asinhf(float x)
{
union {float f; uint32_t i;} u = {.f = x};
uint32_t i = u.i & 0x7fffffff;
unsigned s = u.i >> 31;
/* |x| */
u.i = i;
x = u.f;
if (i >= 0x3f800000 + (12<<23)) {
/* |x| >= 0x1p12 or inf or nan */
x = logf(x) + 0.693147180559945309417232121458176568f;
} else if (i >= 0x3f800000 + (1<<23)) {
/* |x| >= 2 */
x = logf(2*x + 1/(sqrtf(x*x+1)+x));
} else if (i >= 0x3f800000 - (12<<23)) {
/* |x| >= 0x1p-12, up to 1.6ulp error in [0.125,0.5] */
x = log1pf(x + x*x/(sqrtf(x*x+1)+1));
} else {
/* |x| < 0x1p-12, raise inexact if x!=0 */
FORCE_EVAL(x + 0x1p120f);
}
return s ? -x : x;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_atan.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* atan(x)
* Method
* 1. Reduce x to positive by atan(x) = -atan(-x).
* 2. According to the integer k=4t+0.25 chopped, t=x, the argument
* is further reduced to one of the following intervals and the
* arctangent of t is evaluated by the corresponding formula:
*
* [0,7/16] atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
* [7/16,11/16] atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) )
* [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) )
* [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) )
* [39/16,INF] atan(x) = atan(INF) + atan( -1/t )
*
* Constants:
* The hexadecimal values are the intended ones for the following
* constants. The decimal values may be used, provided that the
* compiler will convert from decimal to binary accurately enough
* to produce the hexadecimal values shown.
*/
#include "libm.h"
static const double atanhi[] = {
4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
};
static const double atanlo[] = {
2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */
};
static const double aT[] = {
3.33333333333329318027e-01, /* 0x3FD55555, 0x5555550D */
-1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */
1.42857142725034663711e-01, /* 0x3FC24924, 0x920083FF */
-1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */
9.09088713343650656196e-02, /* 0x3FB745CD, 0xC54C206E */
-7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */
6.66107313738753120669e-02, /* 0x3FB10D66, 0xA0D03D51 */
-5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */
4.97687799461593236017e-02, /* 0x3FA97B4B, 0x24760DEB */
-3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */
1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */
};
double __cdecl atan(double x)
{
double_t w,s1,s2,z;
uint32_t ix,sign;
int id;
GET_HIGH_WORD(ix, x);
sign = ix >> 31;
ix &= 0x7fffffff;
if (ix >= 0x44100000) { /* if |x| >= 2^66 */
if (isnan(x))
return x;
z = atanhi[3] + 0x1p-120f;
return sign ? -z : z;
}
if (ix < 0x3fdc0000) { /* |x| < 0.4375 */
if (ix < 0x3e400000) { /* |x| < 2^-27 */
if (ix < 0x00100000)
/* raise underflow for subnormal x */
FORCE_EVAL((float)x);
return x;
}
id = -1;
} else {
x = fabs(x);
if (ix < 0x3ff30000) { /* |x| < 1.1875 */
if (ix < 0x3fe60000) { /* 7/16 <= |x| < 11/16 */
id = 0;
x = (2.0*x-1.0)/(2.0+x);
} else { /* 11/16 <= |x| < 19/16 */
id = 1;
x = (x-1.0)/(x+1.0);
}
} else {
if (ix < 0x40038000) { /* |x| < 2.4375 */
id = 2;
x = (x-1.5)/(1.0+1.5*x);
} else { /* 2.4375 <= |x| < 2^66 */
id = 3;
x = -1.0/x;
}
}
}
/* end of argument reduction */
z = x*x;
w = z*z;
/* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10])))));
s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9]))));
if (id < 0)
return x - x*(s1+s2);
z = atanhi[id] - (x*(s1+s2) - atanlo[id] - x);
return sign ? -z : z;
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_atan2.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* atan2(y,x)
* Method :
* 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
* 2. Reduce x to positive by (if x and y are unexceptional):
* ARG (x+iy) = arctan(y/x) ... if x > 0,
* ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0,
*
* Special cases:
*
* ATAN2((anything), NaN ) is NaN;
* ATAN2(NAN , (anything) ) is NaN;
* ATAN2(+-0, +(anything but NaN)) is +-0 ;
* ATAN2(+-0, -(anything but NaN)) is +-pi ;
* ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
* ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
* ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
* ATAN2(+-INF,+INF ) is +-pi/4 ;
* ATAN2(+-INF,-INF ) is +-3pi/4;
* ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
*
* Constants:
* The hexadecimal values are the intended ones for the following
* constants. The decimal values may be used, provided that the
* compiler will convert from decimal to binary accurately enough
* to produce the hexadecimal values shown.
*/
#include "libm.h"
static const double
pi = 3.1415926535897931160E+00, /* 0x400921FB, 0x54442D18 */
pi_lo = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
double __cdecl atan2(double y, double x)
{
double z;
uint32_t m,lx,ly,ix,iy;
if (isnan(x) || isnan(y))
return x+y;
EXTRACT_WORDS(ix, lx, x);
EXTRACT_WORDS(iy, ly, y);
if ((ix-0x3ff00000 | lx) == 0) /* x = 1.0 */
return atan(y);
m = ((iy>>31)&1) | ((ix>>30)&2); /* 2*sign(x)+sign(y) */
ix = ix & 0x7fffffff;
iy = iy & 0x7fffffff;
/* when y = 0 */
if ((iy|ly) == 0) {
switch(m) {
case 0:
case 1: return y; /* atan(+-0,+anything)=+-0 */
case 2: return pi; /* atan(+0,-anything) = pi */
case 3: return -pi; /* atan(-0,-anything) =-pi */
}
}
/* when x = 0 */
if ((ix|lx) == 0)
return m&1 ? -pi/2 : pi/2;
/* when x is INF */
if (ix == 0x7ff00000) {
if (iy == 0x7ff00000) {
switch(m) {
case 0: return pi/4; /* atan(+INF,+INF) */
case 1: return -pi/4; /* atan(-INF,+INF) */
case 2: return 3*pi/4; /* atan(+INF,-INF) */
case 3: return -3*pi/4; /* atan(-INF,-INF) */
}
} else {
switch(m) {
case 0: return 0.0; /* atan(+...,+INF) */
case 1: return -0.0; /* atan(-...,+INF) */
case 2: return pi; /* atan(+...,-INF) */
case 3: return -pi; /* atan(-...,-INF) */
}
}
}
/* |y/x| > 0x1p64 */
if (ix+(64<<20) < iy || iy == 0x7ff00000)
return m&1 ? -pi/2 : pi/2;
/* z = atan(|y/x|) without spurious underflow */
if ((m&2) && iy+(64<<20) < ix) /* |y/x| < 0x1p-64, x<0 */
z = 0;
else
z = atan(fabs(y/x));
switch (m) {
case 0: return z; /* atan(+,+) */
case 1: return -z; /* atan(-,+) */
case 2: return pi - (z-pi_lo); /* atan(+,-) */
default: /* case 3 */
return (z-pi_lo) - pi; /* atan(-,-) */
}
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_atan2f.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
static const float
pi = 3.1415927410e+00, /* 0x40490fdb */
pi_lo = -8.7422776573e-08; /* 0xb3bbbd2e */
float __cdecl atan2f(float y, float x)
{
float z;
uint32_t m,ix,iy;
if (isnan(x) || isnan(y))
return x+y;
GET_FLOAT_WORD(ix, x);
GET_FLOAT_WORD(iy, y);
if (ix == 0x3f800000) /* x=1.0 */
return atanf(y);
m = ((iy>>31)&1) | ((ix>>30)&2); /* 2*sign(x)+sign(y) */
ix &= 0x7fffffff;
iy &= 0x7fffffff;
/* when y = 0 */
if (iy == 0) {
switch (m) {
case 0:
case 1: return y; /* atan(+-0,+anything)=+-0 */
case 2: return pi; /* atan(+0,-anything) = pi */
case 3: return -pi; /* atan(-0,-anything) =-pi */
}
}
/* when x = 0 */
if (ix == 0)
return m&1 ? -pi/2 : pi/2;
/* when x is INF */
if (ix == 0x7f800000) {
if (iy == 0x7f800000) {
switch (m) {
case 0: return pi/4; /* atan(+INF,+INF) */
case 1: return -pi/4; /* atan(-INF,+INF) */
case 2: return 3*pi/4; /*atan(+INF,-INF)*/
case 3: return -3*pi/4; /*atan(-INF,-INF)*/
}
} else {
switch (m) {
case 0: return 0.0f; /* atan(+...,+INF) */
case 1: return -0.0f; /* atan(-...,+INF) */
case 2: return pi; /* atan(+...,-INF) */
case 3: return -pi; /* atan(-...,-INF) */
}
}
}
/* |y/x| > 0x1p26 */
if (ix+(26<<23) < iy || iy == 0x7f800000)
return m&1 ? -pi/2 : pi/2;
/* z = atan(|y/x|) with correct underflow */
if ((m&2) && iy+(26<<23) < ix) /*|y/x| < 0x1p-26, x < 0 */
z = 0.0;
else
z = atanf(fabsf(y/x));
switch (m) {
case 0: return z; /* atan(+,+) */
case 1: return -z; /* atan(-,+) */
case 2: return pi - (z-pi_lo); /* atan(+,-) */
default: /* case 3 */
return (z-pi_lo) - pi; /* atan(-,-) */
}
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_atanf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
static const float atanhi[] = {
4.6364760399e-01, /* atan(0.5)hi 0x3eed6338 */
7.8539812565e-01, /* atan(1.0)hi 0x3f490fda */
9.8279368877e-01, /* atan(1.5)hi 0x3f7b985e */
1.5707962513e+00, /* atan(inf)hi 0x3fc90fda */
};
static const float atanlo[] = {
5.0121582440e-09, /* atan(0.5)lo 0x31ac3769 */
3.7748947079e-08, /* atan(1.0)lo 0x33222168 */
3.4473217170e-08, /* atan(1.5)lo 0x33140fb4 */
7.5497894159e-08, /* atan(inf)lo 0x33a22168 */
};
static const float aT[] = {
3.3333328366e-01,
-1.9999158382e-01,
1.4253635705e-01,
-1.0648017377e-01,
6.1687607318e-02,
};
float __cdecl atanf(float x)
{
float_t w,s1,s2,z;
uint32_t ix,sign;
int id;
GET_FLOAT_WORD(ix, x);
sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x4c800000) { /* if |x| >= 2**26 */
if (isnan(x))
return x;
z = atanhi[3] + 0x1p-120f;
return sign ? -z : z;
}
if (ix < 0x3ee00000) { /* |x| < 0.4375 */
if (ix < 0x39800000) { /* |x| < 2**-12 */
if (ix < 0x00800000)
/* raise underflow for subnormal x */
FORCE_EVAL(x*x);
return x;
}
id = -1;
} else {
x = fabsf(x);
if (ix < 0x3f980000) { /* |x| < 1.1875 */
if (ix < 0x3f300000) { /* 7/16 <= |x| < 11/16 */
id = 0;
x = (2.0f*x - 1.0f)/(2.0f + x);
} else { /* 11/16 <= |x| < 19/16 */
id = 1;
x = (x - 1.0f)/(x + 1.0f);
}
} else {
if (ix < 0x401c0000) { /* |x| < 2.4375 */
id = 2;
x = (x - 1.5f)/(1.0f + 1.5f*x);
} else { /* 2.4375 <= |x| < 2**26 */
id = 3;
x = -1.0f/x;
}
}
}
/* end of argument reduction */
z = x*x;
w = z*z;
/* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
s1 = z*(aT[0]+w*(aT[2]+w*aT[4]));
s2 = w*(aT[1]+w*aT[3]);
if (id < 0)
return x - x*(s1+s2);
z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x);
return sign ? -z : z;
}
#include "libm.h"
/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */
double __cdecl atanh(double x)
{
union {double f; uint64_t i;} u = {.f = x};
unsigned e = u.i >> 52 & 0x7ff;
unsigned s = u.i >> 63;
double_t y;
/* |x| */
u.i &= (uint64_t)-1/2;
y = u.f;
if (e < 0x3ff - 1) {
if (e < 0x3ff - 32) {
/* handle underflow */
if (e == 0)
FORCE_EVAL((float)y);
} else {
/* |x| < 0.5, up to 1.7ulp error */
y = 0.5*log1p(2*y + 2*y*y/(1-y));
}
} else {
/* avoid overflow */
y = 0.5*log1p(2*(y/(1-y)));
}
return s ? -y : y;
}
#include "libm.h"
/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */
float __cdecl atanhf(float x)
{
union {float f; uint32_t i;} u = {.f = x};
unsigned s = u.i >> 31;
float_t y;
/* |x| */
u.i &= 0x7fffffff;
y = u.f;
if (u.i < 0x3f800000 - (1<<23)) {
if (u.i < 0x3f800000 - (32<<23)) {
/* handle underflow */
if (u.i < (1<<23))
FORCE_EVAL((float)(y*y));
} else {
/* |x| < 0.5, up to 1.7ulp error */
y = 0.5f*log1pf(2*y + 2*y*y/(1-y));
}
} else {
/* avoid overflow */
y = 0.5f*log1pf(2*(y/(1-y)));
}
return s ? -y : y;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
* Optimized by Bruce D. Evans.
*/
/* cbrt(x)
* Return cube root of x
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
static const uint32_t
B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
/* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
static const double
P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */
P1 = -1.88497979543377169875, /* 0xbffe28e0, 0x92f02420 */
P2 = 1.621429720105354466140, /* 0x3ff9f160, 0x4a49d6c2 */
P3 = -0.758397934778766047437, /* 0xbfe844cb, 0xbee751d9 */
P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
double __cdecl cbrt(double x)
{
union {double f; uint64_t i;} u = {x};
double_t r,s,t,w;
uint32_t hx = u.i>>32 & 0x7fffffff;
if (hx >= 0x7ff00000) /* cbrt(NaN,INF) is itself */
return x+x;
/*
* Rough cbrt to 5 bits:
* cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
* where e is integral and >= 0, m is real and in [0, 1), and "/" and
* "%" are integer division and modulus with rounding towards minus
* infinity. The RHS is always >= the LHS and has a maximum relative
* error of about 1 in 16. Adding a bias of -0.03306235651 to the
* (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
* floating point representation, for finite positive normal values,
* ordinary integer divison of the value in bits magically gives
* almost exactly the RHS of the above provided we first subtract the
* exponent bias (1023 for doubles) and later add it back. We do the
* subtraction virtually to keep e >= 0 so that ordinary integer
* division rounds towards minus infinity; this is also efficient.
*/
if (hx < 0x00100000) { /* zero or subnormal? */
u.f = x*0x1p54;
hx = u.i>>32 & 0x7fffffff;
if (hx == 0)
return x; /* cbrt(0) is itself */
hx = hx/3 + B2;
} else
hx = hx/3 + B1;
u.i &= 1ULL<<63;
u.i |= (uint64_t)hx << 32;
t = u.f;
/*
* New cbrt to 23 bits:
* cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
* where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
* to within 2**-23.5 when |r - 1| < 1/10. The rough approximation
* has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
* gives us bounds for r = t**3/x.
*
* Try to optimize for parallel evaluation as in __tanf.c.
*/
r = (t*t)*(t/x);
t = t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4));
/*
* Round t away from zero to 23 bits (sloppily except for ensuring that
* the result is larger in magnitude than cbrt(x) but not much more than
* 2 23-bit ulps larger). With rounding towards zero, the error bound
* would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps
* in the rounded t, the infinite-precision error in the Newton
* approximation barely affects third digit in the final error
* 0.667; the error in the rounded t can be up to about 3 23-bit ulps
* before the final error is larger than 0.667 ulps.
*/
u.f = t;
u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;
t = u.f;
/* one step Newton iteration to 53 bits with error < 0.667 ulps */
s = t*t; /* t*t is exact */
r = x/s; /* error <= 0.5 ulps; |r| < |t| */
w = t+t; /* t+t is exact */
r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */
t = t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */
return t;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Debugged and optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* cbrtf(x)
* Return cube root of x
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
static const unsigned
B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */
B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */
float __cdecl cbrtf(float x)
{
double_t r,T;
union {float f; uint32_t i;} u = {x};
uint32_t hx = u.i & 0x7fffffff;
if (hx >= 0x7f800000) /* cbrt(NaN,INF) is itself */
return x + x;
/* rough cbrt to 5 bits */
if (hx < 0x00800000) { /* zero or subnormal? */
if (hx == 0)
return x; /* cbrt(+-0) is itself */
u.f = x*0x1p24f;
hx = u.i & 0x7fffffff;
hx = hx/3 + B2;
} else
hx = hx/3 + B1;
u.i &= 0x80000000;
u.i |= hx;
/*
* First step Newton iteration (solving t*t-x/t == 0) to 16 bits. In
* double precision so that its terms can be arranged for efficiency
* without causing overflow or underflow.
*/
T = u.f;
r = T*T*T;
T = T*((double_t)x+x+r)/(x+r+r);
/*
* Second step Newton iteration to 47 bits. In double precision for
* efficiency and accuracy.
*/
r = T*T*T;
T = T*((double_t)x+x+r)/(x+r+r);
/* rounding to 24 bits is perfect in round-to-nearest mode */
return T;
}
#include "libm.h"
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
static const double_t toint = 1/EPS;
double __cdecl ceil(double x)
{
union {double f; uint64_t i;} u = {x};
int e = u.i >> 52 & 0x7ff;
double_t y;
if (e >= 0x3ff+52 || x == 0)
return x;
/* y = int(x) - x, where int(x) is an integer neighbor of x */
if (u.i >> 63)
y = x - toint + toint - x;
else
y = x + toint - toint - x;
/* special case because of non-nearest rounding modes */
if (e <= 0x3ff-1) {
FORCE_EVAL(y);
return u.i >> 63 ? -0.0 : 1;
}
if (y < 0)
return x + y + 1;
return x + y;
}
#include "libm.h"
float __cdecl ceilf(float x)
{
union {float f; uint32_t i;} u = {x};
int e = (int)(u.i >> 23 & 0xff) - 0x7f;
uint32_t m;
if (e >= 23)
return x;
if (e >= 0) {
m = 0x007fffff >> e;
if ((u.i & m) == 0)
return x;
FORCE_EVAL(x + 0x1p120f);
if (u.i >> 31 == 0)
u.i += m;
u.i &= ~m;
} else {
FORCE_EVAL(x + 0x1p120f);
if (u.i >> 31)
u.f = -0.0;
else if (u.i << 1)
u.f = 1.0;
}
return u.f;
}
#include "libm.h"
double __cdecl copysign(double x, double y) {
union {double f; uint64_t i;} ux={x}, uy={y};
ux.i &= -1ULL/2;
ux.i |= uy.i & 1ULL<<63;
return ux.f;
}
#include <math.h>
#include <stdint.h>
float __cdecl copysignf(float x, float y)
{
union {float f; uint32_t i;} ux={x}, uy={y};
ux.i &= 0x7fffffff;
ux.i |= uy.i & 0x80000000;
return ux.f;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_cos.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* cos(x)
* Return cosine function of x.
*
* kernel function:
* __sin ... sine function on [-pi/4,pi/4]
* __cos ... cosine function on [-pi/4,pi/4]
* __rem_pio2 ... argument reduction routine
*
* Method.
* Let S,C and T denote the sin, cos and tan respectively on
* [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
* in [-pi/4 , +pi/4], and let n = k mod 4.
* We have
*
* n sin(x) cos(x) tan(x)
* ----------------------------------------------------------
* 0 S C T
* 1 C -S -1/T
* 2 -S -C T
* 3 -C S -1/T
* ----------------------------------------------------------
*
* Special cases:
* Let trig be any of sin, cos, or tan.
* trig(+-INF) is NaN, with signals;
* trig(NaN) is that NaN;
*
* Accuracy:
* TRIG(x) returns trig(x) nearly rounded
*/
#include "libm.h"
double __cdecl cos(double x)
{
double y[2];
uint32_t ix;
unsigned n;
GET_HIGH_WORD(ix, x);
ix &= 0x7fffffff;
/* |x| ~< pi/4 */
if (ix <= 0x3fe921fb) {
if (ix < 0x3e46a09e) { /* |x| < 2**-27 * sqrt(2) */
/* raise inexact if x!=0 */
FORCE_EVAL(x + 0x1p120f);
return 1.0;
}
return __cos(x, 0);
}
/* cos(Inf or NaN) is NaN */
if (ix >= 0x7ff00000)
return x-x;
/* argument reduction */
n = __rem_pio2(x, y);
switch (n&3) {
case 0: return __cos(y[0], y[1]);
case 1: return -__sin(y[0], y[1], 1);
case 2: return -__cos(y[0], y[1]);
default:
return __sin(y[0], y[1], 1);
}
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_cosf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
/* Small multiples of pi/2 rounded to double precision. */
static const double
c1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */
c2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
c3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
c4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
float __cdecl cosf(float x)
{
double y;
uint32_t ix;
unsigned n, sign;
GET_FLOAT_WORD(ix, x);
sign = ix >> 31;
ix &= 0x7fffffff;
if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
if (ix < 0x39800000) { /* |x| < 2**-12 */
/* raise inexact if x != 0 */
FORCE_EVAL(x + 0x1p120f);
return 1.0f;
}
return __cosdf(x);
}
if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
if (ix > 0x4016cbe3) /* |x| ~> 3*pi/4 */
return -__cosdf(sign ? x+c2pio2 : x-c2pio2);
else {
if (sign)
return __sindf(x + c1pio2);
else
return __sindf(c1pio2 - x);
}
}
if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
if (ix > 0x40afeddf) /* |x| ~> 7*pi/4 */
return __cosdf(sign ? x+c4pio2 : x-c4pio2);
else {
if (sign)
return __sindf(-x - c3pio2);
else
return __sindf(x - c3pio2);
}
}
/* cos(Inf or NaN) is NaN */
if (ix >= 0x7f800000)
return x-x;
/* general argument reduction needed */
n = __rem_pio2f(x,&y);
switch (n&3) {
case 0: return __cosdf(y);
case 1: return __sindf(-y);
case 2: return -__cosdf(y);
default:
return __sindf(y);
}
}
#include "libm.h"
/* cosh(x) = (exp(x) + 1/exp(x))/2
* = 1 + 0.5*(exp(x)-1)*(exp(x)-1)/exp(x)
* = 1 + x*x/2 + o(x^4)
*/
double __cdecl cosh(double x)
{
union {double f; uint64_t i;} u = {.f = x};
uint32_t w;
double t;
/* |x| */
u.i &= (uint64_t)-1/2;
x = u.f;
w = u.i >> 32;
/* |x| < log(2) */
if (w < 0x3fe62e42) {
if (w < 0x3ff00000 - (26<<20)) {
/* raise inexact if x!=0 */
FORCE_EVAL(x + 0x1p120f);
return 1;
}
t = expm1(x);
return 1 + t*t/(2*(1+t));
}
/* |x| < log(DBL_MAX) */
if (w < 0x40862e42) {
t = exp(x);
/* note: if x>log(0x1p26) then the 1/t is not needed */
return 0.5*(t + 1/t);
}
/* |x| > log(DBL_MAX) or nan */
/* note: the result is stored to handle overflow */
t = __expo2(x, 1.0);
return t;
}
#include "libm.h"
float __cdecl coshf(float x)
{
union {float f; uint32_t i;} u = {.f = x};
uint32_t w;
float t;
/* |x| */
u.i &= 0x7fffffff;
x = u.f;
w = u.i;
/* |x| < log(2) */
if (w < 0x3f317217) {
if (w < 0x3f800000 - (12<<23)) {
FORCE_EVAL(x + 0x1p120f);
return 1;
}
t = expm1f(x);
return 1 + t*t/(2*(1+t));
}
/* |x| < log(FLT_MAX) */
if (w < 0x42b17217) {
t = expf(x);
return 0.5f*(t + 1/t);
}
/* |x| > log(FLT_MAX) or nan */
t = __expo2f(x, 1.0f);
return t;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_erf.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* double erf(double x)
* double erfc(double x)
* x
* 2 |\
* erf(x) = --------- | exp(-t*t)dt
* sqrt(pi) \|
* 0
*
* erfc(x) = 1-erf(x)
* Note that
* erf(-x) = -erf(x)
* erfc(-x) = 2 - erfc(x)
*
* Method:
* 1. For |x| in [0, 0.84375]
* erf(x) = x + x*R(x^2)
* erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
* = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
* where R = P/Q where P is an odd poly of degree 8 and
* Q is an odd poly of degree 10.
* -57.90
* | R - (erf(x)-x)/x | <= 2
*
*
* Remark. The formula is derived by noting
* erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
* and that
* 2/sqrt(pi) = 1.128379167095512573896158903121545171688
* is close to one. The interval is chosen because the fix
* point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
* near 0.6174), and by some experiment, 0.84375 is chosen to
* guarantee the error is less than one ulp for erf.
*
* 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
* c = 0.84506291151 rounded to single (24 bits)
* erf(x) = sign(x) * (c + P1(s)/Q1(s))
* erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
* 1+(c+P1(s)/Q1(s)) if x < 0
* |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06
* Remark: here we use the taylor series expansion at x=1.
* erf(1+s) = erf(1) + s*Poly(s)
* = 0.845.. + P1(s)/Q1(s)
* That is, we use rational approximation to approximate
* erf(1+s) - (c = (single)0.84506291151)
* Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
* where
* P1(s) = degree 6 poly in s
* Q1(s) = degree 6 poly in s
*
* 3. For x in [1.25,1/0.35(~2.857143)],
* erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
* erf(x) = 1 - erfc(x)
* where
* R1(z) = degree 7 poly in z, (z=1/x^2)
* S1(z) = degree 8 poly in z
*
* 4. For x in [1/0.35,28]
* erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
* = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
* = 2.0 - tiny (if x <= -6)
* erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else
* erf(x) = sign(x)*(1.0 - tiny)
* where
* R2(z) = degree 6 poly in z, (z=1/x^2)
* S2(z) = degree 7 poly in z
*
* Note1:
* To compute exp(-x*x-0.5625+R/S), let s be a single
* precision number and s := x; then
* -x*x = -s*s + (s-x)*(s+x)
* exp(-x*x-0.5626+R/S) =
* exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
* Note2:
* Here 4 and 5 make use of the asymptotic series
* exp(-x*x)
* erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
* x*sqrt(pi)
* We use rational approximation to approximate
* g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625
* Here is the error bound for R1/S1 and R2/S2
* |R1/S1 - f(x)| < 2**(-62.57)
* |R2/S2 - f(x)| < 2**(-61.52)
*
* 5. For inf > x >= 28
* erf(x) = sign(x) *(1 - tiny) (raise inexact)
* erfc(x) = tiny*tiny (raise underflow) if x > 0
* = 2 - tiny if x<0
*
* 7. Special case:
* erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
* erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
* erfc/erf(NaN) is NaN
*/
#include "libm.h"
static const double
erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
/*
* Coefficients for approximation to erf on [0,0.84375]
*/
efx8 = 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
pp0 = 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
pp1 = -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
pp2 = -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
pp3 = -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
pp4 = -2.37630166566501626084e-05, /* 0xBEF8EAD6, 0x120016AC */
qq1 = 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
qq2 = 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
qq3 = 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
qq4 = 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
qq5 = -3.96022827877536812320e-06, /* 0xBED09C43, 0x42A26120 */
/*
* Coefficients for approximation to erf in [0.84375,1.25]
*/
pa0 = -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
pa1 = 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
pa2 = -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
pa3 = 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
pa4 = -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
pa5 = 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
pa6 = -2.16637559486879084300e-03, /* 0xBF61BF38, 0x0A96073F */
qa1 = 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
qa2 = 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
qa3 = 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
qa4 = 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
qa5 = 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
qa6 = 1.19844998467991074170e-02, /* 0x3F888B54, 0x5735151D */
/*
* Coefficients for approximation to erfc in [1.25,1/0.35]
*/
ra0 = -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
ra1 = -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
ra2 = -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
ra3 = -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
ra4 = -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
ra5 = -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
ra6 = -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
ra7 = -9.81432934416914548592e+00, /* 0xC023A0EF, 0xC69AC25C */
sa1 = 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
sa2 = 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
sa3 = 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
sa4 = 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
sa5 = 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
sa6 = 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
sa7 = 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
sa8 = -6.04244152148580987438e-02, /* 0xBFAEEFF2, 0xEE749A62 */
/*
* Coefficients for approximation to erfc in [1/.35,28]
*/
rb0 = -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
rb1 = -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
rb2 = -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
rb3 = -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
rb4 = -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
rb5 = -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
rb6 = -4.83519191608651397019e+02, /* 0xC07E384E, 0x9BDC383F */
sb1 = 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
sb2 = 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
sb3 = 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
sb4 = 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
sb5 = 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
sb6 = 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
sb7 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */
static double erfc1(double x)
{
double_t s,P,Q;
s = fabs(x) - 1;
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
Q = 1+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
return 1 - erx - P/Q;
}
static double erfc2(uint32_t ix, double x)
{
double_t s,R,S;
double z;
if (ix < 0x3ff40000) /* |x| < 1.25 */
return erfc1(x);
x = fabs(x);
s = 1/(x*x);
if (ix < 0x4006db6d) { /* |x| < 1/.35 ~ 2.85714 */
R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
ra5+s*(ra6+s*ra7))))));
S = 1.0+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
sa5+s*(sa6+s*(sa7+s*sa8)))))));
} else { /* |x| > 1/.35 */
R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
rb5+s*rb6)))));
S = 1.0+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
sb5+s*(sb6+s*sb7))))));
}
z = x;
SET_LOW_WORD(z,0);
return exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S)/x;
}
double __cdecl erf(double x)
{
double r,s,z,y;
uint32_t ix;
int sign;
GET_HIGH_WORD(ix, x);
sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x7ff00000) {
/* erf(nan)=nan, erf(+-inf)=+-1 */
return 1-2*sign + 1/x;
}
if (ix < 0x3feb0000) { /* |x| < 0.84375 */
if (ix < 0x3e300000) { /* |x| < 2**-28 */
/* avoid underflow */
return 0.125*(8*x + efx8*x);
}
z = x*x;
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = 1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
y = r/s;
return x + x*y;
}
if (ix < 0x40180000) /* 0.84375 <= |x| < 6 */
y = 1 - erfc2(ix,x);
else
y = 1 - 0x1p-1022;
return sign ? -y : y;
}
double __cdecl erfc(double x)
{
double r,s,z,y;
uint32_t ix;
int sign;
GET_HIGH_WORD(ix, x);
sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x7ff00000) {
/* erfc(nan)=nan, erfc(+-inf)=0,2 */
return 2*sign + 1/x;
}
if (ix < 0x3feb0000) { /* |x| < 0.84375 */
if (ix < 0x3c700000) /* |x| < 2**-56 */
return 1.0 - x;
z = x*x;
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = 1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
y = r/s;
if (sign || ix < 0x3fd00000) { /* x < 1/4 */
return 1.0 - (x+x*y);
}
return 0.5 - (x - 0.5 + x*y);
}
if (ix < 0x403c0000) { /* 0.84375 <= |x| < 28 */
return sign ? 2 - erfc2(ix,x) : erfc2(ix,x);
}
return sign ? 2 - 0x1p-1022 : 0x1p-1022*0x1p-1022;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_erff.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
static const float
erx = 8.4506291151e-01, /* 0x3f58560b */
/*
* Coefficients for approximation to erf on [0,0.84375]
*/
efx8 = 1.0270333290e+00, /* 0x3f8375d4 */
pp0 = 1.2837916613e-01, /* 0x3e0375d4 */
pp1 = -3.2504209876e-01, /* 0xbea66beb */
pp2 = -2.8481749818e-02, /* 0xbce9528f */
pp3 = -5.7702702470e-03, /* 0xbbbd1489 */
pp4 = -2.3763017452e-05, /* 0xb7c756b1 */
qq1 = 3.9791721106e-01, /* 0x3ecbbbce */
qq2 = 6.5022252500e-02, /* 0x3d852a63 */
qq3 = 5.0813062117e-03, /* 0x3ba68116 */
qq4 = 1.3249473704e-04, /* 0x390aee49 */
qq5 = -3.9602282413e-06, /* 0xb684e21a */
/*
* Coefficients for approximation to erf in [0.84375,1.25]
*/
pa0 = -2.3621185683e-03, /* 0xbb1acdc6 */
pa1 = 4.1485610604e-01, /* 0x3ed46805 */
pa2 = -3.7220788002e-01, /* 0xbebe9208 */
pa3 = 3.1834661961e-01, /* 0x3ea2fe54 */
pa4 = -1.1089469492e-01, /* 0xbde31cc2 */
pa5 = 3.5478305072e-02, /* 0x3d1151b3 */
pa6 = -2.1663755178e-03, /* 0xbb0df9c0 */
qa1 = 1.0642088205e-01, /* 0x3dd9f331 */
qa2 = 5.4039794207e-01, /* 0x3f0a5785 */
qa3 = 7.1828655899e-02, /* 0x3d931ae7 */
qa4 = 1.2617121637e-01, /* 0x3e013307 */
qa5 = 1.3637083583e-02, /* 0x3c5f6e13 */
qa6 = 1.1984500103e-02, /* 0x3c445aa3 */
/*
* Coefficients for approximation to erfc in [1.25,1/0.35]
*/
ra0 = -9.8649440333e-03, /* 0xbc21a093 */
ra1 = -6.9385856390e-01, /* 0xbf31a0b7 */
ra2 = -1.0558626175e+01, /* 0xc128f022 */
ra3 = -6.2375331879e+01, /* 0xc2798057 */
ra4 = -1.6239666748e+02, /* 0xc322658c */
ra5 = -1.8460508728e+02, /* 0xc3389ae7 */
ra6 = -8.1287437439e+01, /* 0xc2a2932b */
ra7 = -9.8143291473e+00, /* 0xc11d077e */
sa1 = 1.9651271820e+01, /* 0x419d35ce */
sa2 = 1.3765776062e+02, /* 0x4309a863 */
sa3 = 4.3456588745e+02, /* 0x43d9486f */
sa4 = 6.4538726807e+02, /* 0x442158c9 */
sa5 = 4.2900814819e+02, /* 0x43d6810b */
sa6 = 1.0863500214e+02, /* 0x42d9451f */
sa7 = 6.5702495575e+00, /* 0x40d23f7c */
sa8 = -6.0424413532e-02, /* 0xbd777f97 */
/*
* Coefficients for approximation to erfc in [1/.35,28]
*/
rb0 = -9.8649431020e-03, /* 0xbc21a092 */
rb1 = -7.9928326607e-01, /* 0xbf4c9dd4 */
rb2 = -1.7757955551e+01, /* 0xc18e104b */
rb3 = -1.6063638306e+02, /* 0xc320a2ea */
rb4 = -6.3756646729e+02, /* 0xc41f6441 */
rb5 = -1.0250950928e+03, /* 0xc480230b */
rb6 = -4.8351919556e+02, /* 0xc3f1c275 */
sb1 = 3.0338060379e+01, /* 0x41f2b459 */
sb2 = 3.2579251099e+02, /* 0x43a2e571 */
sb3 = 1.5367296143e+03, /* 0x44c01759 */
sb4 = 3.1998581543e+03, /* 0x4547fdbb */
sb5 = 2.5530502930e+03, /* 0x451f90ce */
sb6 = 4.7452853394e+02, /* 0x43ed43a7 */
sb7 = -2.2440952301e+01; /* 0xc1b38712 */
static float erfc1(float x)
{
float_t s,P,Q;
s = fabsf(x) - 1;
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
Q = 1+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
return 1 - erx - P/Q;
}
static float erfc2(uint32_t ix, float x)
{
float_t s,R,S;
float z;
if (ix < 0x3fa00000) /* |x| < 1.25 */
return erfc1(x);
x = fabsf(x);
s = 1/(x*x);
if (ix < 0x4036db6d) { /* |x| < 1/0.35 */
R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
ra5+s*(ra6+s*ra7))))));
S = 1.0f+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
sa5+s*(sa6+s*(sa7+s*sa8)))))));
} else { /* |x| >= 1/0.35 */
R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
rb5+s*rb6)))));
S = 1.0f+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
sb5+s*(sb6+s*sb7))))));
}
GET_FLOAT_WORD(ix, x);
SET_FLOAT_WORD(z, ix&0xffffe000);
return expf(-z*z - 0.5625f) * expf((z-x)*(z+x) + R/S)/x;
}
float __cdecl erff(float x)
{
float r,s,z,y;
uint32_t ix;
int sign;
GET_FLOAT_WORD(ix, x);
sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x7f800000) {
/* erf(nan)=nan, erf(+-inf)=+-1 */
return 1-2*sign + 1/x;
}
if (ix < 0x3f580000) { /* |x| < 0.84375 */
if (ix < 0x31800000) { /* |x| < 2**-28 */
/*avoid underflow */
return 0.125f*(8*x + efx8*x);
}
z = x*x;
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = 1+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
y = r/s;
return x + x*y;
}
if (ix < 0x40c00000) /* |x| < 6 */
y = 1 - erfc2(ix,x);
else
y = 1 - 0x1p-120f;
return sign ? -y : y;
}
float __cdecl erfcf(float x)
{
float r,s,z,y;
uint32_t ix;
int sign;
GET_FLOAT_WORD(ix, x);
sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x7f800000) {
/* erfc(nan)=nan, erfc(+-inf)=0,2 */
return 2*sign + 1/x;
}
if (ix < 0x3f580000) { /* |x| < 0.84375 */
if (ix < 0x23800000) /* |x| < 2**-56 */
return 1.0f - x;
z = x*x;
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = 1.0f+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
y = r/s;
if (sign || ix < 0x3e800000) /* x < 1/4 */
return 1.0f - (x+x*y);
return 0.5f - (x - 0.5f + x*y);
}
if (ix < 0x41e00000) { /* |x| < 28 */
return sign ? 2 - erfc2(ix,x) : erfc2(ix,x);
}
return sign ? 2 - 0x1p-120f : 0x1p-120f*0x1p-120f;
}
/*
* Double-precision e^x function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp_data.h"
#define N (1 << EXP_TABLE_BITS)
#define InvLn2N __exp_data.invln2N
#define NegLn2hiN __exp_data.negln2hiN
#define NegLn2loN __exp_data.negln2loN
#define Shift __exp_data.shift
#define T __exp_data.tab
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have
overflown into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;
if ((ki & 0x80000000) == 0) {
/* k > 0, the exponent of scale might have overflowed by <= 460. */
sbits -= 1009ull << 52;
scale = asdouble(sbits);
y = 0x1p1009 * (scale + scale * tmp);
return eval_as_double(y);
}
/* k < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
scale = asdouble(sbits);
y = scale + scale * tmp;
if (y < 1.0) {
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo;
lo = scale - y + scale * tmp;
hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double(hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return eval_as_double(y);
}
/* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x)
{
return asuint64(x) >> 52;
}
double __cdecl exp(double x)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
double_t kd, z, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff;
if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
if (abstop - top12(0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
if (abstop >= top12(1024.0)) {
if (asuint64(x) == asuint64(-INFINITY))
return 0.0;
if (abstop >= top12(INFINITY))
return 1.0 + x;
if (asuint64(x) >> 63)
return __math_uflow(0);
else
return __math_oflow(0);
}
/* Large x is special cased below. */
abstop = 0;
}
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x;
#if TOINT_INTRINSICS
kd = roundtoint(z);
ki = converttoint(z);
#elif EXP_USE_TOINT_NARROW
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd) >> 16;
kd = (double_t)(int32_t)ki;
#else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd);
kd -= Shift;
#endif
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble(T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
if (predict_false(abstop == 0))
return specialcase(tmp, sbits, ki);
scale = asdouble(sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double(scale + scale * tmp);
}
/*
* Double-precision 2^x function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp_data.h"
#define N (1 << EXP_TABLE_BITS)
#define Shift __exp_data.exp2_shift
#define T __exp_data.tab
#define C1 __exp_data.exp2_poly[0]
#define C2 __exp_data.exp2_poly[1]
#define C3 __exp_data.exp2_poly[2]
#define C4 __exp_data.exp2_poly[3]
#define C5 __exp_data.exp2_poly[4]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have
overflown into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;
if ((ki & 0x80000000) == 0) {
/* k > 0, the exponent of scale might have overflowed by 1. */
sbits -= 1ull << 52;
scale = asdouble(sbits);
y = 2 * (scale + scale * tmp);
return eval_as_double(y);
}
/* k < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
scale = asdouble(sbits);
y = scale + scale * tmp;
if (y < 1.0) {
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo;
lo = scale - y + scale * tmp;
hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double(hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return eval_as_double(y);
}
/* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x)
{
return asuint64(x) >> 52;
}
double __cdecl exp2(double x)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
double_t kd, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff;
if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
if (abstop - top12(0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
if (abstop >= top12(1024.0)) {
if (asuint64(x) == asuint64(-INFINITY))
return 0.0;
if (abstop >= top12(INFINITY))
return 1.0 + x;
if (!(asuint64(x) >> 63))
return __math_oflow(0);
else if (asuint64(x) >= asuint64(-1075.0))
return __math_uflow(0);
}
if (2 * asuint64(x) > 2 * asuint64(928.0))
/* Large x is special cased below. */
abstop = 0;
}
/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
/* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
kd = eval_as_double(x + Shift);
ki = asuint64(kd); /* k. */
kd -= Shift; /* k/N for int k. */
r = x - kd;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble(T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.5/N ulp larger. */
/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
if (predict_false(abstop == 0))
return specialcase(tmp, sbits, ki);
scale = asdouble(sbits);
/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
is no spurious underflow here even without fma. */
return eval_as_double(scale + scale * tmp);
}
/*
* Single-precision 2^x function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp2f_data.h"
/*
EXP2F_TABLE_BITS = 5
EXP2F_POLY_ORDER = 3
ULP error: 0.502 (nearest rounding.)
Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.)
Wrong count: 168353 (all nearest rounding wrong results with fma.)
Non-nearest ULP error: 1 (rounded ULP error)
*/
#define N (1 << EXP2F_TABLE_BITS)
#define T __exp2f_data.tab
#define C __exp2f_data.poly
#define SHIFT __exp2f_data.shift_scaled
static inline uint32_t top12(float x)
{
return asuint(x) >> 20;
}
float __cdecl exp2f(float x)
{
uint32_t abstop;
uint64_t ki, t;
double_t kd, xd, z, r, r2, y, s;
xd = (double_t)x;
abstop = top12(x) & 0x7ff;
if (predict_false(abstop >= top12(128.0f))) {
/* |x| >= 128 or x is nan. */
if (asuint(x) == asuint(-INFINITY))
return 0.0f;
if (abstop >= top12(INFINITY))
return x + x;
if (x > 0.0f)
return __math_oflowf(0);
if (x <= -150.0f)
return __math_uflowf(0);
}
/* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */
kd = eval_as_double(xd + SHIFT);
ki = asuint64(kd);
kd -= SHIFT; /* k/N for int k. */
r = xd - kd;
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
t += ki << (52 - EXP2F_TABLE_BITS);
s = asdouble(t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float(y);
}
/*
* Shared data between expf, exp2f and powf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "exp2f_data.h"
#define N (1 << EXP2F_TABLE_BITS)
const struct exp2f_data __exp2f_data = {
/* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
used for computing 2^(k/N) for an int |k| < 150 N as
double(tab[k%N] + (k << 52-BITS)) */
.tab = {
0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
},
.shift_scaled = 0x1.8p+52 / N,
.poly = {
0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
},
.shift = 0x1.8p+52,
.invln2_scaled = 0x1.71547652b82fep+0 * N,
.poly_scaled = {
0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
},
};
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _EXP2F_DATA_H
#define _EXP2F_DATA_H
#include <features.h>
#include <stdint.h>
/* Shared between expf, exp2f and powf. */
#define EXP2F_TABLE_BITS 5
#define EXP2F_POLY_ORDER 3
extern hidden const struct exp2f_data {
uint64_t tab[1 << EXP2F_TABLE_BITS];
double shift_scaled;
double poly[EXP2F_POLY_ORDER];
double shift;
double invln2_scaled;
double poly_scaled[EXP2F_POLY_ORDER];
} __exp2f_data;
#endif
/*
* Shared data between exp, exp2 and pow.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "exp_data.h"
#define N (1 << EXP_TABLE_BITS)
const struct exp_data __exp_data = {
// N/ln2
.invln2N = 0x1.71547652b82fep0 * N,
// -ln2/N
.negln2hiN = -0x1.62e42fefa0000p-8,
.negln2loN = -0x1.cf79abc9e3b3ap-47,
// Used for rounding when !TOINT_INTRINSICS
#if EXP_USE_TOINT_NARROW
.shift = 0x1800000000.8p0,
#else
.shift = 0x1.8p52,
#endif
// exp polynomial coefficients.
.poly = {
// abs error: 1.555*2^-66
// ulp error: 0.509 (0.511 without fma)
// if |x| < ln2/256+eps
// abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65
// abs error if |x| < ln2/128: 1.7145*2^-56
0x1.ffffffffffdbdp-2,
0x1.555555555543cp-3,
0x1.55555cf172b91p-5,
0x1.1111167a4d017p-7,
},
.exp2_shift = 0x1.8p52 / N,
// exp2 polynomial coefficients.
.exp2_poly = {
// abs error: 1.2195*2^-65
// ulp error: 0.507 (0.511 without fma)
// if |x| < 1/256
// abs error if |x| < 1/128: 1.9941*2^-56
0x1.62e42fefa39efp-1,
0x1.ebfbdff82c424p-3,
0x1.c6b08d70cf4b5p-5,
0x1.3b2abd24650ccp-7,
0x1.5d7e09b4e3a84p-10,
},
// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
// tab[2*k] = asuint64(T[k])
// tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
.tab = {
0x0, 0x3ff0000000000000,
0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
0xbc905e7a108766d1, 0x3fefe315e86e7f85,
0x3c8cd2523567f613, 0x3fefd9b0d3158574,
0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
0x3c60f74e61e6c861, 0x3fefc74518759bc8,
0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
0xbc6a033489906e0b, 0x3fef9b66affed31b,
0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
0xbc91c923b9d5f416, 0x3fef829aaea92de0,
0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
0xbc801b15eaa59348, 0x3fef72b83c7d517b,
0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
0xbc96d99c7611eb26, 0x3fef5be084045cd4,
0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
0x3c968efde3a8a894, 0x3fef387a6e756238,
0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
0x3c834d754db0abb6, 0x3fef06fe0a31b715,
0x3c864201e2ac744c, 0x3fef0170fc4cd831,
0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
0xbc9907f81b512d8e, 0x3feeecae6d05d866,
0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
0x3c859f48a72a4c6d, 0x3feedea64c123422,
0xbc9312607a28698a, 0x3feeda4504ac801c,
0xbc58a78f4817895b, 0x3feed60a21f72e2a,
0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
0x3c4363ed60c2ac11, 0x3feece086061892d,
0x3c9666093b0664ef, 0x3feeca41ed1d0057,
0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
0x3c93350518fdd78e, 0x3feeaf4736b527da,
0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
0x3c9063e1e21c5409, 0x3feeab07dd485429,
0x3c34c7855019c6ea, 0x3feea9268a5946b7,
0x3c9432e62b64c035, 0x3feea76f15ad2148,
0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
0xbc845378892be9ae, 0x3feea34634ccc320,
0xbc93cedd78565858, 0x3feea23882552225,
0x3c5710aa807e1964, 0x3feea155d44ca973,
0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
0xbc6a12ad8734b982, 0x3feea012750bdabf,
0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
0xbc80dc3d54e08851, 0x3fee9f7df9519484,
0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
0xbc8619321e55e68a, 0x3fee9feb564267c9,
0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
0xbc7b32dcb94da51d, 0x3feea11473eb0187,
0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
0xbc9369b6f13b3734, 0x3feea589994cce13,
0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
0xbc94d450d872576e, 0x3feea8d99b4492ed,
0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
0x3c7bf68359f35f44, 0x3feeb1ae99157736,
0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
0xbc92434322f4f9aa, 0x3feebd829fde4e50,
0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
0x3c71affc2b91ce27, 0x3feec49182a3f090,
0x3c6dd235e10a73bb, 0x3feec86319e32323,
0xbc87c50422622263, 0x3feecc667b5de565,
0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
0x3c90cc319cee31d2, 0x3feed99e1330b358,
0x3c8469846e735ab3, 0x3feede6b5579fdbf,
0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
0xbc90a40e3da6f640, 0x3feef9728de5593a,
0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
0xbc91eee26b588a35, 0x3fef05b030a1064a,
0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
0x3c736eae30af0cb3, 0x3fef199bdd85529c,
0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
0x3c676b2c6c921968, 0x3fef3720dcef9069,
0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
0xbc900dae3875a949, 0x3fef4f87080d89f2,
0x3c74a385a63d07a7, 0x3fef5818dcfba487,
0xbc82919e2040220f, 0x3fef60e316c98398,
0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
0x3c843a59ac016b4b, 0x3fef7321f301b460,
0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
0xbc892ab93b470dc9, 0x3fef864614f5a129,
0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
},
};
/*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _EXP_DATA_H
#define _EXP_DATA_H
#include <features.h>
#include <stdint.h>
#define EXP_TABLE_BITS 7
#define EXP_POLY_ORDER 5
#define EXP_USE_TOINT_NARROW 0
#define EXP2_POLY_ORDER 5
extern hidden const struct exp_data {
double invln2N;
double shift;
double negln2hiN;
double negln2loN;
double poly[4]; /* Last four coefficients. */
double exp2_shift;
double exp2_poly[EXP2_POLY_ORDER];
uint64_t tab[2*(1 << EXP_TABLE_BITS)];
} __exp_data;
#endif
/*
* Single-precision e^x function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp2f_data.h"
/*
EXP2F_TABLE_BITS = 5
EXP2F_POLY_ORDER = 3
ULP error: 0.502 (nearest rounding.)
Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
Wrong count: 170635 (all nearest rounding wrong results with fma.)
Non-nearest ULP error: 1 (rounded ULP error)
*/
#define N (1 << EXP2F_TABLE_BITS)
#define InvLn2N __exp2f_data.invln2_scaled
#define T __exp2f_data.tab
#define C __exp2f_data.poly_scaled
static inline uint32_t top12(float x)
{
return asuint(x) >> 20;
}
float __cdecl expf(float x)
{
uint32_t abstop;
uint64_t ki, t;
double_t kd, xd, z, r, r2, y, s;
xd = (double_t)x;
abstop = top12(x) & 0x7ff;
if (predict_false(abstop >= top12(88.0f))) {
/* |x| >= 88 or x is nan. */
if (asuint(x) == asuint(-INFINITY))
return 0.0f;
if (abstop >= top12(INFINITY))
return x + x;
if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
return __math_oflowf(0);
if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
return __math_uflowf(0);
}
/* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
z = InvLn2N * xd;
/* Round and convert z to int, the result is in [-150*N, 128*N] and
ideally ties-to-even rule is used, otherwise the magnitude of r
can be bigger which gives larger approximation error. */
#if TOINT_INTRINSICS
kd = roundtoint(z);
ki = converttoint(z);
#else
# define SHIFT __exp2f_data.shift
kd = eval_as_double(z + SHIFT);
ki = asuint64(kd);
kd -= SHIFT;
#endif
r = z - kd;
/* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
t += ki << (52 - EXP2F_TABLE_BITS);
s = asdouble(t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float(y);
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_expm1.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* expm1(x)
* Returns exp(x)-1, the exponential of x minus 1.
*
* Method
* 1. Argument reduction:
* Given x, find r and integer k such that
*
* x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658
*
* Here a correction term c will be computed to compensate
* the error in r when rounded to a floating-point number.
*
* 2. Approximating expm1(r) by a special rational function on
* the interval [0,0.34658]:
* Since
* r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ...
* we define R1(r*r) by
* r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r)
* That is,
* R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
* = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
* = 1 - r^2/60 + r^4/2520 - r^6/100800 + ...
* We use a special Remez algorithm on [0,0.347] to generate
* a polynomial of degree 5 in r*r to approximate R1. The
* maximum error of this polynomial approximation is bounded
* by 2**-61. In other words,
* R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
* where Q1 = -1.6666666666666567384E-2,
* Q2 = 3.9682539681370365873E-4,
* Q3 = -9.9206344733435987357E-6,
* Q4 = 2.5051361420808517002E-7,
* Q5 = -6.2843505682382617102E-9;
* z = r*r,
* with error bounded by
* | 5 | -61
* | 1.0+Q1*z+...+Q5*z - R1(z) | <= 2
* | |
*
* expm1(r) = exp(r)-1 is then computed by the following
* specific way which minimize the accumulation rounding error:
* 2 3
* r r [ 3 - (R1 + R1*r/2) ]
* expm1(r) = r + --- + --- * [--------------------]
* 2 2 [ 6 - r*(3 - R1*r/2) ]
*
* To compensate the error in the argument reduction, we use
* expm1(r+c) = expm1(r) + c + expm1(r)*c
* ~ expm1(r) + c + r*c
* Thus c+r*c will be added in as the correction terms for
* expm1(r+c). Now rearrange the term to avoid optimization
* screw up:
* ( 2 2 )
* ({ ( r [ R1 - (3 - R1*r/2) ] ) } r )
* expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
* ({ ( 2 [ 6 - r*(3 - R1*r/2) ] ) } 2 )
* ( )
*
* = r - E
* 3. Scale back to obtain expm1(x):
* From step 1, we have
* expm1(x) = either 2^k*[expm1(r)+1] - 1
* = or 2^k*[expm1(r) + (1-2^-k)]
* 4. Implementation notes:
* (A). To save one multiplication, we scale the coefficient Qi
* to Qi*2^i, and replace z by (x^2)/2.
* (B). To achieve maximum accuracy, we compute expm1(x) by
* (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
* (ii) if k=0, return r-E
* (iii) if k=-1, return 0.5*(r-E)-0.5
* (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E)
* else return 1.0+2.0*(r-E);
* (v) if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1)
* (vi) if k <= 20, return 2^k((1-2^-k)-(E-r)), else
* (vii) return 2^k(1-((E+2^-k)-r))
*
* Special cases:
* expm1(INF) is INF, expm1(NaN) is NaN;
* expm1(-INF) is -1, and
* for finite argument, only expm1(0)=0 is exact.
*
* Accuracy:
* according to an error analysis, the error is always less than
* 1 ulp (unit in the last place).
*
* Misc. info.
* For IEEE double
* if x > 7.09782712893383973096e+02 then expm1(x) overflow
*
* Constants:
* The hexadecimal values are the intended ones for the following
* constants. The decimal values may be used, provided that the
* compiler will convert from decimal to binary accurately enough
* to produce the hexadecimal values shown.
*/
#include "libm.h"
static const double
o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
ln2_hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
ln2_lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */
/* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs = x*x/2: */
Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */
Q2 = 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */
Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
Q4 = 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */
Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
double __cdecl expm1(double x)
{
double_t y,hi,lo,c,t,e,hxs,hfx,r1,twopk;
union {double f; uint64_t i;} u = {x};
uint32_t hx = u.i>>32 & 0x7fffffff;
int k, sign = u.i>>63;
/* filter out huge and non-finite argument */
if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */
if (isnan(x))
return x;
if (sign)
return -1;
if (x > o_threshold) {
x *= 0x1p1023;
return x;
}
}
/* argument reduction */
if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
if (!sign) {
hi = x - ln2_hi;
lo = ln2_lo;
k = 1;
} else {
hi = x + ln2_hi;
lo = -ln2_lo;
k = -1;
}
} else {
k = invln2*x + (sign ? -0.5 : 0.5);
t = k;
hi = x - t*ln2_hi; /* t*ln2_hi is exact here */
lo = t*ln2_lo;
}
x = hi-lo;
c = (hi-x)-lo;
} else if (hx < 0x3c900000) { /* |x| < 2**-54, return x */
if (hx < 0x00100000)
FORCE_EVAL((float)x);
return x;
} else
k = 0;
/* x is now in primary range */
hfx = 0.5*x;
hxs = x*hfx;
r1 = 1.0+hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5))));
t = 3.0-r1*hfx;
e = hxs*((r1-t)/(6.0 - x*t));
if (k == 0) /* c is 0 */
return x - (x*e-hxs);
e = x*(e-c) - c;
e -= hxs;
/* exp(x) ~ 2^k (x_reduced - e + 1) */
if (k == -1)
return 0.5*(x-e) - 0.5;
if (k == 1) {
if (x < -0.25)
return -2.0*(e-(x+0.5));
return 1.0+2.0*(x-e);
}
u.i = (uint64_t)(0x3ff + k)<<52; /* 2^k */
twopk = u.f;
if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
y = x - e + 1.0;
if (k == 1024)
y = y*2.0*0x1p1023;
else
y = y*twopk;
return y - 1.0;
}
u.i = (uint64_t)(0x3ff - k)<<52; /* 2^-k */
if (k < 20)
y = (x-e+(1-u.f))*twopk;
else
y = (x-(e+u.f)+1)*twopk;
return y;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_expm1f.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
static const float
ln2_hi = 6.9313812256e-01, /* 0x3f317180 */
ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */
invln2 = 1.4426950216e+00, /* 0x3fb8aa3b */
/*
* Domain [-0.34568, 0.34568], range ~[-6.694e-10, 6.696e-10]:
* |6 / x * (1 + 2 * (1 / (exp(x) - 1) - 1 / x)) - q(x)| < 2**-30.04
* Scaled coefficients: Qn_here = 2**n * Qn_for_q (see s_expm1.c):
*/
Q1 = -3.3333212137e-2, /* -0x888868.0p-28 */
Q2 = 1.5807170421e-3; /* 0xcf3010.0p-33 */
float __cdecl expm1f(float x)
{
float_t y,hi,lo,c,t,e,hxs,hfx,r1,twopk;
union {float f; uint32_t i;} u = {x};
uint32_t hx = u.i & 0x7fffffff;
int k, sign = u.i >> 31;
/* filter out huge and non-finite argument */
if (hx >= 0x4195b844) { /* if |x|>=27*ln2 */
if (hx > 0x7f800000) /* NaN */
return x;
if (sign)
return -1;
if (hx > 0x42b17217) { /* x > log(FLT_MAX) */
x *= 0x1p127f;
return x;
}
}
/* argument reduction */
if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
if (hx < 0x3F851592) { /* and |x| < 1.5 ln2 */
if (!sign) {
hi = x - ln2_hi;
lo = ln2_lo;
k = 1;
} else {
hi = x + ln2_hi;
lo = -ln2_lo;
k = -1;
}
} else {
k = invln2*x + (sign ? -0.5f : 0.5f);
t = k;
hi = x - t*ln2_hi; /* t*ln2_hi is exact here */
lo = t*ln2_lo;
}
x = hi-lo;
c = (hi-x)-lo;
} else if (hx < 0x33000000) { /* when |x|<2**-25, return x */
if (hx < 0x00800000)
FORCE_EVAL(x*x);
return x;
} else
k = 0;
/* x is now in primary range */
hfx = 0.5f*x;
hxs = x*hfx;
r1 = 1.0f+hxs*(Q1+hxs*Q2);
t = 3.0f - r1*hfx;
e = hxs*((r1-t)/(6.0f - x*t));
if (k == 0) /* c is 0 */
return x - (x*e-hxs);
e = x*(e-c) - c;
e -= hxs;
/* exp(x) ~ 2^k (x_reduced - e + 1) */
if (k == -1)
return 0.5f*(x-e) - 0.5f;
if (k == 1) {
if (x < -0.25f)
return -2.0f*(e-(x+0.5f));
return 1.0f + 2.0f*(x-e);
}
u.i = (0x7f+k)<<23; /* 2^k */
twopk = u.f;
if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
y = x - e + 1.0f;
if (k == 128)
y = y*2.0f*0x1p127f;
else
y = y*twopk;
return y - 1.0f;
}
u.i = (0x7f-k)<<23; /* 2^-k */
if (k < 23)
y = (x-e+(1-u.f))*twopk;
else
y = (x-(e+u.f)+1)*twopk;
return y;
}
#include <math.h>
#include <stdint.h>
double __cdecl fabs(double x)
{
union {double f; uint64_t i;} u = {x};
u.i &= -1ULL/2;
return u.f;
}
#include <math.h>
#include <stdint.h>
float __cdecl fabsf(float x)
{
union {float f; uint32_t i;} u = {x};
u.i &= 0x7fffffff;
return u.f;
}
#include <math.h>
double __cdecl fdim(double x, double y)
{
if (isnan(x))
return x;
if (isnan(y))
return y;
return x > y ? x - y : 0;
}
#include <math.h>
float __cdecl fdimf(float x, float y)
{
if (isnan(x))
return x;
if (isnan(y))
return y;
return x > y ? x - y : 0;
}
#include "libm.h"
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
static const double_t toint = 1/EPS;
double __cdecl floor(double x)
{
union {double f; uint64_t i;} u = {x};
int e = u.i >> 52 & 0x7ff;
double_t y;
if (e >= 0x3ff+52 || x == 0)
return x;
/* y = int(x) - x, where int(x) is an integer neighbor of x */
if (u.i >> 63)
y = x - toint + toint - x;
else
y = x + toint - toint - x;
/* special case because of non-nearest rounding modes */
if (e <= 0x3ff-1) {
FORCE_EVAL(y);
return u.i >> 63 ? -1 : 0;
}
if (y > 0)
return x + y - 1;
return x + y;
}
#include "libm.h"
float __cdecl floorf(float x)
{
union {float f; uint32_t i;} u = {x};
int e = (int)(u.i >> 23 & 0xff) - 0x7f;
uint32_t m;
if (e >= 23)
return x;
if (e >= 0) {
m = 0x007fffff >> e;
if ((u.i & m) == 0)
return x;
FORCE_EVAL(x + 0x1p120f);
if (u.i >> 31)
u.i += m;
u.i &= ~m;
} else {
FORCE_EVAL(x + 0x1p120f);
if (u.i >> 31 == 0)
u.i = 0;
else if (u.i << 1)
u.f = -1.0;
}
return u.f;
}
#include <stdint.h>
#include <float.h>
#include <math.h>
#include "libm.h"
static inline int a_clz_64(uint64_t x)
{
#ifdef __GNUC
if (x>>32) return __builtin_clz(x>>32);
return __builtin_clz(x) + 32;
#else
uint32_t y;
int r;
if (x>>32) y=x>>32, r=0; else y=x, r=32;
if (y>>16) y>>=16; else r |= 16;
if (y>>8) y>>=8; else r |= 8;
if (y>>4) y>>=4; else r |= 4;
if (y>>2) y>>=2; else r |= 2;
return r | !(y>>1);
#endif
}
#define ASUINT64(x) ((union {double f; uint64_t i;}){x}).i
#define ZEROINFNAN (0x7ff-0x3ff-52-1)
struct num { uint64_t m; int e; int sign; };
static struct num normalize(double x)
{
uint64_t ix = ASUINT64(x);
int e = ix>>52;
int sign = e & 0x800;
e &= 0x7ff;
if (!e) {
ix = ASUINT64(x*0x1p63);
e = ix>>52 & 0x7ff;
e = e ? e-63 : 0x800;
}
ix &= (1ull<<52)-1;
ix |= 1ull<<52;
ix <<= 1;
e -= 0x3ff + 52 + 1;
return (struct num){ix,e,sign};
}
static void mul(uint64_t *hi, uint64_t *lo, uint64_t x, uint64_t y)
{
uint64_t t1,t2,t3;
uint64_t xlo = (uint32_t)x, xhi = x>>32;
uint64_t ylo = (uint32_t)y, yhi = y>>32;
t1 = xlo*ylo;
t2 = xlo*yhi + xhi*ylo;
t3 = xhi*yhi;
*lo = t1 + (t2<<32);
*hi = t3 + (t2>>32) + (t1 > *lo);
}
double __cdecl fma(double x, double y, double z)
{
#pragma STDC FENV_ACCESS ON
/* normalize so top 10bits and last bit are 0 */
struct num nx, ny, nz;
nx = normalize(x);
ny = normalize(y);
nz = normalize(z);
if (nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN)
return x*y + z;
if (nz.e >= ZEROINFNAN) {
if (nz.e > ZEROINFNAN) /* z==0 */
return x*y + z;
return z;
}
/* mul: r = x*y */
uint64_t rhi, rlo, zhi, zlo;
mul(&rhi, &rlo, nx.m, ny.m);
/* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
/* align exponents */
int e = nx.e + ny.e;
int d = nz.e - e;
/* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
if (d > 0) {
if (d < 64) {
zlo = nz.m<<d;
zhi = nz.m>>64-d;
} else {
zlo = 0;
zhi = nz.m;
e = nz.e - 64;
d -= 64;
if (d == 0) {
} else if (d < 64) {
rlo = rhi<<64-d | rlo>>d | !!(rlo<<64-d);
rhi = rhi>>d;
} else {
rlo = 1;
rhi = 0;
}
}
} else {
zhi = 0;
d = -d;
if (d == 0) {
zlo = nz.m;
} else if (d < 64) {
zlo = nz.m>>d | !!(nz.m<<64-d);
} else {
zlo = 1;
}
}
/* add */
int sign = nx.sign^ny.sign;
int samesign = !(sign^nz.sign);
int nonzero = 1;
if (samesign) {
/* r += z */
rlo += zlo;
rhi += zhi + (rlo < zlo);
} else {
/* r -= z */
uint64_t t = rlo;
rlo -= zlo;
rhi = rhi - zhi - (t < rlo);
if (rhi>>63) {
rlo = -rlo;
rhi = -rhi-!!rlo;
sign = !sign;
}
nonzero = !!rhi;
}
/* set rhi to top 63bit of the result (last bit is sticky) */
if (nonzero) {
e += 64;
d = a_clz_64(rhi)-1;
/* note: d > 0 */
rhi = rhi<<d | rlo>>64-d | !!(rlo<<d);
} else if (rlo) {
d = a_clz_64(rlo)-1;
if (d < 0)
rhi = rlo>>1 | (rlo&1);
else
rhi = rlo<<d;
} else {
/* exact +-0 */
return x*y + z;
}
e -= d;
/* convert to double */
int64_t i = rhi; /* i is in [1<<62,(1<<63)-1] */
if (sign)
i = -i;
double r = i; /* |r| is in [0x1p62,0x1p63] */
if (e < -1022-62) {
/* result is subnormal before rounding */
if (e == -1022-63) {
double c = 0x1p63;
if (sign)
c = -c;
if (r == c) {
/* min normal after rounding, underflow depends
on arch behaviour which can be imitated by
a double to float conversion */
float fltmin = 0x0.ffffff8p-63*FLT_MIN * r;
return DBL_MIN/FLT_MIN * fltmin;
}
/* one bit is lost when scaled, add another top bit to
only round once at conversion if it is inexact */
if (rhi << 53) {
i = rhi>>1 | (rhi&1) | 1ull<<62;
if (sign)
i = -i;
r = i;
r = 2*r - c; /* remove top bit */
/* raise underflow portably, such that it
cannot be optimized away */
{
double_t tiny = DBL_MIN/FLT_MIN * r;
r += (double)(tiny*tiny) * (r-r);
}
}
} else {
/* only round once when scaled */
d = 10;
i = ( rhi>>d | !!(rhi<<64-d) ) << d;
if (sign)
i = -i;
r = i;
}
}
return scalbn(r, e);
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */
/*-
* Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <fenv.h>
#include <math.h>
#include <stdint.h>
#include "libm.h"
/*
* Fused multiply-add: Compute x * y + z with a single rounding error.
*
* A double has more than twice as much precision than a float, so
* direct double-precision arithmetic suffices, except where double
* rounding occurs.
*/
float __cdecl fmaf(float x, float y, float z)
{
#pragma STDC FENV_ACCESS ON
double xy, result;
union {double f; uint64_t i;} u;
int e;
xy = (double)x * y;
result = xy + z;
u.f = result;
e = u.i>>52 & 0x7ff;
/* Common case: The double precision result is fine. */
if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */
e == 0x7ff || /* NaN */
(result - xy == z && result - z == xy) || /* exact */
fegetround() != FE_TONEAREST) /* not round-to-nearest */
{
/*
underflow may not be raised correctly, example:
fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f)
*/
if (e < 0x3ff-126 && e >= 0x3ff-149 && fetestexcept(FE_INEXACT)) {
fp_barrierf((float)u.f * (float)u.f);
}
z = result;
return z;
}
/*
* If result is inexact, and exactly halfway between two float values,
* we need to adjust the low-order bit in the direction of the error.
*/
double err;
int neg = u.i >> 63;
if (neg == (z > xy))
err = xy - result + z;
else
err = z - result + xy;
if (neg == (err < 0))
u.i++;
else
u.i--;
z = u.f;
return z;
}
#include <math.h>
double __cdecl fmax(double x, double y)
{
if (isnan(x))
return y;
if (isnan(y))
return x;
/* handle signed zeros, see C99 Annex F.9.9.2 */
if (signbit(x) != signbit(y))
return signbit(x) ? y : x;
return x < y ? y : x;
}
#include <math.h>
float __cdecl fmaxf(float x, float y)
{
if (isnan(x))
return y;
if (isnan(y))
return x;
/* handle signed zeroes, see C99 Annex F.9.9.2 */
if (signbit(x) != signbit(y))
return signbit(x) ? y : x;
return x < y ? y : x;
}
#include <math.h>
double __cdecl fmin(double x, double y)
{
if (isnan(x))
return y;
if (isnan(y))
return x;
/* handle signed zeros, see C99 Annex F.9.9.2 */
if (signbit(x) != signbit(y))
return signbit(x) ? x : y;
return x < y ? x : y;
}
#include <math.h>
float __cdecl fminf(float x, float y)
{
if (isnan(x))
return y;
if (isnan(y))
return x;
/* handle signed zeros, see C99 Annex F.9.9.2 */
if (signbit(x) != signbit(y))
return signbit(x) ? x : y;
return x < y ? x : y;
}
#include <math.h>
#include <stdint.h>
double __cdecl fmod(double x, double y)
{
union {double f; uint64_t i;} ux = {x}, uy = {y};
int ex = ux.i>>52 & 0x7ff;
int ey = uy.i>>52 & 0x7ff;
int sx = ux.i>>63;
uint64_t i;
/* in the followings uxi should be ux.i, but then gcc wrongly adds */
/* float load/store to inner loops ruining performance and code size */
uint64_t uxi = ux.i;
if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)
return (x*y)/(x*y);
if (uxi<<1 <= uy.i<<1) {
if (uxi<<1 == uy.i<<1)
return 0*x;
return x;
}
/* normalize x and y */
if (!ex) {
for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1);
uxi <<= -ex + 1;
} else {
uxi &= -1ULL >> 12;
uxi |= 1ULL << 52;
}
if (!ey) {
for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1);
uy.i <<= -ey + 1;
} else {
uy.i &= -1ULL >> 12;
uy.i |= 1ULL << 52;
}
/* x mod y */
for (; ex > ey; ex--) {
i = uxi - uy.i;
if (i >> 63 == 0) {
if (i == 0)
return 0*x;
uxi = i;
}
uxi <<= 1;
}
i = uxi - uy.i;
if (i >> 63 == 0) {
if (i == 0)
return 0*x;
uxi = i;
}
for (; uxi>>52 == 0; uxi <<= 1, ex--);
/* scale result */
if (ex > 0) {
uxi -= 1ULL << 52;
uxi |= (uint64_t)ex << 52;
} else {
uxi >>= -ex + 1;
}
uxi |= (uint64_t)sx << 63;
ux.i = uxi;
return ux.f;
}
#include <math.h>
#include <stdint.h>
#include "libm.h"
float __cdecl fmodf(float x, float y)
{
union {float f; uint32_t i;} ux = {x}, uy = {y};
int ex = ux.i>>23 & 0xff;
int ey = uy.i>>23 & 0xff;
uint32_t sx = ux.i & 0x80000000;
uint32_t i;
uint32_t uxi = ux.i;
if (uy.i<<1 == 0 || isnan(y) || ex == 0xff)
return (x*y)/(x*y);
if (uxi<<1 <= uy.i<<1) {
if (uxi<<1 == uy.i<<1)
return 0*x;
return x;
}
/* normalize x and y */
if (!ex) {
for (i = uxi<<9; i>>31 == 0; ex--, i <<= 1);
uxi <<= -ex + 1;
} else {
uxi &= -1U >> 9;
uxi |= 1U << 23;
}
if (!ey) {
for (i = uy.i<<9; i>>31 == 0; ey--, i <<= 1);
uy.i <<= -ey + 1;
} else {
uy.i &= -1U >> 9;
uy.i |= 1U << 23;
}
/* x mod y */
for (; ex > ey; ex--) {
i = uxi - uy.i;
if (i >> 31 == 0) {
if (i == 0)
return 0*x;
uxi = i;
}
uxi <<= 1;
}
i = uxi - uy.i;
if (i >> 31 == 0) {
if (i == 0)
return 0*x;
uxi = i;
}
for (; uxi>>23 == 0; uxi <<= 1, ex--);
/* scale result up */
if (ex > 0) {
uxi -= 1U << 23;
uxi |= (uint32_t)ex << 23;
} else {
uxi >>= -ex + 1;
}
uxi |= sx;
ux.i = uxi;
return ux.f;
}
#include <math.h>
#include <stdint.h>
double __cdecl frexp(double x, int *e)
{
union { double d; uint64_t i; } y = { x };
int ee = y.i>>52 & 0x7ff;
if (!ee) {
if (x) {
x = frexp(x*0x1p64, e);
*e -= 64;
} else *e = 0;
return x;
} else if (ee == 0x7ff) {
return x;
}
*e = ee - 0x3fe;
y.i &= 0x800fffffffffffffull;
y.i |= 0x3fe0000000000000ull;
return y.d;
}
#include <math.h>
#include <stdint.h>
#include "libm.h"
float __cdecl frexpf(float x, int *e)
{
union { float f; uint32_t i; } y = { x };
int ee = y.i>>23 & 0xff;
if (!ee) {
if (x) {
x = frexpf(x*0x1p64, e);
*e -= 64;
} else *e = 0;
return x;
} else if (ee == 0xff) {
return x;
}
*e = ee - 0x7e;
y.i &= 0x807ffffful;
y.i |= 0x3f000000ul;
return y.f;
}
#include <math.h>
#include <stdint.h>
#include <float.h>
#include "libm.h"
#if FLT_EVAL_METHOD > 1U && LDBL_MANT_DIG == 64
#define SPLIT (0x1p32 + 1)
#else
#define SPLIT (0x1p27 + 1)
#endif
static void sq(double_t *hi, double_t *lo, double x)
{
double_t xh, xl, xc;
xc = (double_t)x*SPLIT;
xh = x - xc + xc;
xl = x - xh;
*hi = (double_t)x*x;
*lo = xh*xh - *hi + 2*xh*xl + xl*xl;
}
double __cdecl _hypot(double x, double y)
{
union {double f; uint64_t i;} ux = {x}, uy = {y}, ut;
int ex, ey;
double_t hx, lx, hy, ly, z;
/* arrange |x| >= |y| */
ux.i &= -1ULL>>1;
uy.i &= -1ULL>>1;
if (ux.i < uy.i) {
ut = ux;
ux = uy;
uy = ut;
}
/* special cases */
ex = ux.i>>52;
ey = uy.i>>52;
x = ux.f;
y = uy.f;
/* note: hypot(inf,nan) == inf */
if (ey == 0x7ff)
return y;
if (ex == 0x7ff || uy.i == 0)
return x;
/* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */
/* 64 difference is enough for ld80 double_t */
if (ex - ey > 64)
return x + y;
/* precise sqrt argument in nearest rounding mode without overflow */
/* xh*xh must not overflow and xl*xl must not underflow in sq */
z = 1;
if (ex > 0x3ff+510) {
z = 0x1p700;
x *= 0x1p-700;
y *= 0x1p-700;
} else if (ey < 0x3ff-450) {
z = 0x1p-700;
x *= 0x1p700;
y *= 0x1p700;
}
sq(&hx, &lx, x);
sq(&hy, &ly, y);
return z*sqrt(ly+lx+hy+hx);
}
#include <math.h>
#include <stdint.h>
#include "libm.h"
float __cdecl _hypotf(float x, float y)
{
union {float f; uint32_t i;} ux = {x}, uy = {y}, ut;
float_t z;
ux.i &= -1U>>1;
uy.i &= -1U>>1;
if (ux.i < uy.i) {
ut = ux;
ux = uy;
uy = ut;
}
x = ux.f;
y = uy.f;
if (uy.i == 0xff<<23)
return y;
if (ux.i >= 0xff<<23 || uy.i == 0 || ux.i - uy.i >= 25<<23)
return x + y;
z = 1;
if (ux.i >= (0x7f+60)<<23) {
z = 0x1p90f;
x *= 0x1p-90f;
y *= 0x1p-90f;
} else if (uy.i < (0x7f-60)<<23) {
z = 0x1p-90f;
x *= 0x1p90f;
y *= 0x1p90f;
}
return z*sqrtf((double)x*x + (double)y*y);
}
#include <limits.h>
#include "libm.h"
int __cdecl ilogb(double x)
{
#pragma STDC FENV_ACCESS ON
union {double f; uint64_t i;} u = {x};
uint64_t i = u.i;
int e = i>>52 & 0x7ff;
if (!e) {
i <<= 12;
if (i == 0) {
FORCE_EVAL(0/0.0f);
return FP_ILOGB0;
}
/* subnormal x */
for (e = -0x3ff; i>>63 == 0; e--, i<<=1);
return e;
}
if (e == 0x7ff) {
FORCE_EVAL(0/0.0f);
return i<<12 ? FP_ILOGBNAN : INT_MAX;
}
return e - 0x3ff;
}
#include <limits.h>
#include "libm.h"
int __cdecl ilogbf(float x)
{
#pragma STDC FENV_ACCESS ON
union {float f; uint32_t i;} u = {x};
uint32_t i = u.i;
int e = i>>23 & 0xff;
if (!e) {
i <<= 9;
if (i == 0) {
FORCE_EVAL(0/0.0f);
return FP_ILOGB0;
}
/* subnormal x */
for (e = -0x7f; i>>31 == 0; e--, i<<=1);
return e;
}
if (e == 0xff) {
FORCE_EVAL(0/0.0f);
return i<<9 ? FP_ILOGBNAN : INT_MAX;
}
return e - 0x7f;
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_j0.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* j0(x), y0(x)
* Bessel function of the first and second kinds of order zero.
* Method -- j0(x):
* 1. For tiny x, we use j0(x) = 1 - x^2/4 + x^4/64 - ...
* 2. Reduce x to |x| since j0(x)=j0(-x), and
* for x in (0,2)
* j0(x) = 1-z/4+ z^2*R0/S0, where z = x*x;
* (precision: |j0-1+z/4-z^2R0/S0 |<2**-63.67 )
* for x in (2,inf)
* j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0))
* where x0 = x-pi/4. It is better to compute sin(x0),cos(x0)
* as follow:
* cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
* = 1/sqrt(2) * (cos(x) + sin(x))
* sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4)
* = 1/sqrt(2) * (sin(x) - cos(x))
* (To avoid cancellation, use
* sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
* to compute the worse one.)
*
* 3 Special cases
* j0(nan)= nan
* j0(0) = 1
* j0(inf) = 0
*
* Method -- y0(x):
* 1. For x<2.
* Since
* y0(x) = 2/pi*(j0(x)*(ln(x/2)+Euler) + x^2/4 - ...)
* therefore y0(x)-2/pi*j0(x)*ln(x) is an even function.
* We use the following function to approximate y0,
* y0(x) = U(z)/V(z) + (2/pi)*(j0(x)*ln(x)), z= x^2
* where
* U(z) = u00 + u01*z + ... + u06*z^6
* V(z) = 1 + v01*z + ... + v04*z^4
* with absolute approximation error bounded by 2**-72.
* Note: For tiny x, U/V = u0 and j0(x)~1, hence
* y0(tiny) = u0 + (2/pi)*ln(tiny), (choose tiny<2**-27)
* 2. For x>=2.
* y0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)+q0(x)*sin(x0))
* where x0 = x-pi/4. It is better to compute sin(x0),cos(x0)
* by the method mentioned above.
* 3. Special cases: y0(0)=-inf, y0(x<0)=NaN, y0(inf)=0.
*/
#include "libm.h"
static double pzero(double), qzero(double);
static const double
invsqrtpi = 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
tpi = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */
/* common method when |x|>=2 */
static double common(uint32_t ix, double x, int y0)
{
double s,c,ss,cc,z;
/*
* j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x-pi/4)-q0(x)*sin(x-pi/4))
* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x-pi/4)+q0(x)*cos(x-pi/4))
*
* sin(x-pi/4) = (sin(x) - cos(x))/sqrt(2)
* cos(x-pi/4) = (sin(x) + cos(x))/sqrt(2)
* sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
*/
s = sin(x);
c = cos(x);
if (y0)
c = -c;
cc = s+c;
/* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */
if (ix < 0x7fe00000) {
ss = s-c;
z = -cos(2*x);
if (s*c < 0)
cc = z/ss;
else
ss = z/cc;
if (ix < 0x48000000) {
if (y0)
ss = -ss;
cc = pzero(x)*cc-qzero(x)*ss;
}
}
return invsqrtpi*cc/sqrt(x);
}
/* R0/S0 on [0, 2.00] */
static const double
R02 = 1.56249999999999947958e-02, /* 0x3F8FFFFF, 0xFFFFFFFD */
R03 = -1.89979294238854721751e-04, /* 0xBF28E6A5, 0xB61AC6E9 */
R04 = 1.82954049532700665670e-06, /* 0x3EBEB1D1, 0x0C503919 */
R05 = -4.61832688532103189199e-09, /* 0xBE33D5E7, 0x73D63FCE */
S01 = 1.56191029464890010492e-02, /* 0x3F8FFCE8, 0x82C8C2A4 */
S02 = 1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */
S03 = 5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */
S04 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */
double __cdecl _j0(double x)
{
double z,r,s;
uint32_t ix;
GET_HIGH_WORD(ix, x);
ix &= 0x7fffffff;
/* j0(+-inf)=0, j0(nan)=nan */
if (ix >= 0x7ff00000)
return 1/(x*x);
x = fabs(x);
if (ix >= 0x40000000) { /* |x| >= 2 */
/* large ulp error near zeros: 2.4, 5.52, 8.6537,.. */
return common(ix,x,0);
}
/* 1 - x*x/4 + x*x*R(x^2)/S(x^2) */
if (ix >= 0x3f200000) { /* |x| >= 2**-13 */
/* up to 4ulp error close to 2 */
z = x*x;
r = z*(R02+z*(R03+z*(R04+z*R05)));
s = 1+z*(S01+z*(S02+z*(S03+z*S04)));
return (1+x/2)*(1-x/2) + z*(r/s);
}
/* 1 - x*x/4 */
/* prevent underflow */
/* inexact should be raised when x!=0, this is not done correctly */
if (ix >= 0x38000000) /* |x| >= 2**-127 */
x = 0.25*x*x;
return 1 - x;
}
static const double
u00 = -7.38042951086872317523e-02, /* 0xBFB2E4D6, 0x99CBD01F */
u01 = 1.76666452509181115538e-01, /* 0x3FC69D01, 0x9DE9E3FC */
u02 = -1.38185671945596898896e-02, /* 0xBF8C4CE8, 0xB16CFA97 */
u03 = 3.47453432093683650238e-04, /* 0x3F36C54D, 0x20B29B6B */
u04 = -3.81407053724364161125e-06, /* 0xBECFFEA7, 0x73D25CAD */
u05 = 1.95590137035022920206e-08, /* 0x3E550057, 0x3B4EABD4 */
u06 = -3.98205194132103398453e-11, /* 0xBDC5E43D, 0x693FB3C8 */
v01 = 1.27304834834123699328e-02, /* 0x3F8A1270, 0x91C9C71A */
v02 = 7.60068627350353253702e-05, /* 0x3F13ECBB, 0xF578C6C1 */
v03 = 2.59150851840457805467e-07, /* 0x3E91642D, 0x7FF202FD */
v04 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */
double __cdecl _y0(double x)
{
double z,u,v;
uint32_t ix,lx;
EXTRACT_WORDS(ix, lx, x);
/* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */
if ((ix<<1 | lx) == 0)
return -1/0.0;
if (ix>>31)
return 0/0.0;
if (ix >= 0x7ff00000)
return 1/x;
if (ix >= 0x40000000) { /* x >= 2 */
/* large ulp errors near zeros: 3.958, 7.086,.. */
return common(ix,x,1);
}
/* U(x^2)/V(x^2) + (2/pi)*j0(x)*log(x) */
if (ix >= 0x3e400000) { /* x >= 2**-27 */
/* large ulp error near the first zero, x ~= 0.89 */
z = x*x;
u = u00+z*(u01+z*(u02+z*(u03+z*(u04+z*(u05+z*u06)))));
v = 1.0+z*(v01+z*(v02+z*(v03+z*v04)));
return u/v + tpi*(j0(x)*log(x));
}
return u00 + tpi*log(x);
}
/* The asymptotic expansions of pzero is
* 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x.
* For x >= 2, We approximate pzero by
* pzero(x) = 1 + (R/S)
* where R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10
* S = 1 + pS0*s^2 + ... + pS4*s^10
* and
* | pzero(x)-1-R/S | <= 2 ** ( -60.26)
*/
static const double pR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
-7.03124999999900357484e-02, /* 0xBFB1FFFF, 0xFFFFFD32 */
-8.08167041275349795626e+00, /* 0xC02029D0, 0xB44FA779 */
-2.57063105679704847262e+02, /* 0xC0701102, 0x7B19E863 */
-2.48521641009428822144e+03, /* 0xC0A36A6E, 0xCD4DCAFC */
-5.25304380490729545272e+03, /* 0xC0B4850B, 0x36CC643D */
};
static const double pS8[5] = {
1.16534364619668181717e+02, /* 0x405D2233, 0x07A96751 */
3.83374475364121826715e+03, /* 0x40ADF37D, 0x50596938 */
4.05978572648472545552e+04, /* 0x40E3D2BB, 0x6EB6B05F */
1.16752972564375915681e+05, /* 0x40FC810F, 0x8F9FA9BD */
4.76277284146730962675e+04, /* 0x40E74177, 0x4F2C49DC */
};
static const double pR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
-1.14125464691894502584e-11, /* 0xBDA918B1, 0x47E495CC */
-7.03124940873599280078e-02, /* 0xBFB1FFFF, 0xE69AFBC6 */
-4.15961064470587782438e+00, /* 0xC010A370, 0xF90C6BBF */
-6.76747652265167261021e+01, /* 0xC050EB2F, 0x5A7D1783 */
-3.31231299649172967747e+02, /* 0xC074B3B3, 0x6742CC63 */
-3.46433388365604912451e+02, /* 0xC075A6EF, 0x28A38BD7 */
};
static const double pS5[5] = {
6.07539382692300335975e+01, /* 0x404E6081, 0x0C98C5DE */
1.05125230595704579173e+03, /* 0x40906D02, 0x5C7E2864 */
5.97897094333855784498e+03, /* 0x40B75AF8, 0x8FBE1D60 */
9.62544514357774460223e+03, /* 0x40C2CCB8, 0xFA76FA38 */
2.40605815922939109441e+03, /* 0x40A2CC1D, 0xC70BE864 */
};
static const double pR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
-2.54704601771951915620e-09, /* 0xBE25E103, 0x6FE1AA86 */
-7.03119616381481654654e-02, /* 0xBFB1FFF6, 0xF7C0E24B */
-2.40903221549529611423e+00, /* 0xC00345B2, 0xAEA48074 */
-2.19659774734883086467e+01, /* 0xC035F74A, 0x4CB94E14 */
-5.80791704701737572236e+01, /* 0xC04D0A22, 0x420A1A45 */
-3.14479470594888503854e+01, /* 0xC03F72AC, 0xA892D80F */
};
static const double pS3[5] = {
3.58560338055209726349e+01, /* 0x4041ED92, 0x84077DD3 */
3.61513983050303863820e+02, /* 0x40769839, 0x464A7C0E */
1.19360783792111533330e+03, /* 0x4092A66E, 0x6D1061D6 */
1.12799679856907414432e+03, /* 0x40919FFC, 0xB8C39B7E */
1.73580930813335754692e+02, /* 0x4065B296, 0xFC379081 */
};
static const double pR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
-8.87534333032526411254e-08, /* 0xBE77D316, 0xE927026D */
-7.03030995483624743247e-02, /* 0xBFB1FF62, 0x495E1E42 */
-1.45073846780952986357e+00, /* 0xBFF73639, 0x8A24A843 */
-7.63569613823527770791e+00, /* 0xC01E8AF3, 0xEDAFA7F3 */
-1.11931668860356747786e+01, /* 0xC02662E6, 0xC5246303 */
-3.23364579351335335033e+00, /* 0xC009DE81, 0xAF8FE70F */
};
static const double pS2[5] = {
2.22202997532088808441e+01, /* 0x40363865, 0x908B5959 */
1.36206794218215208048e+02, /* 0x4061069E, 0x0EE8878F */
2.70470278658083486789e+02, /* 0x4070E786, 0x42EA079B */
1.53875394208320329881e+02, /* 0x40633C03, 0x3AB6FAFF */
1.46576176948256193810e+01, /* 0x402D50B3, 0x44391809 */
};
static double pzero(double x)
{
const double *p,*q;
double_t z,r,s;
uint32_t ix;
GET_HIGH_WORD(ix, x);
ix &= 0x7fffffff;
if (ix >= 0x40200000){p = pR8; q = pS8;}
else if (ix >= 0x40122E8B){p = pR5; q = pS5;}
else if (ix >= 0x4006DB6D){p = pR3; q = pS3;}
else /*ix >= 0x40000000*/ {p = pR2; q = pS2;}
z = 1.0/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
return 1.0 + r/s;
}
/* For x >= 8, the asymptotic expansions of qzero is
* -1/8 s + 75/1024 s^3 - ..., where s = 1/x.
* We approximate pzero by
* qzero(x) = s*(-1.25 + (R/S))
* where R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10
* S = 1 + qS0*s^2 + ... + qS5*s^12
* and
* | qzero(x)/s +1.25-R/S | <= 2 ** ( -61.22)
*/
static const double qR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
7.32421874999935051953e-02, /* 0x3FB2BFFF, 0xFFFFFE2C */
1.17682064682252693899e+01, /* 0x40278952, 0x5BB334D6 */
5.57673380256401856059e+02, /* 0x40816D63, 0x15301825 */
8.85919720756468632317e+03, /* 0x40C14D99, 0x3E18F46D */
3.70146267776887834771e+04, /* 0x40E212D4, 0x0E901566 */
};
static const double qS8[6] = {
1.63776026895689824414e+02, /* 0x406478D5, 0x365B39BC */
8.09834494656449805916e+03, /* 0x40BFA258, 0x4E6B0563 */
1.42538291419120476348e+05, /* 0x41016652, 0x54D38C3F */
8.03309257119514397345e+05, /* 0x412883DA, 0x83A52B43 */
8.40501579819060512818e+05, /* 0x4129A66B, 0x28DE0B3D */
-3.43899293537866615225e+05, /* 0xC114FD6D, 0x2C9530C5 */
};
static const double qR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
1.84085963594515531381e-11, /* 0x3DB43D8F, 0x29CC8CD9 */
7.32421766612684765896e-02, /* 0x3FB2BFFF, 0xD172B04C */
5.83563508962056953777e+00, /* 0x401757B0, 0xB9953DD3 */
1.35111577286449829671e+02, /* 0x4060E392, 0x0A8788E9 */
1.02724376596164097464e+03, /* 0x40900CF9, 0x9DC8C481 */
1.98997785864605384631e+03, /* 0x409F17E9, 0x53C6E3A6 */
};
static const double qS5[6] = {
8.27766102236537761883e+01, /* 0x4054B1B3, 0xFB5E1543 */
2.07781416421392987104e+03, /* 0x40A03BA0, 0xDA21C0CE */
1.88472887785718085070e+04, /* 0x40D267D2, 0x7B591E6D */
5.67511122894947329769e+04, /* 0x40EBB5E3, 0x97E02372 */
3.59767538425114471465e+04, /* 0x40E19118, 0x1F7A54A0 */
-5.35434275601944773371e+03, /* 0xC0B4EA57, 0xBEDBC609 */
};
static const double qR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
4.37741014089738620906e-09, /* 0x3E32CD03, 0x6ADECB82 */
7.32411180042911447163e-02, /* 0x3FB2BFEE, 0x0E8D0842 */
3.34423137516170720929e+00, /* 0x400AC0FC, 0x61149CF5 */
4.26218440745412650017e+01, /* 0x40454F98, 0x962DAEDD */
1.70808091340565596283e+02, /* 0x406559DB, 0xE25EFD1F */
1.66733948696651168575e+02, /* 0x4064D77C, 0x81FA21E0 */
};
static const double qS3[6] = {
4.87588729724587182091e+01, /* 0x40486122, 0xBFE343A6 */
7.09689221056606015736e+02, /* 0x40862D83, 0x86544EB3 */
3.70414822620111362994e+03, /* 0x40ACF04B, 0xE44DFC63 */
6.46042516752568917582e+03, /* 0x40B93C6C, 0xD7C76A28 */
2.51633368920368957333e+03, /* 0x40A3A8AA, 0xD94FB1C0 */
-1.49247451836156386662e+02, /* 0xC062A7EB, 0x201CF40F */
};
static const double qR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
1.50444444886983272379e-07, /* 0x3E84313B, 0x54F76BDB */
7.32234265963079278272e-02, /* 0x3FB2BEC5, 0x3E883E34 */
1.99819174093815998816e+00, /* 0x3FFFF897, 0xE727779C */
1.44956029347885735348e+01, /* 0x402CFDBF, 0xAAF96FE5 */
3.16662317504781540833e+01, /* 0x403FAA8E, 0x29FBDC4A */
1.62527075710929267416e+01, /* 0x403040B1, 0x71814BB4 */
};
static const double qS2[6] = {
3.03655848355219184498e+01, /* 0x403E5D96, 0xF7C07AED */
2.69348118608049844624e+02, /* 0x4070D591, 0xE4D14B40 */
8.44783757595320139444e+02, /* 0x408A6645, 0x22B3BF22 */
8.82935845112488550512e+02, /* 0x408B977C, 0x9C5CC214 */
2.12666388511798828631e+02, /* 0x406A9553, 0x0E001365 */
-5.31095493882666946917e+00, /* 0xC0153E6A, 0xF8B32931 */
};
static double qzero(double x)
{
const double *p,*q;
double_t s,r,z;
uint32_t ix;
GET_HIGH_WORD(ix, x);
ix &= 0x7fffffff;
if (ix >= 0x40200000){p = qR8; q = qS8;}
else if (ix >= 0x40122E8B){p = qR5; q = qS5;}
else if (ix >= 0x4006DB6D){p = qR3; q = qS3;}
else /*ix >= 0x40000000*/ {p = qR2; q = qS2;}
z = 1.0/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
return (-.125 + r/s)/x;
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_j1.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* j1(x), y1(x)
* Bessel function of the first and second kinds of order zero.
* Method -- j1(x):
* 1. For tiny x, we use j1(x) = x/2 - x^3/16 + x^5/384 - ...
* 2. Reduce x to |x| since j1(x)=-j1(-x), and
* for x in (0,2)
* j1(x) = x/2 + x*z*R0/S0, where z = x*x;
* (precision: |j1/x - 1/2 - R0/S0 |<2**-61.51 )
* for x in (2,inf)
* j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x1)-q1(x)*sin(x1))
* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1))
* where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1)
* as follow:
* cos(x1) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4)
* = 1/sqrt(2) * (sin(x) - cos(x))
* sin(x1) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
* = -1/sqrt(2) * (sin(x) + cos(x))
* (To avoid cancellation, use
* sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
* to compute the worse one.)
*
* 3 Special cases
* j1(nan)= nan
* j1(0) = 0
* j1(inf) = 0
*
* Method -- y1(x):
* 1. screen out x<=0 cases: y1(0)=-inf, y1(x<0)=NaN
* 2. For x<2.
* Since
* y1(x) = 2/pi*(j1(x)*(ln(x/2)+Euler)-1/x-x/2+5/64*x^3-...)
* therefore y1(x)-2/pi*j1(x)*ln(x)-1/x is an odd function.
* We use the following function to approximate y1,
* y1(x) = x*U(z)/V(z) + (2/pi)*(j1(x)*ln(x)-1/x), z= x^2
* where for x in [0,2] (abs err less than 2**-65.89)
* U(z) = U0[0] + U0[1]*z + ... + U0[4]*z^4
* V(z) = 1 + v0[0]*z + ... + v0[4]*z^5
* Note: For tiny x, 1/x dominate y1 and hence
* y1(tiny) = -2/pi/tiny, (choose tiny<2**-54)
* 3. For x>=2.
* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1))
* where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1)
* by method mentioned above.
*/
#include "libm.h"
static double pone(double), qone(double);
static const double
invsqrtpi = 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
tpi = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */
static double common(uint32_t ix, double x, int y1, int sign)
{
double z,s,c,ss,cc;
/*
* j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x-3pi/4)-q1(x)*sin(x-3pi/4))
* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x-3pi/4)+q1(x)*cos(x-3pi/4))
*
* sin(x-3pi/4) = -(sin(x) + cos(x))/sqrt(2)
* cos(x-3pi/4) = (sin(x) - cos(x))/sqrt(2)
* sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
*/
s = sin(x);
if (y1)
s = -s;
c = cos(x);
cc = s-c;
if (ix < 0x7fe00000) {
/* avoid overflow in 2*x */
ss = -s-c;
z = cos(2*x);
if (s*c > 0)
cc = z/ss;
else
ss = z/cc;
if (ix < 0x48000000) {
if (y1)
ss = -ss;
cc = pone(x)*cc-qone(x)*ss;
}
}
if (sign)
cc = -cc;
return invsqrtpi*cc/sqrt(x);
}
/* R0/S0 on [0,2] */
static const double
r00 = -6.25000000000000000000e-02, /* 0xBFB00000, 0x00000000 */
r01 = 1.40705666955189706048e-03, /* 0x3F570D9F, 0x98472C61 */
r02 = -1.59955631084035597520e-05, /* 0xBEF0C5C6, 0xBA169668 */
r03 = 4.96727999609584448412e-08, /* 0x3E6AAAFA, 0x46CA0BD9 */
s01 = 1.91537599538363460805e-02, /* 0x3F939D0B, 0x12637E53 */
s02 = 1.85946785588630915560e-04, /* 0x3F285F56, 0xB9CDF664 */
s03 = 1.17718464042623683263e-06, /* 0x3EB3BFF8, 0x333F8498 */
s04 = 5.04636257076217042715e-09, /* 0x3E35AC88, 0xC97DFF2C */
s05 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */
double __cdecl _j1(double x)
{
double z,r,s;
uint32_t ix;
int sign;
GET_HIGH_WORD(ix, x);
sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x7ff00000)
return 1/(x*x);
if (ix >= 0x40000000) /* |x| >= 2 */
return common(ix, fabs(x), 0, sign);
if (ix >= 0x38000000) { /* |x| >= 2**-127 */
z = x*x;
r = z*(r00+z*(r01+z*(r02+z*r03)));
s = 1+z*(s01+z*(s02+z*(s03+z*(s04+z*s05))));
z = r/s;
} else
/* avoid underflow, raise inexact if x!=0 */
z = x;
return (0.5 + z)*x;
}
static const double U0[5] = {
-1.96057090646238940668e-01, /* 0xBFC91866, 0x143CBC8A */
5.04438716639811282616e-02, /* 0x3FA9D3C7, 0x76292CD1 */
-1.91256895875763547298e-03, /* 0xBF5F55E5, 0x4844F50F */
2.35252600561610495928e-05, /* 0x3EF8AB03, 0x8FA6B88E */
-9.19099158039878874504e-08, /* 0xBE78AC00, 0x569105B8 */
};
static const double V0[5] = {
1.99167318236649903973e-02, /* 0x3F94650D, 0x3F4DA9F0 */
2.02552581025135171496e-04, /* 0x3F2A8C89, 0x6C257764 */
1.35608801097516229404e-06, /* 0x3EB6C05A, 0x894E8CA6 */
6.22741452364621501295e-09, /* 0x3E3ABF1D, 0x5BA69A86 */
1.66559246207992079114e-11, /* 0x3DB25039, 0xDACA772A */
};
double __cdecl _y1(double x)
{
double z,u,v;
uint32_t ix,lx;
EXTRACT_WORDS(ix, lx, x);
/* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */
if ((ix<<1 | lx) == 0)
return -1/0.0;
if (ix>>31)
return 0/0.0;
if (ix >= 0x7ff00000)
return 1/x;
if (ix >= 0x40000000) /* x >= 2 */
return common(ix, x, 1, 0);
if (ix < 0x3c900000) /* x < 2**-54 */
return -tpi/x;
z = x*x;
u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4])));
v = 1+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4]))));
return x*(u/v) + tpi*(j1(x)*log(x)-1/x);
}
/* For x >= 8, the asymptotic expansions of pone is
* 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x.
* We approximate pone by
* pone(x) = 1 + (R/S)
* where R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10
* S = 1 + ps0*s^2 + ... + ps4*s^10
* and
* | pone(x)-1-R/S | <= 2 ** ( -60.06)
*/
static const double pr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
1.17187499999988647970e-01, /* 0x3FBDFFFF, 0xFFFFFCCE */
1.32394806593073575129e+01, /* 0x402A7A9D, 0x357F7FCE */
4.12051854307378562225e+02, /* 0x4079C0D4, 0x652EA590 */
3.87474538913960532227e+03, /* 0x40AE457D, 0xA3A532CC */
7.91447954031891731574e+03, /* 0x40BEEA7A, 0xC32782DD */
};
static const double ps8[5] = {
1.14207370375678408436e+02, /* 0x405C8D45, 0x8E656CAC */
3.65093083420853463394e+03, /* 0x40AC85DC, 0x964D274F */
3.69562060269033463555e+04, /* 0x40E20B86, 0x97C5BB7F */
9.76027935934950801311e+04, /* 0x40F7D42C, 0xB28F17BB */
3.08042720627888811578e+04, /* 0x40DE1511, 0x697A0B2D */
};
static const double pr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
1.31990519556243522749e-11, /* 0x3DAD0667, 0xDAE1CA7D */
1.17187493190614097638e-01, /* 0x3FBDFFFF, 0xE2C10043 */
6.80275127868432871736e+00, /* 0x401B3604, 0x6E6315E3 */
1.08308182990189109773e+02, /* 0x405B13B9, 0x452602ED */
5.17636139533199752805e+02, /* 0x40802D16, 0xD052D649 */
5.28715201363337541807e+02, /* 0x408085B8, 0xBB7E0CB7 */
};
static const double ps5[5] = {
5.92805987221131331921e+01, /* 0x404DA3EA, 0xA8AF633D */
9.91401418733614377743e+02, /* 0x408EFB36, 0x1B066701 */
5.35326695291487976647e+03, /* 0x40B4E944, 0x5706B6FB */
7.84469031749551231769e+03, /* 0x40BEA4B0, 0xB8A5BB15 */
1.50404688810361062679e+03, /* 0x40978030, 0x036F5E51 */
};
static const double pr3[6] = {
3.02503916137373618024e-09, /* 0x3E29FC21, 0xA7AD9EDD */
1.17186865567253592491e-01, /* 0x3FBDFFF5, 0x5B21D17B */
3.93297750033315640650e+00, /* 0x400F76BC, 0xE85EAD8A */
3.51194035591636932736e+01, /* 0x40418F48, 0x9DA6D129 */
9.10550110750781271918e+01, /* 0x4056C385, 0x4D2C1837 */
4.85590685197364919645e+01, /* 0x4048478F, 0x8EA83EE5 */
};
static const double ps3[5] = {
3.47913095001251519989e+01, /* 0x40416549, 0xA134069C */
3.36762458747825746741e+02, /* 0x40750C33, 0x07F1A75F */
1.04687139975775130551e+03, /* 0x40905B7C, 0x5037D523 */
8.90811346398256432622e+02, /* 0x408BD67D, 0xA32E31E9 */
1.03787932439639277504e+02, /* 0x4059F26D, 0x7C2EED53 */
};
static const double pr2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
1.07710830106873743082e-07, /* 0x3E7CE9D4, 0xF65544F4 */
1.17176219462683348094e-01, /* 0x3FBDFF42, 0xBE760D83 */
2.36851496667608785174e+00, /* 0x4002F2B7, 0xF98FAEC0 */
1.22426109148261232917e+01, /* 0x40287C37, 0x7F71A964 */
1.76939711271687727390e+01, /* 0x4031B1A8, 0x177F8EE2 */
5.07352312588818499250e+00, /* 0x40144B49, 0xA574C1FE */
};
static const double ps2[5] = {
2.14364859363821409488e+01, /* 0x40356FBD, 0x8AD5ECDC */
1.25290227168402751090e+02, /* 0x405F5293, 0x14F92CD5 */
2.32276469057162813669e+02, /* 0x406D08D8, 0xD5A2DBD9 */
1.17679373287147100768e+02, /* 0x405D6B7A, 0xDA1884A9 */
8.36463893371618283368e+00, /* 0x4020BAB1, 0xF44E5192 */
};
static double pone(double x)
{
const double *p,*q;
double_t z,r,s;
uint32_t ix;
GET_HIGH_WORD(ix, x);
ix &= 0x7fffffff;
if (ix >= 0x40200000){p = pr8; q = ps8;}
else if (ix >= 0x40122E8B){p = pr5; q = ps5;}
else if (ix >= 0x4006DB6D){p = pr3; q = ps3;}
else /*ix >= 0x40000000*/ {p = pr2; q = ps2;}
z = 1.0/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
return 1.0+ r/s;
}
/* For x >= 8, the asymptotic expansions of qone is
* 3/8 s - 105/1024 s^3 - ..., where s = 1/x.
* We approximate pone by
* qone(x) = s*(0.375 + (R/S))
* where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10
* S = 1 + qs1*s^2 + ... + qs6*s^12
* and
* | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13)
*/
static const double qr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
-1.02539062499992714161e-01, /* 0xBFBA3FFF, 0xFFFFFDF3 */
-1.62717534544589987888e+01, /* 0xC0304591, 0xA26779F7 */
-7.59601722513950107896e+02, /* 0xC087BCD0, 0x53E4B576 */
-1.18498066702429587167e+04, /* 0xC0C724E7, 0x40F87415 */
-4.84385124285750353010e+04, /* 0xC0E7A6D0, 0x65D09C6A */
};
static const double qs8[6] = {
1.61395369700722909556e+02, /* 0x40642CA6, 0xDE5BCDE5 */
7.82538599923348465381e+03, /* 0x40BE9162, 0xD0D88419 */
1.33875336287249578163e+05, /* 0x4100579A, 0xB0B75E98 */
7.19657723683240939863e+05, /* 0x4125F653, 0x72869C19 */
6.66601232617776375264e+05, /* 0x412457D2, 0x7719AD5C */
-2.94490264303834643215e+05, /* 0xC111F969, 0x0EA5AA18 */
};
static const double qr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
-2.08979931141764104297e-11, /* 0xBDB6FA43, 0x1AA1A098 */
-1.02539050241375426231e-01, /* 0xBFBA3FFF, 0xCB597FEF */
-8.05644828123936029840e+00, /* 0xC0201CE6, 0xCA03AD4B */
-1.83669607474888380239e+02, /* 0xC066F56D, 0x6CA7B9B0 */
-1.37319376065508163265e+03, /* 0xC09574C6, 0x6931734F */
-2.61244440453215656817e+03, /* 0xC0A468E3, 0x88FDA79D */
};
static const double qs5[6] = {
8.12765501384335777857e+01, /* 0x405451B2, 0xFF5A11B2 */
1.99179873460485964642e+03, /* 0x409F1F31, 0xE77BF839 */
1.74684851924908907677e+04, /* 0x40D10F1F, 0x0D64CE29 */
4.98514270910352279316e+04, /* 0x40E8576D, 0xAABAD197 */
2.79480751638918118260e+04, /* 0x40DB4B04, 0xCF7C364B */
-4.71918354795128470869e+03, /* 0xC0B26F2E, 0xFCFFA004 */
};
static const double qr3[6] = {
-5.07831226461766561369e-09, /* 0xBE35CFA9, 0xD38FC84F */
-1.02537829820837089745e-01, /* 0xBFBA3FEB, 0x51AEED54 */
-4.61011581139473403113e+00, /* 0xC01270C2, 0x3302D9FF */
-5.78472216562783643212e+01, /* 0xC04CEC71, 0xC25D16DA */
-2.28244540737631695038e+02, /* 0xC06C87D3, 0x4718D55F */
-2.19210128478909325622e+02, /* 0xC06B66B9, 0x5F5C1BF6 */
};
static const double qs3[6] = {
4.76651550323729509273e+01, /* 0x4047D523, 0xCCD367E4 */
6.73865112676699709482e+02, /* 0x40850EEB, 0xC031EE3E */
3.38015286679526343505e+03, /* 0x40AA684E, 0x448E7C9A */
5.54772909720722782367e+03, /* 0x40B5ABBA, 0xA61D54A6 */
1.90311919338810798763e+03, /* 0x409DBC7A, 0x0DD4DF4B */
-1.35201191444307340817e+02, /* 0xC060E670, 0x290A311F */
};
static const double qr2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
-1.78381727510958865572e-07, /* 0xBE87F126, 0x44C626D2 */
-1.02517042607985553460e-01, /* 0xBFBA3E8E, 0x9148B010 */
-2.75220568278187460720e+00, /* 0xC0060484, 0x69BB4EDA */
-1.96636162643703720221e+01, /* 0xC033A9E2, 0xC168907F */
-4.23253133372830490089e+01, /* 0xC04529A3, 0xDE104AAA */
-2.13719211703704061733e+01, /* 0xC0355F36, 0x39CF6E52 */
};
static const double qs2[6] = {
2.95333629060523854548e+01, /* 0x403D888A, 0x78AE64FF */
2.52981549982190529136e+02, /* 0x406F9F68, 0xDB821CBA */
7.57502834868645436472e+02, /* 0x4087AC05, 0xCE49A0F7 */
7.39393205320467245656e+02, /* 0x40871B25, 0x48D4C029 */
1.55949003336666123687e+02, /* 0x40637E5E, 0x3C3ED8D4 */
-4.95949898822628210127e+00, /* 0xC013D686, 0xE71BE86B */
};
static double qone(double x)
{
const double *p,*q;
double_t s,r,z;
uint32_t ix;
GET_HIGH_WORD(ix, x);
ix &= 0x7fffffff;
if (ix >= 0x40200000){p = qr8; q = qs8;}
else if (ix >= 0x40122E8B){p = qr5; q = qs5;}
else if (ix >= 0x4006DB6D){p = qr3; q = qs3;}
else /*ix >= 0x40000000*/ {p = qr2; q = qs2;}
z = 1.0/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
return (.375 + r/s)/x;
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_jn.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* jn(n, x), yn(n, x)
* floating point Bessel's function of the 1st and 2nd kind
* of order n
*
* Special cases:
* y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal;
* y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal.
* Note 2. About jn(n,x), yn(n,x)
* For n=0, j0(x) is called,
* for n=1, j1(x) is called,
* for n<=x, forward recursion is used starting
* from values of j0(x) and j1(x).
* for n>x, a continued fraction approximation to
* j(n,x)/j(n-1,x) is evaluated and then backward
* recursion is used starting from a supposed value
* for j(n,x). The resulting value of j(0,x) is
* compared with the actual value to correct the
* supposed value of j(n,x).
*
* yn(n,x) is similar in all respects, except
* that forward recursion is used for all
* values of n>1.
*/
#include "libm.h"
static const double invsqrtpi = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */
double __cdecl _jn(int n, double x)
{
uint32_t ix, lx;
int nm1, i, sign;
double a, b, temp;
EXTRACT_WORDS(ix, lx, x);
sign = ix>>31;
ix &= 0x7fffffff;
if ((ix | (lx|-lx)>>31) > 0x7ff00000) /* nan */
return x;
/* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x)
* Thus, J(-n,x) = J(n,-x)
*/
/* nm1 = |n|-1 is used instead of |n| to handle n==INT_MIN */
if (n == 0)
return j0(x);
if (n < 0) {
nm1 = -(n+1);
x = -x;
sign ^= 1;
} else
nm1 = n-1;
if (nm1 == 0)
return j1(x);
sign &= n; /* even n: 0, odd n: signbit(x) */
x = fabs(x);
if ((ix|lx) == 0 || ix == 0x7ff00000) /* if x is 0 or inf */
b = 0.0;
else if (nm1 < x) {
/* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */
if (ix >= 0x52d00000) { /* x > 2**302 */
/* (x >> n**2)
* Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi)
* Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi)
* Let s=sin(x), c=cos(x),
* xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then
*
* n sin(xn)*sqt2 cos(xn)*sqt2
* ----------------------------------
* 0 s-c c+s
* 1 -s-c -c+s
* 2 -s+c -c-s
* 3 s+c c-s
*/
switch(nm1&3) {
case 0: temp = -cos(x)+sin(x); break;
case 1: temp = -cos(x)-sin(x); break;
case 2: temp = cos(x)-sin(x); break;
default:
case 3: temp = cos(x)+sin(x); break;
}
b = invsqrtpi*temp/sqrt(x);
} else {
a = j0(x);
b = j1(x);
for (i=0; i<nm1; ) {
i++;
temp = b;
b = b*(2.0*i/x) - a; /* avoid underflow */
a = temp;
}
}
} else {
if (ix < 0x3e100000) { /* x < 2**-29 */
/* x is tiny, return the first Taylor expansion of J(n,x)
* J(n,x) = 1/n!*(x/2)^n - ...
*/
if (nm1 > 32) /* underflow */
b = 0.0;
else {
temp = x*0.5;
b = temp;
a = 1.0;
for (i=2; i<=nm1+1; i++) {
a *= (double)i; /* a = n! */
b *= temp; /* b = (x/2)^n */
}
b = b/a;
}
} else {
/* use backward recurrence */
/* x x^2 x^2
* J(n,x)/J(n-1,x) = ---- ------ ------ .....
* 2n - 2(n+1) - 2(n+2)
*
* 1 1 1
* (for large x) = ---- ------ ------ .....
* 2n 2(n+1) 2(n+2)
* -- - ------ - ------ -
* x x x
*
* Let w = 2n/x and h=2/x, then the above quotient
* is equal to the continued fraction:
* 1
* = -----------------------
* 1
* w - -----------------
* 1
* w+h - ---------
* w+2h - ...
*
* To determine how many terms needed, let
* Q(0) = w, Q(1) = w(w+h) - 1,
* Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
* When Q(k) > 1e4 good for single
* When Q(k) > 1e9 good for double
* When Q(k) > 1e17 good for quadruple
*/
/* determine k */
double t,q0,q1,w,h,z,tmp,nf;
int k;
nf = nm1 + 1.0;
w = 2*nf/x;
h = 2/x;
z = w+h;
q0 = w;
q1 = w*z - 1.0;
k = 1;
while (q1 < 1.0e9) {
k += 1;
z += h;
tmp = z*q1 - q0;
q0 = q1;
q1 = tmp;
}
for (t=0.0, i=k; i>=0; i--)
t = 1/(2*(i+nf)/x - t);
a = t;
b = 1.0;
/* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
* Hence, if n*(log(2n/x)) > ...
* single 8.8722839355e+01
* double 7.09782712893383973096e+02
* long double 1.1356523406294143949491931077970765006170e+04
* then recurrent value may overflow and the result is
* likely underflow to zero
*/
tmp = nf*log(fabs(w));
if (tmp < 7.09782712893383973096e+02) {
for (i=nm1; i>0; i--) {
temp = b;
b = b*(2.0*i)/x - a;
a = temp;
}
} else {
for (i=nm1; i>0; i--) {
temp = b;
b = b*(2.0*i)/x - a;
a = temp;
/* scale b to avoid spurious overflow */
if (b > 0x1p500) {
a /= b;
t /= b;
b = 1.0;
}
}
}
z = j0(x);
w = j1(x);
if (fabs(z) >= fabs(w))
b = t*z/b;
else
b = t*w/a;
}
}
return sign ? -b : b;
}
double __cdecl _yn(int n, double x)
{
uint32_t ix, lx, ib;
int nm1, sign, i;
double a, b, temp;
EXTRACT_WORDS(ix, lx, x);
sign = ix>>31;
ix &= 0x7fffffff;
if ((ix | (lx|-lx)>>31) > 0x7ff00000) /* nan */
return x;
if (sign && (ix|lx)!=0) /* x < 0 */
return 0/0.0;
if (ix == 0x7ff00000)
return 0.0;
if (n == 0)
return y0(x);
if (n < 0) {
nm1 = -(n+1);
sign = n&1;
} else {
nm1 = n-1;
sign = 0;
}
if (nm1 == 0)
return sign ? -y1(x) : y1(x);
if (ix >= 0x52d00000) { /* x > 2**302 */
/* (x >> n**2)
* Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi)
* Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi)
* Let s=sin(x), c=cos(x),
* xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then
*
* n sin(xn)*sqt2 cos(xn)*sqt2
* ----------------------------------
* 0 s-c c+s
* 1 -s-c -c+s
* 2 -s+c -c-s
* 3 s+c c-s
*/
switch(nm1&3) {
case 0: temp = -sin(x)-cos(x); break;
case 1: temp = -sin(x)+cos(x); break;
case 2: temp = sin(x)+cos(x); break;
default:
case 3: temp = sin(x)-cos(x); break;
}
b = invsqrtpi*temp/sqrt(x);
} else {
a = y0(x);
b = y1(x);
/* quit if b is -inf */
GET_HIGH_WORD(ib, b);
for (i=0; i<nm1 && ib!=0xfff00000; ){
i++;
temp = b;
b = (2.0*i/x)*b - a;
GET_HIGH_WORD(ib, b);
a = temp;
}
}
return sign ? -b : b;
}
#include <math.h>
double __cdecl ldexp(double x, int n)
{
return scalbn(x, n);
}
#include <math.h>
#include "libm.h"
double __cdecl lgamma(double x)
{
return __lgamma_r(x, &__signgam);
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_lgamma_r.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* lgamma_r(x, signgamp)
* Reentrant version of the logarithm of the Gamma function
* with user provide pointer for the sign of Gamma(x).
*
* Method:
* 1. Argument Reduction for 0 < x <= 8
* Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
* reduce x to a number in [1.5,2.5] by
* lgamma(1+s) = log(s) + lgamma(s)
* for example,
* lgamma(7.3) = log(6.3) + lgamma(6.3)
* = log(6.3*5.3) + lgamma(5.3)
* = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
* 2. Polynomial approximation of lgamma around its
* minimun ymin=1.461632144968362245 to maintain monotonicity.
* On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
* Let z = x-ymin;
* lgamma(x) = -1.214862905358496078218 + z^2*poly(z)
* where
* poly(z) is a 14 degree polynomial.
* 2. Rational approximation in the primary interval [2,3]
* We use the following approximation:
* s = x-2.0;
* lgamma(x) = 0.5*s + s*P(s)/Q(s)
* with accuracy
* |P/Q - (lgamma(x)-0.5s)| < 2**-61.71
* Our algorithms are based on the following observation
*
* zeta(2)-1 2 zeta(3)-1 3
* lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ...
* 2 3
*
* where Euler = 0.5771... is the Euler constant, which is very
* close to 0.5.
*
* 3. For x>=8, we have
* lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
* (better formula:
* lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
* Let z = 1/x, then we approximation
* f(z) = lgamma(x) - (x-0.5)(log(x)-1)
* by
* 3 5 11
* w = w0 + w1*z + w2*z + w3*z + ... + w6*z
* where
* |w - f(z)| < 2**-58.74
*
* 4. For negative x, since (G is gamma function)
* -x*G(-x)*G(x) = pi/sin(pi*x),
* we have
* G(x) = pi/(sin(pi*x)*(-x)*G(-x))
* since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
* Hence, for x<0, signgam = sign(sin(pi*x)) and
* lgamma(x) = log(|Gamma(x)|)
* = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
* Note: one should avoid compute pi*(-x) directly in the
* computation of sin(pi*(-x)).
*
* 5. Special Cases
* lgamma(2+s) ~ s*(1-Euler) for tiny s
* lgamma(1) = lgamma(2) = 0
* lgamma(x) ~ -log(|x|) for tiny x
* lgamma(0) = lgamma(neg.integer) = inf and raise divide-by-zero
* lgamma(inf) = inf
* lgamma(-inf) = inf (bug for bug compatible with C99!?)
*
*/
#include "libm.h"
static const double
pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */
a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */
a2 = 6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */
a3 = 2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */
a4 = 7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */
a5 = 2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */
a6 = 1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */
a7 = 5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */
a8 = 2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */
a9 = 1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */
a10 = 2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */
a11 = 4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */
tc = 1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */
tf = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */
/* tt = -(tail of tf) */
tt = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */
t0 = 4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */
t1 = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */
t2 = 6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */
t3 = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */
t4 = 1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */
t5 = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */
t6 = 6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */
t7 = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */
t8 = 2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */
t9 = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */
t10 = 8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */
t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */
t12 = 3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */
t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */
t14 = 3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */
u0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
u1 = 6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */
u2 = 1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */
u3 = 9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */
u4 = 2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */
u5 = 1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */
v1 = 2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */
v2 = 2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */
v3 = 7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */
v4 = 1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */
v5 = 3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */
s0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
s1 = 2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */
s2 = 3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */
s3 = 1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */
s4 = 2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */
s5 = 1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */
s6 = 3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */
r1 = 1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */
r2 = 7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */
r3 = 1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */
r4 = 1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */
r5 = 7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */
r6 = 7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */
w0 = 4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */
w1 = 8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */
w2 = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */
w3 = 7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */
w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */
w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */
w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
/* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
static double sin_pi(double x)
{
int n;
/* spurious inexact if odd int */
x = 2.0*(x*0.5 - floor(x*0.5)); /* x mod 2.0 */
n = (int)(x*4.0);
n = (n+1)/2;
x -= n*0.5f;
x *= pi;
switch (n) {
default: /* case 4: */
case 0: return __sin(x, 0.0, 0);
case 1: return __cos(x, 0.0);
case 2: return __sin(-x, 0.0, 0);
case 3: return -__cos(x, 0.0);
}
}
double __lgamma_r(double x, int *signgamp)
{
union {double f; uint64_t i;} u = {x};
double_t t,y,z,nadj,p,p1,p2,p3,q,r,w;
uint32_t ix;
int sign,i;
/* purge off +-inf, NaN, +-0, tiny and negative arguments */
*signgamp = 1;
sign = u.i>>63;
ix = u.i>>32 & 0x7fffffff;
if (ix >= 0x7ff00000)
return x*x;
if (ix < (0x3ff-70)<<20) { /* |x|<2**-70, return -log(|x|) */
if(sign) {
x = -x;
*signgamp = -1;
}
return -log(x);
}
if (sign) {
x = -x;
t = sin_pi(x);
if (t == 0.0) /* -integer */
return 1.0/(x-x);
if (t > 0.0)
*signgamp = -1;
else
t = -t;
nadj = log(pi/(t*x));
}
/* purge off 1 and 2 */
if ((ix == 0x3ff00000 || ix == 0x40000000) && (uint32_t)u.i == 0)
r = 0;
/* for x < 2.0 */
else if (ix < 0x40000000) {
if (ix <= 0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */
r = -log(x);
if (ix >= 0x3FE76944) {
y = 1.0 - x;
i = 0;
} else if (ix >= 0x3FCDA661) {
y = x - (tc-1.0);
i = 1;
} else {
y = x;
i = 2;
}
} else {
r = 0.0;
if (ix >= 0x3FFBB4C3) { /* [1.7316,2] */
y = 2.0 - x;
i = 0;
} else if(ix >= 0x3FF3B4C4) { /* [1.23,1.73] */
y = x - tc;
i = 1;
} else {
y = x - 1.0;
i = 2;
}
}
switch (i) {
case 0:
z = y*y;
p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
p = y*p1+p2;
r += (p-0.5*y);
break;
case 1:
z = y*y;
w = z*y;
p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))); /* parallel comp */
p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
p = z*p1-(tt-w*(p2+y*p3));
r += tf + p;
break;
case 2:
p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
p2 = 1.0+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
r += -0.5*y + p1/p2;
}
} else if (ix < 0x40200000) { /* x < 8.0 */
i = (int)x;
y = x - (double)i;
p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
q = 1.0+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
r = 0.5*y+p/q;
z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */
switch (i) {
case 7: z *= y + 6.0; /* FALLTHRU */
case 6: z *= y + 5.0; /* FALLTHRU */
case 5: z *= y + 4.0; /* FALLTHRU */
case 4: z *= y + 3.0; /* FALLTHRU */
case 3: z *= y + 2.0; /* FALLTHRU */
r += log(z);
break;
}
} else if (ix < 0x43900000) { /* 8.0 <= x < 2**58 */
t = log(x);
z = 1.0/x;
y = z*z;
w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
r = (x-0.5)*(t-1.0)+w;
} else /* 2**58 <= x <= inf */
r = x*(log(x)-1.0);
if (sign)
r = nadj - r;
return r;
}
weak_alias(__lgamma_r, lgamma_r);
#include <math.h>
#include "libm.h"
float __cdecl lgammaf(float x)
{
return __lgammaf_r(x, &__signgam);
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_lgammaf_r.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
static const float
pi = 3.1415927410e+00, /* 0x40490fdb */
a0 = 7.7215664089e-02, /* 0x3d9e233f */
a1 = 3.2246702909e-01, /* 0x3ea51a66 */
a2 = 6.7352302372e-02, /* 0x3d89f001 */
a3 = 2.0580807701e-02, /* 0x3ca89915 */
a4 = 7.3855509982e-03, /* 0x3bf2027e */
a5 = 2.8905137442e-03, /* 0x3b3d6ec6 */
a6 = 1.1927076848e-03, /* 0x3a9c54a1 */
a7 = 5.1006977446e-04, /* 0x3a05b634 */
a8 = 2.2086278477e-04, /* 0x39679767 */
a9 = 1.0801156895e-04, /* 0x38e28445 */
a10 = 2.5214456400e-05, /* 0x37d383a2 */
a11 = 4.4864096708e-05, /* 0x383c2c75 */
tc = 1.4616321325e+00, /* 0x3fbb16c3 */
tf = -1.2148628384e-01, /* 0xbdf8cdcd */
/* tt = -(tail of tf) */
tt = 6.6971006518e-09, /* 0x31e61c52 */
t0 = 4.8383611441e-01, /* 0x3ef7b95e */
t1 = -1.4758771658e-01, /* 0xbe17213c */
t2 = 6.4624942839e-02, /* 0x3d845a15 */
t3 = -3.2788541168e-02, /* 0xbd064d47 */
t4 = 1.7970675603e-02, /* 0x3c93373d */
t5 = -1.0314224288e-02, /* 0xbc28fcfe */
t6 = 6.1005386524e-03, /* 0x3bc7e707 */
t7 = -3.6845202558e-03, /* 0xbb7177fe */
t8 = 2.2596477065e-03, /* 0x3b141699 */
t9 = -1.4034647029e-03, /* 0xbab7f476 */
t10 = 8.8108185446e-04, /* 0x3a66f867 */
t11 = -5.3859531181e-04, /* 0xba0d3085 */
t12 = 3.1563205994e-04, /* 0x39a57b6b */
t13 = -3.1275415677e-04, /* 0xb9a3f927 */
t14 = 3.3552918467e-04, /* 0x39afe9f7 */
u0 = -7.7215664089e-02, /* 0xbd9e233f */
u1 = 6.3282704353e-01, /* 0x3f2200f4 */
u2 = 1.4549225569e+00, /* 0x3fba3ae7 */
u3 = 9.7771751881e-01, /* 0x3f7a4bb2 */
u4 = 2.2896373272e-01, /* 0x3e6a7578 */
u5 = 1.3381091878e-02, /* 0x3c5b3c5e */
v1 = 2.4559779167e+00, /* 0x401d2ebe */
v2 = 2.1284897327e+00, /* 0x4008392d */
v3 = 7.6928514242e-01, /* 0x3f44efdf */
v4 = 1.0422264785e-01, /* 0x3dd572af */
v5 = 3.2170924824e-03, /* 0x3b52d5db */
s0 = -7.7215664089e-02, /* 0xbd9e233f */
s1 = 2.1498242021e-01, /* 0x3e5c245a */
s2 = 3.2577878237e-01, /* 0x3ea6cc7a */
s3 = 1.4635047317e-01, /* 0x3e15dce6 */
s4 = 2.6642270386e-02, /* 0x3cda40e4 */
s5 = 1.8402845599e-03, /* 0x3af135b4 */
s6 = 3.1947532989e-05, /* 0x3805ff67 */
r1 = 1.3920053244e+00, /* 0x3fb22d3b */
r2 = 7.2193557024e-01, /* 0x3f38d0c5 */
r3 = 1.7193385959e-01, /* 0x3e300f6e */
r4 = 1.8645919859e-02, /* 0x3c98bf54 */
r5 = 7.7794247773e-04, /* 0x3a4beed6 */
r6 = 7.3266842264e-06, /* 0x36f5d7bd */
w0 = 4.1893854737e-01, /* 0x3ed67f1d */
w1 = 8.3333335817e-02, /* 0x3daaaaab */
w2 = -2.7777778450e-03, /* 0xbb360b61 */
w3 = 7.9365057172e-04, /* 0x3a500cfd */
w4 = -5.9518753551e-04, /* 0xba1c065c */
w5 = 8.3633989561e-04, /* 0x3a5b3dd2 */
w6 = -1.6309292987e-03; /* 0xbad5c4e8 */
/* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
static float sin_pi(float x)
{
double_t y;
int n;
/* spurious inexact if odd int */
x = 2*(x*0.5f - floorf(x*0.5f)); /* x mod 2.0 */
n = (int)(x*4);
n = (n+1)/2;
y = x - n*0.5f;
y *= 3.14159265358979323846;
switch (n) {
default: /* case 4: */
case 0: return __sindf(y);
case 1: return __cosdf(y);
case 2: return __sindf(-y);
case 3: return -__cosdf(y);
}
}
float __lgammaf_r(float x, int *signgamp)
{
union {float f; uint32_t i;} u = {x};
float t,y,z,nadj,p,p1,p2,p3,q,r,w;
uint32_t ix;
int i,sign;
/* purge off +-inf, NaN, +-0, tiny and negative arguments */
*signgamp = 1;
sign = u.i>>31;
ix = u.i & 0x7fffffff;
if (ix >= 0x7f800000)
return x*x;
if (ix < 0x35000000) { /* |x| < 2**-21, return -log(|x|) */
if (sign) {
*signgamp = -1;
x = -x;
}
return -logf(x);
}
if (sign) {
x = -x;
t = sin_pi(x);
if (t == 0.0f) /* -integer */
return 1.0f/(x-x);
if (t > 0.0f)
*signgamp = -1;
else
t = -t;
nadj = logf(pi/(t*x));
}
/* purge off 1 and 2 */
if (ix == 0x3f800000 || ix == 0x40000000)
r = 0;
/* for x < 2.0 */
else if (ix < 0x40000000) {
if (ix <= 0x3f666666) { /* lgamma(x) = lgamma(x+1)-log(x) */
r = -logf(x);
if (ix >= 0x3f3b4a20) {
y = 1.0f - x;
i = 0;
} else if (ix >= 0x3e6d3308) {
y = x - (tc-1.0f);
i = 1;
} else {
y = x;
i = 2;
}
} else {
r = 0.0f;
if (ix >= 0x3fdda618) { /* [1.7316,2] */
y = 2.0f - x;
i = 0;
} else if (ix >= 0x3F9da620) { /* [1.23,1.73] */
y = x - tc;
i = 1;
} else {
y = x - 1.0f;
i = 2;
}
}
switch(i) {
case 0:
z = y*y;
p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
p = y*p1+p2;
r += p - 0.5f*y;
break;
case 1:
z = y*y;
w = z*y;
p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))); /* parallel comp */
p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
p = z*p1-(tt-w*(p2+y*p3));
r += (tf + p);
break;
case 2:
p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
p2 = 1.0f+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
r += -0.5f*y + p1/p2;
}
} else if (ix < 0x41000000) { /* x < 8.0 */
i = (int)x;
y = x - (float)i;
p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
q = 1.0f+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
r = 0.5f*y+p/q;
z = 1.0f; /* lgamma(1+s) = log(s) + lgamma(s) */
switch (i) {
case 7: z *= y + 6.0f; /* FALLTHRU */
case 6: z *= y + 5.0f; /* FALLTHRU */
case 5: z *= y + 4.0f; /* FALLTHRU */
case 4: z *= y + 3.0f; /* FALLTHRU */
case 3: z *= y + 2.0f; /* FALLTHRU */
r += logf(z);
break;
}
} else if (ix < 0x5c800000) { /* 8.0 <= x < 2**58 */
t = logf(x);
z = 1.0f/x;
y = z*z;
w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
r = (x-0.5f)*(t-1.0f)+w;
} else /* 2**58 <= x <= inf */
r = x*(logf(x)-1.0f);
if (sign)
r = nadj - r;
return r;
}
weak_alias(__lgammaf_r, lgammaf_r);
/*
* Double-precision log(x) function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "log_data.h"
#define T __log_data.tab
#define T2 __log_data.tab2
#define B __log_data.poly1
#define A __log_data.poly
#define Ln2hi __log_data.ln2hi
#define Ln2lo __log_data.ln2lo
#define N (1 << LOG_TABLE_BITS)
#define OFF 0x3fe6000000000000
/* Top 16 bits of a double. */
static inline uint32_t top16(double x)
{
return asuint64(x) >> 48;
}
double __cdecl log(double x)
{
double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;
ix = asuint64(x);
top = top16(x);
#define LO asuint64(1.0 - 0x1p-4)
#define HI asuint64(1.0 + 0x1.09p-4)
if (predict_false(ix - LO < HI - LO)) {
/* Handle close to 1.0 inputs separately. */
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && predict_false(ix == asuint64(1.0)))
return 0;
r = x - 1.0;
r2 = r * r;
r3 = r * r2;
y = r3 *
(B[1] + r * B[2] + r2 * B[3] +
r3 * (B[4] + r * B[5] + r2 * B[6] +
r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
/* Worst-case error is around 0.507 ULP. */
w = r * 0x1p27;
double_t rhi = r + w - w;
double_t rlo = r - rhi;
w = rhi * rhi * B[0]; /* B[0] == -0.5. */
hi = r + w;
lo = r - hi + w;
lo += B[0] * rlo * (rhi + r);
y += lo;
y += hi;
return eval_as_double(y);
}
if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) {
/* x < 0x1p-1022 or inf or nan. */
if (ix * 2 == 0)
return __math_divzero(1);
if (ix == asuint64(INFINITY)) /* log(inf) == inf. */
return x;
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid(x);
/* x is subnormal, normalize it. */
ix = asuint64(x * 0x1p52);
ix -= 52ULL << 52;
}
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
k = (int64_t)tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble(iz);
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if __FP_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = __builtin_fma(z, invc, -1.0);
#else
/* rounding error: 0x1p-55/N + 0x1p-66. */
r = (z - T2[i].chi - T2[i].clo) * invc;
#endif
kd = (double_t)k;
/* hi + lo = r + log(c) + k*Ln2. */
w = kd * Ln2hi + logc;
hi = w + r;
lo = w - hi + r + kd * Ln2lo;
/* log(x) = lo + (log1p(r) - r) + hi. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
/* Worst case error if |y| > 0x1p-5:
0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
Worst case error if |y| > 0x1p-4:
0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
y = lo + r2 * A[0] +
r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
return eval_as_double(y);
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* Return the base 10 logarithm of x. See log.c for most comments.
*
* Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2
* as in log.c, then combine and scale in extra precision:
* log10(x) = (f - f*f/2 + r)/log(10) + k*log10(2)
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
static const double
ivln10hi = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */
ivln10lo = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */
log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
log10_2lo = 3.69423907715893078616e-13, /* 0x3D59FEF3, 0x11F12B36 */
Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
double __cdecl log10(double x)
{
union {double f; uint64_t i;} u = {x};
double_t hfsq,f,s,z,R,w,t1,t2,dk,y,hi,lo,val_hi,val_lo;
uint32_t hx;
int k;
hx = u.i>>32;
k = 0;
if (hx < 0x00100000 || hx>>31) {
if (u.i<<1 == 0)
return -1/(x*x); /* log(+-0)=-inf */
if (hx>>31)
return (x-x)/0.0; /* log(-#) = NaN */
/* subnormal number, scale x up */
k -= 54;
x *= 0x1p54;
u.f = x;
hx = u.i>>32;
} else if (hx >= 0x7ff00000) {
return x;
} else if (hx == 0x3ff00000 && u.i<<32 == 0)
return 0;
/* reduce x into [sqrt(2)/2, sqrt(2)] */
hx += 0x3ff00000 - 0x3fe6a09e;
k += (int)(hx>>20) - 0x3ff;
hx = (hx&0x000fffff) + 0x3fe6a09e;
u.i = (uint64_t)hx<<32 | (u.i&0xffffffff);
x = u.f;
f = x - 1.0;
hfsq = 0.5*f*f;
s = f/(2.0+f);
z = s*s;
w = z*z;
t1 = w*(Lg2+w*(Lg4+w*Lg6));
t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
R = t2 + t1;
/* See log2.c for details. */
/* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
hi = f - hfsq;
u.f = hi;
u.i &= (uint64_t)-1<<32;
hi = u.f;
lo = f - hi - hfsq + s*(hfsq+R);
/* val_hi+val_lo ~ log10(1+f) + k*log10(2) */
val_hi = hi*ivln10hi;
dk = k;
y = dk*log10_2hi;
val_lo = dk*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi;
/*
* Extra precision in for adding y is not strictly needed
* since there is no very large cancellation near x = sqrt(2) or
* x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
* with some parallelism and it reduces the error for many args.
*/
w = y + val_hi;
val_lo += (y - w) + val_hi;
val_hi = w;
return val_lo + val_hi;
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_log10f.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* See comments in log10.c.
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
static const float
ivln10hi = 4.3432617188e-01, /* 0x3ede6000 */
ivln10lo = -3.1689971365e-05, /* 0xb804ead9 */
log10_2hi = 3.0102920532e-01, /* 0x3e9a2080 */
log10_2lo = 7.9034151668e-07, /* 0x355427db */
/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */
Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */
Lg2 = 0xccce13.0p-25, /* 0.40000972152 */
Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */
Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */
float __cdecl log10f(float x)
{
union {float f; uint32_t i;} u = {x};
float_t hfsq,f,s,z,R,w,t1,t2,dk,hi,lo;
uint32_t ix;
int k;
ix = u.i;
k = 0;
if (ix < 0x00800000 || ix>>31) { /* x < 2**-126 */
if (ix<<1 == 0)
return -1/(x*x); /* log(+-0)=-inf */
if (ix>>31)
return (x-x)/0.0f; /* log(-#) = NaN */
/* subnormal number, scale up x */
k -= 25;
x *= 0x1p25f;
u.f = x;
ix = u.i;
} else if (ix >= 0x7f800000) {
return x;
} else if (ix == 0x3f800000)
return 0;
/* reduce x into [sqrt(2)/2, sqrt(2)] */
ix += 0x3f800000 - 0x3f3504f3;
k += (int)(ix>>23) - 0x7f;
ix = (ix&0x007fffff) + 0x3f3504f3;
u.i = ix;
x = u.f;
f = x - 1.0f;
s = f/(2.0f + f);
z = s*s;
w = z*z;
t1= w*(Lg2+w*Lg4);
t2= z*(Lg1+w*Lg3);
R = t2 + t1;
hfsq = 0.5f*f*f;
hi = f - hfsq;
u.f = hi;
u.i &= 0xfffff000;
hi = u.f;
lo = f - hi - hfsq + s*(hfsq+R);
dk = k;
return dk*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi + hi*ivln10hi + dk*log10_2hi;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_log1p.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* double log1p(double x)
* Return the natural logarithm of 1+x.
*
* Method :
* 1. Argument Reduction: find k and f such that
* 1+x = 2^k * (1+f),
* where sqrt(2)/2 < 1+f < sqrt(2) .
*
* Note. If k=0, then f=x is exact. However, if k!=0, then f
* may not be representable exactly. In that case, a correction
* term is need. Let u=1+x rounded. Let c = (1+x)-u, then
* log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
* and add back the correction term c/u.
* (Note: when x > 2**53, one can simply return log(x))
*
* 2. Approximation of log(1+f): See log.c
*
* 3. Finally, log1p(x) = k*ln2 + log(1+f) + c/u. See log.c
*
* Special cases:
* log1p(x) is NaN with signal if x < -1 (including -INF) ;
* log1p(+INF) is +INF; log1p(-1) is -INF with signal;
* log1p(NaN) is that NaN with no signal.
*
* Accuracy:
* according to an error analysis, the error is always less than
* 1 ulp (unit in the last place).
*
* Constants:
* The hexadecimal values are the intended ones for the following
* constants. The decimal values may be used, provided that the
* compiler will convert from decimal to binary accurately enough
* to produce the hexadecimal values shown.
*
* Note: Assuming log() return accurate answer, the following
* algorithm can be used to compute log1p(x) to within a few ULP:
*
* u = 1+x;
* if(u==1.0) return x ; else
* return log(u)*(x/(u-1.0));
*
* See HP-15C Advanced Functions Handbook, p.193.
*/
#include "libm.h"
static const double
ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
double __cdecl log1p(double x)
{
union {double f; uint64_t i;} u = {x};
double_t hfsq,f,c,s,z,R,w,t1,t2,dk;
uint32_t hx,hu;
int k;
hx = u.i>>32;
k = 1;
if (hx < 0x3fda827a || hx>>31) { /* 1+x < sqrt(2)+ */
if (hx >= 0xbff00000) { /* x <= -1.0 */
if (x == -1)
return x/0.0; /* log1p(-1) = -inf */
return (x-x)/0.0; /* log1p(x<-1) = NaN */
}
if (hx<<1 < 0x3ca00000<<1) { /* |x| < 2**-53 */
/* underflow if subnormal */
if ((hx&0x7ff00000) == 0)
FORCE_EVAL((float)x);
return x;
}
if (hx <= 0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
k = 0;
c = 0;
f = x;
}
} else if (hx >= 0x7ff00000)
return x;
if (k) {
u.f = 1 + x;
hu = u.i>>32;
hu += 0x3ff00000 - 0x3fe6a09e;
k = (int)(hu>>20) - 0x3ff;
/* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
if (k < 54) {
c = k >= 2 ? 1-(u.f-x) : x-(u.f-1);
c /= u.f;
} else
c = 0;
/* reduce u into [sqrt(2)/2, sqrt(2)] */
hu = (hu&0x000fffff) + 0x3fe6a09e;
u.i = (uint64_t)hu<<32 | (u.i&0xffffffff);
f = u.f - 1;
}
hfsq = 0.5*f*f;
s = f/(2.0+f);
z = s*s;
w = z*z;
t1 = w*(Lg2+w*(Lg4+w*Lg6));
t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
R = t2 + t1;
dk = k;
return s*(hfsq+R) + (dk*ln2_lo+c) - hfsq + f + dk*ln2_hi;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_log1pf.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
static const float
ln2_hi = 6.9313812256e-01, /* 0x3f317180 */
ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */
/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */
Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */
Lg2 = 0xccce13.0p-25, /* 0.40000972152 */
Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */
Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */
float __cdecl log1pf(float x)
{
union {float f; uint32_t i;} u = {x};
float_t hfsq,f,c,s,z,R,w,t1,t2,dk;
uint32_t ix,iu;
int k;
ix = u.i;
k = 1;
if (ix < 0x3ed413d0 || ix>>31) { /* 1+x < sqrt(2)+ */
if (ix >= 0xbf800000) { /* x <= -1.0 */
if (x == -1)
return x/0.0f; /* log1p(-1)=+inf */
return (x-x)/0.0f; /* log1p(x<-1)=NaN */
}
if (ix<<1 < 0x33800000<<1) { /* |x| < 2**-24 */
/* underflow if subnormal */
if ((ix&0x7f800000) == 0)
FORCE_EVAL(x*x);
return x;
}
if (ix <= 0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
k = 0;
c = 0;
f = x;
}
} else if (ix >= 0x7f800000)
return x;
if (k) {
u.f = 1 + x;
iu = u.i;
iu += 0x3f800000 - 0x3f3504f3;
k = (int)(iu>>23) - 0x7f;
/* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
if (k < 25) {
c = k >= 2 ? 1-(u.f-x) : x-(u.f-1);
c /= u.f;
} else
c = 0;
/* reduce u into [sqrt(2)/2, sqrt(2)] */
iu = (iu&0x007fffff) + 0x3f3504f3;
u.i = iu;
f = u.f - 1;
}
s = f/(2.0f + f);
z = s*s;
w = z*z;
t1= w*(Lg2+w*Lg4);
t2= z*(Lg1+w*Lg3);
R = t2 + t1;
hfsq = 0.5f*f*f;
dk = k;
return s*(hfsq+R) + (dk*ln2_lo+c) - hfsq + f + dk*ln2_hi;
}
/*
* Double-precision log2(x) function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "log2_data.h"
#define T __log2_data.tab
#define T2 __log2_data.tab2
#define B __log2_data.poly1
#define A __log2_data.poly
#define InvLn2hi __log2_data.invln2hi
#define InvLn2lo __log2_data.invln2lo
#define N (1 << LOG2_TABLE_BITS)
#define OFF 0x3fe6000000000000
/* Top 16 bits of a double. */
static inline uint32_t top16(double x)
{
return asuint64(x) >> 48;
}
double __cdecl log2(double x)
{
double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;
ix = asuint64(x);
top = top16(x);
#define LO asuint64(1.0 - 0x1.5b51p-5)
#define HI asuint64(1.0 + 0x1.6ab2p-5)
if (predict_false(ix - LO < HI - LO)) {
/* Handle close to 1.0 inputs separately. */
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && predict_false(ix == asuint64(1.0)))
return 0;
r = x - 1.0;
#if __FP_FAST_FMA
hi = r * InvLn2hi;
lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi);
#else
double_t rhi, rlo;
rhi = asdouble(asuint64(r) & -1ULL << 32);
rlo = r - rhi;
hi = rhi * InvLn2hi;
lo = rlo * InvLn2hi + r * InvLn2lo;
#endif
r2 = r * r; /* rounding error: 0x1p-62. */
r4 = r2 * r2;
/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
p = r2 * (B[0] + r * B[1]);
y = hi + p;
lo += hi - y + p;
lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
y += lo;
return eval_as_double(y);
}
if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) {
/* x < 0x1p-1022 or inf or nan. */
if (ix * 2 == 0)
return __math_divzero(1);
if (ix == asuint64(INFINITY)) /* log(inf) == inf. */
return x;
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid(x);
/* x is subnormal, normalize it. */
ix = asuint64(x * 0x1p52);
ix -= 52ULL << 52;
}
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
k = (int64_t)tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble(iz);
kd = (double_t)k;
/* log2(x) = log2(z/c) + log2(c) + k. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if __FP_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = __builtin_fma(z, invc, -1.0);
t1 = r * InvLn2hi;
t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1);
#else
double_t rhi, rlo;
/* rounding error: 0x1p-55/N + 0x1p-65. */
r = (z - T2[i].chi - T2[i].clo) * invc;
rhi = asdouble(asuint64(r) & -1ULL << 32);
rlo = r - rhi;
t1 = rhi * InvLn2hi;
t2 = rlo * InvLn2hi + r * InvLn2lo;
#endif
/* hi + lo = r/ln2 + log2(c) + k. */
t3 = kd + logc;
hi = t3 + t1;
lo = t3 - hi + t1 + t2;
/* log2(r+1) = r/ln2 + r^2*poly(r). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
r4 = r2 * r2;
/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
y = lo + r2 * p + hi;
return eval_as_double(y);
}
/*
* Data for log2.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "log2_data.h"
#define N (1 << LOG2_TABLE_BITS)
const struct log2_data __log2_data = {
// First coefficient: 0x1.71547652b82fe1777d0ffda0d24p0
.invln2hi = 0x1.7154765200000p+0,
.invln2lo = 0x1.705fc2eefa200p-33,
.poly1 = {
// relative error: 0x1.2fad8188p-63
// in -0x1.5b51p-5 0x1.6ab2p-5
-0x1.71547652b82fep-1,
0x1.ec709dc3a03f7p-2,
-0x1.71547652b7c3fp-2,
0x1.2776c50f05be4p-2,
-0x1.ec709dd768fe5p-3,
0x1.a61761ec4e736p-3,
-0x1.7153fbc64a79bp-3,
0x1.484d154f01b4ap-3,
-0x1.289e4a72c383cp-3,
0x1.0b32f285aee66p-3,
},
.poly = {
// relative error: 0x1.a72c2bf8p-58
// abs error: 0x1.67a552c8p-66
// in -0x1.f45p-8 0x1.f45p-8
-0x1.71547652b8339p-1,
0x1.ec709dc3a04bep-2,
-0x1.7154764702ffbp-2,
0x1.2776c50034c48p-2,
-0x1.ec7b328ea92bcp-3,
0x1.a6225e117f92ep-3,
},
/* Algorithm:
x = 2^k z
log2(x) = k + log2(c) + log2(z/c)
log2(z/c) = poly(z/c - 1)
where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
into the ith one, then table entries are computed as
tab[i].invc = 1/c
tab[i].logc = (double)log2(c)
tab2[i].chi = (double)c
tab2[i].clo = (double)(c - (double)c)
where c is near the center of the subinterval and is chosen by trying +-2^29
floating point invc candidates around 1/center and selecting one for which
1) the rounding error in 0x1.8p10 + logc is 0,
2) the rounding error in z - chi - clo is < 0x1p-64 and
3) the rounding error in (double)log2(c) is minimized (< 0x1p-68).
Note: 1) ensures that k + logc can be computed without rounding error, 2)
ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a
single rounding error when there is no fast fma for z*invc - 1, 3) ensures
that logc + poly(z/c - 1) has small error, however near x == 1 when
|log2(x)| < 0x1p-4, this is not enough so that is special cased. */
.tab = {
{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
{0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
{0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
{0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
{0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
{0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
{0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
{0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
{0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
{0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
{0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
{0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
{0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
{0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
{0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
{0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
{0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
{0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
{0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
{0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
{0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
{0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
{0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
{0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
{0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
{0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
{0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
{0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
{0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
{0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
{0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
{0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
{0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
{0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
{0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
{0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
{0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
{0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
{0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
{0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
{0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
{0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
{0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
{0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
{0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
{0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
{0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
{0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
{0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
{0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
{0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
{0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
{0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
{0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
{0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
{0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
{0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
{0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
{0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
},
#if !__FP_FAST_FMA
.tab2 = {
{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
{0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
{0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
{0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
{0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
{0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
{0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
{0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
{0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
{0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
{0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
{0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
{0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
{0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
{0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
{0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
{0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
{0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
{0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
{0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
{0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
{0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
{0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
{0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
{0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
{0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
{0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
{0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
{0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
{0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
{0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
{0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
{0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
{0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
{0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
{0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
{0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
{0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
{0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
{0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
{0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
{0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
{0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
{0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
{0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
{0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
{0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
{0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
{0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
{0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
{0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
{0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
{0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
{0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
{0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
{0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
{0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
{0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
{0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
},
#endif
};
/*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _LOG2_DATA_H
#define _LOG2_DATA_H
#include <features.h>
#define LOG2_TABLE_BITS 6
#define LOG2_POLY_ORDER 7
#define LOG2_POLY1_ORDER 11
extern hidden const struct log2_data {
double invln2hi;
double invln2lo;
double poly[LOG2_POLY_ORDER - 1];
double poly1[LOG2_POLY1_ORDER - 1];
struct {
double invc, logc;
} tab[1 << LOG2_TABLE_BITS];
#if !__FP_FAST_FMA
struct {
double chi, clo;
} tab2[1 << LOG2_TABLE_BITS];
#endif
} __log2_data;
#endif
/*
* Single-precision log2 function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "log2f_data.h"
/*
LOG2F_TABLE_BITS = 4
LOG2F_POLY_ORDER = 4
ULP error: 0.752 (nearest rounding.)
Relative error: 1.9 * 2^-26 (before rounding.)
*/
#define N (1 << LOG2F_TABLE_BITS)
#define T __log2f_data.tab
#define A __log2f_data.poly
#define OFF 0x3f330000
float __cdecl log2f(float x)
{
double_t z, r, r2, p, y, y0, invc, logc;
uint32_t ix, iz, top, tmp;
int k, i;
ix = asuint(x);
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && predict_false(ix == 0x3f800000))
return 0;
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
/* x < 0x1p-126 or inf or nan. */
if (ix * 2 == 0)
return __math_divzerof(1);
if (ix == 0x7f800000) /* log2(inf) == inf. */
return x;
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
return __math_invalidf(x);
/* x is subnormal, normalize it. */
ix = asuint(x * 0x1p23f);
ix -= 23 << 23;
}
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
top = tmp & 0xff800000;
iz = ix - top;
k = (int32_t)tmp >> 23; /* arithmetic shift */
invc = T[i].invc;
logc = T[i].logc;
z = (double_t)asfloat(iz);
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
r = z * invc - 1;
y0 = logc + (double_t)k;
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
r2 = r * r;
y = A[1] * r + A[2];
y = A[0] * r2 + y;
p = A[3] * r + y0;
y = y * r2 + p;
return eval_as_float(y);
}
/*
* Data definition for log2f.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "log2f_data.h"
const struct log2f_data __log2f_data = {
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
{ 0x1p+0, 0x0p+0 },
{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 },
},
.poly = {
-0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1,
0x1.715475f35c8b8p0,
}
};
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _LOG2F_DATA_H
#define _LOG2F_DATA_H
#include <features.h>
#define LOG2F_TABLE_BITS 4
#define LOG2F_POLY_ORDER 4
extern hidden const struct log2f_data {
struct {
double invc, logc;
} tab[1 << LOG2F_TABLE_BITS];
double poly[LOG2F_POLY_ORDER];
} __log2f_data;
#endif
/*
* Data for log.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "log_data.h"
#define N (1 << LOG_TABLE_BITS)
const struct log_data __log_data = {
.ln2hi = 0x1.62e42fefa3800p-1,
.ln2lo = 0x1.ef35793c76730p-45,
.poly1 = {
// relative error: 0x1.c04d76cp-63
// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
-0x1p-1,
0x1.5555555555577p-2,
-0x1.ffffffffffdcbp-3,
0x1.999999995dd0cp-3,
-0x1.55555556745a7p-3,
0x1.24924a344de3p-3,
-0x1.fffffa4423d65p-4,
0x1.c7184282ad6cap-4,
-0x1.999eb43b068ffp-4,
0x1.78182f7afd085p-4,
-0x1.5521375d145cdp-4,
},
.poly = {
// relative error: 0x1.926199e8p-56
// abs error: 0x1.882ff33p-65
// in -0x1.fp-9 0x1.fp-9
-0x1.0000000000001p-1,
0x1.555555551305bp-2,
-0x1.fffffffeb459p-3,
0x1.999b324f10111p-3,
-0x1.55575e506c89fp-3,
},
/* Algorithm:
x = 2^k z
log(x) = k ln2 + log(c) + log(z/c)
log(z/c) = poly(z/c - 1)
where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
into the ith one, then table entries are computed as
tab[i].invc = 1/c
tab[i].logc = (double)log(c)
tab2[i].chi = (double)c
tab2[i].clo = (double)(c - (double)c)
where c is near the center of the subinterval and is chosen by trying +-2^29
floating point invc candidates around 1/center and selecting one for which
1) the rounding error in 0x1.8p9 + logc is 0,
2) the rounding error in z - chi - clo is < 0x1p-66 and
3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
that logc + poly(z/c - 1) has small error, however near x == 1 when
|log(x)| < 0x1p-4, this is not enough so that is special cased. */
.tab = {
{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
{0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
{0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
{0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
{0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
{0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
{0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
{0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
{0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
{0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
{0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
{0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
{0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
{0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
{0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
{0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
{0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
{0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
{0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
{0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
{0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
{0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
{0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
{0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
{0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
{0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
{0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
{0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
{0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
{0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
{0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
{0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
{0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
{0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
{0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
{0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
{0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
{0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
{0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
{0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
{0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
{0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
{0x1.293726014b530p+0, -0x1.31b996b490000p-3},
{0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
{0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
{0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
{0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
{0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
{0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
{0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
{0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
{0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
{0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
{0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
{0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
{0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
{0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
{0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
{0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
{0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
{0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
{0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
{0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
{0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
{0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
{0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
{0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
{0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
{0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
{0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
{0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
{0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
{0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
{0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
{0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
{0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
{0x1.008040614b195p+0, -0x1.0040979240000p-9},
{0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
{0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
{0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
{0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
{0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
{0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
{0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
{0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
{0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
{0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
{0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
{0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
{0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
{0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
{0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
{0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
{0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
{0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
{0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
{0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
{0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
{0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
{0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
{0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
{0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
{0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
{0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
{0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
{0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
{0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
{0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
{0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
{0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
{0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
{0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
{0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
{0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
{0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
{0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
{0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
{0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
{0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
{0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
{0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
{0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
},
#if !__FP_FAST_FMA
.tab2 = {
{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
{0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
{0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
{0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
{0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
{0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
{0x1.710000e86978p-1, 0x1.bff6671097952p-56},
{0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
{0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
{0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
{0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
{0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
{0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
{0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
{0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
{0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
{0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
{0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
{0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
{0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
{0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
{0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
{0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
{0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
{0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
{0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
{0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
{0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
{0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
{0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
{0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
{0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
{0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
{0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
{0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
{0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
{0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
{0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
{0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
{0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
{0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
{0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
{0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
{0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
{0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
{0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
{0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
{0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
{0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
{0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
{0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
{0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
{0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
{0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
{0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
{0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
{0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
{0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
{0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
{0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
{0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
{0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
{0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
{0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
{0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
{0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
{0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
{0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
{0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
{0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
{0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
{0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
{0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
{0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
{0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
{0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
{0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
{0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
{0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
{0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
{0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
{0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
{0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
{0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
{0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
{0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
{0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
{0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
{0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
{0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
{0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
{0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
{0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
{0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
{0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
{0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
{0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
{0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
{0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
{0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
{0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
{0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
{0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
{0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
{0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
{0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
{0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
{0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
{0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
{0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
{0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
{0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
{0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
{0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
{0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
{0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
{0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
{0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
{0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
{0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
{0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
},
#endif
};
/*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _LOG_DATA_H
#define _LOG_DATA_H
#include <features.h>
#define LOG_TABLE_BITS 7
#define LOG_POLY_ORDER 6
#define LOG_POLY1_ORDER 12
extern hidden const struct log_data {
double ln2hi;
double ln2lo;
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
double poly1[LOG_POLY1_ORDER - 1];
struct {
double invc, logc;
} tab[1 << LOG_TABLE_BITS];
#if !__FP_FAST_FMA
struct {
double chi, clo;
} tab2[1 << LOG_TABLE_BITS];
#endif
} __log_data;
#endif
#include <math.h>
#include "libm.h"
/*
special cases:
logb(+-0) = -inf, and raise divbyzero
logb(+-inf) = +inf
logb(nan) = nan
*/
double __cdecl logb(double x)
{
if (!isfinite(x))
return x * x;
if (x == 0)
return -1/(x*x);
return ilogb(x);
}
#include <math.h>
float __cdecl logbf(float x)
{
if (!isfinite(x))
return x * x;
if (x == 0)
return -1/(x*x);
return ilogbf(x);
}
/*
* Single-precision log function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "logf_data.h"
/*
LOGF_TABLE_BITS = 4
LOGF_POLY_ORDER = 4
ULP error: 0.818 (nearest rounding.)
Relative error: 1.957 * 2^-26 (before rounding.)
*/
#define T __logf_data.tab
#define A __logf_data.poly
#define Ln2 __logf_data.ln2
#define N (1 << LOGF_TABLE_BITS)
#define OFF 0x3f330000
float __cdecl logf(float x)
{
double_t z, r, r2, y, y0, invc, logc;
uint32_t ix, iz, tmp;
int k, i;
ix = asuint(x);
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && predict_false(ix == 0x3f800000))
return 0;
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
/* x < 0x1p-126 or inf or nan. */
if (ix * 2 == 0)
return __math_divzerof(1);
if (ix == 0x7f800000) /* log(inf) == inf. */
return x;
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
return __math_invalidf(x);
/* x is subnormal, normalize it. */
ix = asuint(x * 0x1p23f);
ix -= 23 << 23;
}
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
k = (int32_t)tmp >> 23; /* arithmetic shift */
iz = ix - (tmp & 0x1ff << 23);
invc = T[i].invc;
logc = T[i].logc;
z = (double_t)asfloat(iz);
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
r = z * invc - 1;
y0 = logc + (double_t)k * Ln2;
/* Pipelined polynomial evaluation to approximate log1p(r). */
r2 = r * r;
y = A[1] * r + A[2];
y = A[0] * r2 + y;
y = y * r2 + (y0 + r);
return eval_as_float(y);
}
/*
* Data definition for logf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "logf_data.h"
const struct logf_data __logf_data = {
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
{ 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
{ 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
{ 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
{ 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
{ 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
{ 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
{ 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
{ 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
{ 0x1p+0, 0x0p+0 },
{ 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
{ 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
{ 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
{ 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
{ 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
{ 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 },
},
.ln2 = 0x1.62e42fefa39efp-1,
.poly = {
-0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2,
}
};
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _LOGF_DATA_H
#define _LOGF_DATA_H
#include <features.h>
#define LOGF_TABLE_BITS 4
#define LOGF_POLY_ORDER 4
extern hidden const struct logf_data {
struct {
double invc, logc;
} tab[1 << LOGF_TABLE_BITS];
double ln2;
double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
} __logf_data;
#endif
#include "libm.h"
double __cdecl modf(double x, double *iptr)
{
union {double f; uint64_t i;} u = {x};
uint64_t mask;
int e = (int)(u.i>>52 & 0x7ff) - 0x3ff;
/* no fractional part */
if (e >= 52) {
*iptr = x;
if (e == 0x400 && u.i<<12 != 0) /* nan */
return x;
u.i &= 1ULL<<63;
return u.f;
}
/* no integral part*/
if (e < 0) {
u.i &= 1ULL<<63;
*iptr = u.f;
return x;
}
mask = -1ULL>>12>>e;
if ((u.i & mask) == 0) {
*iptr = x;
u.i &= 1ULL<<63;
return u.f;
}
u.i &= ~mask;
*iptr = u.f;
return x - u.f;
}
#include "libm.h"
float __cdecl modff(float x, float *iptr)
{
union {float f; uint32_t i;} u = {x};
uint32_t mask;
int e = (int)(u.i>>23 & 0xff) - 0x7f;
/* no fractional part */
if (e >= 23) {
*iptr = x;
if (e == 0x80 && u.i<<9 != 0) { /* nan */
return x;
}
u.i &= 0x80000000;
return u.f;
}
/* no integral part */
if (e < 0) {
u.i &= 0x80000000;
*iptr = u.f;
return x;
}
mask = 0x007fffff>>e;
if ((u.i & mask) == 0) {
*iptr = x;
u.i &= 0x80000000;
return u.f;
}
u.i &= ~mask;
*iptr = u.f;
return x - u.f;
}
#include <math.h>
double __cdecl nan(const char *s)
{
return NAN;
}
#include <math.h>
float __cdecl nanf(const char *s)
{
return NAN;
}
#include "libm.h"
double __cdecl nextafter(double x, double y)
{
union {double f; uint64_t i;} ux={x}, uy={y};
uint64_t ax, ay;
int e;
if (isnan(x) || isnan(y))
return x + y;
if (ux.i == uy.i)
return y;
ax = ux.i & -1ULL/2;
ay = uy.i & -1ULL/2;
if (ax == 0) {
if (ay == 0)
return y;
ux.i = (uy.i & 1ULL<<63) | 1;
} else if (ax > ay || ((ux.i ^ uy.i) & 1ULL<<63))
ux.i--;
else
ux.i++;
e = ux.i >> 52 & 0x7ff;
/* raise overflow if ux.f is infinite and x is finite */
if (e == 0x7ff)
FORCE_EVAL(x+x);
/* raise underflow if ux.f is subnormal or zero */
if (e == 0)
FORCE_EVAL(x*x + ux.f*ux.f);
return ux.f;
}
#include "libm.h"
float __cdecl nextafterf(float x, float y)
{
union {float f; uint32_t i;} ux={x}, uy={y};
uint32_t ax, ay, e;
if (isnan(x) || isnan(y))
return x + y;
if (ux.i == uy.i)
return y;
ax = ux.i & 0x7fffffff;
ay = uy.i & 0x7fffffff;
if (ax == 0) {
if (ay == 0)
return y;
ux.i = (uy.i & 0x80000000) | 1;
} else if (ax > ay || ((ux.i ^ uy.i) & 0x80000000))
ux.i--;
else
ux.i++;
e = ux.i & 0x7f800000;
/* raise overflow if ux.f is infinite and x is finite */
if (e == 0x7f800000)
FORCE_EVAL(x+x);
/* raise underflow if ux.f is subnormal or zero */
if (e == 0)
FORCE_EVAL(x*x + ux.f*ux.f);
return ux.f;
}
#include "libm.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
double __cdecl nexttoward(double x, long double y)
{
return nextafter(x, y);
}
#else
double __cdecl nexttoward(double x, long double y)
{
union {double f; uint64_t i;} ux = {x};
int e;
if (isnan(x) || isnan(y))
return x + y;
if (x == y)
return y;
if (x == 0) {
ux.i = 1;
if (signbit(y))
ux.i |= 1ULL<<63;
} else if (x < y) {
if (signbit(x))
ux.i--;
else
ux.i++;
} else {
if (signbit(x))
ux.i++;
else
ux.i--;
}
e = ux.i>>52 & 0x7ff;
/* raise overflow if ux.f is infinite and x is finite */
if (e == 0x7ff)
FORCE_EVAL(x+x);
/* raise underflow if ux.f is subnormal or zero */
if (e == 0)
FORCE_EVAL(x*x + ux.f*ux.f);
return ux.f;
}
#endif
#include "libm.h"
float __cdecl nexttowardf(float x, long double y)
{
union {float f; uint32_t i;} ux = {x};
uint32_t e;
if (isnan(x) || isnan(y))
return x + y;
if (x == y)
return y;
if (x == 0) {
ux.i = 1;
if (signbit(y))
ux.i |= 0x80000000;
} else if (x < y) {
if (signbit(x))
ux.i--;
else
ux.i++;
} else {
if (signbit(x))
ux.i++;
else
ux.i--;
}
e = ux.i & 0x7f800000;
/* raise overflow if ux.f is infinite and x is finite */
if (e == 0x7f800000)
FORCE_EVAL(x+x);
/* raise underflow if ux.f is subnormal or zero */
if (e == 0)
FORCE_EVAL(x*x + ux.f*ux.f);
return ux.f;
}
/*
* Double-precision x^y function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp_data.h"
#include "pow_data.h"
/*
Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
*/
#define T __pow_log_data.tab
#define A __pow_log_data.poly
#define Ln2hi __pow_log_data.ln2hi
#define Ln2lo __pow_log_data.ln2lo
#define N (1 << POW_LOG_TABLE_BITS)
#define OFF 0x3fe6955500000000
/* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x)
{
return asuint64(x) >> 52;
}
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
additional 15 bits precision. IX is the bit representation of x, but
normalized in the subnormal range using the sign bit for the exponent. */
static inline double_t log_inline(uint64_t ix, double_t *tail)
{
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
uint64_t iz, tmp;
int k, i;
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
k = (int64_t)tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
z = asdouble(iz);
kd = (double_t)k;
/* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
invc = T[i].invc;
logc = T[i].logc;
logctail = T[i].logctail;
/* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
|z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */
#if __FP_FAST_FMA
r = __builtin_fma(z, invc, -1.0);
#else
/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
double_t zhi = asdouble((iz + (1ULL << 31)) & (-1ULL << 32));
double_t zlo = z - zhi;
double_t rhi = zhi * invc - 1.0;
double_t rlo = zlo * invc;
r = rhi + rlo;
#endif
/* k*Ln2 + log(c) + r. */
t1 = kd * Ln2hi + logc;
t2 = t1 + r;
lo1 = kd * Ln2lo + logctail;
lo2 = t1 - t2 + r;
/* Evaluation is optimized assuming superscalar pipelined execution. */
double_t ar, ar2, ar3, lo3, lo4;
ar = A[0] * r; /* A[0] = -0.5. */
ar2 = r * ar;
ar3 = r * ar2;
/* k*Ln2 + log(c) + r + A[0]*r*r. */
#if __FP_FAST_FMA
hi = t2 + ar2;
lo3 = __builtin_fma(ar, r, -ar2);
lo4 = t2 - hi + ar2;
#else
double_t arhi = A[0] * rhi;
double_t arhi2 = rhi * arhi;
hi = t2 + arhi2;
lo3 = rlo * (ar + arhi);
lo4 = t2 - hi + arhi2;
#endif
/* p = log1p(r) - r - A[0]*r*r. */
p = (ar3 * (A[1] + r * A[2] +
ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
lo = lo1 + lo2 + lo3 + lo4 + p;
y = hi + lo;
*tail = hi - y + lo;
return y;
}
#undef N
#undef T
#define N (1 << EXP_TABLE_BITS)
#define InvLn2N __exp_data.invln2N
#define NegLn2hiN __exp_data.negln2hiN
#define NegLn2loN __exp_data.negln2loN
#define Shift __exp_data.shift
#define T __exp_data.tab
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have
overflown into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;
if ((ki & 0x80000000) == 0) {
/* k > 0, the exponent of scale might have overflowed by <= 460. */
sbits -= 1009ull << 52;
scale = asdouble(sbits);
y = 0x1p1009 * (scale + scale * tmp);
return eval_as_double(y);
}
/* k < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
/* Note: sbits is signed scale. */
scale = asdouble(sbits);
y = scale + scale * tmp;
if (fabs(y) < 1.0) {
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo, one = 1.0;
if (y < 0.0)
one = -1.0;
lo = scale - y + scale * tmp;
hi = one + y;
lo = one - hi + y + lo;
y = eval_as_double(hi + lo) - one;
/* Fix the sign of 0. */
if (y == 0.0)
y = asdouble(sbits & 0x8000000000000000);
/* The underflow exception needs to be signaled explicitly. */
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return eval_as_double(y);
}
#define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, z, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff;
if (predict_false(abstop - top12(0x1p-54) >=
top12(512.0) - top12(0x1p-54))) {
if (abstop - top12(0x1p-54) >= 0x80000000) {
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
return sign_bias ? -one : one;
}
if (abstop >= top12(1024.0)) {
/* Note: inf and nan are already handled. */
if (asuint64(x) >> 63)
return __math_uflow(sign_bias);
else
return __math_oflow(sign_bias);
}
/* Large x is special cased below. */
abstop = 0;
}
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x;
#if TOINT_INTRINSICS
kd = roundtoint(z);
ki = converttoint(z);
#elif EXP_USE_TOINT_NARROW
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd) >> 16;
kd = (double_t)(int32_t)ki;
#else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd);
kd -= Shift;
#endif
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
r += xtail;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
tail = asdouble(T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
if (predict_false(abstop == 0))
return specialcase(tmp, sbits, ki);
scale = asdouble(sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double(scale + scale * tmp);
}
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
the bit representation of a non-zero finite floating-point value. */
static inline int checkint(uint64_t iy)
{
int e = iy >> 52 & 0x7ff;
if (e < 0x3ff)
return 0;
if (e > 0x3ff + 52)
return 2;
if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
return 0;
if (iy & (1ULL << (0x3ff + 52 - e)))
return 1;
return 2;
}
/* Returns 1 if input is the bit representation of 0, infinity or nan. */
static inline int zeroinfnan(uint64_t i)
{
return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1;
}
double __cdecl pow(double x, double y)
{
uint32_t sign_bias = 0;
uint64_t ix, iy;
uint32_t topx, topy;
ix = asuint64(x);
iy = asuint64(y);
topx = top12(x);
topy = top12(y);
if (predict_false(topx - 0x001 >= 0x7ff - 0x001 ||
(topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)) {
/* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
/* Special cases: (x < 0x1p-126 or inf or nan) or
(|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
if (predict_false(zeroinfnan(iy))) {
if (2 * iy == 0)
return issignaling_inline(x) ? x + y : 1.0;
if (ix == asuint64(1.0))
return issignaling_inline(y) ? x + y : 1.0;
if (2 * ix > 2 * asuint64(INFINITY) ||
2 * iy > 2 * asuint64(INFINITY))
return x + y;
if (2 * ix == 2 * asuint64(1.0))
return 1.0;
if ((2 * ix < 2 * asuint64(1.0)) == !(iy >> 63))
return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
return y * y;
}
if (predict_false(zeroinfnan(ix))) {
double_t x2 = x * x;
if (ix >> 63 && checkint(iy) == 1)
x2 = -x2;
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy >> 63 ? fp_barrier(1 / x2) : x2;
}
/* Here x and y are non-zero finite. */
if (ix >> 63) {
/* Finite x < 0. */
int yint = checkint(iy);
if (yint == 0)
return __math_invalid(x);
if (yint == 1)
sign_bias = SIGN_BIAS;
ix &= 0x7fffffffffffffff;
topx &= 0x7ff;
}
if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
/* Note: sign_bias == 0 here because y is not odd. */
if (ix == asuint64(1.0))
return 1.0;
if ((topy & 0x7ff) < 0x3be) {
/* |y| < 2^-65, x^y ~= 1 + y*log(x). */
if (WANT_ROUNDING)
return ix > asuint64(1.0) ? 1.0 + y :
1.0 - y;
else
return 1.0;
}
return (ix > asuint64(1.0)) == (topy < 0x800) ?
__math_oflow(0) :
__math_uflow(0);
}
if (topx == 0) {
/* Normalize subnormal x so exponent becomes negative. */
ix = asuint64(x * 0x1p52);
ix &= 0x7fffffffffffffff;
ix -= 52ULL << 52;
}
}
double_t lo;
double_t hi = log_inline(ix, &lo);
double_t ehi, elo;
#if __FP_FAST_FMA
ehi = y * hi;
elo = y * lo + __builtin_fma(y, hi, -ehi);
#else
double_t yhi = asdouble(iy & -1ULL << 27);
double_t ylo = y - yhi;
double_t lhi = asdouble(asuint64(hi) & -1ULL << 27);
double_t llo = hi - lhi + lo;
ehi = yhi * lhi;
elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
#endif
return exp_inline(ehi, elo, sign_bias);
}
/*
* Data for the log part of pow.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "pow_data.h"
#define N (1 << POW_LOG_TABLE_BITS)
const struct pow_log_data __pow_log_data = {
.ln2hi = 0x1.62e42fefa3800p-1,
.ln2lo = 0x1.ef35793c76730p-45,
.poly = {
// relative error: 0x1.11922ap-70
// in -0x1.6bp-8 0x1.6bp-8
// Coefficients are scaled to match the scaling during evaluation.
-0x1p-1,
0x1.555555555556p-2 * -2,
-0x1.0000000000006p-2 * -2,
0x1.999999959554ep-3 * 4,
-0x1.555555529a47ap-3 * 4,
0x1.2495b9b4845e9p-3 * -8,
-0x1.0002b8b263fc3p-3 * -8,
},
/* Algorithm:
x = 2^k z
log(x) = k ln2 + log(c) + log(z/c)
log(z/c) = poly(z/c - 1)
where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals
and z falls into the ith one, then table entries are computed as
tab[i].invc = 1/c
tab[i].logc = round(0x1p43*log(c))/0x1p43
tab[i].logctail = (double)(log(c) - logc)
where c is chosen near the center of the subinterval such that 1/c has only a
few precision bits so z/c - 1 is exactly representible as double:
1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2
Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < 0x1p-97,
the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
error and the interval for z is selected such that near x == 1, where log(x)
is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */
.tab = {
#define A(a, b, c) {a, 0, b, c},
A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48)
A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46)
A(0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45)
A(0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49)
A(0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47)
A(0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46)
A(0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50)
A(0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45)
A(0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45)
A(0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45)
A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
A(0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46)
A(0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46)
A(0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46)
A(0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45)
A(0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47)
A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
A(0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47)
A(0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45)
A(0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46)
A(0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45)
A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
A(0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46)
A(0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52)
A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
A(0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45)
A(0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45)
A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
A(0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46)
A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
A(0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45)
A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
A(0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48)
A(0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45)
A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
A(0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45)
A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
A(0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46)
A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
A(0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45)
A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
A(0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46)
A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
A(0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45)
A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
A(0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46)
A(0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45)
A(0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45)
A(0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47)
A(0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45)
A(0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46)
A(0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46)
A(0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47)
A(0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45)
A(0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45)
A(0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45)
A(0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49)
A(0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45)
A(0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46)
A(0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45)
A(0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45)
A(0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45)
A(0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45)
A(0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45)
A(0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47)
A(0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51)
A(0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45)
A(0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45)
A(0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46)
A(0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45)
A(0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46)
A(0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47)
A(0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47)
A(0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45)
A(0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47)
A(0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45)
A(0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48)
A(0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45)
A(0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51)
A(0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51)
A(0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46)
A(0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48)
A(0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45)
A(0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45)
A(0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45)
A(0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45)
A(0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47)
A(0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45)
A(0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45)
A(0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46)
A(0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46)
A(0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47)
A(0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45)
A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45)
A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45)
A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46)
A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)
},
};
/*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _POW_DATA_H
#define _POW_DATA_H
#include <features.h>
#define POW_LOG_TABLE_BITS 7
#define POW_LOG_POLY_ORDER 8
extern hidden const struct pow_log_data {
double ln2hi;
double ln2lo;
double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
/* Note: the pad field is unused, but allows slightly faster indexing. */
struct {
double invc, pad, logc, logctail;
} tab[1 << POW_LOG_TABLE_BITS];
} __pow_log_data;
#endif
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp2f_data.h"
#include "powf_data.h"
/*
POWF_LOG2_POLY_ORDER = 5
EXP2F_TABLE_BITS = 5
ULP error: 0.82 (~ 0.5 + relerr*2^24)
relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2)
relerr_log2: 1.83 * 2^-33 (Relative error of logx.)
relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
*/
#define N (1 << POWF_LOG2_TABLE_BITS)
#define T __powf_log2_data.tab
#define A __powf_log2_data.poly
#define OFF 0x3f330000
/* Subnormal input is normalized so ix has negative biased exponent.
Output is multiplied by N (POWF_SCALE) if TOINT_INTRINICS is set. */
static inline double_t log2_inline(uint32_t ix)
{
double_t z, r, r2, r4, p, q, y, y0, invc, logc;
uint32_t iz, top, tmp;
int k, i;
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
top = tmp & 0xff800000;
iz = ix - top;
k = (int32_t)top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
invc = T[i].invc;
logc = T[i].logc;
z = (double_t)asfloat(iz);
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
r = z * invc - 1;
y0 = logc + (double_t)k;
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
r2 = r * r;
y = A[0] * r + A[1];
p = A[2] * r + A[3];
r4 = r2 * r2;
q = A[4] * r + y0;
q = p * r2 + q;
y = y * r4 + q;
return y;
}
#undef N
#undef T
#define N (1 << EXP2F_TABLE_BITS)
#define T __exp2f_data.tab
#define SIGN_BIAS (1 << (EXP2F_TABLE_BITS + 11))
/* The output of log2 and thus the input of exp2 is either scaled by N
(in case of fast toint intrinsics) or not. The unscaled xd must be
in [-1021,1023], sign_bias sets the sign of the result. */
static inline float exp2_inline(double_t xd, uint32_t sign_bias)
{
uint64_t ki, ski, t;
double_t kd, z, r, r2, y, s;
#if TOINT_INTRINSICS
#define C __exp2f_data.poly_scaled
/* N*x = k + r with r in [-1/2, 1/2] */
kd = roundtoint(xd); /* k */
ki = converttoint(xd);
#else
#define C __exp2f_data.poly
#define SHIFT __exp2f_data.shift_scaled
/* x = k/N + r with r in [-1/(2N), 1/(2N)] */
kd = eval_as_double(xd + SHIFT);
ki = asuint64(kd);
kd -= SHIFT; /* k/N */
#endif
r = xd - kd;
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
ski = ki + sign_bias;
t += ski << (52 - EXP2F_TABLE_BITS);
s = asdouble(t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float(y);
}
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
the bit representation of a non-zero finite floating-point value. */
static inline int checkint(uint32_t iy)
{
int e = iy >> 23 & 0xff;
if (e < 0x7f)
return 0;
if (e > 0x7f + 23)
return 2;
if (iy & ((1 << (0x7f + 23 - e)) - 1))
return 0;
if (iy & (1 << (0x7f + 23 - e)))
return 1;
return 2;
}
static inline int zeroinfnan(uint32_t ix)
{
return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
}
float __cdecl powf(float x, float y)
{
uint32_t sign_bias = 0;
uint32_t ix, iy;
ix = asuint(x);
iy = asuint(y);
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
zeroinfnan(iy))) {
/* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
if (predict_false(zeroinfnan(iy))) {
if (2 * iy == 0)
return issignalingf_inline(x) ? x + y : 1.0f;
if (ix == 0x3f800000)
return issignalingf_inline(y) ? x + y : 1.0f;
if (2 * ix > 2u * 0x7f800000 ||
2 * iy > 2u * 0x7f800000)
return x + y;
if (2 * ix == 2 * 0x3f800000)
return 1.0f;
if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
return y * y;
}
if (predict_false(zeroinfnan(ix))) {
float_t x2 = x * x;
if (ix & 0x80000000 && checkint(iy) == 1)
x2 = -x2;
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
}
/* x and y are non-zero finite. */
if (ix & 0x80000000) {
/* Finite x < 0. */
int yint = checkint(iy);
if (yint == 0)
return __math_invalidf(x);
if (yint == 1)
sign_bias = SIGN_BIAS;
ix &= 0x7fffffff;
}
if (ix < 0x00800000) {
/* Normalize subnormal x so exponent becomes negative. */
ix = asuint(x * 0x1p23f);
ix &= 0x7fffffff;
ix -= 23 << 23;
}
}
double_t logx = log2_inline(ix);
double_t ylogx = y * logx; /* cannot overflow, y is single prec. */
if (predict_false((asuint64(ylogx) >> 47 & 0xffff) >=
asuint64(126.0 * POWF_SCALE) >> 47)) {
/* |y*log(x)| >= 126. */
if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
return __math_oflowf(sign_bias);
if (ylogx <= -150.0 * POWF_SCALE)
return __math_uflowf(sign_bias);
}
return exp2_inline(ylogx, sign_bias);
}
/*
* Data definition for powf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "powf_data.h"
const struct powf_log2_data __powf_log2_data = {
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },
{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE },
{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE },
{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE },
{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE },
{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE },
{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE },
{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE },
{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE },
{ 0x1p+0, 0x0p+0 * POWF_SCALE },
{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE },
{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE },
{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE },
{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE },
{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE },
{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE },
},
.poly = {
0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE,
0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE,
0x1.71547652ab82bp0 * POWF_SCALE,
}
};
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _POWF_DATA_H
#define _POWF_DATA_H
#include "libm.h"
#include "exp2f_data.h"
#define POWF_LOG2_TABLE_BITS 4
#define POWF_LOG2_POLY_ORDER 5
#if TOINT_INTRINSICS
#define POWF_SCALE_BITS EXP2F_TABLE_BITS
#else
#define POWF_SCALE_BITS 0
#endif
#define POWF_SCALE ((double)(1 << POWF_SCALE_BITS))
extern hidden const struct powf_log2_data {
struct {
double invc, logc;
} tab[1 << POWF_LOG2_TABLE_BITS];
double poly[POWF_LOG2_POLY_ORDER];
} __powf_log2_data;
#endif
#include <math.h>
#include "libm.h"
double __cdecl remainder(double x, double y)
{
int q;
return remquo(x, y, &q);
}
weak_alias(remainder, drem);
#include <math.h>
#include "libm.h"
float __cdecl remainderf(float x, float y)
{
int q;
return remquof(x, y, &q);
}
weak_alias(remainderf, dremf);
#include <math.h>
#include <stdint.h>
double __cdecl remquo(double x, double y, int *quo)
{
union {double f; uint64_t i;} ux = {x}, uy = {y};
int ex = ux.i>>52 & 0x7ff;
int ey = uy.i>>52 & 0x7ff;
int sx = ux.i>>63;
int sy = uy.i>>63;
uint32_t q;
uint64_t i;
uint64_t uxi = ux.i;
*quo = 0;
if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)
return (x*y)/(x*y);
if (ux.i<<1 == 0)
return x;
/* normalize x and y */
if (!ex) {
for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1);
uxi <<= -ex + 1;
} else {
uxi &= -1ULL >> 12;
uxi |= 1ULL << 52;
}
if (!ey) {
for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1);
uy.i <<= -ey + 1;
} else {
uy.i &= -1ULL >> 12;
uy.i |= 1ULL << 52;
}
q = 0;
if (ex < ey) {
if (ex+1 == ey)
goto end;
return x;
}
/* x mod y */
for (; ex > ey; ex--) {
i = uxi - uy.i;
if (i >> 63 == 0) {
uxi = i;
q++;
}
uxi <<= 1;
q <<= 1;
}
i = uxi - uy.i;
if (i >> 63 == 0) {
uxi = i;
q++;
}
if (uxi == 0)
ex = -60;
else
for (; uxi>>52 == 0; uxi <<= 1, ex--);
end:
/* scale result and decide between |x| and |x|-|y| */
if (ex > 0) {
uxi -= 1ULL << 52;
uxi |= (uint64_t)ex << 52;
} else {
uxi >>= -ex + 1;
}
ux.i = uxi;
x = ux.f;
if (sy)
y = -y;
if (ex == ey || (ex+1 == ey && (2*x > y || (2*x == y && q%2)))) {
x -= y;
q++;
}
q &= 0x7fffffff;
*quo = sx^sy ? -(int)q : (int)q;
return sx ? -x : x;
}
#include <math.h>
#include <stdint.h>
float __cdecl remquof(float x, float y, int *quo)
{
union {float f; uint32_t i;} ux = {x}, uy = {y};
int ex = ux.i>>23 & 0xff;
int ey = uy.i>>23 & 0xff;
int sx = ux.i>>31;
int sy = uy.i>>31;
uint32_t q;
uint32_t i;
uint32_t uxi = ux.i;
*quo = 0;
if (uy.i<<1 == 0 || isnan(y) || ex == 0xff)
return (x*y)/(x*y);
if (ux.i<<1 == 0)
return x;
/* normalize x and y */
if (!ex) {
for (i = uxi<<9; i>>31 == 0; ex--, i <<= 1);
uxi <<= -ex + 1;
} else {
uxi &= -1U >> 9;
uxi |= 1U << 23;
}
if (!ey) {
for (i = uy.i<<9; i>>31 == 0; ey--, i <<= 1);
uy.i <<= -ey + 1;
} else {
uy.i &= -1U >> 9;
uy.i |= 1U << 23;
}
q = 0;
if (ex < ey) {
if (ex+1 == ey)
goto end;
return x;
}
/* x mod y */
for (; ex > ey; ex--) {
i = uxi - uy.i;
if (i >> 31 == 0) {
uxi = i;
q++;
}
uxi <<= 1;
q <<= 1;
}
i = uxi - uy.i;
if (i >> 31 == 0) {
uxi = i;
q++;
}
if (uxi == 0)
ex = -30;
else
for (; uxi>>23 == 0; uxi <<= 1, ex--);
end:
/* scale result and decide between |x| and |x|-|y| */
if (ex > 0) {
uxi -= 1U << 23;
uxi |= (uint32_t)ex << 23;
} else {
uxi >>= -ex + 1;
}
ux.i = uxi;
x = ux.f;
if (sy)
y = -y;
if (ex == ey || (ex+1 == ey && (2*x > y || (2*x == y && q%2)))) {
x -= y;
q++;
}
q &= 0x7fffffff;
*quo = sx^sy ? -(int)q : (int)q;
return sx ? -x : x;
}
#include <float.h>
#include <math.h>
#include <stdint.h>
#include "libm.h"
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
static const double_t toint = 1/EPS;
double __cdecl rint(double x)
{
union {double f; uint64_t i;} u = {x};
int e = u.i>>52 & 0x7ff;
int s = u.i>>63;
double_t y;
if (e >= 0x3ff+52)
return x;
if (s)
y = x - toint + toint;
else
y = x + toint - toint;
if (y == 0)
return s ? -0.0 : 0;
return y;
}
#include <float.h>
#include <math.h>
#include <stdint.h>
#include "libm.h"
#if FLT_EVAL_METHOD==0
#define EPS FLT_EPSILON
#elif FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
static const float_t toint = 1/EPS;
float __cdecl rintf(float x)
{
union {float f; uint32_t i;} u = {x};
int e = u.i>>23 & 0xff;
int s = u.i>>31;
float_t y;
if (e >= 0x7f+23)
return x;
if (s)
y = x - toint + toint;
else
y = x + toint - toint;
if (y == 0)
return s ? -0.0f : 0.0f;
return y;
}
#include "libm.h"
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
static const double_t toint = 1/EPS;
double __cdecl round(double x)
{
union {double f; uint64_t i;} u = {x};
int e = u.i >> 52 & 0x7ff;
double_t y;
if (e >= 0x3ff+52)
return x;
if (u.i >> 63)
x = -x;
if (e < 0x3ff-1) {
/* raise inexact if x!=0 */
FORCE_EVAL(x + toint);
return 0*u.f;
}
y = x + toint - toint - x;
if (y > 0.5)
y = y + x - 1;
else if (y <= -0.5)
y = y + x + 1;
else
y = y + x;
if (u.i >> 63)
y = -y;
return y;
}
#include "libm.h"
#if FLT_EVAL_METHOD==0
#define EPS FLT_EPSILON
#elif FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
static const float_t toint = 1/EPS;
float __cdecl roundf(float x)
{
union {float f; uint32_t i;} u = {x};
int e = u.i >> 23 & 0xff;
float_t y;
if (e >= 0x7f+23)
return x;
if (u.i >> 31)
x = -x;
if (e < 0x7f-1) {
FORCE_EVAL(x + toint);
return 0*u.f;
}
y = x + toint - toint - x;
if (y > 0.5f)
y = y + x - 1;
else if (y <= -0.5f)
y = y + x + 1;
else
y = y + x;
if (u.i >> 31)
y = -y;
return y;
}
#include <math.h>
#include <stdint.h>
#include "libm.h"
double __cdecl scalbn(double x, int n)
{
union {double f; uint64_t i;} u;
double_t y = x;
if (n > 1023) {
y *= 0x1p1023;
n -= 1023;
if (n > 1023) {
y *= 0x1p1023;
n -= 1023;
if (n > 1023)
n = 1023;
}
} else if (n < -1022) {
/* make sure final n < -53 to avoid double
rounding in the subnormal range */
y *= 0x1p-1022 * 0x1p53;
n += 1022 - 53;
if (n < -1022) {
y *= 0x1p-1022 * 0x1p53;
n += 1022 - 53;
if (n < -1022)
n = -1022;
}
}
u.i = (uint64_t)(0x3ff+n)<<52;
x = y * u.f;
return x;
}
#include <math.h>
#include <stdint.h>
#include "libm.h"
float __cdecl scalbnf(float x, int n)
{
union {float f; uint32_t i;} u;
float_t y = x;
if (n > 127) {
y *= 0x1p127f;
n -= 127;
if (n > 127) {
y *= 0x1p127f;
n -= 127;
if (n > 127)
n = 127;
}
} else if (n < -126) {
y *= 0x1p-126f * 0x1p24f;
n += 126 - 24;
if (n < -126) {
y *= 0x1p-126f * 0x1p24f;
n += 126 - 24;
if (n < -126)
n = -126;
}
}
u.i = (uint32_t)(0x7f+n)<<23;
x = y * u.f;
return x;
}
#include <math.h>
#include "libm.h"
int __signgam = 0;
weak_alias(__signgam, signgam);
/* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* sin(x)
* Return sine function of x.
*
* kernel function:
* __sin ... sine function on [-pi/4,pi/4]
* __cos ... cose function on [-pi/4,pi/4]
* __rem_pio2 ... argument reduction routine
*
* Method.
* Let S,C and T denote the sin, cos and tan respectively on
* [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
* in [-pi/4 , +pi/4], and let n = k mod 4.
* We have
*
* n sin(x) cos(x) tan(x)
* ----------------------------------------------------------
* 0 S C T
* 1 C -S -1/T
* 2 -S -C T
* 3 -C S -1/T
* ----------------------------------------------------------
*
* Special cases:
* Let trig be any of sin, cos, or tan.
* trig(+-INF) is NaN, with signals;
* trig(NaN) is that NaN;
*
* Accuracy:
* TRIG(x) returns trig(x) nearly rounded
*/
#include "libm.h"
double __cdecl sin(double x)
{
double y[2];
uint32_t ix;
unsigned n;
/* High word of x. */
GET_HIGH_WORD(ix, x);
ix &= 0x7fffffff;
/* |x| ~< pi/4 */
if (ix <= 0x3fe921fb) {
if (ix < 0x3e500000) { /* |x| < 2**-26 */
/* raise inexact if x != 0 and underflow if subnormal*/
FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
return x;
}
return __sin(x, 0.0, 0);
}
/* sin(Inf or NaN) is NaN */
if (ix >= 0x7ff00000)
return x - x;
/* argument reduction needed */
n = __rem_pio2(x, y);
switch (n&3) {
case 0: return __sin(y[0], y[1], 1);
case 1: return __cos(y[0], y[1]);
case 2: return -__sin(y[0], y[1], 1);
default:
return -__cos(y[0], y[1]);
}
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#define _GNU_SOURCE
#include "libm.h"
void __cdecl sincos(double x, double *sin, double *cos)
{
double y[2], s, c;
uint32_t ix;
unsigned n;
GET_HIGH_WORD(ix, x);
ix &= 0x7fffffff;
/* |x| ~< pi/4 */
if (ix <= 0x3fe921fb) {
/* if |x| < 2**-27 * sqrt(2) */
if (ix < 0x3e46a09e) {
/* raise inexact if x!=0 and underflow if subnormal */
FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
*sin = x;
*cos = 1.0;
return;
}
*sin = __sin(x, 0.0, 0);
*cos = __cos(x, 0.0);
return;
}
/* sincos(Inf or NaN) is NaN */
if (ix >= 0x7ff00000) {
*sin = *cos = x - x;
return;
}
/* argument reduction needed */
n = __rem_pio2(x, y);
s = __sin(y[0], y[1], 1);
c = __cos(y[0], y[1]);
switch (n&3) {
case 0:
*sin = s;
*cos = c;
break;
case 1:
*sin = c;
*cos = -s;
break;
case 2:
*sin = -s;
*cos = -c;
break;
case 3:
default:
*sin = -c;
*cos = s;
break;
}
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#define _GNU_SOURCE
#include "libm.h"
/* Small multiples of pi/2 rounded to double precision. */
static const double
s1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */
s2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
s3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
void __cdecl sincosf(float x, float *sin, float *cos)
{
double y;
float_t s, c;
uint32_t ix;
unsigned n, sign;
GET_FLOAT_WORD(ix, x);
sign = ix >> 31;
ix &= 0x7fffffff;
/* |x| ~<= pi/4 */
if (ix <= 0x3f490fda) {
/* |x| < 2**-12 */
if (ix < 0x39800000) {
/* raise inexact if x!=0 and underflow if subnormal */
FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
*sin = x;
*cos = 1.0f;
return;
}
*sin = __sindf(x);
*cos = __cosdf(x);
return;
}
/* |x| ~<= 5*pi/4 */
if (ix <= 0x407b53d1) {
if (ix <= 0x4016cbe3) { /* |x| ~<= 3pi/4 */
if (sign) {
*sin = -__cosdf(x + s1pio2);
*cos = __sindf(x + s1pio2);
} else {
*sin = __cosdf(s1pio2 - x);
*cos = __sindf(s1pio2 - x);
}
return;
}
/* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */
*sin = -__sindf(sign ? x + s2pio2 : x - s2pio2);
*cos = -__cosdf(sign ? x + s2pio2 : x - s2pio2);
return;
}
/* |x| ~<= 9*pi/4 */
if (ix <= 0x40e231d5) {
if (ix <= 0x40afeddf) { /* |x| ~<= 7*pi/4 */
if (sign) {
*sin = __cosdf(x + s3pio2);
*cos = -__sindf(x + s3pio2);
} else {
*sin = -__cosdf(x - s3pio2);
*cos = __sindf(x - s3pio2);
}
return;
}
*sin = __sindf(sign ? x + s4pio2 : x - s4pio2);
*cos = __cosdf(sign ? x + s4pio2 : x - s4pio2);
return;
}
/* sin(Inf or NaN) is NaN */
if (ix >= 0x7f800000) {
*sin = *cos = x - x;
return;
}
/* general argument reduction needed */
n = __rem_pio2f(x, &y);
s = __sindf(y);
c = __cosdf(y);
switch (n&3) {
case 0:
*sin = s;
*cos = c;
break;
case 1:
*sin = c;
*cos = -s;
break;
case 2:
*sin = -s;
*cos = -c;
break;
case 3:
default:
*sin = -c;
*cos = s;
break;
}
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
/* Small multiples of pi/2 rounded to double precision. */
static const double
s1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */
s2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
s3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
float __cdecl sinf(float x)
{
double y;
uint32_t ix;
int n, sign;
GET_FLOAT_WORD(ix, x);
sign = ix >> 31;
ix &= 0x7fffffff;
if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
if (ix < 0x39800000) { /* |x| < 2**-12 */
/* raise inexact if x!=0 and underflow if subnormal */
FORCE_EVAL(ix < 0x00800000 ? x/0x1p120f : x+0x1p120f);
return x;
}
return __sindf(x);
}
if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
if (ix <= 0x4016cbe3) { /* |x| ~<= 3pi/4 */
if (sign)
return -__cosdf(x + s1pio2);
else
return __cosdf(x - s1pio2);
}
return __sindf(sign ? -(x + s2pio2) : -(x - s2pio2));
}
if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
if (ix <= 0x40afeddf) { /* |x| ~<= 7*pi/4 */
if (sign)
return __cosdf(x + s3pio2);
else
return -__cosdf(x - s3pio2);
}
return __sindf(sign ? x + s4pio2 : x - s4pio2);
}
/* sin(Inf or NaN) is NaN */
if (ix >= 0x7f800000)
return x - x;
/* general argument reduction needed */
n = __rem_pio2f(x, &y);
switch (n&3) {
case 0: return __sindf(y);
case 1: return __cosdf(y);
case 2: return __sindf(-y);
default:
return -__cosdf(y);
}
}
#include "libm.h"
/* sinh(x) = (exp(x) - 1/exp(x))/2
* = (exp(x)-1 + (exp(x)-1)/exp(x))/2
* = x + x^3/6 + o(x^5)
*/
double __cdecl sinh(double x)
{
union {double f; uint64_t i;} u = {.f = x};
uint32_t w;
double t, h, absx;
h = 0.5;
if (u.i >> 63)
h = -h;
/* |x| */
u.i &= (uint64_t)-1/2;
absx = u.f;
w = u.i >> 32;
/* |x| < log(DBL_MAX) */
if (w < 0x40862e42) {
t = expm1(absx);
if (w < 0x3ff00000) {
if (w < 0x3ff00000 - (26<<20))
/* note: inexact and underflow are raised by expm1 */
/* note: this branch avoids spurious underflow */
return x;
return h*(2*t - t*t/(t+1));
}
/* note: |x|>log(0x1p26)+eps could be just h*exp(x) */
return h*(t + t/(t+1));
}
/* |x| > log(DBL_MAX) or nan */
/* note: the result is stored to handle overflow */
t = __expo2(absx, 2*h);
return t;
}
#include "libm.h"
float __cdecl sinhf(float x)
{
union {float f; uint32_t i;} u = {.f = x};
uint32_t w;
float t, h, absx;
h = 0.5;
if (u.i >> 31)
h = -h;
/* |x| */
u.i &= 0x7fffffff;
absx = u.f;
w = u.i;
/* |x| < log(FLT_MAX) */
if (w < 0x42b17217) {
t = expm1f(absx);
if (w < 0x3f800000) {
if (w < 0x3f800000 - (12<<23))
return x;
return h*(2*t - t*t/(t+1));
}
return h*(t + t/(t+1));
}
/* |x| > logf(FLT_MAX) or nan */
t = __expo2f(absx, 2*h);
return t;
}
#include <stdint.h>
#include <math.h>
#include "libm.h"
#include "sqrt_data.h"
#define FENV_SUPPORT 1
/* returns a*b*2^-32 - e, with error 0 <= e < 1. */
static inline uint32_t mul32(uint32_t a, uint32_t b)
{
return (uint64_t)a*b >> 32;
}
/* returns a*b*2^-64 - e, with error 0 <= e < 3. */
static inline uint64_t mul64(uint64_t a, uint64_t b)
{
uint64_t ahi = a>>32;
uint64_t alo = a&0xffffffff;
uint64_t bhi = b>>32;
uint64_t blo = b&0xffffffff;
return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
}
double __cdecl sqrt(double x)
{
uint64_t ix, top, m;
/* special case handling. */
ix = asuint64(x);
top = ix >> 52;
if (predict_false(top - 0x001 >= 0x7ff - 0x001)) {
/* x < 0x1p-1022 or inf or nan. */
if (ix * 2 == 0)
return x;
if (ix == 0x7ff0000000000000)
return x;
if (ix > 0x7ff0000000000000)
return __math_invalid(x);
/* x is subnormal, normalize it. */
ix = asuint64(x * 0x1p52);
top = ix >> 52;
top -= 52;
}
/* argument reduction:
x = 4^e m; with integer e, and m in [1, 4)
m: fixed point representation [2.62]
2^e is the exponent part of the result. */
int even = top & 1;
m = (ix << 11) | 0x8000000000000000;
if (even) m >>= 1;
top = (top + 0x3ff) >> 1;
/* approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
initial estimate:
7bit table lookup (1bit exponent and 6bit significand).
iterative approximation:
using 2 goldschmidt iterations with 32bit int arithmetics
and a final iteration with 64bit int arithmetics.
details:
the relative error (e = r0 sqrt(m)-1) of a linear estimate
(r0 = a m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best,
a table lookup is faster and needs one less iteration
6 bit lookup table (128b) gives |e| < 0x1.f9p-8
7 bit lookup table (256b) gives |e| < 0x1.fdp-9
for single and double prec 6bit is enough but for quad
prec 7bit is needed (or modified iterations). to avoid
one more iteration >=13bit table would be needed (16k).
a newton-raphson iteration for r is
w = r*r
u = 3 - m*w
r = r*u/2
can use a goldschmidt iteration for s at the end or
s = m*r
first goldschmidt iteration is
s = m*r
u = 3 - s*r
r = r*u/2
s = s*u/2
next goldschmidt iteration is
u = 3 - s*r
r = r*u/2
s = s*u/2
and at the end r is not computed only s.
they use the same amount of operations and converge at the
same quadratic rate, i.e. if
r1 sqrt(m) - 1 = e, then
r2 sqrt(m) - 1 = -3/2 e^2 - 1/2 e^3
the advantage of goldschmidt is that the mul for s and r
are independent (computed in parallel), however it is not
"self synchronizing": it only uses the input m in the
first iteration so rounding errors accumulate. at the end
or when switching to larger precision arithmetics rounding
errors dominate so the first iteration should be used.
the fixed point representations are
m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30
and after switching to 64 bit
m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62 */
static const uint64_t three = 0xc0000000;
uint64_t r, s, d, u, i;
i = (ix >> 46) % 128;
r = (uint32_t)__rsqrt_tab[i] << 16;
/* |r sqrt(m) - 1| < 0x1.fdp-9 */
s = mul32(m>>32, r);
/* |s/sqrt(m) - 1| < 0x1.fdp-9 */
d = mul32(s, r);
u = three - d;
r = mul32(r, u) << 1;
/* |r sqrt(m) - 1| < 0x1.7bp-16 */
s = mul32(s, u) << 1;
/* |s/sqrt(m) - 1| < 0x1.7bp-16 */
d = mul32(s, r);
u = three - d;
r = mul32(r, u) << 1;
/* |r sqrt(m) - 1| < 0x1.3704p-29 (measured worst-case) */
r = r << 32;
s = mul64(m, r);
d = mul64(s, r);
u = (three<<32) - d;
s = mul64(s, u); /* repr: 3.61 */
/* -0x1p-57 < s - sqrt(m) < 0x1.8001p-61 */
s = (s - 2) >> 9; /* repr: 12.52 */
/* -0x1.09p-52 < s - sqrt(m) < -0x1.fffcp-63 */
/* s < sqrt(m) < s + 0x1.09p-52,
compute nearest rounded result:
the nearest result to 52 bits is either s or s+0x1p-52,
we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m. */
uint64_t d0, d1, d2;
double y, t;
d0 = (m << 42) - s*s;
d1 = s - d0;
d2 = d1 + s + 1;
s += d1 >> 63;
s &= 0x000fffffffffffff;
s |= top << 52;
y = asdouble(s);
if (FENV_SUPPORT) {
/* handle rounding modes and inexact exception:
only (s+1)^2 == 2^42 m case is exact otherwise
add a tiny value to cause the fenv effects. */
uint64_t tiny = predict_false(d2==0) ? 0 : 0x0010000000000000;
tiny |= (d1^d2) & 0x8000000000000000;
t = asdouble(tiny);
y = eval_as_double(y + t);
}
return y;
}
#include "sqrt_data.h"
const uint16_t __rsqrt_tab[128] = {
0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43,
0xaa14,0xa8eb,0xa7c8,0xa6aa,0xa592,0xa480,0xa373,0xa26b,
0xa168,0xa06a,0x9f70,0x9e7b,0x9d8a,0x9c9d,0x9bb5,0x9ad1,
0x99f0,0x9913,0x983a,0x9765,0x9693,0x95c4,0x94f8,0x9430,
0x936b,0x92a9,0x91ea,0x912e,0x9075,0x8fbe,0x8f0a,0x8e59,
0x8daa,0x8cfe,0x8c54,0x8bac,0x8b07,0x8a64,0x89c4,0x8925,
0x8889,0x87ee,0x8756,0x86c0,0x862b,0x8599,0x8508,0x8479,
0x83ec,0x8361,0x82d8,0x8250,0x81c9,0x8145,0x80c2,0x8040,
0xff02,0xfd0e,0xfb25,0xf947,0xf773,0xf5aa,0xf3ea,0xf234,
0xf087,0xeee3,0xed47,0xebb3,0xea27,0xe8a3,0xe727,0xe5b2,
0xe443,0xe2dc,0xe17a,0xe020,0xdecb,0xdd7d,0xdc34,0xdaf1,
0xd9b3,0xd87b,0xd748,0xd61a,0xd4f1,0xd3cd,0xd2ad,0xd192,
0xd07b,0xcf69,0xce5b,0xcd51,0xcc4a,0xcb48,0xca4a,0xc94f,
0xc858,0xc764,0xc674,0xc587,0xc49d,0xc3b7,0xc2d4,0xc1f4,
0xc116,0xc03c,0xbf65,0xbe90,0xbdbe,0xbcef,0xbc23,0xbb59,
0xba91,0xb9cc,0xb90a,0xb84a,0xb78c,0xb6d0,0xb617,0xb560,
};
#ifndef _SQRT_DATA_H
#define _SQRT_DATA_H
#include <features.h>
#include <stdint.h>
/* if x in [1,2): i = (int)(64*x);
if x in [2,4): i = (int)(32*x-64);
__rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error:
|__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < -0x1.fdp-9 < 2^-8 */
extern hidden const uint16_t __rsqrt_tab[128];
#endif
#include <stdint.h>
#include <math.h>
#include "libm.h"
#include "sqrt_data.h"
#define FENV_SUPPORT 1
static inline uint32_t mul32(uint32_t a, uint32_t b)
{
return (uint64_t)a*b >> 32;
}
/* see sqrt.c for more detailed comments. */
float __cdecl sqrtf(float x)
{
uint32_t ix, m, m1, m0, even, ey;
ix = asuint(x);
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
/* x < 0x1p-126 or inf or nan. */
if (ix * 2 == 0)
return x;
if (ix == 0x7f800000)
return x;
if (ix > 0x7f800000)
return __math_invalidf(x);
/* x is subnormal, normalize it. */
ix = asuint(x * 0x1p23f);
ix -= 23 << 23;
}
/* x = 4^e m; with int e and m in [1, 4). */
even = ix & 0x00800000;
m1 = (ix << 8) | 0x80000000;
m0 = (ix << 7) & 0x7fffffff;
m = even ? m0 : m1;
/* 2^e is the exponent part of the return value. */
ey = ix >> 1;
ey += 0x3f800000 >> 1;
ey &= 0x7f800000;
/* compute r ~ 1/sqrt(m), s ~ sqrt(m) with 2 goldschmidt iterations. */
static const uint32_t three = 0xc0000000;
uint32_t r, s, d, u, i;
i = (ix >> 17) % 128;
r = (uint32_t)__rsqrt_tab[i] << 16;
/* |r*sqrt(m) - 1| < 0x1p-8 */
s = mul32(m, r);
/* |s/sqrt(m) - 1| < 0x1p-8 */
d = mul32(s, r);
u = three - d;
r = mul32(r, u) << 1;
/* |r*sqrt(m) - 1| < 0x1.7bp-16 */
s = mul32(s, u) << 1;
/* |s/sqrt(m) - 1| < 0x1.7bp-16 */
d = mul32(s, r);
u = three - d;
s = mul32(s, u);
/* -0x1.03p-28 < s/sqrt(m) - 1 < 0x1.fp-31 */
s = (s - 1)>>6;
/* s < sqrt(m) < s + 0x1.08p-23 */
/* compute nearest rounded result. */
uint32_t d0, d1, d2;
float y, t;
d0 = (m << 16) - s*s;
d1 = s - d0;
d2 = d1 + s + 1;
s += d1 >> 31;
s &= 0x007fffff;
s |= ey;
y = asfloat(s);
if (FENV_SUPPORT) {
/* handle rounding and inexact exception. */
uint32_t tiny = predict_false(d2==0) ? 0 : 0x01000000;
tiny |= (d1^d2) & 0x80000000;
t = asfloat(tiny);
y = eval_as_float(y + t);
}
return y;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_tan.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* tan(x)
* Return tangent function of x.
*
* kernel function:
* __tan ... tangent function on [-pi/4,pi/4]
* __rem_pio2 ... argument reduction routine
*
* Method.
* Let S,C and T denote the sin, cos and tan respectively on
* [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
* in [-pi/4 , +pi/4], and let n = k mod 4.
* We have
*
* n sin(x) cos(x) tan(x)
* ----------------------------------------------------------
* 0 S C T
* 1 C -S -1/T
* 2 -S -C T
* 3 -C S -1/T
* ----------------------------------------------------------
*
* Special cases:
* Let trig be any of sin, cos, or tan.
* trig(+-INF) is NaN, with signals;
* trig(NaN) is that NaN;
*
* Accuracy:
* TRIG(x) returns trig(x) nearly rounded
*/
#include "libm.h"
double __cdecl tan(double x)
{
double y[2];
uint32_t ix;
unsigned n;
GET_HIGH_WORD(ix, x);
ix &= 0x7fffffff;
/* |x| ~< pi/4 */
if (ix <= 0x3fe921fb) {
if (ix < 0x3e400000) { /* |x| < 2**-27 */
/* raise inexact if x!=0 and underflow if subnormal */
FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
return x;
}
return __tan(x, 0.0, 0);
}
/* tan(Inf or NaN) is NaN */
if (ix >= 0x7ff00000)
return x - x;
/* argument reduction */
n = __rem_pio2(x, y);
return __tan(y[0], y[1], n&1);
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_tanf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
/* Small multiples of pi/2 rounded to double precision. */
static const double
t1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */
t2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
t3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
t4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
float __cdecl tanf(float x)
{
double y;
uint32_t ix;
unsigned n, sign;
GET_FLOAT_WORD(ix, x);
sign = ix >> 31;
ix &= 0x7fffffff;
if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
if (ix < 0x39800000) { /* |x| < 2**-12 */
/* raise inexact if x!=0 and underflow if subnormal */
FORCE_EVAL(ix < 0x00800000 ? x/0x1p120f : x+0x1p120f);
return x;
}
return __tandf(x, 0);
}
if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
if (ix <= 0x4016cbe3) /* |x| ~<= 3pi/4 */
return __tandf((sign ? x+t1pio2 : x-t1pio2), 1);
else
return __tandf((sign ? x+t2pio2 : x-t2pio2), 0);
}
if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
if (ix <= 0x40afeddf) /* |x| ~<= 7*pi/4 */
return __tandf((sign ? x+t3pio2 : x-t3pio2), 1);
else
return __tandf((sign ? x+t4pio2 : x-t4pio2), 0);
}
/* tan(Inf or NaN) is NaN */
if (ix >= 0x7f800000)
return x - x;
/* argument reduction */
n = __rem_pio2f(x, &y);
return __tandf(y, n&1);
}
#include "libm.h"
/* tanh(x) = (exp(x) - exp(-x))/(exp(x) + exp(-x))
* = (exp(2*x) - 1)/(exp(2*x) - 1 + 2)
* = (1 - exp(-2*x))/(exp(-2*x) - 1 + 2)
*/
double __cdecl tanh(double x)
{
union {double f; uint64_t i;} u = {.f = x};
uint32_t w;
int sign;
double_t t;
/* x = |x| */
sign = u.i >> 63;
u.i &= (uint64_t)-1/2;
x = u.f;
w = u.i >> 32;
if (w > 0x3fe193ea) {
/* |x| > log(3)/2 ~= 0.5493 or nan */
if (w > 0x40340000) {
/* |x| > 20 or nan */
/* note: this branch avoids raising overflow */
t = 1 - 0/x;
} else {
t = expm1(2*x);
t = 1 - 2/(t+2);
}
} else if (w > 0x3fd058ae) {
/* |x| > log(5/3)/2 ~= 0.2554 */
t = expm1(2*x);
t = t/(t+2);
} else if (w >= 0x00100000) {
/* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */
t = expm1(-2*x);
t = -t/(t+2);
} else {
/* |x| is subnormal */
/* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */
FORCE_EVAL((float)x);
t = x;
}
return sign ? -t : t;
}
#include "libm.h"
float __cdecl tanhf(float x)
{
union {float f; uint32_t i;} u = {.f = x};
uint32_t w;
int sign;
float t;
/* x = |x| */
sign = u.i >> 31;
u.i &= 0x7fffffff;
x = u.f;
w = u.i;
if (w > 0x3f0c9f54) {
/* |x| > log(3)/2 ~= 0.5493 or nan */
if (w > 0x41200000) {
/* |x| > 10 */
t = 1 + 0/x;
} else {
t = expm1f(2*x);
t = 1 - 2/(t+2);
}
} else if (w > 0x3e82c578) {
/* |x| > log(5/3)/2 ~= 0.2554 */
t = expm1f(2*x);
t = t/(t+2);
} else if (w >= 0x00800000) {
/* |x| >= 0x1p-126 */
t = expm1f(-2*x);
t = -t/(t+2);
} else {
/* |x| is subnormal */
FORCE_EVAL(x*x);
t = x;
}
return sign ? -t : t;
}
/*
"A Precision Approximation of the Gamma Function" - Cornelius Lanczos (1964)
"Lanczos Implementation of the Gamma Function" - Paul Godfrey (2001)
"An Analysis of the Lanczos Gamma Approximation" - Glendon Ralph Pugh (2004)
approximation method:
(x - 0.5) S(x)
Gamma(x) = (x + g - 0.5) * ----------------
exp(x + g - 0.5)
with
a1 a2 a3 aN
S(x) ~= [ a0 + ----- + ----- + ----- + ... + ----- ]
x + 1 x + 2 x + 3 x + N
with a0, a1, a2, a3,.. aN constants which depend on g.
for x < 0 the following reflection formula is used:
Gamma(x)*Gamma(-x) = -pi/(x sin(pi x))
most ideas and constants are from boost and python
*/
#include "libm.h"
static const double pi = 3.141592653589793238462643383279502884;
/* sin(pi x) with x > 0x1p-100, if sin(pi*x)==0 the sign is arbitrary */
static double sinpi(double x)
{
int n;
/* argument reduction: x = |x| mod 2 */
/* spurious inexact when x is odd int */
x = x * 0.5;
x = 2 * (x - floor(x));
/* reduce x into [-.25,.25] */
n = 4 * x;
n = (n+1)/2;
x -= n * 0.5;
x *= pi;
switch (n) {
default: /* case 4 */
case 0:
return __sin(x, 0, 0);
case 1:
return __cos(x, 0);
case 2:
return __sin(-x, 0, 0);
case 3:
return -__cos(x, 0);
}
}
#define N 12
//static const double g = 6.024680040776729583740234375;
static const double gmhalf = 5.524680040776729583740234375;
static const double Snum[N+1] = {
23531376880.410759688572007674451636754734846804940,
42919803642.649098768957899047001988850926355848959,
35711959237.355668049440185451547166705960488635843,
17921034426.037209699919755754458931112671403265390,
6039542586.3520280050642916443072979210699388420708,
1439720407.3117216736632230727949123939715485786772,
248874557.86205415651146038641322942321632125127801,
31426415.585400194380614231628318205362874684987640,
2876370.6289353724412254090516208496135991145378768,
186056.26539522349504029498971604569928220784236328,
8071.6720023658162106380029022722506138218516325024,
210.82427775157934587250973392071336271166969580291,
2.5066282746310002701649081771338373386264310793408,
};
static const double Sden[N+1] = {
0, 39916800, 120543840, 150917976, 105258076, 45995730, 13339535,
2637558, 357423, 32670, 1925, 66, 1,
};
/* n! for small integer n */
static const double fact[] = {
1, 1, 2, 6, 24, 120, 720, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0,
479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, 20922789888000.0,
355687428096000.0, 6402373705728000.0, 121645100408832000.0,
2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0,
};
/* S(x) rational function for positive x */
static double S(double x)
{
double_t num = 0, den = 0;
int i;
/* to avoid overflow handle large x differently */
if (x < 8)
for (i = N; i >= 0; i--) {
num = num * x + Snum[i];
den = den * x + Sden[i];
}
else
for (i = 0; i <= N; i++) {
num = num / x + Snum[i];
den = den / x + Sden[i];
}
return num/den;
}
double __cdecl tgamma(double x)
{
union {double f; uint64_t i;} u = {x};
double absx, y;
double_t dy, z, r;
uint32_t ix = u.i>>32 & 0x7fffffff;
int sign = u.i>>63;
/* special cases */
if (ix >= 0x7ff00000)
/* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
return x + INFINITY;
if (ix < (0x3ff-54)<<20)
/* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */
return 1/x;
/* integer arguments */
/* raise inexact when non-integer */
if (x == floor(x)) {
if (sign)
return 0/0.0;
if (x <= sizeof fact/sizeof *fact)
return fact[(int)x - 1];
}
/* x >= 172: tgamma(x)=inf with overflow */
/* x =< -184: tgamma(x)=+-0 with underflow */
if (ix >= 0x40670000) { /* |x| >= 184 */
if (sign) {
FORCE_EVAL((float)(0x1p-126/x));
if (floor(x) * 0.5 == floor(x * 0.5))
return 0;
return -0.0;
}
x *= 0x1p1023;
return x;
}
absx = sign ? -x : x;
/* handle the error of x + g - 0.5 */
y = absx + gmhalf;
if (absx > gmhalf) {
dy = y - absx;
dy -= gmhalf;
} else {
dy = y - gmhalf;
dy -= absx;
}
z = absx - 0.5;
r = S(absx) * exp(-y);
if (x < 0) {
/* reflection formula for negative x */
/* sinpi(absx) is not 0, integers are already handled */
r = -pi / (sinpi(absx) * absx * r);
dy = -dy;
z = -z;
}
r += dy * (gmhalf+0.5) * r / y;
z = pow(y, 0.5*z);
y = r * z * z;
return y;
}
#if 0
double __lgamma_r(double x, int *sign)
{
double r, absx;
*sign = 1;
/* special cases */
if (!isfinite(x))
/* lgamma(nan)=nan, lgamma(+-inf)=inf */
return x*x;
/* integer arguments */
if (x == floor(x) && x <= 2) {
/* n <= 0: lgamma(n)=inf with divbyzero */
/* n == 1,2: lgamma(n)=0 */
if (x <= 0)
return 1/0.0;
return 0;
}
absx = fabs(x);
/* lgamma(x) ~ -log(|x|) for tiny |x| */
if (absx < 0x1p-54) {
*sign = 1 - 2*!!signbit(x);
return -log(absx);
}
/* use tgamma for smaller |x| */
if (absx < 128) {
x = tgamma(x);
*sign = 1 - 2*!!signbit(x);
return log(fabs(x));
}
/* second term (log(S)-g) could be more precise here.. */
/* or with stirling: (|x|-0.5)*(log(|x|)-1) + poly(1/|x|) */
r = (absx-0.5)*(log(absx+gmhalf)-1) + (log(S(absx)) - (gmhalf+0.5));
if (x < 0) {
/* reflection formula for negative x */
x = sinpi(absx);
*sign = 2*!!signbit(x) - 1;
r = log(pi/(fabs(x)*absx)) - r;
}
return r;
}
weak_alias(__lgamma_r, lgamma_r);
#endif
#include <math.h>
float __cdecl tgammaf(float x)
{
return tgamma(x);
}
#include "libm.h"
double __cdecl trunc(double x)
{
union {double f; uint64_t i;} u = {x};
int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff + 12;
uint64_t m;
if (e >= 52 + 12)
return x;
if (e < 12)
e = 1;
m = -1ULL >> e;
if ((u.i & m) == 0)
return x;
FORCE_EVAL(x + 0x1p120f);
u.i &= ~m;
return u.f;
}
#include "libm.h"
float __cdecl truncf(float x)
{
union {float f; uint32_t i;} u = {x};
int e = (int)(u.i >> 23 & 0xff) - 0x7f + 9;
uint32_t m;
if (e >= 23 + 9)
return x;
if (e < 9)
e = 1;
m = -1U >> e;
if ((u.i & m) == 0)
return x;
FORCE_EVAL(x + 0x1p120f);
u.i &= ~m;
return u.f;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment