Commit 438553e9 authored by Alexandre Julliard's avatar Alexandre Julliard

libs: Import the math library from upstream musl 1.2.3.

parent 35f9091b
......@@ -725,6 +725,8 @@ TIFF_PE_LIBS
TIFF_PE_CFLAGS
PNG_PE_LIBS
PNG_PE_CFLAGS
MUSL_PE_LIBS
MUSL_PE_CFLAGS
MPG123_PE_LIBS
MPG123_PE_CFLAGS
LDAP_PE_LIBS
......@@ -1577,6 +1579,7 @@ enable_lcms2
enable_ldap
enable_mfuuid
enable_mpg123
enable_musl
enable_png
enable_strmbase
enable_strmiids
......@@ -1734,6 +1737,8 @@ LDAP_PE_CFLAGS
LDAP_PE_LIBS
MPG123_PE_CFLAGS
MPG123_PE_LIBS
MUSL_PE_CFLAGS
MUSL_PE_LIBS
PNG_PE_CFLAGS
PNG_PE_LIBS
TIFF_PE_CFLAGS
......@@ -2526,6 +2531,10 @@ Some influential environment variables:
version
MPG123_PE_LIBS
Linker flags for the PE mpg123, overriding the bundled version
MUSL_PE_CFLAGS
C compiler flags for the PE musl, overriding the bundled version
MUSL_PE_LIBS
Linker flags for the PE musl, overriding the bundled version
PNG_PE_CFLAGS
C compiler flags for the PE png, overriding the bundled version
PNG_PE_LIBS Linker flags for the PE png, overriding the bundled version
......@@ -13161,6 +13170,21 @@ fi
printf "%s\n" "$as_me:${as_lineno-$LINENO}: mpg123 cflags: $MPG123_PE_CFLAGS" >&5
printf "%s\n" "$as_me:${as_lineno-$LINENO}: mpg123 libs: $MPG123_PE_LIBS" >&5
if ${MUSL_PE_LIBS:+false} :
then :
MUSL_PE_LIBS=musl
if ${MUSL_PE_CFLAGS:+false} :
then :
MUSL_PE_CFLAGS=
else $as_nop
enable_musl=no
fi
else $as_nop
enable_musl=no
fi
printf "%s\n" "$as_me:${as_lineno-$LINENO}: musl cflags: $MUSL_PE_CFLAGS" >&5
printf "%s\n" "$as_me:${as_lineno-$LINENO}: musl libs: $MUSL_PE_LIBS" >&5
if ${PNG_PE_LIBS:+false} :
then :
PNG_PE_LIBS="png \$(ZLIB_PE_LIBS)"
......@@ -21976,6 +22000,7 @@ wine_fn_config_makefile libs/lcms2 enable_lcms2
wine_fn_config_makefile libs/ldap enable_ldap
wine_fn_config_makefile libs/mfuuid enable_mfuuid
wine_fn_config_makefile libs/mpg123 enable_mpg123
wine_fn_config_makefile libs/musl enable_musl
wine_fn_config_makefile libs/png enable_png
wine_fn_config_makefile libs/strmbase enable_strmbase
wine_fn_config_makefile libs/strmiids enable_strmiids
......@@ -23128,6 +23153,8 @@ LDAP_PE_CFLAGS = $LDAP_PE_CFLAGS
LDAP_PE_LIBS = $LDAP_PE_LIBS
MPG123_PE_CFLAGS = $MPG123_PE_CFLAGS
MPG123_PE_LIBS = $MPG123_PE_LIBS
MUSL_PE_CFLAGS = $MUSL_PE_CFLAGS
MUSL_PE_LIBS = $MUSL_PE_LIBS
PNG_PE_CFLAGS = $PNG_PE_CFLAGS
PNG_PE_LIBS = $PNG_PE_LIBS
TIFF_PE_CFLAGS = $TIFF_PE_CFLAGS
......
......@@ -1121,6 +1121,7 @@ WINE_EXTLIB_FLAGS(JXR, jxr, jxr, "-I\$(top_srcdir)/libs/jxr/jxrgluelib -I\$(top_
WINE_EXTLIB_FLAGS(LCMS2, lcms2, lcms2, "-I\$(top_srcdir)/libs/lcms2/include")
WINE_EXTLIB_FLAGS(LDAP, ldap, ldap, "-I\$(top_srcdir)/libs/ldap/include")
WINE_EXTLIB_FLAGS(MPG123, mpg123, mpg123, "-I\$(top_srcdir)/libs/mpg123/src/libmpg123")
WINE_EXTLIB_FLAGS(MUSL, musl, musl)
WINE_EXTLIB_FLAGS(PNG, png, "png \$(ZLIB_PE_LIBS)", "-I\$(top_srcdir)/libs/png")
WINE_EXTLIB_FLAGS(TIFF, tiff, "tiff \$(ZLIB_PE_LIBS)", "-I\$(top_srcdir)/libs/tiff/libtiff")
WINE_EXTLIB_FLAGS(VKD3D, vkd3d, vkd3d, "-I\$(top_srcdir)/libs/vkd3d/include")
......@@ -3284,6 +3285,7 @@ WINE_CONFIG_MAKEFILE(libs/lcms2)
WINE_CONFIG_MAKEFILE(libs/ldap)
WINE_CONFIG_MAKEFILE(libs/mfuuid)
WINE_CONFIG_MAKEFILE(libs/mpg123)
WINE_CONFIG_MAKEFILE(libs/musl)
WINE_CONFIG_MAKEFILE(libs/png)
WINE_CONFIG_MAKEFILE(libs/strmbase)
WINE_CONFIG_MAKEFILE(libs/strmiids)
......
......@@ -136,7 +136,7 @@ _ACRTIMP int __cdecl _fpclass(double);
_ACRTIMP double __cdecl nextafter(double, double);
#ifndef __i386__
#if !defined(__i386__) || defined(_NO_CRT_MATH_INLINE)
_ACRTIMP float __cdecl sinf(float);
_ACRTIMP float __cdecl cosf(float);
......@@ -206,13 +206,13 @@ static inline int _fpclassf(float x)
#endif
#if !defined(__i386__) && !defined(__x86_64__) && (_MSVCR_VER == 0 || _MSVCR_VER >= 110)
#if (!defined(__i386__) && !defined(__x86_64__) && (_MSVCR_VER == 0 || _MSVCR_VER >= 110)) || defined(_NO_CRT_MATH_INLINE)
_ACRTIMP float __cdecl fabsf(float);
#else
static inline float fabsf(float x) { return fabs(x); }
#endif
#if !defined(__i386__) || _MSVCR_VER>=120
#if !defined(__i386__) || _MSVCR_VER>=120 || defined(_NO_CRT_MATH_INLINE)
_ACRTIMP float __cdecl _chgsignf(float);
_ACRTIMP float __cdecl _copysignf(float, float);
......
musl as a whole is licensed under the following standard MIT license:
----------------------------------------------------------------------
Copyright © 2005-2020 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------
Authors/contributors include:
A. Wilcox
Ada Worcester
Alex Dowad
Alex Suykov
Alexander Monakov
Andre McCurdy
Andrew Kelley
Anthony G. Basile
Aric Belsito
Arvid Picciani
Bartosz Brachaczek
Benjamin Peterson
Bobby Bingham
Boris Brezillon
Brent Cook
Chris Spiegel
Clément Vasseur
Daniel Micay
Daniel Sabogal
Daurnimator
David Carlier
David Edelsohn
Denys Vlasenko
Dmitry Ivanov
Dmitry V. Levin
Drew DeVault
Emil Renner Berthing
Fangrui Song
Felix Fietkau
Felix Janda
Gianluca Anzolin
Hauke Mehrtens
He X
Hiltjo Posthuma
Isaac Dunham
Jaydeep Patil
Jens Gustedt
Jeremy Huntwork
Jo-Philipp Wich
Joakim Sindholt
John Spencer
Julien Ramseier
Justin Cormack
Kaarle Ritvanen
Khem Raj
Kylie McClain
Leah Neukirchen
Luca Barbato
Luka Perkov
M Farkas-Dyck (Strake)
Mahesh Bodapati
Markus Wichmann
Masanori Ogino
Michael Clark
Michael Forney
Mikhail Kremnyov
Natanael Copa
Nicholas J. Kain
orc
Pascal Cuoq
Patrick Oppenlander
Petr Hosek
Petr Skocik
Pierre Carrier
Reini Urban
Rich Felker
Richard Pennington
Ryan Fairfax
Samuel Holland
Segev Finer
Shiz
sin
Solar Designer
Stefan Kristiansson
Stefan O'Rear
Szabolcs Nagy
Timo Teräs
Trutz Behn
Valentin Ochs
Will Dietz
William Haddon
William Pitcock
Portions of this software are derived from third-party works licensed
under terms compatible with the above MIT license:
The TRE regular expression implementation (src/regex/reg* and
src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed
under a 2-clause BSD license (license text in the source files). The
included version has been heavily modified by Rich Felker in 2012, in
the interests of size, simplicity, and namespace cleanliness.
Much of the math library code (src/math/* and src/complex/*) is
Copyright © 1993,2004 Sun Microsystems or
Copyright © 2003-2011 David Schultz or
Copyright © 2003-2009 Steven G. Kargl or
Copyright © 2003-2009 Bruce D. Evans or
Copyright © 2008 Stephen L. Moshier or
Copyright © 2017-2018 Arm Limited
and labelled as such in comments in the individual source files. All
have been licensed under extremely permissive terms.
The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
The Android Open Source Project and is licensed under a two-clause BSD
license. It was taken from Bionic libc, used on Android.
The AArch64 memcpy and memset code (src/string/aarch64/*) are
Copyright © 1999-2019, Arm Limited.
The implementation of DES for crypt (src/crypt/crypt_des.c) is
Copyright © 1994 David Burren. It is licensed under a BSD license.
The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was
originally written by Solar Designer and placed into the public
domain. The code also comes with a fallback permissive license for use
in jurisdictions that may not recognize the public domain.
The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
Valentin Ochs and is licensed under an MIT-style license.
The x86_64 port was written by Nicholas J. Kain and is licensed under
the standard MIT terms.
The mips and microblaze ports were originally written by Richard
Pennington for use in the ellcc project. The original code was adapted
by Rich Felker for build system and code conventions during upstream
integration. It is licensed under the standard MIT terms.
The mips64 port was contributed by Imagination Technologies and is
licensed under the standard MIT terms.
The powerpc port was also originally written by Richard Pennington,
and later supplemented and integrated by John Spencer. It is licensed
under the standard MIT terms.
All other files which have no copyright comments are original works
produced specifically for use as part of this library, written either
by Rich Felker, the main author of the library, or by one or more
contibutors listed above. Details on authorship of individual files
can be found in the git version control history of the project. The
omission of copyright and license comments in each file is in the
interest of source tree size.
In addition, permission is hereby granted for all public header files
(include/* and arch/*/bits/*) and crt files intended to be linked into
applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit
the copyright notice and permission notice otherwise required by the
license, and to use these files without any requirement of
attribution. These files include substantial contributions from:
Bobby Bingham
John Spencer
Nicholas J. Kain
Rich Felker
Richard Pennington
Stefan Kristiansson
Szabolcs Nagy
all of whom have explicitly granted such permission.
This file previously contained text expressing a belief that most of
the files covered by the above exception were sufficiently trivial not
to be subject to copyright, resulting in confusion over whether it
negated the permissions granted in the license. In the spirit of
permissive licensing, and of not having licensing issues being an
obstacle to adoption, that text has been removed.
EXTLIB = libmusl.a
EXTRAINCL = -I$(srcdir)/src/internal -I$(srcdir)/arch/generic
EXTRADEFS = -D_ACRTIMP= -D_NO_CRT_MATH_INLINE
SOURCES = \
src/math/__cos.c \
src/math/__cosdf.c \
src/math/__expo2.c \
src/math/__expo2f.c \
src/math/__fpclassify.c \
src/math/__fpclassifyf.c \
src/math/__math_divzero.c \
src/math/__math_divzerof.c \
src/math/__math_invalid.c \
src/math/__math_invalidf.c \
src/math/__rem_pio2.c \
src/math/__rem_pio2_large.c \
src/math/__rem_pio2f.c \
src/math/__sin.c \
src/math/__sindf.c \
src/math/__tan.c \
src/math/__tandf.c \
src/math/acos.c \
src/math/acosf.c \
src/math/acosh.c \
src/math/acoshf.c \
src/math/asin.c \
src/math/asinf.c \
src/math/asinh.c \
src/math/asinhf.c \
src/math/atan.c \
src/math/atan2.c \
src/math/atan2f.c \
src/math/atanf.c \
src/math/atanh.c \
src/math/atanhf.c \
src/math/cbrt.c \
src/math/cbrtf.c \
src/math/ceil.c \
src/math/ceilf.c \
src/math/copysign.c \
src/math/copysignf.c \
src/math/cos.c \
src/math/cosf.c \
src/math/cosh.c \
src/math/coshf.c \
src/math/erf.c \
src/math/erff.c \
src/math/exp.c \
src/math/exp2.c \
src/math/exp2f.c \
src/math/exp2f_data.c \
src/math/exp_data.c \
src/math/expf.c \
src/math/expm1.c \
src/math/expm1f.c \
src/math/fabs.c \
src/math/fabsf.c \
src/math/fdim.c \
src/math/fdimf.c \
src/math/floor.c \
src/math/floorf.c \
src/math/fma.c \
src/math/fmaf.c \
src/math/fmax.c \
src/math/fmaxf.c \
src/math/fmin.c \
src/math/fminf.c \
src/math/fmod.c \
src/math/fmodf.c \
src/math/frexp.c \
src/math/frexpf.c \
src/math/hypot.c \
src/math/hypotf.c \
src/math/ilogb.c \
src/math/ilogbf.c \
src/math/j0.c \
src/math/j1.c \
src/math/jn.c \
src/math/ldexp.c \
src/math/lgamma.c \
src/math/lgamma_r.c \
src/math/lgammaf.c \
src/math/lgammaf_r.c \
src/math/log.c \
src/math/log10.c \
src/math/log10f.c \
src/math/log1p.c \
src/math/log1pf.c \
src/math/log2.c \
src/math/log2_data.c \
src/math/log2f.c \
src/math/log2f_data.c \
src/math/log_data.c \
src/math/logb.c \
src/math/logbf.c \
src/math/logf.c \
src/math/logf_data.c \
src/math/modf.c \
src/math/modff.c \
src/math/nan.c \
src/math/nanf.c \
src/math/nextafter.c \
src/math/nextafterf.c \
src/math/nexttoward.c \
src/math/nexttowardf.c \
src/math/pow.c \
src/math/pow_data.c \
src/math/powf.c \
src/math/powf_data.c \
src/math/remainder.c \
src/math/remainderf.c \
src/math/remquo.c \
src/math/remquof.c \
src/math/rint.c \
src/math/rintf.c \
src/math/round.c \
src/math/roundf.c \
src/math/scalbn.c \
src/math/scalbnf.c \
src/math/signgam.c \
src/math/sin.c \
src/math/sincos.c \
src/math/sincosf.c \
src/math/sinf.c \
src/math/sinh.c \
src/math/sinhf.c \
src/math/sqrt.c \
src/math/sqrt_data.c \
src/math/sqrtf.c \
src/math/tan.c \
src/math/tanf.c \
src/math/tanh.c \
src/math/tanhf.c \
src/math/tgamma.c \
src/math/tgammaf.c \
src/math/trunc.c \
src/math/truncf.c
#ifndef FEATURES_H
#define FEATURES_H
#define weak
#define hidden
#define weak_alias(old, new)
#endif
#ifndef _LIBM_H
#define _LIBM_H
#include <stdint.h>
#include <float.h>
#include <math.h>
#include <errno.h>
#include <features.h>
typedef float float_t;
typedef double double_t;
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
union ldshape {
long double f;
struct {
uint64_t m;
uint16_t se;
} i;
};
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
/* This is the m68k variant of 80-bit long double, and this definition only works
* on archs where the alignment requirement of uint64_t is <= 4. */
union ldshape {
long double f;
struct {
uint16_t se;
uint16_t pad;
uint64_t m;
} i;
};
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
union ldshape {
long double f;
struct {
uint64_t lo;
uint32_t mid;
uint16_t top;
uint16_t se;
} i;
struct {
uint64_t lo;
uint64_t hi;
} i2;
};
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
union ldshape {
long double f;
struct {
uint16_t se;
uint16_t top;
uint32_t mid;
uint64_t lo;
} i;
struct {
uint64_t hi;
uint64_t lo;
} i2;
};
#else
#error Unsupported long double representation
#endif
/* Support non-nearest rounding mode. */
#define WANT_ROUNDING 1
/* Support signaling NaNs. */
#define WANT_SNAN 0
#if WANT_SNAN
#error SNaN is unsupported
#else
#define issignalingf_inline(x) 0
#define issignaling_inline(x) 0
#endif
#ifndef TOINT_INTRINSICS
#define TOINT_INTRINSICS 0
#endif
#if TOINT_INTRINSICS
/* Round x to nearest int in all rounding modes, ties have to be rounded
consistently with converttoint so the results match. If the result
would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
static double_t roundtoint(double_t);
/* Convert x to nearest int in all rounding modes, ties have to be rounded
consistently with roundtoint. If the result is not representible in an
int32_t then the semantics is unspecified. */
static int32_t converttoint(double_t);
#endif
/* Helps static branch prediction so hot path can be better optimized. */
#ifdef __GNUC__
#define predict_true(x) __builtin_expect(!!(x), 1)
#define predict_false(x) __builtin_expect(x, 0)
#else
#define predict_true(x) (x)
#define predict_false(x) (x)
#endif
/* Evaluate an expression as the specified type. With standard excess
precision handling a type cast or assignment is enough (with
-ffloat-store an assignment is required, in old compilers argument
passing and return statement may not drop excess precision). */
static inline float eval_as_float(float x)
{
float y = x;
return y;
}
static inline double eval_as_double(double x)
{
double y = x;
return y;
}
/* fp_barrier returns its input, but limits code transformations
as if it had a side-effect (e.g. observable io) and returned
an arbitrary value. */
#ifndef fp_barrierf
#define fp_barrierf fp_barrierf
static inline float fp_barrierf(float x)
{
volatile float y = x;
return y;
}
#endif
#ifndef fp_barrier
#define fp_barrier fp_barrier
static inline double fp_barrier(double x)
{
volatile double y = x;
return y;
}
#endif
#ifndef fp_barrierl
#define fp_barrierl fp_barrierl
static inline long double fp_barrierl(long double x)
{
volatile long double y = x;
return y;
}
#endif
/* fp_force_eval ensures that the input value is computed when that's
otherwise unused. To prevent the constant folding of the input
expression, an additional fp_barrier may be needed or a compilation
mode that does so (e.g. -frounding-math in gcc). Then it can be
used to evaluate an expression for its fenv side-effects only. */
#ifndef fp_force_evalf
#define fp_force_evalf fp_force_evalf
static inline void fp_force_evalf(float x)
{
volatile float y;
y = x;
}
#endif
#ifndef fp_force_eval
#define fp_force_eval fp_force_eval
static inline void fp_force_eval(double x)
{
volatile double y;
y = x;
}
#endif
#ifndef fp_force_evall
#define fp_force_evall fp_force_evall
static inline void fp_force_evall(long double x)
{
volatile long double y;
y = x;
}
#endif
#define FORCE_EVAL(x) do { \
if (sizeof(x) == sizeof(float)) { \
fp_force_evalf(x); \
} else if (sizeof(x) == sizeof(double)) { \
fp_force_eval(x); \
} else { \
fp_force_evall(x); \
} \
} while(0)
#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
#define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i
#define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f
#define EXTRACT_WORDS(hi,lo,d) \
do { \
uint64_t __u = asuint64(d); \
(hi) = __u >> 32; \
(lo) = (uint32_t)__u; \
} while (0)
#define GET_HIGH_WORD(hi,d) \
do { \
(hi) = asuint64(d) >> 32; \
} while (0)
#define GET_LOW_WORD(lo,d) \
do { \
(lo) = (uint32_t)asuint64(d); \
} while (0)
#define INSERT_WORDS(d,hi,lo) \
do { \
(d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
} while (0)
#define SET_HIGH_WORD(d,hi) \
INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
#define SET_LOW_WORD(d,lo) \
INSERT_WORDS(d, asuint64(d)>>32, lo)
#define GET_FLOAT_WORD(w,d) \
do { \
(w) = asuint(d); \
} while (0)
#define SET_FLOAT_WORD(d,w) \
do { \
(d) = asfloat(w); \
} while (0)
hidden int __rem_pio2_large(double*,double*,int,int,int);
hidden int __rem_pio2(double,double*);
hidden double __sin(double,double,int);
hidden double __cos(double,double);
hidden double __tan(double,double,int);
hidden double __expo2(double,double);
hidden int __rem_pio2f(float,double*);
hidden float __sindf(double);
hidden float __cosdf(double);
hidden float __tandf(double,int);
hidden float __expo2f(float,float);
hidden int __rem_pio2l(long double, long double *);
hidden long double __sinl(long double, long double, int);
hidden long double __cosl(long double, long double);
hidden long double __tanl(long double, long double, int);
hidden long double __polevll(long double, const long double *, int);
hidden long double __p1evll(long double, const long double *, int);
extern int __signgam;
hidden double __lgamma_r(double, int *);
hidden float __lgammaf_r(float, int *);
/* error handling functions */
hidden float __math_xflowf(uint32_t, float);
hidden float __math_uflowf(uint32_t);
hidden float __math_oflowf(uint32_t);
hidden float __math_divzerof(uint32_t);
hidden float __math_invalidf(float);
hidden double __math_xflow(uint32_t, double);
hidden double __math_uflow(uint32_t);
hidden double __math_oflow(uint32_t);
hidden double __math_divzero(uint32_t);
hidden double __math_invalid(double);
#if LDBL_MANT_DIG != DBL_MANT_DIG
hidden long double __math_invalidl(long double);
#endif
#endif
/* origin: FreeBSD /usr/src/lib/msun/src/k_cos.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/*
* __cos( x, y )
* kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
* Input x is assumed to be bounded by ~pi/4 in magnitude.
* Input y is the tail of x.
*
* Algorithm
* 1. Since cos(-x) = cos(x), we need only to consider positive x.
* 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
* 3. cos(x) is approximated by a polynomial of degree 14 on
* [0,pi/4]
* 4 14
* cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
* where the remez error is
*
* | 2 4 6 8 10 12 14 | -58
* |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
* | |
*
* 4 6 8 10 12 14
* 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
* cos(x) ~ 1 - x*x/2 + r
* since cos(x+y) ~ cos(x) - sin(x)*y
* ~ cos(x) - x*y,
* a correction term is necessary in cos(x) and hence
* cos(x+y) = 1 - (x*x/2 - (r - x*y))
* For better accuracy, rearrange to
* cos(x+y) ~ w + (tmp + (r-x*y))
* where w = 1 - x*x/2 and tmp is a tiny correction term
* (1 - x*x/2 == w + tmp exactly in infinite precision).
* The exactness of w + tmp in infinite precision depends on w
* and tmp having the same precision as x. If they have extra
* precision due to compiler bugs, then the extra precision is
* only good provided it is retained in all terms of the final
* expression for cos(). Retention happens in all cases tested
* under FreeBSD, so don't pessimize things by forcibly clipping
* any extra precision in w.
*/
#include "libm.h"
static const double
C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
double __cos(double x, double y)
{
double_t hz,z,r,w;
z = x*x;
w = z*z;
r = z*(C1+z*(C2+z*C3)) + w*w*(C4+z*(C5+z*C6));
hz = 0.5*z;
w = 1.0-hz;
return w + (((1.0-w)-hz) + (z*r-x*y));
}
/* origin: FreeBSD /usr/src/lib/msun/src/k_cosf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Debugged and optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */
static const double
C0 = -0x1ffffffd0c5e81.0p-54, /* -0.499999997251031003120 */
C1 = 0x155553e1053a42.0p-57, /* 0.0416666233237390631894 */
C2 = -0x16c087e80f1e27.0p-62, /* -0.00138867637746099294692 */
C3 = 0x199342e0ee5069.0p-68; /* 0.0000243904487962774090654 */
float __cosdf(double x)
{
double_t r, w, z;
/* Try to optimize for parallel evaluation as in __tandf.c. */
z = x*x;
w = z*z;
r = C2+z*C3;
return ((1.0+z*C0) + w*C1) + (w*z)*r;
}
#include "libm.h"
/* k is such that k*ln2 has minimal relative error and x - kln2 > log(DBL_MIN) */
static const int k = 2043;
static const double kln2 = 0x1.62066151add8bp+10;
/* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */
double __expo2(double x, double sign)
{
double scale;
/* note that k is odd and scale*scale overflows */
INSERT_WORDS(scale, (uint32_t)(0x3ff + k/2) << 20, 0);
/* exp(x - k ln2) * 2**(k-1) */
/* in directed rounding correct sign before rounding or overflow is important */
return exp(x - kln2) * (sign * scale) * scale;
}
#include "libm.h"
/* k is such that k*ln2 has minimal relative error and x - kln2 > log(FLT_MIN) */
static const int k = 235;
static const float kln2 = 0x1.45c778p+7f;
/* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */
float __expo2f(float x, float sign)
{
float scale;
/* note that k is odd and scale*scale overflows */
SET_FLOAT_WORD(scale, (uint32_t)(0x7f + k/2) << 23);
/* exp(x - k ln2) * 2**(k-1) */
/* in directed rounding correct sign before rounding or overflow is important */
return expf(x - kln2) * (sign * scale) * scale;
}
#include <math.h>
#include <stdint.h>
short __cdecl _dclass(double x)
{
union {double f; uint64_t i;} u = {x};
int e = u.i>>52 & 0x7ff;
if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO;
if (e==0x7ff) return u.i<<12 ? FP_NAN : FP_INFINITE;
return FP_NORMAL;
}
#include <math.h>
#include <stdint.h>
short __cdecl _fdclass(float x)
{
union {float f; uint32_t i;} u = {x};
int e = u.i>>23 & 0xff;
if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO;
if (e==0xff) return u.i<<9 ? FP_NAN : FP_INFINITE;
return FP_NORMAL;
}
#include "libm.h"
double __math_divzero(uint32_t sign)
{
return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
}
#include "libm.h"
float __math_divzerof(uint32_t sign)
{
return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f;
}
#include "libm.h"
double __math_invalid(double x)
{
return (x - x) / (x - x);
}
#include "libm.h"
float __math_invalidf(float x)
{
return (x - x) / (x - x);
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
* Optimized by Bruce D. Evans.
*/
/* __rem_pio2(x,y)
*
* return the remainder of x rem pi/2 in y[0]+y[1]
* use __rem_pio2_large() for large x
*/
#include "libm.h"
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
/*
* invpio2: 53 bits of 2/pi
* pio2_1: first 33 bit of pi/2
* pio2_1t: pi/2 - pio2_1
* pio2_2: second 33 bit of pi/2
* pio2_2t: pi/2 - (pio2_1+pio2_2)
* pio2_3: third 33 bit of pi/2
* pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
*/
static const double
toint = 1.5/EPS,
pio4 = 0x1.921fb54442d18p-1,
invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
/* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
int __rem_pio2(double x, double *y)
{
union {double f; uint64_t i;} u = {x};
double_t z,w,t,r,fn;
double tx[3],ty[2];
uint32_t ix;
int sign, n, ex, ey, i;
sign = u.i>>63;
ix = u.i>>32 & 0x7fffffff;
if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */
if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */
goto medium; /* cancellation -- use medium case */
if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */
if (!sign) {
z = x - pio2_1; /* one round good to 85 bits */
y[0] = z - pio2_1t;
y[1] = (z-y[0]) - pio2_1t;
return 1;
} else {
z = x + pio2_1;
y[0] = z + pio2_1t;
y[1] = (z-y[0]) + pio2_1t;
return -1;
}
} else {
if (!sign) {
z = x - 2*pio2_1;
y[0] = z - 2*pio2_1t;
y[1] = (z-y[0]) - 2*pio2_1t;
return 2;
} else {
z = x + 2*pio2_1;
y[0] = z + 2*pio2_1t;
y[1] = (z-y[0]) + 2*pio2_1t;
return -2;
}
}
}
if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */
if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */
if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */
goto medium;
if (!sign) {
z = x - 3*pio2_1;
y[0] = z - 3*pio2_1t;
y[1] = (z-y[0]) - 3*pio2_1t;
return 3;
} else {
z = x + 3*pio2_1;
y[0] = z + 3*pio2_1t;
y[1] = (z-y[0]) + 3*pio2_1t;
return -3;
}
} else {
if (ix == 0x401921fb) /* |x| ~= 4pi/2 */
goto medium;
if (!sign) {
z = x - 4*pio2_1;
y[0] = z - 4*pio2_1t;
y[1] = (z-y[0]) - 4*pio2_1t;
return 4;
} else {
z = x + 4*pio2_1;
y[0] = z + 4*pio2_1t;
y[1] = (z-y[0]) + 4*pio2_1t;
return -4;
}
}
}
if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
medium:
/* rint(x/(pi/2)) */
fn = (double_t)x*invpio2 + toint - toint;
n = (int32_t)fn;
r = x - fn*pio2_1;
w = fn*pio2_1t; /* 1st round, good to 85 bits */
/* Matters with directed rounding. */
if (predict_false(r - w < -pio4)) {
n--;
fn--;
r = x - fn*pio2_1;
w = fn*pio2_1t;
} else if (predict_false(r - w > pio4)) {
n++;
fn++;
r = x - fn*pio2_1;
w = fn*pio2_1t;
}
y[0] = r - w;
u.f = y[0];
ey = u.i>>52 & 0x7ff;
ex = ix>>20;
if (ex - ey > 16) { /* 2nd round, good to 118 bits */
t = r;
w = fn*pio2_2;
r = t - w;
w = fn*pio2_2t - ((t-r)-w);
y[0] = r - w;
u.f = y[0];
ey = u.i>>52 & 0x7ff;
if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */
t = r;
w = fn*pio2_3;
r = t - w;
w = fn*pio2_3t - ((t-r)-w);
y[0] = r - w;
}
}
y[1] = (r - y[0]) - w;
return n;
}
/*
* all other (large) arguments
*/
if (ix >= 0x7ff00000) { /* x is inf or NaN */
y[0] = y[1] = x - x;
return 0;
}
/* set z = scalbn(|x|,-ilogb(x)+23) */
u.f = x;
u.i &= (uint64_t)-1>>12;
u.i |= (uint64_t)(0x3ff + 23)<<52;
z = u.f;
for (i=0; i < 2; i++) {
tx[i] = (double)(int32_t)z;
z = (z-tx[i])*0x1p24;
}
tx[i] = z;
/* skip zero terms, first term is non-zero */
while (tx[i] == 0.0)
i--;
n = __rem_pio2_large(tx,ty,(int)(ix>>20)-(0x3ff+23),i+1,1);
if (sign) {
y[0] = -ty[0];
y[1] = -ty[1];
return -n;
}
y[0] = ty[0];
y[1] = ty[1];
return n;
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2f.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Debugged and optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* __rem_pio2f(x,y)
*
* return the remainder of x rem pi/2 in *y
* use double precision for everything except passing x
* use __rem_pio2_large() for large x
*/
#include "libm.h"
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
/*
* invpio2: 53 bits of 2/pi
* pio2_1: first 25 bits of pi/2
* pio2_1t: pi/2 - pio2_1
*/
static const double
toint = 1.5/EPS,
pio4 = 0x1.921fb6p-1,
invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
pio2_1 = 1.57079631090164184570e+00, /* 0x3FF921FB, 0x50000000 */
pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */
int __rem_pio2f(float x, double *y)
{
union {float f; uint32_t i;} u = {x};
double tx[1],ty[1];
double_t fn;
uint32_t ix;
int n, sign, e0;
ix = u.i & 0x7fffffff;
/* 25+53 bit pi is good enough for medium size */
if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
/* Use a specialized rint() to get fn. */
fn = (double_t)x*invpio2 + toint - toint;
n = (int32_t)fn;
*y = x - fn*pio2_1 - fn*pio2_1t;
/* Matters with directed rounding. */
if (predict_false(*y < -pio4)) {
n--;
fn--;
*y = x - fn*pio2_1 - fn*pio2_1t;
} else if (predict_false(*y > pio4)) {
n++;
fn++;
*y = x - fn*pio2_1 - fn*pio2_1t;
}
return n;
}
if(ix>=0x7f800000) { /* x is inf or NaN */
*y = x-x;
return 0;
}
/* scale x into [2^23, 2^24-1] */
sign = u.i>>31;
e0 = (ix>>23) - (0x7f+23); /* e0 = ilogb(|x|)-23, positive */
u.i = ix - (e0<<23);
tx[0] = u.f;
n = __rem_pio2_large(tx,ty,e0,1,0);
if (sign) {
*y = -ty[0];
return -n;
}
*y = ty[0];
return n;
}
/* origin: FreeBSD /usr/src/lib/msun/src/k_sin.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* __sin( x, y, iy)
* kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
* Input x is assumed to be bounded by ~pi/4 in magnitude.
* Input y is the tail of x.
* Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
*
* Algorithm
* 1. Since sin(-x) = -sin(x), we need only to consider positive x.
* 2. Callers must return sin(-0) = -0 without calling here since our
* odd polynomial is not evaluated in a way that preserves -0.
* Callers may do the optimization sin(x) ~ x for tiny x.
* 3. sin(x) is approximated by a polynomial of degree 13 on
* [0,pi/4]
* 3 13
* sin(x) ~ x + S1*x + ... + S6*x
* where
*
* |sin(x) 2 4 6 8 10 12 | -58
* |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
* | x |
*
* 4. sin(x+y) = sin(x) + sin'(x')*y
* ~ sin(x) + (1-x*x/2)*y
* For better accuracy, let
* 3 2 2 2 2
* r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
* then 3 2
* sin(x) = x + (S1*x + (x *(r-y/2)+y))
*/
#include "libm.h"
static const double
S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
double __sin(double x, double y, int iy)
{
double_t z,r,v,w;
z = x*x;
w = z*z;
r = S2 + z*(S3 + z*S4) + z*w*(S5 + z*S6);
v = z*x;
if (iy == 0)
return x + v*(S1 + z*r);
else
return x - ((z*(0.5*y - v*r) - y) - v*S1);
}
/* origin: FreeBSD /usr/src/lib/msun/src/k_sinf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
/* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). */
static const double
S1 = -0x15555554cbac77.0p-55, /* -0.166666666416265235595 */
S2 = 0x111110896efbb2.0p-59, /* 0.0083333293858894631756 */
S3 = -0x1a00f9e2cae774.0p-65, /* -0.000198393348360966317347 */
S4 = 0x16cd878c3b46a7.0p-71; /* 0.0000027183114939898219064 */
float __sindf(double x)
{
double_t r, s, w, z;
/* Try to optimize for parallel evaluation as in __tandf.c. */
z = x*x;
w = z*z;
r = S3 + z*S4;
s = z*x;
return (x + s*(S1 + z*S2)) + s*w*r;
}
/* origin: FreeBSD /usr/src/lib/msun/src/k_tan.c */
/*
* ====================================================
* Copyright 2004 Sun Microsystems, Inc. All Rights Reserved.
*
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* __tan( x, y, k )
* kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
* Input x is assumed to be bounded by ~pi/4 in magnitude.
* Input y is the tail of x.
* Input odd indicates whether tan (if odd = 0) or -1/tan (if odd = 1) is returned.
*
* Algorithm
* 1. Since tan(-x) = -tan(x), we need only to consider positive x.
* 2. Callers must return tan(-0) = -0 without calling here since our
* odd polynomial is not evaluated in a way that preserves -0.
* Callers may do the optimization tan(x) ~ x for tiny x.
* 3. tan(x) is approximated by a odd polynomial of degree 27 on
* [0,0.67434]
* 3 27
* tan(x) ~ x + T1*x + ... + T13*x
* where
*
* |tan(x) 2 4 26 | -59.2
* |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
* | x |
*
* Note: tan(x+y) = tan(x) + tan'(x)*y
* ~ tan(x) + (1+x*x)*y
* Therefore, for better accuracy in computing tan(x+y), let
* 3 2 2 2 2
* r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
* then
* 3 2
* tan(x+y) = x + (T1*x + (x *(r+y)+y))
*
* 4. For x in [0.67434,pi/4], let y = pi/4 - x, then
* tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
* = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
*/
#include "libm.h"
static const double T[] = {
3.33333333333334091986e-01, /* 3FD55555, 55555563 */
1.33333333333201242699e-01, /* 3FC11111, 1110FE7A */
5.39682539762260521377e-02, /* 3FABA1BA, 1BB341FE */
2.18694882948595424599e-02, /* 3F9664F4, 8406D637 */
8.86323982359930005737e-03, /* 3F8226E3, E96E8493 */
3.59207910759131235356e-03, /* 3F6D6D22, C9560328 */
1.45620945432529025516e-03, /* 3F57DBC8, FEE08315 */
5.88041240820264096874e-04, /* 3F4344D8, F2F26501 */
2.46463134818469906812e-04, /* 3F3026F7, 1A8D1068 */
7.81794442939557092300e-05, /* 3F147E88, A03792A6 */
7.14072491382608190305e-05, /* 3F12B80F, 32F0A7E9 */
-1.85586374855275456654e-05, /* BEF375CB, DB605373 */
2.59073051863633712884e-05, /* 3EFB2A70, 74BF7AD4 */
},
pio4 = 7.85398163397448278999e-01, /* 3FE921FB, 54442D18 */
pio4lo = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */
double __tan(double x, double y, int odd)
{
double_t z, r, v, w, s, a;
double w0, a0;
uint32_t hx;
int big, sign;
GET_HIGH_WORD(hx,x);
big = (hx&0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */
if (big) {
sign = hx>>31;
if (sign) {
x = -x;
y = -y;
}
x = (pio4 - x) + (pio4lo - y);
y = 0.0;
}
z = x * x;
w = z * z;
/*
* Break x^5*(T[1]+x^2*T[2]+...) into
* x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
* x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
*/
r = T[1] + w*(T[3] + w*(T[5] + w*(T[7] + w*(T[9] + w*T[11]))));
v = z*(T[2] + w*(T[4] + w*(T[6] + w*(T[8] + w*(T[10] + w*T[12])))));
s = z * x;
r = y + z*(s*(r + v) + y) + s*T[0];
w = x + r;
if (big) {
s = 1 - 2*odd;
v = s - 2.0 * (x + (r - w*w/(w + s)));
return sign ? -v : v;
}
if (!odd)
return w;
/* -1.0/(x+r) has up to 2ulp error, so compute it accurately */
w0 = w;
SET_LOW_WORD(w0, 0);
v = r - (w0 - x); /* w0+v = r+x */
a0 = a = -1.0 / w;
SET_LOW_WORD(a0, 0);
return a0 + a*(1.0 + a0*w0 + a0*v);
}
/* origin: FreeBSD /usr/src/lib/msun/src/k_tanf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright 2004 Sun Microsystems, Inc. All Rights Reserved.
*
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
/* |tan(x)/x - t(x)| < 2**-25.5 (~[-2e-08, 2e-08]). */
static const double T[] = {
0x15554d3418c99f.0p-54, /* 0.333331395030791399758 */
0x1112fd38999f72.0p-55, /* 0.133392002712976742718 */
0x1b54c91d865afe.0p-57, /* 0.0533812378445670393523 */
0x191df3908c33ce.0p-58, /* 0.0245283181166547278873 */
0x185dadfcecf44e.0p-61, /* 0.00297435743359967304927 */
0x1362b9bf971bcd.0p-59, /* 0.00946564784943673166728 */
};
float __tandf(double x, int odd)
{
double_t z,r,w,s,t,u;
z = x*x;
/*
* Split up the polynomial into small independent terms to give
* opportunities for parallel evaluation. The chosen splitting is
* micro-optimized for Athlons (XP, X64). It costs 2 multiplications
* relative to Horner's method on sequential machines.
*
* We add the small terms from lowest degree up for efficiency on
* non-sequential machines (the lowest degree terms tend to be ready
* earlier). Apart from this, we don't care about order of
* operations, and don't need to to care since we have precision to
* spare. However, the chosen splitting is good for accuracy too,
* and would give results as accurate as Horner's method if the
* small terms were added from highest degree down.
*/
r = T[4] + z*T[5];
t = T[2] + z*T[3];
w = z*z;
s = z*x;
u = T[0] + z*T[1];
r = (x + s*u) + (s*w)*(t + w*r);
return odd ? -1.0/r : r;
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_acos.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* acos(x)
* Method :
* acos(x) = pi/2 - asin(x)
* acos(-x) = pi/2 + asin(x)
* For |x|<=0.5
* acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c)
* For x>0.5
* acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2)))
* = 2asin(sqrt((1-x)/2))
* = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z)
* = 2f + (2c + 2s*z*R(z))
* where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term
* for f so that f+c ~ sqrt(z).
* For x<-0.5
* acos(x) = pi - 2asin(sqrt((1-|x|)/2))
* = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z)
*
* Special cases:
* if x is NaN, return x itself;
* if |x|>1, return NaN with invalid signal.
*
* Function needed: sqrt
*/
#include "libm.h"
static const double
pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
static double R(double z)
{
double_t p, q;
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
return p/q;
}
double __cdecl acos(double x)
{
double z,w,s,c,df;
uint32_t hx,ix;
GET_HIGH_WORD(hx, x);
ix = hx & 0x7fffffff;
/* |x| >= 1 or nan */
if (ix >= 0x3ff00000) {
uint32_t lx;
GET_LOW_WORD(lx,x);
if ((ix-0x3ff00000 | lx) == 0) {
/* acos(1)=0, acos(-1)=pi */
if (hx >> 31)
return 2*pio2_hi + 0x1p-120f;
return 0;
}
return 0/(x-x);
}
/* |x| < 0.5 */
if (ix < 0x3fe00000) {
if (ix <= 0x3c600000) /* |x| < 2**-57 */
return pio2_hi + 0x1p-120f;
return pio2_hi - (x - (pio2_lo-x*R(x*x)));
}
/* x < -0.5 */
if (hx >> 31) {
z = (1.0+x)*0.5;
s = sqrt(z);
w = R(z)*s-pio2_lo;
return 2*(pio2_hi - (s+w));
}
/* x > 0.5 */
z = (1.0-x)*0.5;
s = sqrt(z);
df = s;
SET_LOW_WORD(df,0);
c = (z-df*df)/(s+df);
w = R(z)*s+c;
return 2*(df+w);
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_acosf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
static const float
pio2_hi = 1.5707962513e+00, /* 0x3fc90fda */
pio2_lo = 7.5497894159e-08, /* 0x33a22168 */
pS0 = 1.6666586697e-01,
pS1 = -4.2743422091e-02,
pS2 = -8.6563630030e-03,
qS1 = -7.0662963390e-01;
static float R(float z)
{
float_t p, q;
p = z*(pS0+z*(pS1+z*pS2));
q = 1.0f+z*qS1;
return p/q;
}
float __cdecl acosf(float x)
{
float z,w,s,c,df;
uint32_t hx,ix;
GET_FLOAT_WORD(hx, x);
ix = hx & 0x7fffffff;
/* |x| >= 1 or nan */
if (ix >= 0x3f800000) {
if (ix == 0x3f800000) {
if (hx >> 31)
return 2*pio2_hi + 0x1p-120f;
return 0;
}
return 0/(x-x);
}
/* |x| < 0.5 */
if (ix < 0x3f000000) {
if (ix <= 0x32800000) /* |x| < 2**-26 */
return pio2_hi + 0x1p-120f;
return pio2_hi - (x - (pio2_lo-x*R(x*x)));
}
/* x < -0.5 */
if (hx >> 31) {
z = (1+x)*0.5f;
s = sqrtf(z);
w = R(z)*s-pio2_lo;
return 2*(pio2_hi - (s+w));
}
/* x > 0.5 */
z = (1-x)*0.5f;
s = sqrtf(z);
GET_FLOAT_WORD(hx,s);
SET_FLOAT_WORD(df,hx&0xfffff000);
c = (z-df*df)/(s+df);
w = R(z)*s+c;
return 2*(df+w);
}
#include "libm.h"
#if FLT_EVAL_METHOD==2
#undef sqrt
#define sqrt sqrtl
#endif
/* acosh(x) = log(x + sqrt(x*x-1)) */
double __cdecl acosh(double x)
{
union {double f; uint64_t i;} u = {.f = x};
unsigned e = u.i >> 52 & 0x7ff;
/* x < 1 domain error is handled in the called functions */
if (e < 0x3ff + 1)
/* |x| < 2, up to 2ulp error in [1,1.125] */
return log1p(x-1 + sqrt((x-1)*(x-1)+2*(x-1)));
if (e < 0x3ff + 26)
/* |x| < 0x1p26 */
return log(2*x - 1/(x+sqrt(x*x-1)));
/* |x| >= 0x1p26 or nan */
return log(x) + 0.693147180559945309417232121458176568;
}
#include "libm.h"
#if FLT_EVAL_METHOD==2
#undef sqrtf
#define sqrtf sqrtl
#elif FLT_EVAL_METHOD==1
#undef sqrtf
#define sqrtf sqrt
#endif
/* acosh(x) = log(x + sqrt(x*x-1)) */
float __cdecl acoshf(float x)
{
union {float f; uint32_t i;} u = {x};
uint32_t a = u.i & 0x7fffffff;
if (a < 0x3f800000+(1<<23))
/* |x| < 2, invalid if x < 1 */
/* up to 2ulp error in [1,1.125] */
return log1pf(x-1 + sqrtf((x-1)*(x-1)+2*(x-1)));
if (u.i < 0x3f800000+(12<<23))
/* 2 <= x < 0x1p12 */
return logf(2*x - 1/(x+sqrtf(x*x-1)));
/* x >= 0x1p12 or x <= -2 or nan */
return logf(x) + 0.693147180559945309417232121458176568f;
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_asin.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* asin(x)
* Method :
* Since asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ...
* we approximate asin(x) on [0,0.5] by
* asin(x) = x + x*x^2*R(x^2)
* where
* R(x^2) is a rational approximation of (asin(x)-x)/x^3
* and its remez error is bounded by
* |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75)
*
* For x in [0.5,1]
* asin(x) = pi/2-2*asin(sqrt((1-x)/2))
* Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2;
* then for x>0.98
* asin(x) = pi/2 - 2*(s+s*z*R(z))
* = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo)
* For x<=0.98, let pio4_hi = pio2_hi/2, then
* f = hi part of s;
* c = sqrt(z) - f = (z-f*f)/(s+f) ...f+c=sqrt(z)
* and
* asin(x) = pi/2 - 2*(s+s*z*R(z))
* = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo)
* = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c))
*
* Special cases:
* if x is NaN, return x itself;
* if |x|>1, return NaN with invalid signal.
*
*/
#include "libm.h"
static const double
pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
/* coefficients for R(x^2) */
pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
static double R(double z)
{
double_t p, q;
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
return p/q;
}
double __cdecl asin(double x)
{
double z,r,s;
uint32_t hx,ix;
GET_HIGH_WORD(hx, x);
ix = hx & 0x7fffffff;
/* |x| >= 1 or nan */
if (ix >= 0x3ff00000) {
uint32_t lx;
GET_LOW_WORD(lx, x);
if ((ix-0x3ff00000 | lx) == 0)
/* asin(1) = +-pi/2 with inexact */
return x*pio2_hi + 0x1p-120f;
return 0/(x-x);
}
/* |x| < 0.5 */
if (ix < 0x3fe00000) {
/* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
if (ix < 0x3e500000 && ix >= 0x00100000)
return x;
return x + x*R(x*x);
}
/* 1 > |x| >= 0.5 */
z = (1 - fabs(x))*0.5;
s = sqrt(z);
r = R(z);
if (ix >= 0x3fef3333) { /* if |x| > 0.975 */
x = pio2_hi-(2*(s+s*r)-pio2_lo);
} else {
double f,c;
/* f+c = sqrt(z) */
f = s;
SET_LOW_WORD(f,0);
c = (z-f*f)/(s+f);
x = 0.5*pio2_hi - (2*s*r - (pio2_lo-2*c) - (0.5*pio2_hi-2*f));
}
if (hx >> 31)
return -x;
return x;
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_asinf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
static const double
pio2 = 1.570796326794896558e+00;
static const float
/* coefficients for R(x^2) */
pS0 = 1.6666586697e-01,
pS1 = -4.2743422091e-02,
pS2 = -8.6563630030e-03,
qS1 = -7.0662963390e-01;
static float R(float z)
{
float_t p, q;
p = z*(pS0+z*(pS1+z*pS2));
q = 1.0f+z*qS1;
return p/q;
}
float __cdecl asinf(float x)
{
double s;
float z;
uint32_t hx,ix;
GET_FLOAT_WORD(hx, x);
ix = hx & 0x7fffffff;
if (ix >= 0x3f800000) { /* |x| >= 1 */
if (ix == 0x3f800000) /* |x| == 1 */
return x*pio2 + 0x1p-120f; /* asin(+-1) = +-pi/2 with inexact */
return 0/(x-x); /* asin(|x|>1) is NaN */
}
if (ix < 0x3f000000) { /* |x| < 0.5 */
/* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
if (ix < 0x39800000 && ix >= 0x00800000)
return x;
return x + x*R(x*x);
}
/* 1 > |x| >= 0.5 */
z = (1 - fabsf(x))*0.5f;
s = sqrt(z);
x = pio2 - 2*(s+s*R(z));
if (hx >> 31)
return -x;
return x;
}
#include "libm.h"
/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */
double __cdecl asinh(double x)
{
union {double f; uint64_t i;} u = {.f = x};
unsigned e = u.i >> 52 & 0x7ff;
unsigned s = u.i >> 63;
/* |x| */
u.i &= (uint64_t)-1/2;
x = u.f;
if (e >= 0x3ff + 26) {
/* |x| >= 0x1p26 or inf or nan */
x = log(x) + 0.693147180559945309417232121458176568;
} else if (e >= 0x3ff + 1) {
/* |x| >= 2 */
x = log(2*x + 1/(sqrt(x*x+1)+x));
} else if (e >= 0x3ff - 26) {
/* |x| >= 0x1p-26, up to 1.6ulp error in [0.125,0.5] */
x = log1p(x + x*x/(sqrt(x*x+1)+1));
} else {
/* |x| < 0x1p-26, raise inexact if x != 0 */
FORCE_EVAL(x + 0x1p120f);
}
return s ? -x : x;
}
#include "libm.h"
/* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */
float __cdecl asinhf(float x)
{
union {float f; uint32_t i;} u = {.f = x};
uint32_t i = u.i & 0x7fffffff;
unsigned s = u.i >> 31;
/* |x| */
u.i = i;
x = u.f;
if (i >= 0x3f800000 + (12<<23)) {
/* |x| >= 0x1p12 or inf or nan */
x = logf(x) + 0.693147180559945309417232121458176568f;
} else if (i >= 0x3f800000 + (1<<23)) {
/* |x| >= 2 */
x = logf(2*x + 1/(sqrtf(x*x+1)+x));
} else if (i >= 0x3f800000 - (12<<23)) {
/* |x| >= 0x1p-12, up to 1.6ulp error in [0.125,0.5] */
x = log1pf(x + x*x/(sqrtf(x*x+1)+1));
} else {
/* |x| < 0x1p-12, raise inexact if x!=0 */
FORCE_EVAL(x + 0x1p120f);
}
return s ? -x : x;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_atan.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* atan(x)
* Method
* 1. Reduce x to positive by atan(x) = -atan(-x).
* 2. According to the integer k=4t+0.25 chopped, t=x, the argument
* is further reduced to one of the following intervals and the
* arctangent of t is evaluated by the corresponding formula:
*
* [0,7/16] atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
* [7/16,11/16] atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) )
* [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) )
* [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) )
* [39/16,INF] atan(x) = atan(INF) + atan( -1/t )
*
* Constants:
* The hexadecimal values are the intended ones for the following
* constants. The decimal values may be used, provided that the
* compiler will convert from decimal to binary accurately enough
* to produce the hexadecimal values shown.
*/
#include "libm.h"
static const double atanhi[] = {
4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
};
static const double atanlo[] = {
2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */
};
static const double aT[] = {
3.33333333333329318027e-01, /* 0x3FD55555, 0x5555550D */
-1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */
1.42857142725034663711e-01, /* 0x3FC24924, 0x920083FF */
-1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */
9.09088713343650656196e-02, /* 0x3FB745CD, 0xC54C206E */
-7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */
6.66107313738753120669e-02, /* 0x3FB10D66, 0xA0D03D51 */
-5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */
4.97687799461593236017e-02, /* 0x3FA97B4B, 0x24760DEB */
-3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */
1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */
};
double __cdecl atan(double x)
{
double_t w,s1,s2,z;
uint32_t ix,sign;
int id;
GET_HIGH_WORD(ix, x);
sign = ix >> 31;
ix &= 0x7fffffff;
if (ix >= 0x44100000) { /* if |x| >= 2^66 */
if (isnan(x))
return x;
z = atanhi[3] + 0x1p-120f;
return sign ? -z : z;
}
if (ix < 0x3fdc0000) { /* |x| < 0.4375 */
if (ix < 0x3e400000) { /* |x| < 2^-27 */
if (ix < 0x00100000)
/* raise underflow for subnormal x */
FORCE_EVAL((float)x);
return x;
}
id = -1;
} else {
x = fabs(x);
if (ix < 0x3ff30000) { /* |x| < 1.1875 */
if (ix < 0x3fe60000) { /* 7/16 <= |x| < 11/16 */
id = 0;
x = (2.0*x-1.0)/(2.0+x);
} else { /* 11/16 <= |x| < 19/16 */
id = 1;
x = (x-1.0)/(x+1.0);
}
} else {
if (ix < 0x40038000) { /* |x| < 2.4375 */
id = 2;
x = (x-1.5)/(1.0+1.5*x);
} else { /* 2.4375 <= |x| < 2^66 */
id = 3;
x = -1.0/x;
}
}
}
/* end of argument reduction */
z = x*x;
w = z*z;
/* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10])))));
s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9]))));
if (id < 0)
return x - x*(s1+s2);
z = atanhi[id] - (x*(s1+s2) - atanlo[id] - x);
return sign ? -z : z;
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_atan2.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* atan2(y,x)
* Method :
* 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
* 2. Reduce x to positive by (if x and y are unexceptional):
* ARG (x+iy) = arctan(y/x) ... if x > 0,
* ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0,
*
* Special cases:
*
* ATAN2((anything), NaN ) is NaN;
* ATAN2(NAN , (anything) ) is NaN;
* ATAN2(+-0, +(anything but NaN)) is +-0 ;
* ATAN2(+-0, -(anything but NaN)) is +-pi ;
* ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
* ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
* ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
* ATAN2(+-INF,+INF ) is +-pi/4 ;
* ATAN2(+-INF,-INF ) is +-3pi/4;
* ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
*
* Constants:
* The hexadecimal values are the intended ones for the following
* constants. The decimal values may be used, provided that the
* compiler will convert from decimal to binary accurately enough
* to produce the hexadecimal values shown.
*/
#include "libm.h"
static const double
pi = 3.1415926535897931160E+00, /* 0x400921FB, 0x54442D18 */
pi_lo = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
double __cdecl atan2(double y, double x)
{
double z;
uint32_t m,lx,ly,ix,iy;
if (isnan(x) || isnan(y))
return x+y;
EXTRACT_WORDS(ix, lx, x);
EXTRACT_WORDS(iy, ly, y);
if ((ix-0x3ff00000 | lx) == 0) /* x = 1.0 */
return atan(y);
m = ((iy>>31)&1) | ((ix>>30)&2); /* 2*sign(x)+sign(y) */
ix = ix & 0x7fffffff;
iy = iy & 0x7fffffff;
/* when y = 0 */
if ((iy|ly) == 0) {
switch(m) {
case 0:
case 1: return y; /* atan(+-0,+anything)=+-0 */
case 2: return pi; /* atan(+0,-anything) = pi */
case 3: return -pi; /* atan(-0,-anything) =-pi */
}
}
/* when x = 0 */
if ((ix|lx) == 0)
return m&1 ? -pi/2 : pi/2;
/* when x is INF */
if (ix == 0x7ff00000) {
if (iy == 0x7ff00000) {
switch(m) {
case 0: return pi/4; /* atan(+INF,+INF) */
case 1: return -pi/4; /* atan(-INF,+INF) */
case 2: return 3*pi/4; /* atan(+INF,-INF) */
case 3: return -3*pi/4; /* atan(-INF,-INF) */
}
} else {
switch(m) {
case 0: return 0.0; /* atan(+...,+INF) */
case 1: return -0.0; /* atan(-...,+INF) */
case 2: return pi; /* atan(+...,-INF) */
case 3: return -pi; /* atan(-...,-INF) */
}
}
}
/* |y/x| > 0x1p64 */
if (ix+(64<<20) < iy || iy == 0x7ff00000)
return m&1 ? -pi/2 : pi/2;
/* z = atan(|y/x|) without spurious underflow */
if ((m&2) && iy+(64<<20) < ix) /* |y/x| < 0x1p-64, x<0 */
z = 0;
else
z = atan(fabs(y/x));
switch (m) {
case 0: return z; /* atan(+,+) */
case 1: return -z; /* atan(-,+) */
case 2: return pi - (z-pi_lo); /* atan(+,-) */
default: /* case 3 */
return (z-pi_lo) - pi; /* atan(-,-) */
}
}
/* origin: FreeBSD /usr/src/lib/msun/src/e_atan2f.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
static const float
pi = 3.1415927410e+00, /* 0x40490fdb */
pi_lo = -8.7422776573e-08; /* 0xb3bbbd2e */
float __cdecl atan2f(float y, float x)
{
float z;
uint32_t m,ix,iy;
if (isnan(x) || isnan(y))
return x+y;
GET_FLOAT_WORD(ix, x);
GET_FLOAT_WORD(iy, y);
if (ix == 0x3f800000) /* x=1.0 */
return atanf(y);
m = ((iy>>31)&1) | ((ix>>30)&2); /* 2*sign(x)+sign(y) */
ix &= 0x7fffffff;
iy &= 0x7fffffff;
/* when y = 0 */
if (iy == 0) {
switch (m) {
case 0:
case 1: return y; /* atan(+-0,+anything)=+-0 */
case 2: return pi; /* atan(+0,-anything) = pi */
case 3: return -pi; /* atan(-0,-anything) =-pi */
}
}
/* when x = 0 */
if (ix == 0)
return m&1 ? -pi/2 : pi/2;
/* when x is INF */
if (ix == 0x7f800000) {
if (iy == 0x7f800000) {
switch (m) {
case 0: return pi/4; /* atan(+INF,+INF) */
case 1: return -pi/4; /* atan(-INF,+INF) */
case 2: return 3*pi/4; /*atan(+INF,-INF)*/
case 3: return -3*pi/4; /*atan(-INF,-INF)*/
}
} else {
switch (m) {
case 0: return 0.0f; /* atan(+...,+INF) */
case 1: return -0.0f; /* atan(-...,+INF) */
case 2: return pi; /* atan(+...,-INF) */
case 3: return -pi; /* atan(-...,-INF) */
}
}
}
/* |y/x| > 0x1p26 */
if (ix+(26<<23) < iy || iy == 0x7f800000)
return m&1 ? -pi/2 : pi/2;
/* z = atan(|y/x|) with correct underflow */
if ((m&2) && iy+(26<<23) < ix) /*|y/x| < 0x1p-26, x < 0 */
z = 0.0;
else
z = atanf(fabsf(y/x));
switch (m) {
case 0: return z; /* atan(+,+) */
case 1: return -z; /* atan(-,+) */
case 2: return pi - (z-pi_lo); /* atan(+,-) */
default: /* case 3 */
return (z-pi_lo) - pi; /* atan(-,-) */
}
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_atanf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
static const float atanhi[] = {
4.6364760399e-01, /* atan(0.5)hi 0x3eed6338 */
7.8539812565e-01, /* atan(1.0)hi 0x3f490fda */
9.8279368877e-01, /* atan(1.5)hi 0x3f7b985e */
1.5707962513e+00, /* atan(inf)hi 0x3fc90fda */
};
static const float atanlo[] = {
5.0121582440e-09, /* atan(0.5)lo 0x31ac3769 */
3.7748947079e-08, /* atan(1.0)lo 0x33222168 */
3.4473217170e-08, /* atan(1.5)lo 0x33140fb4 */
7.5497894159e-08, /* atan(inf)lo 0x33a22168 */
};
static const float aT[] = {
3.3333328366e-01,
-1.9999158382e-01,
1.4253635705e-01,
-1.0648017377e-01,
6.1687607318e-02,
};
float __cdecl atanf(float x)
{
float_t w,s1,s2,z;
uint32_t ix,sign;
int id;
GET_FLOAT_WORD(ix, x);
sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x4c800000) { /* if |x| >= 2**26 */
if (isnan(x))
return x;
z = atanhi[3] + 0x1p-120f;
return sign ? -z : z;
}
if (ix < 0x3ee00000) { /* |x| < 0.4375 */
if (ix < 0x39800000) { /* |x| < 2**-12 */
if (ix < 0x00800000)
/* raise underflow for subnormal x */
FORCE_EVAL(x*x);
return x;
}
id = -1;
} else {
x = fabsf(x);
if (ix < 0x3f980000) { /* |x| < 1.1875 */
if (ix < 0x3f300000) { /* 7/16 <= |x| < 11/16 */
id = 0;
x = (2.0f*x - 1.0f)/(2.0f + x);
} else { /* 11/16 <= |x| < 19/16 */
id = 1;
x = (x - 1.0f)/(x + 1.0f);
}
} else {
if (ix < 0x401c0000) { /* |x| < 2.4375 */
id = 2;
x = (x - 1.5f)/(1.0f + 1.5f*x);
} else { /* 2.4375 <= |x| < 2**26 */
id = 3;
x = -1.0f/x;
}
}
}
/* end of argument reduction */
z = x*x;
w = z*z;
/* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
s1 = z*(aT[0]+w*(aT[2]+w*aT[4]));
s2 = w*(aT[1]+w*aT[3]);
if (id < 0)
return x - x*(s1+s2);
z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x);
return sign ? -z : z;
}
#include "libm.h"
/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */
double __cdecl atanh(double x)
{
union {double f; uint64_t i;} u = {.f = x};
unsigned e = u.i >> 52 & 0x7ff;
unsigned s = u.i >> 63;
double_t y;
/* |x| */
u.i &= (uint64_t)-1/2;
y = u.f;
if (e < 0x3ff - 1) {
if (e < 0x3ff - 32) {
/* handle underflow */
if (e == 0)
FORCE_EVAL((float)y);
} else {
/* |x| < 0.5, up to 1.7ulp error */
y = 0.5*log1p(2*y + 2*y*y/(1-y));
}
} else {
/* avoid overflow */
y = 0.5*log1p(2*(y/(1-y)));
}
return s ? -y : y;
}
#include "libm.h"
/* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */
float __cdecl atanhf(float x)
{
union {float f; uint32_t i;} u = {.f = x};
unsigned s = u.i >> 31;
float_t y;
/* |x| */
u.i &= 0x7fffffff;
y = u.f;
if (u.i < 0x3f800000 - (1<<23)) {
if (u.i < 0x3f800000 - (32<<23)) {
/* handle underflow */
if (u.i < (1<<23))
FORCE_EVAL((float)(y*y));
} else {
/* |x| < 0.5, up to 1.7ulp error */
y = 0.5f*log1pf(2*y + 2*y*y/(1-y));
}
} else {
/* avoid overflow */
y = 0.5f*log1pf(2*(y/(1-y)));
}
return s ? -y : y;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
* Optimized by Bruce D. Evans.
*/
/* cbrt(x)
* Return cube root of x
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
static const uint32_t
B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
/* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
static const double
P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */
P1 = -1.88497979543377169875, /* 0xbffe28e0, 0x92f02420 */
P2 = 1.621429720105354466140, /* 0x3ff9f160, 0x4a49d6c2 */
P3 = -0.758397934778766047437, /* 0xbfe844cb, 0xbee751d9 */
P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
double __cdecl cbrt(double x)
{
union {double f; uint64_t i;} u = {x};
double_t r,s,t,w;
uint32_t hx = u.i>>32 & 0x7fffffff;
if (hx >= 0x7ff00000) /* cbrt(NaN,INF) is itself */
return x+x;
/*
* Rough cbrt to 5 bits:
* cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
* where e is integral and >= 0, m is real and in [0, 1), and "/" and
* "%" are integer division and modulus with rounding towards minus
* infinity. The RHS is always >= the LHS and has a maximum relative
* error of about 1 in 16. Adding a bias of -0.03306235651 to the
* (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
* floating point representation, for finite positive normal values,
* ordinary integer divison of the value in bits magically gives
* almost exactly the RHS of the above provided we first subtract the
* exponent bias (1023 for doubles) and later add it back. We do the
* subtraction virtually to keep e >= 0 so that ordinary integer
* division rounds towards minus infinity; this is also efficient.
*/
if (hx < 0x00100000) { /* zero or subnormal? */
u.f = x*0x1p54;
hx = u.i>>32 & 0x7fffffff;
if (hx == 0)
return x; /* cbrt(0) is itself */
hx = hx/3 + B2;
} else
hx = hx/3 + B1;
u.i &= 1ULL<<63;
u.i |= (uint64_t)hx << 32;
t = u.f;
/*
* New cbrt to 23 bits:
* cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
* where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
* to within 2**-23.5 when |r - 1| < 1/10. The rough approximation
* has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
* gives us bounds for r = t**3/x.
*
* Try to optimize for parallel evaluation as in __tanf.c.
*/
r = (t*t)*(t/x);
t = t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4));
/*
* Round t away from zero to 23 bits (sloppily except for ensuring that
* the result is larger in magnitude than cbrt(x) but not much more than
* 2 23-bit ulps larger). With rounding towards zero, the error bound
* would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps
* in the rounded t, the infinite-precision error in the Newton
* approximation barely affects third digit in the final error
* 0.667; the error in the rounded t can be up to about 3 23-bit ulps
* before the final error is larger than 0.667 ulps.
*/
u.f = t;
u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;
t = u.f;
/* one step Newton iteration to 53 bits with error < 0.667 ulps */
s = t*t; /* t*t is exact */
r = x/s; /* error <= 0.5 ulps; |r| < |t| */
w = t+t; /* t+t is exact */
r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */
t = t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */
return t;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Debugged and optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* cbrtf(x)
* Return cube root of x
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
static const unsigned
B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */
B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */
float __cdecl cbrtf(float x)
{
double_t r,T;
union {float f; uint32_t i;} u = {x};
uint32_t hx = u.i & 0x7fffffff;
if (hx >= 0x7f800000) /* cbrt(NaN,INF) is itself */
return x + x;
/* rough cbrt to 5 bits */
if (hx < 0x00800000) { /* zero or subnormal? */
if (hx == 0)
return x; /* cbrt(+-0) is itself */
u.f = x*0x1p24f;
hx = u.i & 0x7fffffff;
hx = hx/3 + B2;
} else
hx = hx/3 + B1;
u.i &= 0x80000000;
u.i |= hx;
/*
* First step Newton iteration (solving t*t-x/t == 0) to 16 bits. In
* double precision so that its terms can be arranged for efficiency
* without causing overflow or underflow.
*/
T = u.f;
r = T*T*T;
T = T*((double_t)x+x+r)/(x+r+r);
/*
* Second step Newton iteration to 47 bits. In double precision for
* efficiency and accuracy.
*/
r = T*T*T;
T = T*((double_t)x+x+r)/(x+r+r);
/* rounding to 24 bits is perfect in round-to-nearest mode */
return T;
}
#include "libm.h"
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
static const double_t toint = 1/EPS;
double __cdecl ceil(double x)
{
union {double f; uint64_t i;} u = {x};
int e = u.i >> 52 & 0x7ff;
double_t y;
if (e >= 0x3ff+52 || x == 0)
return x;
/* y = int(x) - x, where int(x) is an integer neighbor of x */
if (u.i >> 63)
y = x - toint + toint - x;
else
y = x + toint - toint - x;
/* special case because of non-nearest rounding modes */
if (e <= 0x3ff-1) {
FORCE_EVAL(y);
return u.i >> 63 ? -0.0 : 1;
}
if (y < 0)
return x + y + 1;
return x + y;
}
#include "libm.h"
float __cdecl ceilf(float x)
{
union {float f; uint32_t i;} u = {x};
int e = (int)(u.i >> 23 & 0xff) - 0x7f;
uint32_t m;
if (e >= 23)
return x;
if (e >= 0) {
m = 0x007fffff >> e;
if ((u.i & m) == 0)
return x;
FORCE_EVAL(x + 0x1p120f);
if (u.i >> 31 == 0)
u.i += m;
u.i &= ~m;
} else {
FORCE_EVAL(x + 0x1p120f);
if (u.i >> 31)
u.f = -0.0;
else if (u.i << 1)
u.f = 1.0;
}
return u.f;
}
#include "libm.h"
double __cdecl copysign(double x, double y) {
union {double f; uint64_t i;} ux={x}, uy={y};
ux.i &= -1ULL/2;
ux.i |= uy.i & 1ULL<<63;
return ux.f;
}
#include <math.h>
#include <stdint.h>
float __cdecl copysignf(float x, float y)
{
union {float f; uint32_t i;} ux={x}, uy={y};
ux.i &= 0x7fffffff;
ux.i |= uy.i & 0x80000000;
return ux.f;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_cos.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* cos(x)
* Return cosine function of x.
*
* kernel function:
* __sin ... sine function on [-pi/4,pi/4]
* __cos ... cosine function on [-pi/4,pi/4]
* __rem_pio2 ... argument reduction routine
*
* Method.
* Let S,C and T denote the sin, cos and tan respectively on
* [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
* in [-pi/4 , +pi/4], and let n = k mod 4.
* We have
*
* n sin(x) cos(x) tan(x)
* ----------------------------------------------------------
* 0 S C T
* 1 C -S -1/T
* 2 -S -C T
* 3 -C S -1/T
* ----------------------------------------------------------
*
* Special cases:
* Let trig be any of sin, cos, or tan.
* trig(+-INF) is NaN, with signals;
* trig(NaN) is that NaN;
*
* Accuracy:
* TRIG(x) returns trig(x) nearly rounded
*/
#include "libm.h"
double __cdecl cos(double x)
{
double y[2];
uint32_t ix;
unsigned n;
GET_HIGH_WORD(ix, x);
ix &= 0x7fffffff;
/* |x| ~< pi/4 */
if (ix <= 0x3fe921fb) {
if (ix < 0x3e46a09e) { /* |x| < 2**-27 * sqrt(2) */
/* raise inexact if x!=0 */
FORCE_EVAL(x + 0x1p120f);
return 1.0;
}
return __cos(x, 0);
}
/* cos(Inf or NaN) is NaN */
if (ix >= 0x7ff00000)
return x-x;
/* argument reduction */
n = __rem_pio2(x, y);
switch (n&3) {
case 0: return __cos(y[0], y[1]);
case 1: return -__sin(y[0], y[1], 1);
case 2: return -__cos(y[0], y[1]);
default:
return __sin(y[0], y[1], 1);
}
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_cosf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
/* Small multiples of pi/2 rounded to double precision. */
static const double
c1pio2 = 1*M_PI_2, /* 0x3FF921FB, 0x54442D18 */
c2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
c3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
c4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
float __cdecl cosf(float x)
{
double y;
uint32_t ix;
unsigned n, sign;
GET_FLOAT_WORD(ix, x);
sign = ix >> 31;
ix &= 0x7fffffff;
if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
if (ix < 0x39800000) { /* |x| < 2**-12 */
/* raise inexact if x != 0 */
FORCE_EVAL(x + 0x1p120f);
return 1.0f;
}
return __cosdf(x);
}
if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
if (ix > 0x4016cbe3) /* |x| ~> 3*pi/4 */
return -__cosdf(sign ? x+c2pio2 : x-c2pio2);
else {
if (sign)
return __sindf(x + c1pio2);
else
return __sindf(c1pio2 - x);
}
}
if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
if (ix > 0x40afeddf) /* |x| ~> 7*pi/4 */
return __cosdf(sign ? x+c4pio2 : x-c4pio2);
else {
if (sign)
return __sindf(-x - c3pio2);
else
return __sindf(x - c3pio2);
}
}
/* cos(Inf or NaN) is NaN */
if (ix >= 0x7f800000)
return x-x;
/* general argument reduction needed */
n = __rem_pio2f(x,&y);
switch (n&3) {
case 0: return __cosdf(y);
case 1: return __sindf(-y);
case 2: return -__cosdf(y);
default:
return __sindf(y);
}
}
#include "libm.h"
/* cosh(x) = (exp(x) + 1/exp(x))/2
* = 1 + 0.5*(exp(x)-1)*(exp(x)-1)/exp(x)
* = 1 + x*x/2 + o(x^4)
*/
double __cdecl cosh(double x)
{
union {double f; uint64_t i;} u = {.f = x};
uint32_t w;
double t;
/* |x| */
u.i &= (uint64_t)-1/2;
x = u.f;
w = u.i >> 32;
/* |x| < log(2) */
if (w < 0x3fe62e42) {
if (w < 0x3ff00000 - (26<<20)) {
/* raise inexact if x!=0 */
FORCE_EVAL(x + 0x1p120f);
return 1;
}
t = expm1(x);
return 1 + t*t/(2*(1+t));
}
/* |x| < log(DBL_MAX) */
if (w < 0x40862e42) {
t = exp(x);
/* note: if x>log(0x1p26) then the 1/t is not needed */
return 0.5*(t + 1/t);
}
/* |x| > log(DBL_MAX) or nan */
/* note: the result is stored to handle overflow */
t = __expo2(x, 1.0);
return t;
}
#include "libm.h"
float __cdecl coshf(float x)
{
union {float f; uint32_t i;} u = {.f = x};
uint32_t w;
float t;
/* |x| */
u.i &= 0x7fffffff;
x = u.f;
w = u.i;
/* |x| < log(2) */
if (w < 0x3f317217) {
if (w < 0x3f800000 - (12<<23)) {
FORCE_EVAL(x + 0x1p120f);
return 1;
}
t = expm1f(x);
return 1 + t*t/(2*(1+t));
}
/* |x| < log(FLT_MAX) */
if (w < 0x42b17217) {
t = expf(x);
return 0.5f*(t + 1/t);
}
/* |x| > log(FLT_MAX) or nan */
t = __expo2f(x, 1.0f);
return t;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_erff.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
static const float
erx = 8.4506291151e-01, /* 0x3f58560b */
/*
* Coefficients for approximation to erf on [0,0.84375]
*/
efx8 = 1.0270333290e+00, /* 0x3f8375d4 */
pp0 = 1.2837916613e-01, /* 0x3e0375d4 */
pp1 = -3.2504209876e-01, /* 0xbea66beb */
pp2 = -2.8481749818e-02, /* 0xbce9528f */
pp3 = -5.7702702470e-03, /* 0xbbbd1489 */
pp4 = -2.3763017452e-05, /* 0xb7c756b1 */
qq1 = 3.9791721106e-01, /* 0x3ecbbbce */
qq2 = 6.5022252500e-02, /* 0x3d852a63 */
qq3 = 5.0813062117e-03, /* 0x3ba68116 */
qq4 = 1.3249473704e-04, /* 0x390aee49 */
qq5 = -3.9602282413e-06, /* 0xb684e21a */
/*
* Coefficients for approximation to erf in [0.84375,1.25]
*/
pa0 = -2.3621185683e-03, /* 0xbb1acdc6 */
pa1 = 4.1485610604e-01, /* 0x3ed46805 */
pa2 = -3.7220788002e-01, /* 0xbebe9208 */
pa3 = 3.1834661961e-01, /* 0x3ea2fe54 */
pa4 = -1.1089469492e-01, /* 0xbde31cc2 */
pa5 = 3.5478305072e-02, /* 0x3d1151b3 */
pa6 = -2.1663755178e-03, /* 0xbb0df9c0 */
qa1 = 1.0642088205e-01, /* 0x3dd9f331 */
qa2 = 5.4039794207e-01, /* 0x3f0a5785 */
qa3 = 7.1828655899e-02, /* 0x3d931ae7 */
qa4 = 1.2617121637e-01, /* 0x3e013307 */
qa5 = 1.3637083583e-02, /* 0x3c5f6e13 */
qa6 = 1.1984500103e-02, /* 0x3c445aa3 */
/*
* Coefficients for approximation to erfc in [1.25,1/0.35]
*/
ra0 = -9.8649440333e-03, /* 0xbc21a093 */
ra1 = -6.9385856390e-01, /* 0xbf31a0b7 */
ra2 = -1.0558626175e+01, /* 0xc128f022 */
ra3 = -6.2375331879e+01, /* 0xc2798057 */
ra4 = -1.6239666748e+02, /* 0xc322658c */
ra5 = -1.8460508728e+02, /* 0xc3389ae7 */
ra6 = -8.1287437439e+01, /* 0xc2a2932b */
ra7 = -9.8143291473e+00, /* 0xc11d077e */
sa1 = 1.9651271820e+01, /* 0x419d35ce */
sa2 = 1.3765776062e+02, /* 0x4309a863 */
sa3 = 4.3456588745e+02, /* 0x43d9486f */
sa4 = 6.4538726807e+02, /* 0x442158c9 */
sa5 = 4.2900814819e+02, /* 0x43d6810b */
sa6 = 1.0863500214e+02, /* 0x42d9451f */
sa7 = 6.5702495575e+00, /* 0x40d23f7c */
sa8 = -6.0424413532e-02, /* 0xbd777f97 */
/*
* Coefficients for approximation to erfc in [1/.35,28]
*/
rb0 = -9.8649431020e-03, /* 0xbc21a092 */
rb1 = -7.9928326607e-01, /* 0xbf4c9dd4 */
rb2 = -1.7757955551e+01, /* 0xc18e104b */
rb3 = -1.6063638306e+02, /* 0xc320a2ea */
rb4 = -6.3756646729e+02, /* 0xc41f6441 */
rb5 = -1.0250950928e+03, /* 0xc480230b */
rb6 = -4.8351919556e+02, /* 0xc3f1c275 */
sb1 = 3.0338060379e+01, /* 0x41f2b459 */
sb2 = 3.2579251099e+02, /* 0x43a2e571 */
sb3 = 1.5367296143e+03, /* 0x44c01759 */
sb4 = 3.1998581543e+03, /* 0x4547fdbb */
sb5 = 2.5530502930e+03, /* 0x451f90ce */
sb6 = 4.7452853394e+02, /* 0x43ed43a7 */
sb7 = -2.2440952301e+01; /* 0xc1b38712 */
static float erfc1(float x)
{
float_t s,P,Q;
s = fabsf(x) - 1;
P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
Q = 1+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
return 1 - erx - P/Q;
}
static float erfc2(uint32_t ix, float x)
{
float_t s,R,S;
float z;
if (ix < 0x3fa00000) /* |x| < 1.25 */
return erfc1(x);
x = fabsf(x);
s = 1/(x*x);
if (ix < 0x4036db6d) { /* |x| < 1/0.35 */
R = ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
ra5+s*(ra6+s*ra7))))));
S = 1.0f+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
sa5+s*(sa6+s*(sa7+s*sa8)))))));
} else { /* |x| >= 1/0.35 */
R = rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
rb5+s*rb6)))));
S = 1.0f+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
sb5+s*(sb6+s*sb7))))));
}
GET_FLOAT_WORD(ix, x);
SET_FLOAT_WORD(z, ix&0xffffe000);
return expf(-z*z - 0.5625f) * expf((z-x)*(z+x) + R/S)/x;
}
float __cdecl erff(float x)
{
float r,s,z,y;
uint32_t ix;
int sign;
GET_FLOAT_WORD(ix, x);
sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x7f800000) {
/* erf(nan)=nan, erf(+-inf)=+-1 */
return 1-2*sign + 1/x;
}
if (ix < 0x3f580000) { /* |x| < 0.84375 */
if (ix < 0x31800000) { /* |x| < 2**-28 */
/*avoid underflow */
return 0.125f*(8*x + efx8*x);
}
z = x*x;
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = 1+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
y = r/s;
return x + x*y;
}
if (ix < 0x40c00000) /* |x| < 6 */
y = 1 - erfc2(ix,x);
else
y = 1 - 0x1p-120f;
return sign ? -y : y;
}
float __cdecl erfcf(float x)
{
float r,s,z,y;
uint32_t ix;
int sign;
GET_FLOAT_WORD(ix, x);
sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x7f800000) {
/* erfc(nan)=nan, erfc(+-inf)=0,2 */
return 2*sign + 1/x;
}
if (ix < 0x3f580000) { /* |x| < 0.84375 */
if (ix < 0x23800000) /* |x| < 2**-56 */
return 1.0f - x;
z = x*x;
r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
s = 1.0f+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
y = r/s;
if (sign || ix < 0x3e800000) /* x < 1/4 */
return 1.0f - (x+x*y);
return 0.5f - (x - 0.5f + x*y);
}
if (ix < 0x41e00000) { /* |x| < 28 */
return sign ? 2 - erfc2(ix,x) : erfc2(ix,x);
}
return sign ? 2 - 0x1p-120f : 0x1p-120f*0x1p-120f;
}
/*
* Double-precision e^x function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp_data.h"
#define N (1 << EXP_TABLE_BITS)
#define InvLn2N __exp_data.invln2N
#define NegLn2hiN __exp_data.negln2hiN
#define NegLn2loN __exp_data.negln2loN
#define Shift __exp_data.shift
#define T __exp_data.tab
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have
overflown into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;
if ((ki & 0x80000000) == 0) {
/* k > 0, the exponent of scale might have overflowed by <= 460. */
sbits -= 1009ull << 52;
scale = asdouble(sbits);
y = 0x1p1009 * (scale + scale * tmp);
return eval_as_double(y);
}
/* k < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
scale = asdouble(sbits);
y = scale + scale * tmp;
if (y < 1.0) {
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo;
lo = scale - y + scale * tmp;
hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double(hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return eval_as_double(y);
}
/* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x)
{
return asuint64(x) >> 52;
}
double __cdecl exp(double x)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
double_t kd, z, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff;
if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
if (abstop - top12(0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
if (abstop >= top12(1024.0)) {
if (asuint64(x) == asuint64(-INFINITY))
return 0.0;
if (abstop >= top12(INFINITY))
return 1.0 + x;
if (asuint64(x) >> 63)
return __math_uflow(0);
else
return __math_oflow(0);
}
/* Large x is special cased below. */
abstop = 0;
}
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x;
#if TOINT_INTRINSICS
kd = roundtoint(z);
ki = converttoint(z);
#elif EXP_USE_TOINT_NARROW
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd) >> 16;
kd = (double_t)(int32_t)ki;
#else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd);
kd -= Shift;
#endif
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble(T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
if (predict_false(abstop == 0))
return specialcase(tmp, sbits, ki);
scale = asdouble(sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double(scale + scale * tmp);
}
/*
* Double-precision 2^x function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp_data.h"
#define N (1 << EXP_TABLE_BITS)
#define Shift __exp_data.exp2_shift
#define T __exp_data.tab
#define C1 __exp_data.exp2_poly[0]
#define C2 __exp_data.exp2_poly[1]
#define C3 __exp_data.exp2_poly[2]
#define C4 __exp_data.exp2_poly[3]
#define C5 __exp_data.exp2_poly[4]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have
overflown into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;
if ((ki & 0x80000000) == 0) {
/* k > 0, the exponent of scale might have overflowed by 1. */
sbits -= 1ull << 52;
scale = asdouble(sbits);
y = 2 * (scale + scale * tmp);
return eval_as_double(y);
}
/* k < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
scale = asdouble(sbits);
y = scale + scale * tmp;
if (y < 1.0) {
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo;
lo = scale - y + scale * tmp;
hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double(hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return eval_as_double(y);
}
/* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x)
{
return asuint64(x) >> 52;
}
double __cdecl exp2(double x)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
double_t kd, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff;
if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
if (abstop - top12(0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
if (abstop >= top12(1024.0)) {
if (asuint64(x) == asuint64(-INFINITY))
return 0.0;
if (abstop >= top12(INFINITY))
return 1.0 + x;
if (!(asuint64(x) >> 63))
return __math_oflow(0);
else if (asuint64(x) >= asuint64(-1075.0))
return __math_uflow(0);
}
if (2 * asuint64(x) > 2 * asuint64(928.0))
/* Large x is special cased below. */
abstop = 0;
}
/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
/* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
kd = eval_as_double(x + Shift);
ki = asuint64(kd); /* k. */
kd -= Shift; /* k/N for int k. */
r = x - kd;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble(T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.5/N ulp larger. */
/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
if (predict_false(abstop == 0))
return specialcase(tmp, sbits, ki);
scale = asdouble(sbits);
/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
is no spurious underflow here even without fma. */
return eval_as_double(scale + scale * tmp);
}
/*
* Single-precision 2^x function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp2f_data.h"
/*
EXP2F_TABLE_BITS = 5
EXP2F_POLY_ORDER = 3
ULP error: 0.502 (nearest rounding.)
Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.)
Wrong count: 168353 (all nearest rounding wrong results with fma.)
Non-nearest ULP error: 1 (rounded ULP error)
*/
#define N (1 << EXP2F_TABLE_BITS)
#define T __exp2f_data.tab
#define C __exp2f_data.poly
#define SHIFT __exp2f_data.shift_scaled
static inline uint32_t top12(float x)
{
return asuint(x) >> 20;
}
float __cdecl exp2f(float x)
{
uint32_t abstop;
uint64_t ki, t;
double_t kd, xd, z, r, r2, y, s;
xd = (double_t)x;
abstop = top12(x) & 0x7ff;
if (predict_false(abstop >= top12(128.0f))) {
/* |x| >= 128 or x is nan. */
if (asuint(x) == asuint(-INFINITY))
return 0.0f;
if (abstop >= top12(INFINITY))
return x + x;
if (x > 0.0f)
return __math_oflowf(0);
if (x <= -150.0f)
return __math_uflowf(0);
}
/* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */
kd = eval_as_double(xd + SHIFT);
ki = asuint64(kd);
kd -= SHIFT; /* k/N for int k. */
r = xd - kd;
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
t += ki << (52 - EXP2F_TABLE_BITS);
s = asdouble(t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float(y);
}
/*
* Shared data between expf, exp2f and powf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "exp2f_data.h"
#define N (1 << EXP2F_TABLE_BITS)
const struct exp2f_data __exp2f_data = {
/* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
used for computing 2^(k/N) for an int |k| < 150 N as
double(tab[k%N] + (k << 52-BITS)) */
.tab = {
0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
},
.shift_scaled = 0x1.8p+52 / N,
.poly = {
0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
},
.shift = 0x1.8p+52,
.invln2_scaled = 0x1.71547652b82fep+0 * N,
.poly_scaled = {
0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
},
};
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _EXP2F_DATA_H
#define _EXP2F_DATA_H
#include <features.h>
#include <stdint.h>
/* Shared between expf, exp2f and powf. */
#define EXP2F_TABLE_BITS 5
#define EXP2F_POLY_ORDER 3
extern hidden const struct exp2f_data {
uint64_t tab[1 << EXP2F_TABLE_BITS];
double shift_scaled;
double poly[EXP2F_POLY_ORDER];
double shift;
double invln2_scaled;
double poly_scaled[EXP2F_POLY_ORDER];
} __exp2f_data;
#endif
/*
* Shared data between exp, exp2 and pow.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "exp_data.h"
#define N (1 << EXP_TABLE_BITS)
const struct exp_data __exp_data = {
// N/ln2
.invln2N = 0x1.71547652b82fep0 * N,
// -ln2/N
.negln2hiN = -0x1.62e42fefa0000p-8,
.negln2loN = -0x1.cf79abc9e3b3ap-47,
// Used for rounding when !TOINT_INTRINSICS
#if EXP_USE_TOINT_NARROW
.shift = 0x1800000000.8p0,
#else
.shift = 0x1.8p52,
#endif
// exp polynomial coefficients.
.poly = {
// abs error: 1.555*2^-66
// ulp error: 0.509 (0.511 without fma)
// if |x| < ln2/256+eps
// abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65
// abs error if |x| < ln2/128: 1.7145*2^-56
0x1.ffffffffffdbdp-2,
0x1.555555555543cp-3,
0x1.55555cf172b91p-5,
0x1.1111167a4d017p-7,
},
.exp2_shift = 0x1.8p52 / N,
// exp2 polynomial coefficients.
.exp2_poly = {
// abs error: 1.2195*2^-65
// ulp error: 0.507 (0.511 without fma)
// if |x| < 1/256
// abs error if |x| < 1/128: 1.9941*2^-56
0x1.62e42fefa39efp-1,
0x1.ebfbdff82c424p-3,
0x1.c6b08d70cf4b5p-5,
0x1.3b2abd24650ccp-7,
0x1.5d7e09b4e3a84p-10,
},
// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
// tab[2*k] = asuint64(T[k])
// tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
.tab = {
0x0, 0x3ff0000000000000,
0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
0xbc905e7a108766d1, 0x3fefe315e86e7f85,
0x3c8cd2523567f613, 0x3fefd9b0d3158574,
0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
0x3c60f74e61e6c861, 0x3fefc74518759bc8,
0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
0xbc6a033489906e0b, 0x3fef9b66affed31b,
0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
0xbc91c923b9d5f416, 0x3fef829aaea92de0,
0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
0xbc801b15eaa59348, 0x3fef72b83c7d517b,
0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
0xbc96d99c7611eb26, 0x3fef5be084045cd4,
0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
0x3c968efde3a8a894, 0x3fef387a6e756238,
0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
0x3c834d754db0abb6, 0x3fef06fe0a31b715,
0x3c864201e2ac744c, 0x3fef0170fc4cd831,
0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
0xbc9907f81b512d8e, 0x3feeecae6d05d866,
0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
0x3c859f48a72a4c6d, 0x3feedea64c123422,
0xbc9312607a28698a, 0x3feeda4504ac801c,
0xbc58a78f4817895b, 0x3feed60a21f72e2a,
0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
0x3c4363ed60c2ac11, 0x3feece086061892d,
0x3c9666093b0664ef, 0x3feeca41ed1d0057,
0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
0x3c93350518fdd78e, 0x3feeaf4736b527da,
0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
0x3c9063e1e21c5409, 0x3feeab07dd485429,
0x3c34c7855019c6ea, 0x3feea9268a5946b7,
0x3c9432e62b64c035, 0x3feea76f15ad2148,
0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
0xbc845378892be9ae, 0x3feea34634ccc320,
0xbc93cedd78565858, 0x3feea23882552225,
0x3c5710aa807e1964, 0x3feea155d44ca973,
0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
0xbc6a12ad8734b982, 0x3feea012750bdabf,
0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
0xbc80dc3d54e08851, 0x3fee9f7df9519484,
0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
0xbc8619321e55e68a, 0x3fee9feb564267c9,
0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
0xbc7b32dcb94da51d, 0x3feea11473eb0187,
0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
0xbc9369b6f13b3734, 0x3feea589994cce13,
0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
0xbc94d450d872576e, 0x3feea8d99b4492ed,
0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
0x3c7bf68359f35f44, 0x3feeb1ae99157736,
0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
0xbc92434322f4f9aa, 0x3feebd829fde4e50,
0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
0x3c71affc2b91ce27, 0x3feec49182a3f090,
0x3c6dd235e10a73bb, 0x3feec86319e32323,
0xbc87c50422622263, 0x3feecc667b5de565,
0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
0x3c90cc319cee31d2, 0x3feed99e1330b358,
0x3c8469846e735ab3, 0x3feede6b5579fdbf,
0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
0xbc90a40e3da6f640, 0x3feef9728de5593a,
0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
0xbc91eee26b588a35, 0x3fef05b030a1064a,
0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
0x3c736eae30af0cb3, 0x3fef199bdd85529c,
0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
0x3c676b2c6c921968, 0x3fef3720dcef9069,
0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
0xbc900dae3875a949, 0x3fef4f87080d89f2,
0x3c74a385a63d07a7, 0x3fef5818dcfba487,
0xbc82919e2040220f, 0x3fef60e316c98398,
0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
0x3c843a59ac016b4b, 0x3fef7321f301b460,
0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
0xbc892ab93b470dc9, 0x3fef864614f5a129,
0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
},
};
/*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _EXP_DATA_H
#define _EXP_DATA_H
#include <features.h>
#include <stdint.h>
#define EXP_TABLE_BITS 7
#define EXP_POLY_ORDER 5
#define EXP_USE_TOINT_NARROW 0
#define EXP2_POLY_ORDER 5
extern hidden const struct exp_data {
double invln2N;
double shift;
double negln2hiN;
double negln2loN;
double poly[4]; /* Last four coefficients. */
double exp2_shift;
double exp2_poly[EXP2_POLY_ORDER];
uint64_t tab[2*(1 << EXP_TABLE_BITS)];
} __exp_data;
#endif
/*
* Single-precision e^x function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp2f_data.h"
/*
EXP2F_TABLE_BITS = 5
EXP2F_POLY_ORDER = 3
ULP error: 0.502 (nearest rounding.)
Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
Wrong count: 170635 (all nearest rounding wrong results with fma.)
Non-nearest ULP error: 1 (rounded ULP error)
*/
#define N (1 << EXP2F_TABLE_BITS)
#define InvLn2N __exp2f_data.invln2_scaled
#define T __exp2f_data.tab
#define C __exp2f_data.poly_scaled
static inline uint32_t top12(float x)
{
return asuint(x) >> 20;
}
float __cdecl expf(float x)
{
uint32_t abstop;
uint64_t ki, t;
double_t kd, xd, z, r, r2, y, s;
xd = (double_t)x;
abstop = top12(x) & 0x7ff;
if (predict_false(abstop >= top12(88.0f))) {
/* |x| >= 88 or x is nan. */
if (asuint(x) == asuint(-INFINITY))
return 0.0f;
if (abstop >= top12(INFINITY))
return x + x;
if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
return __math_oflowf(0);
if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
return __math_uflowf(0);
}
/* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
z = InvLn2N * xd;
/* Round and convert z to int, the result is in [-150*N, 128*N] and
ideally ties-to-even rule is used, otherwise the magnitude of r
can be bigger which gives larger approximation error. */
#if TOINT_INTRINSICS
kd = roundtoint(z);
ki = converttoint(z);
#else
# define SHIFT __exp2f_data.shift
kd = eval_as_double(z + SHIFT);
ki = asuint64(kd);
kd -= SHIFT;
#endif
r = z - kd;
/* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
t += ki << (52 - EXP2F_TABLE_BITS);
s = asdouble(t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float(y);
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_expm1.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* expm1(x)
* Returns exp(x)-1, the exponential of x minus 1.
*
* Method
* 1. Argument reduction:
* Given x, find r and integer k such that
*
* x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658
*
* Here a correction term c will be computed to compensate
* the error in r when rounded to a floating-point number.
*
* 2. Approximating expm1(r) by a special rational function on
* the interval [0,0.34658]:
* Since
* r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ...
* we define R1(r*r) by
* r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r)
* That is,
* R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
* = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
* = 1 - r^2/60 + r^4/2520 - r^6/100800 + ...
* We use a special Remez algorithm on [0,0.347] to generate
* a polynomial of degree 5 in r*r to approximate R1. The
* maximum error of this polynomial approximation is bounded
* by 2**-61. In other words,
* R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
* where Q1 = -1.6666666666666567384E-2,
* Q2 = 3.9682539681370365873E-4,
* Q3 = -9.9206344733435987357E-6,
* Q4 = 2.5051361420808517002E-7,
* Q5 = -6.2843505682382617102E-9;
* z = r*r,
* with error bounded by
* | 5 | -61
* | 1.0+Q1*z+...+Q5*z - R1(z) | <= 2
* | |
*
* expm1(r) = exp(r)-1 is then computed by the following
* specific way which minimize the accumulation rounding error:
* 2 3
* r r [ 3 - (R1 + R1*r/2) ]
* expm1(r) = r + --- + --- * [--------------------]
* 2 2 [ 6 - r*(3 - R1*r/2) ]
*
* To compensate the error in the argument reduction, we use
* expm1(r+c) = expm1(r) + c + expm1(r)*c
* ~ expm1(r) + c + r*c
* Thus c+r*c will be added in as the correction terms for
* expm1(r+c). Now rearrange the term to avoid optimization
* screw up:
* ( 2 2 )
* ({ ( r [ R1 - (3 - R1*r/2) ] ) } r )
* expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
* ({ ( 2 [ 6 - r*(3 - R1*r/2) ] ) } 2 )
* ( )
*
* = r - E
* 3. Scale back to obtain expm1(x):
* From step 1, we have
* expm1(x) = either 2^k*[expm1(r)+1] - 1
* = or 2^k*[expm1(r) + (1-2^-k)]
* 4. Implementation notes:
* (A). To save one multiplication, we scale the coefficient Qi
* to Qi*2^i, and replace z by (x^2)/2.
* (B). To achieve maximum accuracy, we compute expm1(x) by
* (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
* (ii) if k=0, return r-E
* (iii) if k=-1, return 0.5*(r-E)-0.5
* (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E)
* else return 1.0+2.0*(r-E);
* (v) if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1)
* (vi) if k <= 20, return 2^k((1-2^-k)-(E-r)), else
* (vii) return 2^k(1-((E+2^-k)-r))
*
* Special cases:
* expm1(INF) is INF, expm1(NaN) is NaN;
* expm1(-INF) is -1, and
* for finite argument, only expm1(0)=0 is exact.
*
* Accuracy:
* according to an error analysis, the error is always less than
* 1 ulp (unit in the last place).
*
* Misc. info.
* For IEEE double
* if x > 7.09782712893383973096e+02 then expm1(x) overflow
*
* Constants:
* The hexadecimal values are the intended ones for the following
* constants. The decimal values may be used, provided that the
* compiler will convert from decimal to binary accurately enough
* to produce the hexadecimal values shown.
*/
#include "libm.h"
static const double
o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
ln2_hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
ln2_lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */
/* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs = x*x/2: */
Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */
Q2 = 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */
Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
Q4 = 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */
Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
double __cdecl expm1(double x)
{
double_t y,hi,lo,c,t,e,hxs,hfx,r1,twopk;
union {double f; uint64_t i;} u = {x};
uint32_t hx = u.i>>32 & 0x7fffffff;
int k, sign = u.i>>63;
/* filter out huge and non-finite argument */
if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */
if (isnan(x))
return x;
if (sign)
return -1;
if (x > o_threshold) {
x *= 0x1p1023;
return x;
}
}
/* argument reduction */
if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
if (!sign) {
hi = x - ln2_hi;
lo = ln2_lo;
k = 1;
} else {
hi = x + ln2_hi;
lo = -ln2_lo;
k = -1;
}
} else {
k = invln2*x + (sign ? -0.5 : 0.5);
t = k;
hi = x - t*ln2_hi; /* t*ln2_hi is exact here */
lo = t*ln2_lo;
}
x = hi-lo;
c = (hi-x)-lo;
} else if (hx < 0x3c900000) { /* |x| < 2**-54, return x */
if (hx < 0x00100000)
FORCE_EVAL((float)x);
return x;
} else
k = 0;
/* x is now in primary range */
hfx = 0.5*x;
hxs = x*hfx;
r1 = 1.0+hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5))));
t = 3.0-r1*hfx;
e = hxs*((r1-t)/(6.0 - x*t));
if (k == 0) /* c is 0 */
return x - (x*e-hxs);
e = x*(e-c) - c;
e -= hxs;
/* exp(x) ~ 2^k (x_reduced - e + 1) */
if (k == -1)
return 0.5*(x-e) - 0.5;
if (k == 1) {
if (x < -0.25)
return -2.0*(e-(x+0.5));
return 1.0+2.0*(x-e);
}
u.i = (uint64_t)(0x3ff + k)<<52; /* 2^k */
twopk = u.f;
if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
y = x - e + 1.0;
if (k == 1024)
y = y*2.0*0x1p1023;
else
y = y*twopk;
return y - 1.0;
}
u.i = (uint64_t)(0x3ff - k)<<52; /* 2^-k */
if (k < 20)
y = (x-e+(1-u.f))*twopk;
else
y = (x-(e+u.f)+1)*twopk;
return y;
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_expm1f.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
static const float
ln2_hi = 6.9313812256e-01, /* 0x3f317180 */
ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */
invln2 = 1.4426950216e+00, /* 0x3fb8aa3b */
/*
* Domain [-0.34568, 0.34568], range ~[-6.694e-10, 6.696e-10]:
* |6 / x * (1 + 2 * (1 / (exp(x) - 1) - 1 / x)) - q(x)| < 2**-30.04
* Scaled coefficients: Qn_here = 2**n * Qn_for_q (see s_expm1.c):
*/
Q1 = -3.3333212137e-2, /* -0x888868.0p-28 */
Q2 = 1.5807170421e-3; /* 0xcf3010.0p-33 */
float __cdecl expm1f(float x)
{
float_t y,hi,lo,c,t,e,hxs,hfx,r1,twopk;
union {float f; uint32_t i;} u = {x};
uint32_t hx = u.i & 0x7fffffff;
int k, sign = u.i >> 31;
/* filter out huge and non-finite argument */
if (hx >= 0x4195b844) { /* if |x|>=27*ln2 */
if (hx > 0x7f800000) /* NaN */
return x;
if (sign)
return -1;
if (hx > 0x42b17217) { /* x > log(FLT_MAX) */
x *= 0x1p127f;
return x;
}
}
/* argument reduction */
if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
if (hx < 0x3F851592) { /* and |x| < 1.5 ln2 */
if (!sign) {
hi = x - ln2_hi;
lo = ln2_lo;
k = 1;
} else {
hi = x + ln2_hi;
lo = -ln2_lo;
k = -1;
}
} else {
k = invln2*x + (sign ? -0.5f : 0.5f);
t = k;
hi = x - t*ln2_hi; /* t*ln2_hi is exact here */
lo = t*ln2_lo;
}
x = hi-lo;
c = (hi-x)-lo;
} else if (hx < 0x33000000) { /* when |x|<2**-25, return x */
if (hx < 0x00800000)
FORCE_EVAL(x*x);
return x;
} else
k = 0;
/* x is now in primary range */
hfx = 0.5f*x;
hxs = x*hfx;
r1 = 1.0f+hxs*(Q1+hxs*Q2);
t = 3.0f - r1*hfx;
e = hxs*((r1-t)/(6.0f - x*t));
if (k == 0) /* c is 0 */
return x - (x*e-hxs);
e = x*(e-c) - c;
e -= hxs;
/* exp(x) ~ 2^k (x_reduced - e + 1) */
if (k == -1)
return 0.5f*(x-e) - 0.5f;
if (k == 1) {
if (x < -0.25f)
return -2.0f*(e-(x+0.5f));
return 1.0f + 2.0f*(x-e);
}
u.i = (0x7f+k)<<23; /* 2^k */
twopk = u.f;
if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
y = x - e + 1.0f;
if (k == 128)
y = y*2.0f*0x1p127f;
else
y = y*twopk;
return y - 1.0f;
}
u.i = (0x7f-k)<<23; /* 2^-k */
if (k < 23)
y = (x-e+(1-u.f))*twopk;
else
y = (x-(e+u.f)+1)*twopk;
return y;
}
#include <math.h>
#include <stdint.h>
double __cdecl fabs(double x)
{
union {double f; uint64_t i;} u = {x};
u.i &= -1ULL/2;
return u.f;
}
#include <math.h>
#include <stdint.h>
float __cdecl fabsf(float x)
{
union {float f; uint32_t i;} u = {x};
u.i &= 0x7fffffff;
return u.f;
}
#include <math.h>
double __cdecl fdim(double x, double y)
{
if (isnan(x))
return x;
if (isnan(y))
return y;
return x > y ? x - y : 0;
}
#include <math.h>
float __cdecl fdimf(float x, float y)
{
if (isnan(x))
return x;
if (isnan(y))
return y;
return x > y ? x - y : 0;
}
#include "libm.h"
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
#define EPS DBL_EPSILON
#elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON
#endif
static const double_t toint = 1/EPS;
double __cdecl floor(double x)
{
union {double f; uint64_t i;} u = {x};
int e = u.i >> 52 & 0x7ff;
double_t y;
if (e >= 0x3ff+52 || x == 0)
return x;
/* y = int(x) - x, where int(x) is an integer neighbor of x */
if (u.i >> 63)
y = x - toint + toint - x;
else
y = x + toint - toint - x;
/* special case because of non-nearest rounding modes */
if (e <= 0x3ff-1) {
FORCE_EVAL(y);
return u.i >> 63 ? -1 : 0;
}
if (y > 0)
return x + y - 1;
return x + y;
}
#include "libm.h"
float __cdecl floorf(float x)
{
union {float f; uint32_t i;} u = {x};
int e = (int)(u.i >> 23 & 0xff) - 0x7f;
uint32_t m;
if (e >= 23)
return x;
if (e >= 0) {
m = 0x007fffff >> e;
if ((u.i & m) == 0)
return x;
FORCE_EVAL(x + 0x1p120f);
if (u.i >> 31)
u.i += m;
u.i &= ~m;
} else {
FORCE_EVAL(x + 0x1p120f);
if (u.i >> 31 == 0)
u.i = 0;
else if (u.i << 1)
u.f = -1.0;
}
return u.f;
}
#include <stdint.h>
#include <float.h>
#include <math.h>
#include "libm.h"
static inline int a_clz_64(uint64_t x)
{
#ifdef __GNUC
if (x>>32) return __builtin_clz(x>>32);
return __builtin_clz(x) + 32;
#else
uint32_t y;
int r;
if (x>>32) y=x>>32, r=0; else y=x, r=32;
if (y>>16) y>>=16; else r |= 16;
if (y>>8) y>>=8; else r |= 8;
if (y>>4) y>>=4; else r |= 4;
if (y>>2) y>>=2; else r |= 2;
return r | !(y>>1);
#endif
}
#define ASUINT64(x) ((union {double f; uint64_t i;}){x}).i
#define ZEROINFNAN (0x7ff-0x3ff-52-1)
struct num { uint64_t m; int e; int sign; };
static struct num normalize(double x)
{
uint64_t ix = ASUINT64(x);
int e = ix>>52;
int sign = e & 0x800;
e &= 0x7ff;
if (!e) {
ix = ASUINT64(x*0x1p63);
e = ix>>52 & 0x7ff;
e = e ? e-63 : 0x800;
}
ix &= (1ull<<52)-1;
ix |= 1ull<<52;
ix <<= 1;
e -= 0x3ff + 52 + 1;
return (struct num){ix,e,sign};
}
static void mul(uint64_t *hi, uint64_t *lo, uint64_t x, uint64_t y)
{
uint64_t t1,t2,t3;
uint64_t xlo = (uint32_t)x, xhi = x>>32;
uint64_t ylo = (uint32_t)y, yhi = y>>32;
t1 = xlo*ylo;
t2 = xlo*yhi + xhi*ylo;
t3 = xhi*yhi;
*lo = t1 + (t2<<32);
*hi = t3 + (t2>>32) + (t1 > *lo);
}
double __cdecl fma(double x, double y, double z)
{
#pragma STDC FENV_ACCESS ON
/* normalize so top 10bits and last bit are 0 */
struct num nx, ny, nz;
nx = normalize(x);
ny = normalize(y);
nz = normalize(z);
if (nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN)
return x*y + z;
if (nz.e >= ZEROINFNAN) {
if (nz.e > ZEROINFNAN) /* z==0 */
return x*y + z;
return z;
}
/* mul: r = x*y */
uint64_t rhi, rlo, zhi, zlo;
mul(&rhi, &rlo, nx.m, ny.m);
/* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
/* align exponents */
int e = nx.e + ny.e;
int d = nz.e - e;
/* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
if (d > 0) {
if (d < 64) {
zlo = nz.m<<d;
zhi = nz.m>>64-d;
} else {
zlo = 0;
zhi = nz.m;
e = nz.e - 64;
d -= 64;
if (d == 0) {
} else if (d < 64) {
rlo = rhi<<64-d | rlo>>d | !!(rlo<<64-d);
rhi = rhi>>d;
} else {
rlo = 1;
rhi = 0;
}
}
} else {
zhi = 0;
d = -d;
if (d == 0) {
zlo = nz.m;
} else if (d < 64) {
zlo = nz.m>>d | !!(nz.m<<64-d);
} else {
zlo = 1;
}
}
/* add */
int sign = nx.sign^ny.sign;
int samesign = !(sign^nz.sign);
int nonzero = 1;
if (samesign) {
/* r += z */
rlo += zlo;
rhi += zhi + (rlo < zlo);
} else {
/* r -= z */
uint64_t t = rlo;
rlo -= zlo;
rhi = rhi - zhi - (t < rlo);
if (rhi>>63) {
rlo = -rlo;
rhi = -rhi-!!rlo;
sign = !sign;
}
nonzero = !!rhi;
}
/* set rhi to top 63bit of the result (last bit is sticky) */
if (nonzero) {
e += 64;
d = a_clz_64(rhi)-1;
/* note: d > 0 */
rhi = rhi<<d | rlo>>64-d | !!(rlo<<d);
} else if (rlo) {
d = a_clz_64(rlo)-1;
if (d < 0)
rhi = rlo>>1 | (rlo&1);
else
rhi = rlo<<d;
} else {
/* exact +-0 */
return x*y + z;
}
e -= d;
/* convert to double */
int64_t i = rhi; /* i is in [1<<62,(1<<63)-1] */
if (sign)
i = -i;
double r = i; /* |r| is in [0x1p62,0x1p63] */
if (e < -1022-62) {
/* result is subnormal before rounding */
if (e == -1022-63) {
double c = 0x1p63;
if (sign)
c = -c;
if (r == c) {
/* min normal after rounding, underflow depends
on arch behaviour which can be imitated by
a double to float conversion */
float fltmin = 0x0.ffffff8p-63*FLT_MIN * r;
return DBL_MIN/FLT_MIN * fltmin;
}
/* one bit is lost when scaled, add another top bit to
only round once at conversion if it is inexact */
if (rhi << 53) {
i = rhi>>1 | (rhi&1) | 1ull<<62;
if (sign)
i = -i;
r = i;
r = 2*r - c; /* remove top bit */
/* raise underflow portably, such that it
cannot be optimized away */
{
double_t tiny = DBL_MIN/FLT_MIN * r;
r += (double)(tiny*tiny) * (r-r);
}
}
} else {
/* only round once when scaled */
d = 10;
i = ( rhi>>d | !!(rhi<<64-d) ) << d;
if (sign)
i = -i;
r = i;
}
}
return scalbn(r, e);
}
/* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */
/*-
* Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <fenv.h>
#include <math.h>
#include <stdint.h>
#include "libm.h"
/*
* Fused multiply-add: Compute x * y + z with a single rounding error.
*
* A double has more than twice as much precision than a float, so
* direct double-precision arithmetic suffices, except where double
* rounding occurs.
*/
float __cdecl fmaf(float x, float y, float z)
{
#pragma STDC FENV_ACCESS ON
double xy, result;
union {double f; uint64_t i;} u;
int e;
xy = (double)x * y;
result = xy + z;
u.f = result;
e = u.i>>52 & 0x7ff;
/* Common case: The double precision result is fine. */
if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */
e == 0x7ff || /* NaN */
(result - xy == z && result - z == xy) || /* exact */
fegetround() != FE_TONEAREST) /* not round-to-nearest */
{
/*
underflow may not be raised correctly, example:
fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f)
*/
if (e < 0x3ff-126 && e >= 0x3ff-149 && fetestexcept(FE_INEXACT)) {
fp_barrierf((float)u.f * (float)u.f);
}
z = result;
return z;
}
/*
* If result is inexact, and exactly halfway between two float values,
* we need to adjust the low-order bit in the direction of the error.
*/
double err;
int neg = u.i >> 63;
if (neg == (z > xy))
err = xy - result + z;
else
err = z - result + xy;
if (neg == (err < 0))
u.i++;
else
u.i--;
z = u.f;
return z;
}
#include <math.h>
double __cdecl fmax(double x, double y)
{
if (isnan(x))
return y;
if (isnan(y))
return x;
/* handle signed zeros, see C99 Annex F.9.9.2 */
if (signbit(x) != signbit(y))
return signbit(x) ? y : x;
return x < y ? y : x;
}
#include <math.h>
float __cdecl fmaxf(float x, float y)
{
if (isnan(x))
return y;
if (isnan(y))
return x;
/* handle signed zeroes, see C99 Annex F.9.9.2 */
if (signbit(x) != signbit(y))
return signbit(x) ? y : x;
return x < y ? y : x;
}
#include <math.h>
double __cdecl fmin(double x, double y)
{
if (isnan(x))
return y;
if (isnan(y))
return x;
/* handle signed zeros, see C99 Annex F.9.9.2 */
if (signbit(x) != signbit(y))
return signbit(x) ? x : y;
return x < y ? x : y;
}
#include <math.h>
float __cdecl fminf(float x, float y)
{
if (isnan(x))
return y;
if (isnan(y))
return x;
/* handle signed zeros, see C99 Annex F.9.9.2 */
if (signbit(x) != signbit(y))
return signbit(x) ? x : y;
return x < y ? x : y;
}
#include <math.h>
#include <stdint.h>
double __cdecl fmod(double x, double y)
{
union {double f; uint64_t i;} ux = {x}, uy = {y};
int ex = ux.i>>52 & 0x7ff;
int ey = uy.i>>52 & 0x7ff;
int sx = ux.i>>63;
uint64_t i;
/* in the followings uxi should be ux.i, but then gcc wrongly adds */
/* float load/store to inner loops ruining performance and code size */
uint64_t uxi = ux.i;
if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)
return (x*y)/(x*y);
if (uxi<<1 <= uy.i<<1) {
if (uxi<<1 == uy.i<<1)
return 0*x;
return x;
}
/* normalize x and y */
if (!ex) {
for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1);
uxi <<= -ex + 1;
} else {
uxi &= -1ULL >> 12;
uxi |= 1ULL << 52;
}
if (!ey) {
for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1);
uy.i <<= -ey + 1;
} else {
uy.i &= -1ULL >> 12;
uy.i |= 1ULL << 52;
}
/* x mod y */
for (; ex > ey; ex--) {
i = uxi - uy.i;
if (i >> 63 == 0) {
if (i == 0)
return 0*x;
uxi = i;
}
uxi <<= 1;
}
i = uxi - uy.i;
if (i >> 63 == 0) {
if (i == 0)
return 0*x;
uxi = i;
}
for (; uxi>>52 == 0; uxi <<= 1, ex--);
/* scale result */
if (ex > 0) {
uxi -= 1ULL << 52;
uxi |= (uint64_t)ex << 52;
} else {
uxi >>= -ex + 1;
}
uxi |= (uint64_t)sx << 63;
ux.i = uxi;
return ux.f;
}
#include <math.h>
#include <stdint.h>
#include "libm.h"
float __cdecl fmodf(float x, float y)
{
union {float f; uint32_t i;} ux = {x}, uy = {y};
int ex = ux.i>>23 & 0xff;
int ey = uy.i>>23 & 0xff;
uint32_t sx = ux.i & 0x80000000;
uint32_t i;
uint32_t uxi = ux.i;
if (uy.i<<1 == 0 || isnan(y) || ex == 0xff)
return (x*y)/(x*y);
if (uxi<<1 <= uy.i<<1) {
if (uxi<<1 == uy.i<<1)
return 0*x;
return x;
}
/* normalize x and y */
if (!ex) {
for (i = uxi<<9; i>>31 == 0; ex--, i <<= 1);
uxi <<= -ex + 1;
} else {
uxi &= -1U >> 9;
uxi |= 1U << 23;
}
if (!ey) {
for (i = uy.i<<9; i>>31 == 0; ey--, i <<= 1);
uy.i <<= -ey + 1;
} else {
uy.i &= -1U >> 9;
uy.i |= 1U << 23;
}
/* x mod y */
for (; ex > ey; ex--) {
i = uxi - uy.i;
if (i >> 31 == 0) {
if (i == 0)
return 0*x;
uxi = i;
}
uxi <<= 1;
}
i = uxi - uy.i;
if (i >> 31 == 0) {
if (i == 0)
return 0*x;
uxi = i;
}
for (; uxi>>23 == 0; uxi <<= 1, ex--);
/* scale result up */
if (ex > 0) {
uxi -= 1U << 23;
uxi |= (uint32_t)ex << 23;
} else {
uxi >>= -ex + 1;
}
uxi |= sx;
ux.i = uxi;
return ux.f;
}
#include <math.h>
#include <stdint.h>
double __cdecl frexp(double x, int *e)
{
union { double d; uint64_t i; } y = { x };
int ee = y.i>>52 & 0x7ff;
if (!ee) {
if (x) {
x = frexp(x*0x1p64, e);
*e -= 64;
} else *e = 0;
return x;
} else if (ee == 0x7ff) {
return x;
}
*e = ee - 0x3fe;
y.i &= 0x800fffffffffffffull;
y.i |= 0x3fe0000000000000ull;
return y.d;
}
#include <math.h>
#include <stdint.h>
#include "libm.h"
float __cdecl frexpf(float x, int *e)
{
union { float f; uint32_t i; } y = { x };
int ee = y.i>>23 & 0xff;
if (!ee) {
if (x) {
x = frexpf(x*0x1p64, e);
*e -= 64;
} else *e = 0;
return x;
} else if (ee == 0xff) {
return x;
}
*e = ee - 0x7e;
y.i &= 0x807ffffful;
y.i |= 0x3f000000ul;
return y.f;
}
#include <math.h>
#include <stdint.h>
#include <float.h>
#include "libm.h"
#if FLT_EVAL_METHOD > 1U && LDBL_MANT_DIG == 64
#define SPLIT (0x1p32 + 1)
#else
#define SPLIT (0x1p27 + 1)
#endif
static void sq(double_t *hi, double_t *lo, double x)
{
double_t xh, xl, xc;
xc = (double_t)x*SPLIT;
xh = x - xc + xc;
xl = x - xh;
*hi = (double_t)x*x;
*lo = xh*xh - *hi + 2*xh*xl + xl*xl;
}
double __cdecl _hypot(double x, double y)
{
union {double f; uint64_t i;} ux = {x}, uy = {y}, ut;
int ex, ey;
double_t hx, lx, hy, ly, z;
/* arrange |x| >= |y| */
ux.i &= -1ULL>>1;
uy.i &= -1ULL>>1;
if (ux.i < uy.i) {
ut = ux;
ux = uy;
uy = ut;
}
/* special cases */
ex = ux.i>>52;
ey = uy.i>>52;
x = ux.f;
y = uy.f;
/* note: hypot(inf,nan) == inf */
if (ey == 0x7ff)
return y;
if (ex == 0x7ff || uy.i == 0)
return x;
/* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */
/* 64 difference is enough for ld80 double_t */
if (ex - ey > 64)
return x + y;
/* precise sqrt argument in nearest rounding mode without overflow */
/* xh*xh must not overflow and xl*xl must not underflow in sq */
z = 1;
if (ex > 0x3ff+510) {
z = 0x1p700;
x *= 0x1p-700;
y *= 0x1p-700;
} else if (ey < 0x3ff-450) {
z = 0x1p-700;
x *= 0x1p700;
y *= 0x1p700;
}
sq(&hx, &lx, x);
sq(&hy, &ly, y);
return z*sqrt(ly+lx+hy+hx);
}
#include <math.h>
#include <stdint.h>
#include "libm.h"
float __cdecl _hypotf(float x, float y)
{
union {float f; uint32_t i;} ux = {x}, uy = {y}, ut;
float_t z;
ux.i &= -1U>>1;
uy.i &= -1U>>1;
if (ux.i < uy.i) {
ut = ux;
ux = uy;
uy = ut;
}
x = ux.f;
y = uy.f;
if (uy.i == 0xff<<23)
return y;
if (ux.i >= 0xff<<23 || uy.i == 0 || ux.i - uy.i >= 25<<23)
return x + y;
z = 1;
if (ux.i >= (0x7f+60)<<23) {
z = 0x1p90f;
x *= 0x1p-90f;
y *= 0x1p-90f;
} else if (uy.i < (0x7f-60)<<23) {
z = 0x1p-90f;
x *= 0x1p90f;
y *= 0x1p90f;
}
return z*sqrtf((double)x*x + (double)y*y);
}
#include <limits.h>
#include "libm.h"
int __cdecl ilogb(double x)
{
#pragma STDC FENV_ACCESS ON
union {double f; uint64_t i;} u = {x};
uint64_t i = u.i;
int e = i>>52 & 0x7ff;
if (!e) {
i <<= 12;
if (i == 0) {
FORCE_EVAL(0/0.0f);
return FP_ILOGB0;
}
/* subnormal x */
for (e = -0x3ff; i>>63 == 0; e--, i<<=1);
return e;
}
if (e == 0x7ff) {
FORCE_EVAL(0/0.0f);
return i<<12 ? FP_ILOGBNAN : INT_MAX;
}
return e - 0x3ff;
}
#include <limits.h>
#include "libm.h"
int __cdecl ilogbf(float x)
{
#pragma STDC FENV_ACCESS ON
union {float f; uint32_t i;} u = {x};
uint32_t i = u.i;
int e = i>>23 & 0xff;
if (!e) {
i <<= 9;
if (i == 0) {
FORCE_EVAL(0/0.0f);
return FP_ILOGB0;
}
/* subnormal x */
for (e = -0x7f; i>>31 == 0; e--, i<<=1);
return e;
}
if (e == 0xff) {
FORCE_EVAL(0/0.0f);
return i<<9 ? FP_ILOGBNAN : INT_MAX;
}
return e - 0x7f;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment