Commit e3772c91, authored by Piotr Caban, committed by Alexandre Julliard

msvcrt: Improve __libm_sse2_sqrt_precise implementation.

parent ba3cc127
...@@ -1165,13 +1165,14 @@ double CDECL sinh( double x ) ...@@ -1165,13 +1165,14 @@ double CDECL sinh( double x )
return ret; return ret;
} }
static inline double CDECL ret_nan( void ) static inline double CDECL ret_nan( BOOL update_sw )
{ {
double x = 1.0; double x = 1.0;
if (!update_sw) return -NAN;
return (x - x) / (x - x); return (x - x) / (x - x);
} }
BOOL sqrt_validate( double *x ) BOOL sqrt_validate( double *x, BOOL update_sw )
{ {
short c = _dclass(*x); short c = _dclass(*x);
...@@ -1179,7 +1180,8 @@ BOOL sqrt_validate( double *x ) ...@@ -1179,7 +1180,8 @@ BOOL sqrt_validate( double *x )
if (c == FP_NAN) if (c == FP_NAN)
{ {
#ifdef __i386__ #ifdef __i386__
*x = math_error(_DOMAIN, "sqrt", *x, 0, *x); if (update_sw)
*x = math_error(_DOMAIN, "sqrt", *x, 0, *x);
#else #else
/* set signaling bit */ /* set signaling bit */
*(ULONGLONG*)x |= 0x8000000000000ULL; *(ULONGLONG*)x |= 0x8000000000000ULL;
...@@ -1188,14 +1190,14 @@ BOOL sqrt_validate( double *x ) ...@@ -1188,14 +1190,14 @@ BOOL sqrt_validate( double *x )
} }
if (signbit(*x)) if (signbit(*x))
{ {
*x = math_error(_DOMAIN, "sqrt", *x, 0, ret_nan()); *x = math_error(_DOMAIN, "sqrt", *x, 0, ret_nan(update_sw));
return FALSE; return FALSE;
} }
if (c == FP_INFINITE) return FALSE; if (c == FP_INFINITE) return FALSE;
return TRUE; return TRUE;
} }
#if defined(__x86_64__) #if defined(__x86_64__) || defined(__i386__)
double CDECL sse2_sqrt(double); double CDECL sse2_sqrt(double);
__ASM_GLOBAL_FUNC( sse2_sqrt, __ASM_GLOBAL_FUNC( sse2_sqrt,
"sqrtsd %xmm0, %xmm0\n\t" "sqrtsd %xmm0, %xmm0\n\t"
...@@ -1241,12 +1243,12 @@ __ASM_GLOBAL_FUNC( x87_sqrt, ...@@ -1241,12 +1243,12 @@ __ASM_GLOBAL_FUNC( x87_sqrt,
double CDECL sqrt( double x ) double CDECL sqrt( double x )
{ {
#ifdef __x86_64__ #ifdef __x86_64__
if (!sqrt_validate(&x)) if (!sqrt_validate(&x, TRUE))
return x; return x;
return sse2_sqrt(x); return sse2_sqrt(x);
#elif defined( __i386__ ) #elif defined( __i386__ )
if (!sqrt_validate(&x)) if (!sqrt_validate(&x, TRUE))
return x; return x;
return x87_sqrt(x); return x87_sqrt(x);
...@@ -1259,7 +1261,7 @@ double CDECL sqrt( double x ) ...@@ -1259,7 +1261,7 @@ double CDECL sqrt( double x )
unsigned int r,t1,s1,ix1,q1; unsigned int r,t1,s1,ix1,q1;
ULONGLONG ix; ULONGLONG ix;
if (!sqrt_validate(&x)) if (!sqrt_validate(&x, TRUE))
return x; return x;
ix = *(ULONGLONG*)&x; ix = *(ULONGLONG*)&x;
...@@ -3344,12 +3346,25 @@ void __cdecl __libm_sse2_tanf(void) ...@@ -3344,12 +3346,25 @@ void __cdecl __libm_sse2_tanf(void)
*/ */
void __cdecl __libm_sse2_sqrt_precise(void) void __cdecl __libm_sse2_sqrt_precise(void)
{ {
unsigned int cw;
double d; double d;
__asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) ); __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
d = sqrt( d ); __control87_2(0, 0, NULL, &cw);
__asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) ); if (cw & _MCW_RC)
} {
d = sqrt(d);
__asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
return;
}
if (!sqrt_validate(&d, FALSE))
{
__asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
return;
}
__asm__ __volatile__( "call " __ASM_NAME( "sse2_sqrt" ) );
}
#endif /* __i386__ */ #endif /* __i386__ */
/********************************************************************* /*********************************************************************
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment