From aea23c6dfc720d03431976e8e316b40de9a43d07 Mon Sep 17 00:00:00 2001 From: Peter Rutenbar Date: Mon, 24 Nov 2014 01:31:53 -0500 Subject: [PATCH] Implemented ftentox and fixed fscale again - (Badly) worked around a clang bug to implement ftentox - (Very badly) fixed another edge case in fscale - Found another bug in softfloat (floatx80_to_int32) -- Not so sure anymore that my floatx80/subnormal math fix works right. Just figured out that x80's exp is relative to the second mantissa bit. float128{exp=0x3fff, man=0} == floatx80{exp=0x3fff, man=0x800...} even though x80 has an explicit bit, the exponents are the same... --- core/SoftFloat/softfloat.c | 16 ++++--- core/fpu.c | 90 +++++++++++++++++++++++++------------- 2 files changed, 69 insertions(+), 37 deletions(-) diff --git a/core/SoftFloat/softfloat.c b/core/SoftFloat/softfloat.c index 36d7c68..7d36c66 100644 --- a/core/SoftFloat/softfloat.c +++ b/core/SoftFloat/softfloat.c @@ -3162,6 +3162,9 @@ int32 floatx80_to_int32( floatx80 a ) aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; + // [shoebill] there is no way 0x4037 is the right constant to use here. + // 1.0 has exp=0x3fff, 0x4037 - 0x3fff == 56. + // (aSig >> 56) shifts out the mantissa bits, plus 8 more. shiftCount = 0x4037 - aExp; if ( shiftCount <= 0 ) shiftCount = 1; shift64RightJamming( aSig, shiftCount, &aSig ); @@ -3392,11 +3395,11 @@ float128 floatx80_to_float128( floatx80 a ) // either 0 or 1. 
// // shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); - if (aExp == 0) shift128Right( aSig, 0, 16, &zSig0, &zSig1 ); else shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); + return packFloat128( aSign, aExp, zSig0, zSig1 ); } @@ -3501,7 +3504,7 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) return a; } // [shoebill] - // vestigial implicit-bit stuff + // vestigial implicit-bit code // if ( bExp == 0 ) --expDiff; shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); @@ -3513,7 +3516,7 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); } // [shoebill] - // vestigial implicit-bit stuff + // vestigial implicit-bit code // if ( aExp == 0 ) ++expDiff; shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); @@ -3595,7 +3598,8 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) } // [shoebill] // All these places where we check if exp==0 and then add one to it - // are vestigial implicit-bit stuff + // are vestigial implicit-bit-handling code from the float32, 64, and + // 128 implementations. When floatx80 has exp=0, it really means 0. 
// //if ( aExp == 0 ) { // aExp = 1; @@ -3612,7 +3616,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); } // [shoebill] - // vestigial implicit-bit stuff + // vestigial implicit-bit code //if ( aExp == 0 ) ++expDiff; shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); @@ -3627,7 +3631,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) return a; } // [shoebill] - // vestigial implicit-bit stuff + // vestigial implicit-bit code //if ( bExp == 0 ) --expDiff; shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); aBigger: diff --git a/core/fpu.c b/core/fpu.c index b8ba472..1ca6300 100644 --- a/core/fpu.c +++ b/core/fpu.c @@ -807,6 +807,15 @@ float128 _from_native(double n) #define _native_log2(a) (log(a) / log(2.0)) /* or log2() */ #define _native_log(a) log(a) #define _native_log1p(a) log((a) + 1.0) /* or log1p() */ +double _native_tentox(double a) { + /* + * This is a dumb workaround for a clang bug on OS X 10.10 + * Clang wants to optimize pow(10.0, a) to __exp10(a), but + * __exp10 doesn't exist in the 10.8 SDK. So I'm using powl() + * instead, which doesn't have an equivalent optimized function. 
+ */ + return (double)powl(10.0, (long double)a); +} const double _native_e = 2.71828182845904509; const double _native_10 = 10.0; @@ -884,7 +893,7 @@ static float128 _hack_lognp1 (float128 x) { } static float128 _hack_tentox (float128 x) { - return _from_native(_native_pow(_native_10, _to_native(x))); + return _from_native(_native_tentox(_to_native(x))); } static float128 _hack_twotox (float128 x) { @@ -1923,15 +1932,36 @@ static void inst_fmath_fscale () fpu->result.low >>= 1; fpu->result.low |= (fpu->result.high << 63); m_high = (fpu->result.high & 0x0000ffffffffffffULL) >> 1; - fpu->result.high >>= 63; - fpu->result.high <<= 63; + fpu->result.high &= 0x8000000000000000ULL; fpu->result.high |= m_high; - return; } - - fpu->result = fpu->dest; - fpu->result.high &= 0x8000ffffffffffffULL; - fpu->result.high |= (((uint64_t)exponent) << 48); + else if ((orig_exponent == 0) && (exponent > 0)) { + uint32_t i; + /* + * Edge case 2: if the original value was subnormal, and we're + * adding to the exponent, then the result may be normal or subnormal, + * and we need to adjust the implicit bit accordingly. + */ + + fpu->result = fpu->dest; + float128 _two128 = int32_to_float128(2); + for (i=0; i < exponent; i++) + fpu->result = float128_mul(fpu->result, _two128); + /* + * FIXME: this is a really bad implementation, but I doubt anyone + * will ever hit this condition. It's also probably not exactly + * correct. The motorola docs say that 68881 will never return an + * unnormal number as the result of an operation, but if you add + * an arbitrary value to the exponent of a subnormal number, you + * can get an unnormal number. Does the 68881 specially handle this? 
+ */ + } + else { + /* Common case */ + fpu->result = fpu->dest; + fpu->result.high &= 0x8000ffffffffffffULL; + fpu->result.high |= (((uint64_t)exponent) << 48); + } return ; overflow: @@ -2197,29 +2227,27 @@ static void inst_fmath_ftentox () // _hack_ftentox() is broken on clang 3.5 on osx 10.10 // (tries to optimize pow(10.0, x) to __exp10(x), and __exp10 // isn't implemented in the 10.8 SDK) -// const _Bool source_zero = _float128_is_zero(fpu->source); -// const _Bool source_inf = _float128_is_infinity(fpu->source); -// const _Bool source_sign = _float128_is_neg(fpu->source); -// -// /* If source is zero, result is +1.0 */ -// if (source_zero) { -// fpu->result = _one128; -// return ; -// } -// /* if source is -inf, result is +0.0 */ -// else if (source_inf && source_sign) { -// fpu->result = _zero128; -// return ; -// } -// /* if source is +inf, result is +inf */ -// else if (source_inf) { -// fpu->result = fpu->source; -// return ; -// } -// -// fpu->result = _hack_tentox(fpu->source); -// - assert(!"fmath: ftentox not implemented"); + const _Bool source_zero = _float128_is_zero(fpu->source); + const _Bool source_inf = _float128_is_infinity(fpu->source); + const _Bool source_sign = _float128_is_neg(fpu->source); + + /* If source is zero, result is +1.0 */ + if (source_zero) { + fpu->result = _one128; + return ; + } + /* if source is -inf, result is +0.0 */ + else if (source_inf && source_sign) { + fpu->result = _zero128; + return ; + } + /* if source is +inf, result is +inf */ + else if (source_inf) { + fpu->result = fpu->source; + return ; + } + + fpu->result = _hack_tentox(fpu->source); } static void inst_fmath_ftst ()