From 9e91b8067a14aae57b1ed714b5df53d746b23cb5 Mon Sep 17 00:00:00 2001
From: Peter Rutenbar <pruten@gmail.com>
Date: Sun, 23 Nov 2014 19:41:06 -0500
Subject: [PATCH] Fixed a million SoftFloat bugs, and an edge case in fscale

SoftFloat's support for subnormal numbers is completely broken on floatx80,
particularly when the result of an op on a subnormal number is normal, and
vice versa. I'm not completely sure that my fixes are correct, or that they
didn't break anything. I need to do more testing.

Also fixed an edge case in fscale that gets weird when the input goes from
normal to subnormal... oh, and I still need to fix the opposite case.
---
 core/SoftFloat/softfloat-macros.h |  10 +-
 core/SoftFloat/softfloat.c        | 156 +++++++++++++++++++++++++-----
 core/fpu.c                        |  24 ++++-
 3 files changed, 164 insertions(+), 26 deletions(-)

diff --git a/core/SoftFloat/softfloat-macros.h b/core/SoftFloat/softfloat-macros.h
index d289328..45d49e9 100644
--- a/core/SoftFloat/softfloat-macros.h
+++ b/core/SoftFloat/softfloat-macros.h
@@ -1,3 +1,9 @@
+/*
+ * SoftFloat with lots of fixes and modified for use by Shoebill.
+ *
+ * Based on SoftFloat 2b by John R. Hauser.
+ * Modifications by Peter Rutenbar. (pruten@gmail.com)
+ */
 
 /*============================================================================
 
@@ -152,7 +158,9 @@ INLINE void
         z0 = a0>>count;
     }
     else {
-        z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
+        // [shoebill] This is a bug, right? ( count < 64 ) can never be true
+        // z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
+        z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
         z0 = 0;
     }
     *z1Ptr = z1;
diff --git a/core/SoftFloat/softfloat.c b/core/SoftFloat/softfloat.c
index 496b4d7..36d7c68 100644
--- a/core/SoftFloat/softfloat.c
+++ b/core/SoftFloat/softfloat.c
@@ -1,3 +1,10 @@
+/*
+ * SoftFloat with lots of fixes and modified for use by Shoebill.
+ *
+ * Based on SoftFloat 2b by John R. Hauser.
+ * Modifications by Peter Rutenbar. (pruten@gmail.com)
+ */
+
 /*============================================================================
 
@@ -30,6 +37,9 @@
 these four paragraphs for those parts of this code that are retained.
 
 =============================================================================*/
 
+// [shoebill]
+// Inlining milieu.h
+
 /*----------------------------------------------------------------------------
 | Include common integer types and flags.
 *----------------------------------------------------------------------------*/
@@ -307,6 +317,10 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
             shift32RightJamming( zSig, - zExp, &zSig );
             zExp = 0;
             roundBits = zSig & 0x7F;
+            // [shoebill]
+            // I think there's a bug here too: if zSig==0x80000080 and zExp==-2,
+            // then that low bit will get lost. And since roundBits is 0,
+            // we won't raise underflow or inexact.
             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
         }
     }
@@ -376,7 +390,13 @@ INLINE flag extractFloat64Sign( float64 a )
 | significand are stored at the locations pointed to by `zExpPtr' and
 | `zSigPtr', respectively.
 *----------------------------------------------------------------------------*/
-
+// [shoebill]
+// This function left shifts the mantissa until it has an implicit 1 bit
+// just to the left of the mantissa (actually, it produces a value with an explicit bit
+// in that position, but the callers are aware of that.)
+// The exponent it returns is the "true" exponent - as if 0x0000 weren't a special case.
+// It behaves as though exp=0 was an order of magnitude below exp=1 (which it isn't for implicit-bit formats),
+// and it returns negative numbers if it needs an even lower magnitude than that.
 static void
  normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
 {
@@ -388,6 +408,7 @@ static void
 
 }
 
+
 /*----------------------------------------------------------------------------
 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
 | double-precision floating-point value, returning the result. After being
@@ -469,6 +490,10 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
             shift64RightJamming( zSig, - zExp, &zSig );
             zExp = 0;
             roundBits = zSig & 0x3FF;
+            // [shoebill]
+            // I think there's a bug here: if zSig==0x8000000000000400 and zExp==-2,
+            // then that low bit will get lost. And since roundBits is 0,
+            // we won't raise underflow or inexact.
             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
         }
     }
@@ -551,8 +576,12 @@ static void
     shiftCount = countLeadingZeros64( aSig );
     *zSigPtr = aSig<>9 );
     zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
     zExp = aExp;
@@ -3321,7 +3375,24 @@ float128 floatx80_to_float128( floatx80 a )
     if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
         return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
     }
+
+    // [shoebill]
+    // If the x80 has a non-zero exponent and its explicit bit is 0,
+    // then Motorola's 68881 docs call it "unnormal". packFloat128 can't
+    // handle that, so we need to de-unnormalize it to get a regular
+    // normal or subnormal number.
+    while ((aExp > 0) && ((aSig >> 63)==0)) {
+        aExp--;
+        aSig <<= 1;
+    }
+    // [shoebill]
+    // Don't chew off the x80's explicit bit without checking
+    // the exponent. If it's 0, then the explicit bit may be
+    // either 0 or 1.
+    //
+    // shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
+    if (aExp == 0)
+        shift128Right( aSig, 0, 16, &zSig0, &zSig1 );
+    else
     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
@@ -3424,21 +3495,27 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
     bSig = extractFloatx80Frac( b );
     bExp = extractFloatx80Exp( b );
     expDiff = aExp - bExp;
-    if ( 0 < expDiff ) {
+    if ( 0 < expDiff ) { // [shoebill] aExp is bigger than bExp
         if ( aExp == 0x7FFF ) {
             if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
             return a;
         }
-        if ( bExp == 0 ) --expDiff;
+        // [shoebill]
+        // vestigial implicit-bit stuff
+        // if ( bExp == 0 ) --expDiff;
+
         shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
         zExp = aExp;
     }
-    else if ( expDiff < 0 ) {
+    else if ( expDiff < 0 ) { // [shoebill] aExp is smaller than bExp
        if ( bExp == 0x7FFF ) {
             if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
         }
-        if ( aExp == 0 ) ++expDiff;
+        // [shoebill]
+        // vestigial implicit-bit stuff
+        // if ( aExp == 0 ) ++expDiff;
+
         shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
         zExp = bExp;
     }
@@ -3452,7 +3529,21 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
         zSig1 = 0;
         zSig0 = aSig + bSig;
         if ( aExp == 0 ) {
-            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
+            // [shoebill]
+            // Adding subnormal numbers can possibly overflow, so we need to
+            // detect and handle that.
+            if ((( aSig >> 63 ) && ( bSig >> 63 )) ||
+                (( aSig >> 63 ) && !( zSig0 >> 63 )) ||
+                (( bSig >> 63 ) && !( zSig0 >> 63 ))) {
+                zSig1 = zSig0 << 63;
+                zSig0 >>= 1;
+                zSig0 |= LIT64( 0x8000000000000000 );
+                zExp = 1;
+            }
+            else {
+                normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
+            }
+
             goto roundAndPack;
         }
         zExp = aExp;
@@ -3502,10 +3593,15 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
         z.high = floatx80_default_nan_high;
         return z;
     }
-    if ( aExp == 0 ) {
-        aExp = 1;
-        bExp = 1;
-    }
+    // [shoebill]
+    // All these places where we check if exp==0 and then add one to it
+    // are vestigial implicit-bit stuff
+    //
+    //if ( aExp == 0 ) {
+    //    aExp = 1;
+    //    bExp = 1;
+    //}
+
     zSig1 = 0;
     if ( bSig < aSig ) goto aBigger;
     if ( aSig < bSig ) goto bBigger;
@@ -3515,7 +3611,10 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
         return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
     }
-    if ( aExp == 0 ) ++expDiff;
+    // [shoebill]
+    // vestigial implicit-bit stuff
+    //if ( aExp == 0 ) ++expDiff;
+
     shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
     sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
@@ -3527,7 +3626,9 @@
         if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
         return a;
     }
-    if ( bExp == 0 ) --expDiff;
+    // [shoebill]
+    // vestigial implicit-bit stuff
+    //if ( bExp == 0 ) --expDiff;
     shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
     sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
@@ -3628,6 +3729,7 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b )
         if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
     }
+
     zExp = aExp + bExp - 0x3FFE;
     mul64To128( aSig, bSig, &zSig0, &zSig1 );
     if ( 0 < (sbits64) zSig0 ) {
@@ -4350,10 +4452,10 @@ floatx80 float128_to_floatx80( float128 a )
     int32 aExp;
     bits64 aSig0, aSig1;
 
-    aSig1 = extractFloat128Frac1( a );
-    aSig0 = extractFloat128Frac0( a );
-    aExp = extractFloat128Exp( a );
-    aSign = extractFloat128Sign( a );
+    aSig1 = extractFloat128Frac1( a ); // [shoebill] low
+    aSig0 = extractFloat128Frac0( a ); // [shoebill] high & 0x0000ffffff...
+    aExp = extractFloat128Exp( a );    // [shoebill] (high >> 48) & 0x7fff
+    aSign = extractFloat128Sign( a );  // [shoebill] 1 or 0
     if ( aExp == 0x7FFF ) {
         if ( aSig0 | aSig1 ) {
             return commonNaNToFloatx80( float128ToCommonNaN( a ) );
@@ -4363,13 +4465,23 @@
     if ( aExp == 0 ) {
         if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
         normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+        // [shoebill]
+        // normalizeFloat128Subnormal (exp=0x0000, mant=0x0000 c000...) -> (aExp=0, mant=0x0001 8000...)
+        // normalizeFloat128Subnormal (exp=0x0000, mant=0x0000 4000...) -> (aExp=-1, mant=0x0001 0000...)
+        // if the original exponent was 0x0000, then the implicit bit is 0.
+        // So when making an x80-friendly mantissa, make sure the explicit bit is 0
+
+        shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
     }
     else {
         aSig0 |= LIT64( 0x0001000000000000 );
+        // [shoebill]
+        shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
     }
-    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
-    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
+
+    // [shoebill]
+    // shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
+
+    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
 
 }
 
 #endif
diff --git a/core/fpu.c b/core/fpu.c
index a5460ab..b8ba472 100644
--- a/core/fpu.c
+++ b/core/fpu.c
@@ -1876,7 +1876,7 @@ static void inst_fmath_fscale ()
     const _Bool dest_zero = _float128_is_zero(fpu->dest);
     const _Bool dest_inf = _float128_is_infinity(fpu->dest);
 
-    int32_t factor, exponent;
+    int32_t factor, orig_exponent, exponent;
 
     /* If the source is inf, the result is nan and set operr */
     if (source_inf) {
@@ -1904,12 +1904,30 @@ static void inst_fmath_fscale ()
 
     assert(float_exception_flags == 0);
 
-    exponent = factor + (int32_t)((fpu->dest.high >> 48) & 0x7fff);
+    orig_exponent = (int32_t)((fpu->dest.high >> 48) & 0x7fff);
+    exponent = factor + orig_exponent;
 
-    if (exponent <= 0) /* not precisely correct, I think - should be '< 0' */
+    if (exponent < 0)
         goto underflow;
     else if (exponent >= 0x7fff)
         goto overflow;
+    else if ((exponent == 0) && (orig_exponent > 0)) {
+        uint64_t m_high;
+        /*
+         * Edge case: if the 80-bit input was {exp=1, mantissa=1},
+         * the 128-bit version will be {exp=1, mantissa=0}.
+         * If we're subtracting 1, the result will be {exp=0, mantissa=0}
+         * which is not correct. We need to account for the implicit bit.
+         */
+        fpu->result = fpu->dest;
+        fpu->result.low >>= 1;
+        fpu->result.low |= (fpu->result.high << 63);
+        m_high = (fpu->result.high & 0x0000ffffffffffffULL) >> 1;
+        fpu->result.high >>= 63;
+        fpu->result.high <<= 63;
+        fpu->result.high |= m_high;
+        return;
+    }
 
     fpu->result = fpu->dest;
     fpu->result.high &= 0x8000ffffffffffffULL;
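
A quick standalone check of the carry-detection idiom used in the addFloatx80Sigs()
hunk above, in case it helps review. This is only a sketch, not Shoebill or SoftFloat
code: add64_carried_out() is a hypothetical helper that mirrors the three-clause test
the patch uses to decide whether aSig + bSig wrapped past 64 bits (the case where the
patch then shifts the sum right by one, restores the lost top bit, and sets zExp to 1).

/* Sketch only: verifies the carry test used on exponent-0 significands. */
#include <assert.h>
#include <stdint.h>

/* Returns nonzero iff aSig + bSig carried out of 64 bits, judging only from
 * the operands' and the sum's top bits (same test as in the patch). */
static int add64_carried_out(uint64_t aSig, uint64_t bSig, uint64_t sum)
{
    return ((aSig >> 63) && (bSig >> 63)) ||
           ((aSig >> 63) && !(sum >> 63)) ||
           ((bSig >> 63) && !(sum >> 63));
}

int main(void)
{
    uint64_t a, b, sum;

    /* Two significands with bit 63 set always carry out. */
    a = 0x8000000000000000ULL; b = 0x8000000000000000ULL; sum = a + b;
    assert(add64_carried_out(a, b, sum));

    /* Two canonical subnormal significands (bit 63 clear) can never carry out,
     * even when the sum ends up with bit 63 set; that case is what the else
     * branch (normalizeFloatx80Subnormal) already handles. */
    a = 0x4000000000000000ULL; b = 0x4000000000000000ULL; sum = a + b;
    assert(!add64_carried_out(a, b, sum));

    /* Mixed case that wraps. */
    a = 0x8000000000000001ULL; b = 0x7fffffffffffffffULL; sum = a + b;
    assert(add64_carried_out(a, b, sum));
    return 0;
}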
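
On the fscale else-if branch above, one hedged observation plus a sketch rather than a
verified fix. Assuming the layout the masks in fpu.c imply (sign in bit 63 of high, a
15-bit biased exponent in bits 48-62, a 112-bit mantissa split across high[47:0] and
low, implicit leading 1 when the exponent field is nonzero): for the comment's own
example, {exp=1, mantissa=0}, shifting the mantissa right by one still yields
{exp=0, mantissa=0}; the subnormal encoding the comment describes as correct also needs
the old implicit bit made explicit as mantissa bit 111 (bit 47 of high). The standalone
program below shows that encoding; f128_bits and scale_down_to_subnormal() are
hypothetical stand-ins, not Shoebill's real types or API.

/* Sketch only: re-encode a normal value whose scaled biased exponent is 0. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct f128_bits { uint64_t hi, lo; };   /* hypothetical stand-in for fpu->dest/result */

/* The mantissa is shifted right by one and the previously implicit leading 1
 * becomes explicit as mantissa bit 111; the exponent field becomes 0. */
static struct f128_bits scale_down_to_subnormal(struct f128_bits x)
{
    struct f128_bits r;
    uint64_t m_high = (x.hi & 0x0000ffffffffffffULL) >> 1;  /* top 48 mantissa bits, shifted */
    m_high |= 0x0000800000000000ULL;                        /* implicit bit made explicit    */
    r.lo = (x.lo >> 1) | (x.hi << 63);                      /* low 64 mantissa bits, shifted */
    r.hi = (x.hi & 0x8000000000000000ULL) | m_high;         /* keep sign, exponent field = 0 */
    return r;
}

int main(void)
{
    /* The example from the patch comment: x80 {exp=1, integer bit=1, fraction=0}
     * becomes 128-bit {exp=1, mantissa=0}, i.e. the smallest normal value. */
    struct f128_bits smallest_normal = { 0x0001000000000000ULL, 0 };

    struct f128_bits half = scale_down_to_subnormal(smallest_normal);

    /* Half the smallest normal: exponent field 0 with only mantissa bit 111 set.
     * Dropping the implicit bit here would instead give +0.0, the incorrect
     * result the comment warns about. */
    assert(half.hi == 0x0000800000000000ULL && half.lo == 0);
    printf("hi=%016llx lo=%016llx\n",
           (unsigned long long)half.hi, (unsigned long long)half.lo);
    return 0;
}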