Implemented ftentox and fixed fscale again

- (Badly) worked around a clang bug to implement ftentox - (Very badly) fixed another edge case in fscale - Found another bug in softfloat (floatx80_to_int32) -- I'm not so sure anymore that my floatx80/subnormal math fix works right. I just figured out that x80's exp is relative to the second mantissa bit: float128{exp=0x3fff, man=0} == floatx80{exp=0x3fff, man=0x800...}. Even though x80 has an explicit bit, the exponents are the same...
2025-04-17 17:37:14 +00:00 · 2014-11-24 01:31:53 -05:00 · 2014-11-24 01:31:53 -05:00 · aea23c6dfc
commit aea23c6dfc
parent 9e91b8067a
2 changed files with 69 additions and 37 deletions
--- a/core/SoftFloat/softfloat.c
+++ b/core/SoftFloat/softfloat.c
@ -3162,6 +3162,9 @@ int32 floatx80_to_int32( floatx80 a )
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+    // [shoebill] there is no way 0x4037 is the right constant to use here.
+    // 1.0 has exp=0x3fff, 0x4037 - 0x3fff == 56.
+    // (aSig >> 56) shifts out the mantissa bits, plus 8 more.
    shiftCount = 0x4037 - aExp;
    if ( shiftCount <= 0 ) shiftCount = 1;
    shift64RightJamming( aSig, shiftCount, &aSig );
@ -3392,11 +3395,11 @@ float128 floatx80_to_float128( floatx80 a )
    // either 0 or 1.
    //
    // shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
-    
    if (aExp == 0)
        shift128Right( aSig, 0, 16, &zSig0, &zSig1 );
    else
        shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
+    
    return packFloat128( aSign, aExp, zSig0, zSig1 );

 }
@ -3501,7 +3504,7 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
            return a;
        }
        // [shoebill]
-        // vestigial implicit-bit stuff
+        // vestigial implicit-bit code
        // if ( bExp == 0 ) --expDiff;
        
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
@ -3513,7 +3516,7 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        // [shoebill]
-        // vestigial implicit-bit stuff
+        // vestigial implicit-bit code
        // if ( aExp == 0 ) ++expDiff;
        
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
@ -3595,7 +3598,8 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
    }
    // [shoebill]
    // All these places where we check if exp==0 and then add one to it
-    // are vestigial implicit-bit stuff
+    // are vestigial implicit-bit-handling code from the float32, 64, and
+    // 128 implementations. When floatx80 has exp=0, it really means 0.
    //
    //if ( aExp == 0 ) {
    //    aExp = 1;
@ -3612,7 +3616,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    // [shoebill]
-    // vestigial implicit-bit stuff
+    // vestigial implicit-bit code
    //if ( aExp == 0 ) ++expDiff;
    
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
@ -3627,7 +3631,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
        return a;
    }
    // [shoebill]
-    // vestigial implicit-bit stuff
+    // vestigial implicit-bit code
    //if ( bExp == 0 ) --expDiff;
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
--- a/core/fpu.c
+++ b/core/fpu.c
@ -807,6 +807,15 @@ float128 _from_native(double n)
 #define _native_log2(a) (log(a) / log(2.0)) /* or log2() */
 #define _native_log(a) log(a)
 #define _native_log1p(a) log((a) + 1.0) /* or log1p() */
+double _native_tentox(double a) {
+    /*
+     * Returns 10^a. Workaround for a clang bug on OS X 10.10:
+     * clang wants to optimize pow(10.0, a) to __exp10(a), but
+     * __exp10 doesn't exist in the 10.8 SDK. powl() has no such
+     * optimized equivalent, so compute in long double and narrow.
+     */
+    return (double)powl(10.0L, (long double)a);
+}

 const double _native_e = 2.71828182845904509;
 const double _native_10 = 10.0;
@ -884,7 +893,7 @@ static float128 _hack_lognp1 (float128 x) {
 }

 static float128 _hack_tentox (float128 x) {
-    return _from_native(_native_pow(_native_10, _to_native(x)));
+    return _from_native(_native_tentox(_to_native(x)));
 }

 static float128 _hack_twotox (float128 x) {
@ -1923,15 +1932,36 @@ static void inst_fmath_fscale ()
        fpu->result.low >>= 1;
        fpu->result.low |= (fpu->result.high << 63);
        m_high = (fpu->result.high & 0x0000ffffffffffffULL) >> 1;
-        fpu->result.high >>= 63;
-        fpu->result.high <<= 63;
+        fpu->result.high &= 0x8000000000000000ULL;
        fpu->result.high |= m_high;
-        return;
    }
-    
-    fpu->result = fpu->dest;
-    fpu->result.high &= 0x8000ffffffffffffULL;
-    fpu->result.high |= (((uint64_t)exponent) << 48);
+    else if ((orig_exponent == 0) && (exponent > 0)) {
+        uint32_t i;
+        /*
+         * Edge case 2: if the original value was subnormal, and we're
+         * adding to the exponent, then the result may be normal or
+         * subnormal, and we need to adjust the implicit bit accordingly.
+         */
+        
+        fpu->result = fpu->dest;
+        float128 _two128 = int32_to_float128(2);
+        for (i=0; i < exponent; i++)
+            fpu->result = float128_mul(fpu->result, _two128);
+        /* 
+         * FIXME: this is a really bad implementation, but I doubt anyone
+         * will ever hit this condition. It's also probably not exactly
+         * correct. The motorola docs say that 68881 will never return an
+         * unnormal number as the result of an operation, but if you add
+         * an arbitrary value to the exponent of a subnormal number, you
+         * can get an unnormal number. Does the 68881 specially handle this?
+         */
+    }
+    else {
+        /* Common case */
+        fpu->result = fpu->dest;
+        fpu->result.high &= 0x8000ffffffffffffULL;
+        fpu->result.high |= (((uint64_t)exponent) << 48);
+    }
    return ;
    
 overflow:
@ -2197,29 +2227,27 @@ static void inst_fmath_ftentox ()
    // _hack_ftentox() is broken on clang 3.5 on osx 10.10
    // (tries to optimize pow(10.0, x) to __exp10(x), and __exp10
    //  isn't implemented in the 10.8 SDK)
-//    const _Bool source_zero = _float128_is_zero(fpu->source);
-//    const _Bool source_inf = _float128_is_infinity(fpu->source);
-//    const _Bool source_sign = _float128_is_neg(fpu->source);
-//    
-//    /* If source is zero, result is +1.0 */
-//    if (source_zero) {
-//        fpu->result = _one128;
-//        return ;
-//    }
-//    /* if source is -inf, result is +0.0 */
-//    else if (source_inf && source_sign) {
-//        fpu->result = _zero128;
-//        return ;
-//    }
-//    /* if source is +inf, result is +inf */
-//    else if (source_inf) {
-//        fpu->result = fpu->source;
-//        return ;
-//    }
-//    
-//    fpu->result = _hack_tentox(fpu->source);
-//    
-    assert(!"fmath: ftentox not implemented");
+    const _Bool source_zero = _float128_is_zero(fpu->source);
+    const _Bool source_inf = _float128_is_infinity(fpu->source);
+    const _Bool source_sign = _float128_is_neg(fpu->source);
+    
+    /* If source is zero, result is +1.0 */
+    if (source_zero) {
+        fpu->result = _one128;
+        return ;
+    }
+    /* if source is -inf, result is +0.0 */
+    else if (source_inf && source_sign) {
+        fpu->result = _zero128;
+        return ;
+    }
+    /* if source is +inf, result is +inf */
+    else if (source_inf) {
+        fpu->result = fpu->source;
+        return ;
+    }
+    
+    fpu->result = _hack_tentox(fpu->source);
 }

 static void inst_fmath_ftst ()