mirror of
https://github.com/pruten/shoebill.git
synced 2025-04-17 17:37:14 +00:00
Implemented ftentox and fixed fscale again
- (Badly) worked around a clang bug to implement ftentox.
- (Very badly) fixed another edge case in fscale.
- Found another bug in softfloat (floatx80_to_int32).
- Not so sure anymore that my floatx80/subnormal math fix works right. I just figured out that x80's exp is relative to the second mantissa bit: float128{exp=0x3fff, man=0} == floatx80{exp=0x3fff, man=0x800...}. Even though x80 has an explicit bit, the exponents are the same.
This commit is contained in:
parent
9e91b8067a
commit
aea23c6dfc
@ -3162,6 +3162,9 @@ int32 floatx80_to_int32( floatx80 a )
|
||||
aExp = extractFloatx80Exp( a );
|
||||
aSign = extractFloatx80Sign( a );
|
||||
if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
|
||||
// [shoebill] there is no way 0x4037 is the right constant to use here.
|
||||
// 1.0 has exp=0x3fff, 0x4037 - 0x3fff == 56.
|
||||
// (aSig >> 56) shifts out the mantissa bits, plus 8 more.
|
||||
shiftCount = 0x4037 - aExp;
|
||||
if ( shiftCount <= 0 ) shiftCount = 1;
|
||||
shift64RightJamming( aSig, shiftCount, &aSig );
|
||||
@ -3392,11 +3395,11 @@ float128 floatx80_to_float128( floatx80 a )
|
||||
// either 0 or 1.
|
||||
//
|
||||
// shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
|
||||
|
||||
if (aExp == 0)
|
||||
shift128Right( aSig, 0, 16, &zSig0, &zSig1 );
|
||||
else
|
||||
shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
|
||||
|
||||
return packFloat128( aSign, aExp, zSig0, zSig1 );
|
||||
|
||||
}
|
||||
@ -3501,7 +3504,7 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
|
||||
return a;
|
||||
}
|
||||
// [shoebill]
|
||||
// vestigial implicit-bit stuff
|
||||
// vestigial implicit-bit code
|
||||
// if ( bExp == 0 ) --expDiff;
|
||||
|
||||
shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
|
||||
@ -3513,7 +3516,7 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
|
||||
return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
|
||||
}
|
||||
// [shoebill]
|
||||
// vestigial implicit-bit stuff
|
||||
// vestigial implicit-bit code
|
||||
// if ( aExp == 0 ) ++expDiff;
|
||||
|
||||
shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
|
||||
@ -3595,7 +3598,8 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
|
||||
}
|
||||
// [shoebill]
|
||||
// All these places where we check if exp==0 and then add one to it
|
||||
// are vestigial implicit-bit stuff
|
||||
// are vestigial implicit-bit-handling code from the float32, 64, and
|
||||
// 128 implementations. When floatx80 has exp=0, it really means 0.
|
||||
//
|
||||
//if ( aExp == 0 ) {
|
||||
// aExp = 1;
|
||||
@ -3612,7 +3616,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
|
||||
return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
|
||||
}
|
||||
// [shoebill]
|
||||
// vestigial implicit-bit stuff
|
||||
// vestigial implicit-bit code
|
||||
//if ( aExp == 0 ) ++expDiff;
|
||||
|
||||
shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
|
||||
@ -3627,7 +3631,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
|
||||
return a;
|
||||
}
|
||||
// [shoebill]
|
||||
// vestigial implicit-bit stuff
|
||||
// vestigial implicit-bit code
|
||||
//if ( bExp == 0 ) --expDiff;
|
||||
shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
|
||||
aBigger:
|
||||
|
90
core/fpu.c
90
core/fpu.c
@ -807,6 +807,15 @@ float128 _from_native(double n)
|
||||
#define _native_log2(a) (log(a) / log(2.0)) /* or log2() */
|
||||
#define _native_log(a) log(a)
|
||||
#define _native_log1p(a) log((a) + 1.0) /* or log1p() */
|
||||
double _native_tentox(a) {
|
||||
/*
|
||||
* This is a dumb workaround for a clang bug on OS X 10.10
|
||||
* Clang wants to optimize pow(10.0, a) to __exp(a), but
|
||||
* __exp doesn't exist in the 10.8 SDK. So I'm using powl()
|
||||
* instead, which doesn't have an equivalent optimized function.
|
||||
*/
|
||||
return (double)powl(10.0, (long double)a);
|
||||
}
|
||||
|
||||
const double _native_e = 2.71828182845904509;
|
||||
const double _native_10 = 10.0;
|
||||
@ -884,7 +893,7 @@ static float128 _hack_lognp1 (float128 x) {
|
||||
}
|
||||
|
||||
/*
 * Compute 10^x for a float128 by converting to the host's native type,
 * evaluating there, and converting the result back.
 *
 * This goes through _native_tentox() rather than
 * _native_pow(_native_10, ...): the pow(10.0, x) form is what triggers the
 * clang/OS X 10.10 problem that _native_tentox() exists to work around.
 * A leftover return using the pow form previously preceded this one and
 * made it unreachable.
 */
static float128 _hack_tentox (float128 x) {
    return _from_native(_native_tentox(_to_native(x)));
}
|
||||
|
||||
static float128 _hack_twotox (float128 x) {
|
||||
@ -1923,15 +1932,36 @@ static void inst_fmath_fscale ()
|
||||
fpu->result.low >>= 1;
|
||||
fpu->result.low |= (fpu->result.high << 63);
|
||||
m_high = (fpu->result.high & 0x0000ffffffffffffULL) >> 1;
|
||||
fpu->result.high >>= 63;
|
||||
fpu->result.high <<= 63;
|
||||
fpu->result.high &= 0x8000000000000000ULL;
|
||||
fpu->result.high |= m_high;
|
||||
return;
|
||||
}
|
||||
|
||||
fpu->result = fpu->dest;
|
||||
fpu->result.high &= 0x8000ffffffffffffULL;
|
||||
fpu->result.high |= (((uint64_t)exponent) << 48);
|
||||
else if ((orig_exponent == 0) && (exponent > 0)) {
|
||||
uint32_t i;
|
||||
/*
|
||||
* Edge case 2: if the original value was subnormal, and we're
|
||||
* adding to the exponent, then the result may be normal or subnormal,
|
||||
* and we need to adjust the implicit bit accordingly.
|
||||
*/
|
||||
|
||||
fpu->result = fpu->dest;
|
||||
float128 _two128 = int32_to_float128(2);
|
||||
for (i=0; i < exponent; i++)
|
||||
fpu->result = float128_mul(fpu->result, _two128);
|
||||
/*
|
||||
* FIXME: this is a really bad implementation, but I doubt anyone
|
||||
* will ever hit this condition. It's also probably not exactly
|
||||
* correct. The motorola docs say that 68881 will never return an
|
||||
* unnormal number as the result of an operation, but if you add
|
||||
* an arbitrary value to the exponent of a subnormal number, you
|
||||
* can get an unnormal number. Does the 68881 specially handle this?
|
||||
*/
|
||||
}
|
||||
else {
|
||||
/* Common case */
|
||||
fpu->result = fpu->dest;
|
||||
fpu->result.high &= 0x8000ffffffffffffULL;
|
||||
fpu->result.high |= (((uint64_t)exponent) << 48);
|
||||
}
|
||||
return ;
|
||||
|
||||
overflow:
|
||||
@ -2197,29 +2227,27 @@ static void inst_fmath_ftentox ()
|
||||
// _hack_ftentox() is broken on clang 3.5 on osx 10.10
|
||||
// (tries to optimize pow(10.0, x) to __exp10(x), and __exp10
|
||||
// isn't implemented in the 10.8 SDK)
|
||||
// const _Bool source_zero = _float128_is_zero(fpu->source);
|
||||
// const _Bool source_inf = _float128_is_infinity(fpu->source);
|
||||
// const _Bool source_sign = _float128_is_neg(fpu->source);
|
||||
//
|
||||
// /* If source is zero, result is +1.0 */
|
||||
// if (source_zero) {
|
||||
// fpu->result = _one128;
|
||||
// return ;
|
||||
// }
|
||||
// /* if source is -inf, result is +0.0 */
|
||||
// else if (source_inf && source_sign) {
|
||||
// fpu->result = _zero128;
|
||||
// return ;
|
||||
// }
|
||||
// /* if source is +inf, result is +inf */
|
||||
// else if (source_inf) {
|
||||
// fpu->result = fpu->source;
|
||||
// return ;
|
||||
// }
|
||||
//
|
||||
// fpu->result = _hack_tentox(fpu->source);
|
||||
//
|
||||
assert(!"fmath: ftentox not implemented");
|
||||
const _Bool source_zero = _float128_is_zero(fpu->source);
|
||||
const _Bool source_inf = _float128_is_infinity(fpu->source);
|
||||
const _Bool source_sign = _float128_is_neg(fpu->source);
|
||||
|
||||
/* If source is zero, result is +1.0 */
|
||||
if (source_zero) {
|
||||
fpu->result = _one128;
|
||||
return ;
|
||||
}
|
||||
/* if source is -inf, result is +0.0 */
|
||||
else if (source_inf && source_sign) {
|
||||
fpu->result = _zero128;
|
||||
return ;
|
||||
}
|
||||
/* if source is +inf, result is +inf */
|
||||
else if (source_inf) {
|
||||
fpu->result = fpu->source;
|
||||
return ;
|
||||
}
|
||||
|
||||
fpu->result = _hack_tentox(fpu->source);
|
||||
}
|
||||
|
||||
static void inst_fmath_ftst ()
|
||||
|
Loading…
x
Reference in New Issue
Block a user