Improve FPU emulation accuracy. However, PPC_ENABLE_FPU_EXCEPTIONS is still
set to 0 until the generated code is optimized enough (the current slowdown
factor is 3x vs. the previous core; the expectation is about 50% slower FP code).

The main benefit is exception bits are accurate. All glibc test-fenv,
test-arith{,f}, test-double, test-float pass on ppc, and mostly on x86_64
with gcc 4.0.1. Yes, this is also compiler dependent.

FIXME: find a real Mac application that depends on precise FPSCR bits... I
don't want to spend time on optimizing this until someone shows me a
real-world application that needs it.
This commit is contained in:
gbeauche 2006-07-04 07:19:18 +00:00
parent 0a74d0559a
commit 7efab4276f
4 changed files with 183 additions and 137 deletions

View File

@ -127,7 +127,7 @@ protected:
{ record_cr(0, value); }
void record_cr1()
{ cr().set((cr().get() & ~CR_field<1>::mask()) | ((fpscr() >> 4) & 0x0f000000)); }
void record_fpscr();
void record_fpscr(int exceptions);
void record_cr6(powerpc_vr const & vS, bool check_one) {
if (check_one && (vS.j[0] == UVAL64(0xffffffffffffffff) &&
vS.j[1] == UVAL64(0xffffffffffffffff)))

View File

@ -442,28 +442,31 @@ void powerpc_cpu::execute_multiply(uint32 opcode)
* Update FP exception bits
**/
void powerpc_cpu::record_fpscr()
void powerpc_cpu::record_fpscr(int exceptions)
{
#if PPC_ENABLE_FPU_EXCEPTIONS
// Reset non-sticky bits
fpscr() &= ~(FPSCR_VX_field::mask() | FPSCR_FEX_field::mask());
// Always update FX if exception bits changed
if (exceptions)
fpscr() |= FPSCR_FX_field::mask() | exceptions;
// Always update VX
if (fpscr() & (FPSCR_VXSNAN_field::mask() | FPSCR_VXISI_field::mask() | \
FPSCR_VXISI_field::mask() | FPSCR_VXIDI_field::mask() | \
FPSCR_VXZDZ_field::mask() | FPSCR_VXIMZ_field::mask() | \
FPSCR_VXVC_field::mask() | FPSCR_VXSOFT_field::mask() | \
if (fpscr() & (FPSCR_VXSNAN_field::mask() | FPSCR_VXISI_field::mask() |
FPSCR_VXISI_field::mask() | FPSCR_VXIDI_field::mask() |
FPSCR_VXZDZ_field::mask() | FPSCR_VXIMZ_field::mask() |
FPSCR_VXVC_field::mask() | FPSCR_VXSOFT_field::mask() |
FPSCR_VXSQRT_field::mask() | FPSCR_VXCVI_field::mask()))
fpscr() |= FPSCR_VX_field::mask();
else
fpscr() &= ~FPSCR_VX_field::mask();
// Always update FEX
if (((fpscr() & FPSCR_VX_field::mask()) && (fpscr() & FPSCR_VE_field::mask())) \
|| ((fpscr() & FPSCR_OX_field::mask()) && (fpscr() & FPSCR_OE_field::mask())) \
|| ((fpscr() & FPSCR_UX_field::mask()) && (fpscr() & FPSCR_UE_field::mask())) \
|| ((fpscr() & FPSCR_ZX_field::mask()) && (fpscr() & FPSCR_ZE_field::mask())) \
|| ((fpscr() & FPSCR_XX_field::mask()) && (fpscr() & FPSCR_XE_field::mask())))
if (((fpscr() & FPSCR_VX_field::mask()) && (fpscr() & FPSCR_VE_field::mask())) ||
((fpscr() & FPSCR_OX_field::mask()) && (fpscr() & FPSCR_OE_field::mask())) ||
((fpscr() & FPSCR_UX_field::mask()) && (fpscr() & FPSCR_UE_field::mask())) ||
((fpscr() & FPSCR_ZX_field::mask()) && (fpscr() & FPSCR_ZE_field::mask())) ||
((fpscr() & FPSCR_XX_field::mask()) && (fpscr() & FPSCR_XE_field::mask())))
fpscr() |= FPSCR_FEX_field::mask();
else
fpscr() &= ~FPSCR_FEX_field::mask();
#endif
}
@ -487,53 +490,38 @@ void powerpc_cpu::execute_fp_arith(uint32 opcode)
const double b = RB::get(this, opcode);
const double c = RC::get(this, opcode);
// Check for FP Exception Conditions
#if PPC_ENABLE_FPU_EXCEPTIONS
int exceptions = 0;
int exceptions;
if (FPSCR) {
exceptions = op_apply<uint32, fp_exception_condition<OP>, RA, RB, RC>::apply(a, b, c);
feclearexcept(FE_ALL_EXCEPT);
febarrier();
}
#endif
FP d = op_apply<double, OP, RA, RB, RC>::apply(a, b, c);
#if PPC_ENABLE_FPU_EXCEPTIONS
if (FPSCR) {
// Check exceptions raised
int masked = 0xffffffff;
int raised = fetestexcept(FE_ALL_EXCEPT);
if (raised & FE_INEXACT) {
exceptions |= FPSCR_XX_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
if (raised & FE_DIVBYZERO) {
exceptions |= FPSCR_ZX_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
else masked &= ~FPSCR_ZX_field::mask();
if (raised & FE_UNDERFLOW) {
exceptions |= FPSCR_UX_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
else masked &= ~FPSCR_UX_field::mask();
if (raised & FE_OVERFLOW) {
exceptions |= FPSCR_OX_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
else masked &= ~FPSCR_OX_field::mask();
fpscr() &= masked;
fpscr() |= exceptions;
// Update FPSCR exception bits
record_fpscr();
#if PPC_ENABLE_FPU_EXCEPTIONS
febarrier();
int raised = fetestexcept(FE_ALL_EXCEPT);
if (raised & FE_INEXACT)
exceptions |= FPSCR_XX_field::mask();
if (raised & FE_DIVBYZERO)
exceptions |= FPSCR_ZX_field::mask();
if (raised & FE_UNDERFLOW)
exceptions |= FPSCR_UX_field::mask();
if (raised & FE_OVERFLOW)
exceptions |= FPSCR_OX_field::mask();
record_fpscr(exceptions);
#endif
// FPSCR[FPRF] is set to the class and sign of the result
if (!exceptions || !FPSCR_VE_field::test(fpscr()))
if (!FPSCR_VE_field::test(fpscr()))
fp_classify(d);
}
#endif
// Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
if (Rc::test(opcode))
@ -806,24 +794,18 @@ void powerpc_cpu::execute_fp_compare(uint32 opcode)
FPSCR_FPCC_field::insert(fpscr(), c);
cr().set(crfd, c);
// Check for FP exception condition
#if PPC_ENABLE_FPU_EXCEPTIONS
if (is_SNaN(a) || is_SNaN(b)) {
fpscr() |= FPSCR_VXSNAN_field::mask();
fpscr() |= FPSCR_FX_field::mask();
if (OC && !FPSCR_VE_field::test(fpscr())) {
fpscr() |= FPSCR_VXVC_field::mask();
fpscr() |= FPSCR_FX_field::mask();
}
}
else if (OC && (is_QNaN(a) || is_QNaN(b))) {
fpscr() |= FPSCR_VXVC_field::mask();
fpscr() |= FPSCR_FX_field::mask();
}
#endif
// Update FPSCR exception bits
record_fpscr();
#if PPC_ENABLE_FPU_EXCEPTIONS
int exceptions = 0;
if (is_SNaN(a) || is_SNaN(b)) {
exceptions |= FPSCR_VXSNAN_field::mask();
if (OC && !FPSCR_VE_field::test(fpscr()))
exceptions |= FPSCR_VXVC_field::mask();
}
else if (OC && (is_QNaN(a) || is_QNaN(b)))
exceptions |= FPSCR_VXVC_field::mask();
record_fpscr(exceptions);
#endif
increment_pc(4);
}
@ -842,6 +824,20 @@ void powerpc_cpu::execute_fp_int_convert(uint32 opcode)
const uint32 r = RN::get(this, opcode);
any_register d;
#if PPC_ENABLE_FPU_EXCEPTIONS
int exceptions = 0;
if (is_NaN(b)) {
exceptions |= FPSCR_VXCVI_field::mask();
if (is_SNaN(b))
exceptions |= FPSCR_VXSNAN_field::mask();
}
if (isinf(b))
exceptions |= FPSCR_VXCVI_field::mask();
feclearexcept(FE_ALL_EXCEPT);
febarrier();
#endif
// Convert to integer word if operand fits bounds
if (b >= -(double)0x80000000 && b <= (double)0x7fffffff) {
#if defined mathlib_lrint
@ -866,7 +862,15 @@ void powerpc_cpu::execute_fp_int_convert(uint32 opcode)
d.j = 0x80000000;
// Update FPSCR exception bits
record_fpscr();
#if PPC_ENABLE_FPU_EXCEPTIONS
febarrier();
int raised = fetestexcept(FE_ALL_EXCEPT);
if (raised & FE_UNDERFLOW)
exceptions |= FPSCR_UX_field::mask();
if (raised & FE_INEXACT)
exceptions |= FPSCR_XX_field::mask();
record_fpscr(exceptions);
#endif
// Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
if (Rc::test(opcode))
@ -918,10 +922,32 @@ template< class Rc >
void powerpc_cpu::execute_fp_round(uint32 opcode)
{
const double b = operand_fp_RB::get(this, opcode);
#if PPC_ENABLE_FPU_EXCEPTIONS
int exceptions =
fp_invalid_operation_condition<double>::
apply(FPSCR_VXSNAN_field::mask(), b);
feclearexcept(FE_ALL_EXCEPT);
febarrier();
#endif
float d = (float)b;
// Update FPSCR exception bits
#if PPC_ENABLE_FPU_EXCEPTIONS
febarrier();
int raised = fetestexcept(FE_ALL_EXCEPT);
if (raised & FE_UNDERFLOW)
exceptions |= FPSCR_UX_field::mask();
if (raised & FE_INEXACT)
exceptions |= FPSCR_XX_field::mask();
record_fpscr(exceptions);
#endif
// FPSCR[FPRF] is set to the class and sign of the result
fp_classify(d);
if (!FPSCR_VE_field::test(fpscr()))
fp_classify(d);
// Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
if (Rc::test(opcode))
@ -1006,8 +1032,15 @@ void powerpc_cpu::execute_mtfsf(uint32 opcode)
if ((f & 0x80) == 0)
m &= ~FPSCR_FX_field::mask();
// The mtfsf instruction cannot alter FPSCR[FEX] nor FPSCR[VX] explicitly
int exceptions = fsf & m;
exceptions &= ~(FPSCR_FEX_field::mask() | FPSCR_VX_field::mask());
// Move frB bits to FPSCR according to field mask
fpscr() = (fsf & m) | (fpscr() & ~m);
fpscr() = (fpscr() & ~m) | exceptions;
// Update FPSCR exception bits (don't implicitly update FX)
record_fpscr(0);
// Update native FP control word
if (m & FPSCR_RN_field::mask())
@ -1030,15 +1063,19 @@ void powerpc_cpu::execute_mtfsfi(uint32 opcode)
if (crfD == 0)
m &= ~FPSCR_FX_field::mask();
// The mtfsfi instruction cannot alter FPSCR[FEX] nor FPSCR[VX] explicitly
int exceptions = RB::get(this, opcode) & m;
exceptions &= ~(FPSCR_FEX_field::mask() | FPSCR_VX_field::mask());
// Move immediate to FPSCR according to field crfD
fpscr() = (RB::get(this, opcode) & m) | (fpscr() & ~m);
fpscr() = (fpscr() & ~m) | exceptions;
// Update native FP control word
if (m & FPSCR_RN_field::mask())
fesetround(ppc_to_native_rounding_mode(FPSCR_RN_field::extract(fpscr())));
// Update FPSCR exception bits
record_fpscr();
// Update FPSCR exception bits (don't implicitly update FX)
record_fpscr(0);
// Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
if (Rc::test(opcode))
@ -1050,16 +1087,21 @@ void powerpc_cpu::execute_mtfsfi(uint32 opcode)
template< class RB, class Rc >
void powerpc_cpu::execute_mtfsb(uint32 opcode)
{
// Bit crbD of the FPSCR is set or cleared
const uint32 crbD = crbD_field::extract(opcode);
fpscr() = (fpscr() & ~(1 << (31 - crbD))) | (RB::get(this, opcode) << (31 - crbD));
// The mtfsb0 and mtfsb1 instructions cannot alter FPSCR[FEX] nor FPSCR[VX] explicitly
int exceptions = RB::get(this, opcode) << (31 - crbD);
exceptions &= ~(FPSCR_FEX_field::mask() | FPSCR_VX_field::mask());
// Bit crbD of the FPSCR is set or cleared
fpscr() &= ~(1 << (31 - crbD));
// Update native FP control word
if (crbD & FPSCR_RN_field::mask())
fesetround(ppc_to_native_rounding_mode(FPSCR_RN_field::extract(fpscr())));
// Update FPSCR exception bits
record_fpscr();
record_fpscr(exceptions);
// Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
if (Rc::test(opcode))
@ -1074,9 +1116,6 @@ void powerpc_cpu::execute_mffs(uint32 opcode)
// Move FPSCR to FPR(FRD)
operand_fp_dw_RD::set(this, opcode, fpscr());
// Update FPSCR exception bits
record_fpscr();
// Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
if (Rc::test(opcode))
record_cr1();

View File

@ -228,34 +228,40 @@ static inline uint32 fp_store_single_convert(uint64 v)
* FP classification
**/
static inline bool is_NaN(double v) {
static inline bool is_NaN(double v)
{
any_register x; x.d = v;
return (((x.j & UVAL64(0x7ff0000000000000)) == UVAL64(0x7ff0000000000000)) &&
((x.j & UVAL64(0x000fffffffffffff)) != 0));
}
static inline bool is_SNaN(double v) {
static inline bool is_QNaN(double v)
{
any_register x; x.d = v;
return is_NaN(v) && !(x.j & UVAL64(0x0008000000000000)) ? signbit(v) : false;
return is_NaN(v) && (x.j & UVAL64(0x0008000000000000));
}
static inline bool is_QNaN(double v) {
return is_NaN(v) && !is_SNaN(v);
static inline bool is_SNaN(double v)
{
return is_NaN(v) && !is_QNaN(v);
}
static inline bool is_NaN(float v) {
static inline bool is_NaN(float v)
{
any_register x; x.f = v;
return (((x.i & 0x7f800000) == 0x7f800000) &&
((x.i & 0x007fffff) != 0));
}
static inline bool is_SNaN(float v) {
static inline bool is_QNaN(float v)
{
any_register x; x.f = v;
return is_NaN(v) && !(x.i & 0x00400000) ? signbit(v) : false;
return is_NaN(v) && (x.i & 0x00400000);
}
static inline bool is_QNaN(float v) {
return is_NaN(v) && !is_SNaN(v);
static inline bool is_SNaN(float v)
{
return is_NaN(v) && !is_QNaN(v);
}
/**
@ -277,43 +283,35 @@ struct fp_exception_condition {
template< class FP >
struct fp_invalid_operation_condition {
static inline uint32 apply(FP a, FP b, int check, bool negate = false) {
static inline uint32 apply(int flags) {
uint32 exceptions = 0;
if (FPSCR_VXSNAN_field::test(check) && (is_SNaN(a) || is_SNaN(b))) {
exceptions |= FPSCR_VXSNAN_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
if (FPSCR_VXISI_field::test(check) && isinf(a) && isinf(b)) {
if ((negate && (signbit(a) == signbit(b))) ||
(!negate && (signbit(a) != signbit(b)))) {
exceptions |= FPSCR_VXISI_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
}
if (FPSCR_VXIDI_field::test(check) && isinf(a) && isinf(b)) {
exceptions |= FPSCR_VXIDI_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
if (FPSCR_VXZDZ_field::test(check) && a == 0 && b == 0) {
exceptions |= FPSCR_VXZDZ_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
if (FPSCR_VXIMZ_field::test(check) && a == 0 && isinf(b)) {
exceptions |= FPSCR_VXIMZ_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
if (FPSCR_VXVC_field::test(check) && (is_NaN(a) || is_NaN(b))) {
exceptions |= FPSCR_VXVC_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
if (FPSCR_VXSOFT_field::test(check)) {
if (FPSCR_VXSOFT_field::test(flags))
exceptions |= FPSCR_VXSOFT_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
if (FPSCR_VXSQRT_field::test(check) && signbit(a)) {
return 0;
}
static inline uint32 apply(int flags, FP a) {
uint32 exceptions = 0;
if (FPSCR_VXSNAN_field::test(flags) && is_SNaN(a))
exceptions |= FPSCR_VXSNAN_field::mask();
if (FPSCR_VXVC_field::test(flags) && is_NaN(a))
exceptions |= FPSCR_VXVC_field::mask();
if (FPSCR_VXSQRT_field::test(flags) && signbit(a))
exceptions |= FPSCR_VXSQRT_field::mask();
exceptions |= FPSCR_FX_field::mask();
return exceptions;
}
static inline uint32 apply(int flags, FP a, FP b, bool negate = false) {
uint32 exceptions = apply(flags) | apply(flags, a) | apply(flags, b);
if (FPSCR_VXISI_field::test(flags) && isinf(a) && isinf(b)) {
if (( negate && (signbit(a) == signbit(b))) ||
(!negate && (signbit(a) != signbit(b))))
exceptions |= FPSCR_VXISI_field::mask();
}
if (FPSCR_VXIDI_field::test(flags) && isinf(a) && isinf(b))
exceptions |= FPSCR_VXIDI_field::mask();
if (FPSCR_VXZDZ_field::test(flags) && a == 0 && b == 0)
exceptions |= FPSCR_VXZDZ_field::mask();
if (FPSCR_VXIMZ_field::test(flags) && ((a == 0 && isinf(b)) || (isinf(a) && b == 0)))
exceptions |= FPSCR_VXIMZ_field::mask();
return exceptions;
}
};
@ -322,7 +320,7 @@ struct fp_invalid_operation_condition {
template<> \
struct fp_exception_condition<OP> { \
static inline uint32 apply(TYPE a, TYPE b) { \
return fp_invalid_operation_condition<TYPE>::apply(a, b, EXCP, NEGATE); \
return fp_invalid_operation_condition<TYPE>::apply(EXCP, a, b, NEGATE); \
} \
};
@ -335,32 +333,31 @@ struct fp_divide_exception_condition {
static inline uint32 apply(FP a, FP b) {
int exceptions =
fp_invalid_operation_condition<FP>::
apply(a, b,
FPSCR_VXSNAN_field::mask() | FPSCR_VXIDI_field::mask() | FPSCR_VXZDZ_field::mask());
#if 0
if (!exceptions && b == 0)
exceptions = FPSCR_ZX_field::mask() | FPSCR_FX_field::mask();
#endif
apply(FPSCR_VXSNAN_field::mask() | FPSCR_VXIDI_field::mask() | FPSCR_VXZDZ_field::mask(),
a, b);
if (isfinite(a) && a != 0 && b == 0)
exceptions = FPSCR_ZX_field::mask();
return exceptions;
}
};
template<> struct fp_exception_condition<op_fdiv> : fp_divide_exception_condition<double> { };
template< class FP, bool NG >
template< class FP, bool negate >
struct fp_fma_exception_condition {
static inline uint32 apply(FP a, FP b, FP c) {
#if 1
return fp_invalid_operation_condition<FP>::
apply(a, b, FPSCR_VXSNAN_field::mask() | FPSCR_VXIMZ_field::mask());
#else
// FIXME: we are losing precision
double p = a * b;
return (fp_invalid_operation_condition<FP>::
apply(a, b, FPSCR_VXSNAN_field::mask() | FPSCR_VXIMZ_field::mask(), false) |
fp_invalid_operation_condition<FP>::
apply(p, c, FPSCR_VXSNAN_field::mask() | FPSCR_VXISI_field::mask(), NG));
#endif
return
fp_invalid_operation_condition<FP>::
apply(FPSCR_VXSNAN_field::mask(), a) |
fp_invalid_operation_condition<FP>::
apply(FPSCR_VXSNAN_field::mask(), b) |
fp_invalid_operation_condition<FP>::
apply(FPSCR_VXSNAN_field::mask(), c) |
fp_invalid_operation_condition<FP>::
apply(FPSCR_VXIMZ_field::mask(), a, b) |
fp_invalid_operation_condition<FP>::
apply(FPSCR_VXISI_field::mask(), a * b, c, negate)
;
}
};

View File

@ -46,6 +46,16 @@ extern "C" int fesetround(int);
#endif /* FENV_H */
// Make sure previous instructions are executed first
// XXX this is really mostly a hint to the compiler so that it doesn't
// reorder calls to fe*() functions before the actual computation...
#if defined __GNUC__
#define febarrier() __asm__ __volatile__ ("")
#endif
#ifndef febarrier
#define febarrier()
#endif
// HOST_FLOAT_WORDS_BIG_ENDIAN is a tristate:
// yes (1) / no (0) / default (undefined)
#if HOST_FLOAT_WORDS_BIG_ENDIAN