diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.hpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.hpp index e5170ddd..9caf7a68 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.hpp @@ -127,7 +127,7 @@ protected: { record_cr(0, value); } void record_cr1() { cr().set((cr().get() & ~CR_field<1>::mask()) | ((fpscr() >> 4) & 0x0f000000)); } - void record_fpscr(); + void record_fpscr(int exceptions); void record_cr6(powerpc_vr const & vS, bool check_one) { if (check_one && (vS.j[0] == UVAL64(0xffffffffffffffff) && vS.j[1] == UVAL64(0xffffffffffffffff))) diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp index 504c4f41..78b45df2 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp @@ -442,28 +442,31 @@ void powerpc_cpu::execute_multiply(uint32 opcode) * Update FP exception bits **/ -void powerpc_cpu::record_fpscr() +void powerpc_cpu::record_fpscr(int exceptions) { #if PPC_ENABLE_FPU_EXCEPTIONS + // Reset non-sticky bits + fpscr() &= ~(FPSCR_VX_field::mask() | FPSCR_FEX_field::mask()); + + // Always update FX if exception bits changed + if (exceptions) + fpscr() |= FPSCR_FX_field::mask() | exceptions; + // Always update VX - if (fpscr() & (FPSCR_VXSNAN_field::mask() | FPSCR_VXISI_field::mask() | \ - FPSCR_VXISI_field::mask() | FPSCR_VXIDI_field::mask() | \ - FPSCR_VXZDZ_field::mask() | FPSCR_VXIMZ_field::mask() | \ - FPSCR_VXVC_field::mask() | FPSCR_VXSOFT_field::mask() | \ + if (fpscr() & (FPSCR_VXSNAN_field::mask() | FPSCR_VXISI_field::mask() | + FPSCR_VXISI_field::mask() | FPSCR_VXIDI_field::mask() | + FPSCR_VXZDZ_field::mask() | FPSCR_VXIMZ_field::mask() | + FPSCR_VXVC_field::mask() | FPSCR_VXSOFT_field::mask() | FPSCR_VXSQRT_field::mask() | FPSCR_VXCVI_field::mask())) fpscr() |= FPSCR_VX_field::mask(); - else - fpscr() &= ~FPSCR_VX_field::mask(); // Always update 
FEX - if (((fpscr() & FPSCR_VX_field::mask()) && (fpscr() & FPSCR_VE_field::mask())) \ - || ((fpscr() & FPSCR_OX_field::mask()) && (fpscr() & FPSCR_OE_field::mask())) \ - || ((fpscr() & FPSCR_UX_field::mask()) && (fpscr() & FPSCR_UE_field::mask())) \ - || ((fpscr() & FPSCR_ZX_field::mask()) && (fpscr() & FPSCR_ZE_field::mask())) \ - || ((fpscr() & FPSCR_XX_field::mask()) && (fpscr() & FPSCR_XE_field::mask()))) + if (((fpscr() & FPSCR_VX_field::mask()) && (fpscr() & FPSCR_VE_field::mask())) || + ((fpscr() & FPSCR_OX_field::mask()) && (fpscr() & FPSCR_OE_field::mask())) || + ((fpscr() & FPSCR_UX_field::mask()) && (fpscr() & FPSCR_UE_field::mask())) || + ((fpscr() & FPSCR_ZX_field::mask()) && (fpscr() & FPSCR_ZE_field::mask())) || + ((fpscr() & FPSCR_XX_field::mask()) && (fpscr() & FPSCR_XE_field::mask()))) fpscr() |= FPSCR_FEX_field::mask(); - else - fpscr() &= ~FPSCR_FEX_field::mask(); #endif } @@ -487,53 +490,38 @@ void powerpc_cpu::execute_fp_arith(uint32 opcode) const double b = RB::get(this, opcode); const double c = RC::get(this, opcode); - // Check for FP Exception Conditions #if PPC_ENABLE_FPU_EXCEPTIONS - int exceptions = 0; + int exceptions; if (FPSCR) { exceptions = op_apply, RA, RB, RC>::apply(a, b, c); feclearexcept(FE_ALL_EXCEPT); + febarrier(); } #endif FP d = op_apply::apply(a, b, c); -#if PPC_ENABLE_FPU_EXCEPTIONS if (FPSCR) { - // Check exceptions raised - int masked = 0xffffffff; - int raised = fetestexcept(FE_ALL_EXCEPT); - if (raised & FE_INEXACT) { - exceptions |= FPSCR_XX_field::mask(); - exceptions |= FPSCR_FX_field::mask(); - } - if (raised & FE_DIVBYZERO) { - exceptions |= FPSCR_ZX_field::mask(); - exceptions |= FPSCR_FX_field::mask(); - } - else masked &= ~FPSCR_ZX_field::mask(); - if (raised & FE_UNDERFLOW) { - exceptions |= FPSCR_UX_field::mask(); - exceptions |= FPSCR_FX_field::mask(); - } - else masked &= ~FPSCR_UX_field::mask(); - if (raised & FE_OVERFLOW) { - exceptions |= FPSCR_OX_field::mask(); - exceptions |= 
FPSCR_FX_field::mask(); - } - else masked &= ~FPSCR_OX_field::mask(); - fpscr() &= masked; - fpscr() |= exceptions; - // Update FPSCR exception bits - record_fpscr(); +#if PPC_ENABLE_FPU_EXCEPTIONS + febarrier(); + int raised = fetestexcept(FE_ALL_EXCEPT); + if (raised & FE_INEXACT) + exceptions |= FPSCR_XX_field::mask(); + if (raised & FE_DIVBYZERO) + exceptions |= FPSCR_ZX_field::mask(); + if (raised & FE_UNDERFLOW) + exceptions |= FPSCR_UX_field::mask(); + if (raised & FE_OVERFLOW) + exceptions |= FPSCR_OX_field::mask(); + record_fpscr(exceptions); +#endif // FPSCR[FPRF] is set to the class and sign of the result - if (!exceptions || !FPSCR_VE_field::test(fpscr())) + if (!FPSCR_VE_field::test(fpscr())) fp_classify(d); } -#endif // Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set if (Rc::test(opcode)) @@ -806,24 +794,18 @@ void powerpc_cpu::execute_fp_compare(uint32 opcode) FPSCR_FPCC_field::insert(fpscr(), c); cr().set(crfd, c); - // Check for FP exception condition -#if PPC_ENABLE_FPU_EXCEPTIONS - if (is_SNaN(a) || is_SNaN(b)) { - fpscr() |= FPSCR_VXSNAN_field::mask(); - fpscr() |= FPSCR_FX_field::mask(); - if (OC && !FPSCR_VE_field::test(fpscr())) { - fpscr() |= FPSCR_VXVC_field::mask(); - fpscr() |= FPSCR_FX_field::mask(); - } - } - else if (OC && (is_QNaN(a) || is_QNaN(b))) { - fpscr() |= FPSCR_VXVC_field::mask(); - fpscr() |= FPSCR_FX_field::mask(); - } -#endif - // Update FPSCR exception bits - record_fpscr(); +#if PPC_ENABLE_FPU_EXCEPTIONS + int exceptions = 0; + if (is_SNaN(a) || is_SNaN(b)) { + exceptions |= FPSCR_VXSNAN_field::mask(); + if (OC && !FPSCR_VE_field::test(fpscr())) + exceptions |= FPSCR_VXVC_field::mask(); + } + else if (OC && (is_QNaN(a) || is_QNaN(b))) + exceptions |= FPSCR_VXVC_field::mask(); + record_fpscr(exceptions); +#endif increment_pc(4); } @@ -842,6 +824,20 @@ void powerpc_cpu::execute_fp_int_convert(uint32 opcode) const uint32 r = RN::get(this, opcode); any_register d; +#if PPC_ENABLE_FPU_EXCEPTIONS + int exceptions = 0; + 
if (is_NaN(b)) { + exceptions |= FPSCR_VXCVI_field::mask(); + if (is_SNaN(b)) + exceptions |= FPSCR_VXSNAN_field::mask(); + } + if (isinf(b)) + exceptions |= FPSCR_VXCVI_field::mask(); + + feclearexcept(FE_ALL_EXCEPT); + febarrier(); +#endif + // Convert to integer word if operand fits bounds if (b >= -(double)0x80000000 && b <= (double)0x7fffffff) { #if defined mathlib_lrint @@ -866,7 +862,15 @@ void powerpc_cpu::execute_fp_int_convert(uint32 opcode) d.j = 0x80000000; // Update FPSCR exception bits - record_fpscr(); +#if PPC_ENABLE_FPU_EXCEPTIONS + febarrier(); + int raised = fetestexcept(FE_ALL_EXCEPT); + if (raised & FE_UNDERFLOW) + exceptions |= FPSCR_UX_field::mask(); + if (raised & FE_INEXACT) + exceptions |= FPSCR_XX_field::mask(); + record_fpscr(exceptions); +#endif // Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set if (Rc::test(opcode)) @@ -918,10 +922,32 @@ template< class Rc > void powerpc_cpu::execute_fp_round(uint32 opcode) { const double b = operand_fp_RB::get(this, opcode); + +#if PPC_ENABLE_FPU_EXCEPTIONS + int exceptions = + fp_invalid_operation_condition:: + apply(FPSCR_VXSNAN_field::mask(), b); + + feclearexcept(FE_ALL_EXCEPT); + febarrier(); +#endif + float d = (float)b; + // Update FPSCR exception bits +#if PPC_ENABLE_FPU_EXCEPTIONS + febarrier(); + int raised = fetestexcept(FE_ALL_EXCEPT); + if (raised & FE_UNDERFLOW) + exceptions |= FPSCR_UX_field::mask(); + if (raised & FE_INEXACT) + exceptions |= FPSCR_XX_field::mask(); + record_fpscr(exceptions); +#endif + // FPSCR[FPRF] is set to the class and sign of the result - fp_classify(d); + if (!FPSCR_VE_field::test(fpscr())) + fp_classify(d); // Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set if (Rc::test(opcode)) @@ -1006,8 +1032,15 @@ void powerpc_cpu::execute_mtfsf(uint32 opcode) if ((f & 0x80) == 0) m &= ~FPSCR_FX_field::mask(); + // The mtfsf instruction cannot alter FPSCR[FEX] nor FPSCR[VX] explicitly + int exceptions = fsf & m; + exceptions &= ~(FPSCR_FEX_field::mask() | 
FPSCR_VX_field::mask()); + // Move frB bits to FPSCR according to field mask - fpscr() = (fsf & m) | (fpscr() & ~m); + fpscr() = (fpscr() & ~m) | exceptions; + + // Update FPSCR exception bits (don't implicitly update FX) + record_fpscr(0); // Update native FP control word if (m & FPSCR_RN_field::mask()) @@ -1030,15 +1063,19 @@ void powerpc_cpu::execute_mtfsfi(uint32 opcode) if (crfD == 0) m &= ~FPSCR_FX_field::mask(); + // The mtfsfi instruction cannot alter FPSCR[FEX] nor FPSCR[VX] explicitly + int exceptions = RB::get(this, opcode) & m; + exceptions &= ~(FPSCR_FEX_field::mask() | FPSCR_VX_field::mask()); + // Move immediate to FPSCR according to field crfD - fpscr() = (RB::get(this, opcode) & m) | (fpscr() & ~m); + fpscr() = (fpscr() & ~m) | exceptions; // Update native FP control word if (m & FPSCR_RN_field::mask()) fesetround(ppc_to_native_rounding_mode(FPSCR_RN_field::extract(fpscr()))); - // Update FPSCR exception bits - record_fpscr(); + // Update FPSCR exception bits (don't implicitly update FX) + record_fpscr(0); // Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set if (Rc::test(opcode)) @@ -1050,16 +1087,21 @@ void powerpc_cpu::execute_mtfsfi(uint32 opcode) template< class RB, class Rc > void powerpc_cpu::execute_mtfsb(uint32 opcode) { - // Bit crbD of the FPSCR is set or cleared const uint32 crbD = crbD_field::extract(opcode); - fpscr() = (fpscr() & ~(1 << (31 - crbD))) | (RB::get(this, opcode) << (31 - crbD)); + + // The mtfsb0 and mtfsb1 instructions cannot alter FPSCR[FEX] nor FPSCR[VX] explicitly + int exceptions = RB::get(this, opcode) << (31 - crbD); + exceptions &= ~(FPSCR_FEX_field::mask() | FPSCR_VX_field::mask()); + + // Bit crbD of the FPSCR is set or cleared + fpscr() &= ~(1 << (31 - crbD)); // Update native FP control word if (crbD & FPSCR_RN_field::mask()) fesetround(ppc_to_native_rounding_mode(FPSCR_RN_field::extract(fpscr()))); // Update FPSCR exception bits - record_fpscr(); + record_fpscr(exceptions); // Set CR1 (FX, FEX, VX, VOX) if 
instruction has Rc set if (Rc::test(opcode)) @@ -1074,9 +1116,6 @@ void powerpc_cpu::execute_mffs(uint32 opcode) // Move FPSCR to FPR(FRD) operand_fp_dw_RD::set(this, opcode, fpscr()); - // Update FPSCR exception bits - record_fpscr(); - // Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set if (Rc::test(opcode)) record_cr1(); diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.hpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.hpp index 7aa689c0..8a219a85 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.hpp @@ -228,34 +228,40 @@ static inline uint32 fp_store_single_convert(uint64 v) * FP classification **/ -static inline bool is_NaN(double v) { +static inline bool is_NaN(double v) +{ any_register x; x.d = v; return (((x.j & UVAL64(0x7ff0000000000000)) == UVAL64(0x7ff0000000000000)) && ((x.j & UVAL64(0x000fffffffffffff)) != 0)); } -static inline bool is_SNaN(double v) { +static inline bool is_QNaN(double v) +{ any_register x; x.d = v; - return is_NaN(v) && !(x.j & UVAL64(0x0008000000000000)) ? signbit(v) : false; + return is_NaN(v) && (x.j & UVAL64(0x0008000000000000)); } -static inline bool is_QNaN(double v) { - return is_NaN(v) && !is_SNaN(v); +static inline bool is_SNaN(double v) +{ + return is_NaN(v) && !is_QNaN(v); } -static inline bool is_NaN(float v) { +static inline bool is_NaN(float v) +{ any_register x; x.f = v; return (((x.i & 0x7f800000) == 0x7f800000) && ((x.i & 0x007fffff) != 0)); } -static inline bool is_SNaN(float v) { +static inline bool is_QNaN(float v) +{ any_register x; x.f = v; - return is_NaN(v) && !(x.i & 0x00400000) ? 
signbit(v) : false; + return is_NaN(v) && (x.i & 0x00400000); } -static inline bool is_QNaN(float v) { - return is_NaN(v) && !is_SNaN(v); +static inline bool is_SNaN(float v) +{ + return is_NaN(v) && !is_QNaN(v); } /** @@ -277,43 +283,35 @@ struct fp_exception_condition { template< class FP > struct fp_invalid_operation_condition { - static inline uint32 apply(FP a, FP b, int check, bool negate = false) { + static inline uint32 apply(int flags) { uint32 exceptions = 0; - if (FPSCR_VXSNAN_field::test(check) && (is_SNaN(a) || is_SNaN(b))) { - exceptions |= FPSCR_VXSNAN_field::mask(); - exceptions |= FPSCR_FX_field::mask(); - } - if (FPSCR_VXISI_field::test(check) && isinf(a) && isinf(b)) { - if ((negate && (signbit(a) == signbit(b))) || - (!negate && (signbit(a) != signbit(b)))) { - exceptions |= FPSCR_VXISI_field::mask(); - exceptions |= FPSCR_FX_field::mask(); - } - } - if (FPSCR_VXIDI_field::test(check) && isinf(a) && isinf(b)) { - exceptions |= FPSCR_VXIDI_field::mask(); - exceptions |= FPSCR_FX_field::mask(); - } - if (FPSCR_VXZDZ_field::test(check) && a == 0 && b == 0) { - exceptions |= FPSCR_VXZDZ_field::mask(); - exceptions |= FPSCR_FX_field::mask(); - } - if (FPSCR_VXIMZ_field::test(check) && a == 0 && isinf(b)) { - exceptions |= FPSCR_VXIMZ_field::mask(); - exceptions |= FPSCR_FX_field::mask(); - } - if (FPSCR_VXVC_field::test(check) && (is_NaN(a) || is_NaN(b))) { - exceptions |= FPSCR_VXVC_field::mask(); - exceptions |= FPSCR_FX_field::mask(); - } - if (FPSCR_VXSOFT_field::test(check)) { + if (FPSCR_VXSOFT_field::test(flags)) exceptions |= FPSCR_VXSOFT_field::mask(); - exceptions |= FPSCR_FX_field::mask(); - } - if (FPSCR_VXSQRT_field::test(check) && signbit(a)) { + return 0; + } + static inline uint32 apply(int flags, FP a) { + uint32 exceptions = 0; + if (FPSCR_VXSNAN_field::test(flags) && is_SNaN(a)) + exceptions |= FPSCR_VXSNAN_field::mask(); + if (FPSCR_VXVC_field::test(flags) && is_NaN(a)) + exceptions |= FPSCR_VXVC_field::mask(); + if 
(FPSCR_VXSQRT_field::test(flags) && signbit(a)) exceptions |= FPSCR_VXSQRT_field::mask(); - exceptions |= FPSCR_FX_field::mask(); + return exceptions; + } + static inline uint32 apply(int flags, FP a, FP b, bool negate = false) { + uint32 exceptions = apply(flags) | apply(flags, a) | apply(flags, b); + if (FPSCR_VXISI_field::test(flags) && isinf(a) && isinf(b)) { + if (( negate && (signbit(a) == signbit(b))) || + (!negate && (signbit(a) != signbit(b)))) + exceptions |= FPSCR_VXISI_field::mask(); } + if (FPSCR_VXIDI_field::test(flags) && isinf(a) && isinf(b)) + exceptions |= FPSCR_VXIDI_field::mask(); + if (FPSCR_VXZDZ_field::test(flags) && a == 0 && b == 0) + exceptions |= FPSCR_VXZDZ_field::mask(); + if (FPSCR_VXIMZ_field::test(flags) && ((a == 0 && isinf(b)) || (isinf(a) && b == 0))) + exceptions |= FPSCR_VXIMZ_field::mask(); return exceptions; } }; @@ -322,7 +320,7 @@ struct fp_invalid_operation_condition { template<> \ struct fp_exception_condition { \ static inline uint32 apply(TYPE a, TYPE b) { \ - return fp_invalid_operation_condition::apply(a, b, EXCP, NEGATE); \ + return fp_invalid_operation_condition::apply(EXCP, a, b, NEGATE); \ } \ }; @@ -335,32 +333,31 @@ struct fp_divide_exception_condition { static inline uint32 apply(FP a, FP b) { int exceptions = fp_invalid_operation_condition:: - apply(a, b, - FPSCR_VXSNAN_field::mask() | FPSCR_VXIDI_field::mask() | FPSCR_VXZDZ_field::mask()); -#if 0 - if (!exceptions && b == 0) - exceptions = FPSCR_ZX_field::mask() | FPSCR_FX_field::mask(); -#endif + apply(FPSCR_VXSNAN_field::mask() | FPSCR_VXIDI_field::mask() | FPSCR_VXZDZ_field::mask(), + a, b); + if (isfinite(a) && a != 0 && b == 0) + exceptions = FPSCR_ZX_field::mask(); return exceptions; } }; template<> struct fp_exception_condition : fp_divide_exception_condition { }; -template< class FP, bool NG > +template< class FP, bool negate > struct fp_fma_exception_condition { static inline uint32 apply(FP a, FP b, FP c) { -#if 1 - return 
fp_invalid_operation_condition:: - apply(a, b, FPSCR_VXSNAN_field::mask() | FPSCR_VXIMZ_field::mask()); -#else - // FIXME: we are losing precision - double p = a * b; - return (fp_invalid_operation_condition:: - apply(a, b, FPSCR_VXSNAN_field::mask() | FPSCR_VXIMZ_field::mask(), false) | - fp_invalid_operation_condition:: - apply(p, c, FPSCR_VXSNAN_field::mask() | FPSCR_VXISI_field::mask(), NG)); -#endif + return + fp_invalid_operation_condition:: + apply(FPSCR_VXSNAN_field::mask(), a) | + fp_invalid_operation_condition:: + apply(FPSCR_VXSNAN_field::mask(), b) | + fp_invalid_operation_condition:: + apply(FPSCR_VXSNAN_field::mask(), c) | + fp_invalid_operation_condition:: + apply(FPSCR_VXIMZ_field::mask(), a, b) | + fp_invalid_operation_condition:: + apply(FPSCR_VXISI_field::mask(), a * b, c, negate) + ; } }; diff --git a/SheepShaver/src/kpx_cpu/src/mathlib/ieeefp.hpp b/SheepShaver/src/kpx_cpu/src/mathlib/ieeefp.hpp index 4e92e33c..7fe544e5 100755 --- a/SheepShaver/src/kpx_cpu/src/mathlib/ieeefp.hpp +++ b/SheepShaver/src/kpx_cpu/src/mathlib/ieeefp.hpp @@ -46,6 +46,16 @@ extern "C" int fesetround(int); #endif /* FENV_H */ +// Make sure previous instructions are executed first +// XXX this is mostly a hint to the compiler so that it doesn't +// reorder calls to fe*() functions before the actual computation... +#if defined __GNUC__ +#define febarrier() __asm__ __volatile__ ("") +#endif +#ifndef febarrier +#define febarrier() +#endif + // HOST_FLOAT_WORDS_BIG_ENDIAN is a tristate: // yes (1) / no (0) / default (undefined) #if HOST_FLOAT_WORDS_BIG_ENDIAN