Merge in FP exceptions support but disable it for now as it is incomplete

and slower. Implement mcrfs. Fix and optimize fctiw with native rounding.
This commit is contained in:
gbeauche 2004-01-24 16:43:45 +00:00
parent 3de5a15902
commit 82808234fa
6 changed files with 345 additions and 111 deletions

View File

@ -63,6 +63,18 @@
#endif
/**
* PPC_ENABLE_FPU_EXCEPTIONS
*
* Define to enable a more precise FPU emulation with support for
* exception bits. This is slower and not fully correct yet.
**/
#ifndef PPC_ENABLE_FPU_EXCEPTIONS
#define PPC_ENABLE_FPU_EXCEPTIONS 0
#endif
/**
* PPC_DECODE_CACHE
*

View File

@ -84,8 +84,11 @@ protected:
{ record_cr(0, value); }
void record_cr1()
{ cr().set((cr().get() & ~CR_field<1>::mask()) | ((fpscr() >> 4) & 0x0f000000)); }
void record_fpscr();
void fp_classify(double x);
template< class FP >
void fp_classify(FP x);
void fp_setround(int round);
protected:
@ -365,6 +368,7 @@ private:
template< class RA >
void execute_stwcx(uint32 opcode);
void execute_mcrf(uint32 opcode);
void execute_mcrfs(uint32 opcode);
void execute_mtcrf(uint32 opcode);
template< class FM, class RB, class Rc >
void execute_mtfsf(uint32 opcode);

View File

@ -727,6 +727,12 @@ const powerpc_cpu::instr_info_t powerpc_cpu::powerpc_ii_table[] = {
PPC_I(MCRF),
XL_form, 19, 0, CFLOW_NORMAL
},
{ "mcrfs",
EXECUTE_0(mcrfs),
NULL,
PPC_I(MCRFS),
X_form, 63, 64, CFLOW_NORMAL
},
{ "mfcr",
EXECUTE_GENERIC_ARITH(nop, RD, CR, NONE, NONE, OE_BIT_0, RC_BIT_0),
NULL,

View File

@ -21,6 +21,7 @@
#include <stdio.h>
#include <math.h>
#include <time.h>
#include <fenv.h>
#include "sysdeps.h"
#include "cpu/vm.hpp"
@ -51,26 +52,26 @@
* RC Input operand register or immediate (optional: operand_NONE)
**/
template< class OP, class RA, class RB, class RC >
template< class RT, class OP, class RA, class RB, class RC >
struct op_apply {
template< class T >
static inline T apply(T a, T b, T c) {
static inline RT apply(T a, T b, T c) {
return OP::apply(a, b, c);
}
};
template< class OP, class RA, class RB >
struct op_apply<OP, RA, RB, null_operand> {
template< class RT, class OP, class RA, class RB >
struct op_apply<RT, OP, RA, RB, null_operand> {
template< class T >
static inline T apply(T a, T b, T c) {
static inline RT apply(T a, T b, T) {
return OP::apply(a, b);
}
};
template< class OP, class RA >
struct op_apply<OP, RA, null_operand, null_operand> {
template< class RT, class OP, class RA >
struct op_apply<RT, OP, RA, null_operand, null_operand> {
template< class T >
static inline T apply(T a, T b, T c) {
static inline RT apply(T a, T, T) {
return OP::apply(a);
}
};
@ -198,7 +199,7 @@ void powerpc_cpu::execute_generic_arith(uint32 opcode)
const uint32 b = RB::get(this, opcode);
const uint32 c = RC::get(this, opcode);
uint32 d = op_apply<OP, RA, RB, RC>::apply(a, b, c);
uint32 d = op_apply<uint32, OP, RA, RB, RC>::apply(a, b, c);
// Set XER (OV, SO) if instruction has OE set
if (OE::test(opcode))
@ -440,6 +441,37 @@ void powerpc_cpu::execute_multiply(uint32 opcode)
increment_pc(4);
}
/**
* Record FPSCR
*
* Update FP exception bits
**/
void powerpc_cpu::record_fpscr()
{
#if PPC_ENABLE_FPU_EXCEPTIONS
// Always update VX
if (fpscr() & (FPSCR_VXSNAN_field::mask() | FPSCR_VXISI_field::mask() | \
FPSCR_VXISI_field::mask() | FPSCR_VXIDI_field::mask() | \
FPSCR_VXZDZ_field::mask() | FPSCR_VXIMZ_field::mask() | \
FPSCR_VXVC_field::mask() | FPSCR_VXSOFT_field::mask() | \
FPSCR_VXSQRT_field::mask() | FPSCR_VXCVI_field::mask()))
fpscr() |= FPSCR_VX_field::mask();
else
fpscr() &= ~FPSCR_VX_field::mask();
// Always update FEX
if (((fpscr() & FPSCR_VX_field::mask()) && (fpscr() & FPSCR_VE_field::mask())) \
|| ((fpscr() & FPSCR_OX_field::mask()) && (fpscr() & FPSCR_OE_field::mask())) \
|| ((fpscr() & FPSCR_UX_field::mask()) && (fpscr() & FPSCR_UE_field::mask())) \
|| ((fpscr() & FPSCR_ZX_field::mask()) && (fpscr() & FPSCR_ZE_field::mask())) \
|| ((fpscr() & FPSCR_XX_field::mask()) && (fpscr() & FPSCR_XE_field::mask())))
fpscr() |= FPSCR_FEX_field::mask();
else
fpscr() &= ~FPSCR_FEX_field::mask();
#endif
}
/**
* Floating-point arithmetics
*
@ -459,31 +491,52 @@ void powerpc_cpu::execute_fp_arith(uint32 opcode)
const double a = RA::get(this, opcode);
const double b = RB::get(this, opcode);
const double c = RC::get(this, opcode);
FP d = op_apply<OP, RA, RB, RC>::apply(a, b, c);
#if 0
// FIXME: Compute FPSCR bits if instruction requests it
// Check for FP Exception Conditions
#if PPC_ENABLE_FPU_EXCEPTIONS
int exceptions = 0;
if (FPSCR) {
exceptions = op_apply<uint32, fp_exception_condition<OP>, RA, RB, RC>::apply(a, b, c);
feclearexcept(FE_ALL_EXCEPT);
}
#endif
FP d = op_apply<double, OP, RA, RB, RC>::apply(a, b, c);
#if PPC_ENABLE_FPU_EXCEPTIONS
if (FPSCR) {
// Always update VX
if (fpscr() & (FPSCR_VXSNAN_field::mask() | FPSCR_VXISI_field::mask() | \
FPSCR_VXISI_field::mask() | FPSCR_VXIDI_field::mask() | \
FPSCR_VXZDZ_field::mask() | FPSCR_VXIMZ_field::mask() | \
FPSCR_VXVC_field::mask() | FPSCR_VXSOFT_field::mask() | \
FPSCR_VXSQRT_field::mask() | FPSCR_VXCVI_field::mask()))
fpscr() |= FPSCR_VX_field::mask();
else
fpscr() &= ~FPSCR_VX_field::mask();
// Check exceptions raised
int masked = 0xffffffff;
int raised = fetestexcept(FE_ALL_EXCEPT);
if (raised & FE_INEXACT) {
exceptions |= FPSCR_XX_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
if (raised & FE_DIVBYZERO) {
exceptions |= FPSCR_ZX_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
else masked &= ~FPSCR_ZX_field::mask();
if (raised & FE_UNDERFLOW) {
exceptions |= FPSCR_UX_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
else masked &= ~FPSCR_UX_field::mask();
if (raised & FE_OVERFLOW) {
exceptions |= FPSCR_OX_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
else masked &= ~FPSCR_OX_field::mask();
fpscr() &= masked;
fpscr() |= exceptions;
// Always update FEX
if (((fpscr() & FPSCR_VX_field::mask()) && (fpscr() & FPSCR_VE_field::mask())) \
|| ((fpscr() & FPSCR_OX_field::mask()) && (fpscr() & FPSCR_OE_field::mask())) \
|| ((fpscr() & FPSCR_UX_field::mask()) && (fpscr() & FPSCR_UE_field::mask())) \
|| ((fpscr() & FPSCR_ZX_field::mask()) && (fpscr() & FPSCR_ZE_field::mask())) \
|| ((fpscr() & FPSCR_XX_field::mask()) && (fpscr() & FPSCR_XE_field::mask())))
fpscr() |= FPSCR_FEX_field::mask();
else
fpscr() &= ~FPSCR_FEX_field::mask();
// Update FPSCR exception bits
record_fpscr();
// FPSCR[FPRF] is set to the class and sign of the result
if (!exceptions || !FPSCR_VE_field::test(fpscr()))
fp_classify(d);
}
#endif
@ -735,95 +788,50 @@ void powerpc_cpu::execute_stwcx(uint32 opcode)
increment_pc(4);
}
/**
* Basic (and probably incorrect) implementation for missing functions
* in older C libraries
**/
#ifndef signbit
#define signbit(X) my_signbit(X)
#endif
static inline bool my_signbit(double X) {
return X < 0.0;
}
#ifndef isless
#define isless(X, Y) my_isless(X, Y)
#endif
static inline bool my_isless(double X, double Y) {
return X < Y;
}
#ifndef isgreater
#define isgreater(X, Y) my_isgreater(X, Y)
#endif
static inline bool my_isgreater(double X, double Y) {
return X > Y;
}
/**
* Floating-point compare instruction
*
* OC Predicate for ordered compare
**/
static inline bool is_NaN(double v) {
any_register x; x.d = v;
return (((x.j & UVAL64(0x7ff0000000000000)) == UVAL64(0x7ff0000000000000)) &&
((x.j & UVAL64(0x000fffffffffffff)) != 0));
}
static inline bool is_SNaN(double v) {
any_register x; x.d = v;
return is_NaN(v) && !(x.j & UVAL64(0x0008000000000000)) ? signbit(v) : false;
}
static inline bool is_QNaN(double v) {
return is_NaN(v) && !is_SNaN(v);
}
static inline bool is_NaN(float v) {
any_register x; x.f = v;
return (((x.i & 0x7f800000) == 0x7f800000) &&
((x.i & 0x007fffff) != 0));
}
static inline bool is_SNaN(float v) {
any_register x; x.f = v;
return is_NaN(v) && !(x.i & 0x00400000) ? signbit(v) : false;
}
static inline bool is_QNaN(float v) {
return is_NaN(v) && !is_SNaN(v);
}
template< bool OC >
void powerpc_cpu::execute_fp_compare(uint32 opcode)
{
const double a = operand_fp_RA::get(this, opcode);
const double b = operand_fp_RB::get(this, opcode);
const int crfd = crfD_field::extract(opcode);
int c;
if (is_NaN(a) || is_NaN(b))
cr().set(crfd, 1);
c = 1;
else if (isless(a, b))
cr().set(crfd, 8);
c = 8;
else if (isgreater(a, b))
cr().set(crfd, 4);
c = 4;
else
cr().set(crfd, 2);
c = 2;
fpscr() = (fpscr() & ~FPSCR_FPCC_field::mask()) | (cr().get(crfd) << 12);
FPSCR_FPCC_field::insert(fpscr(), c);
cr().set(crfd, c);
// Check for FP exception condition
#if PPC_ENABLE_FPU_EXCEPTIONS
if (is_SNaN(a) || is_SNaN(b)) {
fpscr() |= FPSCR_VXSNAN_field::mask();
if (OC && !FPSCR_VE_field::test(fpscr()))
fpscr() |= FPSCR_FX_field::mask();
if (OC && !FPSCR_VE_field::test(fpscr())) {
fpscr() |= FPSCR_VXVC_field::mask();
fpscr() |= FPSCR_FX_field::mask();
}
}
else if (OC && (is_QNaN(a) || is_QNaN(b)))
else if (OC && (is_QNaN(a) || is_QNaN(b))) {
fpscr() |= FPSCR_VXVC_field::mask();
fpscr() |= FPSCR_FX_field::mask();
}
#endif
// Update FPSCR exception bits
record_fpscr();
increment_pc(4);
}
@ -842,20 +850,16 @@ void powerpc_cpu::execute_fp_int_convert(uint32 opcode)
const uint32 r = RN::get(this, opcode);
any_register d;
switch (r) {
case 0: // Round to nearest
d.j = (int32)(b + 0.5);
break;
case 1: // Round toward zero
// Bounds checking and convert to integer word
if (b > (double)0x7fffffff)
d.j = 0x7fffffff;
else if (b < -(double)0x80000000)
d.j = 0x80000000;
else
d.j = (int32)b;
break;
case 2: // Round toward +infinity
d.j = (int32)ceil(b);
break;
case 3: // Round toward -infinity
d.j = (int32)floor(b);
break;
}
// Update FPSCR exception bits
record_fpscr();
// Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
if (Rc::test(opcode))
@ -872,7 +876,8 @@ void powerpc_cpu::execute_fp_int_convert(uint32 opcode)
* Rc Predicate to record CR1
**/
void powerpc_cpu::fp_classify(double x)
template< class FP >
void powerpc_cpu::fp_classify(FP x)
{
uint32 c = fpscr() & ~FPSCR_FPRF_field::mask();
uint8 fc = fpclassify(x);
@ -955,6 +960,27 @@ void powerpc_cpu::execute_mcrf(uint32 opcode)
increment_pc(4);
}
void powerpc_cpu::execute_mcrfs(uint32 opcode)
{
const int crfS = crfS_field::extract(opcode);
const int crfD = crfD_field::extract(opcode);
// The contents of FPSCR field crfS are copied to CR field crfD
const uint32 m = 0xf << (28 - 4 * crfS);
cr().set(crfD, (fpscr() & m) >> (28 - 4 * crfS));
// All exception bits copied (except FEX and VX) are cleared in the FPSCR
fpscr() &= ~(m & (FPSCR_FX_field::mask() | FPSCR_OX_field::mask() |
FPSCR_UX_field::mask() | FPSCR_ZX_field::mask() |
FPSCR_XX_field::mask() | FPSCR_VXSNAN_field::mask() |
FPSCR_VXISI_field::mask() | FPSCR_VXIDI_field::mask() |
FPSCR_VXZDZ_field::mask() | FPSCR_VXIMZ_field::mask() |
FPSCR_VXVC_field::mask() | FPSCR_VXSOFT_field::mask() |
FPSCR_VXSQRT_field::mask() | FPSCR_VXCVI_field::mask()));
increment_pc(4);
}
void powerpc_cpu::execute_mtcrf(uint32 opcode)
{
uint32 mask = field2mask[CRM_field::extract(opcode)];
@ -962,6 +988,25 @@ void powerpc_cpu::execute_mtcrf(uint32 opcode)
increment_pc(4);
}
void powerpc_cpu::fp_setround(int round)
{
// Set native rounding mode
switch (round) {
case 0:
fesetround(FE_TONEAREST);
break;
case 1:
fesetround(FE_TOWARDZERO);
break;
case 2:
fesetround(FE_UPWARD);
break;
case 3:
fesetround(FE_DOWNWARD);
break;
}
}
template< class FM, class RB, class Rc >
void powerpc_cpu::execute_mtfsf(uint32 opcode)
{
@ -976,6 +1021,10 @@ void powerpc_cpu::execute_mtfsf(uint32 opcode)
// Move frB bits to FPSCR according to field mask
fpscr() = (fsf & m) | (fpscr() & ~m);
// Update native FP control word
if (m & FPSCR_RN_field::mask())
fp_setround(FPSCR_RN_field::extract(fpscr()));
// Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
if (Rc::test(opcode))
record_cr1();
@ -996,6 +1045,13 @@ void powerpc_cpu::execute_mtfsfi(uint32 opcode)
// Move immediate to FPSCR according to field crfD
fpscr() = (RB::get(this, opcode) & m) | (fpscr() & ~m);
// Update native FP control word
if (m & FPSCR_RN_field::mask())
fp_setround(FPSCR_RN_field::extract(fpscr()));
// Update FPSCR exception bits
record_fpscr();
// Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
if (Rc::test(opcode))
record_cr1();
@ -1010,6 +1066,13 @@ void powerpc_cpu::execute_mtfsb(uint32 opcode)
const uint32 crbD = crbD_field::extract(opcode);
fpscr() = (fpscr() & ~(1 << (31 - crbD))) | (RB::get(this, opcode) << (31 - crbD));
// Update native FP control word
if (crbD & FPSCR_RN_field::mask())
fp_setround(FPSCR_RN_field::extract(fpscr()));
// Update FPSCR exception bits
record_fpscr();
// Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
if (Rc::test(opcode))
record_cr1();
@ -1023,6 +1086,9 @@ void powerpc_cpu::execute_mffs(uint32 opcode)
// Move FPSCR to FPR(FRD)
operand_fp_dw_RD::set(this, opcode, fpscr());
// Update FPSCR exception bits
record_fpscr();
// Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
if (Rc::test(opcode))
record_cr1();

View File

@ -131,4 +131,149 @@ DEFINE_HELPER(do_execute_divide, (uint32 RA, uint32 RB))
RETURN(RD);
}
/**
* FP classification
**/
static inline bool is_NaN(double v) {
any_register x; x.d = v;
return (((x.j & UVAL64(0x7ff0000000000000)) == UVAL64(0x7ff0000000000000)) &&
((x.j & UVAL64(0x000fffffffffffff)) != 0));
}
static inline bool is_SNaN(double v) {
any_register x; x.d = v;
return is_NaN(v) && !(x.j & UVAL64(0x0008000000000000)) ? signbit(v) : false;
}
static inline bool is_QNaN(double v) {
return is_NaN(v) && !is_SNaN(v);
}
static inline bool is_NaN(float v) {
any_register x; x.f = v;
return (((x.i & 0x7f800000) == 0x7f800000) &&
((x.i & 0x007fffff) != 0));
}
static inline bool is_SNaN(float v) {
any_register x; x.f = v;
return is_NaN(v) && !(x.i & 0x00400000) ? signbit(v) : false;
}
static inline bool is_QNaN(float v) {
return is_NaN(v) && !is_SNaN(v);
}
/**
* Check for FP Exception Conditions
**/
template< class OP >
struct fp_exception_condition {
static inline uint32 apply(double) {
return 0;
}
static inline uint32 apply(double, double) {
return 0;
}
static inline uint32 apply(double, double, double) {
return 0;
}
};
template< class FP >
struct fp_invalid_operation_condition {
static inline uint32 apply(FP a, FP b, int check, bool negate = false) {
uint32 exceptions = 0;
if (FPSCR_VXSNAN_field::test(check) && (is_SNaN(a) || is_SNaN(b))) {
exceptions |= FPSCR_VXSNAN_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
if (FPSCR_VXISI_field::test(check) && isinf(a) && isinf(b)) {
if ((negate && (signbit(a) == signbit(b))) ||
(!negate && (signbit(a) != signbit(b)))) {
exceptions |= FPSCR_VXISI_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
}
if (FPSCR_VXIDI_field::test(check) && isinf(a) && isinf(b)) {
exceptions |= FPSCR_VXIDI_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
if (FPSCR_VXZDZ_field::test(check) && a == 0 && b == 0) {
exceptions |= FPSCR_VXZDZ_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
if (FPSCR_VXIMZ_field::test(check) && a == 0 && isinf(b)) {
exceptions |= FPSCR_VXIMZ_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
if (FPSCR_VXVC_field::test(check) && (is_NaN(a) || is_NaN(b))) {
exceptions |= FPSCR_VXVC_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
if (FPSCR_VXSOFT_field::test(check)) {
exceptions |= FPSCR_VXSOFT_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
if (FPSCR_VXSQRT_field::test(check) && signbit(a)) {
exceptions |= FPSCR_VXSQRT_field::mask();
exceptions |= FPSCR_FX_field::mask();
}
return exceptions;
}
};
#define DEFINE_FP_INVALID_OPERATION(OP, TYPE, EXCP, NEGATE) \
template<> \
struct fp_exception_condition<OP> { \
static inline uint32 apply(TYPE a, TYPE b) { \
return fp_invalid_operation_condition<TYPE>::apply(a, b, EXCP, NEGATE); \
} \
};
DEFINE_FP_INVALID_OPERATION(op_fadd, double, FPSCR_VXSNAN_field::mask() | FPSCR_VXISI_field::mask(), 0);
DEFINE_FP_INVALID_OPERATION(op_fsub, double, FPSCR_VXSNAN_field::mask() | FPSCR_VXISI_field::mask(), 1);
DEFINE_FP_INVALID_OPERATION(op_fmul, double, FPSCR_VXSNAN_field::mask() | FPSCR_VXIMZ_field::mask(), 0);
template< class FP >
struct fp_divide_exception_condition {
static inline uint32 apply(FP a, FP b) {
int exceptions =
fp_invalid_operation_condition<FP>::
apply(a, b,
FPSCR_VXSNAN_field::mask() | FPSCR_VXIDI_field::mask() | FPSCR_VXZDZ_field::mask());
#if 0
if (!exceptions && b == 0)
exceptions = FPSCR_ZX_field::mask() | FPSCR_FX_field::mask();
#endif
return exceptions;
}
};
template<> struct fp_exception_condition<op_fdiv> : fp_divide_exception_condition<double> { };
template< class FP, bool NG >
struct fp_fma_exception_condition {
static inline uint32 apply(FP a, FP b, FP c) {
#if 1
return fp_invalid_operation_condition<FP>::
apply(a, b, FPSCR_VXSNAN_field::mask() | FPSCR_VXIMZ_field::mask());
#else
// FIXME: we are losing precision
double p = a * b;
return (fp_invalid_operation_condition<FP>::
apply(a, b, FPSCR_VXSNAN_field::mask() | FPSCR_VXIMZ_field::mask(), false) |
fp_invalid_operation_condition<FP>::
apply(p, c, FPSCR_VXSNAN_field::mask() | FPSCR_VXISI_field::mask(), NG));
#endif
}
};
template<> struct fp_exception_condition<op_fmadd> : fp_fma_exception_condition<double, false> { };
template<> struct fp_exception_condition<op_fmsub> : fp_fma_exception_condition<double, true> { };
template<> struct fp_exception_condition<op_fnmadd> : fp_fma_exception_condition<double, false> { };
template<> struct fp_exception_condition<op_fnmsub> : fp_fma_exception_condition<double, true> { };
#endif /* PPC_EXECUTE_H */

View File

@ -132,6 +132,7 @@ enum powerpc_instruction {
PPC_I(LWZUX),
PPC_I(LWZX),
PPC_I(MCRF),
PPC_I(MCRFS),
PPC_I(MFCR),
PPC_I(MFFS),
PPC_I(MFMSR),