Floating point overhaul, part 4

Reworked the INF/NAN checks again - This time, only checking them when the result is calculated
This commit is contained in:
dingusdev 2021-10-14 20:31:10 -07:00
parent 9251745d6f
commit 061b061813

View File

@ -185,43 +185,34 @@ void update_fex() {
ppc_state.fpscr = (ppc_state.fpscr & ~0x40000000) | (fex_result << 30); ppc_state.fpscr = (ppc_state.fpscr & ~0x40000000) | (fex_result << 30);
} }
template <typename T> template <typename T, const FPOP fpop>
constexpr auto ppc_confirm_inf_nan(int chosen_reg_1, int chosen_reg_2, int chosen_reg_3, uint32_t op) { constexpr auto ppc_confirm_inf_nan(int chosen_reg_1, int chosen_reg_2, int chosen_reg_3) {
T input_a = T(ppc_state.fpr[chosen_reg_1].int64_r); T input_a = T(ppc_state.fpr[chosen_reg_1].int64_r);
T input_b = T(ppc_state.fpr[chosen_reg_2].int64_r); T input_b = T(ppc_state.fpr[chosen_reg_2].int64_r);
T input_c = T(ppc_state.fpr[chosen_reg_3].int64_r); T input_c = T(ppc_state.fpr[chosen_reg_3].int64_r);
ppc_state.fpscr &= 0x7fbfffff; ppc_state.fpscr &= 0x7fbfffff;
bool inf_or_nan = false; switch (fpop) {
switch (op) {
case FPOP::DIV: case FPOP::DIV:
if (isnan(input_a) && isnan(input_b)) { if (isnan(input_a) && isnan(input_b)) {
ppc_state.fpscr |= (FPSCR::FX | FPSCR::VXIDI); ppc_state.fpscr |= (FPSCR::FX | FPSCR::VXIDI);
inf_or_nan = true;
} else if ((input_a == FP_ZERO) && (input_b == FP_ZERO)) { } else if ((input_a == FP_ZERO) && (input_b == FP_ZERO)) {
ppc_state.fpscr |= (FPSCR::FX | FPSCR::VXZDZ); ppc_state.fpscr |= (FPSCR::FX | FPSCR::VXZDZ);
inf_or_nan = true;
} }
update_fex(); update_fex();
return inf_or_nan;
break; break;
case FPOP::SUB: case FPOP::SUB:
if (isnan(input_a) && isnan(input_b)) { if (isnan(input_a) && isnan(input_b)) {
ppc_state.fpscr |= (FPSCR::FX | FPSCR::VXISI); ppc_state.fpscr |= (FPSCR::FX | FPSCR::VXISI);
inf_or_nan = true;
} }
update_fex(); update_fex();
return inf_or_nan;
break; break;
case FPOP::ADD: case FPOP::ADD:
if (isnan(input_a) && isnan(input_b)) { if (isnan(input_a) && isnan(input_b)) {
ppc_state.fpscr |= (FPSCR::FX | FPSCR::VXISI); ppc_state.fpscr |= (FPSCR::FX | FPSCR::VXISI);
inf_or_nan = true;
} }
update_fex(); update_fex();
return inf_or_nan;
break; break;
case FPOP::MUL: case FPOP::MUL:
if (((input_a == FP_ZERO) && (input_c == FP_INFINITE)) || if (((input_a == FP_ZERO) && (input_c == FP_INFINITE)) ||
@ -229,16 +220,13 @@ constexpr auto ppc_confirm_inf_nan(int chosen_reg_1, int chosen_reg_2, int chose
ppc_state.fpscr |= ppc_state.fpscr |=
(FPSCR::FX | FPSCR::VXSNAN | (FPSCR::FX | FPSCR::VXSNAN |
FPSCR::VXIMZ); FPSCR::VXIMZ);
inf_or_nan = true;
} }
update_fex(); update_fex();
return inf_or_nan;
break; break;
case FPOP::FMSUB: case FPOP::FMSUB:
case FPOP::FNMSUB: case FPOP::FNMSUB:
if (isnan(input_a) || isnan(input_b) || isnan(input_c)) { if (isnan(input_a) || isnan(input_b) || isnan(input_c)) {
ppc_state.fpscr |= FPSCR::VXSNAN; ppc_state.fpscr |= FPSCR::VXSNAN;
inf_or_nan = true;
if (((input_a == FP_ZERO) && (input_c == FP_INFINITE)) || if (((input_a == FP_ZERO) && (input_c == FP_INFINITE)) ||
((input_c == FP_ZERO) && (input_a == FP_INFINITE))) { ((input_c == FP_ZERO) && (input_a == FP_INFINITE))) {
ppc_state.fpscr |= FPSCR::VXIMZ; ppc_state.fpscr |= FPSCR::VXIMZ;
@ -246,20 +234,14 @@ constexpr auto ppc_confirm_inf_nan(int chosen_reg_1, int chosen_reg_2, int chose
} }
update_fex(); update_fex();
return inf_or_nan;
break; break;
case FPOP::FMADD: case FPOP::FMADD:
case FPOP::FNMADD: case FPOP::FNMADD:
if (isnan(input_a) || isnan(input_b) || isnan(input_c)) { if (isnan(input_a) || isnan(input_b) || isnan(input_c)) {
ppc_state.fpscr |= FPSCR::VXSNAN; ppc_state.fpscr |= FPSCR::VXSNAN;
inf_or_nan = true;
} }
update_fex(); update_fex();
return inf_or_nan;
break; break;
default:
return false;
} }
} }
@ -298,9 +280,12 @@ void ppc_changecrf1() {
void dppc_interpreter::ppc_fadd() { void dppc_interpreter::ppc_fadd() {
ppc_grab_regsfpdab(); ppc_grab_regsfpdab();
if (!ppc_confirm_inf_nan<double>(reg_a, reg_b, 0, FPOP::ADD)) {
ppc_dblresult64_d = val_reg_a + val_reg_b; ppc_dblresult64_d = val_reg_a + val_reg_b;
if (!isnan(ppc_dblresult64_d)) {
ppc_store_dfpresult_flt(reg_d); ppc_store_dfpresult_flt(reg_d);
} else {
ppc_confirm_inf_nan<double, ADD>(reg_a, reg_b, 0);
} }
if (rc_flag) if (rc_flag)
@ -310,9 +295,12 @@ void dppc_interpreter::ppc_fadd() {
void dppc_interpreter::ppc_fsub() { void dppc_interpreter::ppc_fsub() {
ppc_grab_regsfpdab(); ppc_grab_regsfpdab();
if (!ppc_confirm_inf_nan<double>(reg_a, reg_b, 0, FPOP::SUB)) {
ppc_dblresult64_d = val_reg_a - val_reg_b; ppc_dblresult64_d = val_reg_a - val_reg_b;
if (!isnan(ppc_dblresult64_d)) {
ppc_store_dfpresult_flt(reg_d); ppc_store_dfpresult_flt(reg_d);
} else {
ppc_confirm_inf_nan<double, SUB>(reg_a, reg_b, 0);
} }
if (rc_flag) if (rc_flag)
@ -322,9 +310,12 @@ void dppc_interpreter::ppc_fsub() {
void dppc_interpreter::ppc_fdiv() { void dppc_interpreter::ppc_fdiv() {
ppc_grab_regsfpdab(); ppc_grab_regsfpdab();
if (!ppc_confirm_inf_nan<double>(reg_a, reg_b, 0, FPOP::DIV)) {
ppc_dblresult64_d = val_reg_a / val_reg_b; ppc_dblresult64_d = val_reg_a / val_reg_b;
if (!isnan(ppc_dblresult64_d)) {
ppc_store_dfpresult_flt(reg_d); ppc_store_dfpresult_flt(reg_d);
} else {
ppc_confirm_inf_nan<double, DIV>(reg_a, reg_b, 0);
} }
if (rc_flag) if (rc_flag)
@ -334,9 +325,12 @@ void dppc_interpreter::ppc_fdiv() {
void dppc_interpreter::ppc_fmul() { void dppc_interpreter::ppc_fmul() {
ppc_grab_regsfpdac(); ppc_grab_regsfpdac();
if (!ppc_confirm_inf_nan<double>(reg_a, reg_c, 0, FPOP::MUL)) {
ppc_dblresult64_d = val_reg_a * val_reg_c; ppc_dblresult64_d = val_reg_a * val_reg_c;
if (!isnan(ppc_dblresult64_d)) {
ppc_store_dfpresult_flt(reg_d); ppc_store_dfpresult_flt(reg_d);
} else {
ppc_confirm_inf_nan<double, MUL>(reg_a, reg_b, 0);
} }
if (rc_flag) if (rc_flag)
@ -346,11 +340,13 @@ void dppc_interpreter::ppc_fmul() {
void dppc_interpreter::ppc_fmadd() { void dppc_interpreter::ppc_fmadd() {
ppc_grab_regsfpdabc(); ppc_grab_regsfpdabc();
if (!ppc_confirm_inf_nan<double>(reg_a, reg_b, reg_c, FPOP::FMADD)) {
ppc_dblresult64_d = std::fma(val_reg_a, val_reg_c, val_reg_b); ppc_dblresult64_d = std::fma(val_reg_a, val_reg_c, val_reg_b);
ppc_store_dfpresult_flt(reg_d);
}
if (!isnan(ppc_dblresult64_d)) {
ppc_store_dfpresult_flt(reg_d);
} else {
ppc_confirm_inf_nan<double, FMADD>(reg_a, reg_b, reg_c);
}
if (rc_flag) if (rc_flag)
ppc_changecrf1(); ppc_changecrf1();
@ -359,12 +355,14 @@ void dppc_interpreter::ppc_fmadd() {
void dppc_interpreter::ppc_fmsub() { void dppc_interpreter::ppc_fmsub() {
ppc_grab_regsfpdabc(); ppc_grab_regsfpdabc();
if (!ppc_confirm_inf_nan<double>(reg_a, reg_b, reg_c, FPOP::FMSUB)) {
ppc_dblresult64_d = (val_reg_a * val_reg_c); ppc_dblresult64_d = (val_reg_a * val_reg_c);
ppc_dblresult64_d -= val_reg_b; ppc_dblresult64_d -= val_reg_b;
ppc_store_dfpresult_flt(reg_d);
}
if (!isnan(ppc_dblresult64_d)) {
ppc_store_dfpresult_flt(reg_d);
} else {
ppc_confirm_inf_nan<double, FMSUB>(reg_a, reg_b, reg_c);
}
if (rc_flag) if (rc_flag)
ppc_changecrf1(); ppc_changecrf1();
@ -373,12 +371,16 @@ void dppc_interpreter::ppc_fmsub() {
void dppc_interpreter::ppc_fnmadd() { void dppc_interpreter::ppc_fnmadd() {
ppc_grab_regsfpdabc(); ppc_grab_regsfpdabc();
if (!ppc_confirm_inf_nan<double>(reg_a, reg_b, reg_c, FPOP::FNMADD)) {
ppc_dblresult64_d = (val_reg_a * val_reg_c); ppc_dblresult64_d = (val_reg_a * val_reg_c);
ppc_dblresult64_d += val_reg_b; ppc_dblresult64_d += val_reg_b;
ppc_dblresult64_d = -ppc_dblresult64_d; ppc_dblresult64_d = -ppc_dblresult64_d;
if (!isnan(ppc_dblresult64_d)) {
ppc_store_dfpresult_flt(reg_d); ppc_store_dfpresult_flt(reg_d);
} }
else {
ppc_confirm_inf_nan<double, FNMADD>(reg_a, reg_b, reg_c);
}
if (rc_flag) if (rc_flag)
ppc_changecrf1(); ppc_changecrf1();
@ -387,12 +389,14 @@ void dppc_interpreter::ppc_fnmadd() {
void dppc_interpreter::ppc_fnmsub() { void dppc_interpreter::ppc_fnmsub() {
ppc_grab_regsfpdabc(); ppc_grab_regsfpdabc();
if (!ppc_confirm_inf_nan<double>(reg_a, reg_b, reg_c, FPOP::FNMSUB)) {
ppc_dblresult64_d = (val_reg_a * val_reg_c); ppc_dblresult64_d = (val_reg_a * val_reg_c);
ppc_dblresult64_d -= val_reg_b; ppc_dblresult64_d -= val_reg_b;
ppc_store_dfpresult_flt(reg_d);
}
if (!isnan(ppc_dblresult64_d)) {
ppc_store_dfpresult_flt(reg_d);
} else {
ppc_confirm_inf_nan<double, FNMSUB>(reg_a, reg_b, reg_c);
}
if (rc_flag) if (rc_flag)
ppc_changecrf1(); ppc_changecrf1();
@ -401,10 +405,13 @@ void dppc_interpreter::ppc_fnmsub() {
void dppc_interpreter::ppc_fadds() { void dppc_interpreter::ppc_fadds() {
ppc_grab_regsfpdab(); ppc_grab_regsfpdab();
if (!ppc_confirm_inf_nan<float>(reg_a, reg_b, 0, FPOP::ADD)) {
float intermediate = (float)val_reg_a + (float)val_reg_b; float intermediate = (float)val_reg_a + (float)val_reg_b;
ppc_dblresult64_d = static_cast<double>(intermediate); ppc_dblresult64_d = static_cast<double>(intermediate);
if (!isnan(ppc_dblresult64_d)) {
ppc_store_sfpresult_flt(reg_d); ppc_store_sfpresult_flt(reg_d);
} else {
ppc_confirm_inf_nan<float, ADD>(reg_a, reg_b, 0);
} }
if (rc_flag) if (rc_flag)
@ -414,23 +421,13 @@ void dppc_interpreter::ppc_fadds() {
void dppc_interpreter::ppc_fsubs() { void dppc_interpreter::ppc_fsubs() {
ppc_grab_regsfpdab(); ppc_grab_regsfpdab();
if (!ppc_confirm_inf_nan<float>(reg_a, reg_b, 0, FPOP::SUB)) {
float intermediate = (float)val_reg_a - (float)val_reg_b; float intermediate = (float)val_reg_a - (float)val_reg_b;
ppc_dblresult64_d = static_cast<double>(intermediate); ppc_dblresult64_d = static_cast<double>(intermediate);
if (!isnan(ppc_dblresult64_d)) {
ppc_store_sfpresult_flt(reg_d); ppc_store_sfpresult_flt(reg_d);
} } else {
ppc_confirm_inf_nan<float, SUB>(reg_a, reg_b, 0);
if (rc_flag)
ppc_changecrf1();
}
void dppc_interpreter::ppc_fmuls() {
ppc_grab_regsfpdac();
if (!ppc_confirm_inf_nan<float>(reg_a, 0, reg_c, FPOP::MUL)) {
float intermediate = (float)val_reg_a * (float)val_reg_c;
ppc_dblresult64_d = static_cast<double>(intermediate);
ppc_store_sfpresult_flt(reg_d);
} }
if (rc_flag) if (rc_flag)
@ -440,10 +437,29 @@ void dppc_interpreter::ppc_fmuls() {
void dppc_interpreter::ppc_fdivs() { void dppc_interpreter::ppc_fdivs() {
ppc_grab_regsfpdab(); ppc_grab_regsfpdab();
if (!ppc_confirm_inf_nan<float>(reg_a, reg_b, 0, FPOP::DIV)) {
float intermediate = (float)val_reg_a / (float)val_reg_b; float intermediate = (float)val_reg_a / (float)val_reg_b;
ppc_dblresult64_d = static_cast<double>(intermediate); ppc_dblresult64_d = static_cast<double>(intermediate);
if (!isnan(ppc_dblresult64_d)) {
ppc_store_sfpresult_flt(reg_d); ppc_store_sfpresult_flt(reg_d);
} else {
ppc_confirm_inf_nan<float, DIV>(reg_a, reg_b, 0);
}
if (rc_flag)
ppc_changecrf1();
}
void dppc_interpreter::ppc_fmuls() {
ppc_grab_regsfpdac();
float intermediate = (float)val_reg_a * (float)val_reg_c;
ppc_dblresult64_d = static_cast<double>(intermediate);
if (!isnan(ppc_dblresult64_d)) {
ppc_store_sfpresult_flt(reg_d);
} else {
ppc_confirm_inf_nan<float, MUL>(reg_a, 0, reg_c);
} }
if (rc_flag) if (rc_flag)
@ -453,11 +469,13 @@ void dppc_interpreter::ppc_fdivs() {
void dppc_interpreter::ppc_fmadds() { void dppc_interpreter::ppc_fmadds() {
ppc_grab_regsfpdabc(); ppc_grab_regsfpdabc();
if (!ppc_confirm_inf_nan<float>(reg_a, reg_b, reg_c, FPOP::FMADD)) { ppc_dblresult64_d = static_cast<double>(
ppc_dblresult64_d = static_cast<double>(std::fma( std::fma((float)val_reg_a, (float)val_reg_c, (float)val_reg_b));
(float)val_reg_a, (float)val_reg_c, (float)val_reg_b));
if (!isnan(ppc_dblresult64_d)) {
ppc_store_sfpresult_flt(reg_d); ppc_store_sfpresult_flt(reg_d);
} else {
ppc_confirm_inf_nan<float, FMADD>(reg_a, reg_b, reg_c);
} }
if (rc_flag) if (rc_flag)
@ -467,14 +485,14 @@ void dppc_interpreter::ppc_fmadds() {
void dppc_interpreter::ppc_fmsubs() { void dppc_interpreter::ppc_fmsubs() {
ppc_grab_regsfpdabc(); ppc_grab_regsfpdabc();
float intermediate; float intermediate = (float)val_reg_a * (float)val_reg_c;
if (!ppc_confirm_inf_nan<float>(reg_a, reg_b, reg_c, FPOP::FMSUB)) {
intermediate = (float)val_reg_a * (float)val_reg_c;
intermediate -= (float)val_reg_b; intermediate -= (float)val_reg_b;
ppc_dblresult64_d = static_cast<double>(intermediate); ppc_dblresult64_d = static_cast<double>(intermediate);
if (!isnan(ppc_dblresult64_d)) {
ppc_store_sfpresult_flt(reg_d); ppc_store_sfpresult_flt(reg_d);
} else {
ppc_confirm_inf_nan<float, FMSUB>(reg_a, reg_b, reg_c);
} }
if (rc_flag) if (rc_flag)
@ -484,16 +502,15 @@ void dppc_interpreter::ppc_fmsubs() {
void dppc_interpreter::ppc_fnmadds() { void dppc_interpreter::ppc_fnmadds() {
ppc_grab_regsfpdabc(); ppc_grab_regsfpdabc();
float intermediate; float intermediate = (float)val_reg_a * (float)val_reg_c;
if (!ppc_confirm_inf_nan<float>(reg_a, reg_b, reg_c, FPOP::FNMADD)) {
intermediate = (float)val_reg_a * (float)val_reg_c;
intermediate += (float)val_reg_b; intermediate += (float)val_reg_b;
intermediate = -intermediate; intermediate = -intermediate;
ppc_dblresult64_d = static_cast<double>(intermediate); ppc_dblresult64_d = static_cast<double>(intermediate);
if (!isnan(ppc_dblresult64_d)) {
ppc_store_sfpresult_flt(reg_d); ppc_store_sfpresult_flt(reg_d);
} else {
ppc_confirm_inf_nan<float, FNMADD>(reg_a, reg_b, reg_c);
} }
if (rc_flag) if (rc_flag)
@ -503,16 +520,16 @@ void dppc_interpreter::ppc_fnmadds() {
void dppc_interpreter::ppc_fnmsubs() { void dppc_interpreter::ppc_fnmsubs() {
ppc_grab_regsfpdabc(); ppc_grab_regsfpdabc();
float intermediate; float intermediate = (float)val_reg_a * (float)val_reg_c;
if (!ppc_confirm_inf_nan<float>(reg_a, reg_b, reg_c, FPOP::FNMSUB)) {
intermediate = (float)val_reg_a * (float)val_reg_c;
intermediate -= (float)val_reg_b; intermediate -= (float)val_reg_b;
intermediate = -intermediate; intermediate = -intermediate;
ppc_dblresult64_d = static_cast<double>(intermediate); ppc_dblresult64_d = static_cast<double>(intermediate);
if (!isnan(ppc_dblresult64_d)) {
ppc_store_sfpresult_flt(reg_d); ppc_store_sfpresult_flt(reg_d);
} else {
ppc_confirm_inf_nan<float, FNMSUB>(reg_a, reg_b, reg_c);
} }
if (rc_flag) if (rc_flag)