From 329bcc68b1da5f7f4c0fc24793ff2664ef602080 Mon Sep 17 00:00:00 2001 From: dingusdev <52434309+dingusdev@users.noreply.github.com> Date: Thu, 17 Oct 2024 08:00:27 -0700 Subject: [PATCH] Floating-point fix-ups Largely to fix setting flags, but partially to fix the incorrect nan emulation --- CREDITS.md | 1 + cpu/ppc/ppcfpopcodes.cpp | 118 +++++++++++++++++++++++++++++++++++++++ cpu/ppc/ppcopcodes.cpp | 2 +- main.cpp | 4 +- 4 files changed, 122 insertions(+), 3 deletions(-) diff --git a/CREDITS.md b/CREDITS.md index 6794c66..3887115 100644 --- a/CREDITS.md +++ b/CREDITS.md @@ -7,6 +7,7 @@ - maximumspatium - joevt - mihaip +- kkaisershot ## NT4/PPC fork diff --git a/cpu/ppc/ppcfpopcodes.cpp b/cpu/ppc/ppcfpopcodes.cpp index 6d36ac4..6264abb 100644 --- a/cpu/ppc/ppcfpopcodes.cpp +++ b/cpu/ppc/ppcfpopcodes.cpp @@ -127,6 +127,18 @@ static void fpresult_update(double set_result) { } else { ppc_state.fpscr |= FPCC_ZERO; } + + if (std::fetestexcept(FE_OVERFLOW)) { + ppc_state.fpscr |= (OX + FX); + } + if (std::fetestexcept(FE_UNDERFLOW)) { + ppc_state.fpscr |= (UX + FX); + } + if (std::fetestexcept(FE_DIVBYZERO)) { + ppc_state.fpscr |= (ZX + FX); + } + + std::feclearexcept(FE_ALL_EXCEPT); if (std::isinf(set_result)) ppc_state.fpscr |= FPCC_FUNAN; @@ -160,6 +172,12 @@ void dppc_interpreter::ppc_fadd() { max_double_check(val_reg_a, val_reg_b); double ppc_dblresult64_d = val_reg_a + val_reg_b; + + double inf = std::numeric_limits<double>::infinity(); + if (((val_reg_a == inf) && (val_reg_b == -inf)) || + ((val_reg_a == -inf) && (val_reg_b == inf))) + ppc_state.fpscr |= VXISI; + ppc_store_dfpresult_flt(reg_d, ppc_dblresult64_d); fpresult_update(ppc_dblresult64_d); ppc_update_fex(); @@ -179,8 +197,13 @@ void dppc_interpreter::ppc_fsub() { double ppc_dblresult64_d = val_reg_a - val_reg_b; + double inf = std::numeric_limits<double>::infinity(); + if (((val_reg_a == inf) && (val_reg_b == inf)) || ((val_reg_a == -inf) && (val_reg_b == -inf))) + ppc_state.fpscr |= VXISI; + ppc_store_dfpresult_flt(reg_d, ppc_dblresult64_d);
fpresult_update(ppc_dblresult64_d); + ppc_update_fex(); if (rec) ppc_update_cr1(); @@ -199,6 +222,12 @@ void dppc_interpreter::ppc_fdiv() { ppc_store_dfpresult_flt(reg_d, ppc_dblresult64_d); fpresult_update(ppc_dblresult64_d); + if (std::isinf(val_reg_a) && std::isinf(val_reg_b)) + ppc_state.fpscr |= VXIDI; + + if ((val_reg_a == 0.0) && (val_reg_b == 0.0)) + ppc_state.fpscr |= VXZDZ; + if (rec) ppc_update_cr1(); } @@ -216,6 +245,9 @@ void dppc_interpreter::ppc_fmul() { ppc_store_dfpresult_flt(reg_d, ppc_dblresult64_d); fpresult_update(ppc_dblresult64_d); + if ((std::isinf(val_reg_a) && (val_reg_c == 0.0)) || (std::isinf(val_reg_c) && (val_reg_a == 0.0))) + ppc_state.fpscr |= VXIMZ; + if (rec) ppc_update_cr1(); } @@ -234,6 +266,13 @@ void dppc_interpreter::ppc_fmadd() { ppc_store_dfpresult_flt(reg_d, ppc_dblresult64_d); fpresult_update(ppc_dblresult64_d); + double inf = std::numeric_limits<double>::infinity(); + if (((val_reg_a == inf) && (val_reg_b == -inf)) || ((val_reg_a == -inf) && (val_reg_b == inf))) + ppc_state.fpscr |= VXISI; + + if ((std::isinf(val_reg_a) && (val_reg_c == 0.0)) || (std::isinf(val_reg_c) && (val_reg_a == 0.0))) + ppc_state.fpscr |= VXIMZ; + if (rec) ppc_update_cr1(); } @@ -252,6 +291,13 @@ void dppc_interpreter::ppc_fmsub() { ppc_store_dfpresult_flt(reg_d, ppc_dblresult64_d); fpresult_update(ppc_dblresult64_d); + if ((std::isinf(val_reg_a) && (val_reg_c == 0.0)) || (std::isinf(val_reg_c) && (val_reg_a == 0.0))) + ppc_state.fpscr |= VXIMZ; + + double inf = std::numeric_limits<double>::infinity(); + if ((val_reg_a == inf) && (val_reg_b == inf)) + ppc_state.fpscr |= VXISI; + if (rec) ppc_update_cr1(); } @@ -267,9 +313,19 @@ void dppc_interpreter::ppc_fnmadd() { snan_single_check(reg_b); double ppc_dblresult64_d = -std::fma(val_reg_a, val_reg_c, val_reg_b); + if (std::isnan(ppc_dblresult64_d)) { + ppc_dblresult64_d = -ppc_dblresult64_d; + } ppc_store_dfpresult_flt(reg_d, ppc_dblresult64_d); fpresult_update(ppc_dblresult64_d); + double inf = std::numeric_limits<double>::infinity(); + if (((val_reg_a == inf) &&
(val_reg_b == -inf)) || ((val_reg_a == -inf) && (val_reg_b == inf))) + ppc_state.fpscr |= VXISI; + + if ((std::isinf(val_reg_a) && (val_reg_c == 0.0)) || (std::isinf(val_reg_c) && (val_reg_a == 0.0))) + ppc_state.fpscr |= VXIMZ; + if (rec) ppc_update_cr1(); } @@ -288,6 +344,13 @@ void dppc_interpreter::ppc_fnmsub() { ppc_store_dfpresult_flt(reg_d, ppc_dblresult64_d); fpresult_update(ppc_dblresult64_d); + if ((std::isinf(val_reg_a) && (val_reg_c == 0.0)) || (std::isinf(val_reg_c) && (val_reg_a == 0.0))) + ppc_state.fpscr |= VXIMZ; + + double inf = std::numeric_limits<double>::infinity(); + if ((val_reg_a == inf) && (val_reg_b == inf)) + ppc_state.fpscr |= VXISI; + if (rec) ppc_update_cr1(); } @@ -304,6 +367,10 @@ void dppc_interpreter::ppc_fadds() { double ppc_dblresult64_d = (float)(val_reg_a + val_reg_b); ppc_store_sfpresult_flt(reg_d, ppc_dblresult64_d); + double inf = std::numeric_limits<double>::infinity(); + if (((val_reg_a == inf) && (val_reg_b == -inf)) || ((val_reg_a == -inf) && (val_reg_b == inf))) + ppc_state.fpscr |= VXISI; + fpresult_update(ppc_dblresult64_d); if (rec) @@ -321,6 +388,10 @@ void dppc_interpreter::ppc_fsubs() { double ppc_dblresult64_d = (float)(val_reg_a - val_reg_b); + double inf = std::numeric_limits<double>::infinity(); + if (((val_reg_a == inf) && (val_reg_b == inf)) || ((val_reg_a == -inf) && (val_reg_b == -inf))) + ppc_state.fpscr |= VXISI; + ppc_store_sfpresult_flt(reg_d, ppc_dblresult64_d); fpresult_update(ppc_dblresult64_d); @@ -338,6 +409,13 @@ void dppc_interpreter::ppc_fdivs() { snan_double_check(reg_a, reg_b); double ppc_dblresult64_d = (float)(val_reg_a / val_reg_b); + + if (std::isinf(val_reg_a) && std::isinf(val_reg_b)) + ppc_state.fpscr |= VXIDI; + + if ((val_reg_a == 0.0) && (val_reg_b == 0.0)) + ppc_state.fpscr |= VXZDZ; + ppc_store_sfpresult_flt(reg_d, ppc_dblresult64_d); fpresult_update(ppc_dblresult64_d); @@ -355,9 +433,14 @@ void dppc_interpreter::ppc_fmuls() { snan_double_check(reg_a, reg_c); double ppc_dblresult64_d = (float)(val_reg_a * val_reg_c); + + if ((std::isinf(val_reg_a) && (val_reg_c == 0.0)) ||
(std::isinf(val_reg_c) && (val_reg_a == 0.0))) + ppc_state.fpscr |= VXIMZ; + ppc_store_sfpresult_flt(reg_d, ppc_dblresult64_d); fpresult_update(ppc_dblresult64_d); + if (rec) ppc_update_cr1(); } @@ -376,6 +459,13 @@ void dppc_interpreter::ppc_fmadds() { ppc_store_sfpresult_flt(reg_d, ppc_dblresult64_d); fpresult_update(ppc_dblresult64_d); + double inf = std::numeric_limits<double>::infinity(); + if (((val_reg_a == inf) && (val_reg_b == -inf)) || ((val_reg_a == -inf) && (val_reg_b == inf))) + ppc_state.fpscr |= VXISI; + + if ((std::isinf(val_reg_a) && (val_reg_c == 0.0)) || (std::isinf(val_reg_c) && (val_reg_a == 0.0))) + ppc_state.fpscr |= VXIMZ; + if (rec) ppc_update_cr1(); } @@ -390,9 +480,20 @@ void dppc_interpreter::ppc_fmsubs() { snan_double_check(reg_a, reg_c); snan_single_check(reg_b); + double ppc_dblresult64_d = (float)std::fma(val_reg_a, val_reg_c, -val_reg_b); + if (std::isnan(ppc_dblresult64_d)) { + ppc_dblresult64_d = -ppc_dblresult64_d; + } ppc_store_sfpresult_flt(reg_d, ppc_dblresult64_d); fpresult_update(ppc_dblresult64_d); + + if ((std::isinf(val_reg_a) && (val_reg_c == 0.0)) || (std::isinf(val_reg_c) && (val_reg_a == 0.0))) + ppc_state.fpscr |= VXIMZ; + + double inf = std::numeric_limits<double>::infinity(); + if ((val_reg_a == inf) && (val_reg_b == inf)) + ppc_state.fpscr |= VXISI; if (rec) ppc_update_cr1(); @@ -409,9 +510,19 @@ void dppc_interpreter::ppc_fnmadds() { snan_single_check(reg_b); double ppc_dblresult64_d = -(float)std::fma(val_reg_a, val_reg_c, val_reg_b); + if (std::isnan(ppc_dblresult64_d)) { + ppc_dblresult64_d = -ppc_dblresult64_d; + } ppc_store_sfpresult_flt(reg_d, ppc_dblresult64_d); fpresult_update(ppc_dblresult64_d); + double inf = std::numeric_limits<double>::infinity(); + if (((val_reg_a == inf) && (val_reg_b == -inf)) || ((val_reg_a == -inf) && (val_reg_b == inf))) + ppc_state.fpscr |= VXISI; + + if ((std::isinf(val_reg_a) && (val_reg_c == 0.0)) || (std::isinf(val_reg_c) && (val_reg_a == 0.0))) + ppc_state.fpscr |= VXIMZ; + if (rec) ppc_update_cr1(); } @@ -430,6 +541,13 @@ void
dppc_interpreter::ppc_fnmsubs() { ppc_store_sfpresult_flt(reg_d, ppc_dblresult64_d); fpresult_update(ppc_dblresult64_d); + if ((std::isinf(val_reg_a) && (val_reg_c == 0.0)) || (std::isinf(val_reg_c) && (val_reg_a == 0.0))) + ppc_state.fpscr |= VXIMZ; + + double inf = std::numeric_limits<double>::infinity(); + if ((val_reg_a == inf) && (val_reg_b == inf)) + ppc_state.fpscr |= VXISI; + if (rec) ppc_update_cr1(); } diff --git a/cpu/ppc/ppcopcodes.cpp b/cpu/ppc/ppcopcodes.cpp index 65ef7d2..13a2c52 100644 --- a/cpu/ppc/ppcopcodes.cpp +++ b/cpu/ppc/ppcopcodes.cpp @@ -84,7 +84,7 @@ void do_ctx_sync() { } } -void add_ctx_sync_action(const CtxSyncCallback& cb) { +static void add_ctx_sync_action(const CtxSyncCallback& cb) { gCtxSyncCallbacks.push_back(cb); } diff --git a/main.cpp b/main.cpp index ea34deb..4e67e49 100644 --- a/main.cpp +++ b/main.cpp @@ -53,9 +53,9 @@ static void sigabrt_handler(int signum) { } static string appDescription = string( - "\nDingusPPC - Alpha 1 (5/10/2024) " + "\nDingusPPC - Alpha 1.01 (10/31/2024) " "\nWritten by divingkatae, maximumspatium, " - "\njoevt, mihaip, et. al. " + "\njoevt, mihaip, kkaisershot, et. al. " "\n(c) 2018-2024 The DingusPPC Dev Team. " "\nThis is a build intended for testing. " "\nUse at your own discretion. "