From 134339ae9dfa46fa80d0f31f362d2047625af8c3 Mon Sep 17 00:00:00 2001 From: Mihai Parparita Date: Sat, 25 Jan 2025 17:46:25 -0800 Subject: [PATCH] Mitigate performance impact of respecting FP bit In #135 we switched from a static OpcodeGrabber table to a curOpcodeGrabber pointer in ppc_main_opcode. This results in an extra indirection (the generated assembly has an additional load), which reduces execution speed. Switch to making the opcode grabber into a parameter to ppc_main_opcode, and make ppc_exec_inner keep it up to date (via an EXEF_OPC_DECODER exception flag). Also fixes FPU instructions in ppctests - we now need to set the FP MSR bit when initializing the CPU. --- cpu/ppc/ppcemu.h | 12 +++++++----- cpu/ppc/ppcexceptions.cpp | 10 ++++++---- cpu/ppc/ppcexec.cpp | 35 +++++++++++++++++++++++++---------- cpu/ppc/ppcopcodes.cpp | 8 +++++--- cpu/ppc/test/ppctests.cpp | 9 ++++++--- 5 files changed, 49 insertions(+), 25 deletions(-) diff --git a/cpu/ppc/ppcemu.h b/cpu/ppc/ppcemu.h index 96330b2..559110d 100644 --- a/cpu/ppc/ppcemu.h +++ b/cpu/ppc/ppcemu.h @@ -182,9 +182,10 @@ extern uint32_t rtc_lo, rtc_hi; /* Flags for controlling interpreter execution. 
*/ enum { - EXEF_BRANCH = 1 << 0, - EXEF_EXCEPTION = 1 << 1, - EXEF_RFI = 1 << 2, + EXEF_BRANCH = 1 << 0, // Branch taken, target PC is in ppc_next_instruction_address + EXEF_EXCEPTION = 1 << 1, // Exception handler invoked + EXEF_RFI = 1 << 2, // RFI instruction executed + EXEF_OPC_DECODER = 1 << 3, // Opcode decoder has changed }; enum CR_select : int32_t { @@ -640,13 +641,14 @@ template extern void power_srq(uint32_t opcode); extern uint64_t get_virt_time_ns(void); -extern void ppc_main_opcode(uint32_t opcode); +extern void ppc_main_opcode(PPCOpcode* ppc_opcode_grabber, uint32_t opcode); extern void ppc_exec(void); extern void ppc_exec_single(void); extern void ppc_exec_until(uint32_t goal_addr); extern void ppc_exec_dbg(uint32_t start_addr, uint32_t size); -extern void ppc_msr_did_change(); +extern PPCOpcode *ppc_opcode_grabber(); +extern void ppc_msr_did_change(uint32_t old_msr_val, bool set_next_instruction_address = true); /* debugging support API */ void print_fprs(void); /* print content of the floating-point registers */ diff --git a/cpu/ppc/ppcexceptions.cpp b/cpu/ppc/ppcexceptions.cpp index 3b77cc4..213516d 100644 --- a/cpu/ppc/ppcexceptions.cpp +++ b/cpu/ppc/ppcexceptions.cpp @@ -57,7 +57,7 @@ void ppc_exception_handler(Except_Type exception_type, uint32_t srr1_bits) { break; case Except_Type::EXC_ISI: - if (exec_flags) { + if (exec_flags & ~EXEF_OPC_DECODER) { ppc_state.spr[SPR::SRR0] = ppc_next_instruction_address; } else { ppc_state.spr[SPR::SRR0] = ppc_state.pc & 0xFFFFFFFCUL; @@ -66,7 +66,7 @@ void ppc_exception_handler(Except_Type exception_type, uint32_t srr1_bits) { break; case Except_Type::EXC_EXT_INT: - if (exec_flags) { + if (exec_flags & ~EXEF_OPC_DECODER) { ppc_state.spr[SPR::SRR0] = ppc_next_instruction_address; } else { ppc_state.spr[SPR::SRR0] = (ppc_state.pc & 0xFFFFFFFCUL) + 4; @@ -90,7 +90,7 @@ void ppc_exception_handler(Except_Type exception_type, uint32_t srr1_bits) { break; case Except_Type::EXC_DECR: - if (exec_flags) { + if 
(exec_flags & ~EXEF_OPC_DECODER) { ppc_state.spr[SPR::SRR0] = ppc_next_instruction_address; } else { ppc_state.spr[SPR::SRR0] = (ppc_state.pc & 0xFFFFFFFCUL) + 4; @@ -114,10 +114,12 @@ void ppc_exception_handler(Except_Type exception_type, uint32_t srr1_bits) { } ppc_state.spr[SPR::SRR1] = (ppc_state.msr & 0x0000FF73) | srr1_bits; + uint32_t old_msr_val = ppc_state.msr; ppc_state.msr &= 0xFFFB1041; /* copy MSR[ILE] to MSR[LE] */ ppc_state.msr = (ppc_state.msr & ~MSR::LE) | !!(ppc_state.msr & MSR::ILE); - ppc_msr_did_change(); + // Don't clobber the ppc_next_instruction_address value + ppc_msr_did_change(old_msr_val, false); if (ppc_state.msr & MSR::IP) { ppc_next_instruction_address |= 0xFFF00000; diff --git a/cpu/ppc/ppcexec.cpp b/cpu/ppc/ppcexec.cpp index 529cdf9..8204fa2 100644 --- a/cpu/ppc/ppcexec.cpp +++ b/cpu/ppc/ppcexec.cpp @@ -189,10 +189,21 @@ static PPCOpcode OpcodeGrabber[64 * 2048]; everything else is the same.*/ static PPCOpcode OpcodeGrabberNoFPU[64 * 2048]; -static PPCOpcode* curOpcodeGrabber = OpcodeGrabberNoFPU; +void ppc_msr_did_change(uint32_t old_msr_val, bool set_next_instruction_address) { + bool old_fp = old_msr_val & MSR::FP; + bool new_fp = ppc_state.msr & MSR::FP; + if (old_fp != new_fp) { + exec_flags |= EXEF_OPC_DECODER; + if (set_next_instruction_address) { + // Even though we're setting an exception flag, we want normal + // instruction execution to continue. + ppc_next_instruction_address = ppc_state.pc + 4; + } + } +} -void ppc_msr_did_change() { - curOpcodeGrabber = ppc_state.msr & MSR::FP ? OpcodeGrabber : OpcodeGrabberNoFPU; +PPCOpcode* ppc_opcode_grabber() { + return ppc_state.msr & MSR::FP ? OpcodeGrabber : OpcodeGrabberNoFPU; } /** Exception helpers. */ @@ -222,7 +233,7 @@ void ppc_release_int() { /** Opcode decoding functions. 
*/ /* Dispatch using primary and modifier opcode */ -void ppc_main_opcode(uint32_t opcode) +void ppc_main_opcode(PPCOpcode *opcodeGrabber, uint32_t opcode) { #ifdef CPU_PROFILING num_executed_instrs++; @@ -230,7 +241,7 @@ void ppc_main_opcode(uint32_t opcode) num_opcodes[opcode]++; #endif #endif - curOpcodeGrabber[(opcode >> 15 & 0x1F800) | (opcode & 0x7FF)](opcode); + opcodeGrabber[(opcode >> 15 & 0x1F800) | (opcode & 0x7FF)](opcode); } static long long cpu_now_ns() { @@ -282,6 +293,7 @@ static void ppc_exec_inner(uint32_t start_addr, uint32_t size) uint64_t max_cycles = 0; uint32_t page_start, eb_start, eb_end = 0; uint32_t opcode; + PPCOpcode* opcode_grabber = ppc_opcode_grabber(); uint8_t* pc_real; while (power_on) { @@ -300,11 +312,14 @@ static void ppc_exec_inner(uint32_t start_addr, uint32_t size) } opcode = ppc_read_instruction(pc_real); - ppc_main_opcode(opcode); - if (g_icycles++ >= max_cycles || exec_timer) + ppc_main_opcode(opcode_grabber, opcode); + if (g_icycles++ >= max_cycles || exec_timer) [[unlikely]] max_cycles = process_events(); if (exec_flags) { + if (exec_flags & EXEF_OPC_DECODER) [[unlikely]] { + opcode_grabber = ppc_opcode_grabber(); + } // define next execution block eb_start = ppc_next_instruction_address; if (!(exec_flags & EXEF_RFI) && (eb_start & PPC_PAGE_MASK) == page_start) { @@ -359,7 +374,7 @@ void ppc_exec_single() uint8_t* pc_real = mmu_translate_imem(ppc_state.pc); uint32_t opcode = ppc_read_instruction(pc_real); - ppc_main_opcode(opcode); + ppc_main_opcode(ppc_opcode_grabber(), opcode); g_icycles++; process_events(); @@ -836,7 +851,6 @@ void ppc_cpu_init(MemCtrlBase* mem_ctrl, uint32_t cpu_version, bool do_include_6 } ppc_mmu_init(); - ppc_msr_did_change(); /* redirect code execution to reset vector */ ppc_state.pc = 0xFFF00100; @@ -895,8 +909,9 @@ static uint64_t reg_op(string& reg_name, uint64_t val, bool is_write) { } if (reg_name_u == "MSR") { if (is_write) { + uint32_t old_msr_val = ppc_state.msr; ppc_state.msr = 
(uint32_t)val; - ppc_msr_did_change(); + ppc_msr_did_change(old_msr_val); } return ppc_state.msr; } diff --git a/cpu/ppc/ppcopcodes.cpp b/cpu/ppc/ppcopcodes.cpp index 8435d0c..e08ac97 100644 --- a/cpu/ppc/ppcopcodes.cpp +++ b/cpu/ppc/ppcopcodes.cpp @@ -801,8 +801,9 @@ void dppc_interpreter::ppc_mtmsr(uint32_t opcode) { ppc_exception_handler(Except_Type::EXC_PROGRAM, Exc_Cause::NOT_ALLOWED); } uint32_t reg_s = (opcode >> 21) & 0x1F; + uint32_t old_msr_val = ppc_state.msr; ppc_state.msr = ppc_state.gpr[reg_s]; - ppc_msr_did_change(); + ppc_msr_did_change(old_msr_val); // generate External Interrupt Exception // if CPU interrupt line is asserted @@ -1377,10 +1378,11 @@ void dppc_interpreter::ppc_rfi(uint32_t opcode) { #ifdef CPU_PROFILING num_supervisor_instrs++; #endif + uint32_t old_msr_val = ppc_state.msr; uint32_t new_srr1_val = (ppc_state.spr[SPR::SRR1] & 0x87C0FF73UL); uint32_t new_msr_val = (ppc_state.msr & ~0x87C0FF73UL); ppc_state.msr = (new_msr_val | new_srr1_val) & 0xFFFBFFFFUL; - ppc_msr_did_change(); + ppc_msr_did_change(old_msr_val); // generate External Interrupt Exception // if CPU interrupt line is still asserted @@ -1406,7 +1408,7 @@ void dppc_interpreter::ppc_rfi(uint32_t opcode) { mmu_change_mode(); - exec_flags = EXEF_RFI; + exec_flags |= EXEF_RFI; } void dppc_interpreter::ppc_sc(uint32_t opcode) { diff --git a/cpu/ppc/test/ppctests.cpp b/cpu/ppc/test/ppctests.cpp index 7a1d489..dae8613 100644 --- a/cpu/ppc/test/ppctests.cpp +++ b/cpu/ppc/test/ppctests.cpp @@ -46,7 +46,7 @@ void xer_ov_test(string mnem, uint32_t opcode) { ppc_state.gpr[3] = 2; ppc_state.gpr[4] = 2; ppc_state.spr[SPR::XER] = 0xFFFFFFFF; - ppc_main_opcode(opcode); + ppc_main_opcode(ppc_opcode_grabber(), opcode); if (ppc_state.spr[SPR::XER] & 0x40000000UL) { cout << "Invalid " << mnem << " emulation! XER[OV] should not be set." 
<< endl; nfailed++; @@ -150,7 +150,7 @@ static void read_test_data() { ppc_state.spr[SPR::XER] = 0; ppc_state.cr = 0; - ppc_main_opcode(opcode); + ppc_main_opcode(ppc_opcode_grabber(), opcode); ntested++; @@ -292,7 +292,7 @@ static void read_test_float_data() { ppc_state.cr = 0; - ppc_main_opcode(opcode); + ppc_main_opcode(ppc_opcode_grabber(), opcode); ntested++; @@ -318,6 +318,9 @@ static void read_test_float_data() { int main() { is_601 = true; initialize_ppc_opcode_table(); //kludge + // MPC601 sets MSR[ME] bit during hard reset / Power-On. + // Also set MSR[FP] bit so we can test FPU instructions. + ppc_state.msr = (MSR::ME | MSR::IP | MSR::FP); cout << "Running DingusPPC emulator tests..." << endl << endl;