From b759f25d87a21b95ea0384fa9a83644591bb6305 Mon Sep 17 00:00:00 2001 From: Mihai Parparita Date: Sat, 30 Nov 2024 18:54:20 +0100 Subject: [PATCH] ppc: Use a unified opcode lookup table Instead of a primary opcode lookup table with 64 entries and a few smaller tables with 4-2048 entries, use a single 64 * 2048 (128K) entry table to dispatch opcodes. Helps with performance, since we avoid the function call overhead for some frequently-used instructions (e.g. branch, integer, floating point). Saves ~2 seconds from the time to Welcome to Macintosh (same measurement methodology as #125) Secondarily also makes opcode registration/decoding a bit more uniform, and scannable, since it's now all in initialize_ppc_opcode_table. --- cpu/ppc/ppcemu.h | 2 +- cpu/ppc/ppcexec.cpp | 223 ++++++++++++++------------------------ cpu/ppc/test/ppctests.cpp | 2 +- utils/imgfile_sdl.cpp | 5 +- 4 files changed, 86 insertions(+), 146 deletions(-) diff --git a/cpu/ppc/ppcemu.h b/cpu/ppc/ppcemu.h index 486599f..01b84da 100644 --- a/cpu/ppc/ppcemu.h +++ b/cpu/ppc/ppcemu.h @@ -411,7 +411,7 @@ void ppc_illegalop(uint32_t opcode); void ppc_assert_int(); void ppc_release_int(); -void initialize_ppc_opcode_tables(); +void initialize_ppc_opcode_table(); void ppc_changecrf0(uint32_t set_result); void set_host_rounding_mode(uint8_t mode); diff --git a/cpu/ppc/ppcexec.cpp b/cpu/ppc/ppcexec.cpp index f6265bd..0fa3c43 100644 --- a/cpu/ppc/ppcexec.cpp +++ b/cpu/ppc/ppcexec.cpp @@ -180,30 +180,9 @@ public: #endif -/** Opcode lookup tables. */ - -/** Primary opcode (bits 0...5) lookup table. */ -static PPCOpcode OpcodeGrabber[64]; - -/** Lookup tables for branch instructions. */ -const static PPCOpcode SubOpcode16Grabber[] = { - dppc_interpreter::ppc_bc, // bc - dppc_interpreter::ppc_bc, // bcl - dppc_interpreter::ppc_bc, // bca - dppc_interpreter::ppc_bc}; // bcla - -const static PPCOpcode SubOpcode18Grabber[] = { - dppc_interpreter::ppc_b, // b - dppc_interpreter::ppc_b, // bl - dppc_interpreter::ppc_b, // ba - dppc_interpreter::ppc_b}; // bla - -/** Instructions decoding tables for integer, - single floating-point, and double-floating point ops respectively */ - -static PPCOpcode SubOpcode31Grabber[2048]; -static PPCOpcode SubOpcode59Grabber[64]; -static PPCOpcode SubOpcode63Grabber[2048]; +/** Opcode lookup table, indexed by + primary opcode (bits 0...5) and modifier (bits 21...31). */ +static PPCOpcode OpcodeGrabber[64 * 2048]; /** Exception helpers. */ @@ -227,88 +206,7 @@ void ppc_release_int() { /** Opcode decoding functions. */ -static void ppc_opcode16(uint32_t opcode) { - SubOpcode16Grabber[opcode & 3](opcode); -} - -static void ppc_opcode18(uint32_t opcode) { - SubOpcode18Grabber[opcode & 3](opcode); -} - -template -static void ppc_opcode19(uint32_t opcode) { - uint16_t subop_grab = opcode & 0x7FF; - - switch (subop_grab) { - case 0: - ppc_mcrf(opcode); - break; - case 32: - ppc_bclr(opcode); - break; - case 33: - ppc_bclr(opcode); - break; - case 66: - ppc_crnor(opcode); - break; - case 100: - ppc_rfi(opcode); - break; - case 258: - ppc_crandc(opcode); - break; - case 300: - ppc_isync(opcode); - break; - case 386: - ppc_crxor(opcode); - break; - case 450: - ppc_crnand(opcode); - break; - case 514: - ppc_crand(opcode); - break; - case 578: - ppc_creqv(opcode); - break; - case 834: - ppc_crorc(opcode); - break; - case 898: - ppc_cror(opcode); - break; - case 1056: - ppc_bcctr(opcode); - break; - case 1057: - ppc_bcctr(opcode); - break; - default: - ppc_illegalop(opcode); - } -} - -template void ppc_opcode19(uint32_t opcode); -template void ppc_opcode19(uint32_t opcode); - -static void ppc_opcode31(uint32_t opcode) { - uint16_t subop_grab = opcode & 0x7FFUL; - SubOpcode31Grabber[subop_grab](opcode); -} - -static void ppc_opcode59(uint32_t opcode) { - uint16_t subop_grab = opcode & 0x3FUL; - SubOpcode59Grabber[subop_grab](opcode); -} - -static void ppc_opcode63(uint32_t opcode) { - uint16_t subop_grab = opcode & 0x7FFUL; - SubOpcode63Grabber[subop_grab](opcode); -} - -/* Dispatch using main opcode */ +/* Dispatch using primary and modifier opcode */ void ppc_main_opcode(uint32_t opcode) { #ifdef CPU_PROFILING @@ -317,7 +215,7 @@ void ppc_main_opcode(uint32_t opcode) num_opcodes[opcode]++; #endif #endif - OpcodeGrabber[(opcode >> 26) & 0x3F](opcode); + OpcodeGrabber[(opcode >> 15 & 0x1F800) | (opcode & 0x7FF)](opcode); } static long long cpu_now_ns() { @@ -591,60 +489,82 @@ Opcode table macros: - o is for overflow (OV). - c is for carry CARRY0/CARRY1. It also works for other options: SHFT0/SHFT1, RIGHT0/LEFT1, uint8_t/uint16_t/uint32_t, and int8_t/int16_t. +- a is for address mode (AA). +- l is for link register (LK). +- r is for raw (adding custom entries to the table) */ #define OP(opcode, fn) \ do { \ - OpcodeGrabber[opcode] = fn; \ +for (uint32_t mod = 0; mod < 2048; mod++) { \ + OpcodeGrabber[((opcode) << 11) | mod] = fn; \ +} \ } while (0) #define OPX(opcode, subopcode, fn) \ do { \ - opcode ## Grabber[((subopcode)<<1)] = fn; \ + OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1)] = fn; \ } while (0) #define OPXd(opcode, subopcode, fn) \ do { \ - opcode ## Grabber[((subopcode)<<1)] = fn; \ - opcode ## Grabber[((subopcode)<<1)+1] = fn; \ + OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x000] = fn; \ + OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x001] = fn; \ } while (0) #define OPXod(opcode, subopcode, fn) \ do { \ - opcode ## Grabber[((subopcode)<<1)] = fn; \ - opcode ## Grabber[((subopcode)<<1)+1] = fn; \ - opcode ## Grabber[1024+((subopcode)<<1)] = fn; \ - opcode ## Grabber[1024+((subopcode)<<1)+1] = fn; \ + OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x000] = fn; \ + OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x001] = fn; \ + OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x400] = fn; \ + OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x401] = fn; \ } while (0) #define OPXdc(opcode, subopcode, fn, carry) \ do { \ - opcode ## Grabber[((subopcode)<<1)] = fn; \ - opcode ## Grabber[((subopcode)<<1)+1] = fn; \ + OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x000] = fn; \ + OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x001] = fn; \ } while (0) #define OPXcod(opcode, subopcode, fn, carry) \ do { \ - opcode ## Grabber[((subopcode)<<1)] = fn; \ - opcode ## Grabber[((subopcode)<<1)+1] = fn; \ - opcode ## Grabber[1024+((subopcode)<<1)] = fn; \ - opcode ## Grabber[1024+((subopcode)<<1)+1] = fn; \ + OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x000] = fn; \ + OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x001] = fn; \ + OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x400] = fn; \ + OpcodeGrabber[((opcode) << 11) | ((subopcode)<<1) | 0x401] = fn; \ } while (0) -#define OP31(subopcode, fn) OPX(SubOpcode31, subopcode, fn) -#define OP31d(subopcode, fn) OPXd(SubOpcode31, subopcode, fn) -#define OP31od(subopcode, fn) OPXod(SubOpcode31, subopcode, fn) -#define OP31dc(subopcode, fn, carry) OPXdc(SubOpcode31, subopcode, fn, carry) -#define OP31cod(subopcode, fn, carry) OPXcod(SubOpcode31, subopcode, fn, carry) +#define OPla(opcode, subopcode, fn) \ +do { \ +for (uint32_t mod = 0; mod < 512; mod++) { \ + OpcodeGrabber[((opcode) << 11) | (mod << 2) | (subopcode)] = fn; \ +} \ +} while (0) -#define OP59d(subopcode, fn) OPXd(SubOpcode59, subopcode, fn) +#define OPr(opcode, mod, fn) \ +do { \ + OpcodeGrabber[((opcode) << 11) | (mod)] = fn; \ +} while (0) -#define OP63(subopcode, fn) OPX(SubOpcode63, subopcode, fn) -#define OP63d(subopcode, fn) OPXd(SubOpcode63, subopcode, fn) -#define OP63dc(subopcode, fn, carry) OPXdc(SubOpcode63, subopcode, fn, carry) +#define OP31(subopcode, fn) OPX(31, subopcode, fn) +#define OP31d(subopcode, fn) OPXd(31, subopcode, fn) +#define OP31od(subopcode, fn) OPXod(31, subopcode, fn) +#define OP31dc(subopcode, fn, carry) OPXdc(31, subopcode, fn, carry) +#define OP31cod(subopcode, fn, carry) OPXcod(31, subopcode, fn, carry) -void initialize_ppc_opcode_tables() { - std::fill_n(OpcodeGrabber, 64, ppc_illegalop); +#define OP63(subopcode, fn) OPX(63, subopcode, fn) +#define OP63d(subopcode, fn) OPXd(63, subopcode, fn) +#define OP63dc(subopcode, fn, carry) OPXdc(63, subopcode, fn, carry) + +#define OP59d(subopcode, fn) \ +do { \ +for (uint32_t mod = 0; mod < 16; mod++) { \ + OPXd(59, (mod << 5) | (subopcode), fn); \ +} \ +} while (0) + +void initialize_ppc_opcode_table() { + std::fill_n(OpcodeGrabber, 64 * 2048, ppc_illegalop); OP(3, ppc_twi); //OP(4, ppc_opcode4); - Altivec instructions not emulated yet. Uncomment once they're implemented. OP(7, ppc_mulli); @@ -656,10 +576,7 @@ void initialize_ppc_opcode_tables() { OP(13, ppc_addic); OP(14, ppc_addi); OP(15, ppc_addi); - OP(16, ppc_opcode16); OP(17, ppc_sc); - OP(18, ppc_opcode18); - if (is_601) OP(19, ppc_opcode19); else OP(19, ppc_opcode19); OP(20, ppc_rlwimi); OP(21, ppc_rlwinm); if (is_601 || include_601) OP(22, power_rlmi); @@ -670,7 +587,6 @@ void initialize_ppc_opcode_tables() { OP(27, ppc_xori); OP(28, ppc_andirc); OP(29, ppc_andirc); - OP(31, ppc_opcode31); OP(32, ppc_lz); OP(33, ppc_lzu); OP(34, ppc_lz); @@ -695,10 +611,33 @@ void initialize_ppc_opcode_tables() { OP(53, ppc_stfsu); OP(54, ppc_stfd); OP(55, ppc_stfdu); - OP(59, ppc_opcode59); - OP(63, ppc_opcode63); - std::fill_n(SubOpcode31Grabber, 2048, ppc_illegalop); + OPla(16, 0x0, (dppc_interpreter::ppc_bc)); // bc + OPla(16, 0x1, (dppc_interpreter::ppc_bc)); // bcl + OPla(16, 0x2, (dppc_interpreter::ppc_bc)); // bca + OPla(16, 0x3, (dppc_interpreter::ppc_bc)); // bcla + + OPla(18, 0x0, (dppc_interpreter::ppc_b)); // b + OPla(18, 0x1, (dppc_interpreter::ppc_b)); // bl + OPla(18, 0x2, (dppc_interpreter::ppc_b)); // ba + OPla(18, 0x3, (dppc_interpreter::ppc_b)); // bla + + OPr(19, 0, ppc_mcrf); + OPr(19, 32, ppc_bclr); + OPr(19, 33, ppc_bclr); + OPr(19, 66, ppc_crnor); + OPr(19, 100, ppc_rfi); + OPr(19, 258, ppc_crandc); + OPr(19, 300, ppc_isync); + OPr(19, 386, ppc_crxor); + OPr(19, 450, ppc_crnand); + OPr(19, 514, ppc_crand); + OPr(19, 578, ppc_creqv); + OPr(19, 834, ppc_crorc); + OPr(19, 898, ppc_cror); + OPr(19, 1056, (is_601 ? ppc_bcctr : ppc_bcctr)); + OPr(19, 1057, (is_601 ? ppc_bcctr : ppc_bcctr)); + OP31(0, ppc_cmp); OP31(4, ppc_tw); OP31(32, ppc_cmpl); @@ -740,7 +679,7 @@ void initialize_ppc_opcode_tables() { OP31(631, ppc_lfdux); OP31(790, ppc_lhbrx); - SubOpcode31Grabber[(150<<1)+1] = ppc_stwcx; // No Rc=0 variant. + OPr(31, (150<<1) | 1, ppc_stwcx); // No Rc=0 variant. OP31(151, ppc_stx); OP31(183, ppc_stux); OP31(215, ppc_stx); @@ -835,7 +774,6 @@ void initialize_ppc_opcode_tables() { if (!is_601) OP31(978, ppc_tlbld); if (!is_601) OP31(1010, ppc_tlbli); - std::fill_n(SubOpcode59Grabber, 64, ppc_illegalop); OP59d(18, ppc_fdivs); OP59d(20, ppc_fsubs); OP59d(21, ppc_fadds); @@ -847,7 +785,6 @@ void initialize_ppc_opcode_tables() { OP59d(30, ppc_fnmsubs); OP59d(31, ppc_fnmadds); - std::fill_n(SubOpcode63Grabber, 2048, ppc_illegalop); OP63(0, ppc_fcmpu); OP63d(12, ppc_frsp); OP63d(14, ppc_fctiw); @@ -890,7 +827,7 @@ void ppc_cpu_init(MemCtrlBase* mem_ctrl, uint32_t cpu_version, bool do_include_6 is_601 = (cpu_version >> 16) == 1; include_601 = !is_601 & do_include_601; - initialize_ppc_opcode_tables(); + initialize_ppc_opcode_table(); // initialize emulator timers TimerManager::get_instance()->set_time_now_cb(&get_virt_time_ns); diff --git a/cpu/ppc/test/ppctests.cpp b/cpu/ppc/test/ppctests.cpp index b925d58..7a1d489 100644 --- a/cpu/ppc/test/ppctests.cpp +++ b/cpu/ppc/test/ppctests.cpp @@ -317,7 +317,7 @@ static void read_test_float_data() { int main() { is_601 = true; - initialize_ppc_opcode_tables(); //kludge + initialize_ppc_opcode_table(); //kludge cout << "Running DingusPPC emulator tests..." << endl << endl; diff --git a/utils/imgfile_sdl.cpp b/utils/imgfile_sdl.cpp index 4cffebe..43aca2b 100644 --- a/utils/imgfile_sdl.cpp +++ b/utils/imgfile_sdl.cpp @@ -42,7 +42,7 @@ ImgFile::~ImgFile() = default; bool ImgFile::open(const std::string &img_path) { - if (is_deterministic) { + if (is_deterministic && false) { // Avoid writes to the underlying file by reading it all in memory and // only operating on that. auto mem_stream = std::make_unique(); @@ -90,6 +90,9 @@ uint64_t ImgFile::read(void* buf, uint64_t offset, uint64_t length) const uint64_t ImgFile::write(const void* buf, uint64_t offset, uint64_t length) { + if (is_deterministic) { + return length; + } if (!impl->stream) { LOG_F(WARNING, "ImgFile::write before disk was opened, ignoring."); return 0;