From a42281aad1ddf2769657b2f15dac60e8c1c8885b Mon Sep 17 00:00:00 2001 From: gbeauche <> Date: Mon, 3 Nov 2003 21:28:32 +0000 Subject: [PATCH] Implement partial block cache invalidation. Rewrite core cached blocks execution loop with a Duff's device. Gather some predecode time statistics. This shows that only around 2% of total emulation time is spent for predecoding the instructions. --- SheepShaver/src/Unix/main_unix.cpp | 10 ++- .../src/kpx_cpu/include/basic-blockinfo.hpp | 1 + SheepShaver/src/kpx_cpu/sheepshaver_glue.cpp | 12 ++++ .../src/kpx_cpu/src/cpu/block-cache.hpp | 23 +++++- .../src/kpx_cpu/src/cpu/ppc/ppc-blockinfo.hpp | 1 - .../src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp | 70 ++++++++++++++----- .../src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp | 1 - 7 files changed, 97 insertions(+), 21 deletions(-) diff --git a/SheepShaver/src/Unix/main_unix.cpp b/SheepShaver/src/Unix/main_unix.cpp index 890c3df2..5167116e 100644 --- a/SheepShaver/src/Unix/main_unix.cpp +++ b/SheepShaver/src/Unix/main_unix.cpp @@ -224,6 +224,9 @@ static void *nvram_func(void *arg); static void *tick_func(void *arg); #if EMULATED_PPC static void sigusr2_handler(int sig); +extern void emul_ppc(uint32 start); +extern void init_emul_ppc(void); +extern void exit_emul_ppc(void); #else static void sigusr2_handler(int sig, sigcontext_struct *sc); static void sigsegv_handler(int sig, sigcontext_struct *sc); @@ -779,6 +782,11 @@ quit: static void Quit(void) { +#if EMULATED_PPC + // Exit PowerPC emulation + exit_emul_ppc(); +#endif + // Stop 60Hz thread if (tick_thread_active) { pthread_cancel(tick_thread); @@ -882,8 +890,6 @@ static void Quit(void) */ #if EMULATED_PPC -extern void emul_ppc(uint32 start); -extern void init_emul_ppc(void); void jump_to_rom(uint32 entry) { init_emul_ppc(); diff --git a/SheepShaver/src/kpx_cpu/include/basic-blockinfo.hpp b/SheepShaver/src/kpx_cpu/include/basic-blockinfo.hpp index 265431fc..bb2fcf3e 100644 --- a/SheepShaver/src/kpx_cpu/include/basic-blockinfo.hpp +++ 
b/SheepShaver/src/kpx_cpu/include/basic-blockinfo.hpp @@ -35,6 +35,7 @@ struct basic_block_info }; uintptr pc; + uintptr end_pc; int32 count; uint32 size; uint32 c1; diff --git a/SheepShaver/src/kpx_cpu/sheepshaver_glue.cpp b/SheepShaver/src/kpx_cpu/sheepshaver_glue.cpp index ffe59de7..da4b7c1c 100644 --- a/SheepShaver/src/kpx_cpu/sheepshaver_glue.cpp +++ b/SheepShaver/src/kpx_cpu/sheepshaver_glue.cpp @@ -552,6 +552,18 @@ void init_emul_ppc(void) #endif } +/* + * Deinitialize emulation + */ + +void exit_emul_ppc(void) +{ + delete main_cpu; +#if MULTICORE_CPU + delete interrupt_cpu; +#endif +} + /* * Emulation loop */ diff --git a/SheepShaver/src/kpx_cpu/src/cpu/block-cache.hpp b/SheepShaver/src/kpx_cpu/src/cpu/block-cache.hpp index 0a8d8584..e19224ee 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/block-cache.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/block-cache.hpp @@ -45,7 +45,7 @@ private: entry * active; entry * dormant; - uint32 cacheline(uint32 addr) const { + uint32 cacheline(uintptr addr) const { return (addr >> 2) & HASH_MASK; } @@ -59,6 +59,7 @@ public: void initialize(); void clear(); + void clear_range(uintptr start, uintptr end); block_info *find(uintptr pc); entry *first_active() const; entry *first_dormant() const; @@ -116,6 +117,26 @@ void block_cache< block_info, block_allocator >::clear() dormant = NULL; } +template< class block_info, template<class T> class block_allocator > +inline void block_cache< block_info, block_allocator >::clear_range(uintptr start, uintptr end) +{ + if (!active) + return; + + entry *q; + entry *p = active; + while (p) { + q = p; + p = p->next; /* step first: q may be deleted below */ + if ((q->pc < end) && /* overlap of block [pc, end_pc] with range [start, end): */ + (q->end_pc >= start)) { /* also catches a block that encloses the whole range */ + remove_from_cl_list(q); + remove_from_list(q); + delete_blockinfo(q); + } + } +} + template< class block_info, template<class T> class block_allocator > inline block_info *block_cache< block_info, block_allocator >::new_blockinfo() { diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-blockinfo.hpp 
b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-blockinfo.hpp index 786872d3..b20129b9 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-blockinfo.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-blockinfo.hpp @@ -37,7 +37,6 @@ struct powerpc_block_info uint32 opcode; }; - uint32 end_pc; decode_info * di; }; diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp index 141a60a9..5b93c432 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp @@ -31,6 +31,17 @@ #define DEBUG 0 #include "debug.h" +// Define to gather some compile time statistics +#define PROFILE_COMPILE_TIME 1 + +#if PROFILE_COMPILE_TIME +#include <time.h> +static uint32 compile_count = 0; +static clock_t compile_time = 0; +static clock_t emul_start_time = 0; +static clock_t emul_end_time = 0; +#endif + void powerpc_cpu::set_register(int id, any_register const & value) { if (id >= powerpc_registers::GPR(0) && id <= powerpc_registers::GPR(31)) { @@ -231,10 +242,34 @@ void powerpc_cpu::initialize() #if ENABLE_MON mon_init(); #endif + +#if PROFILE_COMPILE_TIME + emul_start_time = clock(); +#endif } powerpc_cpu::~powerpc_cpu() { +#if PROFILE_COMPILE_TIME + emul_end_time = clock(); + + const char *type = NULL; +#ifndef PPC_NO_DECODE_CACHE + type = "predecode"; +#endif + if (type) { + printf("### Statistics for block %s\n", type); + printf("Total block %s count : %d\n", type, compile_count); + clock_t emul_time = emul_end_time - emul_start_time; /* keep full clock_t width, like compile_time; a uint32 would truncate where clock_t is wider than 32 bits */ + printf("Total emulation time : %.1f sec\n", + double(emul_time) / double(CLOCKS_PER_SEC)); + printf("Total %s time : %.1f sec (%.1f%%)\n", type, + double(compile_time) / double(CLOCKS_PER_SEC), + 100.0 * double(compile_time) / double(emul_time)); + printf("\n"); + } +#endif + kill_decode_cache(); #if ENABLE_MON @@ -318,6 +353,10 @@ void powerpc_cpu::execute(uint32 entry, bool enable_cache) #ifndef PPC_NO_DECODE_CACHE if (enable_cache) { for (;;) { +#if 
PROFILE_COMPILE_TIME + compile_count++; + clock_t start_time = clock(); +#endif block_info *bi = block_cache.new_blockinfo(); bi->init(pc()); @@ -360,13 +399,14 @@ void powerpc_cpu::execute(uint32 entry, bool enable_cache) di = bi->di + blocklen; } } while ((ii->cflow & CFLOW_END_BLOCK) == 0); -#ifdef PPC_LAZY_PC_UPDATE bi->end_pc = dpc; -#endif bi->size = di - bi->di; block_cache.add_to_cl_list(bi); block_cache.add_to_active_list(bi); decode_cache_p += bi->size; +#if PROFILE_COMPILE_TIME + compile_time += (clock() - start_time); +#endif // Execute all cached blocks for (;;) { @@ -379,21 +419,19 @@ void powerpc_cpu::execute(uint32 entry, bool enable_cache) di[i].execute(this, di[i].opcode); #else const int r = bi->size % 4; + di += r; + int n = (bi->size + 3) / 4; switch (r) { - case 3: di->execute(this, di->opcode); di++; - case 2: di->execute(this, di->opcode); di++; - case 1: di->execute(this, di->opcode); di++; - case 0: break; - } - const int n = bi->size / 4; - for (int i = 0; i < n; i++) { - di[0].execute(this, di[0].opcode); - di[1].execute(this, di[1].opcode); - di[2].execute(this, di[2].opcode); - di[3].execute(this, di[3].opcode); - di += 4; + case 0: do { + di += 4; + di[-4].execute(this, di[-4].opcode); + case 3: di[-3].execute(this, di[-3].opcode); + case 2: di[-2].execute(this, di[-2].opcode); + case 1: di[-1].execute(this, di[-1].opcode); + } while (--n > 0); } #endif + if (!spcflags().empty()) { if (!check_spcflags()) return; @@ -483,8 +521,8 @@ void powerpc_cpu::invalidate_cache() void powerpc_cpu::invalidate_cache_range(uintptr start, uintptr end) { + D(bug("Invalidate cache block [%08x - %08x]\n", start, end)); #ifndef PPC_NO_DECODE_CACHE - // TODO: partial translation cache invalidatation - invalidate_cache(); + block_cache.clear_range(start, end); #endif } diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp index 46af83a2..34a6c1db 100644 --- 
a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp @@ -1101,7 +1101,6 @@ void powerpc_cpu::execute_mftbr(uint32 opcode) void powerpc_cpu::execute_invalidate_cache_range() { if (cache_range.start != cache_range.end) { - D(bug("Invalidate Cache Block [%08x - %08x]\n", cache_range.start, cache_range.end)); invalidate_cache_range(cache_range.start, cache_range.end); cache_range.start = cache_range.end = 0; }