From 8afa65cc961177caf6a8b40eb81e680fa4d3a44a Mon Sep 17 00:00:00 2001 From: gbeauche <> Date: Tue, 27 Jan 2004 13:54:51 +0000 Subject: [PATCH] Inline fast basic block lookups. Only check top tag as it is a hit more than 95% of the time. Overall, this improves performance by more than 2x on a P4. --- SheepShaver/src/kpx_cpu/src/cpu/block-cache.hpp | 12 ++++++++++++ SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp | 5 ++++- .../src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp | 17 +++++++++++++++++ .../src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp | 1 + .../src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp | 3 +++ 5 files changed, 37 insertions(+), 1 deletion(-) diff --git a/SheepShaver/src/kpx_cpu/src/cpu/block-cache.hpp b/SheepShaver/src/kpx_cpu/src/cpu/block-cache.hpp index 313c3fab..ef36c1f0 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/block-cache.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/block-cache.hpp @@ -60,6 +60,7 @@ public: void initialize(); void clear(); void clear_range(uintptr start, uintptr end); + block_info *fast_find(uintptr pc); block_info *find(uintptr pc); void remove_from_cl_list(block_info *bi); @@ -148,6 +149,17 @@ inline void block_cache< block_info, block_allocator >::delete_blockinfo(block_i allocator.release(bce); } +template< class block_info, template<class T> class block_allocator > +inline block_info *block_cache< block_info, block_allocator >::fast_find(uintptr pc) +{ + // Hit: return immediately (that covers more than 95% of the cases) + entry * bce = cache_tags[cacheline(pc)]; + if (bce && bce->pc == pc) + return bce; + + return NULL; +} + template< class block_info, template<class T> class block_allocator > block_info *block_cache< block_info, block_allocator >::find(uintptr pc) { diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp index 80d13e1b..0a06bf7e 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp @@ -461,7 +461,10 @@ void 
powerpc_cpu::execute(uint32 entry) } } - if ((bi->pc != pc()) && ((bi = block_cache.find(pc())) == NULL)) + // Don't check for backward branches here as this + // is now done by generated code. Besides, we will + // get here if the fast cache lookup failed too. + if ((bi = block_cache.find(pc())) == NULL) break; } } diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp index b3afb370..b80f7a80 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp @@ -86,6 +86,8 @@ struct powerpc_dyngen_helper { static double & fp_result() { return CPU->fp_result(); } static uint64 & fp_result_dw() { return CPU->fp_result_dw(); } static inline void set_cr(int crfd, int v) { CPU->cr().set(crfd, v); } + + static inline powerpc_block_info *find_block(uint32 pc) { return CPU->block_cache.fast_find(pc); } }; @@ -1206,3 +1208,18 @@ void OPPROTO op_nego_T0(void) T0 = -T0; } +/** + * Generate possible call to next basic block without going + * through register state restore & full cache lookup + **/ + +void OPPROTO op_jump_next_A0(void) +{ + // Make sure there is no pending interrupt request + if (powerpc_dyngen_helper::spcflags().empty()) { + powerpc_block_info *bi = (powerpc_block_info *)reg_A0; + uint32 pc = powerpc_dyngen_helper::get_pc(); + if (bi->pc == pc || (bi = powerpc_dyngen_helper::find_block(pc)) != NULL) + goto *(bi->entry_point); + } +} diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp index 7810dcfb..9d5d8652 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp @@ -119,6 +119,7 @@ public: DEFINE_ALIAS(decrement_ctr_T0,0); DEFINE_ALIAS(branch_A0_if_T0,1); DEFINE_ALIAS(branch_A0_if_not_T0,1); + DEFINE_ALIAS(jump_next_A0,0); // Compare & Record instructions DEFINE_ALIAS(record_cr0_T0,0); diff --git 
a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp index 38d35f91..76689db1 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp @@ -1303,6 +1303,9 @@ powerpc_cpu::compile_block(uint32 entry_point) goto again; } } + // TODO: optimize this to a direct jump to pregenerated code? + dg.gen_mov_ad_A0_im((uintptr)bi); + dg.gen_jump_next_A0(); dg.gen_exec_return(); dg.gen_end(); bi->end_pc = dpc;