Inline fast basic block lookups. Only check the top tag, as it is a hit more than

95% of the time. Overall, this improves performance by more than 2x on a P4.
This commit is contained in:
gbeauche 2004-01-27 13:54:51 +00:00
parent 5bbde8f9fe
commit 8afa65cc96
5 changed files with 37 additions and 1 deletions

View File

@ -60,6 +60,7 @@ public:
void initialize();
void clear();
void clear_range(uintptr start, uintptr end);
// Fast lookup: examines only the entry stored in cache_tags[cacheline(pc)]
// and returns it when its pc matches, NULL otherwise
block_info *fast_find(uintptr pc);
block_info *find(uintptr pc);
void remove_from_cl_list(block_info *bi);
@ -148,6 +149,17 @@ inline void block_cache< block_info, block_allocator >::delete_blockinfo(block_i
allocator.release(bce);
}
/**
 *	Fast lookup of the block cached for PC
 *
 *	Only the entry stored directly in the tag slot selected by
 *	cacheline(pc) is examined. That entry is returned when its pc
 *	field equals PC; NULL is returned when the slot is empty or
 *	holds an entry for a different pc.
 **/
template< class block_info, template<class T> class block_allocator >
inline block_info *block_cache< block_info, block_allocator >::fast_find(uintptr pc)
{
	entry * const head = cache_tags[cacheline(pc)];

	// Miss: nothing in the slot, or the slot belongs to another pc
	if (head == NULL || head->pc != pc)
		return NULL;

	// Hit: the common case (more than 95% of the lookups)
	return head;
}
template< class block_info, template<class T> class block_allocator >
block_info *block_cache< block_info, block_allocator >::find(uintptr pc)
{

View File

@ -461,7 +461,10 @@ void powerpc_cpu::execute(uint32 entry)
}
}
if ((bi->pc != pc()) && ((bi = block_cache.find(pc())) == NULL))
// Don't check for backward branches here as this
// is now done by generated code. Besides, we will
// get here if the fast cache lookup failed too.
if ((bi = block_cache.find(pc())) == NULL)
break;
}
}

View File

@ -86,6 +86,8 @@ struct powerpc_dyngen_helper {
// Forwards to CPU->fp_result() and returns the same double reference
static double & fp_result()
{
	return CPU->fp_result();
}
// Forwards to CPU->fp_result_dw() and returns the same uint64 reference
static uint64 & fp_result_dw()
{
	return CPU->fp_result_dw();
}
// Stores value V into field CRFD through CPU->cr().set()
static inline void set_cr(int crfd, int v)
{
	CPU->cr().set(crfd, v);
}
// Looks up PC with the block cache's fast_find(); the result is
// passed through unchanged (callers test it against NULL)
static inline powerpc_block_info *find_block(uint32 pc)
{
	return CPU->block_cache.fast_find(pc);
}
};
@ -1206,3 +1208,18 @@ void OPPROTO op_nego_T0(void)
T0 = -T0;
}
/**
* Generate possible call to next basic block without going
* through register state restore & full cache lookup
*
* reg_A0 is read as a powerpc_block_info pointer. When spcflags is
* empty and a block matching the current PC is available, control
* jumps straight to that block's entry_point. In every other case
* the op simply falls through to its end without jumping.
**/
void OPPROTO op_jump_next_A0(void)
{
// Make sure there is no pending interrupt request
if (powerpc_dyngen_helper::spcflags().empty()) {
// Candidate block supplied through A0
powerpc_block_info *bi = (powerpc_block_info *)reg_A0;
uint32 pc = powerpc_dyngen_helper::get_pc();
// Take the A0 block if it is the one for PC; otherwise try
// find_block(), which may return NULL (no jump in that case)
if (bi->pc == pc || (bi = powerpc_dyngen_helper::find_block(pc)) != NULL)
// Computed goto (GCC extension) into the selected block's code
goto *(bi->entry_point);
}
}

View File

@ -119,6 +119,7 @@ public:
DEFINE_ALIAS(decrement_ctr_T0,0);
DEFINE_ALIAS(branch_A0_if_T0,1);
DEFINE_ALIAS(branch_A0_if_not_T0,1);
// NOTE(review): presumably exposes op_jump_next_A0 as gen_jump_next_A0()
// -- confirm against the DEFINE_ALIAS definition
DEFINE_ALIAS(jump_next_A0,0);
// Compare & Record instructions
DEFINE_ALIAS(record_cr0_T0,0);

View File

@ -1303,6 +1303,9 @@ powerpc_cpu::compile_block(uint32 entry_point)
goto again;
}
}
// TODO: optimize this to a direct jump to pregenerated code?
dg.gen_mov_ad_A0_im((uintptr)bi);
dg.gen_jump_next_A0();
dg.gen_exec_return();
dg.gen_end();
bi->end_pc = dpc;