mirror of
https://github.com/kanjitalk755/macemu.git
synced 2025-01-13 23:32:55 +00:00
Inline fast basic block lookups. Only the top tag is checked, since it hits more
than 95% of the time. Overall, this improves performance by more than 2x on a P4.
This commit is contained in:
parent
5bbde8f9fe
commit
8afa65cc96
@ -60,6 +60,7 @@ public:
|
|||||||
void initialize();
|
void initialize();
|
||||||
void clear();
|
void clear();
|
||||||
void clear_range(uintptr start, uintptr end);
|
void clear_range(uintptr start, uintptr end);
|
||||||
|
block_info *fast_find(uintptr pc);
|
||||||
block_info *find(uintptr pc);
|
block_info *find(uintptr pc);
|
||||||
|
|
||||||
void remove_from_cl_list(block_info *bi);
|
void remove_from_cl_list(block_info *bi);
|
||||||
@ -148,6 +149,17 @@ inline void block_cache< block_info, block_allocator >::delete_blockinfo(block_i
|
|||||||
allocator.release(bce);
|
allocator.release(bce);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fast-path block lookup.
// Inspects only the entry stored at cache_tags[cacheline(pc)] and returns it
// when that entry exists and its pc equals the requested pc. Returns NULL in
// every other case; no further search is performed here.
template< class block_info, template<class T> class block_allocator >
|
||||||
|
inline block_info *block_cache< block_info, block_allocator >::fast_find(uintptr pc)
|
||||||
|
{
|
||||||
|
// Hit: return immediately (that covers more than 95% of the cases)
|
||||||
|
// Only the tag slot selected by cacheline(pc) is examined.
entry * bce = cache_tags[cacheline(pc)];
|
||||||
|
// The slot may be empty, or may hold an entry for a different pc.
if (bce && bce->pc == pc)
|
||||||
|
return bce;
|
||||||
|
|
||||||
|
// Miss: report failure to the caller instead of searching any further.
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
template< class block_info, template<class T> class block_allocator >
|
template< class block_info, template<class T> class block_allocator >
|
||||||
block_info *block_cache< block_info, block_allocator >::find(uintptr pc)
|
block_info *block_cache< block_info, block_allocator >::find(uintptr pc)
|
||||||
{
|
{
|
||||||
|
@ -461,7 +461,10 @@ void powerpc_cpu::execute(uint32 entry)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((bi->pc != pc()) && ((bi = block_cache.find(pc())) == NULL))
|
// Don't check for backward branches here as this
|
||||||
|
// is now done by generated code. Besides, we will
|
||||||
|
// get here if the fast cache lookup failed too.
|
||||||
|
if ((bi = block_cache.find(pc())) == NULL)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -86,6 +86,8 @@ struct powerpc_dyngen_helper {
|
|||||||
static double & fp_result() { return CPU->fp_result(); }
|
static double & fp_result() { return CPU->fp_result(); }
|
||||||
static uint64 & fp_result_dw() { return CPU->fp_result_dw(); }
|
static uint64 & fp_result_dw() { return CPU->fp_result_dw(); }
|
||||||
static inline void set_cr(int crfd, int v) { CPU->cr().set(crfd, v); }
|
static inline void set_cr(int crfd, int v) { CPU->cr().set(crfd, v); }
|
||||||
|
|
||||||
|
// Forwards to CPU->block_cache.fast_find(pc) and returns its result unchanged.
static inline powerpc_block_info *find_block(uint32 pc) { return CPU->block_cache.fast_find(pc); }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -1206,3 +1208,18 @@ void OPPROTO op_nego_T0(void)
|
|||||||
T0 = -T0;
|
T0 = -T0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate possible call to next basic block without going
|
||||||
|
* through register state restore & full cache lookup
|
||||||
|
**/
|
||||||
|
|
||||||
|
void OPPROTO op_jump_next_A0(void)
|
||||||
|
{
|
||||||
|
// Make sure there is no pending interrupt request
|
||||||
|
// When spcflags() is not empty, nothing below runs and the op falls through.
if (powerpc_dyngen_helper::spcflags().empty()) {
|
||||||
|
// reg_A0 is interpreted as a pointer to a block info record.
powerpc_block_info *bi = (powerpc_block_info *)reg_A0;
|
||||||
|
uint32 pc = powerpc_dyngen_helper::get_pc();
|
||||||
|
// Use the block from A0 if it starts at the current pc; otherwise try
// find_block(pc). If that also fails (NULL), no jump is taken.
if (bi->pc == pc || (bi = powerpc_dyngen_helper::find_block(pc)) != NULL)
|
||||||
|
// Computed goto: transfer control directly to the block's entry point.
goto *(bi->entry_point);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -119,6 +119,7 @@ public:
|
|||||||
DEFINE_ALIAS(decrement_ctr_T0,0);
|
DEFINE_ALIAS(decrement_ctr_T0,0);
|
||||||
DEFINE_ALIAS(branch_A0_if_T0,1);
|
DEFINE_ALIAS(branch_A0_if_T0,1);
|
||||||
DEFINE_ALIAS(branch_A0_if_not_T0,1);
|
DEFINE_ALIAS(branch_A0_if_not_T0,1);
|
||||||
|
DEFINE_ALIAS(jump_next_A0,0);
|
||||||
|
|
||||||
// Compare & Record instructions
|
// Compare & Record instructions
|
||||||
DEFINE_ALIAS(record_cr0_T0,0);
|
DEFINE_ALIAS(record_cr0_T0,0);
|
||||||
|
@ -1303,6 +1303,9 @@ powerpc_cpu::compile_block(uint32 entry_point)
|
|||||||
goto again;
|
goto again;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// TODO: optimize this to a direct jump to pregenerated code?
|
||||||
|
dg.gen_mov_ad_A0_im((uintptr)bi);
|
||||||
|
dg.gen_jump_next_A0();
|
||||||
dg.gen_exec_return();
|
dg.gen_exec_return();
|
||||||
dg.gen_end();
|
dg.gen_end();
|
||||||
bi->end_pc = dpc;
|
bi->end_pc = dpc;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user