Generate spcflags checks at the start of the block. This opens up better
opportunities once the CR cache is implemented.
This commit is contained in:
gbeauche 2006-07-30 16:29:10 +00:00
parent bcf7f9a2cd
commit 954593d1c0
7 changed files with 87 additions and 104 deletions

View File

@ -246,15 +246,6 @@ DEFINE_OP(8,T0,1,T2);
#define FORCE_RET() asm volatile ("rts")
#endif
// Transfer control to TARGET with a computed goto (GCC labels-as-values).
// Two static pointers, marked unused, take the addresses of the dummy labels
// that sit right after the goto; nothing in the macro reads them back.
// NOTE(review): presumably the address-taken labels keep the compiler from
// discarding the code after the indirect goto -- confirm.
// dyngen_barrier() follows the dummy labels, and the do { } while (0) wrapper
// lets the macro be used as a single statement.
#define SLOW_DISPATCH(TARGET) do { \
static const void __attribute__((unused)) *label1 = &&dummy_label1; \
static const void __attribute__((unused)) *label2 = &&dummy_label2; \
goto *((void *)TARGET); \
dummy_label1: \
dummy_label2: \
dyngen_barrier(); \
} while (0)
extern "C" void OPPROTO op_execute(uint8 *entry_point, basic_cpu *this_cpu);
void OPPROTO op_execute(uint8 *entry_point, basic_cpu *this_cpu)
{
@ -267,7 +258,7 @@ void OPPROTO op_execute(uint8 *entry_point, basic_cpu *this_cpu)
stk[n_slots - 3] = A1;
stk[n_slots - 4] = A2;
CPU = this_cpu;
SLOW_DISPATCH(entry_point);
DYNGEN_SLOW_DISPATCH(entry_point);
func(); // NOTE: never called, fake to make compiler save return point
#ifdef ASM_OP_EXEC_RETURN_INSN
asm volatile ("1: .byte " ASM_OP_EXEC_RETURN_INSN);
@ -288,7 +279,7 @@ void OPPROTO op_execute(uint8 *entry_point, basic_cpu *this_cpu)
// Jump to the address supplied in PARAM1 using the slow (computed goto) dispatch.
// NOTE(review): both the SLOW_DISPATCH and DYNGEN_SLOW_DISPATCH forms appear
// here; this looks like the old and new line of the same change shown
// together -- confirm which one the file actually contains.
void OPPROTO op_jmp_slow(void)
{
SLOW_DISPATCH(PARAM1);
DYNGEN_SLOW_DISPATCH(PARAM1);
}
void OPPROTO op_jmp_fast(void)
@ -296,13 +287,13 @@ void OPPROTO op_jmp_fast(void)
#ifdef DYNGEN_FAST_DISPATCH
DYNGEN_FAST_DISPATCH(__op_PARAM1);
#else
SLOW_DISPATCH(PARAM1);
DYNGEN_SLOW_DISPATCH(PARAM1);
#endif
}
// Jump to the address held in A0 using the slow (computed goto) dispatch.
// NOTE(review): both the SLOW_DISPATCH and DYNGEN_SLOW_DISPATCH forms appear
// here; this looks like the old and new line of the same change shown
// together -- confirm which one the file actually contains.
void OPPROTO op_jmp_A0(void)
{
SLOW_DISPATCH(A0);
DYNGEN_SLOW_DISPATCH(A0);
}
// Register calling conventions based arches don't need a stack frame

View File

@ -25,7 +25,7 @@
#include "cpu/jit/jit-codegen.hpp"
// Set jump target address
static inline void dg_set_jmp_target(uint8 *jmp_addr, uint8 *addr)
static inline void dg_set_jmp_target_noflush(uint8 *jmp_addr, uint8 *addr)
{
#if defined(__powerpc__) || defined(__ppc__)
// patch the branch destination
@ -33,7 +33,17 @@ static inline void dg_set_jmp_target(uint8 *jmp_addr, uint8 *addr)
uint32 val = *ptr;
val = (val & ~0x03fffffc) | ((addr - jmp_addr) & 0x03fffffc);
*ptr = val;
#endif
#if defined(__i386__) || defined(__x86_64__)
// patch the branch destination
*(uint32 *)jmp_addr = addr - (jmp_addr + 4);
#endif
}
static inline void dg_set_jmp_target(uint8 *jmp_addr, uint8 *addr)
{
dg_set_jmp_target_noflush(jmp_addr, addr);
#if defined(__powerpc__) || defined(__ppc__)
// flush icache
asm volatile ("dcbst 0,%0" : : "r"(ptr) : "memory");
asm volatile ("sync" : : : "memory");
@ -41,11 +51,6 @@ static inline void dg_set_jmp_target(uint8 *jmp_addr, uint8 *addr)
asm volatile ("sync" : : : "memory");
asm volatile ("isync" : : : "memory");
#endif
#if defined(__i386__) || defined(__x86_64__)
// patch the branch destination
*(uint32 *)jmp_addr = addr - (jmp_addr + 4);
// no need to flush icache explicitly
#endif
}
#ifdef SHEEPSHAVER

View File

@ -115,6 +115,15 @@ extern int __op_PARAM1, __op_PARAM2, __op_PARAM3;
#define DYNGEN_FAST_DISPATCH(TARGET) asm volatile ("jmp " ASM_NAME(TARGET))
#endif
// Slow dispatch: jump to TARGET through a computed goto (GCC labels-as-values),
// as opposed to the direct "jmp" emitted by DYNGEN_FAST_DISPATCH.
// The two static pointers are marked unused and only take the addresses of the
// dummy labels placed after the goto.
// NOTE(review): presumably the address-taken labels keep the compiler from
// discarding the code after the indirect goto -- confirm.
// dyngen_barrier() follows the dummy labels; do { } while (0) makes the macro
// behave as a single statement.
#define DYNGEN_SLOW_DISPATCH(TARGET) do { \
static const void __attribute__((unused)) *label1 = &&dummy_label1; \
static const void __attribute__((unused)) *label2 = &&dummy_label2; \
goto *((void *)TARGET); \
dummy_label1: \
dummy_label2: \
dyngen_barrier(); \
} while (0)
extern int __op_jmp0, __op_jmp1;
// Sections handling

View File

@ -577,12 +577,6 @@ void OPPROTO op_spcflags_clear(void)
powerpc_dyngen_helper::spcflags().clear(PARAM1);
}
/**
* Branch instructions
**/
#ifdef DYNGEN_FAST_DISPATCH
#if defined(__x86_64__)
#define FAST_COMPARE_SPECFLAGS_DISPATCH(SPCFLAGS, TARGET) \
asm volatile ("test %0,%0 ; jz " #TARGET : : "r" (SPCFLAGS))
@ -591,7 +585,16 @@ void OPPROTO op_spcflags_clear(void)
#define FAST_COMPARE_SPECFLAGS_DISPATCH(SPCFLAGS, TARGET) \
if (SPCFLAGS == 0) DYNGEN_FAST_DISPATCH(TARGET)
#endif
#endif
// Read the current spcflags value and hand it, together with the __op_jmp0
// target, to FAST_COMPARE_SPECFLAGS_DISPATCH. Per the macro definitions above,
// the dispatch to the target happens when the value is zero; otherwise
// execution continues with whatever code follows this op.
void OPPROTO op_spcflags_check(void)
{
FAST_COMPARE_SPECFLAGS_DISPATCH(powerpc_dyngen_helper::spcflags().get(), __op_jmp0);
}
/**
* Branch instructions
**/
template< int bo >
static inline void do_prep_branch_bo(void)
@ -640,78 +643,49 @@ DEFINE_OP(1x1x,(1,0,1,0));
#undef DEFINE_OP
#undef BO
template< bool chain >
// One-way chained branch: unconditionally dispatch to the __op_jmp0 target
// through DYNGEN_FAST_DISPATCH.
void OPPROTO op_branch_chain_1(void)
{
DYNGEN_FAST_DISPATCH(__op_jmp0);
}
// Two-way chained branch selected by T1: a non-zero T1 dispatches to the
// __op_jmp0 target, a zero T1 dispatches to the __op_jmp1 target.
void OPPROTO op_branch_chain_2(void)
{
if (T1)
DYNGEN_FAST_DISPATCH(__op_jmp0);
else
DYNGEN_FAST_DISPATCH(__op_jmp1);
dyngen_barrier();
}
static inline void do_execute_branch_1(uint32 tpc)
{
#ifdef DYNGEN_FAST_DISPATCH
if (chain)
FAST_COMPARE_SPECFLAGS_DISPATCH(powerpc_dyngen_helper::spcflags().get(), __op_jmp0);
#endif
powerpc_dyngen_helper::set_pc(tpc);
dyngen_barrier();
}
void op_branch_1_T0(void)
void OPPROTO op_branch_1_T0(void)
{
do_execute_branch_1<0>(T0);
do_execute_branch_1(T0);
}
void op_branch_chain_1_T0(void)
void OPPROTO op_branch_1_im(void)
{
do_execute_branch_1<1>(T0);
do_execute_branch_1(PARAM1);
}
void op_branch_1_im(void)
{
do_execute_branch_1<0>(PARAM1);
}
void op_branch_chain_1_im(void)
{
do_execute_branch_1<1>(PARAM1);
}
template< bool chain >
static inline void do_execute_branch_2(uint32 tpc, uint32 npc)
{
#ifdef DYNGEN_FAST_DISPATCH
if (chain) {
T2 = powerpc_dyngen_helper::spcflags().get();
if (T1) {
FAST_COMPARE_SPECFLAGS_DISPATCH(T2, __op_jmp0);
T0 = tpc;
}
else {
FAST_COMPARE_SPECFLAGS_DISPATCH(T2, __op_jmp1);
T0 = npc;
}
}
else
#endif
T0 = T1 ? tpc : npc;
powerpc_dyngen_helper::set_pc(T0);
powerpc_dyngen_helper::set_pc(T1 ? tpc : npc);
dyngen_barrier();
}
void op_branch_2_T0_im(void)
void OPPROTO op_branch_2_T0_im(void)
{
do_execute_branch_2<0>(T0, PARAM1);
do_execute_branch_2(T0, PARAM1);
}
void op_branch_chain_2_T0_im(void)
void OPPROTO op_branch_2_im_im(void)
{
do_execute_branch_2<1>(T0, PARAM1);
}
void op_branch_2_im_im(void)
{
do_execute_branch_2<0>(PARAM1, PARAM2);
}
void op_branch_chain_2_im_im(void)
{
do_execute_branch_2<1>(PARAM1, PARAM2);
do_execute_branch_2(PARAM1, PARAM2);
}
@ -1360,13 +1334,10 @@ void OPPROTO op_dcbz_T0(void)
void OPPROTO op_jump_next_A0(void)
{
// Make sure there is no pending interrupt request
if (likely(powerpc_dyngen_helper::spcflags().empty())) {
powerpc_block_info *bi = (powerpc_block_info *)A0;
uint32 pc = powerpc_dyngen_helper::get_pc();
if (likely(bi->pc == pc) || likely((bi = powerpc_dyngen_helper::find_block(pc)) != NULL))
goto *(bi->entry_point);
}
powerpc_block_info *bi = (powerpc_block_info *)A0;
uint32 pc = powerpc_dyngen_helper::get_pc();
if (likely(bi->pc == pc) || likely((bi = powerpc_dyngen_helper::find_block(pc)) != NULL))
goto *(bi->entry_point);
dyngen_barrier();
}

View File

@ -52,6 +52,18 @@ powerpc_dyngen::powerpc_dyngen(dyngen_cpu_base cpu, int cache_size)
#endif
}
// Emit the prologue of a translated block that starts at guest address `pc`
// and return the address handed back by basic_dyngen::gen_start().
// The prologue is an exit path taken when spcflags are pending: it stores
// `pc` as the current PC and returns from generated code.
uint8 *powerpc_dyngen::gen_start(uint32 pc)
{
	uint8 *const block_entry = basic_dyngen::gen_start();

	// Pending-spcflags exit: check, record the block PC, leave generated code
	gen_op_spcflags_check();
	gen_op_set_PC_im(pc);
	gen_exec_return();

	// Patch the jump held in slot 0 so it lands on the aligned code that
	// follows the exit path, then clear the slot
	// (presumably the slot was filled by the spcflags check op -- confirm)
	uint8 *const block_body = gen_align();
	dg_set_jmp_target_noflush(jmp_addr[0], block_body);
	jmp_addr[0] = NULL;

	return block_entry;
}
void powerpc_dyngen::gen_compare_T0_T1(int crf)
{
gen_op_compare_T0_T1();
@ -250,30 +262,22 @@ void powerpc_dyngen::gen_bc(int bo, int bi, uint32 tpc, uint32 npc, bool direct_
if (BO_CONDITIONAL_BRANCH(bo) || BO_DECREMENT_CTR(bo)) {
// two-way branches
if (tpc != 0xffffffff) {
if (direct_chaining)
gen_op_branch_chain_2_im_im(tpc, npc);
else
gen_op_branch_2_im_im(tpc, npc);
}
if (direct_chaining)
gen_op_branch_chain_2();
else {
if (direct_chaining)
gen_op_branch_chain_2_T0_im(npc);
if (tpc != 0xffffffff)
gen_op_branch_2_im_im(tpc, npc);
else
gen_op_branch_2_T0_im(npc);
}
}
else {
// one-way branches
if (tpc != 0xffffffff) {
if (direct_chaining)
gen_op_branch_chain_1_im(tpc);
else
gen_op_branch_1_im(tpc);
}
if (direct_chaining)
gen_op_branch_chain_1();
else {
if (direct_chaining)
gen_op_branch_chain_1_T0();
if (tpc != 0xffffffff)
gen_op_branch_1_im(tpc);
else
gen_op_branch_1_T0();
}

View File

@ -52,6 +52,9 @@ public:
// Default constructor
powerpc_dyngen(dyngen_cpu_base cpu, int cache_size = -1);
// Generate the prologue of the block starting at `pc`
// (the definition emits a pending-spcflags exit before the block body)
uint8 *gen_start(uint32 pc);
// Load/store registers
void gen_load_T0_GPR(int i);
void gen_load_T1_GPR(int i);

View File

@ -145,7 +145,7 @@ powerpc_cpu::compile_block(uint32 entry_point)
again:
block_info *bi = block_cache.new_blockinfo();
bi->init(entry_point);
bi->entry_point = dg.gen_start();
bi->entry_point = dg.gen_start(entry_point);
// Direct block chaining support variables
bool use_direct_block_chaining = false;
@ -163,7 +163,7 @@ powerpc_cpu::compile_block(uint32 entry_point)
if (ii->cflow & CFLOW_END_BLOCK)
done_compile = true;
// Assume we can compile this opcode;
// Assume we can compile this opcode
compile_status = COMPILE_CODE_OK;
#if PPC_FLIGHT_RECORDER
@ -1538,7 +1538,7 @@ powerpc_cpu::compile_block(uint32 entry_point)
assert(dg.jmp_addr[i] != NULL);
bi->li[i].jmp_addr = dg.jmp_addr[i];
bi->li[i].jmp_resolve_addr = p;
dg_set_jmp_target(bi->li[i].jmp_addr, bi->li[i].jmp_resolve_addr);
dg_set_jmp_target_noflush(bi->li[i].jmp_addr, bi->li[i].jmp_resolve_addr);
}
}
}