diff --git a/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen-ops.cpp b/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen-ops.cpp index 99df2361..7b9feebc 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen-ops.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen-ops.cpp @@ -246,15 +246,6 @@ DEFINE_OP(8,T0,1,T2); #define FORCE_RET() asm volatile ("rts") #endif -#define SLOW_DISPATCH(TARGET) do { \ - static const void __attribute__((unused)) *label1 = &&dummy_label1; \ - static const void __attribute__((unused)) *label2 = &&dummy_label2; \ - goto *((void *)TARGET); \ - dummy_label1: \ - dummy_label2: \ - dyngen_barrier(); \ -} while (0) - extern "C" void OPPROTO op_execute(uint8 *entry_point, basic_cpu *this_cpu); void OPPROTO op_execute(uint8 *entry_point, basic_cpu *this_cpu) { @@ -267,7 +258,7 @@ void OPPROTO op_execute(uint8 *entry_point, basic_cpu *this_cpu) stk[n_slots - 3] = A1; stk[n_slots - 4] = A2; CPU = this_cpu; - SLOW_DISPATCH(entry_point); + DYNGEN_SLOW_DISPATCH(entry_point); func(); // NOTE: never called, fake to make compiler save return point #ifdef ASM_OP_EXEC_RETURN_INSN asm volatile ("1: .byte " ASM_OP_EXEC_RETURN_INSN); @@ -288,7 +279,7 @@ void OPPROTO op_execute(uint8 *entry_point, basic_cpu *this_cpu) void OPPROTO op_jmp_slow(void) { - SLOW_DISPATCH(PARAM1); + DYNGEN_SLOW_DISPATCH(PARAM1); } void OPPROTO op_jmp_fast(void) @@ -296,13 +287,13 @@ void OPPROTO op_jmp_fast(void) #ifdef DYNGEN_FAST_DISPATCH DYNGEN_FAST_DISPATCH(__op_PARAM1); #else - SLOW_DISPATCH(PARAM1); + DYNGEN_SLOW_DISPATCH(PARAM1); #endif } void OPPROTO op_jmp_A0(void) { - SLOW_DISPATCH(A0); + DYNGEN_SLOW_DISPATCH(A0); } // Register calling conventions based arches don't need a stack frame diff --git a/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp b/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp index f46e0fcc..12824e1f 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp @@ -25,7 +25,7 @@ #include "cpu/jit/jit-codegen.hpp" // Set jump target address -static inline void dg_set_jmp_target(uint8 *jmp_addr, uint8 *addr) +static inline void dg_set_jmp_target_noflush(uint8 *jmp_addr, uint8 *addr) { #if defined(__powerpc__) || defined(__ppc__) // patch the branch destination @@ -33,7 +33,17 @@ static inline void dg_set_jmp_target(uint8 *jmp_addr, uint8 *addr) uint32 val = *ptr; val = (val & ~0x03fffffc) | ((addr - jmp_addr) & 0x03fffffc); *ptr = val; +#endif +#if defined(__i386__) || defined(__x86_64__) + // patch the branch destination + *(uint32 *)jmp_addr = addr - (jmp_addr + 4); +#endif +} +static inline void dg_set_jmp_target(uint8 *jmp_addr, uint8 *addr) +{ + dg_set_jmp_target_noflush(jmp_addr, addr); +#if defined(__powerpc__) || defined(__ppc__) // flush icache asm volatile ("dcbst 0,%0" : : "r"(ptr) : "memory"); asm volatile ("sync" : : : "memory"); @@ -41,11 +51,6 @@ static inline void dg_set_jmp_target(uint8 *jmp_addr, uint8 *addr) asm volatile ("sync" : : : "memory"); asm volatile ("isync" : : : "memory"); #endif -#if defined(__i386__) || defined(__x86_64__) - // patch the branch destination - *(uint32 *)jmp_addr = addr - (jmp_addr + 4); - // no need to flush icache explicitly -#endif } #ifdef SHEEPSHAVER diff --git a/SheepShaver/src/kpx_cpu/src/cpu/jit/dyngen-exec.h b/SheepShaver/src/kpx_cpu/src/cpu/jit/dyngen-exec.h index efedec08..8c12fe6b 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/jit/dyngen-exec.h +++ b/SheepShaver/src/kpx_cpu/src/cpu/jit/dyngen-exec.h @@ -115,6 +115,15 @@ extern int __op_PARAM1, __op_PARAM2, __op_PARAM3; #define DYNGEN_FAST_DISPATCH(TARGET) asm volatile ("jmp " ASM_NAME(TARGET)) #endif +#define DYNGEN_SLOW_DISPATCH(TARGET) do { \ + static const void __attribute__((unused)) *label1 = &&dummy_label1; \ + static const void __attribute__((unused)) *label2 = &&dummy_label2; \ + goto *((void *)TARGET); \ + dummy_label1: \ + dummy_label2: \ + dyngen_barrier(); \ +} while (0) + extern int __op_jmp0, __op_jmp1; // Sections handling diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp index ed9299b8..9c8b26e4 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp @@ -577,12 +577,6 @@ void OPPROTO op_spcflags_clear(void) powerpc_dyngen_helper::spcflags().clear(PARAM1); } - -/** - * Branch instructions - **/ - -#ifdef DYNGEN_FAST_DISPATCH #if defined(__x86_64__) #define FAST_COMPARE_SPECFLAGS_DISPATCH(SPCFLAGS, TARGET) \ asm volatile ("test %0,%0 ; jz " #TARGET : : "r" (SPCFLAGS)) @@ -591,7 +585,16 @@ void OPPROTO op_spcflags_clear(void) #define FAST_COMPARE_SPECFLAGS_DISPATCH(SPCFLAGS, TARGET) \ if (SPCFLAGS == 0) DYNGEN_FAST_DISPATCH(TARGET) #endif -#endif + +void OPPROTO op_spcflags_check(void) +{ + FAST_COMPARE_SPECFLAGS_DISPATCH(powerpc_dyngen_helper::spcflags().get(), __op_jmp0); +} + + +/** + * Branch instructions + **/ template< int bo > static inline void do_prep_branch_bo(void) @@ -640,78 +643,49 @@ DEFINE_OP(1x1x,(1,0,1,0)); #undef DEFINE_OP #undef BO -template< bool chain > +void OPPROTO op_branch_chain_1(void) +{ + DYNGEN_FAST_DISPATCH(__op_jmp0); +} + +void OPPROTO op_branch_chain_2(void) +{ + if (T1) + DYNGEN_FAST_DISPATCH(__op_jmp0); + else + DYNGEN_FAST_DISPATCH(__op_jmp1); + dyngen_barrier(); +} + static inline void do_execute_branch_1(uint32 tpc) { -#ifdef DYNGEN_FAST_DISPATCH - if (chain) - FAST_COMPARE_SPECFLAGS_DISPATCH(powerpc_dyngen_helper::spcflags().get(), __op_jmp0); -#endif powerpc_dyngen_helper::set_pc(tpc); - dyngen_barrier(); } -void op_branch_1_T0(void) +void OPPROTO op_branch_1_T0(void) { - do_execute_branch_1<0>(T0); + do_execute_branch_1(T0); } -void op_branch_chain_1_T0(void) +void OPPROTO op_branch_1_im(void) { - do_execute_branch_1<1>(T0); + do_execute_branch_1(PARAM1); } -void op_branch_1_im(void) -{ - do_execute_branch_1<0>(PARAM1); -} - -void op_branch_chain_1_im(void) -{ - do_execute_branch_1<1>(PARAM1); -} - -template< bool chain > static inline void do_execute_branch_2(uint32 tpc, uint32 npc) { -#ifdef DYNGEN_FAST_DISPATCH - if (chain) { - T2 = powerpc_dyngen_helper::spcflags().get(); - if (T1) { - FAST_COMPARE_SPECFLAGS_DISPATCH(T2, __op_jmp0); - T0 = tpc; - } - else { - FAST_COMPARE_SPECFLAGS_DISPATCH(T2, __op_jmp1); - T0 = npc; - } - } - else -#endif - - T0 = T1 ? tpc : npc; - powerpc_dyngen_helper::set_pc(T0); + powerpc_dyngen_helper::set_pc(T1 ? tpc : npc); dyngen_barrier(); } -void op_branch_2_T0_im(void) +void OPPROTO op_branch_2_T0_im(void) { - do_execute_branch_2<0>(T0, PARAM1); + do_execute_branch_2(T0, PARAM1); } -void op_branch_chain_2_T0_im(void) +void OPPROTO op_branch_2_im_im(void) { - do_execute_branch_2<1>(T0, PARAM1); -} - -void op_branch_2_im_im(void) -{ - do_execute_branch_2<0>(PARAM1, PARAM2); -} - -void op_branch_chain_2_im_im(void) -{ - do_execute_branch_2<1>(PARAM1, PARAM2); + do_execute_branch_2(PARAM1, PARAM2); } @@ -1360,13 +1334,10 @@ void OPPROTO op_dcbz_T0(void) void OPPROTO op_jump_next_A0(void) { - // Make sure there is no pending interrupt request - if (likely(powerpc_dyngen_helper::spcflags().empty())) { - powerpc_block_info *bi = (powerpc_block_info *)A0; - uint32 pc = powerpc_dyngen_helper::get_pc(); - if (likely(bi->pc == pc) || likely((bi = powerpc_dyngen_helper::find_block(pc)) != NULL)) - goto *(bi->entry_point); - } + powerpc_block_info *bi = (powerpc_block_info *)A0; + uint32 pc = powerpc_dyngen_helper::get_pc(); + if (likely(bi->pc == pc) || likely((bi = powerpc_dyngen_helper::find_block(pc)) != NULL)) + goto *(bi->entry_point); dyngen_barrier(); } diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.cpp index e7f35048..b4f07daf 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.cpp @@ -52,6 +52,18 @@ powerpc_dyngen::powerpc_dyngen(dyngen_cpu_base cpu, int cache_size) #endif } +uint8 *powerpc_dyngen::gen_start(uint32 pc) +{ + // Generate exit if there are pending spcflags + uint8 *p = basic_dyngen::gen_start(); + gen_op_spcflags_check(); + gen_op_set_PC_im(pc); + gen_exec_return(); + dg_set_jmp_target_noflush(jmp_addr[0], gen_align()); + jmp_addr[0] = NULL; + return p; +} + void powerpc_dyngen::gen_compare_T0_T1(int crf) { gen_op_compare_T0_T1(); @@ -250,30 +262,22 @@ void powerpc_dyngen::gen_bc(int bo, int bi, uint32 tpc, uint32 npc, bool direct_ if (BO_CONDITIONAL_BRANCH(bo) || BO_DECREMENT_CTR(bo)) { // two-way branches - if (tpc != 0xffffffff) { - if (direct_chaining) - gen_op_branch_chain_2_im_im(tpc, npc); - else - gen_op_branch_2_im_im(tpc, npc); - } + if (direct_chaining) + gen_op_branch_chain_2(); else { - if (direct_chaining) - gen_op_branch_chain_2_T0_im(npc); + if (tpc != 0xffffffff) + gen_op_branch_2_im_im(tpc, npc); else gen_op_branch_2_T0_im(npc); } } else { // one-way branches - if (tpc != 0xffffffff) { - if (direct_chaining) - gen_op_branch_chain_1_im(tpc); - else - gen_op_branch_1_im(tpc); - } + if (direct_chaining) + gen_op_branch_chain_1(); else { - if (direct_chaining) - gen_op_branch_chain_1_T0(); + if (tpc != 0xffffffff) + gen_op_branch_1_im(tpc); else gen_op_branch_1_T0(); } diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp index a4c5d852..6126fad7 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp @@ -52,6 +52,9 @@ public: // Default constructor powerpc_dyngen(dyngen_cpu_base cpu, int cache_size = -1); + // Generate prologue + uint8 *gen_start(uint32 pc); + // Load/store registers void gen_load_T0_GPR(int i); void gen_load_T1_GPR(int i); diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp index 1e38c9ac..deccc209 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp @@ -145,7 +145,7 @@ powerpc_cpu::compile_block(uint32 entry_point) again: block_info *bi = block_cache.new_blockinfo(); bi->init(entry_point); - bi->entry_point = dg.gen_start(); + bi->entry_point = dg.gen_start(entry_point); // Direct block chaining support variables bool use_direct_block_chaining = false; @@ -163,7 +163,7 @@ powerpc_cpu::compile_block(uint32 entry_point) if (ii->cflow & CFLOW_END_BLOCK) done_compile = true; - // Assume we can compile this opcode; + // Assume we can compile this opcode compile_status = COMPILE_CODE_OK; #if PPC_FLIGHT_RECORDER @@ -1538,7 +1538,7 @@ powerpc_cpu::compile_block(uint32 entry_point) assert(dg.jmp_addr[i] != NULL); bi->li[i].jmp_addr = dg.jmp_addr[i]; bi->li[i].jmp_resolve_addr = p; - dg_set_jmp_target(bi->li[i].jmp_addr, bi->li[i].jmp_resolve_addr); + dg_set_jmp_target_noflush(bi->li[i].jmp_addr, bi->li[i].jmp_resolve_addr); } } }