diff --git a/SheepShaver/src/kpx_cpu/src/cpu/block-cache.hpp b/SheepShaver/src/kpx_cpu/src/cpu/block-cache.hpp index 66644f92..7f6ee53e 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/block-cache.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/block-cache.hpp @@ -128,6 +128,7 @@ inline void block_cache< block_info, block_allocator >::clear_range(uintptr star q = p; p = p->next; if (q->intersect(start, end)) { + q->invalidate(); remove_from_cl_list(q); remove_from_list(q); delete_blockinfo(q); diff --git a/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp b/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp index 406b850d..30179c7a 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp @@ -25,6 +25,30 @@ #include "cpu/jit/jit-cache.hpp" #include JIT_TARGET_INCLUDE(jit-target-cache.hpp) +// Set jump target address +static inline void dg_set_jmp_target(uint8 *jmp_addr, uint8 *addr) +{ +#if defined(__powerpc__) + // patch the branch destination + uint32 *ptr = (uint32 *)jmp_addr; + uint32 val = *ptr; + val = (val & ~0x03fffffc) | ((addr - jmp_addr) & 0x03fffffc); + *ptr = val; + + // flush icache + asm volatile ("dcbst 0,%0" : : "r"(ptr) : "memory"); + asm volatile ("sync" : : : "memory"); + asm volatile ("icbi 0,%0" : : "r"(ptr) : "memory"); + asm volatile ("sync" : : : "memory"); + asm volatile ("isync" : : : "memory"); +#endif +#if defined(__i386__) || defined(__x86_64__) + // patch the branch destination + *(uint32 *)jmp_addr = addr - (jmp_addr + 4); + // no need to flush icache explicitly +#endif +} + #ifdef SHEEPSHAVER class powerpc_cpu; typedef powerpc_cpu *dyngen_cpu_base; @@ -206,41 +230,10 @@ public: #undef DEFINE_ALIAS_3 #undef DEFINE_ALIAS_RAW -#if DYNGEN_DIRECT_BLOCK_CHAINING - // Jump addresses for direct chaining + // Address of jump offset to patch for direct chaining uint8 *jmp_addr[2]; - - // Set jump target address - void set_jmp_target(uint8 *jmp_addr, uint8 *addr); -#endif }; -#if DYNGEN_DIRECT_BLOCK_CHAINING -inline void -basic_dyngen::set_jmp_target(uint8 *jmp_addr, uint8 *addr) -{ -#if defined(__powerpc__) - // patch the branch destination - uint32 *ptr = (uint32 *)jmp_addr; - uint32 val = *ptr; - val = (val & ~0x03fffffc) | ((addr - jmp_addr) & 0x03fffffc); - *ptr = val; - - // flush icache - asm volatile ("dcbst 0,%0" : : "r"(ptr) : "memory"); - asm volatile ("sync" : : : "memory"); - asm volatile ("icbi 0,%0" : : "r"(ptr) : "memory"); - asm volatile ("sync" : : : "memory"); - asm volatile ("isync" : : : "memory"); -#endif -#if defined(__i386__) || defined(__x86_64__) - // patch the branch destination - *(uint32 *)jmp_addr = addr - (jmp_addr + 4); - // no need to flush icache explicitly -#endif -} -#endif - inline bool basic_dyngen::direct_jump_possible(uintptr target) const { diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-blockinfo.hpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-blockinfo.hpp index 09f7e133..0c112997 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-blockinfo.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-blockinfo.hpp @@ -44,13 +44,16 @@ struct powerpc_block_info #if PPC_ENABLE_JIT uint8 * entry_point; #if DYNGEN_DIRECT_BLOCK_CHAINING - uint8 * jmp_addr[2]; // Address of target native branch offset to patch - uint32 jmp_pc[2]; // Target jump addresses in emulated address space + uint8 * jmp_resolve_addr[2]; // Address of default code to resolve target addr + uint8 * jmp_addr[2]; // Address of target native branch offset to patch + uint32 jmp_pc[2]; // Target jump addresses in emulated address space + static const uint32 INVALID_PC = 0xffffffff; // An invalid PC address to mark jmp_pc[] as stale #endif #endif uintptr min_pc, max_pc; bool intersect(uintptr start, uintptr end); + void invalidate(); }; inline bool diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp index 9f0d55b4..d8e027f0 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp @@ -446,7 +446,7 @@ void *powerpc_cpu::compile_chain_block(block_info *sbi) tbi = compile_block(tpc); assert(tbi && tbi->pc == tpc); - codegen.set_jmp_target(sbi->jmp_addr[n], tbi->entry_point); + dg_set_jmp_target(sbi->jmp_addr[n], tbi->entry_point); return tbi->entry_point; } #endif @@ -660,6 +660,19 @@ void powerpc_cpu::invalidate_cache() #endif } +inline void powerpc_block_info::invalidate() +{ +#if DYNGEN_DIRECT_BLOCK_CHAINING + for (int i = 0; i < 2; i++) { + uint32 tpc = jmp_pc[i]; + // For any jump within page boundaries, reset the jump address + // to the target block resolver (trampoline) + if (tpc != INVALID_PC && ((tpc ^ pc) >> 12) == 0) + dg_set_jmp_target(jmp_addr[i], jmp_resolve_addr[i]); + } +#endif +} + void powerpc_cpu::invalidate_cache_range(uintptr start, uintptr end) { D(bug("Invalidate cache block [%08x - %08x]\n", start, end)); diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp index 55864a9c..00261880 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp @@ -50,6 +50,12 @@ static inline bool is_read_only_memory(uintptr addr) return false; } +// Returns TRUE if we can directly generate a jump to the target block +static inline bool direct_chaining_possible(uint32 bpc, uint32 tpc) +{ + return ((bpc ^ tpc) >> 12) == 0 || is_read_only_memory(tpc); +} + /** * Basic block disassemblers @@ -132,6 +138,12 @@ powerpc_cpu::compile_block(uint32 entry_point) block_info *bi = block_cache.new_blockinfo(); bi->init(entry_point); bi->entry_point = dg.gen_start(); +#if DYNGEN_DIRECT_BLOCK_CHAINING + for (int i = 0; i < 2; i++) { + dg.jmp_addr[i] = NULL; + bi->jmp_pc[i] = block_info::INVALID_PC; + } +#endif // Direct block chaining support variables bool use_direct_block_chaining = false; @@ -452,7 +464,7 @@ powerpc_cpu::compile_block(uint32 entry_point) #if DYNGEN_DIRECT_BLOCK_CHAINING // Use direct block chaining for in-page jumps or jumps to ROM area const uint32 npc = dpc + 4; - if (((tpc & -4096) == (npc & -4096)) || is_read_only_memory(tpc)) { + if (direct_chaining_possible(bi->pc, tpc)) { use_direct_block_chaining = true; bi->jmp_pc[0] = tpc; bi->jmp_pc[1] = npc; @@ -507,10 +519,17 @@ powerpc_cpu::compile_block(uint32 entry_point) } #endif +#if DYNGEN_DIRECT_BLOCK_CHAINING + // Use direct block chaining, addresses will be resolved at execution + if (direct_chaining_possible(bi->pc, tpc)) { + use_direct_block_chaining = true; + bi->jmp_pc[0] = tpc; + } +#endif dg.gen_mov_32_A0_im(tpc); // BO field is built so that we always branch to A0 - dg.gen_bc_A0(BO_MAKE(0,0,0,0), 0, 0, false); + dg.gen_bc_A0(BO_MAKE(0,0,0,0), 0, 0, use_direct_block_chaining); break; } case PPC_I(CMP): // Compare @@ -1468,24 +1487,19 @@ powerpc_cpu::compile_block(uint32 entry_point) if (use_direct_block_chaining) { typedef void *(*func_t)(dyngen_cpu_base); func_t func = (func_t)nv_mem_fun(&powerpc_cpu::compile_chain_block).ptr(); - - // Taken PC - uint8 *p = dg.gen_start(); - dg.gen_mov_ad_T0_im(((uintptr)bi) | 0); - dg.gen_invoke_CPU_T0_ret_A0(func); - dg.gen_jmp_A0(); - dg.gen_end(); - bi->jmp_addr[0] = dg.jmp_addr[0]; - dg.set_jmp_target(dg.jmp_addr[0], p); - - // Not taken PC - p = dg.gen_start(); - dg.gen_mov_ad_T0_im(((uintptr)bi) | 1); - dg.gen_invoke_CPU_T0_ret_A0(func); - dg.gen_jmp_A0(); - dg.gen_end(); - bi->jmp_addr[1] = dg.jmp_addr[1]; - dg.set_jmp_target(dg.jmp_addr[1], p); + for (int i = 0; i < 2; i++) { + if (bi->jmp_pc[i] != block_info::INVALID_PC) { + uint8 *p = dg.gen_start(); + dg.gen_mov_ad_T0_im(((uintptr)bi) | i); + dg.gen_invoke_CPU_T0_ret_A0(func); + dg.gen_jmp_A0(); + dg.gen_end(); + assert(dg.jmp_addr[i] != NULL); + bi->jmp_addr[i] = dg.jmp_addr[i]; + bi->jmp_resolve_addr[i] = p; + dg_set_jmp_target(bi->jmp_addr[i], bi->jmp_resolve_addr[i]); + } + } } #endif