mirror of
https://github.com/kanjitalk755/macemu.git
synced 2025-02-25 12:29:02 +00:00
direct block chaining, aka faster block dispatcher
This commit is contained in:
parent
6c0e2a9f2a
commit
08bcd2653d
@ -88,6 +88,10 @@
|
||||
#define PPC_PROFILE_COMPILE_TIME 0
|
||||
#define PPC_PROFILE_GENERIC_CALLS 0
|
||||
#define KPX_MAX_CPUS 1
|
||||
// Direct block chaining has only been tested on PowerPC so far
|
||||
#if defined(__powerpc__)
|
||||
#define DYNGEN_DIRECT_BLOCK_CHAINING 1
|
||||
#endif
|
||||
#else
|
||||
// Mac ROM is write protected
|
||||
#define ROM_IS_WRITE_PROTECTED 1
|
||||
|
@ -273,13 +273,6 @@ DEFINE_OP(8,T0,1,T1);
|
||||
dyngen_barrier(); \
|
||||
} while (0)
|
||||
|
||||
#if defined(__powerpc__)
|
||||
#define FAST_DISPATCH(TARGET) asm volatile ("b " #TARGET)
|
||||
#endif
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
#define FAST_DISPATCH(TARGET) asm volatile ("jmp " #TARGET)
|
||||
#endif
|
||||
|
||||
extern "C" void OPPROTO op_execute(uint8 *entry_point, basic_cpu *this_cpu);
|
||||
void OPPROTO op_execute(uint8 *entry_point, basic_cpu *this_cpu)
|
||||
{
|
||||
@ -340,13 +333,18 @@ void OPPROTO op_jmp_slow(void)
|
||||
|
||||
void OPPROTO op_jmp_fast(void)
|
||||
{
|
||||
#ifdef FAST_DISPATCH
|
||||
FAST_DISPATCH(__op_param1);
|
||||
#ifdef DYNGEN_FAST_DISPATCH
|
||||
DYNGEN_FAST_DISPATCH(__op_param1);
|
||||
#else
|
||||
SLOW_DISPATCH(PARAM1);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Jump op: hands the address held in reg_A0 to SLOW_DISPATCH, the same
// dispatch macro op_jmp_fast falls back to when DYNGEN_FAST_DISPATCH is
// not defined.
void OPPROTO op_jmp_A0(void)
|
||||
{
|
||||
SLOW_DISPATCH(reg_A0);
|
||||
}
|
||||
|
||||
// Architectures with register-based calling conventions don't need a stack frame
|
||||
#if defined(__powerpc__) || defined(__x86_64__)
|
||||
#define DEFINE_OP(NAME, CODE) \
|
||||
@ -477,4 +475,16 @@ DEFINE_OP(op_invoke_direct_CPU_im_im, {
|
||||
CALL(func(CPU, PARAM2, PARAM3));
|
||||
});
|
||||
|
||||
// Indirect invoke op. On entry reg_A0 holds the callee address; the callee is
// invoked as func(CPU, reg_T0) and the pointer it returns is written back to
// reg_A0, replacing the callee address.
DEFINE_OP(op_invoke_CPU_T0_ret_A0, {
|
||||
typedef void *(*func_t)(void *, uintptr);
|
||||
func_t func = (func_t)reg_A0;
|
||||
reg_A0 = (uintptr)CALL(func(CPU, reg_T0));
|
||||
});
|
||||
|
||||
// Direct invoke op. Same call shape as op_invoke_CPU_T0_ret_A0, except the
// callee address comes from the op parameter PARAM1 instead of reg_A0.
// The callee is invoked as func(CPU, reg_T0) and its pointer result is
// stored in reg_A0.
DEFINE_OP(op_invoke_direct_CPU_T0_ret_A0, {
|
||||
typedef void *(*func_t)(void *, uintptr);
|
||||
func_t func = (func_t)PARAM1;
|
||||
reg_A0 = (uintptr)CALL(func(CPU, reg_T0));
|
||||
});
|
||||
|
||||
#undef DEFINE_OP
|
||||
|
@ -135,3 +135,15 @@ basic_dyngen::gen_invoke_CPU_im_im(void (*func)(dyngen_cpu_base, uint32, uint32)
|
||||
gen_op_invoke_CPU_im_im(param1, param2);
|
||||
}
|
||||
}
|
||||
|
||||
// Emit code that calls `func` and leaves its pointer result in A0.
//
// If direct_call_possible() accepts the address, the direct op is emitted
// with the address as its parameter. Otherwise the address is first loaded
// into A0 and the indirect op (which reads its callee from A0) is emitted.
//
// NOTE(review): the parameter type declares a one-argument callee, but both
// ops call it as func(CPU, reg_T0) through a (void *, uintptr) function
// pointer type -- confirm the declared signature is intentionally narrower.
void
|
||||
basic_dyngen::gen_invoke_CPU_T0_ret_A0(void *(*func)(dyngen_cpu_base))
|
||||
{
|
||||
if (direct_call_possible((uintptr)func))
|
||||
gen_op_invoke_direct_CPU_T0_ret_A0((uintptr)func);
|
||||
else {
|
||||
gen_op_mov_ad_A0_im((uintptr)func);
|
||||
gen_op_invoke_CPU_T0_ret_A0();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -84,6 +84,7 @@ public:
|
||||
void gen_invoke_CPU_T0(void (*func)(dyngen_cpu_base, uint32));
|
||||
void gen_invoke_CPU_im(void (*func)(dyngen_cpu_base, uint32), uint32 value);
|
||||
void gen_invoke_CPU_im_im(void (*func)(dyngen_cpu_base, uint32, uint32), uint32 param1, uint32 param2);
|
||||
void gen_invoke_CPU_T0_ret_A0(void *(*func)(dyngen_cpu_base));
|
||||
|
||||
// Raw aliases
|
||||
#define DEFINE_ALIAS_RAW(NAME, ARGLIST, ARGS) \
|
||||
@ -176,6 +177,7 @@ public:
|
||||
// Jump instructions
|
||||
DEFINE_ALIAS(jmp_slow,1);
|
||||
DEFINE_ALIAS(jmp_fast,1);
|
||||
DEFINE_ALIAS(jmp_A0,0);
|
||||
|
||||
// Load/Store instructions
|
||||
DEFINE_ALIAS(load_u32_T0_A0_T1,0);
|
||||
@ -203,8 +205,37 @@ public:
|
||||
#undef DEFINE_ALIAS_2
|
||||
#undef DEFINE_ALIAS_3
|
||||
#undef DEFINE_ALIAS_RAW
|
||||
|
||||
#if DYNGEN_DIRECT_BLOCK_CHAINING
|
||||
// Jump addresses for direct chaining
|
||||
uint8 *jmp_addr[2];
|
||||
|
||||
// Set jump target address
|
||||
void set_jmp_target(uint8 *jmp_addr, uint8 *addr);
|
||||
#endif
|
||||
};
|
||||
|
||||
#if DYNGEN_DIRECT_BLOCK_CHAINING
|
||||
// Redirect the branch instruction located at `jmp_addr` so that it jumps to
// `addr`.
//
//   jmp_addr  address of the 32-bit branch instruction to patch
//   addr      new branch destination
//
// Only a PowerPC implementation is provided; on any other host the body is
// empty and the call does nothing.
//
// NOTE(review): the displacement is masked to the 0x03fffffc field without a
// range check, so a target too far from jmp_addr to fit in that field would
// be silently truncated -- confirm callers keep both addresses close enough.
inline void
|
||||
basic_dyngen::set_jmp_target(uint8 *jmp_addr, uint8 *addr)
|
||||
{
|
||||
#if defined(__powerpc__)
|
||||
// Patch the branch destination: keep every instruction bit outside the
// 0x03fffffc field and replace that field with the byte offset from the
// branch instruction itself to the new target (a relative displacement).
|
||||
uint32 *ptr = (uint32 *)jmp_addr;
|
||||
uint32 val = *ptr;
|
||||
val = (val & ~0x03fffffc) | ((addr - jmp_addr) & 0x03fffffc);
|
||||
*ptr = val;
|
||||
|
||||
// Flush icache so the patched instruction is the one that gets executed.
// The order of the following instructions is significant: push the
// modified word out of the data cache, wait for completion, invalidate the
// matching instruction cache block, wait again, then resynchronize
// instruction fetch.
|
||||
asm volatile ("dcbst 0,%0" : : "r"(ptr) : "memory");
|
||||
asm volatile ("sync" : : : "memory");
|
||||
asm volatile ("icbi 0,%0" : : "r"(ptr) : "memory");
|
||||
asm volatile ("sync" : : : "memory");
|
||||
asm volatile ("isync" : : : "memory");
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
inline bool
|
||||
basic_dyngen::direct_jump_possible(uintptr target) const
|
||||
{
|
||||
|
@ -96,6 +96,14 @@ extern int __op_cpuparam;
|
||||
#define CPUPARAM ((long)(&__op_cpuparam))
|
||||
#endif
|
||||
|
||||
// Direct block chaining support
|
||||
#if defined(__powerpc__)
|
||||
#define DYNGEN_FAST_DISPATCH(TARGET) asm volatile ("b " #TARGET)
|
||||
#endif
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
#define DYNGEN_FAST_DISPATCH(TARGET) asm volatile ("jmp " #TARGET)
|
||||
#endif
|
||||
|
||||
extern int __op_jmp0, __op_jmp1;
|
||||
|
||||
#endif /* DYNGEN_EXEC_H */
|
||||
|
@ -867,7 +867,7 @@ void gen_code(const char *name, const char *demangled_name,
|
||||
runtime to do translated block
|
||||
chaining: the offset of the instruction
|
||||
needs to be stored */
|
||||
fprintf(outfile, " jmp_offsets[%d] = %d + (code_ptr() - gen_code_buf);\n",
|
||||
fprintf(outfile, " jmp_addr[%d] = code_ptr() + %d;\n",
|
||||
n, rel->r_offset - start_offset);
|
||||
continue;
|
||||
}
|
||||
|
@ -44,6 +44,17 @@
|
||||
#define DYNGEN_ASM_OPTS 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* DYNGEN_DIRECT_BLOCK_CHAINING
|
||||
*
|
||||
* Define to enable direct block chaining on platforms supporting
|
||||
* that feature, e.g. PowerPC.
|
||||
**/
|
||||
|
||||
#ifndef DYNGEN_DIRECT_BLOCK_CHAINING
|
||||
#define DYNGEN_DIRECT_BLOCK_CHAINING 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Helpers to reach JIT backends headers
|
||||
**/
|
||||
|
@ -21,6 +21,7 @@
|
||||
#ifndef PPC_BLOCKINFO_H
|
||||
#define PPC_BLOCKINFO_H
|
||||
|
||||
#include "cpu/jit/jit-config.hpp"
|
||||
#include "nvmemfun.hpp"
|
||||
#include "basic-blockinfo.hpp"
|
||||
|
||||
@ -42,6 +43,9 @@ struct powerpc_block_info
|
||||
#endif
|
||||
#if PPC_ENABLE_JIT
|
||||
uint8 * entry_point;
|
||||
#if DYNGEN_DIRECT_BLOCK_CHAINING
|
||||
uint8 * jmp_addr[2]; // Jump addresses for direct chaining
|
||||
#endif
|
||||
#endif
|
||||
uintptr min_pc, max_pc;
|
||||
|
||||
|
@ -431,6 +431,26 @@ bool powerpc_cpu::check_spcflags()
|
||||
return true;
|
||||
}
|
||||
|
||||
#if DYNGEN_DIRECT_BLOCK_CHAINING
|
||||
// Resolve a chained jump out of a translated block.
//
// `sbi` is a tagged pointer: the low two bits carry the index of the jump
// slot to patch and the remaining bits are the source block_info address.
// The function finds (or compiles) the translated block for the current
// pc(), patches the source block's jump slot to branch straight to it, and
// returns that block's entry point.
//
// The backpatch trampolines built in compile_block pass the block pointer
// OR-ed with 0 (taken PC) or 1 (not taken PC).
void *powerpc_cpu::compile_chain_block(block_info *sbi)
|
||||
{
|
||||
// Block index is stuffed into the source basic block pointer,
|
||||
// which is aligned at least on 4-byte boundaries
|
||||
// NOTE(review): the mask admits values 0..3 while jmp_addr[] has two
// entries; the trampolines visible in this change only ever encode 0 or 1.
const int n = ((uintptr)sbi) & 3;
|
||||
sbi = (block_info *)(((uintptr)sbi) & ~3L);
|
||||
// NOTE(review): bpc is not referenced anywhere else in this function.
const uint32 bpc = sbi->pc;
|
||||
|
||||
// Target is whatever pc() currently reports; reuse a cached translation
// if one exists, otherwise compile the block now.
const uint32 tpc = pc();
|
||||
block_info *tbi = block_cache.find(tpc);
|
||||
if (tbi == NULL)
|
||||
tbi = compile_block(tpc);
|
||||
assert(tbi && tbi->pc == tpc);
|
||||
|
||||
// Rewrite the source block's jump slot so later executions branch directly
// to the target, then hand the entry point back to the caller.
codegen.set_jmp_target(sbi->jmp_addr[n], tbi->entry_point);
|
||||
return tbi->entry_point;
|
||||
}
|
||||
#endif
|
||||
|
||||
void powerpc_cpu::execute(uint32 entry)
|
||||
{
|
||||
pc() = entry;
|
||||
@ -644,6 +664,13 @@ void powerpc_cpu::invalidate_cache_range(uintptr start, uintptr end)
|
||||
{
|
||||
D(bug("Invalidate cache block [%08x - %08x]\n", start, end));
|
||||
#if PPC_DECODE_CACHE || PPC_ENABLE_JIT
|
||||
#if DYNGEN_DIRECT_BLOCK_CHAINING
|
||||
if (use_jit) {
|
||||
// Invalidate on page boundaries
|
||||
start &= -4096;
|
||||
end = (end + 4095) & -4096;
|
||||
}
|
||||
#endif
|
||||
block_cache.clear_range(start, end);
|
||||
#endif
|
||||
}
|
||||
|
@ -337,6 +337,9 @@ private:
|
||||
friend class powerpc_dyngen;
|
||||
powerpc_dyngen codegen;
|
||||
block_info *compile_block(uint32 entry);
|
||||
#if DYNGEN_DIRECT_BLOCK_CHAINING
|
||||
void *compile_chain_block(block_info *sbi);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Semantic action templates
|
||||
|
@ -684,7 +684,7 @@ DEFINE_OP(branch_if_not_T0);
|
||||
#undef DEFINE_OP
|
||||
#undef DEFINE_OP_CTR
|
||||
|
||||
template< int bo >
|
||||
template< int bo, bool chain >
|
||||
static inline void do_execute_branch_bo(uint32 tpc, uint32 npc)
|
||||
{
|
||||
bool ctr_ok = true;
|
||||
@ -706,20 +706,33 @@ static inline void do_execute_branch_bo(uint32 tpc, uint32 npc)
|
||||
powerpc_dyngen_helper::set_ctr(ctr);
|
||||
}
|
||||
|
||||
if (ctr_ok && cond_ok)
|
||||
if (ctr_ok && cond_ok) {
|
||||
powerpc_dyngen_helper::set_pc(tpc);
|
||||
else
|
||||
#ifdef DYNGEN_FAST_DISPATCH
|
||||
if (chain && powerpc_dyngen_helper::spcflags().empty())
|
||||
DYNGEN_FAST_DISPATCH(__op_jmp0);
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
powerpc_dyngen_helper::set_pc(npc);
|
||||
#ifdef DYNGEN_FAST_DISPATCH
|
||||
if (chain && powerpc_dyngen_helper::spcflags().empty())
|
||||
DYNGEN_FAST_DISPATCH(__op_jmp1);
|
||||
#endif
|
||||
}
|
||||
|
||||
dyngen_barrier();
|
||||
}
|
||||
|
||||
#define BO(A,B,C,D) (((A) << 4)| ((B) << 3) | ((C) << 2) | ((D) << 1))
|
||||
#define DEFINE_OP(BO_SUFFIX, BO_VALUE) \
|
||||
void OPPROTO op_branch_A0_bo_##BO_SUFFIX(void) \
|
||||
{ \
|
||||
do_execute_branch_bo<BO BO_VALUE>(A0, PARAM1); \
|
||||
#define DEFINE_OP1(BO_SUFFIX, BO_VALUE, CHAIN) \
|
||||
void OPPROTO op_branch_A0_bo_##BO_SUFFIX##_##CHAIN(void) \
|
||||
{ \
|
||||
do_execute_branch_bo<BO BO_VALUE, CHAIN>(A0, PARAM1); \
|
||||
}
|
||||
#define DEFINE_OP(BO_SUFFIX, BO_VALUE) \
|
||||
DEFINE_OP1(BO_SUFFIX, BO_VALUE, 0) \
|
||||
DEFINE_OP1(BO_SUFFIX, BO_VALUE, 1)
|
||||
|
||||
DEFINE_OP(0000,(0,0,0,0));
|
||||
DEFINE_OP(0001,(0,0,0,1));
|
||||
|
@ -236,34 +236,58 @@ DEFINE_INSN(store, T0);
|
||||
|
||||
#undef DEFINE_INSN
|
||||
|
||||
void powerpc_dyngen::gen_bc_A0(int bo, int bi, uint32 npc)
|
||||
void powerpc_dyngen::gen_bc_A0(int bo, int bi, uint32 npc, bool direct_chaining)
|
||||
{
|
||||
#if 1
|
||||
if (BO_CONDITIONAL_BRANCH(bo)) {
|
||||
gen_load_T0_CR();
|
||||
gen_and_32_T0_im(1 << (31 - bi));
|
||||
}
|
||||
if (direct_chaining) {
|
||||
switch (bo >> 1) {
|
||||
#define _(A,B,C,D) (((A) << 3)| ((B) << 2) | ((C) << 1) | (D))
|
||||
case _(0,0,0,0): gen_op_branch_A0_bo_0000(npc); break;
|
||||
case _(0,0,0,1): gen_op_branch_A0_bo_0001(npc); break;
|
||||
case _(0,0,0,0): gen_op_branch_A0_bo_0000_1(npc); break;
|
||||
case _(0,0,0,1): gen_op_branch_A0_bo_0001_1(npc); break;
|
||||
case _(0,0,1,0):
|
||||
case _(0,0,1,1): gen_op_branch_A0_bo_001x(npc); break;
|
||||
case _(0,1,0,0): gen_op_branch_A0_bo_0100(npc); break;
|
||||
case _(0,1,0,1): gen_op_branch_A0_bo_0101(npc); break;
|
||||
case _(0,0,1,1): gen_op_branch_A0_bo_001x_1(npc); break;
|
||||
case _(0,1,0,0): gen_op_branch_A0_bo_0100_1(npc); break;
|
||||
case _(0,1,0,1): gen_op_branch_A0_bo_0101_1(npc); break;
|
||||
case _(0,1,1,0):
|
||||
case _(0,1,1,1): gen_op_branch_A0_bo_011x(npc); break;
|
||||
case _(0,1,1,1): gen_op_branch_A0_bo_011x_1(npc); break;
|
||||
case _(1,0,0,0):
|
||||
case _(1,1,0,0): gen_op_branch_A0_bo_1x00(npc); break;
|
||||
case _(1,1,0,0): gen_op_branch_A0_bo_1x00_1(npc); break;
|
||||
case _(1,0,0,1):
|
||||
case _(1,1,0,1): gen_op_branch_A0_bo_1x01(npc); break;
|
||||
case _(1,1,0,1): gen_op_branch_A0_bo_1x01_1(npc); break;
|
||||
case _(1,0,1,0):
|
||||
case _(1,0,1,1):
|
||||
case _(1,1,1,0):
|
||||
case _(1,1,1,1): gen_op_branch_A0_bo_1x1x(); break;
|
||||
case _(1,1,1,1): gen_op_branch_A0_bo_1x1x_1(); break;
|
||||
#undef _
|
||||
default: abort();
|
||||
}
|
||||
} else {
|
||||
switch (bo >> 1) {
|
||||
#define _(A,B,C,D) (((A) << 3)| ((B) << 2) | ((C) << 1) | (D))
|
||||
case _(0,0,0,0): gen_op_branch_A0_bo_0000_0(npc); break;
|
||||
case _(0,0,0,1): gen_op_branch_A0_bo_0001_0(npc); break;
|
||||
case _(0,0,1,0):
|
||||
case _(0,0,1,1): gen_op_branch_A0_bo_001x_0(npc); break;
|
||||
case _(0,1,0,0): gen_op_branch_A0_bo_0100_0(npc); break;
|
||||
case _(0,1,0,1): gen_op_branch_A0_bo_0101_0(npc); break;
|
||||
case _(0,1,1,0):
|
||||
case _(0,1,1,1): gen_op_branch_A0_bo_011x_0(npc); break;
|
||||
case _(1,0,0,0):
|
||||
case _(1,1,0,0): gen_op_branch_A0_bo_1x00_0(npc); break;
|
||||
case _(1,0,0,1):
|
||||
case _(1,1,0,1): gen_op_branch_A0_bo_1x01_0(npc); break;
|
||||
case _(1,0,1,0):
|
||||
case _(1,0,1,1):
|
||||
case _(1,1,1,0):
|
||||
case _(1,1,1,1): gen_op_branch_A0_bo_1x1x_0(); break;
|
||||
#undef _
|
||||
default: abort();
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (BO_CONDITIONAL_BRANCH(bo)) {
|
||||
gen_load_T0_CR();
|
||||
|
@ -219,7 +219,7 @@ public:
|
||||
void gen_store_single_F0_A0_im(int32 offset);
|
||||
|
||||
// Branch instructions
|
||||
void gen_bc_A0(int bo, int bi, uint32 npc);
|
||||
void gen_bc_A0(int bo, int bi, uint32 npc, bool direct_chaining);
|
||||
|
||||
// Vector instructions
|
||||
void gen_load_ad_VD_VR(int i);
|
||||
|
@ -133,6 +133,9 @@ powerpc_cpu::compile_block(uint32 entry_point)
|
||||
bi->init(entry_point);
|
||||
bi->entry_point = dg.gen_start();
|
||||
|
||||
// Direct block chaining support variables
|
||||
bool use_direct_block_chaining = false;
|
||||
|
||||
uint32 dpc = entry_point - 4;
|
||||
uint32 min_pc, max_pc;
|
||||
min_pc = max_pc = entry_point;
|
||||
@ -429,6 +432,7 @@ powerpc_cpu::compile_block(uint32 entry_point)
|
||||
break;
|
||||
}
|
||||
case PPC_I(BC): // Branch Conditional
|
||||
{
|
||||
#if FOLLOW_CONST_JUMPS
|
||||
if (!LK_field::test(opcode)) {
|
||||
const int bo = BO_field::extract(opcode);
|
||||
@ -440,8 +444,16 @@ powerpc_cpu::compile_block(uint32 entry_point)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
dg.gen_mov_32_A0_im(((AA_field::test(opcode) ? 0 : dpc) + operand_BD::get(this, opcode)) & -4);
|
||||
const uint32 tpc = ((AA_field::test(opcode) ? 0 : dpc) + operand_BD::get(this, opcode)) & -4;
|
||||
#if DYNGEN_DIRECT_BLOCK_CHAINING
|
||||
// Use direct block chaining for in-page jumps or jumps to ROM area
|
||||
const uint32 npc = dpc + 4;
|
||||
if (((tpc & -4096) == (npc & -4096)) || is_read_only_memory(tpc))
|
||||
use_direct_block_chaining = true;
|
||||
#endif
|
||||
dg.gen_mov_32_A0_im(tpc);
|
||||
goto do_branch;
|
||||
}
|
||||
case PPC_I(BCCTR): // Branch Conditional to Count Register
|
||||
dg.gen_load_A0_CTR();
|
||||
goto do_branch;
|
||||
@ -457,7 +469,7 @@ powerpc_cpu::compile_block(uint32 entry_point)
|
||||
if (LK_field::test(opcode))
|
||||
dg.gen_store_im_LR(npc);
|
||||
|
||||
dg.gen_bc_A0(bo, bi, npc);
|
||||
dg.gen_bc_A0(bo, bi, npc, use_direct_block_chaining);
|
||||
break;
|
||||
}
|
||||
case PPC_I(B): // Branch
|
||||
@ -491,7 +503,7 @@ powerpc_cpu::compile_block(uint32 entry_point)
|
||||
dg.gen_mov_32_A0_im(tpc);
|
||||
|
||||
// BO field is built so that we always branch to A0
|
||||
dg.gen_bc_A0(BO_MAKE(0,0,0,0), 0, 0);
|
||||
dg.gen_bc_A0(BO_MAKE(0,0,0,0), 0, 0, false);
|
||||
break;
|
||||
}
|
||||
case PPC_I(CMP): // Compare
|
||||
@ -1428,6 +1440,33 @@ powerpc_cpu::compile_block(uint32 entry_point)
|
||||
bi->size = dg.code_ptr() - bi->entry_point;
|
||||
if (disasm)
|
||||
disasm_translation(entry_point, dpc - entry_point + 4, bi->entry_point, bi->size);
|
||||
|
||||
#if DYNGEN_DIRECT_BLOCK_CHAINING
|
||||
// Generate backpatch trampolines
|
||||
if (use_direct_block_chaining) {
|
||||
typedef void *(*func_t)(dyngen_cpu_base);
|
||||
func_t func = (func_t)nv_mem_fun(&powerpc_cpu::compile_chain_block).ptr();
|
||||
|
||||
// Taken PC
|
||||
uint8 *p = dg.gen_start();
|
||||
dg.gen_mov_ad_T0_im(((uintptr)bi) | 0);
|
||||
dg.gen_invoke_CPU_T0_ret_A0(func);
|
||||
dg.gen_jmp_A0();
|
||||
dg.gen_end();
|
||||
bi->jmp_addr[0] = dg.jmp_addr[0];
|
||||
dg.set_jmp_target(dg.jmp_addr[0], p);
|
||||
|
||||
// Not taken PC
|
||||
p = dg.gen_start();
|
||||
dg.gen_mov_ad_T0_im(((uintptr)bi) | 1);
|
||||
dg.gen_invoke_CPU_T0_ret_A0(func);
|
||||
dg.gen_jmp_A0();
|
||||
dg.gen_end();
|
||||
bi->jmp_addr[1] = dg.jmp_addr[1];
|
||||
dg.set_jmp_target(dg.jmp_addr[1], p);
|
||||
}
|
||||
#endif
|
||||
|
||||
block_cache.add_to_cl_list(bi);
|
||||
if (is_read_only_memory(bi->pc))
|
||||
block_cache.add_to_dormant_list(bi);
|
||||
|
Loading…
x
Reference in New Issue
Block a user