SS: Fix JIT on mingw32

- add wrappers with default calling convention for powerpc_cpu member functions used through nv_mem_fun ptr()
** explicit wrappers for member functions that were used explicitly
** dynamic wrapper generator in nv_mem_fun1_t for member functions used dynamically via the instruction table
- add missing direct addressing (non-zero constant offset to Mac memory) support in lvx and stvx implementations
- fix mismatched parameter lists between powerpc_jit member functions and the calls they get through the jit_info table to fix problems at -O2
This commit is contained in:
rakslice 2020-03-17 17:45:38 -07:00
parent a667dc9787
commit d936e9938d
7 changed files with 218 additions and 32 deletions

View File

@ -23,6 +23,10 @@
#include <functional> #include <functional>
#ifdef __MINGW32__
#include "vm_alloc.h"
#endif
#if defined __GNUC__ #if defined __GNUC__
#define HAVE_FAST_NV_MEM_FUN 1 #define HAVE_FAST_NV_MEM_FUN 1
#define MEM_FUN_WORDS 2 #define MEM_FUN_WORDS 2
@ -91,11 +95,123 @@ template< class R, class T, class A >
class nv_mem_fun1_t : public std::binary_function<T, A, R> { class nv_mem_fun1_t : public std::binary_function<T, A, R> {
typedef R (T::*pmf_t)(A); typedef R (T::*pmf_t)(A);
typedef R (* PF_CONVENTION pf_t)(T *, A x); typedef R (* PF_CONVENTION pf_t)(T *, A x);
#ifdef __MINGW32__
typedef R (* default_call_conv_pf_t)(T *, A x);
#endif
pf_t pf; pf_t pf;
public: public:
nv_mem_fun1_t(pmf_t pmf) : pf(nv_mem_fun_of<pmf_t, pf_t>(pmf)) {} nv_mem_fun1_t(pmf_t pmf) : pf(nv_mem_fun_of<pmf_t, pf_t>(pmf)) {
#ifdef __MINGW32__
init_func();
#endif
}
R operator()(T *p, A x) const { return (*pf)(p, x); } R operator()(T *p, A x) const { return (*pf)(p, x); }
#ifdef __MINGW32__
#define NVMEMFUN_THUNK_DEBUG 0
private:
#define DO_CONVENTION_CALL_PF_PLACEHOLDER 0x12345678
#define DO_CONVENTION_CALL_STATICS
static bool do_convention_call_init_done;
static int do_convention_call_code_len;
static int do_convention_call_pf_offset;
unsigned char * do_convention_call_instance_copy;
static void init_do_convention_call() {
if (do_convention_call_init_done) return;
const int max_code_bytes = 100;
const unsigned char last_code_byte_value = 0xc3;
// figure out the size of the function
unsigned char * func_pos = (unsigned char *) &do_convention_call;
int i;
for (i = 0; i < max_code_bytes; i++) {
if (func_pos[i] == last_code_byte_value) {
break;
}
}
do_convention_call_code_len = i + 1;
#if NVMEMFUN_THUNK_DEBUG
printf("do_convention_call func size %d ", do_convention_call_code_len);
#endif
// find the position of the pf placeholder in the function
int placeholder_matches = 0;
for (i = 0; i < do_convention_call_code_len - 3; i++) {
pf_t * cur_ptr = (pf_t*)(func_pos + i);
if (*cur_ptr == (pf_t) DO_CONVENTION_CALL_PF_PLACEHOLDER) {
do_convention_call_pf_offset = i;
#if NVMEMFUN_THUNK_DEBUG
printf("ptr pos offset %x", (uint32)cur_ptr - (uint32)func_pos);
#endif
++placeholder_matches;
}
}
#if NVMEMFUN_THUNK_DEBUG
printf("\n");
fflush(stdout);
#endif
assert(placeholder_matches == 1);
do_convention_call_init_done = true;
}
void init_func() {
if (!do_convention_call_init_done) {
init_do_convention_call();
}
// copy do_convention_call and patch in the address of pf
do_convention_call_instance_copy = (unsigned char *) vm_acquire(do_convention_call_code_len);
// Thunk buffer needs to be around while default_call_conv_ptr() is still in use,
// longer than nv_mem_fun1_t lifetime
//FIXME track the lifetime of this
if (do_convention_call_instance_copy == NULL) return;
unsigned char * func_pos = (unsigned char *) &do_convention_call;
memcpy((void *)do_convention_call_instance_copy, func_pos, do_convention_call_code_len);
// replace byte sequence in buf copy
*(pf_t*)(do_convention_call_instance_copy + do_convention_call_pf_offset) = pf;
#if NVMEMFUN_THUNK_DEBUG
printf("patching do_convention_call to %x; func size %d ", do_convention_call_instance_copy, do_convention_call_code_len);
for (int i = 0 ; i < do_convention_call_code_len; i ++) {
printf("%02x ", do_convention_call_instance_copy[i]);
}
printf("\n");
fflush(stdout);
#endif
vm_protect(do_convention_call_instance_copy, do_convention_call_code_len, VM_PAGE_READ | VM_PAGE_EXECUTE);
}
// Thunk template used to adapt the calling convention:
// do_convention_call accepts the default calling convention and calls pf with PF_CONVENTION.
// Each instance makes its own copy of do_convention_call in a buffer and patches the address of pf into it.
//
// This function is not meant to be called as-is: the placeholder constant below is
// only a marker. init_do_convention_call() scans the compiled bytes of this function
// to find its end and the placeholder, and init_func() copies those bytes and
// overwrites the placeholder with the real pf. Any edit here changes what those
// scans see, so keep the body minimal.
static R do_convention_call(T * obj, A x) {
// Marker value; replaced with the real pf in each patched copy
pf_t fn = (pf_t) DO_CONVENTION_CALL_PF_PLACEHOLDER;
// Forward both arguments and return the callee's result
return (*fn)(obj, x);
}
public:
default_call_conv_pf_t default_call_conv_ptr() const { return (default_call_conv_pf_t) do_convention_call_instance_copy; }
#else
pf_t ptr() const { return pf; } pf_t ptr() const { return pf; }
#endif
}; };
template< class R, class T, class A > template< class R, class T, class A >

View File

@ -151,9 +151,11 @@ public:
// Execute NATIVE_OP routine // Execute NATIVE_OP routine
void execute_native_op(uint32 native_op); void execute_native_op(uint32 native_op);
static void call_execute_native_op(powerpc_cpu * cpu, uint32 native_op);
// Execute EMUL_OP routine // Execute EMUL_OP routine
void execute_emul_op(uint32 emul_op); void execute_emul_op(uint32 emul_op);
static void call_execute_emul_op(powerpc_cpu * cpu, uint32 emul_op);
// Execute 68k routine // Execute 68k routine
void execute_68k(uint32 entry, M68kRegisters *r); void execute_68k(uint32 entry, M68kRegisters *r);
@ -170,6 +172,7 @@ public:
#endif #endif
// Resource manager thunk // Resource manager thunk
void get_resource(uint32 old_get_resource); void get_resource(uint32 old_get_resource);
static void call_get_resource(powerpc_cpu * cpu, uint32 old_get_resource);
// Handle MacOS interrupt // Handle MacOS interrupt
void interrupt(uint32 entry); void interrupt(uint32 entry);
@ -218,6 +221,10 @@ typedef bit_field< 19, 19 > FN_field;
typedef bit_field< 20, 25 > NATIVE_OP_field; typedef bit_field< 20, 25 > NATIVE_OP_field;
typedef bit_field< 26, 31 > EMUL_OP_field; typedef bit_field< 26, 31 > EMUL_OP_field;
// Static forwarder: runs execute_emul_op() on the sheepshaver_cpu behind `cpu`,
// giving the member function an ordinary function-pointer entry point
void sheepshaver_cpu::call_execute_emul_op(powerpc_cpu * cpu, uint32 emul_op)
{
	sheepshaver_cpu * const self = static_cast<sheepshaver_cpu *>(cpu);
	self->execute_emul_op(emul_op);
}
// Execute EMUL_OP routine // Execute EMUL_OP routine
void sheepshaver_cpu::execute_emul_op(uint32 emul_op) void sheepshaver_cpu::execute_emul_op(uint32 emul_op)
{ {
@ -332,7 +339,7 @@ int sheepshaver_cpu::compile1(codegen_context_t & cg_context)
}; };
uint32 old_get_resource = ReadMacInt32(get_resource_ptr[selector - NATIVE_GET_RESOURCE]); uint32 old_get_resource = ReadMacInt32(get_resource_ptr[selector - NATIVE_GET_RESOURCE]);
typedef void (*func_t)(dyngen_cpu_base, uint32); typedef void (*func_t)(dyngen_cpu_base, uint32);
func_t func = (func_t)nv_mem_fun(&sheepshaver_cpu::get_resource).ptr(); func_t func = &sheepshaver_cpu::call_get_resource;
dg.gen_invoke_CPU_im(func, old_get_resource); dg.gen_invoke_CPU_im(func, old_get_resource);
status = COMPILE_CODE_OK; status = COMPILE_CODE_OK;
break; break;
@ -421,7 +428,7 @@ int sheepshaver_cpu::compile1(codegen_context_t & cg_context)
// Invoke NativeOp handler // Invoke NativeOp handler
if (!FN_field::test(opcode)) { if (!FN_field::test(opcode)) {
typedef void (*func_t)(dyngen_cpu_base, uint32); typedef void (*func_t)(dyngen_cpu_base, uint32);
func_t func = (func_t)nv_mem_fun(&sheepshaver_cpu::execute_native_op).ptr(); func_t func = &sheepshaver_cpu::call_execute_native_op;
dg.gen_invoke_CPU_im(func, selector); dg.gen_invoke_CPU_im(func, selector);
cg_context.done_compile = false; cg_context.done_compile = false;
status = COMPILE_CODE_OK; status = COMPILE_CODE_OK;
@ -445,7 +452,7 @@ int sheepshaver_cpu::compile1(codegen_context_t & cg_context)
#else #else
// Invoke EmulOp handler // Invoke EmulOp handler
typedef void (*func_t)(dyngen_cpu_base, uint32); typedef void (*func_t)(dyngen_cpu_base, uint32);
func_t func = (func_t)nv_mem_fun(&sheepshaver_cpu::execute_emul_op).ptr(); func_t func = &sheepshaver_cpu::call_execute_emul_op;
dg.gen_invoke_CPU_im(func, emul_op); dg.gen_invoke_CPU_im(func, emul_op);
cg_context.done_compile = false; cg_context.done_compile = false;
status = COMPILE_CODE_OK; status = COMPILE_CODE_OK;
@ -685,6 +692,10 @@ inline void sheepshaver_cpu::execute_ppc(uint32 entry)
lr() = saved_lr; lr() = saved_lr;
} }
// Static forwarder: runs get_resource() on the sheepshaver_cpu behind `cpu`,
// giving the member function an ordinary function-pointer entry point
void sheepshaver_cpu::call_get_resource(powerpc_cpu * cpu, uint32 old_get_resource)
{
	sheepshaver_cpu * const self = static_cast<sheepshaver_cpu *>(cpu);
	self->get_resource(old_get_resource);
}
// Resource Manager thunk // Resource Manager thunk
inline void sheepshaver_cpu::get_resource(uint32 old_get_resource) inline void sheepshaver_cpu::get_resource(uint32 old_get_resource)
{ {
@ -897,14 +908,14 @@ void init_emul_op_trampolines(basic_dyngen & dg)
// EmulOp // EmulOp
emul_op_trampoline = dg.gen_start(); emul_op_trampoline = dg.gen_start();
func = (func_t)nv_mem_fun(&sheepshaver_cpu::execute_emul_op).ptr(); func = &sheepshaver_cpu::call_execute_emul_op;
dg.gen_invoke_CPU_T0(func); dg.gen_invoke_CPU_T0(func);
dg.gen_exec_return(); dg.gen_exec_return();
dg.gen_end(); dg.gen_end();
// NativeOp // NativeOp
native_op_trampoline = dg.gen_start(); native_op_trampoline = dg.gen_start();
func = (func_t)nv_mem_fun(&sheepshaver_cpu::execute_native_op).ptr(); func = &sheepshaver_cpu::call_execute_native_op;
dg.gen_invoke_CPU_T0(func); dg.gen_invoke_CPU_T0(func);
dg.gen_exec_return(); dg.gen_exec_return();
dg.gen_end(); dg.gen_end();
@ -1030,6 +1041,10 @@ void HandleInterrupt(powerpc_registers *r)
} }
} }
// Static forwarder: runs execute_native_op() on the sheepshaver_cpu behind `cpu`,
// giving the member function an ordinary function-pointer entry point
void sheepshaver_cpu::call_execute_native_op(powerpc_cpu * cpu, uint32 selector)
{
	sheepshaver_cpu * const self = static_cast<sheepshaver_cpu *>(cpu);
	self->execute_native_op(selector);
}
// Execute NATIVE_OP routine // Execute NATIVE_OP routine
void sheepshaver_cpu::execute_native_op(uint32 selector) void sheepshaver_cpu::execute_native_op(uint32 selector)
{ {

View File

@ -64,6 +64,12 @@ int register_info_compare(const void *e1, const void *e2)
static int ppc_refcount = 0; static int ppc_refcount = 0;
// Definitions of the static bookkeeping members of the calling-convention thunk
// in nv_mem_fun1_t, for the <void, powerpc_cpu, uint32> instantiation.
// Only compiled when the header defining nv_mem_fun1_t has defined
// DO_CONVENTION_CALL_STATICS.
#ifdef DO_CONVENTION_CALL_STATICS
// Set once the thunk template has been analyzed
template<> bool nv_mem_fun1_t<void, powerpc_cpu, uint32>::do_convention_call_init_done = false;
// Length in bytes of the thunk template's machine code
template<> int nv_mem_fun1_t<void, powerpc_cpu, uint32>::do_convention_call_code_len = 0;
// Byte offset of the placeholder pointer inside the thunk template
template<> int nv_mem_fun1_t<void, powerpc_cpu, uint32>::do_convention_call_pf_offset = 0;
#endif
void powerpc_cpu::set_register(int id, any_register const & value) void powerpc_cpu::set_register(int id, any_register const & value)
{ {
if (id >= powerpc_registers::GPR(0) && id <= powerpc_registers::GPR(31)) { if (id >= powerpc_registers::GPR(0) && id <= powerpc_registers::GPR(31)) {
@ -542,7 +548,12 @@ bool powerpc_cpu::check_spcflags()
} }
#if DYNGEN_DIRECT_BLOCK_CHAINING #if DYNGEN_DIRECT_BLOCK_CHAINING
void *powerpc_cpu::compile_chain_block(block_info *sbi) void * powerpc_cpu::call_compile_chain_block(powerpc_cpu * the_cpu, block_info *sbi)
{
return the_cpu->compile_chain_block(sbi);
}
void * PF_CONVENTION powerpc_cpu::compile_chain_block(block_info *sbi)
{ {
// Block index is stuffed into the source basic block pointer, // Block index is stuffed into the source basic block pointer,
// which is aligned at least on 4-byte boundaries // which is aligned at least on 4-byte boundaries
@ -719,7 +730,11 @@ void powerpc_cpu::execute(uint32 entry)
if (is_logging()) if (is_logging())
record_step(opcode); record_step(opcode);
#endif #endif
#ifdef __MINGW32__
assert(ii->execute.default_call_conv_ptr() != 0);
#else
assert(ii->execute.ptr() != 0); assert(ii->execute.ptr() != 0);
#endif
ii->execute(this, opcode); ii->execute(this, opcode);
#if PPC_EXECUTE_DUMP_STATE #if PPC_EXECUTE_DUMP_STATE
if (dump_state) if (dump_state)

View File

@ -371,8 +371,10 @@ private:
friend class powerpc_jit; friend class powerpc_jit;
powerpc_jit codegen; powerpc_jit codegen;
block_info *compile_block(uint32 entry); block_info *compile_block(uint32 entry);
static void call_do_record_step(powerpc_cpu * cpu, uint32 pc, uint32 opcode);
#if DYNGEN_DIRECT_BLOCK_CHAINING #if DYNGEN_DIRECT_BLOCK_CHAINING
void *compile_chain_block(block_info *sbi); void *compile_chain_block(block_info *sbi);
static void * call_compile_chain_block(powerpc_cpu * the_cpu, block_info *sbi);
#endif #endif
#endif #endif
@ -389,6 +391,7 @@ private:
// Instruction handlers // Instruction handlers
void execute_nop(uint32 opcode); void execute_nop(uint32 opcode);
void execute_illegal(uint32 opcode); void execute_illegal(uint32 opcode);
static void call_execute_illegal(powerpc_cpu * cpu, uint32 opcode);
template< class RA, class RB, class RC, class CA, class OE, class Rc > template< class RA, class RB, class RC, class CA, class OE, class Rc >
void execute_addition(uint32 opcode); void execute_addition(uint32 opcode);
template< class OP, class RD, class RA, class RB, class RC, class OE, class Rc > template< class OP, class RD, class RA, class RB, class RC, class OE, class Rc >
@ -453,6 +456,7 @@ private:
void execute_icbi(uint32 opcode); void execute_icbi(uint32 opcode);
void execute_isync(uint32 opcode); void execute_isync(uint32 opcode);
void execute_invalidate_cache_range(); void execute_invalidate_cache_range();
static void call_execute_invalidate_cache_range(powerpc_cpu * cpu);
template< class RA, class RB > template< class RA, class RB >
void execute_dcbz(uint32 opcode); void execute_dcbz(uint32 opcode);
template< bool SL > template< bool SL >

View File

@ -36,6 +36,11 @@ powerpc_jit::powerpc_jit(dyngen_cpu_base cpu)
{ {
} }
// Builds an x86 memory operand for an address in the emulated machine's
// address space: same as x86_memory_operand, but with VMBaseDiff added to
// the displacement
static x86_memory_operand vm_memory_operand(int32 d, int b, int i = X86_NOREG, int s = 1)
{
	x86_memory_operand operand(d + VMBaseDiff, b, i, s);
	return operand;
}
bool powerpc_jit::initialize(void) bool powerpc_jit::initialize(void)
{ {
if (!powerpc_dyngen::initialize()) if (!powerpc_dyngen::initialize())
@ -239,21 +244,25 @@ bool powerpc_jit::initialize(void)
// Dispatch mid-level code generators // Dispatch mid-level code generators
bool powerpc_jit::gen_vector_1(int mnemo, int vD) bool powerpc_jit::gen_vector_1(int mnemo, int vD)
{ {
if (jit_info[mnemo]->handler == (gen_handler_t)&powerpc_jit::gen_not_available) return false;
return (this->*((bool (powerpc_jit::*)(int, int))jit_info[mnemo]->handler))(mnemo, vD); return (this->*((bool (powerpc_jit::*)(int, int))jit_info[mnemo]->handler))(mnemo, vD);
} }
bool powerpc_jit::gen_vector_2(int mnemo, int vD, int vA, int vB) bool powerpc_jit::gen_vector_2(int mnemo, int vD, int vA, int vB)
{ {
if (jit_info[mnemo]->handler == (gen_handler_t)&powerpc_jit::gen_not_available) return false;
return (this->*((bool (powerpc_jit::*)(int, int, int, int))jit_info[mnemo]->handler))(mnemo, vD, vA, vB); return (this->*((bool (powerpc_jit::*)(int, int, int, int))jit_info[mnemo]->handler))(mnemo, vD, vA, vB);
} }
bool powerpc_jit::gen_vector_3(int mnemo, int vD, int vA, int vB, int vC) bool powerpc_jit::gen_vector_3(int mnemo, int vD, int vA, int vB, int vC)
{ {
if (jit_info[mnemo]->handler == (gen_handler_t)&powerpc_jit::gen_not_available) return false;
return (this->*((bool (powerpc_jit::*)(int, int, int, int, int))jit_info[mnemo]->handler))(mnemo, vD, vA, vB, vC); return (this->*((bool (powerpc_jit::*)(int, int, int, int, int))jit_info[mnemo]->handler))(mnemo, vD, vA, vB, vC);
} }
bool powerpc_jit::gen_vector_compare(int mnemo, int vD, int vA, int vB, bool Rc) bool powerpc_jit::gen_vector_compare(int mnemo, int vD, int vA, int vB, bool Rc)
{ {
if (jit_info[mnemo]->handler == (gen_handler_t)&powerpc_jit::gen_not_available) return false;
return (this->*((bool (powerpc_jit::*)(int, int, int, int, bool))jit_info[mnemo]->handler))(mnemo, vD, vA, vB, Rc); return (this->*((bool (powerpc_jit::*)(int, int, int, int, bool))jit_info[mnemo]->handler))(mnemo, vD, vA, vB, Rc);
} }
@ -395,8 +404,8 @@ bool powerpc_jit::gen_x86_lvx(int mnemo, int vD, int rA, int rB)
gen_add_32(x86_memory_operand(xPPC_GPR(rA), REG_CPU_ID), REG_T0_ID); gen_add_32(x86_memory_operand(xPPC_GPR(rA), REG_CPU_ID), REG_T0_ID);
gen_and_32(x86_immediate_operand(-16), REG_T0_ID); gen_and_32(x86_immediate_operand(-16), REG_T0_ID);
#if SIZEOF_VOID_P == 8 #if SIZEOF_VOID_P == 8
gen_mov_64(x86_memory_operand(0, REG_T0_ID), REG_T1_ID); gen_mov_64(vm_memory_operand(0, REG_T0_ID), REG_T1_ID);
gen_mov_64(x86_memory_operand(8, REG_T0_ID), REG_T2_ID); gen_mov_64(vm_memory_operand(8, REG_T0_ID), REG_T2_ID);
gen_bswap_64(REG_T1_ID); gen_bswap_64(REG_T1_ID);
gen_bswap_64(REG_T2_ID); gen_bswap_64(REG_T2_ID);
gen_rol_64(x86_immediate_operand(32), REG_T1_ID); gen_rol_64(x86_immediate_operand(32), REG_T1_ID);
@ -404,14 +413,14 @@ bool powerpc_jit::gen_x86_lvx(int mnemo, int vD, int rA, int rB)
gen_mov_64(REG_T1_ID, x86_memory_operand(xPPC_VR(vD) + 0, REG_CPU_ID)); gen_mov_64(REG_T1_ID, x86_memory_operand(xPPC_VR(vD) + 0, REG_CPU_ID));
gen_mov_64(REG_T2_ID, x86_memory_operand(xPPC_VR(vD) + 8, REG_CPU_ID)); gen_mov_64(REG_T2_ID, x86_memory_operand(xPPC_VR(vD) + 8, REG_CPU_ID));
#else #else
gen_mov_32(x86_memory_operand(0*4, REG_T0_ID), REG_T1_ID); gen_mov_32(vm_memory_operand(0*4, REG_T0_ID), REG_T1_ID);
gen_mov_32(x86_memory_operand(1*4, REG_T0_ID), REG_T2_ID); gen_mov_32(vm_memory_operand(1*4, REG_T0_ID), REG_T2_ID);
gen_bswap_32(REG_T1_ID); gen_bswap_32(REG_T1_ID);
gen_bswap_32(REG_T2_ID); gen_bswap_32(REG_T2_ID);
gen_mov_32(REG_T1_ID, x86_memory_operand(xPPC_VR(vD) + 0*4, REG_CPU_ID)); gen_mov_32(REG_T1_ID, x86_memory_operand(xPPC_VR(vD) + 0*4, REG_CPU_ID));
gen_mov_32(REG_T2_ID, x86_memory_operand(xPPC_VR(vD) + 1*4, REG_CPU_ID)); gen_mov_32(REG_T2_ID, x86_memory_operand(xPPC_VR(vD) + 1*4, REG_CPU_ID));
gen_mov_32(x86_memory_operand(2*4, REG_T0_ID), REG_T1_ID); gen_mov_32(vm_memory_operand(2*4, REG_T0_ID), REG_T1_ID);
gen_mov_32(x86_memory_operand(3*4, REG_T0_ID), REG_T2_ID); gen_mov_32(vm_memory_operand(3*4, REG_T0_ID), REG_T2_ID);
gen_bswap_32(REG_T1_ID); gen_bswap_32(REG_T1_ID);
gen_bswap_32(REG_T2_ID); gen_bswap_32(REG_T2_ID);
gen_mov_32(REG_T1_ID, x86_memory_operand(xPPC_VR(vD) + 2*4, REG_CPU_ID)); gen_mov_32(REG_T1_ID, x86_memory_operand(xPPC_VR(vD) + 2*4, REG_CPU_ID));
@ -435,8 +444,8 @@ bool powerpc_jit::gen_x86_stvx(int mnemo, int vS, int rA, int rB)
gen_and_32(x86_immediate_operand(-16), REG_T0_ID); gen_and_32(x86_immediate_operand(-16), REG_T0_ID);
gen_rol_64(x86_immediate_operand(32), REG_T1_ID); gen_rol_64(x86_immediate_operand(32), REG_T1_ID);
gen_rol_64(x86_immediate_operand(32), REG_T2_ID); gen_rol_64(x86_immediate_operand(32), REG_T2_ID);
gen_mov_64(REG_T1_ID, x86_memory_operand(0, REG_T0_ID)); gen_mov_64(REG_T1_ID, vm_memory_operand(0, REG_T0_ID));
gen_mov_64(REG_T2_ID, x86_memory_operand(8, REG_T0_ID)); gen_mov_64(REG_T2_ID, vm_memory_operand(8, REG_T0_ID));
#else #else
gen_mov_32(x86_memory_operand(xPPC_VR(vS) + 0*4, REG_CPU_ID), REG_T1_ID); gen_mov_32(x86_memory_operand(xPPC_VR(vS) + 0*4, REG_CPU_ID), REG_T1_ID);
gen_mov_32(x86_memory_operand(xPPC_VR(vS) + 1*4, REG_CPU_ID), REG_T2_ID); gen_mov_32(x86_memory_operand(xPPC_VR(vS) + 1*4, REG_CPU_ID), REG_T2_ID);
@ -445,14 +454,14 @@ bool powerpc_jit::gen_x86_stvx(int mnemo, int vS, int rA, int rB)
gen_bswap_32(REG_T1_ID); gen_bswap_32(REG_T1_ID);
gen_bswap_32(REG_T2_ID); gen_bswap_32(REG_T2_ID);
gen_and_32(x86_immediate_operand(-16), REG_T0_ID); gen_and_32(x86_immediate_operand(-16), REG_T0_ID);
gen_mov_32(REG_T1_ID, x86_memory_operand(0*4, REG_T0_ID)); gen_mov_32(REG_T1_ID, vm_memory_operand(0*4, REG_T0_ID));
gen_mov_32(REG_T2_ID, x86_memory_operand(1*4, REG_T0_ID)); gen_mov_32(REG_T2_ID, vm_memory_operand(1*4, REG_T0_ID));
gen_mov_32(x86_memory_operand(xPPC_VR(vS) + 2*4, REG_CPU_ID), REG_T1_ID); gen_mov_32(x86_memory_operand(xPPC_VR(vS) + 2*4, REG_CPU_ID), REG_T1_ID);
gen_mov_32(x86_memory_operand(xPPC_VR(vS) + 3*4, REG_CPU_ID), REG_T2_ID); gen_mov_32(x86_memory_operand(xPPC_VR(vS) + 3*4, REG_CPU_ID), REG_T2_ID);
gen_bswap_32(REG_T1_ID); gen_bswap_32(REG_T1_ID);
gen_bswap_32(REG_T2_ID); gen_bswap_32(REG_T2_ID);
gen_mov_32(REG_T1_ID, x86_memory_operand(2*4, REG_T0_ID)); gen_mov_32(REG_T1_ID, vm_memory_operand(2*4, REG_T0_ID));
gen_mov_32(REG_T2_ID, x86_memory_operand(3*4, REG_T0_ID)); gen_mov_32(REG_T2_ID, vm_memory_operand(3*4, REG_T0_ID));
#endif #endif
return true; return true;
} }
@ -667,7 +676,7 @@ void powerpc_jit::gen_sse2_vsplat(int vD, int rValue)
} }
// vspltisb // vspltisb
bool powerpc_jit::gen_sse2_vspltisb(int mnemo, int vD, int SIMM) bool powerpc_jit::gen_sse2_vspltisb(int mnemo, int vD, int SIMM, int unused)
{ {
switch (SIMM) { switch (SIMM) {
case 0: case 0:
@ -718,7 +727,7 @@ bool powerpc_jit::gen_sse2_vspltisb(int mnemo, int vD, int SIMM)
} }
// vspltish // vspltish
bool powerpc_jit::gen_sse2_vspltish(int mnemo, int vD, int SIMM) bool powerpc_jit::gen_sse2_vspltish(int mnemo, int vD, int SIMM, int unused)
{ {
switch (SIMM) { switch (SIMM) {
case 0: case 0:
@ -764,7 +773,7 @@ bool powerpc_jit::gen_sse2_vspltish(int mnemo, int vD, int SIMM)
} }
// vspltisw // vspltisw
bool powerpc_jit::gen_sse2_vspltisw(int mnemo, int vD, int SIMM) bool powerpc_jit::gen_sse2_vspltisw(int mnemo, int vD, int SIMM, int unused)
{ {
switch (SIMM) { switch (SIMM) {
case 0: case 0:
@ -866,7 +875,7 @@ bool powerpc_jit::gen_ssse3_lvx(int mnemo, int vD, int rA, int rB)
gen_and_32(x86_immediate_operand(-16), REG_T0_ID); gen_and_32(x86_immediate_operand(-16), REG_T0_ID);
x86_memory_operand vswapmask(gen_ssse3_vswap_mask(), X86_NOREG); x86_memory_operand vswapmask(gen_ssse3_vswap_mask(), X86_NOREG);
gen_movdqa(x86_memory_operand(0, REG_T0_ID), REG_V0_ID); gen_movdqa(vm_memory_operand(0, REG_T0_ID), REG_V0_ID);
gen_insn(X86_INSN_SSE_3P, X86_SSSE3_PSHUFB, vswapmask, REG_V0_ID); gen_insn(X86_INSN_SSE_3P, X86_SSSE3_PSHUFB, vswapmask, REG_V0_ID);
gen_movdqa(REG_V0_ID, x86_memory_operand(xPPC_VR(vD), REG_CPU_ID)); gen_movdqa(REG_V0_ID, x86_memory_operand(xPPC_VR(vD), REG_CPU_ID));
return true; return true;
@ -883,7 +892,7 @@ bool powerpc_jit::gen_ssse3_stvx(int mnemo, int vS, int rA, int rB)
x86_memory_operand vswapmask(gen_ssse3_vswap_mask(), X86_NOREG); x86_memory_operand vswapmask(gen_ssse3_vswap_mask(), X86_NOREG);
gen_movdqa(x86_memory_operand(xPPC_VR(vS), REG_CPU_ID), REG_V0_ID); gen_movdqa(x86_memory_operand(xPPC_VR(vS), REG_CPU_ID), REG_V0_ID);
gen_insn(X86_INSN_SSE_3P, X86_SSSE3_PSHUFB, vswapmask, REG_V0_ID); gen_insn(X86_INSN_SSE_3P, X86_SSSE3_PSHUFB, vswapmask, REG_V0_ID);
gen_movdqa(REG_V0_ID, x86_memory_operand(0, REG_T0_ID)); gen_movdqa(REG_V0_ID, vm_memory_operand(0, REG_T0_ID));
return true; return true;
} }

View File

@ -90,9 +90,9 @@ private:
bool gen_sse2_vsel(int mnemo, int vD, int vA, int vB, int vC); bool gen_sse2_vsel(int mnemo, int vD, int vA, int vB, int vC);
bool gen_sse2_vsldoi(int mnemo, int vD, int vA, int vB, int SH); bool gen_sse2_vsldoi(int mnemo, int vD, int vA, int vB, int SH);
void gen_sse2_vsplat(int vD, int rValue); void gen_sse2_vsplat(int vD, int rValue);
bool gen_sse2_vspltisb(int mnemo, int vD, int SIMM); bool gen_sse2_vspltisb(int mnemo, int vD, int SIMM, int unused);
bool gen_sse2_vspltish(int mnemo, int vD, int SIMM); bool gen_sse2_vspltish(int mnemo, int vD, int SIMM, int unused);
bool gen_sse2_vspltisw(int mnemo, int vD, int SIMM); bool gen_sse2_vspltisw(int mnemo, int vD, int SIMM, int unused);
bool gen_sse2_vspltb(int mnemo, int vD, int UIMM, int vB); bool gen_sse2_vspltb(int mnemo, int vD, int UIMM, int vB);
bool gen_sse2_vsplth(int mnemo, int vD, int UIMM, int vB); bool gen_sse2_vsplth(int mnemo, int vD, int UIMM, int vB);
bool gen_sse2_vspltw(int mnemo, int vD, int UIMM, int vB); bool gen_sse2_vspltw(int mnemo, int vD, int UIMM, int vB);

View File

@ -125,6 +125,22 @@ static void disasm_translation(uint32 src_addr, uint32 src_len,
**/ **/
#if PPC_ENABLE_JIT #if PPC_ENABLE_JIT
// Static forwarder: invokes do_record_step() on the given CPU, giving the
// member function an ordinary function-pointer entry point
void
powerpc_cpu::call_do_record_step(powerpc_cpu * cpu, uint32 param1, uint32 param2)
{
	powerpc_cpu & self = *cpu;
	self.do_record_step(param1, param2);
}
// Static forwarder: invokes execute_invalidate_cache_range() on the given CPU,
// giving the member function an ordinary function-pointer entry point
void
powerpc_cpu::call_execute_invalidate_cache_range(powerpc_cpu * cpu)
{
	powerpc_cpu & self = *cpu;
	self.execute_invalidate_cache_range();
}
// Static forwarder: invokes execute_illegal() on the given CPU, giving the
// member function an ordinary function-pointer entry point
void
powerpc_cpu::call_execute_illegal(powerpc_cpu * cpu, uint32 param1)
{
	powerpc_cpu & self = *cpu;
	self.execute_illegal(param1);
}
powerpc_cpu::block_info * powerpc_cpu::block_info *
powerpc_cpu::compile_block(uint32 entry_point) powerpc_cpu::compile_block(uint32 entry_point)
{ {
@ -169,7 +185,7 @@ powerpc_cpu::compile_block(uint32 entry_point)
#if PPC_FLIGHT_RECORDER #if PPC_FLIGHT_RECORDER
if (is_logging()) { if (is_logging()) {
typedef void (*func_t)(dyngen_cpu_base, uint32, uint32); typedef void (*func_t)(dyngen_cpu_base, uint32, uint32);
func_t func = (func_t)nv_mem_fun((execute_pmf)&powerpc_cpu::do_record_step).ptr(); func_t func = &powerpc_cpu::call_do_record_step;
dg.gen_invoke_CPU_im_im(func, dpc, opcode); dg.gen_invoke_CPU_im_im(func, dpc, opcode);
} }
#endif #endif
@ -1120,7 +1136,7 @@ powerpc_cpu::compile_block(uint32 entry_point)
case PPC_I(ISYNC): // Instruction synchronize case PPC_I(ISYNC): // Instruction synchronize
{ {
typedef void (*func_t)(dyngen_cpu_base); typedef void (*func_t)(dyngen_cpu_base);
func_t func = (func_t)nv_mem_fun(&powerpc_cpu::execute_invalidate_cache_range).ptr(); func_t func = &powerpc_cpu::call_execute_invalidate_cache_range;
dg.gen_invoke_CPU(func); dg.gen_invoke_CPU(func);
break; break;
} }
@ -1377,10 +1393,17 @@ powerpc_cpu::compile_block(uint32 entry_point)
case PPC_I(STVEWX): case PPC_I(STVEWX):
case PPC_I(STVX): case PPC_I(STVX):
case PPC_I(STVXL): case PPC_I(STVXL):
{
assert(vD_field::mask() == vS_field::mask()); assert(vD_field::mask() == vS_field::mask());
assert(vA_field::mask() == rA_field::mask()); assert(vA_field::mask() == rA_field::mask());
assert(vB_field::mask() == rB_field::mask()); assert(vB_field::mask() == rB_field::mask());
// fall-through const int vD = vD_field::extract(opcode);
const int vA = vA_field::extract(opcode);
const int vB = vB_field::extract(opcode);
if (!dg.gen_vector_2(ii->mnemo, vD, vA, vB))
goto do_generic;
break;
}
case PPC_I(VCMPEQFP): case PPC_I(VCMPEQFP):
case PPC_I(VCMPEQUB): case PPC_I(VCMPEQUB):
case PPC_I(VCMPEQUH): case PPC_I(VCMPEQUH):
@ -1488,10 +1511,14 @@ powerpc_cpu::compile_block(uint32 entry_point)
typedef void (*func_t)(dyngen_cpu_base, uint32); typedef void (*func_t)(dyngen_cpu_base, uint32);
func_t func; func_t func;
do_generic: do_generic:
#ifdef __MINGW32__
func = (func_t)ii->execute.default_call_conv_ptr();
#else
func = (func_t)ii->execute.ptr(); func = (func_t)ii->execute.ptr();
#endif
goto do_invoke; goto do_invoke;
do_illegal: do_illegal:
func = (func_t)nv_mem_fun(&powerpc_cpu::execute_illegal).ptr(); func = &powerpc_cpu::call_execute_illegal;
goto do_invoke; goto do_invoke;
do_invoke: do_invoke:
#if PPC_PROFILE_GENERIC_CALLS #if PPC_PROFILE_GENERIC_CALLS
@ -1554,7 +1581,7 @@ powerpc_cpu::compile_block(uint32 entry_point)
// Generate backpatch trampolines // Generate backpatch trampolines
if (use_direct_block_chaining) { if (use_direct_block_chaining) {
typedef void *(*func_t)(dyngen_cpu_base); typedef void *(*func_t)(dyngen_cpu_base);
func_t func = (func_t)nv_mem_fun(&powerpc_cpu::compile_chain_block).ptr(); func_t func = (func_t)&powerpc_cpu::call_compile_chain_block;
for (int i = 0; i < block_info::MAX_TARGETS; i++) { for (int i = 0; i < block_info::MAX_TARGETS; i++) {
if (bi->li[i].jmp_pc != block_info::INVALID_PC) { if (bi->li[i].jmp_pc != block_info::INVALID_PC) {
uint8 *p = dg.gen_align(16); uint8 *p = dg.gen_align(16);