Generate PowerPC code wrapping GetResource() replacements. That way, it's

a normal PPC function invocation that can be JIT compiled to native code
instead of nesting execute() calls, which may fall back to the interpreter
(this took around 11% of total execution time on boot, now down to 3%).

Also, optimize some SheepShaver EmulOps and actually report non-CTI.
This commit is contained in:
gbeauche 2004-01-24 11:28:06 +00:00
parent 60d371486b
commit 10b9ab2c34
4 changed files with 246 additions and 22 deletions

View File

@ -97,6 +97,15 @@ static inline void *Mac2Mac_memcpy(uint32 dest, uint32 src, size_t n) {return me
#define PW(X) ((((X) >> 8) & 0xff) | (((X) & 0xff) << 8))
#endif
// PowerPC procedure helper to write a big-endian 32-bit word
// PL(X) leaves X untouched on big-endian hosts and reverses its four bytes
// on little-endian hosts, so that the value ends up in big-endian byte order
// in host memory. The argument is fully parenthesized in both variants, and
// the masks are unsigned so that the low byte is never left-shifted into the
// sign bit of a signed int.
#ifdef WORDS_BIGENDIAN
#define PL(X) (X)
#else
#define PL(X) \
	((((X) & 0xff000000u) >> 24) | (((X) & 0x00ff0000u) >> 8) | \
	 (((X) & 0x0000ff00u) << 8) | (((X) & 0x000000ffu) << 24))
#endif
struct M68kRegisters;
extern void Execute68k(uint32, M68kRegisters *r); // Execute 68k subroutine from EMUL_OP routine, must be ended with RTS
extern void Execute68kTrap(uint16 trap, M68kRegisters *r); // Execute 68k A-Trap from EMUL_OP routine

View File

@ -54,6 +54,7 @@ enum {
NATIVE_DISABLE_INTERRUPT,
NATIVE_ENABLE_INTERRUPT,
NATIVE_MAKE_EXECUTABLE,
NATIVE_CHECK_LOAD_INVOC,
NATIVE_OP_MAX
};

View File

@ -76,6 +76,9 @@ static void enter_mon(void)
// From main_*.cpp
extern uintptr SignalStackBase();
// From rsrc_patches.cpp
extern "C" void check_load_invoc(uint32 type, int16 id, uint32 h);
// PowerPC EmulOp to exit from emulation loop
const uint32 POWERPC_EXEC_RETURN = POWERPC_EMUL_OP | 1;
@ -133,6 +136,9 @@ public:
uint32 get_xer() const { return xer().get(); }
void set_xer(uint32 v) { xer().set(v); }
// Execute EMUL_OP routine
void execute_emul_op(uint32 emul_op);
// Execute 68k routine
void execute_68k(uint32 entry, M68kRegisters *r);
@ -142,6 +148,9 @@ public:
// Execute MacOS/PPC code
uint32 execute_macos_code(uint32 tvect, int nargs, uint32 const *args);
// Compile one instruction
virtual bool compile1(codegen_context_t & cg_context);
// Resource manager thunk
void get_resource(uint32 old_get_resource);
@ -211,6 +220,30 @@ typedef bit_field< 20, 20 > FN_field;
typedef bit_field< 21, 25 > NATIVE_OP_field;
typedef bit_field< 26, 31 > EMUL_OP_field;
// Execute EMUL_OP routine
// Marshals the PowerPC registers into a 68k register set (r8..r15 -> d0..d7,
// r16..r22 -> a0..a6, r1 -> a7), calls EmulOp() with the value of r24 and the
// given emul_op number, then copies the 68k registers back. XLM_RUN_MODE is
// MODE_EMUL_OP for the duration of the call and MODE_68K afterwards.
void sheepshaver_cpu::execute_emul_op(uint32 emul_op)
{
	M68kRegisters regs;

	WriteMacInt32(XLM_68K_R25, gpr(25));
	WriteMacInt32(XLM_RUN_MODE, MODE_EMUL_OP);

	// PowerPC -> 68k register file
	int n;
	for (n = 0; n < 7; n++) {
		regs.d[n] = gpr(8 + n);
		regs.a[n] = gpr(16 + n);
	}
	regs.d[7] = gpr(15);
	regs.a[7] = gpr(1);

	// After the call, CR is set to its CR_field<2> bits as sampled here and
	// XER is restored to its previous value
	const uint32 cr_before = get_cr() & CR_field<2>::mask();
	const uint32 xer_before = get_xer();
	EmulOp(&regs, gpr(24), emul_op);
	set_cr(cr_before);
	set_xer(xer_before);

	// 68k -> PowerPC register file
	for (n = 0; n < 7; n++) {
		gpr(8 + n) = regs.d[n];
		gpr(16 + n) = regs.a[n];
	}
	gpr(15) = regs.d[7];
	gpr(1) = regs.a[7];

	WriteMacInt32(XLM_RUN_MODE, MODE_68K);
}
// Execute SheepShaver instruction
void sheepshaver_cpu::execute_sheep(uint32 opcode)
{
@ -234,30 +267,111 @@ void sheepshaver_cpu::execute_sheep(uint32 opcode)
pc() += 4;
break;
default: { // EMUL_OP
M68kRegisters r68;
WriteMacInt32(XLM_68K_R25, gpr(25));
WriteMacInt32(XLM_RUN_MODE, MODE_EMUL_OP);
for (int i = 0; i < 8; i++)
r68.d[i] = gpr(8 + i);
for (int i = 0; i < 7; i++)
r68.a[i] = gpr(16 + i);
r68.a[7] = gpr(1);
uint32 saved_cr = get_cr() & CR_field<2>::mask();
uint32 saved_xer = get_xer();
EmulOp(&r68, gpr(24), EMUL_OP_field::extract(opcode) - 3);
set_cr(saved_cr);
set_xer(saved_xer);
for (int i = 0; i < 8; i++)
gpr(8 + i) = r68.d[i];
for (int i = 0; i < 7; i++)
gpr(16 + i) = r68.a[i];
gpr(1) = r68.a[7];
WriteMacInt32(XLM_RUN_MODE, MODE_68K);
default: // EMUL_OP
execute_emul_op(EMUL_OP_field::extract(opcode) - 3);
pc() += 4;
break;
}
}
// Compile one instruction
// Emits code for a SheepShaver (SHEEP) pseudo-instruction through the code
// generator found in cg_context. Returns true when code was generated;
// returns false when the instruction is not a SHEEP opcode, when the
// EXEC_NATIVE selector has no generator below, or when PPC_ENABLE_JIT is off.
bool sheepshaver_cpu::compile1(codegen_context_t & cg_context)
{
#if PPC_ENABLE_JIT
	const instr_info_t * const info = cg_context.instr_info;
	if (info->mnemo != PPC_I(SHEEP))
		return false;

	// CPU member function taking one immediate argument, as a plain function
	typedef void (*cpu_im_func_t)(dyngen_cpu_base, uint32);

	powerpc_dyngen & dg = cg_context.codegen;
	const uint32 opcode = cg_context.opcode;
	const uint32 sheep_op = opcode & 0x3f;

	if (sheep_op == 0) {		// EMUL_RETURN
		dg.gen_invoke(QuitEmulator);
		return true;
	}

	if (sheep_op == 1) {		// EXEC_RETURN
		dg.gen_spcflags_set(SPCFLAG_CPU_EXEC_RETURN);
		return true;
	}

	if (sheep_op != 2) {		// EMUL_OP
		cpu_im_func_t emul_op_func = (cpu_im_func_t)nv_mem_fun(&sheepshaver_cpu::execute_emul_op).ptr();
		dg.gen_invoke_CPU_im(emul_op_func, EMUL_OP_field::extract(opcode) - 3);
		cg_context.done_compile = false;
		return true;
	}

	// EXEC_NATIVE: assume a generator exists, the default case says otherwise
	bool compiled = true;
	const uint32 selector = NATIVE_OP_field::extract(opcode);
	switch (selector) {
	case NATIVE_PATCH_NAME_REGISTRY:
		dg.gen_invoke(DoPatchNameRegistry);
		break;
	case NATIVE_VIDEO_INSTALL_ACCEL:
		dg.gen_invoke(VideoInstallAccel);
		break;
	case NATIVE_VIDEO_VBL:
		dg.gen_invoke(VideoVBL);
		break;
	case NATIVE_GET_RESOURCE:
	case NATIVE_GET_1_RESOURCE:
	case NATIVE_GET_IND_RESOURCE:
	case NATIVE_GET_1_IND_RESOURCE:
	case NATIVE_R_GET_RESOURCE: {
		// Low-memory slots indexed by (selector - NATIVE_GET_RESOURCE)
		static const uint32 old_routine_slot[] = {
			XLM_GET_RESOURCE,
			XLM_GET_1_RESOURCE,
			XLM_GET_IND_RESOURCE,
			XLM_GET_1_IND_RESOURCE,
			XLM_R_GET_RESOURCE
		};
		// The slot is read now, at compile time, and passed as an immediate
		const uint32 old_get_resource = ReadMacInt32(old_routine_slot[selector - NATIVE_GET_RESOURCE]);
		cpu_im_func_t get_resource_func = (cpu_im_func_t)nv_mem_fun(&sheepshaver_cpu::get_resource).ptr();
		dg.gen_invoke_CPU_im(get_resource_func, old_get_resource);
		break;
	}
	case NATIVE_DISABLE_INTERRUPT:
		dg.gen_invoke(DisableInterrupt);
		break;
	case NATIVE_ENABLE_INTERRUPT:
		dg.gen_invoke(EnableInterrupt);
		break;
	case NATIVE_CHECK_LOAD_INVOC:
		// check_load_invoc(r3, sign-extended low 16 bits of r4, r5)
		dg.gen_load_T0_GPR(3);
		dg.gen_load_T1_GPR(4);
		dg.gen_se_16_32_T1();
		dg.gen_load_T2_GPR(5);
		dg.gen_invoke_T0_T1_T2((void (*)(uint32, uint32, uint32))check_load_invoc);
		break;
	default:
		// No generator for this selector
		compiled = false;
		break;
	}

	if (!FN_field::test(opcode)) {
		cg_context.done_compile = false;
	}
	else {
		// FN bit set: when code was generated, also emit LR -> A0 -> PC
		if (compiled) {
			dg.gen_load_A0_LR();
			dg.gen_set_PC_A0();
		}
		cg_context.done_compile = true;
	}
	return compiled;
#else
	return false;
#endif
}
// Handle MacOS interrupt
@ -493,8 +607,6 @@ inline void sheepshaver_cpu::execute_ppc(uint32 entry)
}
// Resource Manager thunk
extern "C" void check_load_invoc(uint32 type, int16 id, uint32 h);
inline void sheepshaver_cpu::get_resource(uint32 old_get_resource)
{
uint32 type = gpr(3);
@ -905,6 +1017,9 @@ static void NativeOp(int selector)
case NATIVE_MAKE_EXECUTABLE:
MakeExecutable(0, (void *)GPR(4), GPR(5));
break;
case NATIVE_CHECK_LOAD_INVOC:
check_load_invoc(GPR(3), GPR(4), GPR(5));
break;
default:
printf("FATAL: NATIVE_OP called with bogus selector %d\n", selector);
QuitEmulator();

View File

@ -22,6 +22,7 @@
#include "thunks.h"
#include "emul_op.h"
#include "cpu_emulation.h"
#include "xlowmem.h"
// Native function declarations
#include "main.h"
@ -31,6 +32,9 @@
#include "ether.h"
#include "macos_util.h"
// Generate PowerPC thunks for GetResource() replacements?
#define POWERPC_GET_RESOURCE_THUNKS 1
/* NativeOp instruction format:
+------------+--------------------------+--+----------+------------+
@ -53,6 +57,7 @@ uint32 NativeOpcode(int selector)
switch (selector) {
case NATIVE_DISABLE_INTERRUPT:
case NATIVE_ENABLE_INTERRUPT:
case NATIVE_CHECK_LOAD_INVOC:
opcode = POWERPC_NATIVE_OP(0, selector);
break;
case NATIVE_PATCH_NAME_REGISTRY:
@ -89,6 +94,92 @@ uint32 NativeOpcode(int selector)
#endif
/*
* Generate PowerPC thunks for GetResource() replacements
*/
#if EMULATED_PPC
// Addresses of the generated PowerPC thunks, one per Resource Manager
// routine. Each is assigned in generate_powerpc_thunks() from the value
// returned by SheepMem::Reserve().
static uint32 get_resource_func;			// GetResource() thunk
static uint32 get_1_resource_func;			// Get1Resource() thunk
static uint32 get_ind_resource_func;		// GetIndResource() thunk
static uint32 get_1_ind_resource_func;		// Get1IndResource() thunk
static uint32 r_get_resource_func;			// RGetResource() thunk
static void generate_powerpc_thunks(void)
{
static uint32 get_resource_template[] = {
PL(0x7c0802a6), // mflr r0
PL(0x90010008), // stw r0,8(r1)
PL(0x9421ffbc), // stwu r1,-68(r1)
PL(0x90610038), // stw r3,56(r1)
PL(0x9081003c), // stw r4,60(r1)
PL(0x00000000), // lwz r0,XLM_GET_RESOURCE(r0)
PL(0x80402834), // lwz r2,XLM_RES_LIB_TOC(r0)
PL(0x7c0903a6), // mtctr r0
PL(0x4e800421), // bctrl
PL(0x90610040), // stw r3,64(r1)
PL(0x80610038), // lwz r3,56(r1)
PL(0xa881003e), // lha r4,62(r1)
PL(0x80a10040), // lwz r5,64(r1)
PL(0x00000001), // <check_load_invoc>
PL(0x80610040), // lwz r3,64(r1)
PL(0x8001004c), // lwz r0,76(r1)
PL(0x7c0803a6), // mtlr r0
PL(0x38210044), // addi r1,r1,68
PL(0x4e800020) // blr
};
const uint32 get_resource_template_size = sizeof(get_resource_template);
int xlm_index = -1, check_load_invoc_index = -1;
for (int i = 0; i < get_resource_template_size/4; i++) {
uint32 opcode = ntohl(get_resource_template[i]);
switch (opcode) {
case 0x00000000:
xlm_index = i;
break;
case 0x00000001:
check_load_invoc_index = i;
break;
}
}
assert(xlm_index != -1 && check_load_invoc_index != -1);
uint32 check_load_invoc_opcode = NativeOpcode(NATIVE_CHECK_LOAD_INVOC);
uintptr base;
// GetResource()
get_resource_func = base = SheepMem::Reserve(get_resource_template_size);
Host2Mac_memcpy(base, get_resource_template, get_resource_template_size);
WriteMacInt32(base + xlm_index * 4, 0x80000000 | XLM_GET_RESOURCE);
WriteMacInt32(base + check_load_invoc_index * 4, check_load_invoc_opcode);
// Get1Resource()
get_1_resource_func = base = SheepMem::Reserve(get_resource_template_size);
Host2Mac_memcpy(base, get_resource_template, get_resource_template_size);
WriteMacInt32(base + xlm_index * 4, 0x80000000 | XLM_GET_1_RESOURCE);
WriteMacInt32(base + check_load_invoc_index * 4, check_load_invoc_opcode);
// GetIndResource()
get_ind_resource_func = base = SheepMem::Reserve(get_resource_template_size);
Host2Mac_memcpy(base, get_resource_template, get_resource_template_size);
WriteMacInt32(base + xlm_index * 4, 0x80000000 | XLM_GET_IND_RESOURCE);
WriteMacInt32(base + check_load_invoc_index * 4, check_load_invoc_opcode);
// Get1IndResource()
get_1_ind_resource_func = base = SheepMem::Reserve(get_resource_template_size);
Host2Mac_memcpy(base, get_resource_template, get_resource_template_size);
WriteMacInt32(base + xlm_index * 4, 0x80000000 | XLM_GET_1_IND_RESOURCE);
WriteMacInt32(base + check_load_invoc_index * 4, check_load_invoc_opcode);
// RGetResource()
r_get_resource_func = base = SheepMem::Reserve(get_resource_template_size);
Host2Mac_memcpy(base, get_resource_template, get_resource_template_size);
WriteMacInt32(base + xlm_index * 4, 0x80000000 | XLM_R_GET_RESOURCE);
WriteMacInt32(base + check_load_invoc_index * 4, check_load_invoc_opcode);
}
#endif
/*
* Initialize the thunks system
*/
@ -111,6 +202,14 @@ bool ThunksInit(void)
native_op[i].tvect = base;
native_op[i].func = base + 8;
}
#if POWERPC_GET_RESOURCE_THUNKS
generate_powerpc_thunks();
native_op[NATIVE_GET_RESOURCE].func = get_resource_func;
native_op[NATIVE_GET_1_RESOURCE].func = get_1_resource_func;
native_op[NATIVE_GET_IND_RESOURCE].func = get_ind_resource_func;
native_op[NATIVE_GET_1_IND_RESOURCE].func = get_1_ind_resource_func;
native_op[NATIVE_R_GET_RESOURCE].func = r_get_resource_func;
#endif
#else
#if defined(__linux__) || (defined(__APPLE__) && defined(__MACH__))
#define DEFINE_NATIVE_OP(ID, FUNC) do { \