Apply the no-stack-frame optimization in op_invoke_*() to MacOS X for Intel

templates. This avoids misalignment of the stack and the useless reservation
of space on it for function arguments. Indeed, we now pre-allocate 16 stack slots
in op_execute() for this purpose.
This commit is contained in:
gbeauche 2006-01-22 00:12:39 +00:00
parent 51a09ecc21
commit f32e053178

View File

@ -274,24 +274,26 @@ void OPPROTO op_execute(uint8 *entry_point, basic_cpu *this_cpu)
{ {
typedef void (*func_t)(void); typedef void (*func_t)(void);
func_t func = (func_t)entry_point; func_t func = (func_t)entry_point;
const int n_slots = 16 + 6; /* 16 stack slots + 6 VCPU registers */
volatile uintptr stk[n_slots];
#ifdef REG_CPU #ifdef REG_CPU
volatile uintptr saved_CPU = (uintptr)CPU; stk[n_slots - 1] = (uintptr)CPU;
CPU = this_cpu; CPU = this_cpu;
#endif #endif
#ifdef REG_A0 #ifdef REG_A0
volatile uintptr saved_A0 = reg_A0; stk[n_slots - 2] = reg_A0;
#endif #endif
#ifdef REG_T0 #ifdef REG_T0
volatile uintptr saved_T0 = reg_T0; stk[n_slots - 3] = reg_T0;
#endif #endif
#ifdef REG_T1 #ifdef REG_T1
volatile uintptr saved_T1 = reg_T1; stk[n_slots - 4] = reg_T1;
#endif #endif
#ifdef REG_T2 #ifdef REG_T2
volatile uintptr saved_T2 = reg_T2; stk[n_slots - 5] = reg_T2;
#endif #endif
#ifdef REG_T3 #ifdef REG_T3
volatile uintptr saved_T3 = reg_T3; stk[n_slots - 6] = reg_T3;
#endif #endif
SLOW_DISPATCH(entry_point); SLOW_DISPATCH(entry_point);
func(); // NOTE: never called, fake to make compiler save return point func(); // NOTE: never called, fake to make compiler save return point
@ -307,22 +309,22 @@ void OPPROTO op_execute(uint8 *entry_point, basic_cpu *this_cpu)
asm volatile ("1:"); asm volatile ("1:");
#endif #endif
#ifdef REG_T3 #ifdef REG_T3
reg_T3 = saved_T3; reg_T3 = stk[n_slots - 6];
#endif #endif
#ifdef REG_T2 #ifdef REG_T2
reg_T2 = saved_T2; reg_T2 = stk[n_slots - 5];
#endif #endif
#ifdef REG_T1 #ifdef REG_T1
reg_T1 = saved_T1; reg_T1 = stk[n_slots - 4];
#endif #endif
#ifdef REG_T0 #ifdef REG_T0
reg_T0 = saved_T0; reg_T0 = stk[n_slots - 3];
#endif #endif
#ifdef REG_A0 #ifdef REG_A0
reg_A0 = saved_A0; reg_A0 = stk[n_slots - 2];
#endif #endif
#ifdef REG_CPU #ifdef REG_CPU
CPU = (basic_cpu *)saved_CPU; CPU = (basic_cpu *)stk[n_slots - 1];
#endif #endif
} }
@ -346,7 +348,8 @@ void OPPROTO op_jmp_A0(void)
} }
// Register calling conventions based arches don't need a stack frame // Register calling conventions based arches don't need a stack frame
#if (defined __APPLE__ && defined __MACH__) && defined __ppc__ // XXX enable on x86 too because we allocated a frame in op_execute()
#if (defined __APPLE__ && defined __MACH__)
#define DEFINE_OP(NAME, CODE) \ #define DEFINE_OP(NAME, CODE) \
static void OPPROTO impl_##NAME(void) __attribute__((used)); \ static void OPPROTO impl_##NAME(void) __attribute__((used)); \
void OPPROTO impl_##NAME(void) \ void OPPROTO impl_##NAME(void) \