Optimize lwarx/stwcx for uniprocessors and generate code for them.

There is no performance increase, even though those two instructions accounted for approximately 18M untranslated instructions on a simple boot to MacOS.
This commit is contained in:
gbeauche 2007-01-18 07:02:35 +00:00
parent b9486d35e3
commit 3b6a579f33
6 changed files with 76 additions and 3 deletions

View File

@ -114,9 +114,11 @@ any_register powerpc_cpu::get_register(int id)
return value;
}
// Out-of-class definitions for the lwarx/stwcx reservation state.
// These are only needed when KPX_MAX_CPUS != 1, where powerpc_registers
// declares the three fields as static members; in the KPX_MAX_CPUS == 1
// configuration reserve_valid/reserve_addr are ordinary per-instance
// members and reserve_data is not declared at all.
#if KPX_MAX_CPUS != 1
uint32 powerpc_registers::reserve_valid = 0;
uint32 powerpc_registers::reserve_addr = 0;
uint32 powerpc_registers::reserve_data = 0;
#endif
void powerpc_cpu::init_registers()
{

View File

@ -91,6 +91,7 @@ struct powerpc_dyngen_helper {
// Returns a reference to the XER register object obtained from CPU->xer().
static inline powerpc_xer_register & xer() { return CPU->xer(); }
// Returns a reference to the special CPU flags obtained from CPU->spcflags().
static inline powerpc_spcflags & spcflags() { return CPU->spcflags(); }
// Forwards to CPU->cr().set(crfd, v): stores v for condition register field crfd.
static inline void set_cr(int crfd, int v) { CPU->cr().set(crfd, v); }
// Returns the address of CPU->regs(); used by the lwarx/stwcx ops to reach
// the reserve_valid / reserve_addr / reserve_data fields.
static inline powerpc_registers *regs() { return &CPU->regs(); }
#ifndef REG_T3
static inline uintptr & reg_T3() { return CPU->codegen.reg_T3; }
@ -280,6 +281,37 @@ DEFINE_OP(T2);
#undef im
#undef DEFINE_OP
/**
 *	lwarx: load the 32-bit word at address T1 into T0 and record a
 *	reservation on that address.
 *
 *	The loaded value is additionally remembered in reserve_data when
 *	KPX_MAX_CPUS != 1, so that the matching stwcx op can compare it
 *	against memory.
 **/
void OPPROTO op_lwarx_T0_T1(void)
{
	// The load comes first, exactly as before; the reservation is
	// only recorded once the value is in T0.
	T0 = vm_read_memory_4(T1);

	powerpc_registers * const reservation = powerpc_dyngen_helper::regs();
	reservation->reserve_valid = 1;
	reservation->reserve_addr = T1;
#if KPX_MAX_CPUS != 1
	reservation->reserve_data = T0;
#endif
}
/**
 *	stwcx.: conditionally store T0 to address T1.
 *
 *	The store is performed only if a reservation is currently valid and
 *	was taken on the same address (and, when KPX_MAX_CPUS != 1, the word
 *	in memory still equals the value recorded by lwarx). CR field 0 is
 *	rebuilt from scratch: the EQ bit is set on a successful store and the
 *	XER summary-overflow bit is copied into bit 28.
 **/
void OPPROTO op_stwcx_T0_T1(void)
{
// Start from the current CR with every bit of field 0 cleared
uint32 cr = powerpc_dyngen_helper::get_cr() & ~CR_field<0>::mask();
// Copy XER[SO] into bit 28 of the CR image
cr |= powerpc_dyngen_helper::xer().get_so() << 28;
if (powerpc_dyngen_helper::regs()->reserve_valid) {
// The reservation is consumed whether or not the store below happens
powerpc_dyngen_helper::regs()->reserve_valid = 0;
if (powerpc_dyngen_helper::regs()->reserve_addr == T1 /* physical_addr(EA) */
#if KPX_MAX_CPUS != 1
/* HACK: if another processor wrote to the reserved block,
nothing happens, i.e. we should operate as if reserve == 0 */
&& powerpc_dyngen_helper::regs()->reserve_data == vm_read_memory_4(T1)
#endif
) {
// Reservation still holds: perform the store and flag success in CR0[EQ]
vm_write_memory_4(T1, T0);
cr |= CR_EQ_field<0>::mask();
}
}
// Write back the whole updated CR image
powerpc_dyngen_helper::set_cr(cr);
// NOTE(review): dyngen_barrier() is defined elsewhere; presumably it keeps
// the compiler from reordering/merging the op's tail -- confirm.
dyngen_barrier();
}
/**
* Condition Registers

View File

@ -94,6 +94,8 @@ public:
#endif
// Misc instructions
// NOTE(review): DEFINE_ALIAS is defined outside this view; presumably each
// entry exposes a gen_<name>() wrapper for the matching op_<name> dyngen op
// (compile_block calls gen_lwarx_T0_T1()/gen_stwcx_T0_T1()), and the second
// argument is the number of immediate parameters -- confirm against the
// macro definition.
DEFINE_ALIAS(lwarx_T0_T1,0);
DEFINE_ALIAS(stwcx_T0_T1,0);
DEFINE_ALIAS(inc_32_mem,1);
DEFINE_ALIAS(nego_T0,0);
DEFINE_ALIAS(dcbz_T0,0);

View File

@ -744,10 +744,13 @@ template< class RA >
/**
 *	Load Word and Reserve Indexed (interpreter path)
 *
 *	Computes EA from the RA operand plus RB, loads the 32-bit word at EA
 *	into RD and records a reservation on EA for a later stwcx.
 *
 *	opcode	the instruction word being executed
 *
 *	The reserve_data field only exists in powerpc_registers when
 *	KPX_MAX_CPUS != 1, so every access to it must stay inside the
 *	matching #if; memory is read exactly once and RD is written once.
 **/
void powerpc_cpu::execute_lwarx(uint32 opcode)
{
	const uint32 ea = RA::get(this, opcode) + operand_RB::get(this, opcode);

	// Single memory read; the same value feeds both RD and the reservation
	const uint32 reserve_data = vm_read_memory_4(ea);

	regs().reserve_valid = 1;
	regs().reserve_addr = ea;
#if KPX_MAX_CPUS != 1
	// Remember the loaded value so stwcx can detect a foreign write
	regs().reserve_data = reserve_data;
#endif

	operand_RD::set(this, opcode, reserve_data);
	increment_pc(4);
}
@ -758,7 +761,12 @@ void powerpc_cpu::execute_stwcx(uint32 opcode)
cr().clear(0);
if (regs().reserve_valid) {
if (regs().reserve_addr == ea /* physical_addr(EA) */
&& /* HACK */ regs().reserve_data == vm_read_memory_4(ea)) {
#if KPX_MAX_CPUS != 1
/* HACK: if another processor wrote to the reserved block,
nothing happens, i.e. we should operate as if reserve == 0 */
&& regs().reserve_data == vm_read_memory_4(ea)
#endif
) {
vm_write_memory_4(ea, operand_RS::get(this, opcode));
cr().set(0, standalone_CR_EQ_field::mask());
}

View File

@ -238,9 +238,14 @@ struct powerpc_registers
uint32 ctr; // Count Register (SPR 9)
uint32 pc; // Program Counter
powerpc_spcflags spcflags; // Special CPU flags
#if KPX_MAX_CPUS == 1
uint32 reserve_valid;
uint32 reserve_addr;
#else
static uint32 reserve_valid;
static uint32 reserve_addr;
static uint32 reserve_data;
#endif
};
#endif /* PPC_REGISTERS_H */

View File

@ -465,6 +465,30 @@ powerpc_cpu::compile_block(uint32 entry_point)
}
break;
}
// lwarx and stwcx share the same indexed effective-address computation,
// so both mnemonics are handled by one case and only differ in the final
// op that is emitted.
case PPC_I(STWCX): // Store Word Conditional Indexed
case PPC_I(LWARX): // Load Word and Reserve Indexed
{
const int rA = rA_field::extract(opcode);
const int rB = rB_field::extract(opcode);
// Effective address goes into T1: GPR[rB] alone when the rA field is 0,
// otherwise GPR[rA] + GPR[rB]
if (rA == 0)
dg.gen_load_T1_GPR(rB);
else {
dg.gen_load_T1_GPR(rA);
dg.gen_load_T2_GPR(rB);
dg.gen_add_32_T1_T2();
}
switch (ii->mnemo) {
case PPC_I(LWARX):
// Emit the lwarx op, then store its T0 result into the rD register
dg.gen_lwarx_T0_T1();
dg.gen_store_T0_GPR(rD_field::extract(opcode));
break;
case PPC_I(STWCX):
// Load the rS register into T0 before emitting the stwcx op
dg.gen_load_T0_GPR(rS_field::extract(opcode));
dg.gen_stwcx_T0_T1();
break;
}
break;
}
case PPC_I(BC): // Branch Conditional
{
const int bo = BO_field::extract(opcode);