From 3b6a579f333559e6be264c2478257ac79595354b Mon Sep 17 00:00:00 2001 From: gbeauche <> Date: Thu, 18 Jan 2007 07:02:35 +0000 Subject: [PATCH] Optimize lwarx/stwcx for uniprocessors and generate code for them. There is no performance increase, even though those two instructions accounted for approximately 18M untranslated instructions during a simple boot of MacOS. --- .../src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp | 2 ++ .../kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp | 32 +++++++++++++++++++ .../src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp | 2 ++ .../src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp | 14 ++++++-- .../src/kpx_cpu/src/cpu/ppc/ppc-registers.hpp | 5 +++ .../src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp | 24 ++++++++++++++ 6 files changed, 76 insertions(+), 3 deletions(-) diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp index 3d31f931..17bb77b8 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.cpp @@ -114,9 +114,11 @@ any_register powerpc_cpu::get_register(int id) return value; } +#if KPX_MAX_CPUS != 1 uint32 powerpc_registers::reserve_valid = 0; uint32 powerpc_registers::reserve_addr = 0; uint32 powerpc_registers::reserve_data = 0; +#endif void powerpc_cpu::init_registers() { diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp index 87ac9490..895a4355 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp @@ -91,6 +91,7 @@ struct powerpc_dyngen_helper { static inline powerpc_xer_register & xer() { return CPU->xer(); } static inline powerpc_spcflags & spcflags() { return CPU->spcflags(); } static inline void set_cr(int crfd, int v) { CPU->cr().set(crfd, v); } + static inline powerpc_registers *regs() { return &CPU->regs(); } #ifndef REG_T3 static inline uintptr & reg_T3() { return CPU->codegen.reg_T3; } @@ -280,6 +281,37 @@ 
DEFINE_OP(T2); #undef im #undef DEFINE_OP +void OPPROTO op_lwarx_T0_T1(void) +{ + T0 = vm_read_memory_4(T1); + powerpc_dyngen_helper::regs()->reserve_valid = 1; + powerpc_dyngen_helper::regs()->reserve_addr = T1; +#if KPX_MAX_CPUS != 1 + powerpc_dyngen_helper::regs()->reserve_data = T0; +#endif +} + +void OPPROTO op_stwcx_T0_T1(void) +{ + uint32 cr = powerpc_dyngen_helper::get_cr() & ~CR_field<0>::mask(); + cr |= powerpc_dyngen_helper::xer().get_so() << 28; + if (powerpc_dyngen_helper::regs()->reserve_valid) { + powerpc_dyngen_helper::regs()->reserve_valid = 0; + if (powerpc_dyngen_helper::regs()->reserve_addr == T1 /* physical_addr(EA) */ +#if KPX_MAX_CPUS != 1 + /* HACK: if another processor wrote to the reserved block, + nothing happens, i.e. we should operate as if reserve == 0 */ + && powerpc_dyngen_helper::regs()->reserve_data == vm_read_memory_4(T1) +#endif + ) { + vm_write_memory_4(T1, T0); + cr |= CR_EQ_field<0>::mask(); + } + } + powerpc_dyngen_helper::set_cr(cr); + dyngen_barrier(); +} + /** * Condition Registers diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp index 44bcd8a1..e2eb503b 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp @@ -94,6 +94,8 @@ public: #endif // Misc instructions + DEFINE_ALIAS(lwarx_T0_T1,0); + DEFINE_ALIAS(stwcx_T0_T1,0); DEFINE_ALIAS(inc_32_mem,1); DEFINE_ALIAS(nego_T0,0); DEFINE_ALIAS(dcbz_T0,0); diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp index 983e253b..fe2c727e 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp @@ -744,10 +744,13 @@ template< class RA > void powerpc_cpu::execute_lwarx(uint32 opcode) { const uint32 ea = RA::get(this, opcode) + operand_RB::get(this, opcode); + uint32 reserve_data = vm_read_memory_4(ea); regs().reserve_valid = 
1; regs().reserve_addr = ea; - regs().reserve_data = vm_read_memory_4(ea); - operand_RD::set(this, opcode, regs().reserve_data); +#if KPX_MAX_CPUS != 1 + regs().reserve_data = reserve_data; +#endif + operand_RD::set(this, opcode, reserve_data); increment_pc(4); } @@ -758,7 +761,12 @@ void powerpc_cpu::execute_stwcx(uint32 opcode) cr().clear(0); if (regs().reserve_valid) { if (regs().reserve_addr == ea /* physical_addr(EA) */ - && /* HACK */ regs().reserve_data == vm_read_memory_4(ea)) { +#if KPX_MAX_CPUS != 1 + /* HACK: if another processor wrote to the reserved block, + nothing happens, i.e. we should operate as if reserve == 0 */ + && regs().reserve_data == vm_read_memory_4(ea) +#endif + ) { vm_write_memory_4(ea, operand_RS::get(this, opcode)); cr().set(0, standalone_CR_EQ_field::mask()); } diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-registers.hpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-registers.hpp index d6c1ec49..a44a32df 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-registers.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-registers.hpp @@ -238,9 +238,14 @@ struct powerpc_registers uint32 ctr; // Count Register (SPR 9) uint32 pc; // Program Counter powerpc_spcflags spcflags; // Special CPU flags +#if KPX_MAX_CPUS == 1 + uint32 reserve_valid; + uint32 reserve_addr; +#else static uint32 reserve_valid; static uint32 reserve_addr; static uint32 reserve_data; +#endif }; #endif /* PPC_REGISTERS_H */ diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp index deccc209..1272a589 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp @@ -465,6 +465,30 @@ powerpc_cpu::compile_block(uint32 entry_point) } break; } + case PPC_I(STWCX): // Store Word Conditional Indexed + case PPC_I(LWARX): // Load Word and Reserve Indexed + { + const int rA = rA_field::extract(opcode); + const int rB = rB_field::extract(opcode); + 
if (rA == 0) + dg.gen_load_T1_GPR(rB); + else { + dg.gen_load_T1_GPR(rA); + dg.gen_load_T2_GPR(rB); + dg.gen_add_32_T1_T2(); + } + switch (ii->mnemo) { + case PPC_I(LWARX): + dg.gen_lwarx_T0_T1(); + dg.gen_store_T0_GPR(rD_field::extract(opcode)); + break; + case PPC_I(STWCX): + dg.gen_load_T0_GPR(rS_field::extract(opcode)); + dg.gen_stwcx_T0_T1(); + break; + } + break; + } case PPC_I(BC): // Branch Conditional { const int bo = BO_field::extract(opcode);