From ea9553ee656f1a35c0312d3a0340272d5d89797d Mon Sep 17 00:00:00 2001 From: gbeauche <> Date: Sun, 25 Jan 2004 23:21:06 +0000 Subject: [PATCH] Optimize rlwinm further. Translate FP instructions if we don't need to compute exceptions. --- .../kpx_cpu/src/cpu/jit/basic-dyngen-ops.cpp | 136 +-------- .../src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp | 11 + .../src/kpx_cpu/src/cpu/ppc/ppc-cpu.hpp | 5 + .../kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp | 228 +++++++++++++- .../src/kpx_cpu/src/cpu/ppc/ppc-dyngen.cpp | 24 ++ .../src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp | 50 +++ .../src/kpx_cpu/src/cpu/ppc/ppc-registers.hpp | 1 + .../src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp | 289 ++++++++++++++++-- 8 files changed, 601 insertions(+), 143 deletions(-) diff --git a/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen-ops.cpp b/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen-ops.cpp index cb5560c1..c74f19e9 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen-ops.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen-ops.cpp @@ -42,10 +42,8 @@ register uintptr reg_A0 asm(REG_A0); register uintptr reg_T0 asm(REG_T0); #define T1 REG32(reg_T1) register uintptr reg_T1 asm(REG_T1); -#ifdef REG_T2 #define T2 REG32(reg_T2) register uintptr reg_T2 asm(REG_T2); -#endif #ifdef REG_T3 #define T3 REG32(reg_T3) register uintptr reg_T3 asm(REG_T3); @@ -83,29 +81,39 @@ void OPPROTO op_##NAME(void) \ // Register moves DEFINE_OP(mov_32_T0_im, T0 = PARAM1); DEFINE_OP(mov_32_T0_T1, T0 = T1); +DEFINE_OP(mov_32_T0_T2, T0 = T2); DEFINE_OP(mov_32_T0_A0, T0 = A0); DEFINE_OP(mov_32_T1_im, T1 = PARAM1); DEFINE_OP(mov_32_T1_T0, T1 = T0); +DEFINE_OP(mov_32_T1_T2, T1 = T2); DEFINE_OP(mov_32_T1_A0, T1 = A0); +DEFINE_OP(mov_32_T2_im, T2 = PARAM1); +DEFINE_OP(mov_32_T2_T1, T2 = T1); +DEFINE_OP(mov_32_T2_T0, T2 = T0); +DEFINE_OP(mov_32_T2_A0, T2 = A0); DEFINE_OP(mov_32_A0_im, A0 = PARAM1); DEFINE_OP(mov_32_A0_T0, A0 = T0); DEFINE_OP(mov_32_A0_T1, A0 = T1); +DEFINE_OP(mov_32_A0_T2, A0 = T2); 
DEFINE_OP(mov_32_T0_0, T0 = 0); DEFINE_OP(mov_32_T1_0, T1 = 0); +DEFINE_OP(mov_32_T2_0, T2 = 0); DEFINE_OP(mov_32_A0_0, A0 = 0); -void OPPROTO op_mov_ad_A0_im(void) -{ #if SIZEOF_VOID_P == 8 #if defined(__x86_64__) - asm volatile ("movabsq $__op_param1,%" REG_A0); +#define DEFINE_MOV_AD(REG) asm volatile ("movabsq $__op_param1,%" REG_##REG) #else #error "unsupported 64-bit value move in" #endif #else - A0 = PARAM1; +#define DEFINE_MOV_AD(REG) REG = PARAM1 #endif -} + +void OPPROTO op_mov_ad_T0_im(void) { DEFINE_MOV_AD(T0); } +void OPPROTO op_mov_ad_T1_im(void) { DEFINE_MOV_AD(T1); } +void OPPROTO op_mov_ad_T2_im(void) { DEFINE_MOV_AD(T2); } +void OPPROTO op_mov_ad_A0_im(void) { DEFINE_MOV_AD(A0); } // Arithmetic operations DEFINE_OP(add_32_T0_T1, T0 += T1); @@ -190,120 +198,6 @@ DEFINE_OP(ze_8_32_T0, T0 = (uint32)(uint8)T0); #undef DEFINE_OP -/** - * Native FP operations optimization - **/ - -#ifndef do_fabs -#define do_fabs(x) fabs(x) -#endif -#ifndef do_fadd -#define do_fadd(x, y) x + y -#endif -#ifndef do_fdiv -#define do_fdiv(x, y) x / y -#endif -#ifndef do_fmadd -#define do_fmadd(x, y, z) ((x * y) + z) -#endif -#ifndef do_fmsub -#define do_fmsub(x, y, z) ((x * y) - z) -#endif -#ifndef do_fmul -#define do_fmul(x, y) (x * y) -#endif -#ifndef do_fnabs -#define do_fnabs(x) -fabs(x) -#endif -#ifndef do_fneg -#define do_fneg(x) -x -#endif -#ifndef do_fnmadd -#define do_fnmadd(x, y, z) -((x * y) + z) -#endif -#ifndef do_fnmsub -#define do_fnmsub(x, y, z) -((x * y) - z) -#endif -#ifndef do_fsub -#define do_fsub(x, y) x - y -#endif -#ifndef do_fmov -#define do_fmov(x) x -#endif - - -/** - * FP double operations - **/ - -#if 0 - -double OPPROTO op_lfd(void) -{ - union { double d; uint64 j; } r; - r.j = vm_do_read_memory_8((uint64 *)T1); - return r.d; -} - -float OPPROTO op_lfs(void) -{ - union { float f; uint32 i; } r; - r.i = vm_do_read_memory_4((uint32 *)T1); - return r.f; -} - -#define DEFINE_OP(NAME, OP, ARGS) \ -double OPPROTO op_##NAME(double F0, double F1, double 
F2) \ -{ \ - return do_##OP ARGS; \ -} - -DEFINE_OP(fmov_F1, fmov, (F1)); -DEFINE_OP(fmov_F2, fmov, (F2)); -DEFINE_OP(fabs, fabs, (F0)); -DEFINE_OP(fadd, fadd, (F0, F1)); -DEFINE_OP(fdiv, fdiv, (F0, F1)); -DEFINE_OP(fmadd, fmadd, (F0, F1, F2)); -DEFINE_OP(fmsub, fmsub, (F0, F1, F2)); -DEFINE_OP(fmul, fmul, (F0, F1)); -DEFINE_OP(fnabs, fnabs, (F0)); -DEFINE_OP(fneg, fneg, (F0)); -DEFINE_OP(fnmadd, fnmadd, (F0, F1, F2)); -DEFINE_OP(fnmsub, fnmsub, (F0, F1, F2)); -DEFINE_OP(fsub, fsub, (F0, F1)); - -#undef DEFINE_OP - - -/** - * FP single operations - **/ - -#define DEFINE_OP(NAME, OP, ARGS) \ -float OPPROTO op_##NAME(float F0, float F1, float F2) \ -{ \ - return do_##OP ARGS; \ -} - -DEFINE_OP(fmovs_F1, fmov, (F1)); -DEFINE_OP(fmovs_F2, fmov, (F2)); -DEFINE_OP(fabss_F0, fabs, (F0)); -DEFINE_OP(fadds_F0_F1, fadd, (F0, F1)); -DEFINE_OP(fdivs_F0_F1, fdiv, (F0, F1)); -DEFINE_OP(fmadds_F0_F1_F2, fmadd, (F0, F1, F2)); -DEFINE_OP(fmsubs_F0_F1_F2, fmsub, (F0, F1, F2)); -DEFINE_OP(fmuls_F0_F1, fmul, (F0, F1)); -DEFINE_OP(fnabss_F0, fnabs, (F0)); -DEFINE_OP(fnegs_F0, fneg, (F0)); -DEFINE_OP(fnmadds_F0_F1_F2, fnmadd, (F0, F1, F2)); -DEFINE_OP(fnmsubs_F0_F1_F2, fnmsub, (F0, F1, F2)); -DEFINE_OP(fsubs_F0_F1, fsub, (F0, F1)); - -#undef DEFINE_OP - -#endif - - /** * Load/Store instructions **/ diff --git a/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp b/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp index 03b9e57b..a94781e2 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp @@ -98,13 +98,23 @@ public: // Register moves void gen_mov_32_T0_im(int32 value); DEFINE_ALIAS(mov_32_T0_T1,0); + DEFINE_ALIAS(mov_32_T0_T2,0); DEFINE_ALIAS(mov_32_T0_A0,0); void gen_mov_32_T1_im(int32 value); DEFINE_ALIAS(mov_32_T1_T0,0); + DEFINE_ALIAS(mov_32_T1_T2,0); DEFINE_ALIAS(mov_32_T1_A0,0); + void gen_mov_32_T2_im(int32 value); + DEFINE_ALIAS(mov_32_T2_T0,0); + DEFINE_ALIAS(mov_32_T2_T1,0); + 
DEFINE_ALIAS(mov_32_T2_A0,0); void gen_mov_32_A0_im(int32 value); DEFINE_ALIAS(mov_32_A0_T0,0); DEFINE_ALIAS(mov_32_A0_T1,0); + DEFINE_ALIAS(mov_32_A0_T2,0); + DEFINE_ALIAS(mov_ad_T0_im,1); + DEFINE_ALIAS(mov_ad_T1_im,1); + DEFINE_ALIAS(mov_ad_T2_im,1); DEFINE_ALIAS(mov_ad_A0_im,1); // Arithmetic operations @@ -283,6 +293,7 @@ basic_dyngen::gen_mov_32_##REG##_im(int32 value) \ DEFINE_OP(T0); DEFINE_OP(T1); +DEFINE_OP(T2); DEFINE_OP(A0); #undef DEFINE_OP diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.hpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.hpp index 8821eb65..199b1d1b 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-cpu.hpp @@ -49,6 +49,11 @@ protected: powerpc_xer_register & xer() { return regs.xer; } powerpc_xer_register const & xer() const { return regs.xer; } + double fp_result() const { return regs.fp_result.d; } + double & fp_result() { return regs.fp_result.d; } + uint64 fp_result_dw() const { return regs.fp_result.j; } + uint64 & fp_result_dw() { return regs.fp_result.j; } + uint32 & fpscr() { return regs.fpscr; } uint32 fpscr() const { return regs.fpscr; } uint32 & lr() { return regs.lr; } diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp index ee3729ea..cd12d84f 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp @@ -33,10 +33,25 @@ register struct powerpc_cpu *CPU asm(REG_CPU); #else #define CPU ((powerpc_cpu *)CPUPARAM) #endif -register uint32 A0 asm(REG_A0); -register uint32 T0 asm(REG_T0); -register uint32 T1 asm(REG_T1); -register uint32 T2 asm(REG_T2); +#if SIZEOF_VOID_P == 8 +#define REG32(X) ((uint32)X) +#else +#define REG32(X) X +#endif +#define FPREG64(X) (*((double *)(X))) +#define A0 REG32(reg_A0) +register uintptr reg_A0 asm(REG_A0); +#define T0 REG32(reg_T0) +#define F0 FPREG64(reg_T0) +register uintptr reg_T0 
asm(REG_T0); +#define T1 REG32(reg_T1) +#define F1 FPREG64(reg_T1) +register uintptr reg_T1 asm(REG_T1); +#define T2 REG32(reg_T2) +#define F2 FPREG64(reg_T2) +register uintptr reg_T2 asm(REG_T2); +#define FD powerpc_dyngen_helper::fp_result() +#define FD_dw powerpc_dyngen_helper::fp_result_dw() // Semantic action templates #define DYNGEN_OPS @@ -57,12 +72,17 @@ struct powerpc_dyngen_helper { static inline void set_ctr(uint32 value) { CPU->ctr() = value; } static inline uint32 get_cr() { return CPU->cr().get(); } static inline void set_cr(uint32 value) { CPU->cr().set(value); } + static inline uint32 get_fpscr() { return CPU->fpscr(); } + static inline void set_fpscr(uint32 value) { CPU->fpscr() = value; } static inline uint32 get_xer() { return CPU->xer().get(); } static inline void set_xer(uint32 value) { CPU->xer().set(value); } static inline void record(int crf, int32 v) { CPU->record_cr(crf, v); } static inline powerpc_cr_register & cr() { return CPU->cr(); } static inline powerpc_xer_register & xer() { return CPU->xer(); } static inline powerpc_spcflags & spcflags() { return CPU->spcflags(); } + static double & fp_result() { return CPU->fp_result(); } + static uint64 & fp_result_dw() { return CPU->fp_result_dw(); } + static inline void set_cr(int crfd, int v) { CPU->cr().set(crfd, v); } }; @@ -122,6 +142,99 @@ DEFINE_REG(31); #undef DEFINE_OP +/** + * Load/store floating-point registers + **/ + +#define DEFINE_OP(REG, N) \ +void OPPROTO op_load_F##REG##_FPR##N(void) \ +{ \ + reg_T##REG = (uintptr)&CPU->fpr(N); \ +} \ +void OPPROTO op_store_F##REG##_FPR##N(void) \ +{ \ + CPU->fpr(N) = F##REG; \ +} +#define DEFINE_REG(N) \ +DEFINE_OP(0,N); \ +DEFINE_OP(1,N); \ +DEFINE_OP(2,N); \ +void OPPROTO op_store_FD_FPR##N(void) \ +{ \ + CPU->fpr(N) = FD; \ +} + +DEFINE_REG(0); +DEFINE_REG(1); +DEFINE_REG(2); +DEFINE_REG(3); +DEFINE_REG(4); +DEFINE_REG(5); +DEFINE_REG(6); +DEFINE_REG(7); +DEFINE_REG(8); +DEFINE_REG(9); +DEFINE_REG(10); +DEFINE_REG(11); +DEFINE_REG(12); 
+DEFINE_REG(13); +DEFINE_REG(14); +DEFINE_REG(15); +DEFINE_REG(16); +DEFINE_REG(17); +DEFINE_REG(18); +DEFINE_REG(19); +DEFINE_REG(20); +DEFINE_REG(21); +DEFINE_REG(22); +DEFINE_REG(23); +DEFINE_REG(24); +DEFINE_REG(25); +DEFINE_REG(26); +DEFINE_REG(27); +DEFINE_REG(28); +DEFINE_REG(29); +DEFINE_REG(30); +DEFINE_REG(31); + +#undef DEFINE_REG +#undef DEFINE_OP + + +/** + * Load/Store floating-point data + **/ + +#define im PARAM1 +#define DEFINE_OP(OFFSET) \ +void OPPROTO op_load_double_FD_A0_##OFFSET(void) \ +{ \ + FD_dw = vm_read_memory_8(A0 + OFFSET); \ +} \ +void OPPROTO op_load_single_FD_A0_##OFFSET(void) \ +{ \ + any_register *x = (any_register *)&FD; \ + x->i = vm_read_memory_4(A0 + OFFSET); \ + FD = (double)x->f; \ +} \ +void OPPROTO op_store_double_F0_A0_##OFFSET(void) \ +{ \ + vm_write_memory_8(A0 + OFFSET, *(uint64 *)reg_T0); \ +} \ +void OPPROTO op_store_single_F0_A0_##OFFSET(void) \ +{ \ + any_register *x = (any_register *)&FD; \ + x->f = (float)F0; \ + vm_write_memory_4(A0 + OFFSET, x->i); \ +} + +DEFINE_OP(0); +DEFINE_OP(im); +DEFINE_OP(T1); + +#undef DEFINE_OP + + /** * Condition Registers **/ @@ -217,6 +330,107 @@ void OPPROTO op_mtcrf_T0_im(void) } +/** + * Native FP operations optimization + **/ + +#ifndef do_fabs +#define do_fabs(x) fabs(x) +#endif +#ifndef do_fadd +#define do_fadd(x, y) x + y +#endif +#ifndef do_fdiv +#define do_fdiv(x, y) x / y +#endif +#ifndef do_fmadd +#define do_fmadd(x, y, z) ((x * y) + z) +#endif +#ifndef do_fmsub +#define do_fmsub(x, y, z) ((x * y) - z) +#endif +#ifndef do_fmul +#define do_fmul(x, y) (x * y) +#endif +#ifndef do_fnabs +#define do_fnabs(x) -fabs(x) +#endif +#ifndef do_fneg +#define do_fneg(x) -x +#endif +#ifndef do_fnmadd +#define do_fnmadd(x, y, z) -((x * y) + z) +#endif +#ifndef do_fnmsub +#define do_fnmsub(x, y, z) -((x * y) - z) +#endif +#ifndef do_fsub +#define do_fsub(x, y) x - y +#endif +#ifndef do_fmov +#define do_fmov(x) x +#endif + + +/** + * Double-precision floating point operations + **/ + 
+#define DEFINE_OP(NAME, CODE) \ +void OPPROTO op_##NAME(void) \ +{ \ + CODE; \ +} + +DEFINE_OP(fmov_F0_F1, F0 = F1); +DEFINE_OP(fmov_F0_F2, F0 = F2); +DEFINE_OP(fmov_F1_F0, F1 = F0); +DEFINE_OP(fmov_F1_F2, F1 = F2); +DEFINE_OP(fmov_F2_F0, F2 = F0); +DEFINE_OP(fmov_F2_F1, F2 = F1); +DEFINE_OP(fmov_FD_F0, FD = F0); +DEFINE_OP(fmov_FD_F1, FD = F1); +DEFINE_OP(fmov_FD_F2, FD = F2); + +DEFINE_OP(fabs_FD_F0, FD = do_fabs(F0)); +DEFINE_OP(fneg_FD_F0, FD = do_fneg(F0)); +DEFINE_OP(fnabs_FD_F0, FD = do_fnabs(F0)); + +DEFINE_OP(fadd_FD_F0_F1, FD = F0 + F1); +DEFINE_OP(fsub_FD_F0_F1, FD = F0 - F1); +DEFINE_OP(fmul_FD_F0_F1, FD = F0 * F1); +DEFINE_OP(fdiv_FD_F0_F1, FD = F0 / F1); +DEFINE_OP(fmadd_FD_F0_F1_F2, FD = do_fmadd(F0, F1, F2)); +DEFINE_OP(fmsub_FD_F0_F1_F2, FD = do_fmsub(F0, F1, F2)); +DEFINE_OP(fnmadd_FD_F0_F1_F2, FD = do_fnmadd(F0, F1, F2)); +DEFINE_OP(fnmsub_FD_F0_F1_F2, FD = do_fnmsub(F0, F1, F2)); + +#undef DEFINE_OP + + +/** + * Single-Precision floating point operations + **/ + +#define DEFINE_OP(NAME, REG, OP) \ +void OPPROTO op_##NAME(void) \ +{ \ + float x = OP; \ + REG = x; \ +} + +DEFINE_OP(fadds_FD_F0_F1, FD, F0 + F1); +DEFINE_OP(fsubs_FD_F0_F1, FD, F0 - F1); +DEFINE_OP(fmuls_FD_F0_F1, FD, F0 * F1); +DEFINE_OP(fdivs_FD_F0_F1, FD, F0 / F1); +DEFINE_OP(fmadds_FD_F0_F1_F2, FD, do_fmadd(F0, F1, F2)); +DEFINE_OP(fmsubs_FD_F0_F1_F2, FD, do_fmsub(F0, F1, F2)); +DEFINE_OP(fnmadds_FD_F0_F1_F2, FD, do_fnmadd(F0, F1, F2)); +DEFINE_OP(fnmsubs_FD_F0_F1_F2, FD, do_fnmsub(F0, F1, F2)); + +#undef DEFINE_OP + + /** * Special purpose registers **/ @@ -475,6 +689,12 @@ void OPPROTO op_record_cr0_T0(void) dyngen_barrier(); } +void OPPROTO op_record_cr1(void) +{ + powerpc_dyngen_helper::set_cr((powerpc_dyngen_helper::get_cr() & ~CR_field<1>::mask()) | + ((powerpc_dyngen_helper::get_fpscr() >> 4) & 0x0f000000)); +} + #define im PARAM1 #if DYNGEN_ASM_OPTS && defined(__powerpc__) && 0 diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.cpp 
b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.cpp index 3e637c49..fbab1beb 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.cpp @@ -118,6 +118,13 @@ DEFINE_INSN(store, A0, GPR); DEFINE_INSN(store, T0, GPR); DEFINE_INSN(store, T1, GPR); DEFINE_INSN(store, T2, GPR); +DEFINE_INSN(load, F0, FPR); +DEFINE_INSN(load, F1, FPR); +DEFINE_INSN(load, F2, FPR); +DEFINE_INSN(store, F0, FPR); +DEFINE_INSN(store, F1, FPR); +DEFINE_INSN(store, F2, FPR); +DEFINE_INSN(store, FD, FPR); // Condition register bitfield DEFINE_INSN(load, T0, crb); @@ -127,6 +134,23 @@ DEFINE_INSN(store, T1, crb); #undef DEFINE_INSN +// Floating point load store +#define DEFINE_OP(NAME, REG, TYPE) \ +void powerpc_dyngen::gen_##NAME##_##TYPE##_##REG##_A0_im(int32 offset) \ +{ \ + if (offset == 0) \ + gen_op_##NAME##_##TYPE##_##REG##_A0_0(); \ + else \ + gen_op_##NAME##_##TYPE##_##REG##_A0_im(offset); \ +} + +DEFINE_OP(load, FD, double); +DEFINE_OP(load, FD, single); +DEFINE_OP(store, F0, double); +DEFINE_OP(store, F0, single); + +#undef DEFINE_OP + #define DEFINE_INSN(OP, REG) \ void powerpc_dyngen::gen_##OP##_##REG##_crf(int crf) \ { \ diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp index fafd1b58..7810dcfb 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp @@ -56,6 +56,13 @@ public: void gen_store_T0_GPR(int i); void gen_store_T1_GPR(int i); void gen_store_T2_GPR(int i); + void gen_load_F0_FPR(int i); + void gen_load_F1_FPR(int i); + void gen_load_F2_FPR(int i); + void gen_store_FD_FPR(int i); + void gen_store_F0_FPR(int i); + void gen_store_F1_FPR(int i); + void gen_store_F2_FPR(int i); // Raw aliases #define DEFINE_ALIAS_RAW(NAME, PRE, POST, ARGLIST, ARGS) \ @@ -115,6 +122,7 @@ public: // Compare & Record instructions DEFINE_ALIAS(record_cr0_T0,0); + DEFINE_ALIAS(record_cr1,0); void 
gen_compare_T0_T1(int crf); void gen_compare_T0_im(int crf, int32 value); void gen_compare_logical_T0_T1(int crf); @@ -163,6 +171,48 @@ public: DEFINE_ALIAS(subfze_T0,0); DEFINE_ALIAS(subfzeo_T0,0); + // Double-precision floating point operations + DEFINE_ALIAS(fmov_F0_F1,0); + DEFINE_ALIAS(fmov_F0_F2,0); + DEFINE_ALIAS(fmov_F1_F0,0); + DEFINE_ALIAS(fmov_F1_F2,0); + DEFINE_ALIAS(fmov_F2_F0,0); + DEFINE_ALIAS(fmov_F2_F1,0); + DEFINE_ALIAS(fmov_FD_F0,0); + DEFINE_ALIAS(fmov_FD_F1,0); + DEFINE_ALIAS(fmov_FD_F2,0); + DEFINE_ALIAS(fabs_FD_F0,0); + DEFINE_ALIAS(fneg_FD_F0,0); + DEFINE_ALIAS(fnabs_FD_F0,0); + DEFINE_ALIAS(fadd_FD_F0_F1,0); + DEFINE_ALIAS(fsub_FD_F0_F1,0); + DEFINE_ALIAS(fmul_FD_F0_F1,0); + DEFINE_ALIAS(fdiv_FD_F0_F1,0); + DEFINE_ALIAS(fmadd_FD_F0_F1_F2,0); + DEFINE_ALIAS(fmsub_FD_F0_F1_F2,0); + DEFINE_ALIAS(fnmadd_FD_F0_F1_F2,0); + DEFINE_ALIAS(fnmsub_FD_F0_F1_F2,0); + + // Single-precision floating point operations + DEFINE_ALIAS(fadds_FD_F0_F1,0); + DEFINE_ALIAS(fsubs_FD_F0_F1,0); + DEFINE_ALIAS(fmuls_FD_F0_F1,0); + DEFINE_ALIAS(fdivs_FD_F0_F1,0); + DEFINE_ALIAS(fmadds_FD_F0_F1_F2,0); + DEFINE_ALIAS(fmsubs_FD_F0_F1_F2,0); + DEFINE_ALIAS(fnmadds_FD_F0_F1_F2,0); + DEFINE_ALIAS(fnmsubs_FD_F0_F1_F2,0); + + // Load/store floating point data + DEFINE_ALIAS(load_double_FD_A0_T1,0); + void gen_load_double_FD_A0_im(int32 offset); + DEFINE_ALIAS(load_single_FD_A0_T1,0); + void gen_load_single_FD_A0_im(int32 offset); + DEFINE_ALIAS(store_double_F0_A0_T1,0); + void gen_store_double_F0_A0_im(int32 offset); + DEFINE_ALIAS(store_single_F0_A0_T1,0); + void gen_store_single_F0_A0_im(int32 offset); + // Branch instructions void gen_bc_A0(int bo, int bi, uint32 npc); diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-registers.hpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-registers.hpp index e693bee2..1ff07966 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-registers.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-registers.hpp @@ -192,6 +192,7 @@ struct 
powerpc_registers uint32 gpr[32]; // General-Purpose Registers powerpc_fpr fpr[32]; // Floating-Point Registers + powerpc_fpr fp_result; // Floating-Point result powerpc_cr_register cr; // Condition Register uint32 fpscr; // Floating-Point Status and Control Register powerpc_xer_register xer; // XER Register (SPR 1) diff --git a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp index 2643a27d..38d35f91 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp @@ -869,26 +869,39 @@ powerpc_cpu::compile_block(uint32 entry_point) const int SH = SH_field::extract(opcode); const int MB = MB_field::extract(opcode); const int ME = ME_field::extract(opcode); + const uint32 m = mask_operand::compute(MB, ME); dg.gen_load_T0_GPR(rS); - if (MB == 0 && ME == 31) { - // rotlwi rA,rS,SH - if (SH > 0) - dg.gen_rol_32_T0_im(SH); - } - else if (MB == 0 && (ME == (31 - SH))) { - // slwi rA,rS,SH - dg.gen_lsl_32_T0_im(SH); - } - else { - const uint32 m = mask_operand::compute(MB, ME); - if (SH == 0) { - // andi rA,rS,MASK(MB,ME) + if (MB == 0) { + if (ME == 31) { + // rotlwi rA,rS,SH + if (SH > 0) + dg.gen_rol_32_T0_im(SH); + } + else if (ME == (31 - SH)) { + // slwi rA,rS,SH + dg.gen_lsl_32_T0_im(SH); + } + else if (SH == 0) { + // andi rA,rS,MASK(0,ME) dg.gen_and_32_T0_im(m); } - else { - // rlwinm rA,rS,SH,MB,ME - dg.gen_rlwinm_T0_T1(SH, m); + else goto do_generic_rlwinm; + } + else if (ME == 31) { + if (SH == (32 - MB)) { + // srwi rA,rS,MB + dg.gen_lsr_32_T0_im(MB); } + else if (SH == 0) { + // andi rA,rS,MASK(MB,31) + dg.gen_and_32_T0_im(m); + } + else goto do_generic_rlwinm; + } + else { + // rlwinm rA,rS,SH,MB,ME + do_generic_rlwinm: + dg.gen_rlwinm_T0_T1(SH, m); } dg.gen_store_T0_GPR(rA); if (Rc_field::test(opcode)) @@ -900,10 +913,15 @@ powerpc_cpu::compile_block(uint32 entry_point) const int rS = rS_field::extract(opcode); const int rB = 
rB_field::extract(opcode); const int rA = rA_field::extract(opcode); - const uint32 m = operand_MASK::get(this, opcode); + const int MB = MB_field::extract(opcode); + const int ME = ME_field::extract(opcode); + const uint32 m = mask_operand::compute(MB, ME); dg.gen_load_T0_GPR(rS); dg.gen_load_T1_GPR(rB); - dg.gen_rlwnm_T0_T1(m); + if (MB == 0 && ME == 31) + dg.gen_rol_32_T0_T1(); + else + dg.gen_rlwnm_T0_T1(m); dg.gen_store_T0_GPR(rA); if (Rc_field::test(opcode)) dg.gen_record_cr0_T0(); @@ -1010,6 +1028,241 @@ powerpc_cpu::compile_block(uint32 entry_point) dg.gen_store_T0_crf(crfD_field::extract(opcode)); break; } + case PPC_I(LFD): // Load Floating-Point Double + op.mem.size = 8; + op.mem.do_update = 0; + op.mem.do_indexed = 0; + goto do_fp_load;; + case PPC_I(LFDU): // Load Floating-Point Double with Update + op.mem.size = 8; + op.mem.do_update = 1; + op.mem.do_indexed = 0; + goto do_fp_load; + case PPC_I(LFDUX): // Load Floating-Point Double with Update Indexed + op.mem.size = 8; + op.mem.do_update = 1; + op.mem.do_indexed = 1; + goto do_fp_load; + case PPC_I(LFDX): // Load Floating-Point Double Indexed + op.mem.size = 8; + op.mem.do_update = 0; + op.mem.do_indexed = 1; + goto do_fp_load; + case PPC_I(LFS): // Load Floating-Point Single + op.mem.size = 4; + op.mem.do_update = 0; + op.mem.do_indexed = 0; + goto do_fp_load; + case PPC_I(LFSU): // Load Floating-Point Single with Update + op.mem.size = 4; + op.mem.do_update = 1; + op.mem.do_indexed = 0; + goto do_fp_load; + case PPC_I(LFSUX): // Load Floating-Point Single with Update Indexed + op.mem.size = 4; + op.mem.do_update = 1; + op.mem.do_indexed = 1; + goto do_fp_load; + case PPC_I(LFSX): // Load Floating-Point Single Indexed + op.mem.size = 4; + op.mem.do_update = 0; + op.mem.do_indexed = 1; + goto do_fp_load; + { + do_fp_load: + // Extract RZ operand + const int rA = rA_field::extract(opcode); + if (rA == 0 && !op.mem.do_update) + dg.gen_mov_32_A0_im(0); + else + dg.gen_load_A0_GPR(rA); + + // Extract 
index operand + if (op.mem.do_indexed) + dg.gen_load_T1_GPR(rB_field::extract(opcode)); + + // Load floating point data + if (op.mem.size == 8) { + if (op.mem.do_indexed) + dg.gen_load_double_FD_A0_T1(); + else + dg.gen_load_double_FD_A0_im(operand_D::get(this, opcode)); + } + else { + if (op.mem.do_indexed) + dg.gen_load_single_FD_A0_T1(); + else + dg.gen_load_single_FD_A0_im(operand_D::get(this, opcode)); + } + + // Commit result + dg.gen_store_FD_FPR(frD_field::extract(opcode)); + + // Update RA + if (op.mem.do_update) { + if (op.mem.do_indexed) + dg.gen_add_32_A0_T1(); + else + dg.gen_add_32_A0_im(operand_D::get(this, opcode)); + dg.gen_store_A0_GPR(rA); + } + break; + } + case PPC_I(STFD): // Store Floating-Point Double + op.mem.size = 8; + op.mem.do_update = 0; + op.mem.do_indexed = 0; + goto do_fp_store; + case PPC_I(STFDU): // Store Floating-Point Double with Update + op.mem.size = 8; + op.mem.do_update = 1; + op.mem.do_indexed = 0; + goto do_fp_store; + case PPC_I(STFDUX): // Store Floating-Point Double with Update Indexed + op.mem.size = 8; + op.mem.do_update = 1; + op.mem.do_indexed = 1; + goto do_fp_store; + case PPC_I(STFDX): // Store Floating-Point Double Indexed + op.mem.size = 8; + op.mem.do_update = 0; + op.mem.do_indexed = 1; + goto do_fp_store; + case PPC_I(STFS): // Store Floating-Point Single + op.mem.size = 4; + op.mem.do_update = 0; + op.mem.do_indexed = 0; + goto do_fp_store; + case PPC_I(STFSU): // Store Floating-Point Single with Update + op.mem.size = 4; + op.mem.do_update = 1; + op.mem.do_indexed = 0; + goto do_fp_store; + case PPC_I(STFSUX): // Store Floating-Point Single with Update Indexed + op.mem.size = 4; + op.mem.do_update = 1; + op.mem.do_indexed = 1; + goto do_fp_store; + case PPC_I(STFSX): // Store Floating-Point Single Indexed + op.mem.size = 4; + op.mem.do_update = 0; + op.mem.do_indexed = 1; + goto do_fp_store; + { + do_fp_store: + // Extract RZ operand + const int rA = rA_field::extract(opcode); + if (rA == 0 && 
!op.mem.do_update) + dg.gen_mov_32_A0_im(0); + else + dg.gen_load_A0_GPR(rA); + + // Extract index operand + if (op.mem.do_indexed) + dg.gen_load_T1_GPR(rB_field::extract(opcode)); + + // Load register to commit to memory + dg.gen_load_F0_FPR(frS_field::extract(opcode)); + + // Store floating point data + if (op.mem.size == 8) { + if (op.mem.do_indexed) + dg.gen_store_double_F0_A0_T1(); + else + dg.gen_store_double_F0_A0_im(operand_D::get(this, opcode)); + } + else { + if (op.mem.do_indexed) + dg.gen_store_single_F0_A0_T1(); + else + dg.gen_store_single_F0_A0_im(operand_D::get(this, opcode)); + } + + // Update RA + if (op.mem.do_update) { + if (op.mem.do_indexed) + dg.gen_add_32_A0_T1(); + else + dg.gen_add_32_A0_im(operand_D::get(this, opcode)); + dg.gen_store_A0_GPR(rA); + } + break; + } +#if PPC_ENABLE_FPU_EXCEPTIONS == 0 + case PPC_I(FABS): // Floating Absolute Value + case PPC_I(FNABS): // Floating Negative Absolute Value + case PPC_I(FNEG): // Floating Negate + case PPC_I(FMR): // Floating Move Register + { + dg.gen_load_F0_FPR(frB_field::extract(opcode)); + switch (ii->mnemo) { + case PPC_I(FABS): dg.gen_fabs_FD_F0(); break; + case PPC_I(FNABS): dg.gen_fnabs_FD_F0(); break; + case PPC_I(FNEG): dg.gen_fneg_FD_F0(); break; + case PPC_I(FMR): dg.gen_fmov_FD_F0(); break; + } + dg.gen_store_FD_FPR(frD_field::extract(opcode)); + if (Rc_field::test(opcode)) + dg.gen_record_cr1(); + break; + } + case PPC_I(FADD): // Floating Add (Double-Precision) + case PPC_I(FSUB): // Floating Subtract (Double-Precision) + case PPC_I(FMUL): // Floating Multiply (Double-Precision) + case PPC_I(FDIV): // Floating Divide (Double-Precision) + case PPC_I(FADDS): // Floating Add (Single-Precision) + case PPC_I(FSUBS): // Floating Subtract (Single-Precision) + case PPC_I(FMULS): // Floating Multiply (Single-Precision) + case PPC_I(FDIVS): // Floating Divide (Single-Precision) + { + dg.gen_load_F0_FPR(frA_field::extract(opcode)); + if (ii->mnemo == PPC_I(FMUL) || ii->mnemo == 
PPC_I(FMULS)) + dg.gen_load_F1_FPR(frC_field::extract(opcode)); + else + dg.gen_load_F1_FPR(frB_field::extract(opcode)); + switch (ii->mnemo) { + case PPC_I(FADD): dg.gen_fadd_FD_F0_F1(); break; + case PPC_I(FSUB): dg.gen_fsub_FD_F0_F1(); break; + case PPC_I(FMUL): dg.gen_fmul_FD_F0_F1(); break; + case PPC_I(FDIV): dg.gen_fdiv_FD_F0_F1(); break; + case PPC_I(FADDS): dg.gen_fadds_FD_F0_F1(); break; + case PPC_I(FSUBS): dg.gen_fsubs_FD_F0_F1(); break; + case PPC_I(FMULS): dg.gen_fmuls_FD_F0_F1(); break; + case PPC_I(FDIVS): dg.gen_fdivs_FD_F0_F1(); break; + } + dg.gen_store_FD_FPR(frD_field::extract(opcode)); + if (Rc_field::test(opcode)) + dg.gen_record_cr1(); + break; + } + case PPC_I(FMADD): // Floating Multiply-Add (Double-Precision) + case PPC_I(FMSUB): // Floating Multiply-Subtract (Double-Precision) + case PPC_I(FNMADD): // Floating Negative Multiply-Add (Double-Precision) + case PPC_I(FNMSUB): // Floating Negative Multiply-Subtract (Double-Precision) + case PPC_I(FMADDS): // Floating Multiply-Add (Single-Precision) + case PPC_I(FMSUBS): // Floating Multiply-Subtract (Single-Precision) + case PPC_I(FNMADDS): // Floating Negative Multiply-Add (Single-Precision) + case PPC_I(FNMSUBS): // Floating Negative Multiply-Subtract (Single-Precision) + { + dg.gen_load_F0_FPR(frA_field::extract(opcode)); + dg.gen_load_F1_FPR(frC_field::extract(opcode)); + dg.gen_load_F2_FPR(frB_field::extract(opcode)); + switch (ii->mnemo) { + case PPC_I(FMADD): dg.gen_fmadd_FD_F0_F1_F2(); break; + case PPC_I(FMSUB): dg.gen_fmsub_FD_F0_F1_F2(); break; + case PPC_I(FNMADD): dg.gen_fnmadd_FD_F0_F1_F2(); break; + case PPC_I(FNMSUB): dg.gen_fnmsub_FD_F0_F1_F2(); break; + case PPC_I(FMADDS): dg.gen_fmadds_FD_F0_F1_F2(); break; + case PPC_I(FMSUBS): dg.gen_fmsubs_FD_F0_F1_F2(); break; + case PPC_I(FNMADDS): dg.gen_fnmadds_FD_F0_F1_F2(); break; + case PPC_I(FNMSUBS): dg.gen_fnmsubs_FD_F0_F1_F2(); break; + } + dg.gen_store_FD_FPR(frD_field::extract(opcode)); + if (Rc_field::test(opcode)) + 
dg.gen_record_cr1(); + break; + } +#endif default: // Direct call to instruction handler { typedef void (*func_t)(dyngen_cpu_base, uint32);