Optimize rlwinm further. Translate FP instructions if we don't need to

compute exceptions.
This commit is contained in:
gbeauche 2004-01-25 23:21:06 +00:00
parent 9c6b42b014
commit ea9553ee65
8 changed files with 601 additions and 143 deletions

View File

@ -42,10 +42,8 @@ register uintptr reg_A0 asm(REG_A0);
register uintptr reg_T0 asm(REG_T0);
#define T1 REG32(reg_T1)
register uintptr reg_T1 asm(REG_T1);
#ifdef REG_T2
#define T2 REG32(reg_T2)
register uintptr reg_T2 asm(REG_T2);
#endif
#ifdef REG_T3
#define T3 REG32(reg_T3)
register uintptr reg_T3 asm(REG_T3);
@ -83,29 +81,39 @@ void OPPROTO op_##NAME(void) \
// Register moves
// Each line instantiates one op through DEFINE_OP(NAME, CODE). The op name
// follows the pattern mov_<width>_<dst>_<src>: "_im" variants take their
// source from PARAM1 and "_0" variants store a literal zero.
DEFINE_OP(mov_32_T0_im, T0 = PARAM1);
DEFINE_OP(mov_32_T0_T1, T0 = T1);
DEFINE_OP(mov_32_T0_T2, T0 = T2);
DEFINE_OP(mov_32_T0_A0, T0 = A0);
DEFINE_OP(mov_32_T1_im, T1 = PARAM1);
DEFINE_OP(mov_32_T1_T0, T1 = T0);
DEFINE_OP(mov_32_T1_T2, T1 = T2);
DEFINE_OP(mov_32_T1_A0, T1 = A0);
DEFINE_OP(mov_32_T2_im, T2 = PARAM1);
DEFINE_OP(mov_32_T2_T1, T2 = T1);
DEFINE_OP(mov_32_T2_T0, T2 = T0);
DEFINE_OP(mov_32_T2_A0, T2 = A0);
DEFINE_OP(mov_32_A0_im, A0 = PARAM1);
DEFINE_OP(mov_32_A0_T0, A0 = T0);
DEFINE_OP(mov_32_A0_T1, A0 = T1);
DEFINE_OP(mov_32_A0_T2, A0 = T2);
// Clear-register shortcuts
DEFINE_OP(mov_32_T0_0, T0 = 0);
DEFINE_OP(mov_32_T1_0, T1 = 0);
DEFINE_OP(mov_32_T2_0, T2 = 0);
DEFINE_OP(mov_32_A0_0, A0 = 0);
void OPPROTO op_mov_ad_A0_im(void)
{
#if SIZEOF_VOID_P == 8
#if defined(__x86_64__)
asm volatile ("movabsq $__op_param1,%" REG_A0);
#define DEFINE_MOV_AD(REG) asm volatile ("movabsq $__op_param1,%" REG_##REG)
#else
#error "unsupported 64-bit value move in"
#endif
#else
A0 = PARAM1;
#define DEFINE_MOV_AD(REG) REG = PARAM1
#endif
}
void OPPROTO op_mov_ad_T0_im(void) { DEFINE_MOV_AD(T0); }
void OPPROTO op_mov_ad_T1_im(void) { DEFINE_MOV_AD(T1); }
void OPPROTO op_mov_ad_T2_im(void) { DEFINE_MOV_AD(T2); }
void OPPROTO op_mov_ad_A0_im(void) { DEFINE_MOV_AD(A0); }
// Arithmetic operations
DEFINE_OP(add_32_T0_T1, T0 += T1);
@ -190,120 +198,6 @@ DEFINE_OP(ze_8_32_T0, T0 = (uint32)(uint8)T0);
#undef DEFINE_OP
/**
 *	Native FP operations optimization
 *
 *	Portable fallbacks for the do_* floating-point hooks. Each hook is
 *	only defined here when it is not already defined, so an earlier
 *	definition takes precedence over these defaults.
 *
 *	Every argument and every expansion is fully parenthesized so that
 *	the hooks behave like function calls: they can be given compound
 *	arguments (e.g. do_fmul(a + b, c)) and can be embedded in larger
 *	expressions (e.g. do_fadd(a, b) * c) without precedence surprises.
 **/

#ifndef do_fabs
#define do_fabs(x)			fabs(x)
#endif
#ifndef do_fadd
#define do_fadd(x, y)		((x) + (y))
#endif
#ifndef do_fdiv
#define do_fdiv(x, y)		((x) / (y))
#endif
#ifndef do_fmadd
#define do_fmadd(x, y, z)	(((x) * (y)) + (z))
#endif
#ifndef do_fmsub
#define do_fmsub(x, y, z)	(((x) * (y)) - (z))
#endif
#ifndef do_fmul
#define do_fmul(x, y)		((x) * (y))
#endif
#ifndef do_fnabs
#define do_fnabs(x)			(-fabs(x))
#endif
#ifndef do_fneg
#define do_fneg(x)			(-(x))
#endif
#ifndef do_fnmadd
#define do_fnmadd(x, y, z)	(-(((x) * (y)) + (z)))
#endif
#ifndef do_fnmsub
#define do_fnmsub(x, y, z)	(-(((x) * (y)) - (z)))
#endif
#ifndef do_fsub
#define do_fsub(x, y)		((x) - (y))
#endif
#ifndef do_fmov
#define do_fmov(x)			(x)
#endif
/**
* FP double operations
*
* NOTE: everything from the "#if 0" below to the matching "#endif" is
* compiled out; it covers both the double and the single precision ops.
**/
#if 0
// Reads 8 bytes through vm_do_read_memory_8() at the address held in T1 and
// returns them reinterpreted as a double via the local union.
double OPPROTO op_lfd(void)
{
union { double d; uint64 j; } r;
r.j = vm_do_read_memory_8((uint64 *)T1);
return r.d;
}
// Same as op_lfd, for a 4-byte value returned as a float.
float OPPROTO op_lfs(void)
{
union { float f; uint32 i; } r;
r.i = vm_do_read_memory_4((uint32 *)T1);
return r.f;
}
// Generates op_<NAME>(F0, F1, F2) returning do_<OP> applied to the
// parenthesized argument list ARGS.
#define DEFINE_OP(NAME, OP, ARGS) \
double OPPROTO op_##NAME(double F0, double F1, double F2) \
{ \
return do_##OP ARGS; \
}
DEFINE_OP(fmov_F1, fmov, (F1));
DEFINE_OP(fmov_F2, fmov, (F2));
DEFINE_OP(fabs, fabs, (F0));
DEFINE_OP(fadd, fadd, (F0, F1));
DEFINE_OP(fdiv, fdiv, (F0, F1));
DEFINE_OP(fmadd, fmadd, (F0, F1, F2));
DEFINE_OP(fmsub, fmsub, (F0, F1, F2));
DEFINE_OP(fmul, fmul, (F0, F1));
DEFINE_OP(fnabs, fnabs, (F0));
DEFINE_OP(fneg, fneg, (F0));
DEFINE_OP(fnmadd, fnmadd, (F0, F1, F2));
DEFINE_OP(fnmsub, fnmsub, (F0, F1, F2));
DEFINE_OP(fsub, fsub, (F0, F1));
#undef DEFINE_OP
/**
* FP single operations
**/
// Same generator as above, with float parameters and a float return type.
#define DEFINE_OP(NAME, OP, ARGS) \
float OPPROTO op_##NAME(float F0, float F1, float F2) \
{ \
return do_##OP ARGS; \
}
DEFINE_OP(fmovs_F1, fmov, (F1));
DEFINE_OP(fmovs_F2, fmov, (F2));
DEFINE_OP(fabss_F0, fabs, (F0));
DEFINE_OP(fadds_F0_F1, fadd, (F0, F1));
DEFINE_OP(fdivs_F0_F1, fdiv, (F0, F1));
DEFINE_OP(fmadds_F0_F1_F2, fmadd, (F0, F1, F2));
DEFINE_OP(fmsubs_F0_F1_F2, fmsub, (F0, F1, F2));
DEFINE_OP(fmuls_F0_F1, fmul, (F0, F1));
DEFINE_OP(fnabss_F0, fnabs, (F0));
DEFINE_OP(fnegs_F0, fneg, (F0));
DEFINE_OP(fnmadds_F0_F1_F2, fnmadd, (F0, F1, F2));
DEFINE_OP(fnmsubs_F0_F1_F2, fnmsub, (F0, F1, F2));
DEFINE_OP(fsubs_F0_F1, fsub, (F0, F1));
#undef DEFINE_OP
#endif
/**
* Load/Store instructions
**/

View File

@ -98,13 +98,23 @@ public:
// Register moves
void gen_mov_32_T0_im(int32 value);
DEFINE_ALIAS(mov_32_T0_T1,0);
DEFINE_ALIAS(mov_32_T0_T2,0);
DEFINE_ALIAS(mov_32_T0_A0,0);
void gen_mov_32_T1_im(int32 value);
DEFINE_ALIAS(mov_32_T1_T0,0);
DEFINE_ALIAS(mov_32_T1_T2,0);
DEFINE_ALIAS(mov_32_T1_A0,0);
void gen_mov_32_T2_im(int32 value);
DEFINE_ALIAS(mov_32_T2_T0,0);
DEFINE_ALIAS(mov_32_T2_T1,0);
DEFINE_ALIAS(mov_32_T2_A0,0);
void gen_mov_32_A0_im(int32 value);
DEFINE_ALIAS(mov_32_A0_T0,0);
DEFINE_ALIAS(mov_32_A0_T1,0);
DEFINE_ALIAS(mov_32_A0_T2,0);
DEFINE_ALIAS(mov_ad_T0_im,1);
DEFINE_ALIAS(mov_ad_T1_im,1);
DEFINE_ALIAS(mov_ad_T2_im,1);
DEFINE_ALIAS(mov_ad_A0_im,1);
// Arithmetic operations
@ -283,6 +293,7 @@ basic_dyngen::gen_mov_32_##REG##_im(int32 value) \
DEFINE_OP(T0);
DEFINE_OP(T1);
DEFINE_OP(T2);
DEFINE_OP(A0);
#undef DEFINE_OP

View File

@ -49,6 +49,11 @@ protected:
powerpc_xer_register & xer() { return regs.xer; }
powerpc_xer_register const & xer() const { return regs.xer; }
// Floating-point result slot (regs.fp_result) accessed through its double
// member: read-only copy for const objects, writable reference otherwise.
double fp_result() const { return regs.fp_result.d; }
double & fp_result() { return regs.fp_result.d; }
// Same slot accessed through its 64-bit integer member.
// NOTE(review): presumably .d and .j share storage (union) -- confirm
// against the powerpc_fpr definition.
uint64 fp_result_dw() const { return regs.fp_result.j; }
uint64 & fp_result_dw() { return regs.fp_result.j; }
uint32 & fpscr() { return regs.fpscr; }
uint32 fpscr() const { return regs.fpscr; }
uint32 & lr() { return regs.lr; }

View File

@ -33,10 +33,25 @@ register struct powerpc_cpu *CPU asm(REG_CPU);
#else
#define CPU ((powerpc_cpu *)CPUPARAM)
#endif
register uint32 A0 asm(REG_A0);
register uint32 T0 asm(REG_T0);
register uint32 T1 asm(REG_T1);
register uint32 T2 asm(REG_T2);
#if SIZEOF_VOID_P == 8
#define REG32(X) ((uint32)X)
#else
#define REG32(X) X
#endif
#define FPREG64(X) (*((double *)(X)))
#define A0 REG32(reg_A0)
register uintptr reg_A0 asm(REG_A0);
#define T0 REG32(reg_T0)
#define F0 FPREG64(reg_T0)
register uintptr reg_T0 asm(REG_T0);
#define T1 REG32(reg_T1)
#define F1 FPREG64(reg_T1)
register uintptr reg_T1 asm(REG_T1);
#define T2 REG32(reg_T2)
#define F2 FPREG64(reg_T2)
register uintptr reg_T2 asm(REG_T2);
#define FD powerpc_dyngen_helper::fp_result()
#define FD_dw powerpc_dyngen_helper::fp_result_dw()
// Semantic action templates
#define DYNGEN_OPS
@ -57,12 +72,17 @@ struct powerpc_dyngen_helper {
static inline void set_ctr(uint32 value) { CPU->ctr() = value; }
static inline uint32 get_cr() { return CPU->cr().get(); }
static inline void set_cr(uint32 value) { CPU->cr().set(value); }
static inline uint32 get_fpscr() { return CPU->fpscr(); }
static inline void set_fpscr(uint32 value) { CPU->fpscr() = value; }
static inline uint32 get_xer() { return CPU->xer().get(); }
static inline void set_xer(uint32 value) { CPU->xer().set(value); }
static inline void record(int crf, int32 v) { CPU->record_cr(crf, v); }
static inline powerpc_cr_register & cr() { return CPU->cr(); }
static inline powerpc_xer_register & xer() { return CPU->xer(); }
static inline powerpc_spcflags & spcflags() { return CPU->spcflags(); }
static double & fp_result() { return CPU->fp_result(); }
static uint64 & fp_result_dw() { return CPU->fp_result_dw(); }
static inline void set_cr(int crfd, int v) { CPU->cr().set(crfd, v); }
};
@ -122,6 +142,99 @@ DEFINE_REG(31);
#undef DEFINE_OP
/**
* Load/store floating-point registers
*
* op_load_F<REG>_FPR<N> does not copy a value: it stores the address of
* CPU->fpr(N) into reg_T<REG>. op_store_F<REG>_FPR<N> assigns F<REG> to
* CPU->fpr(N).
**/
// DEFINE_OP(REG, N): load/store pair binding F<REG> to FPR N.
#define DEFINE_OP(REG, N) \
void OPPROTO op_load_F##REG##_FPR##N(void) \
{ \
reg_T##REG = (uintptr)&CPU->fpr(N); \
} \
void OPPROTO op_store_F##REG##_FPR##N(void) \
{ \
CPU->fpr(N) = F##REG; \
}
// DEFINE_REG(N): the F0/F1/F2 variants for FPR N, plus the op that assigns
// FD to FPR N.
#define DEFINE_REG(N) \
DEFINE_OP(0,N); \
DEFINE_OP(1,N); \
DEFINE_OP(2,N); \
void OPPROTO op_store_FD_FPR##N(void) \
{ \
CPU->fpr(N) = FD; \
}
// One instantiation per floating-point register index, 0 through 31.
DEFINE_REG(0);
DEFINE_REG(1);
DEFINE_REG(2);
DEFINE_REG(3);
DEFINE_REG(4);
DEFINE_REG(5);
DEFINE_REG(6);
DEFINE_REG(7);
DEFINE_REG(8);
DEFINE_REG(9);
DEFINE_REG(10);
DEFINE_REG(11);
DEFINE_REG(12);
DEFINE_REG(13);
DEFINE_REG(14);
DEFINE_REG(15);
DEFINE_REG(16);
DEFINE_REG(17);
DEFINE_REG(18);
DEFINE_REG(19);
DEFINE_REG(20);
DEFINE_REG(21);
DEFINE_REG(22);
DEFINE_REG(23);
DEFINE_REG(24);
DEFINE_REG(25);
DEFINE_REG(26);
DEFINE_REG(27);
DEFINE_REG(28);
DEFINE_REG(29);
DEFINE_REG(30);
DEFINE_REG(31);
#undef DEFINE_REG
#undef DEFINE_OP
/**
* Load/Store floating-point data
*
* DEFINE_OP(OFFSET) generates four ops addressing memory at A0 + OFFSET:
* - load_double: reads 8 bytes into FD_dw.
* - load_single: reads 4 bytes into the "i" member of FD's storage, then
*   stores the "f" member widened to double back into FD.
* - store_double: writes the 8 bytes that reg_T0 points at.
* - store_single: narrows F0 to float into FD's storage and writes its
*   "i" member; FD's storage is therefore overwritten as scratch space.
**/
// "im" expands to PARAM1 so that DEFINE_OP(im) yields the *_A0_im ops.
#define im PARAM1
#define DEFINE_OP(OFFSET) \
void OPPROTO op_load_double_FD_A0_##OFFSET(void) \
{ \
FD_dw = vm_read_memory_8(A0 + OFFSET); \
} \
void OPPROTO op_load_single_FD_A0_##OFFSET(void) \
{ \
any_register *x = (any_register *)&FD; \
x->i = vm_read_memory_4(A0 + OFFSET); \
FD = (double)x->f; \
} \
void OPPROTO op_store_double_F0_A0_##OFFSET(void) \
{ \
vm_write_memory_8(A0 + OFFSET, *(uint64 *)reg_T0); \
} \
void OPPROTO op_store_single_F0_A0_##OFFSET(void) \
{ \
any_register *x = (any_register *)&FD; \
x->f = (float)F0; \
vm_write_memory_4(A0 + OFFSET, x->i); \
}
// Offset variants: none (0), immediate (PARAM1) and index register (T1).
DEFINE_OP(0);
DEFINE_OP(im);
DEFINE_OP(T1);
#undef DEFINE_OP
/**
* Condition Registers
**/
@ -217,6 +330,107 @@ void OPPROTO op_mtcrf_T0_im(void)
}
/**
 *	Native FP operations optimization
 *
 *	Portable fallbacks for the do_* floating-point hooks. Each hook is
 *	only defined here when it is not already defined, so an earlier
 *	definition takes precedence over these defaults.
 *
 *	Every argument and every expansion is fully parenthesized so that
 *	the hooks behave like function calls: they can be given compound
 *	arguments (e.g. do_fmul(a + b, c)) and can be embedded in larger
 *	expressions (e.g. do_fadd(a, b) * c) without precedence surprises.
 **/

#ifndef do_fabs
#define do_fabs(x)			fabs(x)
#endif
#ifndef do_fadd
#define do_fadd(x, y)		((x) + (y))
#endif
#ifndef do_fdiv
#define do_fdiv(x, y)		((x) / (y))
#endif
#ifndef do_fmadd
#define do_fmadd(x, y, z)	(((x) * (y)) + (z))
#endif
#ifndef do_fmsub
#define do_fmsub(x, y, z)	(((x) * (y)) - (z))
#endif
#ifndef do_fmul
#define do_fmul(x, y)		((x) * (y))
#endif
#ifndef do_fnabs
#define do_fnabs(x)			(-fabs(x))
#endif
#ifndef do_fneg
#define do_fneg(x)			(-(x))
#endif
#ifndef do_fnmadd
#define do_fnmadd(x, y, z)	(-(((x) * (y)) + (z)))
#endif
#ifndef do_fnmsub
#define do_fnmsub(x, y, z)	(-(((x) * (y)) - (z)))
#endif
#ifndef do_fsub
#define do_fsub(x, y)		((x) - (y))
#endif
#ifndef do_fmov
#define do_fmov(x)			(x)
#endif
/**
* Double-precision floating point operations
*
* DEFINE_OP(NAME, CODE) generates "void op_<NAME>(void)" whose body is CODE.
* Operands are the F0/F1/F2 macros and results are written to FD.
**/
#define DEFINE_OP(NAME, CODE) \
void OPPROTO op_##NAME(void) \
{ \
CODE; \
}
// Plain moves between the F registers and into FD
DEFINE_OP(fmov_F0_F1, F0 = F1);
DEFINE_OP(fmov_F0_F2, F0 = F2);
DEFINE_OP(fmov_F1_F0, F1 = F0);
DEFINE_OP(fmov_F1_F2, F1 = F2);
DEFINE_OP(fmov_F2_F0, F2 = F0);
DEFINE_OP(fmov_F2_F1, F2 = F1);
DEFINE_OP(fmov_FD_F0, FD = F0);
DEFINE_OP(fmov_FD_F1, FD = F1);
DEFINE_OP(fmov_FD_F2, FD = F2);
// Unary operations, routed through the do_* hooks
DEFINE_OP(fabs_FD_F0, FD = do_fabs(F0));
DEFINE_OP(fneg_FD_F0, FD = do_fneg(F0));
DEFINE_OP(fnabs_FD_F0, FD = do_fnabs(F0));
// NOTE(review): the four basic operations use the C operators directly and
// therefore bypass do_fadd/do_fsub/do_fmul/do_fdiv -- confirm this is intended.
DEFINE_OP(fadd_FD_F0_F1, FD = F0 + F1);
DEFINE_OP(fsub_FD_F0_F1, FD = F0 - F1);
DEFINE_OP(fmul_FD_F0_F1, FD = F0 * F1);
DEFINE_OP(fdiv_FD_F0_F1, FD = F0 / F1);
// Multiply-add family, routed through the do_* hooks
DEFINE_OP(fmadd_FD_F0_F1_F2, FD = do_fmadd(F0, F1, F2));
DEFINE_OP(fmsub_FD_F0_F1_F2, FD = do_fmsub(F0, F1, F2));
DEFINE_OP(fnmadd_FD_F0_F1_F2, FD = do_fnmadd(F0, F1, F2));
DEFINE_OP(fnmsub_FD_F0_F1_F2, FD = do_fnmsub(F0, F1, F2));
#undef DEFINE_OP
/**
* Single-Precision floating point operations
*
* DEFINE_OP(NAME, REG, OP) generates "void op_<NAME>(void)" that evaluates
* OP, assigns it to a float local (narrowing the value to float) and then
* assigns that float to REG.
**/
#define DEFINE_OP(NAME, REG, OP) \
void OPPROTO op_##NAME(void) \
{ \
float x = OP; \
REG = x; \
}
// Basic operations use the C operators directly
DEFINE_OP(fadds_FD_F0_F1, FD, F0 + F1);
DEFINE_OP(fsubs_FD_F0_F1, FD, F0 - F1);
DEFINE_OP(fmuls_FD_F0_F1, FD, F0 * F1);
DEFINE_OP(fdivs_FD_F0_F1, FD, F0 / F1);
// Multiply-add family goes through the do_* hooks
DEFINE_OP(fmadds_FD_F0_F1_F2, FD, do_fmadd(F0, F1, F2));
DEFINE_OP(fmsubs_FD_F0_F1_F2, FD, do_fmsub(F0, F1, F2));
DEFINE_OP(fnmadds_FD_F0_F1_F2, FD, do_fnmadd(F0, F1, F2));
DEFINE_OP(fnmsubs_FD_F0_F1_F2, FD, do_fnmsub(F0, F1, F2));
#undef DEFINE_OP
/**
* Special purpose registers
**/
@ -475,6 +689,12 @@ void OPPROTO op_record_cr0_T0(void)
dyngen_barrier();
}
// Rebuild CR so that the bits selected by CR_field<1>::mask() are cleared
// and FPSCR bits 0xf0000000, moved down by four positions, are merged in.
void OPPROTO op_record_cr1(void)
{
	// Top nibble of FPSCR, relocated to bit positions 0x0f000000
	const uint32 fpscr_bits = (powerpc_dyngen_helper::get_fpscr() >> 4) & 0x0f000000;
	// Current CR with field 1 masked out
	const uint32 cr_others = powerpc_dyngen_helper::get_cr() & ~CR_field<1>::mask();
	powerpc_dyngen_helper::set_cr(cr_others | fpscr_bits);
}
#define im PARAM1
#if DYNGEN_ASM_OPTS && defined(__powerpc__) && 0

View File

@ -118,6 +118,13 @@ DEFINE_INSN(store, A0, GPR);
DEFINE_INSN(store, T0, GPR);
DEFINE_INSN(store, T1, GPR);
DEFINE_INSN(store, T2, GPR);
DEFINE_INSN(load, F0, FPR);
DEFINE_INSN(load, F1, FPR);
DEFINE_INSN(load, F2, FPR);
DEFINE_INSN(store, F0, FPR);
DEFINE_INSN(store, F1, FPR);
DEFINE_INSN(store, F2, FPR);
DEFINE_INSN(store, FD, FPR);
// Condition register bitfield
DEFINE_INSN(load, T0, crb);
@ -127,6 +134,23 @@ DEFINE_INSN(store, T1, crb);
#undef DEFINE_INSN
// Floating point load/store generators
//
// DEFINE_OP(OP, FREG, WIDTH) defines
// powerpc_dyngen::gen_<OP>_<WIDTH>_<FREG>_A0_im(offset). A non-zero offset
// is forwarded to the "_A0_im" op; a zero offset selects the "_A0_0" op,
// which takes no parameter.
#define DEFINE_OP(OP, FREG, WIDTH)										\
void powerpc_dyngen::gen_##OP##_##WIDTH##_##FREG##_A0_im(int32 offset)	\
{																		\
	if (offset != 0) {													\
		gen_op_##OP##_##WIDTH##_##FREG##_A0_im(offset);					\
	}																	\
	else {																\
		gen_op_##OP##_##WIDTH##_##FREG##_A0_0();						\
	}																	\
}

DEFINE_OP(load, FD, double);
DEFINE_OP(load, FD, single);
DEFINE_OP(store, F0, double);
DEFINE_OP(store, F0, single);

#undef DEFINE_OP
#define DEFINE_INSN(OP, REG) \
void powerpc_dyngen::gen_##OP##_##REG##_crf(int crf) \
{ \

View File

@ -56,6 +56,13 @@ public:
void gen_store_T0_GPR(int i);
void gen_store_T1_GPR(int i);
void gen_store_T2_GPR(int i);
void gen_load_F0_FPR(int i);
void gen_load_F1_FPR(int i);
void gen_load_F2_FPR(int i);
void gen_store_FD_FPR(int i);
void gen_store_F0_FPR(int i);
void gen_store_F1_FPR(int i);
void gen_store_F2_FPR(int i);
// Raw aliases
#define DEFINE_ALIAS_RAW(NAME, PRE, POST, ARGLIST, ARGS) \
@ -115,6 +122,7 @@ public:
// Compare & Record instructions
DEFINE_ALIAS(record_cr0_T0,0);
DEFINE_ALIAS(record_cr1,0);
void gen_compare_T0_T1(int crf);
void gen_compare_T0_im(int crf, int32 value);
void gen_compare_logical_T0_T1(int crf);
@ -163,6 +171,48 @@ public:
DEFINE_ALIAS(subfze_T0,0);
DEFINE_ALIAS(subfzeo_T0,0);
// Double-precision floating point operations
DEFINE_ALIAS(fmov_F0_F1,0);
DEFINE_ALIAS(fmov_F0_F2,0);
DEFINE_ALIAS(fmov_F1_F0,0);
DEFINE_ALIAS(fmov_F1_F2,0);
DEFINE_ALIAS(fmov_F2_F0,0);
DEFINE_ALIAS(fmov_F2_F1,0);
DEFINE_ALIAS(fmov_FD_F0,0);
DEFINE_ALIAS(fmov_FD_F1,0);
DEFINE_ALIAS(fmov_FD_F2,0);
DEFINE_ALIAS(fabs_FD_F0,0);
DEFINE_ALIAS(fneg_FD_F0,0);
DEFINE_ALIAS(fnabs_FD_F0,0);
DEFINE_ALIAS(fadd_FD_F0_F1,0);
DEFINE_ALIAS(fsub_FD_F0_F1,0);
DEFINE_ALIAS(fmul_FD_F0_F1,0);
DEFINE_ALIAS(fdiv_FD_F0_F1,0);
DEFINE_ALIAS(fmadd_FD_F0_F1_F2,0);
DEFINE_ALIAS(fmsub_FD_F0_F1_F2,0);
DEFINE_ALIAS(fnmadd_FD_F0_F1_F2,0);
DEFINE_ALIAS(fnmsub_FD_F0_F1_F2,0);
// Single-precision floating point operations
DEFINE_ALIAS(fadds_FD_F0_F1,0);
DEFINE_ALIAS(fsubs_FD_F0_F1,0);
DEFINE_ALIAS(fmuls_FD_F0_F1,0);
DEFINE_ALIAS(fdivs_FD_F0_F1,0);
DEFINE_ALIAS(fmadds_FD_F0_F1_F2,0);
DEFINE_ALIAS(fmsubs_FD_F0_F1_F2,0);
DEFINE_ALIAS(fnmadds_FD_F0_F1_F2,0);
DEFINE_ALIAS(fnmsubs_FD_F0_F1_F2,0);
// Load/store floating point data
DEFINE_ALIAS(load_double_FD_A0_T1,0);
void gen_load_double_FD_A0_im(int32 offset);
DEFINE_ALIAS(load_single_FD_A0_T1,0);
void gen_load_single_FD_A0_im(int32 offset);
DEFINE_ALIAS(store_double_F0_A0_T1,0);
void gen_store_double_F0_A0_im(int32 offset);
DEFINE_ALIAS(store_single_F0_A0_T1,0);
void gen_store_single_F0_A0_im(int32 offset);
// Branch instructions
void gen_bc_A0(int bo, int bi, uint32 npc);

View File

@ -192,6 +192,7 @@ struct powerpc_registers
uint32 gpr[32]; // General-Purpose Registers
powerpc_fpr fpr[32]; // Floating-Point Registers
powerpc_fpr fp_result; // Floating-Point result
powerpc_cr_register cr; // Condition Register
uint32 fpscr; // Floating-Point Status and Control Register
powerpc_xer_register xer; // XER Register (SPR 1)

View File

@ -869,26 +869,39 @@ powerpc_cpu::compile_block(uint32 entry_point)
const int SH = SH_field::extract(opcode);
const int MB = MB_field::extract(opcode);
const int ME = ME_field::extract(opcode);
const uint32 m = mask_operand::compute(MB, ME);
dg.gen_load_T0_GPR(rS);
if (MB == 0 && ME == 31) {
// rotlwi rA,rS,SH
if (SH > 0)
dg.gen_rol_32_T0_im(SH);
}
else if (MB == 0 && (ME == (31 - SH))) {
// slwi rA,rS,SH
dg.gen_lsl_32_T0_im(SH);
}
else {
const uint32 m = mask_operand::compute(MB, ME);
if (SH == 0) {
// andi rA,rS,MASK(MB,ME)
if (MB == 0) {
if (ME == 31) {
// rotlwi rA,rS,SH
if (SH > 0)
dg.gen_rol_32_T0_im(SH);
}
else if (ME == (31 - SH)) {
// slwi rA,rS,SH
dg.gen_lsl_32_T0_im(SH);
}
else if (SH == 0) {
// andi rA,rS,MASK(0,ME)
dg.gen_and_32_T0_im(m);
}
else {
// rlwinm rA,rS,SH,MB,ME
dg.gen_rlwinm_T0_T1(SH, m);
else goto do_generic_rlwinm;
}
else if (ME == 31) {
if (SH == (32 - MB)) {
// srwi rA,rS,SH
dg.gen_lsr_32_T0_im(MB);
}
else if (SH == 0) {
// andi rA,rS,MASK(MB,31)
dg.gen_and_32_T0_im(m);
}
else goto do_generic_rlwinm;
}
else {
// rlwinm rA,rS,SH,MB,ME
do_generic_rlwinm:
dg.gen_rlwinm_T0_T1(SH, m);
}
dg.gen_store_T0_GPR(rA);
if (Rc_field::test(opcode))
@ -900,10 +913,15 @@ powerpc_cpu::compile_block(uint32 entry_point)
const int rS = rS_field::extract(opcode);
const int rB = rB_field::extract(opcode);
const int rA = rA_field::extract(opcode);
const uint32 m = operand_MASK::get(this, opcode);
const int MB = MB_field::extract(opcode);
const int ME = ME_field::extract(opcode);
const uint32 m = mask_operand::compute(MB, ME);
dg.gen_load_T0_GPR(rS);
dg.gen_load_T1_GPR(rB);
dg.gen_rlwnm_T0_T1(m);
if (MB == 0 && ME == 31)
dg.gen_rol_32_T0_T1();
else
dg.gen_rlwnm_T0_T1(m);
dg.gen_store_T0_GPR(rA);
if (Rc_field::test(opcode))
dg.gen_record_cr0_T0();
@ -1010,6 +1028,241 @@ powerpc_cpu::compile_block(uint32 entry_point)
dg.gen_store_T0_crf(crfD_field::extract(opcode));
break;
}
// Floating-point loads. Each case only records the access size (in bytes)
// and whether the form updates rA and/or is indexed by rB, then jumps to the
// shared translation code at do_fp_load.
case PPC_I(LFD):		// Load Floating-Point Double
	op.mem.size = 8;
	op.mem.do_update = 0;
	op.mem.do_indexed = 0;
	goto do_fp_load;
case PPC_I(LFDU):		// Load Floating-Point Double with Update
	op.mem.size = 8;
	op.mem.do_update = 1;
	op.mem.do_indexed = 0;
	goto do_fp_load;
case PPC_I(LFDUX):		// Load Floating-Point Double with Update Indexed
	op.mem.size = 8;
	op.mem.do_update = 1;
	op.mem.do_indexed = 1;
	goto do_fp_load;
case PPC_I(LFDX):		// Load Floating-Point Double Indexed
	op.mem.size = 8;
	op.mem.do_update = 0;
	op.mem.do_indexed = 1;
	goto do_fp_load;
case PPC_I(LFS):		// Load Floating-Point Single
	op.mem.size = 4;
	op.mem.do_update = 0;
	op.mem.do_indexed = 0;
	goto do_fp_load;
case PPC_I(LFSU):		// Load Floating-Point Single with Update
	op.mem.size = 4;
	op.mem.do_update = 1;
	op.mem.do_indexed = 0;
	goto do_fp_load;
case PPC_I(LFSUX):		// Load Floating-Point Single with Update Indexed
	op.mem.size = 4;
	op.mem.do_update = 1;
	op.mem.do_indexed = 1;
	goto do_fp_load;
case PPC_I(LFSX):		// Load Floating-Point Single Indexed
	op.mem.size = 4;
	op.mem.do_update = 0;
	op.mem.do_indexed = 1;
	goto do_fp_load;
{
  do_fp_load:
	// Extract RZ operand: for non-update forms rA == 0 selects a literal
	// zero base address instead of the contents of GPR0
	const int rA = rA_field::extract(opcode);
	if (rA == 0 && !op.mem.do_update)
		dg.gen_mov_32_A0_im(0);
	else
		dg.gen_load_A0_GPR(rA);

	// Extract index operand
	if (op.mem.do_indexed)
		dg.gen_load_T1_GPR(rB_field::extract(opcode));

	// Load floating point data into FD from A0 + (T1 or displacement)
	if (op.mem.size == 8) {
		if (op.mem.do_indexed)
			dg.gen_load_double_FD_A0_T1();
		else
			dg.gen_load_double_FD_A0_im(operand_D::get(this, opcode));
	}
	else {
		if (op.mem.do_indexed)
			dg.gen_load_single_FD_A0_T1();
		else
			dg.gen_load_single_FD_A0_im(operand_D::get(this, opcode));
	}

	// Commit result
	dg.gen_store_FD_FPR(frD_field::extract(opcode));

	// Update RA with the effective address (A0 + index or displacement)
	if (op.mem.do_update) {
		if (op.mem.do_indexed)
			dg.gen_add_32_A0_T1();
		else
			dg.gen_add_32_A0_im(operand_D::get(this, opcode));
		dg.gen_store_A0_GPR(rA);
	}
	break;
}
// Floating-point stores. Each case only records the access size (in bytes)
// and whether the form updates rA and/or is indexed by rB, then jumps to the
// shared translation code at do_fp_store.
case PPC_I(STFD): // Store Floating-Point Double
op.mem.size = 8;
op.mem.do_update = 0;
op.mem.do_indexed = 0;
goto do_fp_store;
case PPC_I(STFDU): // Store Floating-Point Double with Update
op.mem.size = 8;
op.mem.do_update = 1;
op.mem.do_indexed = 0;
goto do_fp_store;
case PPC_I(STFDUX): // Store Floating-Point Double with Update Indexed
op.mem.size = 8;
op.mem.do_update = 1;
op.mem.do_indexed = 1;
goto do_fp_store;
case PPC_I(STFDX): // Store Floating-Point Double Indexed
op.mem.size = 8;
op.mem.do_update = 0;
op.mem.do_indexed = 1;
goto do_fp_store;
case PPC_I(STFS): // Store Floating-Point Single
op.mem.size = 4;
op.mem.do_update = 0;
op.mem.do_indexed = 0;
goto do_fp_store;
case PPC_I(STFSU): // Store Floating-Point Single with Update
op.mem.size = 4;
op.mem.do_update = 1;
op.mem.do_indexed = 0;
goto do_fp_store;
case PPC_I(STFSUX): // Store Floating-Point Single with Update Indexed
op.mem.size = 4;
op.mem.do_update = 1;
op.mem.do_indexed = 1;
goto do_fp_store;
case PPC_I(STFSX): // Store Floating-Point Single Indexed
op.mem.size = 4;
op.mem.do_update = 0;
op.mem.do_indexed = 1;
goto do_fp_store;
{
do_fp_store:
// Extract RZ operand: for non-update forms rA == 0 selects a literal
// zero base address instead of the contents of GPR0
const int rA = rA_field::extract(opcode);
if (rA == 0 && !op.mem.do_update)
dg.gen_mov_32_A0_im(0);
else
dg.gen_load_A0_GPR(rA);
// Extract index operand
if (op.mem.do_indexed)
dg.gen_load_T1_GPR(rB_field::extract(opcode));
// Load register to commit to memory
dg.gen_load_F0_FPR(frS_field::extract(opcode));
// Store floating point data at A0 + (T1 or displacement)
if (op.mem.size == 8) {
if (op.mem.do_indexed)
dg.gen_store_double_F0_A0_T1();
else
dg.gen_store_double_F0_A0_im(operand_D::get(this, opcode));
}
else {
if (op.mem.do_indexed)
dg.gen_store_single_F0_A0_T1();
else
dg.gen_store_single_F0_A0_im(operand_D::get(this, opcode));
}
// Update RA with the effective address (A0 + index or displacement)
if (op.mem.do_update) {
if (op.mem.do_indexed)
dg.gen_add_32_A0_T1();
else
dg.gen_add_32_A0_im(operand_D::get(this, opcode));
dg.gen_store_A0_GPR(rA);
}
break;
}
#if PPC_ENABLE_FPU_EXCEPTIONS == 0
// Unary FP operations: F0 <- frB, FD <- op(F0), frD <- FD.
case PPC_I(FABS): // Floating Absolute Value
case PPC_I(FNABS): // Floating Negative Absolute Value
case PPC_I(FNEG): // Floating Negate
case PPC_I(FMR): // Floating Move Register
{
dg.gen_load_F0_FPR(frB_field::extract(opcode));
// Select the op matching the decoded mnemonic
switch (ii->mnemo) {
case PPC_I(FABS): dg.gen_fabs_FD_F0(); break;
case PPC_I(FNABS): dg.gen_fnabs_FD_F0(); break;
case PPC_I(FNEG): dg.gen_fneg_FD_F0(); break;
case PPC_I(FMR): dg.gen_fmov_FD_F0(); break;
}
dg.gen_store_FD_FPR(frD_field::extract(opcode));
// Record form (Rc set): also emit the CR1 update
if (Rc_field::test(opcode))
dg.gen_record_cr1();
break;
}
// Binary FP operations: F0 <- frA, F1 <- second operand, FD <- F0 op F1,
// frD <- FD.
case PPC_I(FADD): // Floating Add (Double-Precision)
case PPC_I(FSUB): // Floating Subtract (Double-Precision)
case PPC_I(FMUL): // Floating Multiply (Double-Precision)
case PPC_I(FDIV): // Floating Divide (Double-Precision)
case PPC_I(FADDS): // Floating Add (Single-Precision)
case PPC_I(FSUBS): // Floating Subtract (Single-Precision)
case PPC_I(FMULS): // Floating Multiply (Single-Precision)
case PPC_I(FDIVS): // Floating Divide (Single-Precision)
{
dg.gen_load_F0_FPR(frA_field::extract(opcode));
// Multiplies take their second operand from the frC field, the other
// operations from the frB field
if (ii->mnemo == PPC_I(FMUL) || ii->mnemo == PPC_I(FMULS))
dg.gen_load_F1_FPR(frC_field::extract(opcode));
else
dg.gen_load_F1_FPR(frB_field::extract(opcode));
// Select the op matching the decoded mnemonic
switch (ii->mnemo) {
case PPC_I(FADD): dg.gen_fadd_FD_F0_F1(); break;
case PPC_I(FSUB): dg.gen_fsub_FD_F0_F1(); break;
case PPC_I(FMUL): dg.gen_fmul_FD_F0_F1(); break;
case PPC_I(FDIV): dg.gen_fdiv_FD_F0_F1(); break;
case PPC_I(FADDS): dg.gen_fadds_FD_F0_F1(); break;
case PPC_I(FSUBS): dg.gen_fsubs_FD_F0_F1(); break;
case PPC_I(FMULS): dg.gen_fmuls_FD_F0_F1(); break;
case PPC_I(FDIVS): dg.gen_fdivs_FD_F0_F1(); break;
}
dg.gen_store_FD_FPR(frD_field::extract(opcode));
// Record form (Rc set): also emit the CR1 update
if (Rc_field::test(opcode))
dg.gen_record_cr1();
break;
}
// Multiply-add family: F0 <- frA, F1 <- frC, F2 <- frB, FD <- op(F0, F1, F2),
// frD <- FD.
case PPC_I(FMADD): // Floating Multiply-Add (Double-Precision)
case PPC_I(FMSUB): // Floating Multiply-Subtract (Double-Precision)
case PPC_I(FNMADD): // Floating Negative Multiply-Add (Double-Precision)
case PPC_I(FNMSUB): // Floating Negative Multiply-Subtract (Double-Precision)
case PPC_I(FMADDS): // Floating Multiply-Add (Single-Precision)
case PPC_I(FMSUBS): // Floating Multiply-Subtract (Single-Precision)
case PPC_I(FNMADDS): // Floating Negative Multiply-Add (Single-Precision)
case PPC_I(FNMSUBS): // Floating Negative Multiply-Subtract (Single-Precision)
{
dg.gen_load_F0_FPR(frA_field::extract(opcode));
dg.gen_load_F1_FPR(frC_field::extract(opcode));
dg.gen_load_F2_FPR(frB_field::extract(opcode));
// Select the op matching the decoded mnemonic
switch (ii->mnemo) {
case PPC_I(FMADD): dg.gen_fmadd_FD_F0_F1_F2(); break;
case PPC_I(FMSUB): dg.gen_fmsub_FD_F0_F1_F2(); break;
case PPC_I(FNMADD): dg.gen_fnmadd_FD_F0_F1_F2(); break;
case PPC_I(FNMSUB): dg.gen_fnmsub_FD_F0_F1_F2(); break;
case PPC_I(FMADDS): dg.gen_fmadds_FD_F0_F1_F2(); break;
case PPC_I(FMSUBS): dg.gen_fmsubs_FD_F0_F1_F2(); break;
case PPC_I(FNMADDS): dg.gen_fnmadds_FD_F0_F1_F2(); break;
case PPC_I(FNMSUBS): dg.gen_fnmsubs_FD_F0_F1_F2(); break;
}
dg.gen_store_FD_FPR(frD_field::extract(opcode));
// Record form (Rc set): also emit the CR1 update
if (Rc_field::test(opcode))
dg.gen_record_cr1();
break;
}
#endif
default: // Direct call to instruction handler
{
typedef void (*func_t)(dyngen_cpu_base, uint32);