
/*
* ppc-dyngen-ops.cpp - PowerPC synthetic instructions
*
* Kheperix (C) 2003-2005 Gwenole Beauchesne
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "sysdeps.h"
#include "cpu/vm.hpp"
#include "cpu/jit/dyngen-exec.h"
#define NO_DEFINE_ALIAS 1
#include "cpu/ppc/ppc-cpu.hpp"
#include "cpu/ppc/ppc-bitfields.hpp"
#include "cpu/ppc/ppc-registers.hpp"
#include "cpu/ppc/ppc-operations.hpp"
#if defined(__GNUC__)
// Force inlining under newer versions of GCC.
static inline uint64 vm_read_memory_8(vm_addr_t addr) __attribute__((always_inline));
static inline void vm_write_memory_8(vm_addr_t addr, uint64 value) __attribute__((always_inline));
static inline uint64 vm_do_read_memory_8(uint64 *a) __attribute__((always_inline));
static inline void vm_do_write_memory_8(uint64 *a, uint64 v) __attribute__((always_inline));
static inline uint64 generic_bswap_64(uint64 x) __attribute__((always_inline));
static inline uint32 fp_store_single_convert(uint64 v) __attribute__((always_inline));
#define INLINE inline __attribute__((always_inline))
#else
#define INLINE inline
#endif
// We need at least 4 free host general purpose registers: one for CPU and
// three for T0..T2 (T3 is optional)
register struct powerpc_cpu *CPU asm(REG_CPU);
#define DYNGEN_DEFINE_GLOBAL_REGISTER(REG) \
register uintptr A##REG asm(REG_T##REG); \
register uint32 T##REG asm(REG_T##REG)
DYNGEN_DEFINE_GLOBAL_REGISTER(0);
DYNGEN_DEFINE_GLOBAL_REGISTER(1);
DYNGEN_DEFINE_GLOBAL_REGISTER(2);
#ifdef REG_T3
DYNGEN_DEFINE_GLOBAL_REGISTER(3);
#else
#define A3 powerpc_dyngen_helper::reg_T3()
#define T3 powerpc_dyngen_helper::reg_T3()
#endif
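// NOTE: A<n> and T<n> are two views of the same host register (both are
// bound to REG_T<n>): A<n> is the pointer-width (uintptr) alias used to
// carry addresses of FPR/VR operands, while T<n> is the 32-bit alias used
// for integer values.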
// Floating-point registers
#define FPREG(X) ((powerpc_fpr *)(X))
#define F0 FPREG(A0)->d
#define F0_dw FPREG(A0)->j
#define F1 FPREG(A1)->d
#define F1_dw FPREG(A1)->j
#define F2 FPREG(A2)->d
#define F2_dw FPREG(A2)->j
#define FD powerpc_dyngen_helper::reg_F3().d
#define FD_dw powerpc_dyngen_helper::reg_F3().j
// Vector registers
#define VREG(X) ((powerpc_vr *)(X))[0]
#define V0 VREG(reg_V0)
#define reg_V0 A0
#define V1 VREG(reg_V1)
#define reg_V1 A1
#define V2 VREG(reg_V2)
#define reg_V2 A2
#define VD VREG(reg_VD)
#define reg_VD A3
/**
* Helper class to access protected CPU context
**/
struct powerpc_dyngen_helper {
static INLINE uint32 get_pc() { return CPU->pc(); }
static INLINE void set_pc(uint32 value) { CPU->pc() = value; }
static INLINE void inc_pc(int32 offset) { CPU->pc() += offset; }
static INLINE uint32 get_lr() { return CPU->lr(); }
static INLINE void set_lr(uint32 value) { CPU->lr() = value; }
static INLINE uint32 get_ctr() { return CPU->ctr(); }
static INLINE void set_ctr(uint32 value) { CPU->ctr() = value; }
static INLINE uint32 get_cr() { return CPU->cr().get(); }
static INLINE void set_cr(uint32 value) { CPU->cr().set(value); }
static INLINE uint32 get_fpscr() { return CPU->fpscr(); }
static INLINE void set_fpscr(uint32 value) { CPU->fpscr() = value; }
static INLINE uint32 get_xer() { return CPU->xer().get(); }
static INLINE void set_xer(uint32 value) { CPU->xer().set(value); }
static INLINE uint32 get_vrsave() { return CPU->vrsave(); }
static INLINE void set_vrsave(uint32 value) { CPU->vrsave() = value; }
static INLINE uint32 get_vscr() { return CPU->vscr().get(); }
static INLINE void set_vscr(uint32 value) { CPU->vscr().set(value); }
static INLINE void record(int crf, int32 v) { CPU->record_cr(crf, v); }
static INLINE powerpc_cr_register & cr() { return CPU->cr(); }
static INLINE powerpc_xer_register & xer() { return CPU->xer(); }
static INLINE powerpc_spcflags & spcflags() { return CPU->spcflags(); }
static INLINE void set_cr(int crfd, int v) { CPU->cr().set(crfd, v); }
static INLINE powerpc_registers *regs() { return &CPU->regs(); }
#ifndef REG_T3
static INLINE uintptr & reg_T3() { return CPU->codegen.reg_T3; }
#endif
//#ifndef REG_F3
static INLINE powerpc_fpr & reg_F3() { return CPU->codegen.reg_F3; }
//#endif
static INLINE powerpc_block_info *find_block(uint32 pc) { return CPU->my_block_cache.fast_find(pc); }
};
// Semantic action templates
#define DYNGEN_OPS
#include "ppc-execute.hpp"
/**
* Load/store general purpose registers
**/
#define DEFINE_OP(REG, N) \
void OPPROTO op_load_##REG##_GPR##N(void) \
{ \
REG = CPU->gpr(N); \
} \
void OPPROTO op_store_##REG##_GPR##N(void) \
{ \
CPU->gpr(N) = REG; \
}
#define DEFINE_REG(N) \
DEFINE_OP(T0,N); \
DEFINE_OP(T1,N); \
DEFINE_OP(T2,N);
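// For example, DEFINE_REG(3) below expands (via DEFINE_OP) to the six
// micro-ops op_load_T0_GPR3/op_store_T0_GPR3 through
// op_load_T2_GPR3/op_store_T2_GPR3, each copying guest GPR3 to or from one
// synthetic register.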
DEFINE_REG(0);
DEFINE_REG(1);
DEFINE_REG(2);
DEFINE_REG(3);
DEFINE_REG(4);
DEFINE_REG(5);
DEFINE_REG(6);
DEFINE_REG(7);
DEFINE_REG(8);
DEFINE_REG(9);
DEFINE_REG(10);
DEFINE_REG(11);
DEFINE_REG(12);
DEFINE_REG(13);
DEFINE_REG(14);
DEFINE_REG(15);
DEFINE_REG(16);
DEFINE_REG(17);
DEFINE_REG(18);
DEFINE_REG(19);
DEFINE_REG(20);
DEFINE_REG(21);
DEFINE_REG(22);
DEFINE_REG(23);
DEFINE_REG(24);
DEFINE_REG(25);
DEFINE_REG(26);
DEFINE_REG(27);
DEFINE_REG(28);
DEFINE_REG(29);
DEFINE_REG(30);
DEFINE_REG(31);
#undef DEFINE_REG
#undef DEFINE_OP
/**
* Load/store floating-point registers
**/
#define DEFINE_OP(REG, N) \
void OPPROTO op_load_F##REG##_FPR##N(void) \
{ \
A##REG = (uintptr)&CPU->fpr(N); \
} \
void OPPROTO op_store_F##REG##_FPR##N(void) \
{ \
CPU->fpr_dw(N) = F##REG##_dw; \
}
#define DEFINE_REG(N) \
DEFINE_OP(0,N); \
DEFINE_OP(1,N); \
DEFINE_OP(2,N); \
void OPPROTO op_store_FD_FPR##N(void) \
{ \
CPU->fpr_dw(N) = FD_dw; \
}
DEFINE_REG(0);
DEFINE_REG(1);
DEFINE_REG(2);
DEFINE_REG(3);
DEFINE_REG(4);
DEFINE_REG(5);
DEFINE_REG(6);
DEFINE_REG(7);
DEFINE_REG(8);
DEFINE_REG(9);
DEFINE_REG(10);
DEFINE_REG(11);
DEFINE_REG(12);
DEFINE_REG(13);
DEFINE_REG(14);
DEFINE_REG(15);
DEFINE_REG(16);
DEFINE_REG(17);
DEFINE_REG(18);
DEFINE_REG(19);
DEFINE_REG(20);
DEFINE_REG(21);
DEFINE_REG(22);
DEFINE_REG(23);
DEFINE_REG(24);
DEFINE_REG(25);
DEFINE_REG(26);
DEFINE_REG(27);
DEFINE_REG(28);
DEFINE_REG(29);
DEFINE_REG(30);
DEFINE_REG(31);
#undef DEFINE_REG
#undef DEFINE_OP
/**
* Load/Store floating-point data
**/
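// On little-endian x86 hosts, the two 32-bit words of a guest (big-endian)
// double must be swapped in addition to the per-word byte swap performed by
// vm_read_memory_4 / vm_write_memory_4: the guest's high word (EA + 0) is
// w[1] of the host-side value.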
#if defined(__i386__)
#define do_load_double(REG, EA) do { \
uint32 *w = (uint32 *)&REG; \
w[1] = vm_read_memory_4(EA + 0); \
w[0] = vm_read_memory_4(EA + 4); \
} while (0)
#define do_store_double(REG, EA) do { \
uint32 *w = (uint32 *)&REG; \
vm_write_memory_4(EA + 0, w[1]); \
vm_write_memory_4(EA + 4, w[0]); \
} while (0)
#endif
#ifndef do_load_single
#define do_load_single(REG, EA) REG##_dw = fp_load_single_convert(vm_read_memory_4(EA))
#endif
#ifndef do_store_single
#define do_store_single(REG, EA) vm_write_memory_4(EA, fp_store_single_convert(REG##_dw))
#endif
#ifndef do_load_double
#define do_load_double(REG, EA) REG##_dw = vm_read_memory_8(EA)
#endif
#ifndef do_store_double
#define do_store_double(REG, EA) vm_write_memory_8(EA, REG##_dw)
#endif
#define im PARAM1
#define DEFINE_OP(OFFSET) \
void OPPROTO op_load_double_FD_T1_##OFFSET(void) \
{ \
do_load_double(FD, T1 + OFFSET); \
} \
void OPPROTO op_load_single_FD_T1_##OFFSET(void) \
{ \
do_load_single(FD, T1 + OFFSET); \
} \
void OPPROTO op_store_double_F0_T1_##OFFSET(void) \
{ \
do_store_double(F0, T1 + OFFSET); \
} \
void OPPROTO op_store_single_F0_T1_##OFFSET(void) \
{ \
do_store_single(F0, T1 + OFFSET); \
}
DEFINE_OP(0);
DEFINE_OP(im);
DEFINE_OP(T2);
#undef im
#undef DEFINE_OP
#if KPX_MAX_CPUS == 1
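// Single-processor build: the lwarx/stwcx. reservation can be tracked with
// a plain flag and address in the register file, with no cross-CPU
// snooping. stwcx. stores and sets CR0[EQ] only if the reservation is still
// valid and was established on the same address.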
void OPPROTO op_lwarx_T0_T1(void)
{
T0 = vm_read_memory_4(T1);
powerpc_dyngen_helper::regs()->reserve_valid = 1;
powerpc_dyngen_helper::regs()->reserve_addr = T1;
}
void OPPROTO op_stwcx_T0_T1(void)
{
uint32 cr = powerpc_dyngen_helper::get_cr() & ~CR_field<0>::mask();
cr |= powerpc_dyngen_helper::xer().get_so() << 28;
if (powerpc_dyngen_helper::regs()->reserve_valid) {
powerpc_dyngen_helper::regs()->reserve_valid = 0;
if (powerpc_dyngen_helper::regs()->reserve_addr == T1 /* physical_addr(EA) */) {
vm_write_memory_4(T1, T0);
cr |= CR_EQ_field<0>::mask();
}
}
powerpc_dyngen_helper::set_cr(cr);
dyngen_barrier();
}
#endif
/**
* Condition Registers
**/
void OPPROTO op_load_T0_CR(void)
{
T0 = powerpc_dyngen_helper::get_cr();
}
void OPPROTO op_store_T0_CR(void)
{
powerpc_dyngen_helper::set_cr(T0);
}
#define DEFINE_OP(REG, N) \
void OPPROTO op_load_##REG##_crb##N(void) \
{ \
const uint32 cr = powerpc_dyngen_helper::get_cr(); \
REG = (cr >> (31 - N)) & 1; \
} \
void OPPROTO op_store_##REG##_crb##N(void) \
{ \
uint32 cr = powerpc_dyngen_helper::get_cr() & ~(1 << (31 - N)); \
cr |= ((REG & 1) << (31 - N)); \
powerpc_dyngen_helper::set_cr(cr); \
}
#define DEFINE_REG(N) \
DEFINE_OP(T0, N); \
DEFINE_OP(T1, N);
DEFINE_REG(0);
DEFINE_REG(1);
DEFINE_REG(2);
DEFINE_REG(3);
DEFINE_REG(4);
DEFINE_REG(5);
DEFINE_REG(6);
DEFINE_REG(7);
DEFINE_REG(8);
DEFINE_REG(9);
DEFINE_REG(10);
DEFINE_REG(11);
DEFINE_REG(12);
DEFINE_REG(13);
DEFINE_REG(14);
DEFINE_REG(15);
DEFINE_REG(16);
DEFINE_REG(17);
DEFINE_REG(18);
DEFINE_REG(19);
DEFINE_REG(20);
DEFINE_REG(21);
DEFINE_REG(22);
DEFINE_REG(23);
DEFINE_REG(24);
DEFINE_REG(25);
DEFINE_REG(26);
DEFINE_REG(27);
DEFINE_REG(28);
DEFINE_REG(29);
DEFINE_REG(30);
DEFINE_REG(31);
#undef DEFINE_REG
#undef DEFINE_OP
#define DEFINE_OP(CRF, REG) \
void OPPROTO op_load_##REG##_cr##CRF(void) \
{ \
REG = powerpc_dyngen_helper::cr().get(CRF); \
} \
void OPPROTO op_store_##REG##_cr##CRF(void) \
{ \
powerpc_dyngen_helper::cr().set(CRF, REG); \
}
DEFINE_OP(0, T0);
DEFINE_OP(1, T0);
DEFINE_OP(2, T0);
DEFINE_OP(3, T0);
DEFINE_OP(4, T0);
DEFINE_OP(5, T0);
DEFINE_OP(6, T0);
DEFINE_OP(7, T0);
#undef DEFINE_OP
void OPPROTO op_mtcrf_T0_im(void)
{
const uint32 mask = PARAM1;
const uint32 cr = powerpc_dyngen_helper::get_cr();
powerpc_dyngen_helper::set_cr((cr & ~mask) | (T0 & mask));
}
/**
* Native FP operations optimization
**/
#if defined(__i386__)
#define do_fabs(x) ({ double y; asm volatile ("fabs" : "=t" (y) : "0" (x)); y; })
#define do_fneg(x) ({ double y; asm volatile ("fchs" : "=t" (y) : "0" (x)); y; })
#endif
#ifndef do_fabs
#define do_fabs(x) fabs(x)
#endif
#ifndef do_fadd
#define do_fadd(x, y) (x) + (y)
#endif
#ifndef do_fdiv
#define do_fdiv(x, y) (x) / (y)
#endif
#ifndef do_fmadd
#define do_fmadd(x, y, z) (((x) * (y)) + (z))
#endif
#ifndef do_fmsub
#define do_fmsub(x, y, z) (((x) * (y)) - (z))
#endif
#ifndef do_fmul
#define do_fmul(x, y) ((x) * (y))
#endif
#ifndef do_fneg
#define do_fneg(x) -(x)
#endif
#ifndef do_fnabs
#define do_fnabs(x) do_fneg(do_fabs(x))
#endif
#ifndef do_fnmadd
#define do_fnmadd(x, y, z) do_fneg(((x) * (y)) + (z))
#endif
#ifndef do_fnmsub
#define do_fnmsub(x, y, z) do_fneg(((x) * (y)) - (z))
#endif
#ifndef do_fsub
#define do_fsub(x, y) (x) - (y)
#endif
/**
* Double-precision floating point operations
**/
#define DEFINE_OP(NAME, CODE) \
void OPPROTO op_##NAME(void) \
{ \
CODE; \
}
DEFINE_OP(fmov_F0_F1, F0_dw = F1_dw);
DEFINE_OP(fmov_F0_F2, F0_dw = F2_dw);
DEFINE_OP(fmov_F1_F0, F1_dw = F0_dw);
DEFINE_OP(fmov_F1_F2, F1_dw = F2_dw);
DEFINE_OP(fmov_F2_F0, F2_dw = F0_dw);
DEFINE_OP(fmov_F2_F1, F2_dw = F1_dw);
DEFINE_OP(fmov_FD_F0, FD_dw = F0_dw);
DEFINE_OP(fmov_FD_F1, FD_dw = F1_dw);
DEFINE_OP(fmov_FD_F2, FD_dw = F2_dw);
DEFINE_OP(fabs_FD_F0, FD = do_fabs(F0));
DEFINE_OP(fneg_FD_F0, FD = do_fneg(F0));
DEFINE_OP(fnabs_FD_F0, FD = do_fnabs(F0));
DEFINE_OP(fadd_FD_F0_F1, FD = do_fadd(F0, F1));
DEFINE_OP(fsub_FD_F0_F1, FD = do_fsub(F0, F1));
DEFINE_OP(fmul_FD_F0_F1, FD = do_fmul(F0, F1));
DEFINE_OP(fdiv_FD_F0_F1, FD = do_fdiv(F0, F1));
DEFINE_OP(fmadd_FD_F0_F1_F2, FD = do_fmadd(F0, F1, F2));
DEFINE_OP(fmsub_FD_F0_F1_F2, FD = do_fmsub(F0, F1, F2));
DEFINE_OP(fnmadd_FD_F0_F1_F2, FD = do_fnmadd(F0, F1, F2));
DEFINE_OP(fnmsub_FD_F0_F1_F2, FD = do_fnmsub(F0, F1, F2));
#undef DEFINE_OP
/**
* Single-Precision floating point operations
**/
#define DEFINE_OP(NAME, REG, OP) \
void OPPROTO op_##NAME(void) \
{ \
float x = OP; \
REG = x; \
}
DEFINE_OP(fadds_FD_F0_F1, FD, do_fadd(F0, F1));
DEFINE_OP(fsubs_FD_F0_F1, FD, do_fsub(F0, F1));
DEFINE_OP(fmuls_FD_F0_F1, FD, do_fmul(F0, F1));
DEFINE_OP(fdivs_FD_F0_F1, FD, do_fdiv(F0, F1));
DEFINE_OP(fmadds_FD_F0_F1_F2, FD, do_fmadd(F0, F1, F2));
DEFINE_OP(fmsubs_FD_F0_F1_F2, FD, do_fmsub(F0, F1, F2));
DEFINE_OP(fnmadds_FD_F0_F1_F2, FD, do_fnmadd(F0, F1, F2));
DEFINE_OP(fnmsubs_FD_F0_F1_F2, FD, do_fnmsub(F0, F1, F2));
#undef DEFINE_OP
/**
* Special purpose registers
**/
void OPPROTO op_load_T0_VRSAVE(void)
{
T0 = powerpc_dyngen_helper::get_vrsave();
}
void OPPROTO op_store_T0_VRSAVE(void)
{
powerpc_dyngen_helper::set_vrsave(T0);
}
void OPPROTO op_load_T0_XER(void)
{
T0 = powerpc_dyngen_helper::get_xer();
}
void OPPROTO op_store_T0_XER(void)
{
powerpc_dyngen_helper::set_xer(T0);
}
void OPPROTO op_load_T0_PC(void)
{
T0 = powerpc_dyngen_helper::get_pc();
}
void OPPROTO op_store_T0_PC(void)
{
powerpc_dyngen_helper::set_pc(T0);
}
void OPPROTO op_set_PC_im(void)
{
powerpc_dyngen_helper::set_pc(PARAM1);
}
void OPPROTO op_set_PC_T0(void)
{
powerpc_dyngen_helper::set_pc(T0);
}
void OPPROTO op_inc_PC(void)
{
powerpc_dyngen_helper::inc_pc(PARAM1);
}
void OPPROTO op_load_T0_LR(void)
{
T0 = powerpc_dyngen_helper::get_lr();
}
void OPPROTO op_store_T0_LR(void)
{
powerpc_dyngen_helper::set_lr(T0);
}
void OPPROTO op_load_T0_CTR(void)
{
T0 = powerpc_dyngen_helper::get_ctr();
}
void OPPROTO op_store_T0_CTR(void)
{
powerpc_dyngen_helper::set_ctr(T0);
}
void OPPROTO op_store_im_LR(void)
{
powerpc_dyngen_helper::set_lr(PARAM1);
}
void OPPROTO op_load_T0_CTR_aligned(void)
{
T0 = powerpc_dyngen_helper::get_ctr() & -4;
}
void OPPROTO op_load_T0_LR_aligned(void)
{
T0 = powerpc_dyngen_helper::get_lr() & -4;
}
void OPPROTO op_spcflags_init(void)
{
powerpc_dyngen_helper::spcflags().set(PARAM1);
}
void OPPROTO op_spcflags_set(void)
{
powerpc_dyngen_helper::spcflags().set(PARAM1);
}
void OPPROTO op_spcflags_clear(void)
{
powerpc_dyngen_helper::spcflags().clear(PARAM1);
}
#if defined(__x86_64__)
#define FAST_COMPARE_SPECFLAGS_DISPATCH(SPCFLAGS, TARGET) \
asm volatile ("test %0,%0 ; jz " #TARGET : : "r" (SPCFLAGS))
#endif
#ifndef FAST_COMPARE_SPECFLAGS_DISPATCH
#define FAST_COMPARE_SPECFLAGS_DISPATCH(SPCFLAGS, TARGET) \
if (SPCFLAGS == 0) DYNGEN_FAST_DISPATCH(TARGET)
#endif
void OPPROTO op_spcflags_check(void)
{
FAST_COMPARE_SPECFLAGS_DISPATCH(powerpc_dyngen_helper::spcflags().get(), __op_jmp0);
}
/**
* Branch instructions
**/
template< int bo >
static INLINE void do_prep_branch_bo(void)
{
bool ctr_ok = true;
bool cond_ok = true;
if (BO_CONDITIONAL_BRANCH(bo)) {
if (BO_BRANCH_IF_TRUE(bo))
cond_ok = T1;
else
cond_ok = !T1;
}
if (BO_DECREMENT_CTR(bo)) {
T2 = powerpc_dyngen_helper::get_ctr() - 1;
powerpc_dyngen_helper::set_ctr(T2);
if (BO_BRANCH_IF_CTR_ZERO(bo))
ctr_ok = !T2;
else
ctr_ok = T2;
}
T1 = ctr_ok && cond_ok;
dyngen_barrier();
}
#define BO(A,B,C,D) (((A) << 4) | ((B) << 3) | ((C) << 2) | ((D) << 1))
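// BO field layout (bit 0, the branch prediction hint, is ignored here):
//   A (0x10) - if set, do not test the condition bit in T1
//   B (0x08) - branch if the condition is true (else if false)
//   C (0x04) - if set, do not decrement CTR
//   D (0x02) - branch if the decremented CTR is zero (else if non-zero)
// The 'x' positions in the op suffixes below mark bits that are don't-cares.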
#define DEFINE_OP(BO_SUFFIX, BO_VALUE) \
void OPPROTO op_prep_branch_bo_##BO_SUFFIX(void) \
{ \
do_prep_branch_bo<BO BO_VALUE>(); \
}
DEFINE_OP(0000,(0,0,0,0));
DEFINE_OP(0001,(0,0,0,1));
DEFINE_OP(001x,(0,0,1,0));
DEFINE_OP(0100,(0,1,0,0));
DEFINE_OP(0101,(0,1,0,1));
DEFINE_OP(011x,(0,1,1,0));
DEFINE_OP(1x00,(1,0,0,0));
DEFINE_OP(1x01,(1,0,0,1));
// NOTE: the compiler is expected to optimize out the use of PARAM1
DEFINE_OP(1x1x,(1,0,1,0));
#undef DEFINE_OP
#undef BO
void OPPROTO op_branch_chain_1(void)
{
DYNGEN_FAST_DISPATCH(__op_jmp0);
}
void OPPROTO op_branch_chain_2(void)
{
if (T1)
DYNGEN_FAST_DISPATCH(__op_jmp0);
else
DYNGEN_FAST_DISPATCH(__op_jmp1);
dyngen_barrier();
}
static INLINE void do_execute_branch_1(uint32 tpc)
{
powerpc_dyngen_helper::set_pc(tpc);
}
void OPPROTO op_branch_1_T0(void)
{
do_execute_branch_1(T0);
}
void OPPROTO op_branch_1_im(void)
{
do_execute_branch_1(PARAM1);
}
static INLINE void do_execute_branch_2(uint32 tpc, uint32 npc)
{
powerpc_dyngen_helper::set_pc(T1 ? tpc : npc);
dyngen_barrier();
}
void OPPROTO op_branch_2_T0_im(void)
{
do_execute_branch_2(T0, PARAM1);
}
void OPPROTO op_branch_2_im_im(void)
{
do_execute_branch_2(PARAM1, PARAM2);
}
/**
* Compare & Record instructions
**/
void OPPROTO op_record_cr0_T0(void)
{
uint32 cr = powerpc_dyngen_helper::get_cr() & ~CR_field<0>::mask();
cr |= powerpc_dyngen_helper::xer().get_so() << 28;
if ((int32)T0 < 0)
cr |= CR_LT_field<0>::mask();
else if ((int32)T0 > 0)
cr |= CR_GT_field<0>::mask();
else
cr |= CR_EQ_field<0>::mask();
powerpc_dyngen_helper::set_cr(cr);
dyngen_barrier();
}
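// Record forms of FP instructions copy the FPSCR exception summary bits
// (FX, FEX, VX, OX - the four most significant bits of FPSCR) into CR
// field 1; the shift by 4 moves host bits 31..28 down into the CR1 slot
// (bits 27..24).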
void OPPROTO op_record_cr1(void)
{
powerpc_dyngen_helper::set_cr((powerpc_dyngen_helper::get_cr() & ~CR_field<1>::mask()) |
((powerpc_dyngen_helper::get_fpscr() >> 4) & 0x0f000000));
}
#define im PARAM1
#if DYNGEN_ASM_OPTS && defined(__powerpc__) && 0
#define DEFINE_OP(NAME, COMP, LHS, RHST, RHS) \
void OPPROTO op_##NAME##_##LHS##_##RHS(void) \
{ \
T0 = powerpc_dyngen_helper::xer().get_so(); \
uint32 v; \
asm volatile (COMP " 7,%1,%2 ; mfcr %0" : "=r" (v) : "r" (LHS), RHST (RHS) : "cr7"); \
T0 |= (v & 0xe); \
}
DEFINE_OP(compare,"cmpw",T0,"r",T1);
DEFINE_OP(compare,"cmpw",T0,"r",im);
DEFINE_OP(compare,"cmpwi",T0,"i",0);
DEFINE_OP(compare_logical,"cmplw",T0,"r",T1);
DEFINE_OP(compare_logical,"cmplw",T0,"r",im);
DEFINE_OP(compare_logical,"cmplwi",T0,"i",0);
#else
#define DEFINE_OP(NAME, TYPE, LHS, RHS) \
void OPPROTO op_##NAME##_##LHS##_##RHS(void) \
{ \
const uint32 SO = powerpc_dyngen_helper::xer().get_so(); \
if ((TYPE)LHS < (TYPE)RHS) \
T0 = SO | standalone_CR_LT_field::mask(); \
else if ((TYPE)LHS > (TYPE)RHS) \
T0 = SO | standalone_CR_GT_field::mask(); \
else \
T0 = SO | standalone_CR_EQ_field::mask(); \
dyngen_barrier(); \
}
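// Each compare op leaves a standalone 4-bit CR field image in T0 (SO plus
// exactly one of LT/GT/EQ); a subsequent op_store_T0_crN (defined above)
// writes it into the destination CR field.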
DEFINE_OP(compare,int32,T0,T1);
DEFINE_OP(compare,int32,T0,im);
DEFINE_OP(compare,int32,T0,0);
DEFINE_OP(compare_logical,uint32,T0,T1);
DEFINE_OP(compare_logical,uint32,T0,im);
DEFINE_OP(compare_logical,uint32,T0,0);
#endif
#undef im
#undef DEFINE_OP
/**
* Divide instructions
**/
#if DYNGEN_ASM_OPTS && defined(__powerpc__)
#define get_ov() ({ uint32 xer; asm volatile ("mfxer %0" : "=r" (xer)); XER_OV_field::extract(xer); })
#endif
void OPPROTO op_divw_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__powerpc__)
asm volatile ("divw %0,%0,%1" : "=r" (T0) : "r" (T1));
return;
#endif
#endif
T0 = do_execute_divide<true, false>(T0, T1);
}
void OPPROTO op_divwo_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__powerpc__)
asm volatile ("divwo %0,%0,%1" : "=r" (T0) : "r" (T1));
powerpc_dyngen_helper::xer().set_ov(get_ov());
return;
#endif
#endif
T0 = do_execute_divide<true, true>(T0, T1);
}
void OPPROTO op_divwu_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__powerpc__)
asm volatile ("divwu %0,%0,%1" : "=r" (T0) : "r" (T1));
return;
#endif
#endif
T0 = do_execute_divide<false, false>(T0, T1);
}
void OPPROTO op_divwuo_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__powerpc__)
asm volatile ("divwuo %0,%0,%1" : "=r" (T0) : "r" (T1));
powerpc_dyngen_helper::xer().set_ov(get_ov());
return;
#endif
#endif
T0 = do_execute_divide<false, true>(T0, T1);
}
/**
* Multiply instructions
**/
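// On x86, the one-operand imul/mul forms compute EDX:EAX = EAX * r/m32;
// binding T0 to EDX ("+d") and T1 to EAX ("a") leaves the high 32 bits of
// the product directly in T0.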
void OPPROTO op_mulhw_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__i386__) || defined(__x86_64__)
asm volatile ("imul %0" : "+d" (T0) : "a" (T1));
return;
#endif
#endif
T0 = (((int64)(int32)T0) * ((int64)(int32)T1)) >> 32;
}
void OPPROTO op_mulhwu_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__i386__) || defined(__x86_64__)
asm volatile ("mul %0" : "+d" (T0) : "a" (T1));
return;
#endif
#endif
T0 = (((uint64)T0) * ((uint64)T1)) >> 32;
}
void OPPROTO op_mulli_T0_im(void)
{
T0 = (int32)T0 * (int32)PARAM1;
}
void OPPROTO op_mullwo_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__powerpc__)
asm volatile ("mullwo %0,%0,%1" : "=r" (T0) : "r" (T1));
powerpc_dyngen_helper::xer().set_ov(get_ov());
return;
#endif
#endif
int64 RD = (int64)(int32)T0 * (int64)(int32)T1;
powerpc_dyngen_helper::xer().set_ov((int32)RD != RD);
T0 = RD;
dyngen_barrier();
}
/**
* Shift/Rotate instructions
**/
void OPPROTO op_slw_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__i386__) || defined(__x86_64__)
T0 <<= T1; // the shift count is masked to 5 bits
if (T1 & 0x20)
T0 = 0;
return;
#endif
#endif
T1 &= 0x3f;
T0 = (T1 & 0x20) ? 0 : (T0 << T1);
dyngen_barrier();
}
void OPPROTO op_srw_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__i386__) || defined(__x86_64__)
T0 >>= T1; // the shift count is masked to 5 bits
if (T1 & 0x20)
T0 = 0;
return;
#endif
#endif
T1 &= 0x3f;
T0 = (T1 & 0x20) ? 0 : (T0 >> T1);
dyngen_barrier();
}
void OPPROTO op_sraw_T0_T1(void)
{
T1 &= 0x3f;
if (T1 & 0x20) {
const uint32 SB = T0 >> 31;
powerpc_dyngen_helper::xer().set_ca(SB);
T0 = -SB;
}
else {
const uint32 RD = ((int32)T0) >> T1;
const bool CA = (int32)T0 < 0 && (T0 & ~(0xffffffff << T1));
powerpc_dyngen_helper::xer().set_ca(CA);
T0 = RD;
}
dyngen_barrier();
}
void OPPROTO op_sraw_T0_im(void)
{
const uint32 n = PARAM1;
const uint32 RD = ((int32)T0) >> n;
const bool ca = (((int32)T0) < 0) && (T0 & ~(0xffffffff << n));
powerpc_dyngen_helper::xer().set_ca(ca);
T0 = RD;
dyngen_barrier();
}
void OPPROTO op_rlwimi_T0_T1(void)
{
T0 = op_ppc_rlwimi::apply(T1, PARAM1, PARAM2, T0);
}
void OPPROTO op_rlwinm_T0_T1(void)
{
T0 = op_rotl::apply(T0, PARAM1) & PARAM2;
}
void OPPROTO op_rlwnm_T0_T1(void)
{
T0 = op_rotl::apply(T0, T1) & PARAM1;
}
void OPPROTO op_cntlzw_32_T0(void)
{
int n;
#if DYNGEN_ASM_OPTS
#if defined(__i386__) || defined(__x86_64__)
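// bsr leaves its destination unchanged when the source is zero (documented
// on AMD, formally undefined on Intel but consistent in practice);
// presetting n to -1 therefore yields T0 = 31 - (-1) = 32, matching cntlzw
// of a zero operand.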
n = -1;
asm volatile ("bsr %1,%0" : "+r" (n) : "r" (T0));
T0 = 31 - n;
return;
#endif
#endif
uint32 m = 0x80000000;
for (n = 0; n < 32; n++, m >>= 1)
if (T0 & m)
break;
T0 = n;
dyngen_barrier();
}
/**
* Addition/Subtraction
**/
void OPPROTO op_addo_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__powerpc__)
uint32 xer;
asm volatile ("addo %0,%0,%2 ; mfxer %1" : "=r" (T0), "=r" (xer) : "r" (T1));
powerpc_dyngen_helper::xer().set_ov(XER_OV_field::extract(xer));
return;
#endif
#if defined(__i386__) || defined(__x86_64__)
uint32 ov;
asm volatile ("add %2,%0; seto %b1" : "=r" (T0), "=r" (ov) : "r" (T1) : "cc");
powerpc_dyngen_helper::xer().set_ov(ov);
return;
#endif
#endif
T0 = do_execute_addition<false, false, true>(T0, T1);
}
void OPPROTO op_addc_T0_im(void)
{
T0 = do_execute_addition<false, true, false>(T0, PARAM1);
}
void OPPROTO op_addc_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__powerpc__)
uint32 xer;
asm volatile ("addc %0,%0,%2 ; mfxer %1" : "=r" (T0), "=r" (xer) : "r" (T1));
powerpc_dyngen_helper::xer().set_ca(XER_CA_field::extract(xer));
return;
#endif
#if defined(__i386__) || defined(__x86_64__)
uint32 ca;
asm volatile ("add %2,%0; setc %b1" : "=r" (T0), "=r" (ca) : "r" (T1) : "cc");
powerpc_dyngen_helper::xer().set_ca(ca);
return;
#endif
#endif
T0 = do_execute_addition<false, true, false>(T0, T1);
}
void OPPROTO op_addco_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__powerpc__)
uint32 xer;
asm volatile ("addco %0,%0,%2 ; mfxer %1" : "=r" (T0), "=r" (xer) : "r" (T1));
powerpc_dyngen_helper::xer().set_ca(XER_CA_field::extract(xer));
powerpc_dyngen_helper::xer().set_ov(XER_OV_field::extract(xer));
return;
#endif
#if defined(__i386__) || defined(__x86_64__)
uint32 ca, ov;
asm volatile ("add %3,%0; setc %b1; seto %b2" : "=r" (T0), "=r" (ca), "=r" (ov) : "r" (T1) : "cc");
powerpc_dyngen_helper::xer().set_ca(ca);
powerpc_dyngen_helper::xer().set_ov(ov);
return;
#endif
#endif
T0 = do_execute_addition<false, true, true>(T0, T1);
}
void OPPROTO op_adde_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__powerpc__)
uint32 xer, ca = powerpc_dyngen_helper::xer().get_ca();
asm volatile ("li 0,-1 ; addc 0,%0,0" : : "r" (ca) : "r0");
asm volatile ("adde %0,%0,%2 ; mfxer %1" : "=r" (T0), "=r" (xer) : "r" (T1));
powerpc_dyngen_helper::xer().set_ca(XER_CA_field::extract(xer));
return;
#endif
#if defined(__i386__) || defined(__x86_64__)
uint32 ca = powerpc_dyngen_helper::xer().get_ca();
asm volatile ("bt $0,%1; adc %2,%0; setc %b1" : "=r" (T0), "+r" (ca) : "r" (T1) : "cc");
powerpc_dyngen_helper::xer().set_ca(ca);
return;
#endif
#endif
T0 = do_execute_addition<true, false, false>(T0, T1);
}
void OPPROTO op_addeo_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__powerpc__)
uint32 xer, ca = powerpc_dyngen_helper::xer().get_ca();
asm volatile ("li 0,-1 ; addc 0,%0,0" : : "r" (ca) : "r0");
asm volatile ("addeo %0,%0,%2 ; mfxer %1" : "=r" (T0), "=r" (xer) : "r" (T1));
powerpc_dyngen_helper::xer().set_ca(XER_CA_field::extract(xer));
powerpc_dyngen_helper::xer().set_ov(XER_OV_field::extract(xer));
return;
#endif
#if defined(__i386__) || defined(__x86_64__)
uint32 ov, ca = powerpc_dyngen_helper::xer().get_ca();
asm volatile ("bt $0,%1; adc %3,%0; setc %b1; seto %b2" : "=r" (T0), "+r" (ca), "=r" (ov) : "r" (T1) : "cc");
powerpc_dyngen_helper::xer().set_ca(ca);
powerpc_dyngen_helper::xer().set_ov(ov);
return;
#endif
#endif
T0 = do_execute_addition<true, false, true>(T0, T1);
}
void OPPROTO op_addme_T0(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__powerpc__)
uint32 xer, ca = powerpc_dyngen_helper::xer().get_ca();
asm volatile ("li 0,-1 ; addc 0,%0,0" : : "r" (ca) : "r0");
asm volatile ("addme %0,%0 ; mfxer %1" : "=r" (T0), "=r" (xer));
powerpc_dyngen_helper::xer().set_ca(XER_CA_field::extract(xer));
return;
#endif
#if defined(__i386__) || defined(__x86_64__)
uint32 ca = powerpc_dyngen_helper::xer().get_ca();
asm volatile ("bt $0,%1; adc $-1,%0; setc %b1" : "=r" (T0), "+r" (ca) : : "cc");
powerpc_dyngen_helper::xer().set_ca(ca);
return;
#endif
#endif
T0 = do_execute_addition<true, false, false>(T0, 0xffffffff);
}
void OPPROTO op_addmeo_T0(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__powerpc__)
uint32 xer, ca = powerpc_dyngen_helper::xer().get_ca();
asm volatile ("li 0,-1 ; addc 0,%0,0" : : "r" (ca) : "r0");
asm volatile ("addmeo %0,%0 ; mfxer %1" : "=r" (T0), "=r" (xer));
powerpc_dyngen_helper::xer().set_ca(XER_CA_field::extract(xer));
powerpc_dyngen_helper::xer().set_ov(XER_OV_field::extract(xer));
return;
#endif
#if defined(__i386__) || defined(__x86_64__)
uint32 ov, ca = powerpc_dyngen_helper::xer().get_ca();
asm volatile ("bt $0,%1; adc $-1,%0; setc %b1; seto %b2" : "=r" (T0), "+r" (ca), "=r" (ov) : : "cc");
powerpc_dyngen_helper::xer().set_ca(ca);
powerpc_dyngen_helper::xer().set_ov(ov);
return;
#endif
#endif
T0 = do_execute_addition<true, false, true>(T0, 0xffffffff);
}
void OPPROTO op_addze_T0(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__powerpc__)
uint32 xer, ca = powerpc_dyngen_helper::xer().get_ca();
asm volatile ("li 0,-1 ; addc 0,%0,0" : : "r" (ca) : "r0");
asm volatile ("addze %0,%0 ; mfxer %1" : "=r" (T0), "=r" (xer));
powerpc_dyngen_helper::xer().set_ca(XER_CA_field::extract(xer));
return;
#endif
#if defined(__i386__) || defined(__x86_64__)
uint32 ca = powerpc_dyngen_helper::xer().get_ca();
asm volatile ("bt $0,%1; adc $0,%0; setc %b1" : "=r" (T0), "+r" (ca) : : "cc");
powerpc_dyngen_helper::xer().set_ca(ca);
return;
#endif
#endif
T0 = do_execute_addition<true, false, false>(T0, 0);
}
void OPPROTO op_addzeo_T0(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__powerpc__)
uint32 xer, ca = powerpc_dyngen_helper::xer().get_ca();
asm volatile ("li 0,-1 ; addc 0,%0,0" : : "r" (ca) : "r0");
asm volatile ("addzeo %0,%0 ; mfxer %1" : "=r" (T0), "=r" (xer));
powerpc_dyngen_helper::xer().set_ca(XER_CA_field::extract(xer));
powerpc_dyngen_helper::xer().set_ov(XER_OV_field::extract(xer));
return;
#endif
#if defined(__i386__) || defined(__x86_64__)
uint32 ov, ca = powerpc_dyngen_helper::xer().get_ca();
asm volatile ("bt $0,%1; adc $0,%0; setc %b1; seto %b2" : "=r" (T0), "+r" (ca), "=r" (ov) : : "cc");
powerpc_dyngen_helper::xer().set_ca(ca);
powerpc_dyngen_helper::xer().set_ov(ov);
return;
#endif
#endif
T0 = do_execute_addition<true, false, true>(T0, 0);
}
void OPPROTO op_subf_T0_T1(void)
{
T0 = T1 - T0;
}
void OPPROTO op_subfo_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__i386__) || defined(__x86_64__)
uint32 ov, TI;
TI = T1;
asm volatile ("sub %2,%0; seto %b1" : "+r" (TI), "=r" (ov) : "r" (T0) : "cc");
T0 = TI;
powerpc_dyngen_helper::xer().set_ov(ov);
return;
#endif
#endif
T0 = do_execute_subtract<false, true>(T0, T1);
}
void OPPROTO op_subfc_T0_im(void)
{
T0 = do_execute_subtract<true, false>(T0, PARAM1);
}
void OPPROTO op_subfc_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__i386__) || defined(__x86_64__)
uint32 ca, TI;
TI = T1;
asm volatile ("sub %2,%0; cmc; setc %b1" : "+r" (TI), "=r" (ca) : "r" (T0) : "cc");
T0 = TI;
powerpc_dyngen_helper::xer().set_ca(ca);
return;
#endif
#endif
T0 = do_execute_subtract<true, false>(T0, T1);
}
void OPPROTO op_subfco_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__i386__) || defined(__x86_64__)
uint32 ca, ov, TI;
TI = T1;
asm volatile ("sub %3,%0; cmc; setc %b1; seto %b2" : "+r" (TI), "=r" (ca), "=r" (ov) : "r" (T0) : "cc");
T0 = TI;
powerpc_dyngen_helper::xer().set_ca(ca);
powerpc_dyngen_helper::xer().set_ov(ov);
return;
#endif
#endif
T0 = do_execute_subtract<true, true>(T0, T1);
}
void OPPROTO op_subfe_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__i386__) || defined(__x86_64__)
uint32 ca = powerpc_dyngen_helper::xer().get_ca();
uint32 TI = T1;
asm volatile ("bt $0,%1; cmc; sbb %2,%0; cmc; setc %b1" : "+r" (TI), "+r" (ca) : "r" (T0) : "cc");
T0 = TI;
powerpc_dyngen_helper::xer().set_ca(ca);
return;
#endif
#endif
T0 = do_execute_subtract_extended<false>(T0, T1);
}
void OPPROTO op_subfeo_T0_T1(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__i386__) || defined(__x86_64__)
uint32 ov, ca = powerpc_dyngen_helper::xer().get_ca();
uint32 TI = T1;
asm volatile ("bt $0,%1; cmc; sbb %3,%0; cmc; setc %b1; seto %b2" : "+r" (TI), "+r" (ca), "=r" (ov) : "r" (T0) : "cc");
T0 = TI;
powerpc_dyngen_helper::xer().set_ca(ca);
powerpc_dyngen_helper::xer().set_ov(ov);
return;
#endif
#endif
T0 = do_execute_subtract_extended<true>(T0, T1);
}
void OPPROTO op_subfme_T0(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__i386__) || defined(__x86_64__)
uint32 ca = powerpc_dyngen_helper::xer().get_ca();
uint32 TI = (uint32)-1;
asm volatile ("bt $0,%1; cmc; sbb %2,%0; cmc; setc %b1" : "+r" (TI), "+r" (ca) : "r" (T0) : "cc");
T0 = TI;
powerpc_dyngen_helper::xer().set_ca(ca);
return;
#endif
#endif
T0 = do_execute_subtract_extended<false>(T0, 0xffffffff);
}
void OPPROTO op_subfmeo_T0(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__i386__) || defined(__x86_64__)
uint32 ov;
uint32 ca = powerpc_dyngen_helper::xer().get_ca();
uint32 TI = (uint32)-1;
asm volatile ("bt $0,%1; cmc; sbb %3,%0; cmc; setc %b1; seto %b2" : "+r" (TI), "+r" (ca), "=r" (ov) : "r" (T0) : "cc");
T0 = TI;
powerpc_dyngen_helper::xer().set_ca(ca);
powerpc_dyngen_helper::xer().set_ov(ov);
return;
#endif
#endif
T0 = do_execute_subtract_extended<true>(T0, 0xffffffff);
}
void OPPROTO op_subfze_T0(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__i386__) || defined(__x86_64__)
uint32 ca = powerpc_dyngen_helper::xer().get_ca();
uint32 TI = 0;
asm volatile ("bt $0,%1; cmc; sbb %2,%0; cmc; setc %b1" : "+r" (TI), "+r" (ca) : "r" (T0) : "cc");
T0 = TI;
powerpc_dyngen_helper::xer().set_ca(ca);
return;
#endif
#endif
T0 = do_execute_subtract_extended<false>(T0, 0);
}
void OPPROTO op_subfzeo_T0(void)
{
#if DYNGEN_ASM_OPTS
#if defined(__i386__) || defined(__x86_64__)
uint32 ov;
uint32 ca = powerpc_dyngen_helper::xer().get_ca();
uint32 TI = 0;
asm volatile ("bt $0,%1; cmc; sbb %3,%0; cmc; setc %b1; seto %b2" : "+r" (TI), "+r" (ca), "=r" (ov) : "r" (T0) : "cc");
T0 = TI;
powerpc_dyngen_helper::xer().set_ca(ca);
powerpc_dyngen_helper::xer().set_ov(ov);
return;
#endif
#endif
T0 = do_execute_subtract_extended<true>(T0, 0);
}
/**
* Misc synthetic instructions
**/
void OPPROTO op_inc_32_mem(void)
{
uint32 *m = (uint32 *)PARAM1;
*m += 1;
}
void OPPROTO op_nego_T0(void)
{
powerpc_dyngen_helper::xer().set_ov(T0 == 0x80000000);
T0 = -T0;
}
void OPPROTO op_dcbz_T0(void)
{
T0 &= -32; // align T0 on cache line boundaries
uint64 * block = (uint64 *) vm_do_get_real_address(T0);
block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0;
}
/**
* Generate possible call to next basic block without going
* through register state restore & full cache lookup
**/
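// A0 carries a pointer to the block_info of the predicted next block; if
// its pc still matches the current guest PC we jump straight to the
// translated entry point, otherwise we try the fast block-cache lookup
// before falling back to the main execution loop.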
void OPPROTO op_jump_next_A0(void)
{
powerpc_block_info *bi = (powerpc_block_info *)A0;
uint32 pc = powerpc_dyngen_helper::get_pc();
if (likely(bi->pc == pc) || likely((bi = powerpc_dyngen_helper::find_block(pc)) != NULL))
goto *(bi->entry_point);
dyngen_barrier();
}
/**
* Load/store multiple
**/
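// do_lmw<N>/do_stmw<N> unroll the transfer at compile time for start
// registers 26..31 (the common cases); the <32> specializations are generic
// run-time loops whose start register comes from PARAM1, and are selected
// below through the "im" alias.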
template< int N >
static INLINE void do_lmw(void)
{
CPU->gpr(N) = vm_read_memory_4(T0);
T0 += 4;
do_lmw<N + 1>();
}
template<>
INLINE void do_lmw<31>(void)
{
CPU->gpr(31) = vm_read_memory_4(T0);
}
template<>
INLINE void do_lmw<32>(void)
{
for (uint32 r = PARAM1, ad = T0; r <= 31; r++, ad += 4)
CPU->gpr(r) = vm_read_memory_4(ad);
dyngen_barrier();
}
template< int N >
static INLINE void do_stmw(void)
{
vm_write_memory_4(T0, CPU->gpr(N));
T0 += 4;
do_stmw<N + 1>();
}
template<>
INLINE void do_stmw<31>(void)
{
vm_write_memory_4(T0, CPU->gpr(31));
}
template<>
INLINE void do_stmw<32>(void)
{
for (uint32 r = PARAM1, ad = T0; r <= 31; r++, ad += 4)
vm_write_memory_4(ad, CPU->gpr(r));
dyngen_barrier();
}
#define im 32
#define DEFINE_OP(N) \
void op_lmw_T0_## N(void) { do_lmw <N>(); } \
void op_stmw_T0_##N(void) { do_stmw<N>(); }
DEFINE_OP(im);
DEFINE_OP(26);
DEFINE_OP(27);
DEFINE_OP(28);
DEFINE_OP(29);
DEFINE_OP(30);
DEFINE_OP(31);
#undef im
#undef DEFINE_OP
/**
* Load/store addresses to vector registers
**/
#define DEFINE_OP(REG, N) \
void OPPROTO op_load_ad_V##REG##_VR##N(void) \
{ \
reg_V##REG = (uintptr)&CPU->vr(N); \
}
#define DEFINE_REG(N) \
DEFINE_OP(D,N); \
DEFINE_OP(0,N); \
DEFINE_OP(1,N); \
DEFINE_OP(2,N)
DEFINE_REG(0);
DEFINE_REG(1);
DEFINE_REG(2);
DEFINE_REG(3);
DEFINE_REG(4);
DEFINE_REG(5);
DEFINE_REG(6);
DEFINE_REG(7);
DEFINE_REG(8);
DEFINE_REG(9);
DEFINE_REG(10);
DEFINE_REG(11);
DEFINE_REG(12);
DEFINE_REG(13);
DEFINE_REG(14);
DEFINE_REG(15);
DEFINE_REG(16);
DEFINE_REG(17);
DEFINE_REG(18);
DEFINE_REG(19);
DEFINE_REG(20);
DEFINE_REG(21);
DEFINE_REG(22);
DEFINE_REG(23);
DEFINE_REG(24);
DEFINE_REG(25);
DEFINE_REG(26);
DEFINE_REG(27);
DEFINE_REG(28);
DEFINE_REG(29);
DEFINE_REG(30);
DEFINE_REG(31);
#undef DEFINE_REG
#undef DEFINE_OP
#undef AD
void op_load_word_VD_T0(void)
{
const uint32 ea = T0;
VD.w[(ea >> 2) & 3] = vm_read_memory_4(ea & ~3);
}
void op_store_word_VD_T0(void)
{
const uint32 ea = T0;
vm_write_memory_4(ea & ~3, VD.w[(ea >> 2) & 3]);
}
void op_load_vect_VD_T0(void)
{
const uint32 ea = T0 & ~15;
VD.w[0] = vm_read_memory_4(ea + 0);
VD.w[1] = vm_read_memory_4(ea + 4);
VD.w[2] = vm_read_memory_4(ea + 8);
VD.w[3] = vm_read_memory_4(ea + 12);
}
void op_store_vect_VD_T0(void)
{
const uint32 ea = T0 & ~15;
vm_write_memory_4(ea + 0, VD.w[0]);
vm_write_memory_4(ea + 4, VD.w[1]);
vm_write_memory_4(ea + 8, VD.w[2]);
vm_write_memory_4(ea + 12, VD.w[3]);
}
/**
* Vector operations helpers
**/
#define VNONE op_VNONE
struct op_VNONE {
typedef null_operand type;
static INLINE uint32 get(powerpc_vr const & v, int i) { return 0; }
static INLINE void set(powerpc_vr const & v, int i, uint32) { }
};
#define V16QI op_V16QI
struct op_V16QI {
typedef uint8 type;
static INLINE type get(powerpc_vr const & v, int i) { return v.b[i]; }
static INLINE void set(powerpc_vr & v, int i, type x) { v.b[i] = x; }
};
#define V8HI op_V8HI
struct op_V8HI {
typedef uint16 type;
static INLINE type get(powerpc_vr const & v, int i) { return v.h[i]; }
static INLINE void set(powerpc_vr & v, int i, type x) { v.h[i] = x; }
};
#define V4SI op_V4SI
struct op_V4SI {
typedef uint32 type;
static INLINE type get(powerpc_vr const & v, int i) { return v.w[i]; }
static INLINE void set(powerpc_vr & v, int i, type x) { v.w[i] = x; }
};
#define V2DI op_V2DI
struct op_V2DI {
typedef uint64 type;
static INLINE type get(powerpc_vr const & v, int i) { return v.j[i]; }
static INLINE void set(powerpc_vr & v, int i, type x) { v.j[i] = x; }
};
#define V4SF op_V4SF
struct op_V4SF {
typedef float type;
static INLINE type get(powerpc_vr const & v, int i) { return v.f[i]; }
static INLINE void set(powerpc_vr & v, int i, type x) { v.f[i] = x; }
};
template< class OP, class VX, class VA, class VB, class VC, int N >
struct do_vector_execute {
static INLINE void apply() {
do_vector_execute<OP, VX, VA, VB, VC, N - 1>::apply();
VX::set(
VD, N,
op_apply<typename VX::type, OP, typename VA::type, typename VB::type, typename VC::type>::apply(
VA::get(V0, N),
VB::get(V1, N),
VC::get(V2, N)));
}
};
template< class OP, class VX, class VA, class VB, class VC >
struct do_vector_execute<OP, VX, VA, VB, VC, 0> {
static INLINE void apply() {
VX::set(
VD, 0, op_apply<typename VX::type, OP, typename VA::type, typename VB::type, typename VC::type>::apply(
VA::get(V0, 0),
VB::get(V1, 0),
VC::get(V2, 0)));
}
};
template< class OP, class VX, class VA, class VB = VNONE, class VC = VNONE >
struct vector_execute {
static INLINE void apply() {
do_vector_execute<OP, VX, VA, VB, VC, (16 / sizeof(typename VX::type)) - 1>::apply();
}
};
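// vector_execute<OP, VX, VA, VB, VC> applies OP element-wise across the
// 128-bit vector; the element count is 16 / sizeof(VX::type), i.e. 16
// bytes, 8 halfwords, 4 words/floats or 2 doublewords, and the recursion
// above is fully unrolled at compile time. For example,
// vector_execute<op_fadds, V4SF, V4SF, V4SF>::apply() computes
// VD.f[i] = V0.f[i] + V1.f[i] for i = 0..3.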
/**
* Vector synthetic operations
**/
void op_vaddfp_VD_V0_V1(void)
{
vector_execute<op_fadds, V4SF, V4SF, V4SF>::apply();
}
void op_vsubfp_VD_V0_V1(void)
{
vector_execute<op_fsubs, V4SF, V4SF, V4SF>::apply();
}
void op_vmaddfp_VD_V0_V1_V2(void)
{
vector_execute<op_vmaddfp, V4SF, V4SF, V4SF, V4SF>::apply();
}
#if defined(__i386__) && defined(__SSE__)
// Workaround gcc 3.2.2 miscompilation that inserts SSE instructions
struct op_do_vnmsubfp {
static INLINE float apply(float x, float y, float z) {
// return 0. - ((x * z) - y);
return y - (x * z);
}
};
#else
typedef op_vnmsubfp op_do_vnmsubfp;
#endif
void op_vnmsubfp_VD_V0_V1_V2(void)
{
vector_execute<op_do_vnmsubfp, V4SF, V4SF, V4SF, V4SF>::apply();
}
void op_vand_VD_V0_V1(void)
{
vector_execute<op_and_64, V2DI, V2DI, V2DI>::apply();
}
void op_vandc_VD_V0_V1(void)
{
vector_execute<op_andc_64, V2DI, V2DI, V2DI>::apply();
}
void op_vnor_VD_V0_V1(void)
{
vector_execute<op_nor_64, V2DI, V2DI, V2DI>::apply();
}
void op_vor_VD_V0_V1(void)
{
vector_execute<op_or_64, V2DI, V2DI, V2DI>::apply();
}
void op_vxor_VD_V0_V1(void)
{
vector_execute<op_xor_64, V2DI, V2DI, V2DI>::apply();
}
void op_record_cr6_VD(void)
{
unsigned int cr6 = 0;
#if SIZEOF_VOID_P == 8
if ((~VD.j[0] | ~VD.j[1]) == 0)
cr6 = 8;
else if ((VD.j[0] | VD.j[1]) == 0)
cr6 = 2;
#else
if ((~VD.w[0] | ~VD.w[1] | ~VD.w[2] | ~VD.w[3]) == 0)
cr6 = 8;
else if ((VD.w[0] | VD.w[1] | VD.w[2] | VD.w[3]) == 0)
cr6 = 2;
#endif
powerpc_dyngen_helper::cr().set(6, cr6);
dyngen_barrier();
}
void op_mfvscr_VD(void)
{
VD.w[0] = 0;
VD.w[1] = 0;
VD.w[2] = 0;
VD.w[3] = powerpc_dyngen_helper::get_vscr();
}
void op_mtvscr_V0(void)
{
powerpc_dyngen_helper::set_vscr(V0.w[3]);
}
#undef VNONE
#undef V16QI
#undef V8HI
#undef V4SI
#undef V2DI
#undef V4SF
/**
* X86 SIMD optimizations
**/
#if defined(__i386__) || defined(__x86_64__)
#undef VD
#undef V0
#undef V1
#undef V2
/* We are using GCC, so we can use its extensions */
#if defined __MMX__ || __GNUC__ < 4
#define __mmx_clobbers(reglist...) reglist
#else
#define __mmx_clobbers(reglist...)
#endif
#if defined __SSE__ || __GNUC__ < 4
#define __sse_clobbers(reglist...) reglist
#else
#define __sse_clobbers(reglist...)
#endif
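// The clobber lists are dropped on GCC 4+ when MMX/SSE code generation is
// disabled, since those compilers reject "mm0"/"xmm0" register names in
// clobber lists unless the corresponding instruction set is enabled.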
// MMX instructions
void op_emms(void)
{
asm volatile ("emms");
}
#define DEFINE_OP(NAME, OP, VA, VB) \
void op_mmx_##NAME(void) \
{ \
asm volatile ("movq (%1),%%mm0\n" \
"movq 8(%1),%%mm1\n" \
#OP " (%2),%%mm0\n" \
#OP " 8(%2),%%mm1\n" \
"movq %%mm0,(%0)\n" \
"movq %%mm1,8(%0)\n" \
: : "r" (reg_VD), "r" (reg_##VA), "r" (reg_##VB) \
: __mmx_clobbers("mm0", "mm1")); \
}
DEFINE_OP(vcmpequb, pcmpeqb, V0, V1);
DEFINE_OP(vcmpequh, pcmpeqw, V0, V1);
DEFINE_OP(vcmpequw, pcmpeqd, V0, V1);
DEFINE_OP(vcmpgtsb, pcmpgtb, V0, V1);
DEFINE_OP(vcmpgtsh, pcmpgtw, V0, V1);
DEFINE_OP(vcmpgtsw, pcmpgtd, V0, V1);
DEFINE_OP(vaddubm, paddb, V0, V1);
DEFINE_OP(vadduhm, paddw, V0, V1);
DEFINE_OP(vadduwm, paddd, V0, V1);
DEFINE_OP(vsububm, psubb, V0, V1);
DEFINE_OP(vsubuhm, psubw, V0, V1);
DEFINE_OP(vsubuwm, psubd, V0, V1);
DEFINE_OP(vand, pand, V0, V1);
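// pandn computes dst = ~dst & src, so the operands are swapped here:
// loading V1 and pandn-ing with V0 yields V0 & ~V1, i.e. vandc(V0, V1).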
DEFINE_OP(vandc, pandn, V1, V0);
DEFINE_OP(vor, por, V0, V1);
DEFINE_OP(vxor, pxor, V0, V1);
DEFINE_OP(vmaxub, pmaxub, V0, V1);
DEFINE_OP(vminub, pminub, V0, V1);
DEFINE_OP(vmaxsh, pmaxsw, V0, V1);
DEFINE_OP(vminsh, pminsw, V0, V1);
#undef DEFINE_OP
#endif