mirror of
https://github.com/kanjitalk755/macemu.git
synced 2024-11-26 10:49:21 +00:00
AltiVec emulation! ;-)
This commit is contained in:
parent
d92989dc53
commit
313cddeeb2
@ -181,13 +181,6 @@ sheepshaver_cpu::sheepshaver_cpu()
|
||||
|
||||
void sheepshaver_cpu::init_decoder()
|
||||
{
|
||||
#ifndef PPC_NO_STATIC_II_INDEX_TABLE
|
||||
static bool initialized = false;
|
||||
if (initialized)
|
||||
return;
|
||||
initialized = true;
|
||||
#endif
|
||||
|
||||
static const instr_info_t sheep_ii_table[] = {
|
||||
{ "sheep",
|
||||
(execute_pmf)&sheepshaver_cpu::execute_sheep,
|
||||
|
@ -88,6 +88,17 @@ typedef bit_field< 21, 25 > frC_field;
|
||||
typedef bit_field< 6, 10 > frD_field;
|
||||
typedef bit_field< 6, 10 > frS_field;
|
||||
|
||||
// Vector registers
|
||||
typedef bit_field< 11, 15 > vA_field;
|
||||
typedef bit_field< 16, 20 > vB_field;
|
||||
typedef bit_field< 21, 25 > vC_field;
|
||||
typedef bit_field< 6, 10 > vD_field;
|
||||
typedef bit_field< 6, 10 > vS_field;
|
||||
|
||||
typedef bit_field< 21, 21 > vRc_field;
|
||||
typedef bit_field< 11, 15 > vUIMM_field;
|
||||
typedef bit_field< 22, 25 > vSH_field;
|
||||
|
||||
// Condition registers
|
||||
typedef bit_field< 11, 15 > crbA_field;
|
||||
typedef bit_field< 16, 20 > crbB_field;
|
||||
@ -151,6 +162,10 @@ typedef bit_field< 17, 17 > FPSCR_FPRF_FG_field; // >
|
||||
typedef bit_field< 18, 18 > FPSCR_FPRF_FE_field; // =
|
||||
typedef bit_field< 19, 19 > FPSCR_FPRF_FU_field; // ?
|
||||
|
||||
// Vector Status and Control Register
|
||||
typedef bit_field< 15, 15 > VSCR_NJ_field;
|
||||
typedef bit_field< 31, 31 > VSCR_SAT_field;
|
||||
|
||||
// Define variations for branch instructions
|
||||
typedef bit_field< 30, 30 > AA_field;
|
||||
typedef bit_field< 31, 31 > LK_field;
|
||||
@ -202,6 +217,7 @@ DEFINE_FIELD_ALIAS(AA_BIT, AA);
|
||||
DEFINE_FIELD_ALIAS(LK_BIT, LK);
|
||||
DEFINE_FIELD_ALIAS(BO_BIT, BO);
|
||||
DEFINE_FIELD_ALIAS(BI_BIT, BI);
|
||||
DEFINE_FIELD_ALIAS(vRC_BIT, vRc);
|
||||
|
||||
#undef DEFINE_FIELD_ALIAS
|
||||
#undef DEFINE_FAKE_FIELD_ALIAS
|
||||
|
@ -37,32 +37,6 @@
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* PPC_NO_STATIC_II_INDEX_TABLE
|
||||
*
|
||||
* Define to make sure the ii_index_table[] is a non static
|
||||
* member so that powerpc_cpu object size is reduced by 64
|
||||
* KB. This is only supported for mono CPU configurations.
|
||||
**/
|
||||
|
||||
#ifndef PPC_NO_STATIC_II_INDEX_TABLE
|
||||
#define PPC_NO_STATIC_II_INDEX_TABLE
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* PPC_OPCODE_HASH_XO_PRIMARY
|
||||
*
|
||||
* Define to hash opcode hash (xo, primary opcode) instead of
|
||||
* (primary opcode, xo). This simply reduces the computation
|
||||
* index into instr_info[] table by one operation.
|
||||
**/
|
||||
|
||||
#ifndef PPC_OPCODE_HASH_XO_PRIMARY
|
||||
#define PPC_OPCODE_HASH_XO_PRIMARY
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* PPC_ENABLE_FPU_EXCEPTIONS
|
||||
*
|
||||
@ -148,13 +122,4 @@
|
||||
#define PPC_PROFILE_GENERIC_CALLS 0
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Sanity checks and features enforcements
|
||||
**/
|
||||
|
||||
#if KPX_MAX_CPUS == 1
|
||||
#undef PPC_NO_STATIC_II_INDEX_TABLE
|
||||
#endif
|
||||
|
||||
#endif /* PPC_CONFIG_H */
|
||||
|
@ -431,7 +431,6 @@ bool powerpc_cpu::check_spcflags()
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void powerpc_cpu::execute(uint32 entry)
|
||||
{
|
||||
pc() = entry;
|
||||
|
@ -48,7 +48,11 @@ protected:
|
||||
powerpc_cr_register const & cr() const { return regs.cr; }
|
||||
powerpc_xer_register & xer() { return regs.xer; }
|
||||
powerpc_xer_register const & xer() const { return regs.xer; }
|
||||
powerpc_vscr & vscr() { return regs.vscr; }
|
||||
powerpc_vscr const & vscr() const { return regs.vscr; }
|
||||
|
||||
uint32 vrsave() const { return regs.vrsave; }
|
||||
uint32 & vrsave() { return regs.vrsave; }
|
||||
double fp_result() const { return regs.fp_result.d; }
|
||||
double & fp_result() { return regs.fp_result.d; }
|
||||
uint64 fp_result_dw() const { return regs.fp_result.j; }
|
||||
@ -79,6 +83,8 @@ public:
|
||||
double fpr(int i) const { return regs.fpr[i].d; }
|
||||
uint64 & fpr_dw(int i) { return regs.fpr[i].j; }
|
||||
uint64 fpr_dw(int i) const { return regs.fpr[i].j; }
|
||||
powerpc_vr & vr(int i) { return regs.vr[i]; }
|
||||
powerpc_vr const & vr(int i) const { return regs.vr[i]; }
|
||||
|
||||
protected:
|
||||
|
||||
@ -90,6 +96,15 @@ protected:
|
||||
void record_cr1()
|
||||
{ cr().set((cr().get() & ~CR_field<1>::mask()) | ((fpscr() >> 4) & 0x0f000000)); }
|
||||
void record_fpscr();
|
||||
void record_cr6(powerpc_vr const & vS, bool check_one) {
|
||||
if (check_one && (vS.j[0] == UVAL64(0xffffffffffffffff) &&
|
||||
vS.j[1] == UVAL64(0xffffffffffffffff)))
|
||||
cr().set(6, 8);
|
||||
else if (vS.j[0] == UVAL64(0) && vS.j[1] == UVAL64(0))
|
||||
cr().set(6, 2);
|
||||
else
|
||||
cr().set(6, 0);
|
||||
}
|
||||
|
||||
template< class FP >
|
||||
void fp_classify(FP x);
|
||||
@ -125,7 +140,8 @@ protected:
|
||||
MD_form, MDS_form,
|
||||
SC_form,
|
||||
X_form,
|
||||
XFL_form, XFX_form, XL_form, XO_form, XS_form
|
||||
XFL_form, XFX_form, XL_form, XO_form, XS_form,
|
||||
VX_form, VXR_form, VA_form,
|
||||
};
|
||||
|
||||
// Control flow types
|
||||
@ -149,13 +165,13 @@ protected:
|
||||
|
||||
// Instruction information structure
|
||||
struct instr_info_t {
|
||||
char name[8]; // Instruction name
|
||||
char name[12]; // Instruction name
|
||||
execute_fn execute; // Semantic routine for this instruction
|
||||
decode_fn decode; // Specialized instruction decoder
|
||||
uint16 mnemo; // Mnemonic
|
||||
uint16 format; // Instruction format (XO-form, D-form, etc.)
|
||||
uint32 opcode:6; // Primary opcode
|
||||
uint32 xo:10; // Extended opcode
|
||||
uint16 opcode; // Primary opcode
|
||||
uint16 xo; // Extended opcode
|
||||
uint16 cflow; // Mask of control flow information
|
||||
};
|
||||
|
||||
@ -192,25 +208,15 @@ private:
|
||||
syscall_fn execute_do_syscall;
|
||||
int syscall_exit_code;
|
||||
|
||||
#ifdef PPC_NO_STATIC_II_INDEX_TABLE
|
||||
#define PPC_STATIC_II_TABLE
|
||||
#else
|
||||
#define PPC_STATIC_II_TABLE static
|
||||
#endif
|
||||
|
||||
static const instr_info_t powerpc_ii_table[];
|
||||
PPC_STATIC_II_TABLE std::vector<instr_info_t> ii_table;
|
||||
typedef uint8 ii_index_t;
|
||||
static const int II_INDEX_TABLE_SIZE = 0x10000;
|
||||
PPC_STATIC_II_TABLE ii_index_t ii_index_table[II_INDEX_TABLE_SIZE];
|
||||
std::vector<instr_info_t> ii_table;
|
||||
typedef uint16 ii_index_t;
|
||||
static const int II_INDEX_TABLE_SIZE = 0x20000;
|
||||
ii_index_t ii_index_table[II_INDEX_TABLE_SIZE];
|
||||
|
||||
#ifdef PPC_OPCODE_HASH_XO_PRIMARY
|
||||
// Pack/unpack index into decode table
|
||||
uint32 make_ii_index(uint32 opcode, uint32 xo) { return opcode | (xo << 6); }
|
||||
uint32 get_ii_index(uint32 opcode) { return (opcode >> 26) | ((opcode & 0x7fe) << 5); }
|
||||
#else
|
||||
uint32 make_ii_index(uint32 opcode, uint32 xo) { return opcode << 10 | xo; }
|
||||
uint32 get_ii_index(uint32 opcode) { return ((opcode >> 16) & 0xfc00) | ((opcode >> 1) & 0x3ff); }
|
||||
#endif
|
||||
uint32 get_ii_index(uint32 opcode) { return (opcode >> 26) | ((opcode & 0x7ff) << 6); }
|
||||
|
||||
// Convert 8-bit field mask (e.g. mtcrf) to bit mask
|
||||
uint32 field2mask[256];
|
||||
@ -411,6 +417,36 @@ private:
|
||||
void execute_invalidate_cache_range();
|
||||
template< class RA, class RB >
|
||||
void execute_dcbz(uint32 opcode);
|
||||
template< class VD, class RA, class RB >
|
||||
void execute_vector_load(uint32 opcode);
|
||||
template< class VS, class RA, class RB >
|
||||
void execute_vector_store(uint32 opcode);
|
||||
void execute_mfvscr(uint32 opcode);
|
||||
void execute_mtvscr(uint32 opcode);
|
||||
template< class OP, class VD, class VA, class VB, class VC, class Rc, int C1 >
|
||||
void execute_vector_arith(uint32 opcode);
|
||||
template< class OP, class VD, class VA, class VB, class VC >
|
||||
void execute_vector_arith_mixed(uint32 opcode);
|
||||
template< int ODD, class OP, class VD, class VA, class VB, class VC >
|
||||
void execute_vector_arith_odd(uint32 opcode);
|
||||
template< class VD, class VA, class VB, int LO >
|
||||
void execute_vector_merge(uint32 opcode);
|
||||
template< class VD, class VA, class VB >
|
||||
void execute_vector_pack(uint32 opcode);
|
||||
void execute_vector_pack_pixel(uint32 opcode);
|
||||
template< int LO >
|
||||
void execute_vector_unpack_pixel(uint32 opcode);
|
||||
template< int LO, class VD, class VA >
|
||||
void execute_vector_unpack(uint32 opcode);
|
||||
void execute_vector_permute(uint32 opcode);
|
||||
template< int SD >
|
||||
void execute_vector_shift(uint32 opcode);
|
||||
template< int SD, class VD, class VA, class VB, class SH >
|
||||
void execute_vector_shift_octet(uint32 opcode);
|
||||
template< class OP, class VD, class VB, bool IM >
|
||||
void execute_vector_splat(uint32 opcode);
|
||||
template< int SZ, class VD, class VA, class VB >
|
||||
void execute_vector_sum(uint32 opcode);
|
||||
|
||||
// Specialized instruction decoders
|
||||
template< class RA, class RB, class RC, class CA >
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -79,6 +79,8 @@ struct powerpc_dyngen_helper {
|
||||
static inline void set_fpscr(uint32 value) { CPU->fpscr() = value; }
|
||||
static inline uint32 get_xer() { return CPU->xer().get(); }
|
||||
static inline void set_xer(uint32 value) { CPU->xer().set(value); }
|
||||
static inline uint32 get_vrsave() { return CPU->vrsave(); }
|
||||
static inline void set_vrsave(uint32 value) { CPU->vrsave() = value; }
|
||||
static inline void record(int crf, int32 v) { CPU->record_cr(crf, v); }
|
||||
static inline powerpc_cr_register & cr() { return CPU->cr(); }
|
||||
static inline powerpc_xer_register & xer() { return CPU->xer(); }
|
||||
@ -473,6 +475,16 @@ DEFINE_OP(fnmsubs_FD_F0_F1_F2, FD, do_fnmsub(F0, F1, F2));
|
||||
* Special purpose registers
|
||||
**/
|
||||
|
||||
// Dyngen op: copy the CPU's VRSAVE value (via powerpc_dyngen_helper::get_vrsave) into T0.
void OPPROTO op_load_T0_VRSAVE(void)
|
||||
{
|
||||
T0 = powerpc_dyngen_helper::get_vrsave();
|
||||
}
|
||||
|
||||
// Dyngen op: write T0 into the CPU's VRSAVE (via powerpc_dyngen_helper::set_vrsave).
void OPPROTO op_store_T0_VRSAVE(void)
|
||||
{
|
||||
powerpc_dyngen_helper::set_vrsave(T0);
|
||||
}
|
||||
|
||||
void OPPROTO op_load_T0_XER(void)
|
||||
{
|
||||
T0 = powerpc_dyngen_helper::get_xer();
|
||||
|
@ -94,6 +94,8 @@ public:
|
||||
void gen_mtcrf_T0_im(uint32 mask);
|
||||
|
||||
// Special purpose registers
|
||||
DEFINE_ALIAS(load_T0_VRSAVE,0);
|
||||
DEFINE_ALIAS(store_T0_VRSAVE,0);
|
||||
DEFINE_ALIAS(load_T0_XER,0);
|
||||
DEFINE_ALIAS(store_T0_XER,0);
|
||||
DEFINE_ALIAS(load_T0_PC,0);
|
||||
|
@ -54,28 +54,52 @@
|
||||
|
||||
template< class RT, class OP, class RA, class RB, class RC >
|
||||
struct op_apply {
|
||||
template< class T >
|
||||
static inline RT apply(T a, T b, T c) {
|
||||
template< class A, class B, class C >
|
||||
static inline RT apply(A a, B b, C c) {
|
||||
return OP::apply(a, b, c);
|
||||
}
|
||||
};
|
||||
|
||||
template< class RT, class OP, class RA, class RB >
|
||||
struct op_apply<RT, OP, RA, RB, null_operand> {
|
||||
template< class T >
|
||||
static inline RT apply(T a, T b, T) {
|
||||
template< class A, class B, class C >
|
||||
static inline RT apply(A a, B b, C) {
|
||||
return OP::apply(a, b);
|
||||
}
|
||||
};
|
||||
|
||||
template< class RT, class OP, class RA >
|
||||
struct op_apply<RT, OP, RA, null_operand, null_operand> {
|
||||
template< class T >
|
||||
static inline RT apply(T a, T, T) {
|
||||
template< class A, class B, class C >
|
||||
static inline RT apply(A a, B, C) {
|
||||
return OP::apply(a);
|
||||
}
|
||||
};
|
||||
|
||||
template< class RT, class OP, class RA, class RB >
|
||||
struct op_apply<RT, OP, RA, RB, null_vector_operand> {
|
||||
template< class A, class B, class C >
|
||||
static inline RT apply(A a, B b, C) {
|
||||
return (RT)OP::apply(a, b);
|
||||
}
|
||||
};
|
||||
|
||||
template< class RT, class OP, class RA >
|
||||
struct op_apply<RT, OP, RA, null_vector_operand, null_vector_operand> {
|
||||
template< class A, class B, class C >
|
||||
static inline RT apply(A a, B, C) {
|
||||
return (RT)OP::apply(a);
|
||||
}
|
||||
};
|
||||
|
||||
template< class RT, class OP, class RB >
|
||||
struct op_apply<RT, OP, null_vector_operand, RB, null_vector_operand> {
|
||||
template< class A, class B, class C >
|
||||
static inline RT apply(A, B b, C) {
|
||||
return (RT)OP::apply(b);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Illegal & NOP instructions
|
||||
**/
|
||||
@ -1111,6 +1135,7 @@ void powerpc_cpu::execute_mfspr(uint32 opcode)
|
||||
case powerpc_registers::SPR_XER: d = xer().get();break;
|
||||
case powerpc_registers::SPR_LR: d = lr(); break;
|
||||
case powerpc_registers::SPR_CTR: d = ctr(); break;
|
||||
case powerpc_registers::SPR_VRSAVE: d = vrsave(); break;
|
||||
#ifdef SHEEPSHAVER
|
||||
case powerpc_registers::SPR_SDR1: d = 0xdead001f; break;
|
||||
case powerpc_registers::SPR_PVR: {
|
||||
@ -1137,6 +1162,7 @@ void powerpc_cpu::execute_mtspr(uint32 opcode)
|
||||
case powerpc_registers::SPR_XER: xer().set(s); break;
|
||||
case powerpc_registers::SPR_LR: lr() = s; break;
|
||||
case powerpc_registers::SPR_CTR: ctr() = s; break;
|
||||
case powerpc_registers::SPR_VRSAVE: vrsave() = s; break;
|
||||
#ifndef SHEEPSHAVER
|
||||
default: execute_illegal(opcode);
|
||||
#endif
|
||||
@ -1209,6 +1235,480 @@ void powerpc_cpu::execute_dcbz(uint32 opcode)
|
||||
increment_pc(4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Vector load/store instructions
|
||||
**/
|
||||
|
||||
template< class VD, class RA, class RB >
|
||||
void powerpc_cpu::execute_vector_load(uint32 opcode)
|
||||
{
|
||||
uint32 ea = RA::get(this, opcode) + RB::get(this, opcode);
|
||||
typename VD::type & vD = VD::ref(this, opcode);
|
||||
switch (VD::element_size) {
|
||||
case 1:
|
||||
VD::set_element(vD, (ea & 0x0f), vm_read_memory_1(ea));
|
||||
break;
|
||||
case 2:
|
||||
VD::set_element(vD, ((ea >> 1) & 0x07), vm_read_memory_2(ea & ~1));
|
||||
break;
|
||||
case 4:
|
||||
VD::set_element(vD, ((ea >> 2) & 0x03), vm_read_memory_4(ea & ~3));
|
||||
break;
|
||||
case 8:
|
||||
ea &= ~15;
|
||||
vD.w[0] = vm_read_memory_4(ea + 0);
|
||||
vD.w[1] = vm_read_memory_4(ea + 4);
|
||||
vD.w[2] = vm_read_memory_4(ea + 8);
|
||||
vD.w[3] = vm_read_memory_4(ea + 12);
|
||||
break;
|
||||
}
|
||||
increment_pc(4);
|
||||
}
|
||||
|
||||
template< class VS, class RA, class RB >
|
||||
void powerpc_cpu::execute_vector_store(uint32 opcode)
|
||||
{
|
||||
uint32 ea = RA::get(this, opcode) + RB::get(this, opcode);
|
||||
typename VS::type & vS = VS::ref(this, opcode);
|
||||
switch (VS::element_size) {
|
||||
case 1:
|
||||
vm_write_memory_1(ea, VS::get_element(vS, (ea & 0x0f)));
|
||||
break;
|
||||
case 2:
|
||||
vm_write_memory_2(ea & ~1, VS::get_element(vS, ((ea >> 1) & 0x07)));
|
||||
break;
|
||||
case 4:
|
||||
vm_write_memory_4(ea & ~3, VS::get_element(vS, ((ea >> 2) & 0x03)));
|
||||
break;
|
||||
case 8:
|
||||
ea &= ~15;
|
||||
vm_write_memory_4(ea + 0, vS.w[0]);
|
||||
vm_write_memory_4(ea + 4, vS.w[1]);
|
||||
vm_write_memory_4(ea + 8, vS.w[2]);
|
||||
vm_write_memory_4(ea + 12, vS.w[3]);
|
||||
break;
|
||||
}
|
||||
increment_pc(4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Vector arithmetic
|
||||
*
|
||||
* OP Operation to perform on element
|
||||
* VD Output operand vector
|
||||
* VA Input operand vector
|
||||
* VB Input operand vector (optional: operand_NONE)
|
||||
* VC Input operand vector (optional: operand_NONE)
|
||||
* Rc Predicate to record CR6
|
||||
* C1 If recording CR6, do we check for '1' bits in vD?
|
||||
**/
|
||||
|
||||
template< class OP, class VD, class VA, class VB, class VC, class Rc, int C1 >
|
||||
void powerpc_cpu::execute_vector_arith(uint32 opcode)
|
||||
{
|
||||
typename VA::type const & vA = VA::const_ref(this, opcode);
|
||||
typename VB::type const & vB = VB::const_ref(this, opcode);
|
||||
typename VC::type const & vC = VC::const_ref(this, opcode);
|
||||
typename VD::type & vD = VD::ref(this, opcode);
|
||||
const int n_elements = 16 / VD::element_size;
|
||||
|
||||
for (int i = 0; i < n_elements; i++) {
|
||||
const typename VA::element_type a = VA::get_element(vA, i);
|
||||
const typename VB::element_type b = VB::get_element(vB, i);
|
||||
const typename VC::element_type c = VC::get_element(vC, i);
|
||||
typename VD::element_type d = op_apply<typename VD::element_type, OP, VA, VB, VC>::apply(a, b, c);
|
||||
if (VD::saturate(d))
|
||||
vscr().set_sat(1);
|
||||
VD::set_element(vD, i, d);
|
||||
}
|
||||
|
||||
// Propagate all conditions to CR6
|
||||
if (Rc::test(opcode))
|
||||
record_cr6(vD, C1);
|
||||
|
||||
increment_pc(4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Vector mixed arithmetic
|
||||
*
|
||||
* OP Operation to perform on element
|
||||
* VD Output operand vector
|
||||
* VA Input operand vector
|
||||
* VB Input operand vector (optional: operand_NONE)
|
||||
* VC Input operand vector (optional: operand_NONE)
|
||||
**/
|
||||
|
||||
template< class OP, class VD, class VA, class VB, class VC >
|
||||
void powerpc_cpu::execute_vector_arith_mixed(uint32 opcode)
|
||||
{
|
||||
typename VA::type const & vA = VA::const_ref(this, opcode);
|
||||
typename VB::type const & vB = VB::const_ref(this, opcode);
|
||||
typename VC::type const & vC = VC::const_ref(this, opcode);
|
||||
typename VD::type & vD = VD::ref(this, opcode);
|
||||
const int n_elements = 16 / VD::element_size;
|
||||
const int n_sub_elements = 4 / VA::element_size;
|
||||
|
||||
for (int i = 0; i < n_elements; i++) {
|
||||
const typename VC::element_type c = VC::get_element(vC, i);
|
||||
typename VD::element_type d = c;
|
||||
for (int j = 0; j < n_sub_elements; j++) {
|
||||
const typename VA::element_type a = VA::get_element(vA, i * n_sub_elements + j);
|
||||
const typename VB::element_type b = VB::get_element(vB, i * n_sub_elements + j);
|
||||
d += op_apply<typename VD::element_type, OP, VA, VB, null_vector_operand>::apply(a, b, c);
|
||||
}
|
||||
if (VD::saturate(d))
|
||||
vscr().set_sat(1);
|
||||
VD::set_element(vD, i, d);
|
||||
}
|
||||
|
||||
increment_pc(4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Vector odd/even arithmetic
|
||||
*
|
||||
* ODD Flag: are we computing every odd element?
|
||||
* OP Operation to perform on element
|
||||
* VD Output operand vector
|
||||
* VA Input operand vector
|
||||
* VB Input operand vector (optional: operand_NONE)
|
||||
* VC Input operand vector (optional: operand_NONE)
|
||||
**/
|
||||
|
||||
template< int ODD, class OP, class VD, class VA, class VB, class VC >
|
||||
void powerpc_cpu::execute_vector_arith_odd(uint32 opcode)
|
||||
{
|
||||
typename VA::type const & vA = VA::const_ref(this, opcode);
|
||||
typename VB::type const & vB = VB::const_ref(this, opcode);
|
||||
typename VC::type const & vC = VC::const_ref(this, opcode);
|
||||
typename VD::type & vD = VD::ref(this, opcode);
|
||||
const int n_elements = 16 / VD::element_size;
|
||||
|
||||
for (int i = 0; i < n_elements; i++) {
|
||||
const typename VA::element_type a = VA::get_element(vA, (i * 2) + ODD);
|
||||
const typename VB::element_type b = VB::get_element(vB, (i * 2) + ODD);
|
||||
const typename VC::element_type c = VC::get_element(vC, (i * 2) + ODD);
|
||||
typename VD::element_type d = op_apply<typename VD::element_type, OP, VA, VB, VC>::apply(a, b, c);
|
||||
if (VD::saturate(d))
|
||||
vscr().set_sat(1);
|
||||
VD::set_element(vD, i, d);
|
||||
}
|
||||
|
||||
increment_pc(4);
|
||||
}
|
||||
|
||||
/**
 *	Vector merge instructions
 *
 *		VD		Output operand vector
 *		VA		Input operand vector
 *		VB		Input operand vector
 *		LO		Flag: merge the second (low-order) half of the input vectors
 **/
|
||||
|
||||
template< class VD, class VA, class VB, int LO >
|
||||
void powerpc_cpu::execute_vector_merge(uint32 opcode)
|
||||
{
|
||||
typename VA::type const & vA = VA::const_ref(this, opcode);
|
||||
typename VB::type const & vB = VB::const_ref(this, opcode);
|
||||
typename VD::type & vD = VD::ref(this, opcode);
|
||||
const int n_elements = 16 / VD::element_size;
|
||||
|
||||
for (int i = 0; i < n_elements; i += 2) {
|
||||
VD::set_element(vD, i , VA::get_element(vA, (i / 2) + LO * (n_elements / 2)));
|
||||
VD::set_element(vD, i + 1, VB::get_element(vB, (i / 2) + LO * (n_elements / 2)));
|
||||
}
|
||||
|
||||
increment_pc(4);
|
||||
}
|
||||
|
||||
/**
 *	Vector pack/unpack instructions
 *
 *		VD		Output operand vector
 *		VA		Input operand vector
 *		VB		Input operand vector (pack only)
 *		LO		Flag: use the second (low-order) half of the input vector (unpack only)
 **/
|
||||
|
||||
template< class VD, class VA, class VB >
|
||||
void powerpc_cpu::execute_vector_pack(uint32 opcode)
|
||||
{
|
||||
typename VA::type const & vA = VA::const_ref(this, opcode);
|
||||
typename VB::type const & vB = VB::const_ref(this, opcode);
|
||||
typename VD::type & vD = VD::ref(this, opcode);
|
||||
const int n_elements = 16 / VD::element_size;
|
||||
const int n_pivot = n_elements / 2;
|
||||
|
||||
for (int i = 0; i < n_elements; i++) {
|
||||
typename VD::element_type d;
|
||||
if (i < n_pivot)
|
||||
d = VA::get_element(vA, i);
|
||||
else
|
||||
d = VB::get_element(vB, i - n_pivot);
|
||||
if (VD::saturate(d))
|
||||
vscr().set_sat(1);
|
||||
VD::set_element(vD, i, d);
|
||||
}
|
||||
|
||||
increment_pc(4);
|
||||
}
|
||||
|
||||
template< int LO, class VD, class VA >
|
||||
void powerpc_cpu::execute_vector_unpack(uint32 opcode)
|
||||
{
|
||||
typename VA::type const & vA = VA::const_ref(this, opcode);
|
||||
typename VD::type & vD = VD::ref(this, opcode);
|
||||
const int n_elements = 16 / VD::element_size;
|
||||
|
||||
for (int i = 0; i < n_elements; i++)
|
||||
VD::set_element(vD, i, VA::get_element(vA, i + LO * n_elements));
|
||||
|
||||
increment_pc(4);
|
||||
}
|
||||
|
||||
// Pack pixels: every 32-bit word of vA (then vB) is reduced to one 16-bit
// value built from bits 24..19, 15..11 and 7..3 of the word. The four
// results from vA fill halfword slots 0-3 of vD, those from vB slots 4-7
// (slot numbers are mapped through ev_mixed::half_element).
void powerpc_cpu::execute_vector_pack_pixel(uint32 opcode)
{
	// Snapshot the inputs by value: vD may be the same register as vA or
	// vB, and the halfword writes below would otherwise corrupt words that
	// are read in later iterations.
	const powerpc_vr vA = vr(vA_field::extract(opcode));
	const powerpc_vr vB = vr(vB_field::extract(opcode));
	powerpc_vr & vD = vr(vD_field::extract(opcode));

	for (int i = 0; i < 4; i++) {
		const uint32 a = vA.w[i];
		vD.h[ev_mixed::half_element(i)] = ((a >> 9) & 0xfc00) | ((a >> 6) & 0x03e0) | ((a >> 3) & 0x001f);
		const uint32 b = vB.w[i];
		vD.h[ev_mixed::half_element(i + 4)] = ((b >> 9) & 0xfc00) | ((b >> 6) & 0x03e0) | ((b >> 3) & 0x001f);
	}

	increment_pc(4);
}
|
||||
|
||||
template< int LO >
|
||||
void powerpc_cpu::execute_vector_unpack_pixel(uint32 opcode)
|
||||
{
|
||||
powerpc_vr const & vB = vr(vB_field::extract(opcode));
|
||||
powerpc_vr & vD = vr(vD_field::extract(opcode));
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
const uint32 h = vB.h[ev_mixed::half_element(i + LO * 4)];
|
||||
vD.w[i] = (((h & 0x8000) ? 0xff000000 : 0) |
|
||||
((h & 0x7c00) << 6) |
|
||||
((h & 0x03e0) << 3) |
|
||||
(h & 0x001f));
|
||||
}
|
||||
|
||||
increment_pc(4);
|
||||
}
|
||||
|
||||
/**
 *	Vector shift instructions
 *
 *		SD		Shift direction: left (-1), right (+1)
 *		VD		Output operand vector (octet shifts only)
 *		VA		Input operand vector (octet shifts only)
 *		VB		Input operand vector (octet shifts only)
 *		SH		Shift count operand (octet shifts only)
 **/
|
||||
|
||||
template< int SD >
|
||||
void powerpc_cpu::execute_vector_shift(uint32 opcode)
|
||||
{
|
||||
powerpc_vr const & vA = vr(vA_field::extract(opcode));
|
||||
powerpc_vr const & vB = vr(vB_field::extract(opcode));
|
||||
powerpc_vr & vD = vr(vD_field::extract(opcode));
|
||||
|
||||
// The contents of the low-order three bits of all byte
|
||||
// elements in vB must be identical to vB[125-127]; otherwise
|
||||
// the value placed into vD is undefined.
|
||||
const int sh = vB.b[ev_mixed::byte_element(15)] & 7;
|
||||
if (sh == 0) {
|
||||
for (int i = 0; i < 4; i++)
|
||||
vD.w[i] = vA.w[i];
|
||||
}
|
||||
else {
|
||||
uint32 prev_bits = 0;
|
||||
if (SD < 0) {
|
||||
for (int i = 3; i >= 0; i--) {
|
||||
uint32 next_bits = vA.w[i] >> (32 - sh);
|
||||
vD.w[i] = ((vA.w[i] << sh) | prev_bits);
|
||||
prev_bits = next_bits;
|
||||
}
|
||||
}
|
||||
else if (SD > 0) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
uint32 next_bits = vA.w[i] << (32 - sh);
|
||||
vD.w[i] = ((vA.w[i] >> sh) | prev_bits);
|
||||
prev_bits = next_bits;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
increment_pc(4);
|
||||
}
|
||||
|
||||
template< int SD, class VD, class VA, class VB, class SH >
|
||||
void powerpc_cpu::execute_vector_shift_octet(uint32 opcode)
|
||||
{
|
||||
typename VA::type const & vA = VA::const_ref(this, opcode);
|
||||
typename VB::type const & vB = VB::const_ref(this, opcode);
|
||||
typename VD::type & vD = VD::ref(this, opcode);
|
||||
const int n_elements = 16 / VD::element_size;
|
||||
|
||||
const int sh = SH::get(this, opcode);
|
||||
if (SD < 0) {
|
||||
for (int i = 0; i < 16; i++) {
|
||||
if (i + sh < 16)
|
||||
VD::set_element(vD, i, VA::get_element(vA, i + sh));
|
||||
else
|
||||
VD::set_element(vD, i, VB::get_element(vB, 16 - (i + sh)));
|
||||
}
|
||||
}
|
||||
else if (SD > 0) {
|
||||
for (int i = 0; i < 16; i++) {
|
||||
if (i < sh)
|
||||
VD::set_element(vD, i, VB::get_element(vB, 16 - (i - sh)));
|
||||
else
|
||||
VD::set_element(vD, i, VA::get_element(vA, i - sh));
|
||||
}
|
||||
}
|
||||
|
||||
increment_pc(4);
|
||||
}
|
||||
|
||||
/**
 *	Vector splat instructions
 *
 *		OP		Operation to perform on the replicated value
 *		VD		Output operand vector
 *		VB		Input operand vector (unused when IM is set)
 *		IM		Flag: replicate the immediate field instead of an element of VB
 **/
|
||||
|
||||
template< class OP, class VD, class VB, bool IM >
|
||||
void powerpc_cpu::execute_vector_splat(uint32 opcode)
|
||||
{
|
||||
typename VD::type & vD = VD::ref(this, opcode);
|
||||
const int n_elements = 16 / VD::element_size;
|
||||
|
||||
uint32 value;
|
||||
if (IM)
|
||||
value = OP::apply(vUIMM_field::extract(opcode));
|
||||
else {
|
||||
typename VB::type const & vB = VB::const_ref(this, opcode);
|
||||
const int n = vUIMM_field::extract(opcode) & (n_elements - 1);
|
||||
value = OP::apply(VB::get_element(vB, n));
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_elements; i++)
|
||||
VD::set_element(vD, i, value);
|
||||
|
||||
increment_pc(4);
|
||||
}
|
||||
|
||||
/**
 *	Vector sum instructions
 *
 *		SZ		Number of sums produced (1: vsum, 2: vsum2, 4: vsum4)
 *		VD		Output operand vector
 *		VA		Input operand vector (elements to add up)
 *		VB		Input operand vector (initial value of each sum)
 **/
|
||||
|
||||
template< int SZ, class VD, class VA, class VB >
|
||||
void powerpc_cpu::execute_vector_sum(uint32 opcode)
|
||||
{
|
||||
typename VA::type const & vA = VA::const_ref(this, opcode);
|
||||
typename VB::type const & vB = VB::const_ref(this, opcode);
|
||||
typename VD::type & vD = VD::ref(this, opcode);
|
||||
typename VD::element_type d;
|
||||
|
||||
switch (SZ) {
|
||||
case 1: // vsum
|
||||
d = VB::get_element(vB, 3);
|
||||
for (int j = 0; j < 4; j++)
|
||||
d += VA::get_element(vA, j);
|
||||
if (VD::saturate(d))
|
||||
vscr().set_sat(1);
|
||||
VD::set_element(vD, 0, 0);
|
||||
VD::set_element(vD, 1, 0);
|
||||
VD::set_element(vD, 2, 0);
|
||||
VD::set_element(vD, 3, d);
|
||||
break;
|
||||
|
||||
case 2: // vsum2
|
||||
for (int i = 0; i < 4; i += 2) {
|
||||
d = VB::get_element(vB, i + 1);
|
||||
for (int j = 0; j < 2; j++)
|
||||
d += VA::get_element(vA, i + j);
|
||||
if (VD::saturate(d))
|
||||
vscr().set_sat(1);
|
||||
VD::set_element(vD, i + 0, 0);
|
||||
VD::set_element(vD, i + 1, d);
|
||||
}
|
||||
break;
|
||||
|
||||
case 4: // vsum4
|
||||
for (int i = 0; i < 4; i += 1) {
|
||||
d = VB::get_element(vB, i);
|
||||
const int n_elements = 4 / VA::element_size;
|
||||
for (int j = 0; j < n_elements; j++)
|
||||
d += VA::get_element(vA, i * n_elements + j);
|
||||
if (VD::saturate(d))
|
||||
vscr().set_sat(1);
|
||||
VD::set_element(vD, i, d);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
increment_pc(4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Misc vector instructions
|
||||
**/
|
||||
|
||||
// Byte permute: for each byte slot i, the low five bits of vC byte i
// select the source byte. Bit 0x10 chooses vB over vA, and the low four
// bits give the byte number within that register. Byte numbers are
// mapped through ev_mixed::byte_element.
void powerpc_cpu::execute_vector_permute(uint32 opcode)
{
	// Snapshot the inputs by value: vD may be the same register as vA, vB
	// or vC, and writing vD byte by byte would otherwise change source or
	// selector bytes that later iterations still read.
	const powerpc_vr vA = vr(vA_field::extract(opcode));
	const powerpc_vr vB = vr(vB_field::extract(opcode));
	const powerpc_vr vC = vr(vC_field::extract(opcode));
	powerpc_vr & vD = vr(vD_field::extract(opcode));

	for (int i = 0; i < 16; i++) {
		const int ei = ev_mixed::byte_element(i);
		const int n = vC.b[ei] & 0x1f;
		const int en = ev_mixed::byte_element(n & 0xf);
		vD.b[ei] = (n & 0x10) ? vB.b[en] : vA.b[en];
	}

	increment_pc(4);
}
|
||||
|
||||
// Move from VSCR: clear words 0-2 of vD and place the VSCR value in word 3.
void powerpc_cpu::execute_mfvscr(uint32 opcode)
{
	powerpc_vr & dest = vr(vD_field::extract(opcode));
	for (int i = 0; i < 3; i++)
		dest.w[i] = 0;
	dest.w[3] = vscr().get();
	increment_pc(4);
}
|
||||
|
||||
// Move to VSCR: load the VSCR from word 3 of vB.
void powerpc_cpu::execute_mtvscr(uint32 opcode)
{
	powerpc_vr const & src = vr(vB_field::extract(opcode));
	vscr().set(src.w[3]);
	increment_pc(4);
}
|
||||
|
||||
/**
|
||||
* Explicit template instantiations
|
||||
**/
|
||||
|
@ -68,6 +68,9 @@ enum powerpc_instruction {
|
||||
PPC_I(DCBZ),
|
||||
PPC_I(DIVW),
|
||||
PPC_I(DIVWU),
|
||||
PPC_I(DSS),
|
||||
PPC_I(DST),
|
||||
PPC_I(DSTST),
|
||||
PPC_I(ECIWX),
|
||||
PPC_I(ECOWX),
|
||||
PPC_I(EIEIO),
|
||||
@ -125,6 +128,11 @@ enum powerpc_instruction {
|
||||
PPC_I(LMW),
|
||||
PPC_I(LSWI),
|
||||
PPC_I(LSWX),
|
||||
PPC_I(LVEBX),
|
||||
PPC_I(LVEHX),
|
||||
PPC_I(LVEWX),
|
||||
PPC_I(LVX),
|
||||
PPC_I(LVXL),
|
||||
PPC_I(LWARX),
|
||||
PPC_I(LWBRX),
|
||||
PPC_I(LWZ),
|
||||
@ -138,12 +146,14 @@ enum powerpc_instruction {
|
||||
PPC_I(MFMSR),
|
||||
PPC_I(MFSPR),
|
||||
PPC_I(MFTB),
|
||||
PPC_I(MFVSCR),
|
||||
PPC_I(MTCRF),
|
||||
PPC_I(MTFSB0),
|
||||
PPC_I(MTFSB1),
|
||||
PPC_I(MTFSF),
|
||||
PPC_I(MTFSFI),
|
||||
PPC_I(MTSPR),
|
||||
PPC_I(MTVSCR),
|
||||
PPC_I(MULHW),
|
||||
PPC_I(MULHWU),
|
||||
PPC_I(MULLI),
|
||||
@ -183,6 +193,11 @@ enum powerpc_instruction {
|
||||
PPC_I(STMW),
|
||||
PPC_I(STSWI),
|
||||
PPC_I(STSWX),
|
||||
PPC_I(STVEBX),
|
||||
PPC_I(STVEHX),
|
||||
PPC_I(STVEWX),
|
||||
PPC_I(STVX),
|
||||
PPC_I(STVXL),
|
||||
PPC_I(STW),
|
||||
PPC_I(STWBRX),
|
||||
PPC_I(STWCX),
|
||||
@ -199,6 +214,148 @@ enum powerpc_instruction {
|
||||
PPC_I(XOR),
|
||||
PPC_I(XORI),
|
||||
PPC_I(XORIS),
|
||||
PPC_I(VADDCUW),
|
||||
PPC_I(VADDFP),
|
||||
PPC_I(VADDSBS),
|
||||
PPC_I(VADDSHS),
|
||||
PPC_I(VADDSWS),
|
||||
PPC_I(VADDUBM),
|
||||
PPC_I(VADDUBS),
|
||||
PPC_I(VADDUHM),
|
||||
PPC_I(VADDUHS),
|
||||
PPC_I(VADDUWM),
|
||||
PPC_I(VADDUWS),
|
||||
PPC_I(VAND),
|
||||
PPC_I(VANDC),
|
||||
PPC_I(VAVGSB),
|
||||
PPC_I(VAVGSH),
|
||||
PPC_I(VAVGSW),
|
||||
PPC_I(VAVGUB),
|
||||
PPC_I(VAVGUH),
|
||||
PPC_I(VAVGUW),
|
||||
PPC_I(VCFSX),
|
||||
PPC_I(VCFUX),
|
||||
PPC_I(VCMPBFP),
|
||||
PPC_I(VCMPEQFP),
|
||||
PPC_I(VCMPEQUB),
|
||||
PPC_I(VCMPEQUH),
|
||||
PPC_I(VCMPEQUW),
|
||||
PPC_I(VCMPGEFP),
|
||||
PPC_I(VCMPGTFP),
|
||||
PPC_I(VCMPGTSB),
|
||||
PPC_I(VCMPGTSH),
|
||||
PPC_I(VCMPGTSW),
|
||||
PPC_I(VCMPGTUB),
|
||||
PPC_I(VCMPGTUH),
|
||||
PPC_I(VCMPGTUW),
|
||||
PPC_I(VCTSXS),
|
||||
PPC_I(VCTUXS),
|
||||
PPC_I(VEXPTEFP),
|
||||
PPC_I(VLOGEFP),
|
||||
PPC_I(VMADDFP),
|
||||
PPC_I(VMAXFP),
|
||||
PPC_I(VMAXSB),
|
||||
PPC_I(VMAXSH),
|
||||
PPC_I(VMAXSW),
|
||||
PPC_I(VMAXUB),
|
||||
PPC_I(VMAXUH),
|
||||
PPC_I(VMAXUW),
|
||||
PPC_I(VMHADDSHS),
|
||||
PPC_I(VMHRADDSHS),
|
||||
PPC_I(VMINFP),
|
||||
PPC_I(VMINSB),
|
||||
PPC_I(VMINSH),
|
||||
PPC_I(VMINSW),
|
||||
PPC_I(VMINUB),
|
||||
PPC_I(VMINUH),
|
||||
PPC_I(VMINUW),
|
||||
PPC_I(VMLADDUHM),
|
||||
PPC_I(VMRGHB),
|
||||
PPC_I(VMRGHH),
|
||||
PPC_I(VMRGHW),
|
||||
PPC_I(VMRGLB),
|
||||
PPC_I(VMRGLH),
|
||||
PPC_I(VMRGLW),
|
||||
PPC_I(VMSUMMBM),
|
||||
PPC_I(VMSUMSHM),
|
||||
PPC_I(VMSUMSHS),
|
||||
PPC_I(VMSUMUBM),
|
||||
PPC_I(VMSUMUHM),
|
||||
PPC_I(VMSUMUHS),
|
||||
PPC_I(VMULESB),
|
||||
PPC_I(VMULESH),
|
||||
PPC_I(VMULEUB),
|
||||
PPC_I(VMULEUH),
|
||||
PPC_I(VMULOSB),
|
||||
PPC_I(VMULOSH),
|
||||
PPC_I(VMULOUB),
|
||||
PPC_I(VMULOUH),
|
||||
PPC_I(VNMSUB),
|
||||
PPC_I(VNOR),
|
||||
PPC_I(VOR),
|
||||
PPC_I(VPERM),
|
||||
PPC_I(VPKPX),
|
||||
PPC_I(VPKSHSS),
|
||||
PPC_I(VPKSHUS),
|
||||
PPC_I(VPKSWSS),
|
||||
PPC_I(VPKSWUS),
|
||||
PPC_I(VPKUHUM),
|
||||
PPC_I(VPKUHUS),
|
||||
PPC_I(VPKUWUM),
|
||||
PPC_I(VPKUWUS),
|
||||
PPC_I(VREFP),
|
||||
PPC_I(VRFIM),
|
||||
PPC_I(VRFIN),
|
||||
PPC_I(VRFIP),
|
||||
PPC_I(VRFIZ),
|
||||
PPC_I(VRLB),
|
||||
PPC_I(VRLH),
|
||||
PPC_I(VRLW),
|
||||
PPC_I(VRSQRTEFP),
|
||||
PPC_I(VSEL),
|
||||
PPC_I(VSL),
|
||||
PPC_I(VSLB),
|
||||
PPC_I(VSLDOI),
|
||||
PPC_I(VSLH),
|
||||
PPC_I(VSLO),
|
||||
PPC_I(VSLW),
|
||||
PPC_I(VSPLTB),
|
||||
PPC_I(VSPLTH),
|
||||
PPC_I(VSPLTISB),
|
||||
PPC_I(VSPLTISH),
|
||||
PPC_I(VSPLTISW),
|
||||
PPC_I(VSPLTW),
|
||||
PPC_I(VSR),
|
||||
PPC_I(VSRAB),
|
||||
PPC_I(VSRAH),
|
||||
PPC_I(VSRAW),
|
||||
PPC_I(VSRB),
|
||||
PPC_I(VSRH),
|
||||
PPC_I(VSRO),
|
||||
PPC_I(VSRW),
|
||||
PPC_I(VSUBCUW),
|
||||
PPC_I(VSUBFP),
|
||||
PPC_I(VSUBSBS),
|
||||
PPC_I(VSUBSHS),
|
||||
PPC_I(VSUBSWS),
|
||||
PPC_I(VSUBUBM),
|
||||
PPC_I(VSUBUBS),
|
||||
PPC_I(VSUBUHM),
|
||||
PPC_I(VSUBUHS),
|
||||
PPC_I(VSUBUWM),
|
||||
PPC_I(VSUBUWS),
|
||||
PPC_I(VSUMSWS),
|
||||
PPC_I(VSUM2SWS),
|
||||
PPC_I(VSUM4SBS),
|
||||
PPC_I(VSUM4SHS),
|
||||
PPC_I(VSUM4UBS),
|
||||
PPC_I(VUPKHPX),
|
||||
PPC_I(VUPKHSB),
|
||||
PPC_I(VUPKHSH),
|
||||
PPC_I(VUPKLPX),
|
||||
PPC_I(VUPKLSB),
|
||||
PPC_I(VUPKLSH),
|
||||
PPC_I(VXOR),
|
||||
PPC_I(MAX) // Total number of instruction types
|
||||
};
|
||||
|
||||
|
@ -21,6 +21,21 @@
|
||||
#ifndef PPC_OPERANDS_H
|
||||
#define PPC_OPERANDS_H
|
||||
|
||||
#include <limits>
|
||||
|
||||
/**
|
||||
* Compile time checks
|
||||
**/
|
||||
|
||||
// Only the <n, n> specialization is defined, so naming ensure_equals<a, b>
// as a complete type (e.g. as a base class) fails to compile when a != b
template< int lhs, int rhs >
struct ensure_equals;

template< int value >
struct ensure_equals<value, value> { };

// Deriving from ensure_sizeof<T, N> compiles only if sizeof(T) == N
template< class type, int size >
struct ensure_sizeof
	: ensure_equals<sizeof(type), size>
{ };
|
||||
|
||||
/**
|
||||
* General purpose registers
|
||||
**/
|
||||
@ -96,6 +111,191 @@ struct output_fpr_dw {
|
||||
template< class field >
|
||||
struct fpr_dw_operand : input_fpr_dw< field >, output_fpr_dw< field > { };
|
||||
|
||||
/**
|
||||
* Vector registers
|
||||
**/
|
||||
|
||||
// Identity element mapping: element number i is stored at array index i
struct ev_direct {
	static inline int byte_element(int index)
	{
		return index;
	}
	static inline int half_element(int index)
	{
		return index;
	}
	static inline int word_element(int index)
	{
		return index;
	}
};
|
||||
|
||||
// This supposes elements are loaded by 4-byte word parts
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
typedef ev_direct ev_mixed;
|
||||
#else
|
||||
struct ev_mixed : public ev_direct {
|
||||
#if 0
|
||||
static inline int byte_element(int i) { return (i & ~3) + (3 - (i & 3)); }
|
||||
static inline int half_element(int i) { return (i & ~1) + (1 - (i & 1)); }
|
||||
#else
|
||||
static inline int byte_element(int i) {
|
||||
static const int lookup[16] = {
|
||||
3, 2, 1, 0,
|
||||
7, 6, 5, 4,
|
||||
11, 10, 9, 8,
|
||||
15, 14, 13, 12
|
||||
};
|
||||
return lookup[i];
|
||||
}
|
||||
static inline int half_element(int i) {
|
||||
static const int lookup[8] = {
|
||||
1, 0, 3, 2,
|
||||
5, 4, 7, 6
|
||||
};
|
||||
return lookup[i];
|
||||
}
|
||||
#endif
|
||||
};
|
||||
#endif
|
||||
|
||||
struct null_vector_operand {
|
||||
typedef uint32 type;
|
||||
typedef uint32 element_type;
|
||||
static const uint32 element_size = sizeof(element_type);
|
||||
static inline type const_ref(powerpc_cpu *, uint32) { return 0; } // fake so that compiler optimizes it out
|
||||
static inline element_type get_element(type const & reg, int i) { return 0; }
|
||||
};
|
||||
|
||||
template< class field >
|
||||
struct vimm_operand {
|
||||
typedef uint32 type;
|
||||
typedef uint32 element_type;
|
||||
static const uint32 element_size = sizeof(element_type);
|
||||
static inline type const_ref(powerpc_cpu *, uint32 opcode) { return field::extract(opcode); }
|
||||
static inline element_type get_element(type const & reg, int i) { return reg; }
|
||||
};
|
||||
|
||||
template< class field >
|
||||
struct input_vr {
|
||||
static inline powerpc_vr const & const_ref(powerpc_cpu * cpu, uint32 opcode) {
|
||||
return cpu->vr(field::extract(opcode));
|
||||
}
|
||||
};
|
||||
|
||||
template< class field >
|
||||
struct output_vr {
|
||||
static inline powerpc_vr & ref(powerpc_cpu * cpu, uint32 opcode) {
|
||||
return cpu->vr(field::extract(opcode));
|
||||
}
|
||||
};
|
||||
|
||||
template< class field, class value_type >
|
||||
struct vector_operand : input_vr< field >, output_vr< field > {
|
||||
typedef powerpc_vr type;
|
||||
typedef value_type element_type;
|
||||
static const uint32 element_size = sizeof(element_type);
|
||||
static inline bool saturate(element_type) { return false; }
|
||||
};
|
||||
|
||||
template< class field, class value_type, class sat_type >
|
||||
struct vector_saturate_operand : input_vr< field >, output_vr< field > {
|
||||
typedef powerpc_vr type;
|
||||
typedef sat_type element_type;
|
||||
static const uint32 element_size = sizeof(value_type);
|
||||
static inline bool saturate(element_type & v) {
|
||||
bool sat = false;
|
||||
if (v > std::numeric_limits<value_type>::max()) {
|
||||
v = std::numeric_limits<value_type>::max();
|
||||
sat = true;
|
||||
}
|
||||
else if (v < std::numeric_limits<value_type>::min()) {
|
||||
v = std::numeric_limits<value_type>::min();
|
||||
sat = true;
|
||||
}
|
||||
return sat;
|
||||
}
|
||||
};
|
||||
|
||||
template< class field, class value_type, class sat_type = int16, class ev = ev_direct >
|
||||
struct v16qi_sat_operand : vector_saturate_operand< field, value_type, sat_type >, ensure_sizeof< sat_type, 2 > {
|
||||
static inline sat_type get_element(powerpc_vr const & reg, int i) {
|
||||
return (sat_type)(value_type)reg.b[ev::byte_element(i)];
|
||||
}
|
||||
static inline void set_element(powerpc_vr & reg, int i, sat_type value) {
|
||||
reg.b[ev::byte_element(i)] = value;
|
||||
}
|
||||
};
|
||||
|
||||
template< class field, class value_type, class sat_type = int32, class ev = ev_direct >
|
||||
struct v8hi_sat_operand : vector_saturate_operand< field, value_type, sat_type >, ensure_sizeof< sat_type, 4 > {
|
||||
static inline sat_type get_element(powerpc_vr const & reg, int i) {
|
||||
return (sat_type)(value_type)reg.h[ev::half_element(i)];
|
||||
}
|
||||
static inline void set_element(powerpc_vr & reg, int i, sat_type value) {
|
||||
reg.h[ev::half_element(i)] = value;
|
||||
}
|
||||
};
|
||||
|
||||
template< class field, class value_type, class sat_type = int64 >
|
||||
struct v4si_sat_operand : vector_saturate_operand< field, value_type, sat_type >, ensure_sizeof< sat_type, 8 > {
|
||||
static inline sat_type get_element(powerpc_vr const & reg, int i) {
|
||||
return (sat_type)(value_type)reg.w[i];
|
||||
}
|
||||
static inline void set_element(powerpc_vr & reg, int i, sat_type value) {
|
||||
reg.w[i] = value;
|
||||
}
|
||||
};
|
||||
|
||||
template< class field, class value_type = uint8, class ev = ev_direct >
|
||||
struct v16qi_operand : vector_operand< field, value_type >, ensure_sizeof< value_type, 1 > {
|
||||
static inline value_type get_element(powerpc_vr const & reg, int i) {
|
||||
return reg.b[ev::byte_element(i)];
|
||||
}
|
||||
static inline void set_element(powerpc_vr & reg, int i, value_type value) {
|
||||
reg.b[ev::byte_element(i)] = value;
|
||||
}
|
||||
};
|
||||
|
||||
template< class field, class value_type = uint16, class ev = ev_direct >
|
||||
struct v8hi_operand : vector_operand< field, value_type >, ensure_sizeof< value_type, 2 > {
|
||||
static inline value_type get_element(powerpc_vr const & reg, int i) {
|
||||
return reg.h[ev::half_element(i)];
|
||||
}
|
||||
static inline void set_element(powerpc_vr & reg, int i, value_type value) {
|
||||
reg.h[ev::half_element(i)] = value;
|
||||
}
|
||||
};
|
||||
|
||||
template< class field, class value_type = uint32 >
|
||||
struct v4si_operand : vector_operand< field, value_type >, ensure_sizeof< value_type, 4 > {
|
||||
static inline value_type get_element(powerpc_vr const & reg, int i) {
|
||||
return reg.w[i];
|
||||
}
|
||||
static inline void set_element(powerpc_vr & reg, int i, value_type value) {
|
||||
reg.w[i] = value;
|
||||
}
|
||||
};
|
||||
|
||||
template< class field, class value_type = uint64 >
|
||||
struct v2di_operand : vector_operand< field, value_type >, ensure_sizeof< value_type, 8 > {
|
||||
static inline value_type get_element(powerpc_vr const & reg, int i) {
|
||||
return reg.j[i];
|
||||
}
|
||||
static inline void set_element(powerpc_vr & reg, int i, value_type value) {
|
||||
reg.j[i] = value;
|
||||
}
|
||||
};
|
||||
|
||||
template< class field >
|
||||
struct v4sf_operand : vector_operand< field, float > {
|
||||
static inline float get_element(powerpc_vr const & reg, int i) {
|
||||
return reg.f[i];
|
||||
}
|
||||
static inline void set_element(powerpc_vr & reg, int i, float value) {
|
||||
reg.f[i] = value;
|
||||
}
|
||||
};
|
||||
|
||||
template< class field >
|
||||
struct vSH_operand {
|
||||
static inline uint32 get(powerpc_cpu * cpu, uint32 opcode) {
|
||||
return (cpu->vr(field::extract(opcode)).b[ev_mixed::byte_element(15)] >> 3) & 15;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Immediate operands
|
||||
**/
|
||||
@ -239,6 +439,108 @@ typedef fpscr_operand<FPSCR_RN_field> operand_FPSCR_RN;
|
||||
typedef spr_operand operand_SPR;
|
||||
typedef tbr_operand operand_TBR;
|
||||
typedef mask_operand operand_MASK;
|
||||
typedef null_vector_operand operand_vD_NONE;
|
||||
typedef null_vector_operand operand_vA_NONE;
|
||||
typedef null_vector_operand operand_vB_NONE;
|
||||
typedef null_vector_operand operand_vC_NONE;
|
||||
typedef v16qi_operand<vD_field> operand_vD_V16QI;
|
||||
typedef v16qi_operand<vA_field> operand_vA_V16QI;
|
||||
typedef v16qi_operand<vB_field> operand_vB_V16QI;
|
||||
typedef v16qi_operand<vC_field> operand_vC_V16QI;
|
||||
typedef v16qi_operand<vD_field, int8> operand_vD_V16QIs;
|
||||
typedef v16qi_operand<vA_field, int8> operand_vA_V16QIs;
|
||||
typedef v16qi_operand<vB_field, int8> operand_vB_V16QIs;
|
||||
typedef v16qi_operand<vC_field, int8> operand_vC_V16QIs;
|
||||
typedef v16qi_operand<vD_field, int8, ev_mixed> operand_vD_V16QIms;
|
||||
typedef v16qi_operand<vB_field, int8, ev_mixed> operand_vB_V16QIms;
|
||||
typedef v8hi_operand<vD_field> operand_vD_V8HI;
|
||||
typedef v8hi_operand<vA_field> operand_vA_V8HI;
|
||||
typedef v8hi_operand<vB_field> operand_vB_V8HI;
|
||||
typedef v8hi_operand<vC_field> operand_vC_V8HI;
|
||||
typedef v8hi_operand<vD_field, int16> operand_vD_V8HIs;
|
||||
typedef v8hi_operand<vA_field, int16> operand_vA_V8HIs;
|
||||
typedef v8hi_operand<vB_field, int16> operand_vB_V8HIs;
|
||||
typedef v8hi_operand<vC_field, int16> operand_vC_V8HIs;
|
||||
typedef v8hi_operand<vD_field, int16, ev_mixed> operand_vD_V8HIms;
|
||||
typedef v8hi_operand<vB_field, int16, ev_mixed> operand_vB_V8HIms;
|
||||
typedef v4si_operand<vD_field> operand_vD_V4SI;
|
||||
typedef v4si_operand<vA_field> operand_vA_V4SI;
|
||||
typedef v4si_operand<vB_field> operand_vB_V4SI;
|
||||
typedef v4si_operand<vC_field> operand_vC_V4SI;
|
||||
typedef v4si_operand<vD_field, int32> operand_vD_V4SIs;
|
||||
typedef v4si_operand<vA_field, int32> operand_vA_V4SIs;
|
||||
typedef v4si_operand<vB_field, int32> operand_vB_V4SIs;
|
||||
typedef v4si_operand<vC_field, int32> operand_vC_V4SIs;
|
||||
typedef v2di_operand<vD_field> operand_vD_V2DI;
|
||||
typedef v2di_operand<vA_field> operand_vA_V2DI;
|
||||
typedef v2di_operand<vB_field> operand_vB_V2DI;
|
||||
typedef v2di_operand<vC_field> operand_vC_V2DI;
|
||||
typedef v2di_operand<vD_field, int64> operand_vD_V2DIs;
|
||||
typedef v2di_operand<vA_field, int64> operand_vA_V2DIs;
|
||||
typedef v2di_operand<vB_field, int64> operand_vB_V2DIs;
|
||||
typedef v2di_operand<vC_field, int64> operand_vC_V2DIs;
|
||||
typedef v4sf_operand<vD_field> operand_vD_V4SF;
|
||||
typedef v4sf_operand<vA_field> operand_vA_V4SF;
|
||||
typedef v4sf_operand<vB_field> operand_vB_V4SF;
|
||||
typedef v4sf_operand<vC_field> operand_vC_V4SF;
|
||||
typedef v4si_operand<vS_field> operand_vS_V4SI;
|
||||
typedef v2di_operand<vS_field> operand_vS_V2DI;
|
||||
typedef vimm_operand<vA_field> operand_vA_UIMM;
|
||||
typedef vimm_operand<vB_field> operand_vB_UIMM;
|
||||
typedef vSH_operand<vB_field> operand_SHBO;
|
||||
|
||||
// vector mixed element accessors
|
||||
typedef v16qi_operand<vA_field, uint8, ev_mixed> operand_vA_V16QIm;
|
||||
typedef v16qi_operand<vB_field, uint8, ev_mixed> operand_vB_V16QIm;
|
||||
typedef v16qi_operand<vD_field, uint8, ev_mixed> operand_vD_V16QIm;
|
||||
typedef v8hi_operand<vA_field, uint16, ev_mixed> operand_vA_V8HIm;
|
||||
typedef v8hi_operand<vB_field, uint16, ev_mixed> operand_vB_V8HIm;
|
||||
typedef v8hi_operand<vD_field, uint16, ev_mixed> operand_vD_V8HIm;
|
||||
|
||||
#define DEFINE_VECTOR_SAT_OPERAND(EV, REG, OP) \
|
||||
template< class value_type > \
|
||||
struct operand_##REG##_##EV##_SAT : OP##_sat_operand<REG##_field, value_type> { }
|
||||
|
||||
DEFINE_VECTOR_SAT_OPERAND(V4SI, vD, v4si);
|
||||
DEFINE_VECTOR_SAT_OPERAND(V4SI, vA, v4si);
|
||||
DEFINE_VECTOR_SAT_OPERAND(V4SI, vB, v4si);
|
||||
DEFINE_VECTOR_SAT_OPERAND(V4SI, vC, v4si);
|
||||
DEFINE_VECTOR_SAT_OPERAND(V8HI, vD, v8hi);
|
||||
DEFINE_VECTOR_SAT_OPERAND(V8HI, vA, v8hi);
|
||||
DEFINE_VECTOR_SAT_OPERAND(V8HI, vB, v8hi);
|
||||
DEFINE_VECTOR_SAT_OPERAND(V8HI, vC, v8hi);
|
||||
DEFINE_VECTOR_SAT_OPERAND(V16QI, vD, v16qi);
|
||||
DEFINE_VECTOR_SAT_OPERAND(V16QI, vA, v16qi);
|
||||
DEFINE_VECTOR_SAT_OPERAND(V16QI, vB, v16qi);
|
||||
DEFINE_VECTOR_SAT_OPERAND(V16QI, vC, v16qi);
|
||||
|
||||
#undef DEFINE_VECTOR_SAT_OPERAND
|
||||
|
||||
#define DEFINE_VECTOR_MIXED_SAT_OPERAND(EV, SAT, REG, OP, TYPE) \
|
||||
template< class value_type > \
|
||||
struct operand_##REG##_##EV##m_##SAT : OP##_sat_operand<REG##_field, value_type, TYPE, ev_mixed> { }
|
||||
|
||||
DEFINE_VECTOR_MIXED_SAT_OPERAND(V16QI, SAT, vA, v16qi, int16);
|
||||
DEFINE_VECTOR_MIXED_SAT_OPERAND(V16QI, SAT, vB, v16qi, int16);
|
||||
DEFINE_VECTOR_MIXED_SAT_OPERAND(V16QI, SAT, vD, v16qi, int16);
|
||||
DEFINE_VECTOR_MIXED_SAT_OPERAND(V16QI, USAT, vD, v16qi, uint16);
|
||||
DEFINE_VECTOR_MIXED_SAT_OPERAND(V8HI, SAT, vA, v8hi, int32);
|
||||
DEFINE_VECTOR_MIXED_SAT_OPERAND(V8HI, SAT, vB, v8hi, int32);
|
||||
DEFINE_VECTOR_MIXED_SAT_OPERAND(V8HI, SAT, vD, v8hi, int32);
|
||||
DEFINE_VECTOR_MIXED_SAT_OPERAND(V8HI, USAT, vD, v8hi, uint32);
|
||||
|
||||
#undef DEFINE_VECTOR_MIXED_SAT_OPERAND
|
||||
|
||||
#define DEFINE_VECTOR_USAT_OPERAND(EV, REG, OP, TYPE) \
|
||||
template< class value_type > \
|
||||
struct operand_##REG##_##EV##_USAT : OP##_sat_operand<REG##_field, value_type, TYPE> { }
|
||||
|
||||
// FIXME: temporary for vector pack unsigned saturate variants
|
||||
DEFINE_VECTOR_USAT_OPERAND(V4SI, vD, v4si, uint64);
|
||||
DEFINE_VECTOR_USAT_OPERAND(V8HI, vD, v8hi, uint32);
|
||||
DEFINE_VECTOR_USAT_OPERAND(V16QI, vD, v16qi, uint16);
|
||||
|
||||
#undef DEFINE_VECTOR_USAT_OPERAND
|
||||
|
||||
#define DEFINE_IMMEDIATE_OPERAND(NAME, FIELD, OP) \
|
||||
typedef immediate_operand<FIELD##_field, op_##OP> operand_##NAME
|
||||
@ -255,6 +557,7 @@ DEFINE_IMMEDIATE_OPERAND(D, d, sign_extend_16_32);
|
||||
DEFINE_IMMEDIATE_OPERAND(NB, NB, nop);
|
||||
DEFINE_IMMEDIATE_OPERAND(SH, SH, nop);
|
||||
DEFINE_IMMEDIATE_OPERAND(FM, FM, nop);
|
||||
DEFINE_IMMEDIATE_OPERAND(SHB, vSH, nop);
|
||||
|
||||
#undef DEFINE_IMMEDIATE_OPERAND
|
||||
|
||||
|
@ -31,6 +31,9 @@
|
||||
* EXPR C++ expression defining the operation, parameters are x/y/z/t
|
||||
**/
|
||||
|
||||
#define DEFINE_ALIAS_OP(NAME, T_NAME, TYPE) \
|
||||
typedef op_template_##T_NAME<TYPE> op_##NAME
|
||||
|
||||
#define DEFINE_OP1(NAME, TYPE, EXPR) \
|
||||
struct op_##NAME { \
|
||||
static inline TYPE apply(TYPE x) { \
|
||||
@ -38,6 +41,10 @@ struct op_##NAME { \
|
||||
} \
|
||||
}
|
||||
|
||||
#define DEFINE_TEMPLATE_OP1(NAME, EXPR) \
|
||||
template< class TYPE > \
|
||||
DEFINE_OP1(template_##NAME, TYPE, EXPR)
|
||||
|
||||
#define DEFINE_OP2(NAME, TYPE, EXPR) \
|
||||
struct op_##NAME { \
|
||||
static inline TYPE apply(TYPE x, TYPE y) { \
|
||||
@ -45,6 +52,10 @@ struct op_##NAME { \
|
||||
} \
|
||||
}
|
||||
|
||||
#define DEFINE_TEMPLATE_OP2(NAME, EXPR) \
|
||||
template< class TYPE > \
|
||||
DEFINE_OP2(template_##NAME, TYPE, EXPR)
|
||||
|
||||
#define DEFINE_OP3(NAME, TYPE, EXPR) \
|
||||
struct op_##NAME { \
|
||||
static inline TYPE apply(TYPE x, TYPE y, TYPE z) { \
|
||||
@ -59,26 +70,42 @@ struct op_##NAME { \
|
||||
} \
|
||||
}
|
||||
|
||||
// Basic operations
|
||||
|
||||
DEFINE_TEMPLATE_OP1(nop, x);
|
||||
DEFINE_TEMPLATE_OP2(add, x + y);
|
||||
DEFINE_TEMPLATE_OP2(sub, x - y);
|
||||
DEFINE_TEMPLATE_OP2(mul, x * y);
|
||||
DEFINE_TEMPLATE_OP2(div, x / y);
|
||||
DEFINE_TEMPLATE_OP2(and, x & y);
|
||||
DEFINE_TEMPLATE_OP2(or, x | y);
|
||||
DEFINE_TEMPLATE_OP2(xor, x ^ y);
|
||||
DEFINE_TEMPLATE_OP2(orc, x | ~y);
|
||||
DEFINE_TEMPLATE_OP2(andc,x & ~y);
|
||||
DEFINE_TEMPLATE_OP2(nand,~(x & y));
|
||||
DEFINE_TEMPLATE_OP2(nor, ~(x | y));
|
||||
DEFINE_TEMPLATE_OP2(eqv, ~(x ^ y));
|
||||
|
||||
// Integer basic operations
|
||||
|
||||
DEFINE_OP1(nop, uint32, x);
|
||||
DEFINE_ALIAS_OP(nop, nop, uint32);
|
||||
DEFINE_ALIAS_OP(add, add, uint32);
|
||||
DEFINE_ALIAS_OP(sub, sub, uint32);
|
||||
DEFINE_ALIAS_OP(mul, mul, uint32);
|
||||
DEFINE_ALIAS_OP(smul,mul, int32);
|
||||
DEFINE_ALIAS_OP(div, div, uint32);
|
||||
DEFINE_ALIAS_OP(sdiv,div, int32);
|
||||
DEFINE_OP1(neg, uint32, -x);
|
||||
DEFINE_OP1(compl, uint32, ~x);
|
||||
DEFINE_OP2(add, uint32, x + y);
|
||||
DEFINE_OP2(sub, uint32, x - y);
|
||||
DEFINE_OP2(mul, uint32, x * y);
|
||||
DEFINE_OP2(smul, int32, x * y);
|
||||
DEFINE_OP2(div, uint32, x / y);
|
||||
DEFINE_OP2(sdiv, int32, x / y);
|
||||
DEFINE_OP2(mod, uint32, x % y);
|
||||
DEFINE_OP2(and, uint32, x & y);
|
||||
DEFINE_OP2(or, uint32, x | y);
|
||||
DEFINE_OP2(xor, uint32, x ^ y);
|
||||
DEFINE_OP2(orc, uint32, x | ~y);
|
||||
DEFINE_OP2(andc,uint32, x & ~y);
|
||||
DEFINE_OP2(nand,uint32, ~(x & y));
|
||||
DEFINE_OP2(nor, uint32, ~(x | y));
|
||||
DEFINE_OP2(eqv, uint32, ~(x ^ y));
|
||||
DEFINE_ALIAS_OP(and, and, uint32);
|
||||
DEFINE_ALIAS_OP(or, or, uint32);
|
||||
DEFINE_ALIAS_OP(xor, xor, uint32);
|
||||
DEFINE_ALIAS_OP(orc, orc, uint32);
|
||||
DEFINE_ALIAS_OP(andc,andc,uint32);
|
||||
DEFINE_ALIAS_OP(nand,nand,uint32);
|
||||
DEFINE_ALIAS_OP(nor, nor, uint32);
|
||||
DEFINE_ALIAS_OP(eqv, eqv, uint32);
|
||||
DEFINE_OP2(shll, uint32, x << y);
|
||||
DEFINE_OP2(shrl, uint32, x >> y);
|
||||
DEFINE_OP2(shra, uint32, (int32)x >> y);
|
||||
@ -89,6 +116,14 @@ DEFINE_OP4(ppc_rlwimi, uint32, (op_rotl::apply(x, y) & z) | (t & ~z));
|
||||
DEFINE_OP3(ppc_rlwinm, uint32, (op_rotl::apply(x, y) & z));
|
||||
DEFINE_OP3(ppc_rlwnm, uint32, (op_rotl::apply(x, (y & 0x1f)) & z));
|
||||
|
||||
DEFINE_ALIAS_OP(add_64, add, uint64);
|
||||
DEFINE_ALIAS_OP(sub_64, sub, uint64);
|
||||
DEFINE_ALIAS_OP(smul_64,mul, int64);
|
||||
DEFINE_ALIAS_OP(and_64, and, uint64);
|
||||
DEFINE_ALIAS_OP(andc_64,andc,uint64);
|
||||
DEFINE_ALIAS_OP(or_64, or, uint64);
|
||||
DEFINE_ALIAS_OP(nor_64, nor, uint64);
|
||||
DEFINE_ALIAS_OP(xor_64, xor, uint64);
|
||||
|
||||
// Floating-point basic operations
|
||||
|
||||
@ -105,14 +140,168 @@ DEFINE_OP3(fnmadd, double, -((x * y) + z));
|
||||
DEFINE_OP3(fnmsub, double, -((x * y) - z));
|
||||
DEFINE_OP2(fsub, double, x - y);
|
||||
|
||||
DEFINE_OP1(fnops, float, x);
|
||||
DEFINE_OP1(fabss, float, fabs(x));
|
||||
DEFINE_OP2(fadds, float, x + y);
|
||||
DEFINE_OP2(fdivs, float, x / y);
|
||||
DEFINE_OP3(fmadds, float, (x * y) + z);
|
||||
DEFINE_OP3(fmsubs, float, (x * y) - z);
|
||||
DEFINE_OP2(fmuls, float, x * y);
|
||||
DEFINE_OP1(fnabss, float, -fabs(x));
|
||||
DEFINE_OP1(fnegs, float, -x);
|
||||
DEFINE_OP3(fnmadds, float, -((x * y) + z));
|
||||
DEFINE_OP3(fnmsubs, float, -((x * y) - z));
|
||||
DEFINE_OP2(fsubs, float, x - y);
|
||||
|
||||
DEFINE_OP1(exp2, float, exp2f(x));
|
||||
DEFINE_OP1(log2, float, log2f(x));
|
||||
DEFINE_OP1(fres, float, 1 / x);
|
||||
DEFINE_OP1(frsqrt, float, 1 / sqrt(x));
|
||||
DEFINE_OP1(frim, float, floorf(x));
|
||||
DEFINE_OP1(frin, float, roundf(x));
|
||||
DEFINE_OP1(frip, float, ceilf(x));
|
||||
DEFINE_OP1(friz, float, trunc(x));
|
||||
|
||||
// Misc operations used in AltiVec instructions
|
||||
|
||||
// Rotate left: rotates v by the low-order log2(bit width) bits of n.
//   v  value to rotate
//   n  rotate count; only (n mod bit-width-of-TYPE) is used
// Returns v rotated left by that amount, truncated to TYPE.
template< class TYPE >
struct op_vrl {
	static inline TYPE apply(TYPE v, TYPE n) {
		const int nbits = 8 * sizeof(TYPE);
		const int sh = n & (nbits - 1);
		// The complementary count is masked so that a zero rotate shifts
		// right by 0 instead of by the full type width, which is undefined
		// behaviour for 32-bit (and wider) element types.
		return ((v << sh) | (v >> ((nbits - sh) & (nbits - 1))));
	}
};
|
||||
|
||||
// Shift left: shifts v by the low-order log2(bit width) bits of n
template< class TYPE >
struct op_vsl {
	static inline TYPE apply(TYPE v, TYPE n)
	{
		// Only the count modulo the element width is significant
		const int width_mask = (8 * sizeof(TYPE)) - 1;
		const int count = n & width_mask;
		return v << count;
	}
};
|
||||
|
||||
// Shift right: shifts v by the low-order log2(bit width) bits of n, using
// the >> operator of TYPE
template< class TYPE >
struct op_vsr {
	static inline TYPE apply(TYPE v, TYPE n)
	{
		// Only the count modulo the element width is significant
		const int width_mask = (8 * sizeof(TYPE)) - 1;
		const int count = n & width_mask;
		return v >> count;
	}
};
|
||||
|
||||
template< uint16 round = 0 >
|
||||
struct op_mhraddsh {
|
||||
static inline int32 apply(int32 a, int32 b, int32 c) {
|
||||
return (((a * b) + round) >> 15) + c;
|
||||
}
|
||||
};
|
||||
|
||||
struct op_cvt_fp2si {
|
||||
static inline int64 apply(uint32 a, float b) {
|
||||
return (int64)(b * (1U << a));
|
||||
}
|
||||
};
|
||||
|
||||
template< class TYPE >
|
||||
struct op_cvt_si2fp {
|
||||
static inline float apply(uint32 a, TYPE b) {
|
||||
return ((float)b) / ((float)(1U << a));
|
||||
}
|
||||
};
|
||||
|
||||
// Maximum of two values; b is returned unless a compares strictly greater
template< class TYPE >
struct op_max {
	static inline TYPE apply(TYPE a, TYPE b)
	{
		if (a > b)
			return a;
		return b;
	}
};
|
||||
|
||||
// Minimum of two values; b is returned unless a compares strictly smaller
template< class TYPE >
struct op_min {
	static inline TYPE apply(TYPE a, TYPE b)
	{
		if (a < b)
			return a;
		return b;
	}
};
|
||||
|
||||
template< int nbytes >
|
||||
struct op_all_ones {
|
||||
static const uint32 value = (1U << (8 * nbytes)) - 1;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct op_all_ones<4> {
|
||||
static const uint32 value = 0xffffffff;
|
||||
};
|
||||
|
||||
template< class VX >
|
||||
struct op_cmp {
|
||||
static const uint32 result = op_all_ones<sizeof(VX)>::value;
|
||||
};
|
||||
|
||||
template< class VX >
|
||||
struct op_cmp_eq {
|
||||
static inline uint32 apply(VX a, VX b) {
|
||||
return a == b ? op_cmp<VX>::result : 0;
|
||||
}
|
||||
};
|
||||
|
||||
template< class VX >
|
||||
struct op_cmp_ge {
|
||||
static inline uint32 apply(VX a, VX b) {
|
||||
return a >= b ? op_cmp<VX>::result : 0;
|
||||
}
|
||||
};
|
||||
|
||||
template< class VX >
|
||||
struct op_cmp_gt {
|
||||
static inline uint32 apply(VX a, VX b) {
|
||||
return a > b ? op_cmp<VX>::result : 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct op_cmpbfp {
|
||||
static inline uint32 apply(float a, float b) {
|
||||
const bool le = a <= b;
|
||||
const bool ge = a >= -b;
|
||||
return (le ? 0 : (1 << 31)) | (ge ? 0 : (1 << 30));
|
||||
}
|
||||
};
|
||||
|
||||
DEFINE_OP3(vsel, uint32, ((y & z) | (x & ~z)));
|
||||
DEFINE_OP3(vmaddfp, float, ((x * z) + y));
|
||||
DEFINE_OP3(vnmsubfp, float, -((x * z) - y));
|
||||
DEFINE_OP3(mladduh, uint32, ((x * y) + z) & 0xffff);
|
||||
DEFINE_OP2(addcuw, uint32, ((uint64)x + (uint64)y) >> 32);
|
||||
DEFINE_OP2(subcuw, uint32, (~((int64)x - (int64)y) >> 32) & 1);
|
||||
DEFINE_OP2(avgsb, int8, (((int16)x + (int16)y + 1) >> 1));
|
||||
DEFINE_OP2(avgsh, int16, (((int32)x + (int32)y + 1) >> 1));
|
||||
DEFINE_OP2(avgsw, int32, (((int64)x + (int64)y + 1) >> 1));
|
||||
DEFINE_OP2(avgub, uint8, ((uint16)x + (uint16)y + 1) >> 1);
|
||||
DEFINE_OP2(avguh, uint16, ((uint32)x + (uint32)y + 1) >> 1);
|
||||
DEFINE_OP2(avguw, uint32, ((uint64)x + (uint64)y + 1) >> 1);
|
||||
|
||||
|
||||
#undef DEFINE_OP1
|
||||
#undef DEFINE_OP2
|
||||
#undef DEFINE_OP3
|
||||
#undef DEFINE_OP4
|
||||
|
||||
#undef DEFINE_TEMPLATE_OP1
|
||||
#undef DEFINE_TEMPLATE_OP2
|
||||
#undef DEFINE_TEMPLATE_OP3
|
||||
|
||||
#undef DEFINE_ALIAS_OP
|
||||
|
||||
|
||||
// Sign/Zero-extend operation
|
||||
|
||||
struct op_sign_extend_5_32 {
|
||||
static inline uint32 apply(uint32 value) {
|
||||
if (value & 0x10)
|
||||
value -= 0x20;
|
||||
return value;
|
||||
}
|
||||
};
|
||||
|
||||
struct op_sign_extend_16_32 {
|
||||
static inline uint32 apply(uint32 value) {
|
||||
return (uint32)(int32)(int16)value;
|
||||
|
@ -162,6 +162,58 @@ union powerpc_fpr {
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Vector Status and Control Register
|
||||
**/
|
||||
|
||||
class powerpc_vscr
|
||||
{
|
||||
uint8 nj;
|
||||
uint8 sat;
|
||||
public:
|
||||
powerpc_vscr();
|
||||
void set(uint32 v);
|
||||
uint32 get() const;
|
||||
uint32 get_nj() const { return nj; }
|
||||
void set_nj(int v) { nj = v; }
|
||||
uint32 get_sat() const { return sat; }
|
||||
void set_sat(int v) { sat = v; }
|
||||
};
|
||||
|
||||
inline
|
||||
powerpc_vscr::powerpc_vscr()
|
||||
: nj(0), sat(0)
|
||||
{ }
|
||||
|
||||
// Pack the flags into a 32-bit word: nj shifted left by 16, sat in the
// low-order position
inline uint32 powerpc_vscr::get() const
{
	const uint32 nj_bits = nj << 16;
	return nj_bits | sat;
}
|
||||
|
||||
inline void
|
||||
powerpc_vscr::set(uint32 v)
|
||||
{
|
||||
nj = VSCR_NJ_field::extract(v);
|
||||
sat = VSCR_SAT_field::extract(v);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Vector register
|
||||
**/
|
||||
|
||||
union powerpc_vr
|
||||
{
|
||||
uint8 b[16];
|
||||
uint16 h[8];
|
||||
uint32 w[4];
|
||||
uint64 j[2];
|
||||
float f[4];
|
||||
double d[2];
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* User Environment Architecture (UEA) Register Set
|
||||
**/
|
||||
@ -185,6 +237,7 @@ struct powerpc_registers
|
||||
SPR_CTR = 9,
|
||||
SPR_SDR1 = 25,
|
||||
SPR_PVR = 287,
|
||||
SPR_VRSAVE = 256,
|
||||
};
|
||||
|
||||
static inline int GPR(int r) { return GPR_BASE + r; }
|
||||
@ -203,6 +256,9 @@ struct powerpc_registers
|
||||
static uint32 reserve_valid;
|
||||
static uint32 reserve_addr;
|
||||
static uint32 reserve_data;
|
||||
powerpc_vr vr[32]; // Vector Registers
|
||||
powerpc_vscr vscr; // Vector Status and Control Register
|
||||
uint32 vrsave; // AltiVec Save Register
|
||||
};
|
||||
|
||||
#endif /* PPC_REGISTERS_H */
|
||||
|
@ -684,6 +684,9 @@ powerpc_cpu::compile_block(uint32 entry_point)
|
||||
case powerpc_registers::SPR_CTR:
|
||||
dg.gen_load_T0_CTR();
|
||||
break;
|
||||
case powerpc_registers::SPR_VRSAVE:
|
||||
dg.gen_load_T0_VRSAVE();
|
||||
break;
|
||||
#ifdef SHEEPSHAVER
|
||||
case powerpc_registers::SPR_SDR1:
|
||||
dg.gen_mov_32_T0_im(0xdead001f);
|
||||
@ -697,7 +700,7 @@ powerpc_cpu::compile_block(uint32 entry_point)
|
||||
dg.gen_mov_32_T0_im(0);
|
||||
break;
|
||||
#else
|
||||
default: goto do_illegal;
|
||||
default: goto do_generic;
|
||||
#endif
|
||||
}
|
||||
dg.gen_store_T0_GPR(rD_field::extract(opcode));
|
||||
@ -717,8 +720,11 @@ powerpc_cpu::compile_block(uint32 entry_point)
|
||||
case powerpc_registers::SPR_CTR:
|
||||
dg.gen_store_T0_CTR();
|
||||
break;
|
||||
case powerpc_registers::SPR_VRSAVE:
|
||||
dg.gen_store_T0_VRSAVE();
|
||||
break;
|
||||
#ifndef SHEEPSHAVER
|
||||
default: goto do_illegal;
|
||||
default: goto do_generic;
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
|
Loading…
Reference in New Issue
Block a user