AltiVec emulation! ;-)

gbeauche 2004-02-15 17:17:37 +00:00
parent d92989dc53
commit 313cddeeb2
14 changed files with 2323 additions and 104 deletions

View File

@ -181,13 +181,6 @@ sheepshaver_cpu::sheepshaver_cpu()
void sheepshaver_cpu::init_decoder()
{
#ifndef PPC_NO_STATIC_II_INDEX_TABLE
static bool initialized = false;
if (initialized)
return;
initialized = true;
#endif
static const instr_info_t sheep_ii_table[] = {
{ "sheep",
(execute_pmf)&sheepshaver_cpu::execute_sheep,

View File

@ -88,6 +88,17 @@ typedef bit_field< 21, 25 > frC_field;
typedef bit_field< 6, 10 > frD_field;
typedef bit_field< 6, 10 > frS_field;
// Vector registers
typedef bit_field< 11, 15 > vA_field;
typedef bit_field< 16, 20 > vB_field;
typedef bit_field< 21, 25 > vC_field;
typedef bit_field< 6, 10 > vD_field;
typedef bit_field< 6, 10 > vS_field;
typedef bit_field< 21, 21 > vRc_field;
typedef bit_field< 11, 15 > vUIMM_field;
typedef bit_field< 22, 25 > vSH_field;
// Condition registers
typedef bit_field< 11, 15 > crbA_field;
typedef bit_field< 16, 20 > crbB_field;
@ -151,6 +162,10 @@ typedef bit_field< 17, 17 > FPSCR_FPRF_FG_field; // >
typedef bit_field< 18, 18 > FPSCR_FPRF_FE_field; // =
typedef bit_field< 19, 19 > FPSCR_FPRF_FU_field; // ?
// Vector Status and Control Register
typedef bit_field< 15, 15 > VSCR_NJ_field;
typedef bit_field< 31, 31 > VSCR_SAT_field;
// Define variations for branch instructions
typedef bit_field< 30, 30 > AA_field;
typedef bit_field< 31, 31 > LK_field;
@ -202,6 +217,7 @@ DEFINE_FIELD_ALIAS(AA_BIT, AA);
DEFINE_FIELD_ALIAS(LK_BIT, LK);
DEFINE_FIELD_ALIAS(BO_BIT, BO);
DEFINE_FIELD_ALIAS(BI_BIT, BI);
DEFINE_FIELD_ALIAS(vRC_BIT, vRc);
#undef DEFINE_FIELD_ALIAS
#undef DEFINE_FAKE_FIELD_ALIAS
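
The new vA/vB/vC/vD/vS and VSCR fields reuse the same bit_field template as the integer and floating-point register fields, with IBM bit numbering (bit 0 is the MSB of the 32-bit word). A standalone sketch, not from the commit, with a minimal stand-in for the real bit_field template, showing how those positions decode a VX-form AltiVec instruction:

// Standalone sketch: how a bit_field<FIRST,LAST>-style extractor maps IBM bit
// numbers (0 = MSB) onto shifts and masks.
#include <cstdint>
#include <cstdio>

template< int FIRST, int LAST >
struct bit_field_sketch {
    static uint32_t extract(uint32_t opcode) {
        const int nbits = LAST - FIRST + 1;            // field width
        const int shift = 31 - LAST;                   // distance from the LSB
        return (opcode >> shift) & ((1u << nbits) - 1);
    }
};

int main() {
    // vaddubm v3,v4,v5: primary opcode 4 in bits 0-5, vD=3 in bits 6-10,
    // vA=4 in bits 11-15, vB=5 in bits 16-20, extended opcode 0 in bits 21-31.
    const uint32_t opcode = (4u << 26) | (3u << 21) | (4u << 16) | (5u << 11);
    std::printf("vD=%u vA=%u vB=%u\n",
                bit_field_sketch<6, 10>::extract(opcode),    // vD_field
                bit_field_sketch<11, 15>::extract(opcode),   // vA_field
                bit_field_sketch<16, 20>::extract(opcode));  // vB_field
    return 0;
}
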

View File

@ -37,32 +37,6 @@
#endif
/**
* PPC_NO_STATIC_II_INDEX_TABLE
*
* Define to make sure the ii_index_table[] is a non static
* member so that powerpc_cpu object size is reduced by 64
* KB. This is only supported for mono CPU configurations.
**/
#ifndef PPC_NO_STATIC_II_INDEX_TABLE
#define PPC_NO_STATIC_II_INDEX_TABLE
#endif
/**
* PPC_OPCODE_HASH_XO_PRIMARY
*
* Define to hash opcode hash (xo, primary opcode) instead of
* (primary opcode, xo). This simply reduces the computation
* index into instr_info[] table by one operation.
**/
#ifndef PPC_OPCODE_HASH_XO_PRIMARY
#define PPC_OPCODE_HASH_XO_PRIMARY
#endif
/**
* PPC_ENABLE_FPU_EXCEPTIONS
*
@ -148,13 +122,4 @@
#define PPC_PROFILE_GENERIC_CALLS 0
#endif
/**
* Sanity checks and features enforcements
**/
#if KPX_MAX_CPUS == 1
#undef PPC_NO_STATIC_II_INDEX_TABLE
#endif
#endif /* PPC_CONFIG_H */

View File

@ -431,7 +431,6 @@ bool powerpc_cpu::check_spcflags()
return true;
}
void powerpc_cpu::execute(uint32 entry)
{
pc() = entry;

View File

@ -48,7 +48,11 @@ protected:
powerpc_cr_register const & cr() const { return regs.cr; }
powerpc_xer_register & xer() { return regs.xer; }
powerpc_xer_register const & xer() const { return regs.xer; }
powerpc_vscr & vscr() { return regs.vscr; }
powerpc_vscr const & vscr() const { return regs.vscr; }
uint32 vrsave() const { return regs.vrsave; }
uint32 & vrsave() { return regs.vrsave; }
double fp_result() const { return regs.fp_result.d; }
double & fp_result() { return regs.fp_result.d; }
uint64 fp_result_dw() const { return regs.fp_result.j; }
@ -79,6 +83,8 @@ public:
double fpr(int i) const { return regs.fpr[i].d; }
uint64 & fpr_dw(int i) { return regs.fpr[i].j; }
uint64 fpr_dw(int i) const { return regs.fpr[i].j; }
powerpc_vr & vr(int i) { return regs.vr[i]; }
powerpc_vr const & vr(int i) const { return regs.vr[i]; }
protected:
@ -90,6 +96,15 @@ protected:
void record_cr1()
{ cr().set((cr().get() & ~CR_field<1>::mask()) | ((fpscr() >> 4) & 0x0f000000)); }
void record_fpscr();
void record_cr6(powerpc_vr const & vS, bool check_one) {
if (check_one && (vS.j[0] == UVAL64(0xffffffffffffffff) &&
vS.j[1] == UVAL64(0xffffffffffffffff)))
cr().set(6, 8);
else if (vS.j[0] == UVAL64(0) && vS.j[1] == UVAL64(0))
cr().set(6, 2);
else
cr().set(6, 0);
}
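
record_cr6 implements the Rc=1 behaviour of the vcmp* instructions: CR field 6 becomes 0b1000 (the value 8) when every element compared true, i.e. vD is all ones, 0b0010 (the value 2) when every element compared false, and 0 for a mixed result. A standalone sketch, not from the commit, of the same check on a plain 16-byte array, assuming cr().set(6, v) stores the 4-bit field verbatim:

// Standalone sketch: the CR6 value recorded by vcmp* instructions with Rc=1.
#include <cstdint>
#include <cstdio>

static unsigned cr6_for(const uint8_t (&vD)[16], bool check_one) {
    bool all_ones = true, all_zeros = true;
    for (int i = 0; i < 16; i++) {
        if (vD[i] != 0xff) all_ones = false;
        if (vD[i] != 0x00) all_zeros = false;
    }
    if (check_one && all_ones) return 8;   // 0b1000: all elements true
    if (all_zeros)             return 2;   // 0b0010: all elements false
    return 0;                              // mixed
}

int main() {
    uint8_t all_true[16], all_false[16], mixed[16];
    for (int i = 0; i < 16; i++) {
        all_true[i]  = 0xff;
        all_false[i] = 0x00;
        mixed[i]     = (i & 1) ? 0xff : 0x00;
    }
    std::printf("%u %u %u\n", cr6_for(all_true, true),
                cr6_for(all_false, true), cr6_for(mixed, true));  // prints: 8 2 0
    return 0;
}
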
template< class FP >
void fp_classify(FP x);
@ -125,7 +140,8 @@ protected:
MD_form, MDS_form,
SC_form,
X_form,
XFL_form, XFX_form, XL_form, XO_form, XS_form
XFL_form, XFX_form, XL_form, XO_form, XS_form,
VX_form, VXR_form, VA_form,
};
// Control flow types
@ -149,13 +165,13 @@ protected:
// Instruction information structure
struct instr_info_t {
char name[8]; // Instruction name
char name[12]; // Instruction name
execute_fn execute; // Semantic routine for this instruction
decode_fn decode; // Specialized instruction decoder
uint16 mnemo; // Mnemonic
uint16 format; // Instruction format (XO-form, D-form, etc.)
uint32 opcode:6; // Primary opcode
uint32 xo:10; // Extended opcode
uint16 opcode; // Primary opcode
uint16 xo; // Extended opcode
uint16 cflow; // Mask of control flow information
};
@ -192,25 +208,15 @@ private:
syscall_fn execute_do_syscall;
int syscall_exit_code;
#ifdef PPC_NO_STATIC_II_INDEX_TABLE
#define PPC_STATIC_II_TABLE
#else
#define PPC_STATIC_II_TABLE static
#endif
static const instr_info_t powerpc_ii_table[];
PPC_STATIC_II_TABLE std::vector<instr_info_t> ii_table;
typedef uint8 ii_index_t;
static const int II_INDEX_TABLE_SIZE = 0x10000;
PPC_STATIC_II_TABLE ii_index_t ii_index_table[II_INDEX_TABLE_SIZE];
std::vector<instr_info_t> ii_table;
typedef uint16 ii_index_t;
static const int II_INDEX_TABLE_SIZE = 0x20000;
ii_index_t ii_index_table[II_INDEX_TABLE_SIZE];
#ifdef PPC_OPCODE_HASH_XO_PRIMARY
// Pack/unpack index into decode table
uint32 make_ii_index(uint32 opcode, uint32 xo) { return opcode | (xo << 6); }
uint32 get_ii_index(uint32 opcode) { return (opcode >> 26) | ((opcode & 0x7fe) << 5); }
#else
uint32 make_ii_index(uint32 opcode, uint32 xo) { return opcode << 10 | xo; }
uint32 get_ii_index(uint32 opcode) { return ((opcode >> 16) & 0xfc00) | ((opcode >> 1) & 0x3ff); }
#endif
uint32 get_ii_index(uint32 opcode) { return (opcode >> 26) | ((opcode & 0x7ff) << 6); }
// Convert 8-bit field mask (e.g. mtcrf) to bit mask
uint32 field2mask[256];
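
The two configurable hash layouts are gone: the decoder now always packs the primary opcode (bits 0-5) into the low 6 bits of the index and the full 11-bit secondary field (bits 21-31, wide enough for the AltiVec VX extended opcodes, one bit more than the old 10-bit hash) above it. The index therefore spans 17 bits, which is why II_INDEX_TABLE_SIZE doubles to 0x20000; ii_index_t itself widens to uint16, presumably because the AltiVec additions push the number of decode-table entries past what a uint8 can address. A standalone sketch, not from the commit, using the VX-form vaddfp encoding (primary opcode 4, extended opcode 10):

// Standalone sketch: the single remaining (primary opcode, xo) index packing.
#include <cstdint>
#include <cstdio>

static uint32_t get_ii_index(uint32_t insn) {
    return (insn >> 26) | ((insn & 0x7ff) << 6);   // opcode in bits 0-5, xo above
}

int main() {
    const uint32_t insn = (4u << 26) | 10u;        // vaddfp: opcode 4, xo 10
    std::printf("index=%u\n", get_ii_index(insn)); // 4 | (10 << 6) = 644
    // Worst case: opcode 63 with all 11 xo bits set gives 63 | (0x7ff << 6) = 0x1ffff,
    // hence the 0x20000-entry index table.
    return 0;
}
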
@ -411,6 +417,36 @@ private:
void execute_invalidate_cache_range();
template< class RA, class RB >
void execute_dcbz(uint32 opcode);
template< class VD, class RA, class RB >
void execute_vector_load(uint32 opcode);
template< class VS, class RA, class RB >
void execute_vector_store(uint32 opcode);
void execute_mfvscr(uint32 opcode);
void execute_mtvscr(uint32 opcode);
template< class OP, class VD, class VA, class VB, class VC, class Rc, int C1 >
void execute_vector_arith(uint32 opcode);
template< class OP, class VD, class VA, class VB, class VC >
void execute_vector_arith_mixed(uint32 opcode);
template< int ODD, class OP, class VD, class VA, class VB, class VC >
void execute_vector_arith_odd(uint32 opcode);
template< class VD, class VA, class VB, int LO >
void execute_vector_merge(uint32 opcode);
template< class VD, class VA, class VB >
void execute_vector_pack(uint32 opcode);
void execute_vector_pack_pixel(uint32 opcode);
template< int LO >
void execute_vector_unpack_pixel(uint32 opcode);
template< int LO, class VD, class VA >
void execute_vector_unpack(uint32 opcode);
void execute_vector_permute(uint32 opcode);
template< int SD >
void execute_vector_shift(uint32 opcode);
template< int SD, class VD, class VA, class VB, class SH >
void execute_vector_shift_octet(uint32 opcode);
template< class OP, class VD, class VB, bool IM >
void execute_vector_splat(uint32 opcode);
template< int SZ, class VD, class VA, class VB >
void execute_vector_sum(uint32 opcode);
// Specialized instruction decoders
template< class RA, class RB, class RC, class CA >

File diff suppressed because it is too large

View File

@ -79,6 +79,8 @@ struct powerpc_dyngen_helper {
static inline void set_fpscr(uint32 value) { CPU->fpscr() = value; }
static inline uint32 get_xer() { return CPU->xer().get(); }
static inline void set_xer(uint32 value) { CPU->xer().set(value); }
static inline uint32 get_vrsave() { return CPU->vrsave(); }
static inline void set_vrsave(uint32 value) { CPU->vrsave() = value; }
static inline void record(int crf, int32 v) { CPU->record_cr(crf, v); }
static inline powerpc_cr_register & cr() { return CPU->cr(); }
static inline powerpc_xer_register & xer() { return CPU->xer(); }
@ -473,6 +475,16 @@ DEFINE_OP(fnmsubs_FD_F0_F1_F2, FD, do_fnmsub(F0, F1, F2));
* Special purpose registers
**/
void OPPROTO op_load_T0_VRSAVE(void)
{
T0 = powerpc_dyngen_helper::get_vrsave();
}
void OPPROTO op_store_T0_VRSAVE(void)
{
powerpc_dyngen_helper::set_vrsave(T0);
}
void OPPROTO op_load_T0_XER(void)
{
T0 = powerpc_dyngen_helper::get_xer();

View File

@ -94,6 +94,8 @@ public:
void gen_mtcrf_T0_im(uint32 mask);
// Special purpose registers
DEFINE_ALIAS(load_T0_VRSAVE,0);
DEFINE_ALIAS(store_T0_VRSAVE,0);
DEFINE_ALIAS(load_T0_XER,0);
DEFINE_ALIAS(store_T0_XER,0);
DEFINE_ALIAS(load_T0_PC,0);

View File

@ -54,28 +54,52 @@
template< class RT, class OP, class RA, class RB, class RC >
struct op_apply {
template< class T >
static inline RT apply(T a, T b, T c) {
template< class A, class B, class C >
static inline RT apply(A a, B b, C c) {
return OP::apply(a, b, c);
}
};
template< class RT, class OP, class RA, class RB >
struct op_apply<RT, OP, RA, RB, null_operand> {
template< class T >
static inline RT apply(T a, T b, T) {
template< class A, class B, class C >
static inline RT apply(A a, B b, C) {
return OP::apply(a, b);
}
};
template< class RT, class OP, class RA >
struct op_apply<RT, OP, RA, null_operand, null_operand> {
template< class T >
static inline RT apply(T a, T, T) {
template< class A, class B, class C >
static inline RT apply(A a, B, C) {
return OP::apply(a);
}
};
template< class RT, class OP, class RA, class RB >
struct op_apply<RT, OP, RA, RB, null_vector_operand> {
template< class A, class B, class C >
static inline RT apply(A a, B b, C) {
return (RT)OP::apply(a, b);
}
};
template< class RT, class OP, class RA >
struct op_apply<RT, OP, RA, null_vector_operand, null_vector_operand> {
template< class A, class B, class C >
static inline RT apply(A a, B, C) {
return (RT)OP::apply(a);
}
};
template< class RT, class OP, class RB >
struct op_apply<RT, OP, null_vector_operand, RB, null_vector_operand> {
template< class A, class B, class C >
static inline RT apply(A, B b, C) {
return (RT)OP::apply(b);
}
};
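
These op_apply specializations let a single execute template serve one-, two- and three-operand element operations: when the decode table plugs null_vector_operand (or null_operand) into an unused slot, the matching partial specialization drops that argument before calling OP::apply. A standalone sketch, not from the commit, of the same dispatch pattern with stand-in operand and operation names:

// Standalone sketch: arity dispatch through partial specialization.
#include <cstdint>
#include <cstdio>

struct null_vector_operand { };   // marker: "this operand slot is unused"
struct some_operand { };          // stand-in for a real vector operand class

struct op_add { static uint32_t apply(uint32_t a, uint32_t b) { return a + b; } };
struct op_neg { static uint32_t apply(uint32_t a) { return ~a + 1; } };

// Generic case: three live operands.
template< class RT, class OP, class RA, class RB, class RC >
struct op_apply {
    static RT apply(uint32_t a, uint32_t b, uint32_t c) { return OP::apply(a, b, c); }
};
// RC unused: forward (a, b) only.
template< class RT, class OP, class RA, class RB >
struct op_apply<RT, OP, RA, RB, null_vector_operand> {
    static RT apply(uint32_t a, uint32_t b, uint32_t) { return OP::apply(a, b); }
};
// RB and RC unused: forward a only.
template< class RT, class OP, class RA >
struct op_apply<RT, OP, RA, null_vector_operand, null_vector_operand> {
    static RT apply(uint32_t a, uint32_t, uint32_t) { return OP::apply(a); }
};

int main() {
    uint32_t two = op_apply<uint32_t, op_add, some_operand, some_operand,
                            null_vector_operand>::apply(3, 4, 99);   // 7, c ignored
    uint32_t one = op_apply<uint32_t, op_neg, some_operand, null_vector_operand,
                            null_vector_operand>::apply(5, 99, 99);  // two's complement of 5
    std::printf("%u %u\n", two, one);
    return 0;
}
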
/**
* Illegal & NOP instructions
**/
@ -1111,6 +1135,7 @@ void powerpc_cpu::execute_mfspr(uint32 opcode)
case powerpc_registers::SPR_XER: d = xer().get();break;
case powerpc_registers::SPR_LR: d = lr(); break;
case powerpc_registers::SPR_CTR: d = ctr(); break;
case powerpc_registers::SPR_VRSAVE: d = vrsave(); break;
#ifdef SHEEPSHAVER
case powerpc_registers::SPR_SDR1: d = 0xdead001f; break;
case powerpc_registers::SPR_PVR: {
@ -1137,6 +1162,7 @@ void powerpc_cpu::execute_mtspr(uint32 opcode)
case powerpc_registers::SPR_XER: xer().set(s); break;
case powerpc_registers::SPR_LR: lr() = s; break;
case powerpc_registers::SPR_CTR: ctr() = s; break;
case powerpc_registers::SPR_VRSAVE: vrsave() = s; break;
#ifndef SHEEPSHAVER
default: execute_illegal(opcode);
#endif
@ -1209,6 +1235,480 @@ void powerpc_cpu::execute_dcbz(uint32 opcode)
increment_pc(4);
}
/**
* Vector load/store instructions
**/
template< class VD, class RA, class RB >
void powerpc_cpu::execute_vector_load(uint32 opcode)
{
uint32 ea = RA::get(this, opcode) + RB::get(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
switch (VD::element_size) {
case 1:
VD::set_element(vD, (ea & 0x0f), vm_read_memory_1(ea));
break;
case 2:
VD::set_element(vD, ((ea >> 1) & 0x07), vm_read_memory_2(ea & ~1));
break;
case 4:
VD::set_element(vD, ((ea >> 2) & 0x03), vm_read_memory_4(ea & ~3));
break;
case 8:
ea &= ~15;
vD.w[0] = vm_read_memory_4(ea + 0);
vD.w[1] = vm_read_memory_4(ea + 4);
vD.w[2] = vm_read_memory_4(ea + 8);
vD.w[3] = vm_read_memory_4(ea + 12);
break;
}
increment_pc(4);
}
template< class VS, class RA, class RB >
void powerpc_cpu::execute_vector_store(uint32 opcode)
{
uint32 ea = RA::get(this, opcode) + RB::get(this, opcode);
typename VS::type & vS = VS::ref(this, opcode);
switch (VS::element_size) {
case 1:
vm_write_memory_1(ea, VS::get_element(vS, (ea & 0x0f)));
break;
case 2:
vm_write_memory_2(ea & ~1, VS::get_element(vS, ((ea >> 1) & 0x07)));
break;
case 4:
vm_write_memory_4(ea & ~3, VS::get_element(vS, ((ea >> 2) & 0x03)));
break;
case 8:
ea &= ~15;
vm_write_memory_4(ea + 0, vS.w[0]);
vm_write_memory_4(ea + 4, vS.w[1]);
vm_write_memory_4(ea + 8, vS.w[2]);
vm_write_memory_4(ea + 12, vS.w[3]);
break;
}
increment_pc(4);
}
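
For the element loads and stores (lvebx/lvehx/lvewx and their store counterparts, element_size 1/2/4) only a single element is transferred, and its index comes from the low bits of the effective address; the whole-register forms (lvx/lvxl/stvx/stvxl, the element_size 8 path above) align the address down to 16 bytes and move four 32-bit words. A standalone worked example, not from the commit, of that indexing:

// Standalone sketch: which vector element an AltiVec element access touches
// for a given effective address (big-endian element numbering).
#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t ea = 0x0001234e;                                 // arbitrary address
    std::printf("lvebx: byte element %u\n", ea & 0x0f);             // 14
    std::printf("lvehx: half element %u, ea 0x%x\n",
                (ea >> 1) & 0x07, ea & ~1u);                        // 7, 0x1234e
    std::printf("lvewx: word element %u, ea 0x%x\n",
                (ea >> 2) & 0x03, ea & ~3u);                        // 3, 0x1234c
    std::printf("lvx:   whole register, ea 0x%x\n", ea & ~15u);     // 0x12340
    return 0;
}
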
/**
* Vector arithmetic
*
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* VC Input operand vector (optional: operand_NONE)
* Rc Predicate to record CR6
* C1 If recording CR6, do we check for '1' bits in vD?
**/
template< class OP, class VD, class VA, class VB, class VC, class Rc, int C1 >
void powerpc_cpu::execute_vector_arith(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VB::type const & vB = VB::const_ref(this, opcode);
typename VC::type const & vC = VC::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
for (int i = 0; i < n_elements; i++) {
const typename VA::element_type a = VA::get_element(vA, i);
const typename VB::element_type b = VB::get_element(vB, i);
const typename VC::element_type c = VC::get_element(vC, i);
typename VD::element_type d = op_apply<typename VD::element_type, OP, VA, VB, VC>::apply(a, b, c);
if (VD::saturate(d))
vscr().set_sat(1);
VD::set_element(vD, i, d);
}
// Propagate all conditions to CR6
if (Rc::test(opcode))
record_cr6(vD, C1);
increment_pc(4);
}
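
The saturate() hook is what separates the modulo forms (vaddubm, ...) from the saturating ones (vaddubs, vaddshs, ...): a vector_saturate_operand, defined in the operands header further down, computes each element in a wider type, clamps it, and reports whether clamping happened so that VSCR[SAT] becomes sticky. A standalone sketch, not from the commit, of that flow for unsigned byte adds, assuming the decode table (in the suppressed diff) pairs this template with the int16-wide saturating byte operand:

// Standalone sketch: compute wide, clamp, and record the sticky SAT flag.
#include <cstdint>
#include <cstdio>

static bool saturate_u8(int16_t &v) {      // mirrors vector_saturate_operand
    if (v > 255) { v = 255; return true; }
    if (v < 0)   { v = 0;   return true; }
    return false;
}

int main() {
    uint8_t a[16], b[16], d[16];
    bool vscr_sat = false;
    for (int i = 0; i < 16; i++) { a[i] = 250; b[i] = uint8_t(i); }
    for (int i = 0; i < 16; i++) {
        int16_t wide = int16_t(a[i] + b[i]);   // no 8-bit wrap-around
        if (saturate_u8(wide))
            vscr_sat = true;                   // sticky, like vscr().set_sat(1)
        d[i] = uint8_t(wide);
    }
    std::printf("d[0]=%u d[15]=%u sat=%d\n", d[0], d[15], vscr_sat);  // 250 255 1
    return 0;
}
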
/**
* Vector mixed arithmetic
*
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* VC Input operand vector (optional: operand_NONE)
**/
template< class OP, class VD, class VA, class VB, class VC >
void powerpc_cpu::execute_vector_arith_mixed(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VB::type const & vB = VB::const_ref(this, opcode);
typename VC::type const & vC = VC::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
const int n_sub_elements = 4 / VA::element_size;
for (int i = 0; i < n_elements; i++) {
const typename VC::element_type c = VC::get_element(vC, i);
typename VD::element_type d = c;
for (int j = 0; j < n_sub_elements; j++) {
const typename VA::element_type a = VA::get_element(vA, i * n_sub_elements + j);
const typename VB::element_type b = VB::get_element(vB, i * n_sub_elements + j);
d += op_apply<typename VD::element_type, OP, VA, VB, null_vector_operand>::apply(a, b, c);
}
if (VD::saturate(d))
vscr().set_sat(1);
VD::set_element(vD, i, d);
}
increment_pc(4);
}
/**
* Vector odd/even arithmetic
*
* ODD Flag: are we computing every odd element?
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* VC Input operand vector (optional: operand_NONE)
**/
template< int ODD, class OP, class VD, class VA, class VB, class VC >
void powerpc_cpu::execute_vector_arith_odd(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VB::type const & vB = VB::const_ref(this, opcode);
typename VC::type const & vC = VC::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
for (int i = 0; i < n_elements; i++) {
const typename VA::element_type a = VA::get_element(vA, (i * 2) + ODD);
const typename VB::element_type b = VB::get_element(vB, (i * 2) + ODD);
const typename VC::element_type c = VC::get_element(vC, (i * 2) + ODD);
typename VD::element_type d = op_apply<typename VD::element_type, OP, VA, VB, VC>::apply(a, b, c);
if (VD::saturate(d))
vscr().set_sat(1);
VD::set_element(vD, i, d);
}
increment_pc(4);
}
/**
* Vector merge instructions
*
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* VC Input operand vector (optional: operand_NONE)
* LO Flag: use lower part of element
**/
template< class VD, class VA, class VB, int LO >
void powerpc_cpu::execute_vector_merge(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VB::type const & vB = VB::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
for (int i = 0; i < n_elements; i += 2) {
VD::set_element(vD, i , VA::get_element(vA, (i / 2) + LO * (n_elements / 2)));
VD::set_element(vD, i + 1, VB::get_element(vB, (i / 2) + LO * (n_elements / 2)));
}
increment_pc(4);
}
/**
* Vector pack/unpack instructions
*
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* VC Input operand vector (optional: operand_NONE)
* LO Flag: use lower part of element
**/
template< class VD, class VA, class VB >
void powerpc_cpu::execute_vector_pack(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VB::type const & vB = VB::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
const int n_pivot = n_elements / 2;
for (int i = 0; i < n_elements; i++) {
typename VD::element_type d;
if (i < n_pivot)
d = VA::get_element(vA, i);
else
d = VB::get_element(vB, i - n_pivot);
if (VD::saturate(d))
vscr().set_sat(1);
VD::set_element(vD, i, d);
}
increment_pc(4);
}
template< int LO, class VD, class VA >
void powerpc_cpu::execute_vector_unpack(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
for (int i = 0; i < n_elements; i++)
VD::set_element(vD, i, VA::get_element(vA, i + LO * n_elements));
increment_pc(4);
}
void powerpc_cpu::execute_vector_pack_pixel(uint32 opcode)
{
powerpc_vr const & vA = vr(vA_field::extract(opcode));
powerpc_vr const & vB = vr(vB_field::extract(opcode));
powerpc_vr & vD = vr(vD_field::extract(opcode));
for (int i = 0; i < 4; i++) {
const uint32 a = vA.w[i];
vD.h[ev_mixed::half_element(i)] = ((a >> 9) & 0xfc00) | ((a >> 6) & 0x03e0) | ((a >> 3) & 0x001f);
const uint32 b = vB.w[i];
vD.h[ev_mixed::half_element(i + 4)] = ((b >> 9) & 0xfc00) | ((b >> 6) & 0x03e0) | ((b >> 3) & 0x001f);
}
increment_pc(4);
}
template< int LO >
void powerpc_cpu::execute_vector_unpack_pixel(uint32 opcode)
{
powerpc_vr const & vB = vr(vB_field::extract(opcode));
powerpc_vr & vD = vr(vD_field::extract(opcode));
for (int i = 0; i < 4; i++) {
const uint32 h = vB.h[ev_mixed::half_element(i + LO * 4)];
vD.w[i] = (((h & 0x8000) ? 0xff000000 : 0) |
((h & 0x7c00) << 6) |
((h & 0x03e0) << 3) |
(h & 0x001f));
}
increment_pc(4);
}
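
vpkpx reduces each 32-bit 0:8:8:8 pixel to 1:5:5:5 by keeping the low bit of the first byte and the top five bits of each colour channel, and vupkhpx/vupklpx reverse that, replicating the 1-bit alpha into a full 0xff byte. A standalone worked example, not from the commit, of the two bit mappings used above:

// Standalone sketch: the 32-bit <-> 16-bit pixel conversions of vpkpx and vupk*px.
#include <cstdint>
#include <cstdio>

static uint16_t pack_pixel(uint32_t a) {
    return ((a >> 9) & 0xfc00) | ((a >> 6) & 0x03e0) | ((a >> 3) & 0x001f);
}

static uint32_t unpack_pixel(uint16_t h) {
    return ((h & 0x8000) ? 0xff000000u : 0u)   // 1-bit alpha -> 0x00 or 0xff
         | ((uint32_t)(h & 0x7c00) << 6)       // 5-bit R -> bits 16-20
         | ((uint32_t)(h & 0x03e0) << 3)       // 5-bit G -> bits 8-12
         |  (uint32_t)(h & 0x001f);            // 5-bit B -> bits 0-4
}

int main() {
    const uint32_t argb = 0x00ff8040;          // A=0x00 R=0xff G=0x80 B=0x40
    const uint16_t p = pack_pixel(argb);       // 0x7e08 = 0 11111 10000 01000
    std::printf("packed=0x%04x unpacked=0x%08x\n", p, unpack_pixel(p));
    // prints: packed=0x7e08 unpacked=0x001f1008
    return 0;
}
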
/**
* Vector shift instructions
*
* SD Shift direction: left (-1), right (+1)
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* VC Input operand vector (optional: operand_NONE)
* SH Shift count operand
**/
template< int SD >
void powerpc_cpu::execute_vector_shift(uint32 opcode)
{
powerpc_vr const & vA = vr(vA_field::extract(opcode));
powerpc_vr const & vB = vr(vB_field::extract(opcode));
powerpc_vr & vD = vr(vD_field::extract(opcode));
// The contents of the low-order three bits of all byte
// elements in vB must be identical to vB[125-127]; otherwise
// the value placed into vD is undefined.
const int sh = vB.b[ev_mixed::byte_element(15)] & 7;
if (sh == 0) {
for (int i = 0; i < 4; i++)
vD.w[i] = vA.w[i];
}
else {
uint32 prev_bits = 0;
if (SD < 0) {
for (int i = 3; i >= 0; i--) {
uint32 next_bits = vA.w[i] >> (32 - sh);
vD.w[i] = ((vA.w[i] << sh) | prev_bits);
prev_bits = next_bits;
}
}
else if (SD > 0) {
for (int i = 0; i < 4; i++) {
uint32 next_bits = vA.w[i] << (32 - sh);
vD.w[i] = ((vA.w[i] >> sh) | prev_bits);
prev_bits = next_bits;
}
}
}
increment_pc(4);
}
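
vsl and vsr shift the entire 128-bit register by 0 to 7 bits, so every 32-bit word has to pick up the bits that fall out of its neighbour; the prev_bits/next_bits carry and the loop direction above implement exactly that. A standalone worked example, not from the commit, of a 3-bit left shift with the words in big-endian order (w[0] most significant):

// Standalone sketch: 128-bit left shift by sh (1..7) bits, carried word to word.
#include <cstdint>
#include <cstdio>

int main() {
    uint32_t w[4] = { 0x80000001, 0x00000000, 0xffffffff, 0x00000001 };
    uint32_t d[4];
    const int sh = 3;
    uint32_t carry = 0;
    for (int i = 3; i >= 0; i--) {             // start at the least significant word
        uint32_t out = w[i] >> (32 - sh);      // bits that move up into the next word
        d[i] = (w[i] << sh) | carry;
        carry = out;
    }
    std::printf("%08x %08x %08x %08x\n", d[0], d[1], d[2], d[3]);
    // prints: 00000008 00000007 fffffff8 00000008 (the top 3 bits fall off)
    return 0;
}
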
template< int SD, class VD, class VA, class VB, class SH >
void powerpc_cpu::execute_vector_shift_octet(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VB::type const & vB = VB::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
const int sh = SH::get(this, opcode);
if (SD < 0) {
for (int i = 0; i < 16; i++) {
if (i + sh < 16)
VD::set_element(vD, i, VA::get_element(vA, i + sh));
else
VD::set_element(vD, i, VB::get_element(vB, 16 - (i + sh)));
}
}
else if (SD > 0) {
for (int i = 0; i < 16; i++) {
if (i < sh)
VD::set_element(vD, i, VB::get_element(vB, 16 - (i - sh)));
else
VD::set_element(vD, i, VA::get_element(vA, i - sh));
}
}
increment_pc(4);
}
/**
* Vector splat instructions
*
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* IM Immediate value to replicate
**/
template< class OP, class VD, class VB, bool IM >
void powerpc_cpu::execute_vector_splat(uint32 opcode)
{
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
uint32 value;
if (IM)
value = OP::apply(vUIMM_field::extract(opcode));
else {
typename VB::type const & vB = VB::const_ref(this, opcode);
const int n = vUIMM_field::extract(opcode) & (n_elements - 1);
value = OP::apply(VB::get_element(vB, n));
}
for (int i = 0; i < n_elements; i++)
VD::set_element(vD, i, value);
increment_pc(4);
}
/**
* Vector sum instructions
*
* SZ Size of destination vector elements
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
**/
template< int SZ, class VD, class VA, class VB >
void powerpc_cpu::execute_vector_sum(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VB::type const & vB = VB::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
typename VD::element_type d;
switch (SZ) {
case 1: // vsum
d = VB::get_element(vB, 3);
for (int j = 0; j < 4; j++)
d += VA::get_element(vA, j);
if (VD::saturate(d))
vscr().set_sat(1);
VD::set_element(vD, 0, 0);
VD::set_element(vD, 1, 0);
VD::set_element(vD, 2, 0);
VD::set_element(vD, 3, d);
break;
case 2: // vsum2
for (int i = 0; i < 4; i += 2) {
d = VB::get_element(vB, i + 1);
for (int j = 0; j < 2; j++)
d += VA::get_element(vA, i + j);
if (VD::saturate(d))
vscr().set_sat(1);
VD::set_element(vD, i + 0, 0);
VD::set_element(vD, i + 1, d);
}
break;
case 4: // vsum4
for (int i = 0; i < 4; i += 1) {
d = VB::get_element(vB, i);
const int n_elements = 4 / VA::element_size;
for (int j = 0; j < n_elements; j++)
d += VA::get_element(vA, i * n_elements + j);
if (VD::saturate(d))
vscr().set_sat(1);
VD::set_element(vD, i, d);
}
break;
}
increment_pc(4);
}
/**
* Misc vector instructions
**/
void powerpc_cpu::execute_vector_permute(uint32 opcode)
{
powerpc_vr const & vA = vr(vA_field::extract(opcode));
powerpc_vr const & vB = vr(vB_field::extract(opcode));
powerpc_vr const & vC = vr(vC_field::extract(opcode));
powerpc_vr & vD = vr(vD_field::extract(opcode));
for (int i = 0; i < 16; i++) {
const int ei = ev_mixed::byte_element(i);
const int n = vC.b[ei] & 0x1f;
const int en = ev_mixed::byte_element(n & 0xf);
vD.b[ei] = (n & 0x10) ? vB.b[en] : vA.b[en];
}
increment_pc(4);
}
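
vperm treats vA concatenated with vB as a 32-byte lookup table and vC as 16 selector bytes: the low five bits of each selector choose the source byte, with bit 4 picking vA (indices 0-15) or vB (16-31). A standalone worked example, not from the commit, on plain arrays, leaving out the ev_mixed host-endian remapping:

// Standalone sketch: vperm byte selection.
#include <cstdint>
#include <cstdio>

int main() {
    uint8_t vA[16], vB[16], vC[16], vD[16];
    for (int i = 0; i < 16; i++) {
        vA[i] = uint8_t(0xa0 + i);             // 0xa0 .. 0xaf
        vB[i] = uint8_t(0xb0 + i);             // 0xb0 .. 0xbf
        vC[i] = uint8_t(31 - i);               // selectors that reverse vA||vB
    }
    for (int i = 0; i < 16; i++) {
        const int n = vC[i] & 0x1f;            // selector value 0..31
        vD[i] = (n & 0x10) ? vB[n & 0x0f] : vA[n & 0x0f];
    }
    std::printf("vD[0]=0x%02x vD[15]=0x%02x\n", vD[0], vD[15]);  // 0xbf, 0xb0
    return 0;
}
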
void powerpc_cpu::execute_mfvscr(uint32 opcode)
{
const int vD = vD_field::extract(opcode);
vr(vD).w[0] = 0;
vr(vD).w[1] = 0;
vr(vD).w[2] = 0;
vr(vD).w[3] = vscr().get();
increment_pc(4);
}
void powerpc_cpu::execute_mtvscr(uint32 opcode)
{
const int vB = vB_field::extract(opcode);
vscr().set(vr(vB).w[3]);
increment_pc(4);
}
/**
* Explicit template instantiations
**/

View File

@ -68,6 +68,9 @@ enum powerpc_instruction {
PPC_I(DCBZ),
PPC_I(DIVW),
PPC_I(DIVWU),
PPC_I(DSS),
PPC_I(DST),
PPC_I(DSTST),
PPC_I(ECIWX),
PPC_I(ECOWX),
PPC_I(EIEIO),
@ -125,6 +128,11 @@ enum powerpc_instruction {
PPC_I(LMW),
PPC_I(LSWI),
PPC_I(LSWX),
PPC_I(LVEBX),
PPC_I(LVEHX),
PPC_I(LVEWX),
PPC_I(LVX),
PPC_I(LVXL),
PPC_I(LWARX),
PPC_I(LWBRX),
PPC_I(LWZ),
@ -138,12 +146,14 @@ enum powerpc_instruction {
PPC_I(MFMSR),
PPC_I(MFSPR),
PPC_I(MFTB),
PPC_I(MFVSCR),
PPC_I(MTCRF),
PPC_I(MTFSB0),
PPC_I(MTFSB1),
PPC_I(MTFSF),
PPC_I(MTFSFI),
PPC_I(MTSPR),
PPC_I(MTVSCR),
PPC_I(MULHW),
PPC_I(MULHWU),
PPC_I(MULLI),
@ -183,6 +193,11 @@ enum powerpc_instruction {
PPC_I(STMW),
PPC_I(STSWI),
PPC_I(STSWX),
PPC_I(STVEBX),
PPC_I(STVEHX),
PPC_I(STVEWX),
PPC_I(STVX),
PPC_I(STVXL),
PPC_I(STW),
PPC_I(STWBRX),
PPC_I(STWCX),
@ -199,6 +214,148 @@ enum powerpc_instruction {
PPC_I(XOR),
PPC_I(XORI),
PPC_I(XORIS),
PPC_I(VADDCUW),
PPC_I(VADDFP),
PPC_I(VADDSBS),
PPC_I(VADDSHS),
PPC_I(VADDSWS),
PPC_I(VADDUBM),
PPC_I(VADDUBS),
PPC_I(VADDUHM),
PPC_I(VADDUHS),
PPC_I(VADDUWM),
PPC_I(VADDUWS),
PPC_I(VAND),
PPC_I(VANDC),
PPC_I(VAVGSB),
PPC_I(VAVGSH),
PPC_I(VAVGSW),
PPC_I(VAVGUB),
PPC_I(VAVGUH),
PPC_I(VAVGUW),
PPC_I(VCFSX),
PPC_I(VCFUX),
PPC_I(VCMPBFP),
PPC_I(VCMPEQFP),
PPC_I(VCMPEQUB),
PPC_I(VCMPEQUH),
PPC_I(VCMPEQUW),
PPC_I(VCMPGEFP),
PPC_I(VCMPGTFP),
PPC_I(VCMPGTSB),
PPC_I(VCMPGTSH),
PPC_I(VCMPGTSW),
PPC_I(VCMPGTUB),
PPC_I(VCMPGTUH),
PPC_I(VCMPGTUW),
PPC_I(VCTSXS),
PPC_I(VCTUXS),
PPC_I(VEXPTEFP),
PPC_I(VLOGEFP),
PPC_I(VMADDFP),
PPC_I(VMAXFP),
PPC_I(VMAXSB),
PPC_I(VMAXSH),
PPC_I(VMAXSW),
PPC_I(VMAXUB),
PPC_I(VMAXUH),
PPC_I(VMAXUW),
PPC_I(VMHADDSHS),
PPC_I(VMHRADDSHS),
PPC_I(VMINFP),
PPC_I(VMINSB),
PPC_I(VMINSH),
PPC_I(VMINSW),
PPC_I(VMINUB),
PPC_I(VMINUH),
PPC_I(VMINUW),
PPC_I(VMLADDUHM),
PPC_I(VMRGHB),
PPC_I(VMRGHH),
PPC_I(VMRGHW),
PPC_I(VMRGLB),
PPC_I(VMRGLH),
PPC_I(VMRGLW),
PPC_I(VMSUMMBM),
PPC_I(VMSUMSHM),
PPC_I(VMSUMSHS),
PPC_I(VMSUMUBM),
PPC_I(VMSUMUHM),
PPC_I(VMSUMUHS),
PPC_I(VMULESB),
PPC_I(VMULESH),
PPC_I(VMULEUB),
PPC_I(VMULEUH),
PPC_I(VMULOSB),
PPC_I(VMULOSH),
PPC_I(VMULOUB),
PPC_I(VMULOUH),
PPC_I(VNMSUB),
PPC_I(VNOR),
PPC_I(VOR),
PPC_I(VPERM),
PPC_I(VPKPX),
PPC_I(VPKSHSS),
PPC_I(VPKSHUS),
PPC_I(VPKSWSS),
PPC_I(VPKSWUS),
PPC_I(VPKUHUM),
PPC_I(VPKUHUS),
PPC_I(VPKUWUM),
PPC_I(VPKUWUS),
PPC_I(VREFP),
PPC_I(VRFIM),
PPC_I(VRFIN),
PPC_I(VRFIP),
PPC_I(VRFIZ),
PPC_I(VRLB),
PPC_I(VRLH),
PPC_I(VRLW),
PPC_I(VRSQRTEFP),
PPC_I(VSEL),
PPC_I(VSL),
PPC_I(VSLB),
PPC_I(VSLDOI),
PPC_I(VSLH),
PPC_I(VSLO),
PPC_I(VSLW),
PPC_I(VSPLTB),
PPC_I(VSPLTH),
PPC_I(VSPLTISB),
PPC_I(VSPLTISH),
PPC_I(VSPLTISW),
PPC_I(VSPLTW),
PPC_I(VSR),
PPC_I(VSRAB),
PPC_I(VSRAH),
PPC_I(VSRAW),
PPC_I(VSRB),
PPC_I(VSRH),
PPC_I(VSRO),
PPC_I(VSRW),
PPC_I(VSUBCUW),
PPC_I(VSUBFP),
PPC_I(VSUBSBS),
PPC_I(VSUBSHS),
PPC_I(VSUBSWS),
PPC_I(VSUBUBM),
PPC_I(VSUBUBS),
PPC_I(VSUBUHM),
PPC_I(VSUBUHS),
PPC_I(VSUBUWM),
PPC_I(VSUBUWS),
PPC_I(VSUMSWS),
PPC_I(VSUM2SWS),
PPC_I(VSUM4SBS),
PPC_I(VSUM4SHS),
PPC_I(VSUM4UBS),
PPC_I(VUPKHPX),
PPC_I(VUPKHSB),
PPC_I(VUPKHSH),
PPC_I(VUPKLPX),
PPC_I(VUPKLSB),
PPC_I(VUPKLSH),
PPC_I(VXOR),
PPC_I(MAX) // Total number of instruction types
};

View File

@ -21,6 +21,21 @@
#ifndef PPC_OPERANDS_H
#define PPC_OPERANDS_H
#include <limits>
/**
* Compile time checks
**/
template< int a, int b >
struct ensure_equals;
template< int n >
struct ensure_equals<n, n> { };
template< class type, int size >
struct ensure_sizeof : ensure_equals<sizeof(type), size> { };
/**
* General purpose registers
**/
@ -96,6 +111,191 @@ struct output_fpr_dw {
template< class field >
struct fpr_dw_operand : input_fpr_dw< field >, output_fpr_dw< field > { };
/**
* Vector registers
**/
struct ev_direct {
static inline int byte_element(int i) { return i; }
static inline int half_element(int i) { return i; }
static inline int word_element(int i) { return i; }
};
// This supposes elements are loaded by 4-byte word parts
#ifdef WORDS_BIGENDIAN
typedef ev_direct ev_mixed;
#else
struct ev_mixed : public ev_direct {
#if 0
static inline int byte_element(int i) { return (i & ~3) + (3 - (i & 3)); }
static inline int half_element(int i) { return (i & ~1) + (1 - (i & 1)); }
#else
static inline int byte_element(int i) {
static const int lookup[16] = {
3, 2, 1, 0,
7, 6, 5, 4,
11, 10, 9, 8,
15, 14, 13, 12
};
return lookup[i];
}
static inline int half_element(int i) {
static const int lookup[8] = {
1, 0, 3, 2,
5, 4, 7, 6
};
return lookup[i];
}
#endif
};
#endif
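
powerpc_vr stores the vector as four host-endian 32-bit words, so on a little-endian host the interpreter's big-endian element numbers have to be remapped inside each word; ev_mixed's lookup tables perform the same within-word swap that the #if 0 arithmetic expresses. A standalone check, not from the commit, of that equivalence:

// Standalone sketch: ev_mixed's tables equal a byte/halfword swap within each
// 32-bit word, which is what a little-endian host needs.
#include <cassert>
#include <cstdio>

int main() {
    static const int byte_lookup[16] = {  3,  2,  1,  0,  7,  6,  5,  4,
                                         11, 10,  9,  8, 15, 14, 13, 12 };
    static const int half_lookup[8]  = { 1, 0, 3, 2, 5, 4, 7, 6 };
    for (int i = 0; i < 16; i++)
        assert(byte_lookup[i] == (i & ~3) + (3 - (i & 3)));
    for (int i = 0; i < 8; i++)
        assert(half_lookup[i] == (i & ~1) + (1 - (i & 1)));
    std::printf("big-endian byte 0 lives at host byte %d of w[0]\n", byte_lookup[0]);
    return 0;
}
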
struct null_vector_operand {
typedef uint32 type;
typedef uint32 element_type;
static const uint32 element_size = sizeof(element_type);
static inline type const_ref(powerpc_cpu *, uint32) { return 0; } // fake so that compiler optimizes it out
static inline element_type get_element(type const & reg, int i) { return 0; }
};
template< class field >
struct vimm_operand {
typedef uint32 type;
typedef uint32 element_type;
static const uint32 element_size = sizeof(element_type);
static inline type const_ref(powerpc_cpu *, uint32 opcode) { return field::extract(opcode); }
static inline element_type get_element(type const & reg, int i) { return reg; }
};
template< class field >
struct input_vr {
static inline powerpc_vr const & const_ref(powerpc_cpu * cpu, uint32 opcode) {
return cpu->vr(field::extract(opcode));
}
};
template< class field >
struct output_vr {
static inline powerpc_vr & ref(powerpc_cpu * cpu, uint32 opcode) {
return cpu->vr(field::extract(opcode));
}
};
template< class field, class value_type >
struct vector_operand : input_vr< field >, output_vr< field > {
typedef powerpc_vr type;
typedef value_type element_type;
static const uint32 element_size = sizeof(element_type);
static inline bool saturate(element_type) { return false; }
};
template< class field, class value_type, class sat_type >
struct vector_saturate_operand : input_vr< field >, output_vr< field > {
typedef powerpc_vr type;
typedef sat_type element_type;
static const uint32 element_size = sizeof(value_type);
static inline bool saturate(element_type & v) {
bool sat = false;
if (v > std::numeric_limits<value_type>::max()) {
v = std::numeric_limits<value_type>::max();
sat = true;
}
else if (v < std::numeric_limits<value_type>::min()) {
v = std::numeric_limits<value_type>::min();
sat = true;
}
return sat;
}
};
template< class field, class value_type, class sat_type = int16, class ev = ev_direct >
struct v16qi_sat_operand : vector_saturate_operand< field, value_type, sat_type >, ensure_sizeof< sat_type, 2 > {
static inline sat_type get_element(powerpc_vr const & reg, int i) {
return (sat_type)(value_type)reg.b[ev::byte_element(i)];
}
static inline void set_element(powerpc_vr & reg, int i, sat_type value) {
reg.b[ev::byte_element(i)] = value;
}
};
template< class field, class value_type, class sat_type = int32, class ev = ev_direct >
struct v8hi_sat_operand : vector_saturate_operand< field, value_type, sat_type >, ensure_sizeof< sat_type, 4 > {
static inline sat_type get_element(powerpc_vr const & reg, int i) {
return (sat_type)(value_type)reg.h[ev::half_element(i)];
}
static inline void set_element(powerpc_vr & reg, int i, sat_type value) {
reg.h[ev::half_element(i)] = value;
}
};
template< class field, class value_type, class sat_type = int64 >
struct v4si_sat_operand : vector_saturate_operand< field, value_type, sat_type >, ensure_sizeof< sat_type, 8 > {
static inline sat_type get_element(powerpc_vr const & reg, int i) {
return (sat_type)(value_type)reg.w[i];
}
static inline void set_element(powerpc_vr & reg, int i, sat_type value) {
reg.w[i] = value;
}
};
template< class field, class value_type = uint8, class ev = ev_direct >
struct v16qi_operand : vector_operand< field, value_type >, ensure_sizeof< value_type, 1 > {
static inline value_type get_element(powerpc_vr const & reg, int i) {
return reg.b[ev::byte_element(i)];
}
static inline void set_element(powerpc_vr & reg, int i, value_type value) {
reg.b[ev::byte_element(i)] = value;
}
};
template< class field, class value_type = uint16, class ev = ev_direct >
struct v8hi_operand : vector_operand< field, value_type >, ensure_sizeof< value_type, 2 > {
static inline value_type get_element(powerpc_vr const & reg, int i) {
return reg.h[ev::half_element(i)];
}
static inline void set_element(powerpc_vr & reg, int i, value_type value) {
reg.h[ev::half_element(i)] = value;
}
};
template< class field, class value_type = uint32 >
struct v4si_operand : vector_operand< field, value_type >, ensure_sizeof< value_type, 4 > {
static inline value_type get_element(powerpc_vr const & reg, int i) {
return reg.w[i];
}
static inline void set_element(powerpc_vr & reg, int i, value_type value) {
reg.w[i] = value;
}
};
template< class field, class value_type = uint64 >
struct v2di_operand : vector_operand< field, value_type >, ensure_sizeof< value_type, 8 > {
static inline value_type get_element(powerpc_vr const & reg, int i) {
return reg.j[i];
}
static inline void set_element(powerpc_vr & reg, int i, value_type value) {
reg.j[i] = value;
}
};
template< class field >
struct v4sf_operand : vector_operand< field, float > {
static inline float get_element(powerpc_vr const & reg, int i) {
return reg.f[i];
}
static inline void set_element(powerpc_vr & reg, int i, float value) {
reg.f[i] = value;
}
};
template< class field >
struct vSH_operand {
static inline uint32 get(powerpc_cpu * cpu, uint32 opcode) {
return (cpu->vr(field::extract(opcode)).b[ev_mixed::byte_element(15)] >> 3) & 15;
}
};
/**
* Immediate operands
**/
@ -239,6 +439,108 @@ typedef fpscr_operand<FPSCR_RN_field> operand_FPSCR_RN;
typedef spr_operand operand_SPR;
typedef tbr_operand operand_TBR;
typedef mask_operand operand_MASK;
typedef null_vector_operand operand_vD_NONE;
typedef null_vector_operand operand_vA_NONE;
typedef null_vector_operand operand_vB_NONE;
typedef null_vector_operand operand_vC_NONE;
typedef v16qi_operand<vD_field> operand_vD_V16QI;
typedef v16qi_operand<vA_field> operand_vA_V16QI;
typedef v16qi_operand<vB_field> operand_vB_V16QI;
typedef v16qi_operand<vC_field> operand_vC_V16QI;
typedef v16qi_operand<vD_field, int8> operand_vD_V16QIs;
typedef v16qi_operand<vA_field, int8> operand_vA_V16QIs;
typedef v16qi_operand<vB_field, int8> operand_vB_V16QIs;
typedef v16qi_operand<vC_field, int8> operand_vC_V16QIs;
typedef v16qi_operand<vD_field, int8, ev_mixed> operand_vD_V16QIms;
typedef v16qi_operand<vB_field, int8, ev_mixed> operand_vB_V16QIms;
typedef v8hi_operand<vD_field> operand_vD_V8HI;
typedef v8hi_operand<vA_field> operand_vA_V8HI;
typedef v8hi_operand<vB_field> operand_vB_V8HI;
typedef v8hi_operand<vC_field> operand_vC_V8HI;
typedef v8hi_operand<vD_field, int16> operand_vD_V8HIs;
typedef v8hi_operand<vA_field, int16> operand_vA_V8HIs;
typedef v8hi_operand<vB_field, int16> operand_vB_V8HIs;
typedef v8hi_operand<vC_field, int16> operand_vC_V8HIs;
typedef v8hi_operand<vD_field, int16, ev_mixed> operand_vD_V8HIms;
typedef v8hi_operand<vB_field, int16, ev_mixed> operand_vB_V8HIms;
typedef v4si_operand<vD_field> operand_vD_V4SI;
typedef v4si_operand<vA_field> operand_vA_V4SI;
typedef v4si_operand<vB_field> operand_vB_V4SI;
typedef v4si_operand<vC_field> operand_vC_V4SI;
typedef v4si_operand<vD_field, int32> operand_vD_V4SIs;
typedef v4si_operand<vA_field, int32> operand_vA_V4SIs;
typedef v4si_operand<vB_field, int32> operand_vB_V4SIs;
typedef v4si_operand<vC_field, int32> operand_vC_V4SIs;
typedef v2di_operand<vD_field> operand_vD_V2DI;
typedef v2di_operand<vA_field> operand_vA_V2DI;
typedef v2di_operand<vB_field> operand_vB_V2DI;
typedef v2di_operand<vC_field> operand_vC_V2DI;
typedef v2di_operand<vD_field, int64> operand_vD_V2DIs;
typedef v2di_operand<vA_field, int64> operand_vA_V2DIs;
typedef v2di_operand<vB_field, int64> operand_vB_V2DIs;
typedef v2di_operand<vC_field, int64> operand_vC_V2DIs;
typedef v4sf_operand<vD_field> operand_vD_V4SF;
typedef v4sf_operand<vA_field> operand_vA_V4SF;
typedef v4sf_operand<vB_field> operand_vB_V4SF;
typedef v4sf_operand<vC_field> operand_vC_V4SF;
typedef v4si_operand<vS_field> operand_vS_V4SI;
typedef v2di_operand<vS_field> operand_vS_V2DI;
typedef vimm_operand<vA_field> operand_vA_UIMM;
typedef vimm_operand<vB_field> operand_vB_UIMM;
typedef vSH_operand<vB_field> operand_SHBO;
// vector mixed element accessors
typedef v16qi_operand<vA_field, uint8, ev_mixed> operand_vA_V16QIm;
typedef v16qi_operand<vB_field, uint8, ev_mixed> operand_vB_V16QIm;
typedef v16qi_operand<vD_field, uint8, ev_mixed> operand_vD_V16QIm;
typedef v8hi_operand<vA_field, uint16, ev_mixed> operand_vA_V8HIm;
typedef v8hi_operand<vB_field, uint16, ev_mixed> operand_vB_V8HIm;
typedef v8hi_operand<vD_field, uint16, ev_mixed> operand_vD_V8HIm;
#define DEFINE_VECTOR_SAT_OPERAND(EV, REG, OP) \
template< class value_type > \
struct operand_##REG##_##EV##_SAT : OP##_sat_operand<REG##_field, value_type> { }
DEFINE_VECTOR_SAT_OPERAND(V4SI, vD, v4si);
DEFINE_VECTOR_SAT_OPERAND(V4SI, vA, v4si);
DEFINE_VECTOR_SAT_OPERAND(V4SI, vB, v4si);
DEFINE_VECTOR_SAT_OPERAND(V4SI, vC, v4si);
DEFINE_VECTOR_SAT_OPERAND(V8HI, vD, v8hi);
DEFINE_VECTOR_SAT_OPERAND(V8HI, vA, v8hi);
DEFINE_VECTOR_SAT_OPERAND(V8HI, vB, v8hi);
DEFINE_VECTOR_SAT_OPERAND(V8HI, vC, v8hi);
DEFINE_VECTOR_SAT_OPERAND(V16QI, vD, v16qi);
DEFINE_VECTOR_SAT_OPERAND(V16QI, vA, v16qi);
DEFINE_VECTOR_SAT_OPERAND(V16QI, vB, v16qi);
DEFINE_VECTOR_SAT_OPERAND(V16QI, vC, v16qi);
#undef DEFINE_VECTOR_SAT_OPERAND
#define DEFINE_VECTOR_MIXED_SAT_OPERAND(EV, SAT, REG, OP, TYPE) \
template< class value_type > \
struct operand_##REG##_##EV##m_##SAT : OP##_sat_operand<REG##_field, value_type, TYPE, ev_mixed> { }
DEFINE_VECTOR_MIXED_SAT_OPERAND(V16QI, SAT, vA, v16qi, int16);
DEFINE_VECTOR_MIXED_SAT_OPERAND(V16QI, SAT, vB, v16qi, int16);
DEFINE_VECTOR_MIXED_SAT_OPERAND(V16QI, SAT, vD, v16qi, int16);
DEFINE_VECTOR_MIXED_SAT_OPERAND(V16QI, USAT, vD, v16qi, uint16);
DEFINE_VECTOR_MIXED_SAT_OPERAND(V8HI, SAT, vA, v8hi, int32);
DEFINE_VECTOR_MIXED_SAT_OPERAND(V8HI, SAT, vB, v8hi, int32);
DEFINE_VECTOR_MIXED_SAT_OPERAND(V8HI, SAT, vD, v8hi, int32);
DEFINE_VECTOR_MIXED_SAT_OPERAND(V8HI, USAT, vD, v8hi, uint32);
#undef DEFINE_VECTOR_MIXED_SAT_OPERAND
#define DEFINE_VECTOR_USAT_OPERAND(EV, REG, OP, TYPE) \
template< class value_type > \
struct operand_##REG##_##EV##_USAT : OP##_sat_operand<REG##_field, value_type, TYPE> { }
// FIXME: temporary for vector pack unsigned saturate variants
DEFINE_VECTOR_USAT_OPERAND(V4SI, vD, v4si, uint64);
DEFINE_VECTOR_USAT_OPERAND(V8HI, vD, v8hi, uint32);
DEFINE_VECTOR_USAT_OPERAND(V16QI, vD, v16qi, uint16);
#undef DEFINE_VECTOR_USAT_OPERAND
#define DEFINE_IMMEDIATE_OPERAND(NAME, FIELD, OP) \
typedef immediate_operand<FIELD##_field, op_##OP> operand_##NAME
@ -255,6 +557,7 @@ DEFINE_IMMEDIATE_OPERAND(D, d, sign_extend_16_32);
DEFINE_IMMEDIATE_OPERAND(NB, NB, nop);
DEFINE_IMMEDIATE_OPERAND(SH, SH, nop);
DEFINE_IMMEDIATE_OPERAND(FM, FM, nop);
DEFINE_IMMEDIATE_OPERAND(SHB, vSH, nop);
#undef DEFINE_IMMEDIATE_OPERAND

View File

@ -31,6 +31,9 @@
* EXPR C++ expression defining the operation, parameters are x/y/z/t
**/
#define DEFINE_ALIAS_OP(NAME, T_NAME, TYPE) \
typedef op_template_##T_NAME<TYPE> op_##NAME
#define DEFINE_OP1(NAME, TYPE, EXPR) \
struct op_##NAME { \
static inline TYPE apply(TYPE x) { \
@ -38,6 +41,10 @@ struct op_##NAME { \
} \
}
#define DEFINE_TEMPLATE_OP1(NAME, EXPR) \
template< class TYPE > \
DEFINE_OP1(template_##NAME, TYPE, EXPR)
#define DEFINE_OP2(NAME, TYPE, EXPR) \
struct op_##NAME { \
static inline TYPE apply(TYPE x, TYPE y) { \
@ -45,6 +52,10 @@ struct op_##NAME { \
} \
}
#define DEFINE_TEMPLATE_OP2(NAME, EXPR) \
template< class TYPE > \
DEFINE_OP2(template_##NAME, TYPE, EXPR)
#define DEFINE_OP3(NAME, TYPE, EXPR) \
struct op_##NAME { \
static inline TYPE apply(TYPE x, TYPE y, TYPE z) { \
@ -59,26 +70,42 @@ struct op_##NAME { \
} \
}
// Basic operations
DEFINE_TEMPLATE_OP1(nop, x);
DEFINE_TEMPLATE_OP2(add, x + y);
DEFINE_TEMPLATE_OP2(sub, x - y);
DEFINE_TEMPLATE_OP2(mul, x * y);
DEFINE_TEMPLATE_OP2(div, x / y);
DEFINE_TEMPLATE_OP2(and, x & y);
DEFINE_TEMPLATE_OP2(or, x | y);
DEFINE_TEMPLATE_OP2(xor, x ^ y);
DEFINE_TEMPLATE_OP2(orc, x | ~y);
DEFINE_TEMPLATE_OP2(andc,x & ~y);
DEFINE_TEMPLATE_OP2(nand,~(x & y));
DEFINE_TEMPLATE_OP2(nor, ~(x | y));
DEFINE_TEMPLATE_OP2(eqv, ~(x ^ y));
// Integer basic operations
DEFINE_OP1(nop, uint32, x);
DEFINE_ALIAS_OP(nop, nop, uint32);
DEFINE_ALIAS_OP(add, add, uint32);
DEFINE_ALIAS_OP(sub, sub, uint32);
DEFINE_ALIAS_OP(mul, mul, uint32);
DEFINE_ALIAS_OP(smul,mul, int32);
DEFINE_ALIAS_OP(div, div, uint32);
DEFINE_ALIAS_OP(sdiv,div, int32);
DEFINE_OP1(neg, uint32, -x);
DEFINE_OP1(compl, uint32, ~x);
DEFINE_OP2(add, uint32, x + y);
DEFINE_OP2(sub, uint32, x - y);
DEFINE_OP2(mul, uint32, x * y);
DEFINE_OP2(smul, int32, x * y);
DEFINE_OP2(div, uint32, x / y);
DEFINE_OP2(sdiv, int32, x / y);
DEFINE_OP2(mod, uint32, x % y);
DEFINE_OP2(and, uint32, x & y);
DEFINE_OP2(or, uint32, x | y);
DEFINE_OP2(xor, uint32, x ^ y);
DEFINE_OP2(orc, uint32, x | ~y);
DEFINE_OP2(andc,uint32, x & ~y);
DEFINE_OP2(nand,uint32, ~(x & y));
DEFINE_OP2(nor, uint32, ~(x | y));
DEFINE_OP2(eqv, uint32, ~(x ^ y));
DEFINE_ALIAS_OP(and, and, uint32);
DEFINE_ALIAS_OP(or, or, uint32);
DEFINE_ALIAS_OP(xor, xor, uint32);
DEFINE_ALIAS_OP(orc, orc, uint32);
DEFINE_ALIAS_OP(andc,andc,uint32);
DEFINE_ALIAS_OP(nand,nand,uint32);
DEFINE_ALIAS_OP(nor, nor, uint32);
DEFINE_ALIAS_OP(eqv, eqv, uint32);
DEFINE_OP2(shll, uint32, x << y);
DEFINE_OP2(shrl, uint32, x >> y);
DEFINE_OP2(shra, uint32, (int32)x >> y);
@ -89,6 +116,14 @@ DEFINE_OP4(ppc_rlwimi, uint32, (op_rotl::apply(x, y) & z) | (t & ~z));
DEFINE_OP3(ppc_rlwinm, uint32, (op_rotl::apply(x, y) & z));
DEFINE_OP3(ppc_rlwnm, uint32, (op_rotl::apply(x, (y & 0x1f)) & z));
DEFINE_ALIAS_OP(add_64, add, uint64);
DEFINE_ALIAS_OP(sub_64, sub, uint64);
DEFINE_ALIAS_OP(smul_64,mul, int64);
DEFINE_ALIAS_OP(and_64, and, uint64);
DEFINE_ALIAS_OP(andc_64,andc,uint64);
DEFINE_ALIAS_OP(or_64, or, uint64);
DEFINE_ALIAS_OP(nor_64, nor, uint64);
DEFINE_ALIAS_OP(xor_64, xor, uint64);
// Floating-point basic operations
@ -105,14 +140,168 @@ DEFINE_OP3(fnmadd, double, -((x * y) + z));
DEFINE_OP3(fnmsub, double, -((x * y) - z));
DEFINE_OP2(fsub, double, x - y);
DEFINE_OP1(fnops, float, x);
DEFINE_OP1(fabss, float, fabs(x));
DEFINE_OP2(fadds, float, x + y);
DEFINE_OP2(fdivs, float, x / y);
DEFINE_OP3(fmadds, float, (x * y) + z);
DEFINE_OP3(fmsubs, float, (x * y) - z);
DEFINE_OP2(fmuls, float, x * y);
DEFINE_OP1(fnabss, float, -fabs(x));
DEFINE_OP1(fnegs, float, -x);
DEFINE_OP3(fnmadds, float, -((x * y) + z));
DEFINE_OP3(fnmsubs, float, -((x * y) - z));
DEFINE_OP2(fsubs, float, x - y);
DEFINE_OP1(exp2, float, exp2f(x));
DEFINE_OP1(log2, float, log2f(x));
DEFINE_OP1(fres, float, 1 / x);
DEFINE_OP1(frsqrt, float, 1 / sqrt(x));
DEFINE_OP1(frim, float, floorf(x));
DEFINE_OP1(frin, float, roundf(x));
DEFINE_OP1(frip, float, ceilf(x));
DEFINE_OP1(friz, float, trunc(x));
// Misc operations used in AltiVec instructions
template< class TYPE >
struct op_vrl {
static inline TYPE apply(TYPE v, TYPE n) {
const int sh = n & ((8 * sizeof(TYPE)) - 1);
return ((v << sh) | (v >> ((8 * sizeof(TYPE)) - sh)));
}
};
template< class TYPE >
struct op_vsl {
static inline TYPE apply(TYPE v, TYPE n) {
const int sh = n & ((8 * sizeof(TYPE)) - 1);
return v << sh;
}
};
template< class TYPE >
struct op_vsr {
static inline TYPE apply(TYPE v, TYPE n) {
const int sh = n & ((8 * sizeof(TYPE)) - 1);
return v >> sh;
}
};
template< uint16 round = 0 >
struct op_mhraddsh {
static inline int32 apply(int32 a, int32 b, int32 c) {
return (((a * b) + round) >> 15) + c;
}
};
struct op_cvt_fp2si {
static inline int64 apply(uint32 a, float b) {
return (int64)(b * (1U << a));
}
};
template< class TYPE >
struct op_cvt_si2fp {
static inline float apply(uint32 a, TYPE b) {
return ((float)b) / ((float)(1U << a));
}
};
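
op_cvt_fp2si and op_cvt_si2fp carry the 2^UIMM scale factor of the fixed-point conversions (presumably vcfux/vcfsx and vctuxs/vctsxs): the integer value is divided, respectively the float multiplied, by 2^UIMM, and the int64 return type leaves room for a saturating destination operand to clamp afterwards. A standalone worked example, not from the commit:

// Standalone sketch: the 2^UIMM scaling used by the AltiVec fixed <-> float ops.
#include <cstdint>
#include <cstdio>

static float   cvt_si2fp(uint32_t uimm, int32_t b) { return (float)b / (float)(1u << uimm); }
static int64_t cvt_fp2si(uint32_t uimm, float b)   { return (int64_t)(b * (1u << uimm)); }

int main() {
    std::printf("%g\n", cvt_si2fp(8, 640));                 // 640 / 256 = 2.5
    std::printf("%lld\n", (long long)cvt_fp2si(8, 2.5f));   // 2.5 * 256 = 640
    // A huge float input would overflow 32 bits here, which is why the emulator
    // computes in int64 and lets the saturating operand clamp the result.
    return 0;
}
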
template< class TYPE >
struct op_max {
static inline TYPE apply(TYPE a, TYPE b) {
return (a > b) ? a : b;
}
};
template< class TYPE >
struct op_min {
static inline TYPE apply(TYPE a, TYPE b) {
return (a < b) ? a : b;
}
};
template< int nbytes >
struct op_all_ones {
static const uint32 value = (1U << (8 * nbytes)) - 1;
};
template<>
struct op_all_ones<4> {
static const uint32 value = 0xffffffff;
};
template< class VX >
struct op_cmp {
static const uint32 result = op_all_ones<sizeof(VX)>::value;
};
template< class VX >
struct op_cmp_eq {
static inline uint32 apply(VX a, VX b) {
return a == b ? op_cmp<VX>::result : 0;
}
};
template< class VX >
struct op_cmp_ge {
static inline uint32 apply(VX a, VX b) {
return a >= b ? op_cmp<VX>::result : 0;
}
};
template< class VX >
struct op_cmp_gt {
static inline uint32 apply(VX a, VX b) {
return a > b ? op_cmp<VX>::result : 0;
}
};
struct op_cmpbfp {
static inline uint32 apply(float a, float b) {
const bool le = a <= b;
const bool ge = a >= -b;
return (le ? 0 : (1 << 31)) | (ge ? 0 : (1 << 30));
}
};
DEFINE_OP3(vsel, uint32, ((y & z) | (x & ~z)));
DEFINE_OP3(vmaddfp, float, ((x * z) + y));
DEFINE_OP3(vnmsubfp, float, -((x * z) - y));
DEFINE_OP3(mladduh, uint32, ((x * y) + z) & 0xffff);
DEFINE_OP2(addcuw, uint32, ((uint64)x + (uint64)y) >> 32);
DEFINE_OP2(subcuw, uint32, (~((int64)x - (int64)y) >> 32) & 1);
DEFINE_OP2(avgsb, int8, (((int16)x + (int16)y + 1) >> 1));
DEFINE_OP2(avgsh, int16, (((int32)x + (int32)y + 1) >> 1));
DEFINE_OP2(avgsw, int32, (((int64)x + (int64)y + 1) >> 1));
DEFINE_OP2(avgub, uint8, ((uint16)x + (uint16)y + 1) >> 1);
DEFINE_OP2(avguh, uint16, ((uint32)x + (uint32)y + 1) >> 1);
DEFINE_OP2(avguw, uint32, ((uint64)x + (uint64)y + 1) >> 1);
#undef DEFINE_OP1
#undef DEFINE_OP2
#undef DEFINE_OP3
#undef DEFINE_OP4
#undef DEFINE_TEMPLATE_OP1
#undef DEFINE_TEMPLATE_OP2
#undef DEFINE_TEMPLATE_OP3
#undef DEFINE_ALIAS_OP
// Sign/Zero-extend operation
struct op_sign_extend_5_32 {
static inline uint32 apply(uint32 value) {
if (value & 0x10)
value -= 0x20;
return value;
}
};
struct op_sign_extend_16_32 {
static inline uint32 apply(uint32 value) {
return (uint32)(int32)(int16)value;

View File

@ -162,6 +162,58 @@ union powerpc_fpr {
};
/**
* Vector Status and Control Register
**/
class powerpc_vscr
{
uint8 nj;
uint8 sat;
public:
powerpc_vscr();
void set(uint32 v);
uint32 get() const;
uint32 get_nj() const { return nj; }
void set_nj(int v) { nj = v; }
uint32 get_sat() const { return sat; }
void set_sat(int v) { sat = v; }
};
inline
powerpc_vscr::powerpc_vscr()
: nj(0), sat(0)
{ }
inline uint32
powerpc_vscr::get() const
{
return (nj << 16) | sat;
}
inline void
powerpc_vscr::set(uint32 v)
{
nj = VSCR_NJ_field::extract(v);
sat = VSCR_SAT_field::extract(v);
}
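
The two architected VSCR bits are NJ (non-Java mode, denormals flushed to zero) and SAT (the sticky saturation flag); in the 32-bit word that get() returns they sit at bit 16 and bit 0, which is exactly what VSCR_NJ_field (bit 15) and VSCR_SAT_field (bit 31) describe in IBM numbering. A standalone round-trip check, not from the commit, of that layout:

// Standalone sketch: the 32-bit VSCR layout assumed by get()/set(),
// NJ at bit 16 and SAT at bit 0.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct vscr_sketch {
    uint8_t nj, sat;
    vscr_sketch() : nj(0), sat(0) { }
    uint32_t get() const { return ((uint32_t)nj << 16) | sat; }
    void set(uint32_t v) { nj = (v >> 16) & 1; sat = v & 1; }
};

int main() {
    vscr_sketch v;
    v.set(0x00010001);                       // NJ=1, SAT=1
    assert(v.nj == 1 && v.sat == 1);
    v.set(0);
    v.sat = 1;                               // e.g. after a vaddsbs that clamped
    std::printf("VSCR=0x%08x\n", v.get());   // 0x00000001
    return 0;
}
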
/**
* Vector register
**/
union powerpc_vr
{
uint8 b[16];
uint16 h[8];
uint32 w[4];
uint64 j[2];
float f[4];
double d[2];
};
/**
* User Environment Architecture (UEA) Register Set
**/
@ -185,6 +237,7 @@ struct powerpc_registers
SPR_CTR = 9,
SPR_SDR1 = 25,
SPR_PVR = 287,
SPR_VRSAVE = 256,
};
static inline int GPR(int r) { return GPR_BASE + r; }
@ -203,6 +256,9 @@ struct powerpc_registers
static uint32 reserve_valid;
static uint32 reserve_addr;
static uint32 reserve_data;
powerpc_vr vr[32]; // Vector Registers
powerpc_vscr vscr; // Vector Status and Control Register
uint32 vrsave; // AltiVec Save Register
};
#endif /* PPC_REGISTERS_H */

View File

@ -684,6 +684,9 @@ powerpc_cpu::compile_block(uint32 entry_point)
case powerpc_registers::SPR_CTR:
dg.gen_load_T0_CTR();
break;
case powerpc_registers::SPR_VRSAVE:
dg.gen_load_T0_VRSAVE();
break;
#ifdef SHEEPSHAVER
case powerpc_registers::SPR_SDR1:
dg.gen_mov_32_T0_im(0xdead001f);
@ -697,7 +700,7 @@ powerpc_cpu::compile_block(uint32 entry_point)
dg.gen_mov_32_T0_im(0);
break;
#else
default: goto do_illegal;
default: goto do_generic;
#endif
}
dg.gen_store_T0_GPR(rD_field::extract(opcode));
@ -717,8 +720,11 @@ powerpc_cpu::compile_block(uint32 entry_point)
case powerpc_registers::SPR_CTR:
dg.gen_store_T0_CTR();
break;
case powerpc_registers::SPR_VRSAVE:
dg.gen_store_T0_VRSAVE();
break;
#ifndef SHEEPSHAVER
default: goto do_illegal;
default: goto do_generic;
#endif
}
break;