macemu/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp

1777 lines
44 KiB
C++

/*
* ppc-execute.cpp - PowerPC semantics
*
* Kheperix (C) 2003-2005 Gwenole Beauchesne
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "sysdeps.h"
#include <stdio.h>
#include <math.h>
#include <time.h>
#include "cpu/vm.hpp"
#include "cpu/ppc/ppc-cpu.hpp"
#include "cpu/ppc/ppc-bitfields.hpp"
#include "cpu/ppc/ppc-operands.hpp"
#include "cpu/ppc/ppc-operations.hpp"
#include "cpu/ppc/ppc-execute.hpp"
#ifndef SHEEPSHAVER
#include "basic-kernel.hpp"
#endif
#ifdef SHEEPSHAVER
#include "main.h"
#include "prefs.h"
#endif
#if ENABLE_MON
#include "mon.h"
#include "mon_disass.h"
#endif
#define DEBUG 0
#include "debug.h"
/**
* Illegal & NOP instructions
**/
// Handler for opcodes that cannot be executed.
//
// Always reports the faulting PC and opcode on stderr. On SheepShaver builds
// the "ignoreillegal" preference turns the instruction into a 4-byte skip.
// Otherwise the instruction is disassembled and mon is entered (ENABLE_MON
// builds only), and the process is finally aborted.
void powerpc_cpu::execute_illegal(uint32 opcode)
{
    fprintf(stderr, "Illegal instruction at %08x, opcode = %08x\n", pc(), opcode);

#ifdef SHEEPSHAVER
    // The user asked us to step over illegal instructions
    const bool skip_instruction = PrefsFindBool("ignoreillegal");
    if (skip_instruction) {
        increment_pc(4);
        return;
    }
#endif

#if ENABLE_MON
    disass_ppc(stdout, pc(), opcode);

    // Start up mon in real-mode
    const char *mon_args[4] = { "mon", "-m", "-r", NULL };
    mon(3, mon_args);
#endif

    abort();
}
// No-operation handler: the opcode is not inspected, only PC advances by
// one instruction (4 bytes).
void powerpc_cpu::execute_nop(uint32 opcode)
{
    (void)opcode;
    increment_pc(4);
}
/**
 *  Floating-point rounding modes conversion
 *
 *      round   PowerPC rounding-mode encoding (0..3)
 *
 *  Returns the matching <fenv.h> rounding-mode macro. Values outside
 *  0..3 fall back to FE_TONEAREST instead of running off the end of
 *  the function (which is undefined behaviour).
 **/
static inline int ppc_to_native_rounding_mode(int round)
{
    switch (round) {
    case 0: return FE_TONEAREST;
    case 1: return FE_TOWARDZERO;
    case 2: return FE_UPWARD;
    case 3: return FE_DOWNWARD;
    }
    // Callers in this file pass a 2-bit field, so this is a safety net only
    return FE_TONEAREST;
}
/**
 *  Helper class to compute the carry condition
 *
 *      OP      Operation to perform
 *
 *  The primary template never reports a carry; op_add is specialized
 *  to report the carry out of a three-operand 32-bit sum.
 */
template< class OP >
struct op_carry {
    static inline bool apply(uint32, uint32, uint32) {
        return false;
    }
};

template<>
struct op_carry<op_add> {
    static inline bool apply(uint32 a, uint32 b, uint32 c) {
        // TODO: use 32-bit arithmetics
        const uint64 wide_sum = (uint64)a + (uint64)b + (uint64)c;
        // Anything left above bit 31 is the carry out
        const uint32 high_word = (uint32)(wide_sum >> 32);
        return high_word != 0;
    }
};
// Helper class to compute the signed overflow condition of operation OP.
// The primary template never reports overflow; op_neg and op_add are
// specialized below.
template< class OP >
struct op_overflow {
    static inline bool apply(uint32, uint32, uint32) {
        return false;
    }
};

// Negation overflows only for the most negative 32-bit value
template<>
struct op_overflow<op_neg> {
    static inline bool apply(uint32 a, uint32, uint32) {
        return a == 0x80000000;
    }
};

// Addition overflows when the sign of the exact (64-bit) sum differs from
// the sign of the sum truncated to 32 bits
template<>
struct op_overflow<op_add> {
    static inline bool apply(uint32 a, uint32 b, uint32 c) {
        // TODO: use 32-bit arithmetics
        const int64 exact_sum = (int64)(int32)a + (int64)(int32)b + (int64)(int32)c;
        const uint32 exact_sign = (uint32)(((uint64)exact_sum) >> 63);
        const uint32 truncated_sign = ((uint32)exact_sum) >> 31;
        return exact_sign != truncated_sign;
    }
};
/**
 *  Perform an addition/subtraction
 *
 *      RA      Input operand register, possibly 0
 *      RB      Input operand either register or immediate
 *      RC      Input carry
 *      CA      Predicate to compute the carry out of the operation
 *      OE      Predicate to compute the overflow flag
 *      Rc      Predicate to record CR0
 *
 *  The three inputs are summed modulo 2^32 and the result is written
 *  to the RD operand of the opcode.
 **/
template< class RA, class RB, class RC, class CA, class OE, class Rc >
void powerpc_cpu::execute_addition(uint32 opcode)
{
    const uint32 lhs = RA::get(this, opcode);
    const uint32 rhs = RB::get(this, opcode);
    const uint32 carry_in = RC::get(this, opcode);
    const uint32 sum = lhs + rhs + carry_in;

    // XER (CA) is touched only by carrying forms
    if (CA::test(opcode)) {
        const bool carry_out = op_carry<op_add>::apply(lhs, rhs, carry_in);
        xer().set_ca(carry_out);
    }

    // XER (OV, SO) is touched only when OE is set
    if (OE::test(opcode)) {
        const bool overflowed = op_overflow<op_add>::apply(lhs, rhs, carry_in);
        xer().set_ov(overflowed);
    }

    // CR0 (LT, GT, EQ, SO) is recorded only when Rc is set
    if (Rc::test(opcode))
        record_cr0((int32)sum);

    // Write back the result and move on to the next instruction
    operand_RD::set(this, opcode, sum);
    increment_pc(4);
}
/**
 *  Generic arithmetic instruction
 *
 *      OP      Operation to perform
 *      RD      Output register
 *      RA      Input operand register
 *      RB      Input operand register or immediate (optional: operand_NONE)
 *      RC      Input operand register or immediate (optional: operand_NONE)
 *      OE      Predicate to compute overflow flag
 *      Rc      Predicate to record CR0
 *
 *  The result of OP is computed through op_apply<> and committed to RD.
 **/
template< class OP, class RD, class RA, class RB, class RC, class OE, class Rc >
void powerpc_cpu::execute_generic_arith(uint32 opcode)
{
    const uint32 in_a = RA::get(this, opcode);
    const uint32 in_b = RB::get(this, opcode);
    const uint32 in_c = RC::get(this, opcode);
    const uint32 result = op_apply<uint32, OP, RA, RB, RC>::apply(in_a, in_b, in_c);

    // XER (OV, SO) is updated only when the instruction has OE set
    if (OE::test(opcode)) {
        const bool overflowed = op_overflow<OP>::apply(in_a, in_b, in_c);
        xer().set_ov(overflowed);
    }

    // CR0 (LT, GT, EQ, SO) is updated only when the instruction has Rc set
    if (Rc::test(opcode))
        record_cr0((int32)result);

    // Write back and advance
    RD::set(this, opcode, result);
    increment_pc(4);
}
/**
 *  Rotate Left Word Immediate then Mask Insert
 *
 *      SH      Shift count
 *      MA      Mask value
 *      Rc      Predicate to record CR0
 *
 *  RS, the shift count, the mask and the current RA are handed to
 *  op_ppc_rlwimi; its result replaces RA.
 **/
template< class SH, class MA, class Rc >
void powerpc_cpu::execute_rlwimi(uint32 opcode)
{
    const uint32 shift_count = SH::get(this, opcode);
    const uint32 mask = MA::get(this, opcode);
    const uint32 source = operand_RS::get(this, opcode);
    const uint32 previous = operand_RA::get(this, opcode);
    const uint32 merged = op_ppc_rlwimi::apply(source, shift_count, mask, previous);

    // CR0 (LT, GT, EQ, SO) is updated only when the instruction has Rc set
    if (Rc::test(opcode))
        record_cr0((int32)merged);

    // RA is both an input and the destination
    operand_RA::set(this, opcode, merged);
    increment_pc(4);
}
/**
* Shift instructions
*
* OP Operation to perform
* RD Output operand
* RA Source operand
* SH Shift count
* SO Shift operation
* CA Predicate to compute carry bit
* Rc Predicate to record CR0
**/
// Result of a shift whose count has bit 0x20 set (see execute_shift).
// The primary template yields 0 whatever the source value is.
template< class OP >
struct invalid_shift {
    static inline uint32 value(uint32) {
        return 0;
    }
};

// For op_shra the result is the sign bit of the source replicated over
// the whole word.
template<>
struct invalid_shift<op_shra> {
    static inline uint32 value(uint32 r) {
        const bool negative = (r & 0x80000000) != 0;
        return negative ? 0xffffffff : 0;
    }
};
// Shift instruction handler (see the parameter list in the header comment
// above): computes OP(RA, count) or, when the count has bit 0x20 set, the
// value supplied by invalid_shift<OP>; optionally updates XER[CA] and CR0.
template< class OP, class RD, class RA, class SH, class SO, class CA, class Rc >
void powerpc_cpu::execute_shift(uint32 opcode)
{
    const uint32 count = SO::apply(SH::get(this, opcode));
    const uint32 source = RA::get(this, opcode);
    const bool count_in_range = (count & 0x20) == 0;
    uint32 result;

    // Shift operation is valid only if rB[26] = 0
    if (count_in_range) {
        result = OP::apply(source, count);
        if (CA::test(opcode)) {
            // Carry: source is negative and at least one 1-bit is shifted out
            const uint32 carry = (source & 0x80000000) && (source & ~(0xffffffff << count));
            xer().set_ca(carry);
        }
    }
    else {
        result = invalid_shift<OP>::value(source);
        if (CA::test(opcode))
            xer().set_ca(result >> 31);
    }

    // CR0 (LT, GT, EQ, SO) is updated only when the instruction has Rc set
    if (Rc::test(opcode))
        record_cr0((int32)result);

    // Write back and advance
    RD::set(this, opcode, result);
    increment_pc(4);
}
/**
 *  Branch conditional instructions
 *
 *      PC      Input program counter (PC, LR, CTR)
 *      BO      BO operand
 *      DP      Displacement operand
 *      AA      Predicate for absolute address
 *      LK      Predicate to record NPC into link register
 *
 *  The CR condition is evaluated first, then CTR is decremented and
 *  tested, each only when BO asks for it. The link register is written
 *  after the branch target has been computed.
 **/
template< class PC, class BO, class DP, class AA, class LK >
void powerpc_cpu::execute_branch(uint32 opcode)
{
    const int bo = BO::get(this, opcode);

    // Condition register test
    bool cond_ok = true;
    if (BO_CONDITIONAL_BRANCH(bo)) {
        const bool cr_bit = cr().test(BI_field::extract(opcode));
        cond_ok = BO_BRANCH_IF_TRUE(bo) ? cr_bit : !cr_bit;
    }

    // Count register test (CTR is decremented as a side effect)
    bool ctr_ok = true;
    if (BO_DECREMENT_CTR(bo)) {
        ctr() -= 1;
        const bool ctr_is_zero = (ctr() == 0);
        ctr_ok = BO_BRANCH_IF_CTR_ZERO(bo) ? ctr_is_zero : !ctr_is_zero;
    }

    const uint32 next_pc = pc() + 4;
    uint32 new_pc = next_pc;
    if (ctr_ok && cond_ok) {
        // Taken: absolute forms start from 0, the target is word-aligned
        new_pc = ((AA::test(opcode) ? 0 : PC::get(this, opcode)) + DP::get(this, opcode)) & -4;
    }
    pc() = new_pc;

    // The return address is recorded whether or not the branch is taken
    if (LK::test(opcode))
        lr() = next_pc;
}
/**
 *  Compare instructions
 *
 *      RB      Second operand (GPR, SIMM, UIMM)
 *      CT      Type of variables to be compared (uint32, int32)
 *
 *  Records -1, 0 or +1 (RA less than, equal to, greater than the
 *  second operand) into CR field crfD through record_cr().
 **/
template< class RB, typename CT >
void powerpc_cpu::execute_compare(uint32 opcode)
{
    const uint32 raw_a = operand_RA::get(this, opcode);
    const uint32 raw_b = RB::get(this, opcode);
    const uint32 crfd = crfD_field::extract(opcode);

    // Reinterpret both operands with the requested signedness
    const CT lhs = (CT)raw_a;
    const CT rhs = (CT)raw_b;

    int ordering = 0;
    if (lhs < rhs)
        ordering = -1;
    else if (lhs > rhs)
        ordering = +1;

    record_cr(crfd, ordering);
    increment_pc(4);
}
/**
 *  Operations on condition register
 *
 *      OP      Operation to perform
 *
 *  Bits crbA and crbB of CR (bit 0 being the most significant one) are
 *  combined with OP and the low bit of the result replaces bit crbD.
 *
 *  The single-bit mask is built from an unsigned constant so that
 *  crbD == 0 does not shift a signed 1 into the sign bit.
 **/
template< class OP >
void powerpc_cpu::execute_cr_op(uint32 opcode)
{
    const uint32 crbA = crbA_field::extract(opcode);
    const uint32 a = (cr().get() >> (31 - crbA)) & 1;
    const uint32 crbB = crbB_field::extract(opcode);
    const uint32 b = (cr().get() >> (31 - crbB)) & 1;
    const uint32 crbD = crbD_field::extract(opcode);
    const uint32 d = OP::apply(a, b) & 1;

    // Replace only the destination bit, keep all the others
    const uint32 dest_shift = 31 - crbD;
    const uint32 dest_mask = (uint32)1 << dest_shift;
    cr().set((cr().get() & ~dest_mask) | (d << dest_shift));
    increment_pc(4);
}
/**
 *  Divide instructions
 *
 *      SB      Signed division
 *      OE      Predicate to compute overflow
 *      Rc      Predicate to record CR0
 *
 *  The actual division is delegated to do_execute_divide<>, selected
 *  at run time on the OE predicate.
 **/
template< bool SB, class OE, class Rc >
void powerpc_cpu::execute_divide(uint32 opcode)
{
    const uint32 dividend = operand_RA::get(this, opcode);
    const uint32 divisor = operand_RB::get(this, opcode);

    // Specialize divide semantic action
    uint32 quotient;
    if (!OE::test(opcode))
        quotient = do_execute_divide<SB, false>(dividend, divisor);
    else
        quotient = do_execute_divide<SB, true>(dividend, divisor);

    // CR0 (LT, GT, EQ, SO) is updated only when the instruction has Rc set
    if (Rc::test(opcode))
        record_cr0((int32)quotient);

    // Write back and advance
    operand_RD::set(this, opcode, quotient);
    increment_pc(4);
}
/**
 *  Multiply instructions
 *
 *      HI      Predicate for multiply high word
 *      SB      Predicate for signed operation
 *      OE      Predicate to compute overflow
 *      Rc      Predicate to record CR0
 *
 *  The full 64-bit product is formed first; either its low or its
 *  high 32 bits are committed to RD.
 **/
template< bool HI, bool SB, class OE, class Rc >
void powerpc_cpu::execute_multiply(uint32 opcode)
{
    const uint32 lhs = operand_RA::get(this, opcode);
    const uint32 rhs = operand_RB::get(this, opcode);

    uint64 product;
    if (SB)
        product = (uint64)((int64)(int32)lhs * (int64)(int32)rhs);
    else
        product = (uint64)lhs * (uint64)rhs;

    // Overflow if the product cannot be represented in 32 bits: the upper
    // 33 bits must be either all zeroes or all ones
    if (OE::test(opcode)) {
        const uint64 upper_bits = product & UVAL64(0xffffffff80000000);
        const bool fits = (upper_bits == 0) || (upper_bits == UVAL64(0xffffffff80000000));
        xer().set_ov(!fits);
    }

    // Only keep high word if multiply high instruction
    if (HI)
        product >>= 32;

    const uint32 result = (uint32)product;

    // CR0 (LT, GT, EQ, SO) is updated only when the instruction has Rc set
    if (Rc::test(opcode))
        record_cr0(result);

    // Write back and advance
    operand_RD::set(this, opcode, result);
    increment_pc(4);
}
/**
 *  Record FPSCR
 *
 *      exceptions  FPSCR bits to OR into the register (0 for none)
 *
 *  Merges the given bits into FPSCR and recomputes the FX, VX and FEX
 *  summary bits. The whole body is compiled out unless
 *  PPC_ENABLE_FPU_EXCEPTIONS is enabled.
 **/
void powerpc_cpu::record_fpscr(int exceptions)
{
#if PPC_ENABLE_FPU_EXCEPTIONS
    // VX and FEX are summaries: drop them and rebuild them below
    fpscr() &= ~(FPSCR_VX_field::mask() | FPSCR_FEX_field::mask());

    // FX is raised along with any newly recorded bit
    if (exceptions)
        fpscr() |= FPSCR_FX_field::mask() | exceptions;

    // VX is set as soon as any invalid-operation cause bit is set
    const uint32 invalid_op_causes =
        FPSCR_VXSNAN_field::mask() | FPSCR_VXISI_field::mask() |
        FPSCR_VXIDI_field::mask()  | FPSCR_VXZDZ_field::mask() |
        FPSCR_VXIMZ_field::mask()  | FPSCR_VXVC_field::mask()  |
        FPSCR_VXSOFT_field::mask() | FPSCR_VXSQRT_field::mask() |
        FPSCR_VXCVI_field::mask();
    if (fpscr() & invalid_op_causes)
        fpscr() |= FPSCR_VX_field::mask();

    // FEX is set when an exception bit and its matching enable bit are both set
    const bool vx_enabled = (fpscr() & FPSCR_VX_field::mask()) && (fpscr() & FPSCR_VE_field::mask());
    const bool ox_enabled = (fpscr() & FPSCR_OX_field::mask()) && (fpscr() & FPSCR_OE_field::mask());
    const bool ux_enabled = (fpscr() & FPSCR_UX_field::mask()) && (fpscr() & FPSCR_UE_field::mask());
    const bool zx_enabled = (fpscr() & FPSCR_ZX_field::mask()) && (fpscr() & FPSCR_ZE_field::mask());
    const bool xx_enabled = (fpscr() & FPSCR_XX_field::mask()) && (fpscr() & FPSCR_XE_field::mask());
    if (vx_enabled || ox_enabled || ux_enabled || zx_enabled || xx_enabled)
        fpscr() |= FPSCR_FEX_field::mask();
#endif
}
/**
* Floating-point arithmetic
*
* FP Floating Point type
* OP Operation to perform
* RD Output register
* RA Input operand
* RB Input operand (optional)
* RC Input operand (optional)
* Rc Predicate to record CR1
* FPSCR Predicate to compute FPSCR bits
*
* The operation itself is always evaluated on doubles through op_apply<>
* and its result is converted to FP before being committed to RD.
**/
template< class FP, class OP, class RD, class RA, class RB, class RC, class Rc, bool FPSCR >
void powerpc_cpu::execute_fp_arith(uint32 opcode)
{
const double a = RA::get(this, opcode);
const double b = RB::get(this, opcode);
const double c = RC::get(this, opcode);
#if PPC_ENABLE_FPU_EXCEPTIONS
// Only assigned and read when FPSCR is true
int exceptions;
if (FPSCR) {
// Exception bits derived from the operands alone, before computing
exceptions = op_apply<uint32, fp_exception_condition<OP>, RA, RB, RC>::apply(a, b, c);
// Start from a clean set of native exception flags
feclearexcept(FE_ALL_EXCEPT);
febarrier();
}
#endif
FP d = op_apply<double, OP, RA, RB, RC>::apply(a, b, c);
if (FPSCR) {
// Update FPSCR exception bits
#if PPC_ENABLE_FPU_EXCEPTIONS
febarrier();
// Translate the native flags raised by the operation into FPSCR bits
int raised = fetestexcept(FE_ALL_EXCEPT);
if (raised & FE_INEXACT)
exceptions |= FPSCR_XX_field::mask();
if (raised & FE_DIVBYZERO)
exceptions |= FPSCR_ZX_field::mask();
if (raised & FE_UNDERFLOW)
exceptions |= FPSCR_UX_field::mask();
if (raised & FE_OVERFLOW)
exceptions |= FPSCR_OX_field::mask();
record_fpscr(exceptions);
#endif
// FPSCR[FPRF] is set to the class and sign of the result
// (skipped while FPSCR[VE] is set)
if (!FPSCR_VE_field::test(fpscr()))
fp_classify(d);
}
// Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
if (Rc::test(opcode))
record_cr1();
// Commit result to output operand
RD::set(this, opcode, d);
increment_pc(4);
}
/**
* Load/store instructions
*
* OP Operation to perform on loaded value
* RA Base operand
* RB Displacement (GPR(RB), EXTS(d))
* LD Load operation?
* SZ Size of load/store operation
* UP Update RA with EA
* RX Reverse operand
**/
// memory_helper<SZ, RX> wraps the vm_read_memory_N / vm_write_memory_N
// accessors for an access of SZ bytes. When RX is true the *_reversed
// variants of the accessors are used instead.
// Only the sizes instantiated through DEFINE_MEMORY_HELPER below (1, 2, 4)
// are defined; the primary template is declared but has no definition.
template< int SZ, bool RX >
struct memory_helper;
// Defines the partial specialization memory_helper<SIZE, RX> with its
// load(ea) and store(ea, value) static members
#define DEFINE_MEMORY_HELPER(SIZE) \
template< bool RX > \
struct memory_helper<SIZE, RX> \
{ \
static inline uint32 load(uint32 ea) { \
return RX ? vm_read_memory_##SIZE##_reversed(ea) : vm_read_memory_##SIZE(ea); \
} \
static inline void store(uint32 ea, uint32 value) { \
RX ? vm_write_memory_##SIZE##_reversed(ea, value) : vm_write_memory_##SIZE(ea, value); \
} \
}
DEFINE_MEMORY_HELPER(1);
DEFINE_MEMORY_HELPER(2);
DEFINE_MEMORY_HELPER(4);
// Integer load/store handler (template parameters are described in the
// header comment above). The effective address is RA + RB; a load passes
// the value read through OP before writing RD, a store writes RS to memory.
// With UP set, RA receives the effective address afterwards.
template< class OP, class RA, class RB, bool LD, int SZ, bool UP, bool RX >
void powerpc_cpu::execute_loadstore(uint32 opcode)
{
    const uint32 base = RA::get(this, opcode);
    const uint32 displacement = RB::get(this, opcode);
    const uint32 ea = base + displacement;

    if (LD) {
        const uint32 loaded = memory_helper<SZ, RX>::load(ea);
        operand_RD::set(this, opcode, OP::apply(loaded));
    }
    else {
        const uint32 stored = operand_RS::get(this, opcode);
        memory_helper<SZ, RX>::store(ea, stored);
    }

    // Update form: write the effective address back to RA
    if (UP)
        RA::set(this, opcode, ea);

    increment_pc(4);
}
// Load/store multiple words: transfers GPRs rD (load) or rS (store) through
// r31 from/to consecutive words starting at RA + DP.
// An unaligned effective address is skipped with a debug message on
// SheepShaver builds and aborts otherwise.
template< class RA, class DP, bool LD >
void powerpc_cpu::execute_loadstore_multiple(uint32 opcode)
{
    const uint32 base = RA::get(this, opcode);
    const uint32 displacement = DP::get(this, opcode);
    uint32 ea = base + displacement;

    // FIXME: generate exception if ea is not word-aligned
    const bool misaligned = (ea & 3) != 0;
    if (misaligned) {
#ifdef SHEEPSHAVER
        D(bug("unaligned load/store multiple to %08x\n", ea));
        increment_pc(4);
        return;
#else
        abort();
#endif
    }

    const int first_reg = LD ? rD_field::extract(opcode) : rS_field::extract(opcode);
    for (int reg = first_reg; reg <= 31; reg++, ea += 4) {
        if (LD)
            gpr(reg) = vm_read_memory_4(ea);
        else
            vm_write_memory_4(ea, gpr(reg));
    }

    increment_pc(4);
}
/**
 *  Floating-point load/store instructions
 *
 *      RA      Base operand
 *      RB      Displacement (GPR(RB), EXTS(d))
 *      LD      Load operation?
 *      DB      Predicate for double value
 *      UP      Predicate to update RA with EA
 *
 *  Registers are accessed as raw 64-bit images; single-precision
 *  accesses go through fp_load_single_convert() / fp_store_single_convert().
 **/
template< class RA, class RB, bool LD, bool DB, bool UP >
void powerpc_cpu::execute_fp_loadstore(uint32 opcode)
{
    const uint32 base = RA::get(this, opcode);
    const uint32 displacement = RB::get(this, opcode);
    const uint32 ea = base + displacement;

    if (LD) {
        uint64 image;
        if (DB)
            image = vm_read_memory_8(ea);
        else
            image = fp_load_single_convert(vm_read_memory_4(ea));
        operand_fp_dw_RD::set(this, opcode, image);
    }
    else {
        const uint64 image = operand_fp_dw_RS::get(this, opcode);
        if (DB)
            vm_write_memory_8(ea, image);
        else
            vm_write_memory_4(ea, fp_store_single_convert(image));
    }

    // Update form: write the effective address back to RA
    if (UP)
        RA::set(this, opcode, ea);

    increment_pc(4);
}
/**
* Load/Store String Word instruction
*
* RA Input operand as base EA
* IM lswi mode?
* NB Number of bytes to transfer
*
* Bytes are packed four per register starting at rD, most significant
* byte first; the register number wraps around from 31 to 0.
**/
template< class RA, bool IM, class NB >
void powerpc_cpu::execute_load_string(uint32 opcode)
{
uint32 ea = RA::get(this, opcode);
// Indexed form: RB is added to the base address
if (!IM)
ea += operand_RB::get(this, opcode);
int nb = NB::get(this, opcode);
// In immediate mode a byte count of 0 stands for 32 bytes
if (IM && nb == 0)
nb = 32;
int rd = rD_field::extract(opcode);
#if 1
// Word-at-a-time implementation: whole words first, then the 1 to 3
// remaining bytes, left-justified in the last register (the rest of
// that register is zero)
int i;
for (i = 0; nb - i >= 4; i += 4, rd = (rd + 1) & 0x1f)
gpr(rd) = vm_read_memory_4(ea + i);
switch (nb - i) {
case 1:
gpr(rd) = vm_read_memory_1(ea + i) << 24;
break;
case 2:
gpr(rd) = vm_read_memory_2(ea + i) << 16;
break;
case 3:
gpr(rd) = (vm_read_memory_2(ea + i) << 16) + (vm_read_memory_1(ea + i + 2) << 8);
break;
}
#else
// Alternate byte-at-a-time implementation (currently compiled out)
for (int i = 0; i < nb; i++) {
switch (i & 3) {
case 0:
gpr(rd) = vm_read_memory_1(ea + i) << 24;
break;
case 1:
gpr(rd) = (gpr(rd) & 0xff00ffff) | (vm_read_memory_1(ea + i) << 16);
break;
case 2:
gpr(rd) = (gpr(rd) & 0xffff00ff) | (vm_read_memory_1(ea + i) << 8);
break;
case 3:
gpr(rd) = (gpr(rd) & 0xffffff00) | vm_read_memory_1(ea + i);
rd = (rd + 1) & 0x1f;
break;
}
}
#endif
increment_pc(4);
}
// Store String Word: writes NB bytes to memory starting at the effective
// address, taking them four per register from rS onwards, most significant
// byte first. The register number wraps around from 31 to 0.
template< class RA, bool IM, class NB >
void powerpc_cpu::execute_store_string(uint32 opcode)
{
    uint32 ea = RA::get(this, opcode);
    // Indexed form: RB is added to the base address
    if (!IM)
        ea += operand_RB::get(this, opcode);

    int byte_count = NB::get(this, opcode);
    // In immediate mode a byte count of 0 stands for 32 bytes
    if (IM && byte_count == 0)
        byte_count = 32;

    int reg = rS_field::extract(opcode);
    for (int i = 0; i < byte_count; i++) {
        const int lane = i & 3;             // byte position within the register
        const int shift = 24 - 8 * lane;    // 24, 16, 8, 0
        vm_write_memory_1(ea + i, gpr(reg) >> shift);
        // Last byte of this register written: go to the next one
        if (lane == 3)
            reg = (reg + 1) & 0x1f;
    }

    increment_pc(4);
}
/**
 *  Load Word and Reserve Indexed / Store Word Conditional Indexed
 *
 *      RA      Input operand as base EA
 *
 *  lwarx loads the word at RA + RB into RD and records a reservation
 *  on that address (plus the loaded value when KPX_MAX_CPUS != 1).
 **/
template< class RA >
void powerpc_cpu::execute_lwarx(uint32 opcode)
{
    const uint32 ea = RA::get(this, opcode) + operand_RB::get(this, opcode);
    const uint32 loaded = vm_read_memory_4(ea);

    // Establish the reservation
    regs().reserve_valid = 1;
    regs().reserve_addr = ea;
#if KPX_MAX_CPUS != 1
    // Remember the value so that stwcx. can notice a foreign write
    regs().reserve_data = loaded;
#endif

    operand_RD::set(this, opcode, loaded);
    increment_pc(4);
}
// Store Word Conditional Indexed: stores RS at RA + RB only if a valid
// reservation exists for that address. CR field 0 is cleared first; EQ is
// set when the store is performed and SO is copied from XER at the end.
// Any existing reservation is dropped whether or not the store succeeded.
template< class RA >
void powerpc_cpu::execute_stwcx(uint32 opcode)
{
    const uint32 ea = RA::get(this, opcode) + operand_RB::get(this, opcode);
    cr().clear(0);

    if (regs().reserve_valid) {
        bool reservation_holds = (regs().reserve_addr == ea); /* physical_addr(EA) */
#if KPX_MAX_CPUS != 1
        /* HACK: if another processor wrote to the reserved block,
           nothing happens, i.e. we should operate as if reserve == 0 */
        reservation_holds = reservation_holds && (regs().reserve_data == vm_read_memory_4(ea));
#endif
        if (reservation_holds) {
            vm_write_memory_4(ea, operand_RS::get(this, opcode));
            cr().set(0, standalone_CR_EQ_field::mask());
        }
        regs().reserve_valid = 0;
    }

    cr().set_so(0, xer().get_so());
    increment_pc(4);
}
/**
 *  Floating-point compare instruction
 *
 *      OC      Predicate for ordered compare
 *
 *  The 4-bit result (8: less than, 4: greater than, 2: equal,
 *  1: unordered) is stored both in FPSCR[FPCC] and in CR field crfD.
 **/
template< bool OC >
void powerpc_cpu::execute_fp_compare(uint32 opcode)
{
    const double lhs = operand_fp_RA::get(this, opcode);
    const double rhs = operand_fp_RB::get(this, opcode);
    const int crfd = crfD_field::extract(opcode);

    int relation = 2;                       // equal unless proven otherwise
    if (is_NaN(lhs) || is_NaN(rhs))
        relation = 1;                       // unordered
    else if (isless(lhs, rhs))
        relation = 8;                       // less than
    else if (isgreater(lhs, rhs))
        relation = 4;                       // greater than

    FPSCR_FPCC_field::insert(fpscr(), relation);
    cr().set(crfd, relation);

    // Update FPSCR exception bits
#if PPC_ENABLE_FPU_EXCEPTIONS
    int exceptions = 0;
    const bool any_snan = is_SNaN(lhs) || is_SNaN(rhs);
    if (any_snan) {
        exceptions |= FPSCR_VXSNAN_field::mask();
        // Ordered compare also flags VXVC, unless FPSCR[VE] is set
        if (OC && !FPSCR_VE_field::test(fpscr()))
            exceptions |= FPSCR_VXVC_field::mask();
    }
    else if (OC && (is_QNaN(lhs) || is_QNaN(rhs))) {
        exceptions |= FPSCR_VXVC_field::mask();
    }
    record_fpscr(exceptions);
#endif

    increment_pc(4);
}
/**
* Floating Convert to Integer Word instructions
*
* RN Rounding mode
* Rc Predicate to record CR1
*
* Converts the double in RB to a 32-bit integer, saturating to
* 0x7fffffff / 0x80000000 when the operand is out of range, and
* stores the result in the integer view of RD.
**/
template< class RN, class Rc >
void powerpc_cpu::execute_fp_int_convert(uint32 opcode)
{
const double b = operand_fp_RB::get(this, opcode);
const uint32 r = RN::get(this, opcode);
// Result is written through the integer member (j) and committed through
// the floating-point member (d)
any_register d;
#if PPC_ENABLE_FPU_EXCEPTIONS
int exceptions = 0;
if (is_NaN(b)) {
exceptions |= FPSCR_VXCVI_field::mask();
if (is_SNaN(b))
exceptions |= FPSCR_VXSNAN_field::mask();
}
if (isinf(b))
exceptions |= FPSCR_VXCVI_field::mask();
// NOTE(review): VXCVI is only flagged here for NaN and infinite operands;
// finite out-of-range operands saturate below without it -- confirm against
// the architecture manual
feclearexcept(FE_ALL_EXCEPT);
febarrier();
#endif
// Convert to integer word if operand fits bounds
if (b >= -(double)0x80000000 && b <= (double)0x7fffffff) {
#if defined mathlib_lrint
// Temporarily switch the native rounding mode to the requested one
int old_round = fegetround();
fesetround(ppc_to_native_rounding_mode(r));
d.j = (int32)mathlib_lrint(b);
fesetround(old_round);
#else
// No lrint available: use the explicit rounding operations
switch (r) {
case 0: d.j = (int32)op_frin::apply(b); break; // near
case 1: d.j = (int32)op_friz::apply(b); break; // zero
case 2: d.j = (int32)op_frip::apply(b); break; // +inf
case 3: d.j = (int32)op_frim::apply(b); break; // -inf
}
#endif
}
// NOTE: this catches infinity and NaN operands
// (a NaN fails "b > 0" and therefore yields 0x80000000)
else if (b > 0)
d.j = 0x7fffffff;
else
d.j = 0x80000000;
// Update FPSCR exception bits
#if PPC_ENABLE_FPU_EXCEPTIONS
febarrier();
int raised = fetestexcept(FE_ALL_EXCEPT);
if (raised & FE_UNDERFLOW)
exceptions |= FPSCR_UX_field::mask();
if (raised & FE_INEXACT)
exceptions |= FPSCR_XX_field::mask();
record_fpscr(exceptions);
#endif
// Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
if (Rc::test(opcode))
record_cr1();
// Commit result to output operand
operand_fp_RD::set(this, opcode, d.d);
increment_pc(4);
}
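/**
* Floating-point result classification / Floating-point Round to Single
*
* fp_classify() stores the class and sign of a result into FPSCR[FPRF]
* Rc Predicate to record CR1 (execute_fp_round)
**/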
template< class FP >
void powerpc_cpu::fp_classify(FP x)
{
uint32 c = fpscr() & ~FPSCR_FPRF_field::mask();
uint8 fc = fpclassify(x);
switch (fc) {
case FP_NAN:
c |= FPSCR_FPRF_FU_field::mask() | FPSCR_FPRF_C_field::mask();
break;
case FP_ZERO:
c |= FPSCR_FPRF_FE_field::mask();
if (signbit(x))
c |= FPSCR_FPRF_C_field::mask();
break;
case FP_INFINITE:
c |= FPSCR_FPRF_FU_field::mask();
goto FL_FG_field;
case FP_SUBNORMAL:
c |= FPSCR_FPRF_C_field::mask();
// fall-through
case FP_NORMAL:
FL_FG_field:
if (x < 0)
c |= FPSCR_FPRF_FL_field::mask();
else
c |= FPSCR_FPRF_FG_field::mask();
break;
}
fpscr() = c;
}
// Floating-point Round to Single: converts the double in RB to float and
// stores it back, widened to double again, into RD.
//   Rc   Predicate to record CR1
template< class Rc >
void powerpc_cpu::execute_fp_round(uint32 opcode)
{
const double b = operand_fp_RB::get(this, opcode);
#if PPC_ENABLE_FPU_EXCEPTIONS
// Exception bits derived from the operand alone, before converting
int exceptions =
fp_invalid_operation_condition<double>::
apply(FPSCR_VXSNAN_field::mask(), b);
// Start from a clean set of native exception flags
feclearexcept(FE_ALL_EXCEPT);
febarrier();
#endif
// The narrowing conversion itself performs the rounding
float d = (float)b;
// Update FPSCR exception bits
#if PPC_ENABLE_FPU_EXCEPTIONS
febarrier();
// Translate the native flags raised by the conversion into FPSCR bits
int raised = fetestexcept(FE_ALL_EXCEPT);
if (raised & FE_UNDERFLOW)
exceptions |= FPSCR_UX_field::mask();
if (raised & FE_OVERFLOW)
exceptions |= FPSCR_OX_field::mask();
if (raised & FE_INEXACT)
exceptions |= FPSCR_XX_field::mask();
record_fpscr(exceptions);
#endif
// FPSCR[FPRF] is set to the class and sign of the result
// (skipped while FPSCR[VE] is set)
if (!FPSCR_VE_field::test(fpscr()))
fp_classify(d);
// Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
if (Rc::test(opcode))
record_cr1();
// Commit result to output operand
operand_fp_RD::set(this, opcode, (double)d);
increment_pc(4);
}
/**
 *  System Call instruction
 *
 *  On SheepShaver builds the instruction is handled as an illegal
 *  one. execute_illegal() either aborts or, when the "ignoreillegal"
 *  preference is set, has already advanced PC past the instruction,
 *  so PC must not be advanced a second time here.
 *
 *  On other builds the execute_do_syscall hook is invoked when set,
 *  and CR0[SO] is set if the hook returned false.
 **/
void powerpc_cpu::execute_syscall(uint32 opcode)
{
#ifdef SHEEPSHAVER
    execute_illegal(opcode);
#else
    cr().set_so(0, execute_do_syscall && !execute_do_syscall(this));
    increment_pc(4);
#endif
}
/**
* Instructions dealing with system registers
**/
// Move Condition Register Field: copies CR field crfS into CR field crfD
void powerpc_cpu::execute_mcrf(uint32 opcode)
{
    const int source_field = crfS_field::extract(opcode);
    const int dest_field = crfD_field::extract(opcode);
    const uint32 field_value = cr().get(source_field);
    cr().set(dest_field, field_value);
    increment_pc(4);
}
// Move to Condition Register from FPSCR: copies FPSCR field crfS into CR
// field crfD, then clears the copied exception bits in the FPSCR.
//
// The field mask is built from an unsigned constant: with crfS == 0 the
// shift amount is 28 and 0xf << 28 does not fit in a signed int.
void powerpc_cpu::execute_mcrfs(uint32 opcode)
{
    const int crfS = crfS_field::extract(opcode);
    const int crfD = crfD_field::extract(opcode);

    // The contents of FPSCR field crfS are copied to CR field crfD
    const int field_shift = 28 - 4 * crfS;
    const uint32 m = (uint32)0xf << field_shift;
    cr().set(crfD, (fpscr() & m) >> field_shift);

    // All exception bits copied (except FEX and VX) are cleared in the FPSCR
    fpscr() &= ~(m & (FPSCR_FX_field::mask() | FPSCR_OX_field::mask() |
                      FPSCR_UX_field::mask() | FPSCR_ZX_field::mask() |
                      FPSCR_XX_field::mask() | FPSCR_VXSNAN_field::mask() |
                      FPSCR_VXISI_field::mask() | FPSCR_VXIDI_field::mask() |
                      FPSCR_VXZDZ_field::mask() | FPSCR_VXIMZ_field::mask() |
                      FPSCR_VXVC_field::mask() | FPSCR_VXSOFT_field::mask() |
                      FPSCR_VXSQRT_field::mask() | FPSCR_VXCVI_field::mask()));
    increment_pc(4);
}
// Move to Condition Register from XER: copies the four most significant
// bits of XER into CR field crfD and clears them in XER
void powerpc_cpu::execute_mcrxr(uint32 opcode)
{
    const int dest_field = crfD_field::extract(opcode);
    const uint32 xer_value = xer().get();
    const uint32 top_nibble = xer_value >> 28;
    cr().set(dest_field, top_nibble);
    xer().set(xer_value & 0x0fffffff);
    increment_pc(4);
}
// Move to Condition Register Fields: the CRM field of the opcode is expanded
// to a bit mask through field2mask[]; masked bits of CR are taken from RS,
// the other bits of CR are left unchanged
void powerpc_cpu::execute_mtcrf(uint32 opcode)
{
    const uint32 bit_mask = field2mask[CRM_field::extract(opcode)];
    const uint32 from_rs = operand_RS::get(this, opcode) & bit_mask;
    const uint32 kept = cr().get() & ~bit_mask;
    cr().set(from_rs | kept);
    increment_pc(4);
}
// Move to FPSCR Fields: copies the FPSCR fields selected by FM from RB.
//   FM   Field mask operand (expanded through field2mask[])
//   RB   Source operand, read as a 64-bit value
//   Rc   Predicate to record CR1
template< class FM, class RB, class Rc >
void powerpc_cpu::execute_mtfsf(uint32 opcode)
{
    const uint64 source = RB::get(this, opcode);
    const uint32 field_sel = FM::get(this, opcode);
    uint32 m = field2mask[field_sel];

    // FPSCR[FX] is altered only if FM[0] = 1
    const bool fm0_set = (field_sel & 0x80) != 0;
    if (!fm0_set)
        m &= ~FPSCR_FX_field::mask();

    // The mtfsf instruction cannot alter FPSCR[FEX] nor FPSCR[VX] explicitly
    int new_bits = source & m;
    new_bits &= ~(FPSCR_FEX_field::mask() | FPSCR_VX_field::mask());

    // Move frB bits to FPSCR according to field mask
    fpscr() = (fpscr() & ~m) | new_bits;

    // Update FPSCR exception bits (don't implicitly update FX)
    record_fpscr(0);

    // Propagate a rounding-mode change to the native FP control word
    if (m & FPSCR_RN_field::mask())
        fesetround(ppc_to_native_rounding_mode(FPSCR_RN_field::extract(fpscr())));

    // CR1 (FX, FEX, VX, VOX) is recorded only when Rc is set
    if (Rc::test(opcode))
        record_cr1();

    increment_pc(4);
}
// Move to FPSCR Field Immediate: replaces FPSCR field crfD with bits taken
// from the RB operand.
//   RB   Source operand (masked with the field mask below)
//   Rc   Predicate to record CR1
//
// The field mask is built from an unsigned constant: with crfD == 0 the
// shift amount is 28 and 0xf << 28 does not fit in a signed int.
template< class RB, class Rc >
void powerpc_cpu::execute_mtfsfi(uint32 opcode)
{
    const uint32 crfD = crfD_field::extract(opcode);
    uint32 m = (uint32)0xf << (4 * (7 - crfD));

    // FPSCR[FX] is altered only if crfD = 0
    // NOTE(review): the test below removes FX from the mask precisely when
    // crfD == 0, and FX is not part of the mask for any other field, so FX is
    // never altered by this instruction. This looks inverted with respect to
    // the statement above -- confirm against the architecture manual before
    // changing it.
    if (crfD == 0)
        m &= ~FPSCR_FX_field::mask();

    // The mtfsfi instruction cannot alter FPSCR[FEX] nor FPSCR[VX] explicitly
    int exceptions = RB::get(this, opcode) & m;
    exceptions &= ~(FPSCR_FEX_field::mask() | FPSCR_VX_field::mask());

    // Move immediate to FPSCR according to field crfD
    fpscr() = (fpscr() & ~m) | exceptions;

    // Update native FP control word
    if (m & FPSCR_RN_field::mask())
        fesetround(ppc_to_native_rounding_mode(FPSCR_RN_field::extract(fpscr())));

    // Update FPSCR exception bits (don't implicitly update FX)
    record_fpscr(0);

    // Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
    if (Rc::test(opcode))
        record_cr1();

    increment_pc(4);
}
// Move to FPSCR Bit 0/1: clears or sets bit crbD of the FPSCR.
//   RB   Operand telling whether the bit is set (true) or cleared (false)
//   Rc   Predicate to record CR1
//
// The bit is written here directly rather than only through record_fpscr():
// record_fpscr() does nothing unless PPC_ENABLE_FPU_EXCEPTIONS is enabled,
// in which case the "set" form would otherwise leave the bit cleared.
// When record_fpscr() is active it ORs the same mask in again, so its
// behaviour is unchanged.
template< class RB, class Rc >
void powerpc_cpu::execute_mtfsb(uint32 opcode)
{
    const bool set_bit = RB::get(this, opcode);

    // The mtfsb0 and mtfsb1 instructions cannot alter FPSCR[FEX] nor FPSCR[VX] explicitly
    // (unsigned constant: crbD == 0 must not shift a signed 1 into the sign bit)
    uint32 m = (uint32)1 << (31 - crbD_field::extract(opcode));
    m &= ~(FPSCR_FEX_field::mask() | FPSCR_VX_field::mask());

    // Bit crbD of the FPSCR is set or cleared
    fpscr() &= ~m;
    if (set_bit)
        fpscr() |= m;

    // Update FPSCR exception bits
    record_fpscr(set_bit ? m : 0);

    // Update native FP control word if FPSCR[RN] changed
    if (m & FPSCR_RN_field::mask())
        fesetround(ppc_to_native_rounding_mode(FPSCR_RN_field::extract(fpscr())));

    // Set CR1 (FX, FEX, VX, VOX) if instruction has Rc set
    if (Rc::test(opcode))
        record_cr1();

    increment_pc(4);
}
// Move from FPSCR: writes the FPSCR value into the raw (double-word) view
// of FPR(FRD).
//   Rc   Predicate to record CR1
template< class Rc >
void powerpc_cpu::execute_mffs(uint32 opcode)
{
    const uint32 status = fpscr();
    operand_fp_dw_RD::set(this, opcode, status);

    // CR1 (FX, FEX, VX, VOX) is recorded only when Rc is set
    if (Rc::test(opcode))
        record_cr1();

    increment_pc(4);
}
// Move from Machine State Register: no MSR is read here, RD always
// receives the fixed value 0xf072
void powerpc_cpu::execute_mfmsr(uint32 opcode)
{
    const uint32 fixed_msr = 0xf072;
    operand_RD::set(this, opcode, fixed_msr);
    increment_pc(4);
}
// Move from Special Purpose Register: reads XER, LR, CTR or VRSAVE into RD.
// SheepShaver builds additionally return a fixed value for SDR1, the
// external PVR variable for PVR, and 0 for any other SPR; other builds
// treat any other SPR as an illegal instruction.
//   SPR  Operand yielding the SPR number
template< class SPR >
void powerpc_cpu::execute_mfspr(uint32 opcode)
{
    const uint32 spr_number = SPR::get(this, opcode);
    uint32 value;

    switch (spr_number) {
    case powerpc_registers::SPR_XER:
        value = xer().get();
        break;
    case powerpc_registers::SPR_LR:
        value = lr();
        break;
    case powerpc_registers::SPR_CTR:
        value = ctr();
        break;
    case powerpc_registers::SPR_VRSAVE:
        value = vrsave();
        break;
#ifdef SHEEPSHAVER
    case powerpc_registers::SPR_SDR1:
        value = 0xdead001f;
        break;
    case powerpc_registers::SPR_PVR: {
        extern uint32 PVR;
        value = PVR;
        break;
    }
    default:
        value = 0;
#else
    default:
        execute_illegal(opcode);
#endif
    }

    operand_RD::set(this, opcode, value);
    increment_pc(4);
}
// Move to Special Purpose Register: writes RS into XER, LR, CTR or VRSAVE.
// Any other SPR is silently ignored on SheepShaver builds and treated as an
// illegal instruction otherwise.
//   SPR  Operand yielding the SPR number
template< class SPR >
void powerpc_cpu::execute_mtspr(uint32 opcode)
{
    const uint32 spr_number = SPR::get(this, opcode);
    const uint32 value = operand_RS::get(this, opcode);

    switch (spr_number) {
    case powerpc_registers::SPR_XER:
        xer().set(value);
        break;
    case powerpc_registers::SPR_LR:
        lr() = value;
        break;
    case powerpc_registers::SPR_CTR:
        ctr() = value;
        break;
    case powerpc_registers::SPR_VRSAVE:
        vrsave() = value;
        break;
#ifndef SHEEPSHAVER
    default:
        execute_illegal(opcode);
#endif
    }

    increment_pc(4);
}
// Compute with 96 bit intermediate result: (a * b) / c
//
// a is split into its two 32-bit halves with shifts and masks, so the code
// neither depends on the host byte order nor reads a union member other
// than the one last written. As before, each half of the quotient is
// truncated to 32 bits and c must be non-zero.
static uint64 muldiv64(uint64 a, uint32 b, uint32 c)
{
    const uint64 a_low = a & 0xffffffff;
    const uint64 a_high = a >> 32;

    // Partial products; the carry of the low one is folded into the high one
    const uint64 rl = a_low * (uint64)b;
    uint64 rh = a_high * (uint64)b;
    rh += (rl >> 32);

    // Long division, one 32-bit digit at a time
    const uint32 q_high = (uint32)(rh / c);
    const uint32 q_low = (uint32)((((rh % c) << 32) + (rl & 0xffffffff)) / c);
    return ((uint64)q_high << 32) | (uint64)q_low;
}
// Current time base value, in time base ticks.
// SheepShaver builds scale the microsecond counter by TimebaseSpeed; other
// builds scale clock() to a fixed 25 MHz time base.
static inline uint64 get_tb_ticks(void)
{
#ifdef SHEEPSHAVER
    const uint32 TBFreq = TimebaseSpeed;
    return muldiv64(GetTicks_usec(), TBFreq, 1000000);
#else
    const uint32 TBFreq = 25 * 1000 * 1000; // 25 MHz
    return muldiv64((uint64)clock(), TBFreq, CLOCKS_PER_SEC);
#endif
}
// Move from Time Base: TBR 268 yields the low 32 bits of the time base,
// TBR 269 the high 32 bits.
//   TBR  Operand yielding the time base register number
//
// Any other register number is an illegal instruction. execute_illegal()
// either aborts or, when illegal instructions are ignored, has already
// advanced PC past the instruction; the handler therefore returns right
// away instead of writing RD and advancing PC a second time.
template< class TBR >
void powerpc_cpu::execute_mftbr(uint32 opcode)
{
    const uint32 tbr = TBR::get(this, opcode);
    uint32 d = 0;
    switch (tbr) {
    case 268: d = (uint32)get_tb_ticks(); break;
    case 269: d = (get_tb_ticks() >> 32); break;
    default:
        execute_illegal(opcode);
        return;
    }
    operand_RD::set(this, opcode, d);
    increment_pc(4);
}
/**
 *  Instruction cache management
 *
 *  Flushes the pending range accumulated in cache_range, if any,
 *  and resets it to the empty range.
 **/
void powerpc_cpu::execute_invalidate_cache_range()
{
    // Nothing pending
    if (cache_range.start == cache_range.end)
        return;

    invalidate_cache_range(cache_range.start, cache_range.end);
    cache_range.start = cache_range.end = 0;
}
// Instruction Cache Block Invalidate: records the 32-byte block containing
// RA + RB in the pending range. A block that directly follows the pending
// range extends it; any other block flushes the pending range first and
// starts a new one.
template< class RA, class RB >
void powerpc_cpu::execute_icbi(uint32 opcode)
{
    const uint32 ea = RA::get(this, opcode) + RB::get(this, opcode);
    const uint32 block = ea & ~(uint32)31;   // round down to the block start

    if (block != cache_range.end) {
        // New region to invalidate
        execute_invalidate_cache_range();
        cache_range.start = block;
        cache_range.end = cache_range.start + 32;
    }
    else {
        // Extend region to invalidate
        cache_range.end += 32;
    }

    increment_pc(4);
}
// Instruction Synchronize: flushes the pending cache range recorded by
// icbi, if any, then moves on to the next instruction
void powerpc_cpu::execute_isync(uint32 opcode)
{
    (void)opcode;
    execute_invalidate_cache_range();
    increment_pc(4);
}
/**
 *  (Fake) data cache management
 *
 *  dcbz zeroes the 32-byte block that contains the effective
 *  address RA + RB.
 **/
template< class RA, class RB >
void powerpc_cpu::execute_dcbz(uint32 opcode)
{
    const uint32 ea = RA::get(this, opcode) + RB::get(this, opcode);
    const uint32 block = ea & ~(uint32)31;   // round down to the block start
    vm_memset(block, 0, 32);
    increment_pc(4);
}
/**
* Vector load/store instructions
**/
// Fills vD with 16 consecutive byte values derived from the low four bits
// of the effective address (RA|0) + RB. The first value is that nibble when
// SL is set, and 16 minus that nibble otherwise.
template< bool SL >
void powerpc_cpu::execute_vector_load_for_shift(uint32 opcode)
{
    const uint32 base = operand_RA_or_0::get(this, opcode);
    const uint32 index = operand_RB::get(this, opcode);
    const uint32 ea = base + index;
    powerpc_vr & vD = vr(vD_field::extract(opcode));

    const int misalignment = ea & 0xf;
    const int first_value = SL ? misalignment : (0x10 - misalignment);
    for (int i = 0; i < 16; i++)
        vD.b[ev_mixed::byte_element(i)] = first_value + i;

    increment_pc(4);
}
// Vector load. VD::element_size selects the access:
//   1, 2, 4  load a single element of that size from the naturally aligned
//            address into the element slot selected by the address
//   8        load the whole 16-byte aligned vector as four words
// Any other element size loads nothing.
template< class VD, class RA, class RB >
void powerpc_cpu::execute_vector_load(uint32 opcode)
{
    uint32 ea = RA::get(this, opcode) + RB::get(this, opcode);
    typename VD::type & vD = VD::ref(this, opcode);
    const int element_size = VD::element_size;

    if (element_size == 1) {
        VD::set_element(vD, (ea & 0x0f), vm_read_memory_1(ea));
    }
    else if (element_size == 2) {
        VD::set_element(vD, ((ea >> 1) & 0x07), vm_read_memory_2(ea & ~1));
    }
    else if (element_size == 4) {
        VD::set_element(vD, ((ea >> 2) & 0x03), vm_read_memory_4(ea & ~3));
    }
    else if (element_size == 8) {
        ea &= ~15;
        for (int word = 0; word < 4; word++)
            vD.w[word] = vm_read_memory_4(ea + 4 * word);
    }

    increment_pc(4);
}
// Vector store. VS::element_size selects the access:
//   1, 2, 4  store the element slot selected by the address to the
//            naturally aligned address
//   8        store the whole vector as four words at the 16-byte aligned
//            address
// Any other element size stores nothing.
template< class VS, class RA, class RB >
void powerpc_cpu::execute_vector_store(uint32 opcode)
{
    uint32 ea = RA::get(this, opcode) + RB::get(this, opcode);
    typename VS::type & vS = VS::ref(this, opcode);
    const int element_size = VS::element_size;

    if (element_size == 1) {
        vm_write_memory_1(ea, VS::get_element(vS, (ea & 0x0f)));
    }
    else if (element_size == 2) {
        vm_write_memory_2(ea & ~1, VS::get_element(vS, ((ea >> 1) & 0x07)));
    }
    else if (element_size == 4) {
        vm_write_memory_4(ea & ~3, VS::get_element(vS, ((ea >> 2) & 0x03)));
    }
    else if (element_size == 8) {
        ea &= ~15;
        for (int word = 0; word < 4; word++)
            vm_write_memory_4(ea + 4 * word, vS.w[word]);
    }

    increment_pc(4);
}
/**
 *  Vector arithmetic
 *
 *      OP      Operation to perform on element
 *      VD      Output operand vector
 *      VA      Input operand vector
 *      VB      Input operand vector (optional: operand_NONE)
 *      VC      Input operand vector (optional: operand_NONE)
 *      Rc      Predicate to record CR6
 *      C1      If recording CR6, do we check for '1' bits in vD?
 *
 *  OP is applied element by element; VSCR[SAT] is set as soon as
 *  VD::saturate() reports saturation for one of the results.
 **/
template< class OP, class VD, class VA, class VB, class VC, class Rc, int C1 >
void powerpc_cpu::execute_vector_arith(uint32 opcode)
{
    typename VA::type const & vA = VA::const_ref(this, opcode);
    typename VB::type const & vB = VB::const_ref(this, opcode);
    typename VC::type const & vC = VC::const_ref(this, opcode);
    typename VD::type & vD = VD::ref(this, opcode);

    // A vector is 16 bytes wide
    const int element_count = 16 / VD::element_size;
    for (int idx = 0; idx < element_count; idx++) {
        const typename VA::element_type ea = VA::get_element(vA, idx);
        const typename VB::element_type eb = VB::get_element(vB, idx);
        const typename VC::element_type ec = VC::get_element(vC, idx);
        typename VD::element_type result =
            op_apply<typename VD::element_type, OP, VA, VB, VC>::apply(ea, eb, ec);
        // saturate() receives the result by name and may adjust it
        const bool saturated = VD::saturate(result);
        if (saturated)
            vscr().set_sat(1);
        VD::set_element(vD, idx, result);
    }

    // Propagate all conditions to CR6
    if (Rc::test(opcode))
        record_cr6(vD, C1);

    increment_pc(4);
}
/**
* Vector mixed arithmetic
*
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* VC Input operand vector (optional: operand_NONE)
**/
// For every destination element i, accumulate OP(vA[k], vB[k]) over the
// (4 / VA::element_size) source elements k belonging to group i, starting
// from vC[i]. VSCR[SAT] is set when VD::saturate() reports saturation.
template< class OP, class VD, class VA, class VB, class VC >
void powerpc_cpu::execute_vector_arith_mixed(uint32 opcode)
{
	typedef typename VD::element_type result_type;
	typedef op_apply<result_type, OP, VA, VB, null_vector_operand> element_op;

	typename VA::type const & src_a = VA::const_ref(this, opcode);
	typename VB::type const & src_b = VB::const_ref(this, opcode);
	typename VC::type const & src_c = VC::const_ref(this, opcode);
	typename VD::type & dst = VD::ref(this, opcode);

	const int count = 16 / VD::element_size;
	const int group_size = 4 / VA::element_size;
	for (int idx = 0; idx < count; ++idx) {
		const typename VC::element_type c = VC::get_element(src_c, idx);
		result_type total = c;
		const int first = idx * group_size;
		for (int sub = 0; sub < group_size; ++sub) {
			const typename VA::element_type a = VA::get_element(src_a, first + sub);
			const typename VB::element_type b = VB::get_element(src_b, first + sub);
			total += element_op::apply(a, b, c);
		}
		if (VD::saturate(total))
			vscr().set_sat(1);
		VD::set_element(dst, idx, total);
	}

	increment_pc(4);
}
/**
* Vector odd/even arithmetic
*
* ODD Flag: are we computing every odd element?
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* VC Input operand vector (optional: operand_NONE)
**/
// Apply OP to every second source element: destination element i is
// computed from source elements (2 * i + ODD) of vA, vB and vC.
// VSCR[SAT] is set when VD::saturate() reports saturation.
template< int ODD, class OP, class VD, class VA, class VB, class VC >
void powerpc_cpu::execute_vector_arith_odd(uint32 opcode)
{
	typedef typename VD::element_type result_type;
	typedef op_apply<result_type, OP, VA, VB, VC> element_op;

	typename VA::type const & src_a = VA::const_ref(this, opcode);
	typename VB::type const & src_b = VB::const_ref(this, opcode);
	typename VC::type const & src_c = VC::const_ref(this, opcode);
	typename VD::type & dst = VD::ref(this, opcode);

	const int count = 16 / VD::element_size;
	for (int idx = 0; idx < count; ++idx) {
		const int src_idx = (idx * 2) + ODD;
		const typename VA::element_type a = VA::get_element(src_a, src_idx);
		const typename VB::element_type b = VB::get_element(src_b, src_idx);
		const typename VC::element_type c = VC::get_element(src_c, src_idx);
		result_type result = element_op::apply(a, b, c);
		if (VD::saturate(result))
			vscr().set_sat(1);
		VD::set_element(dst, idx, result);
	}

	increment_pc(4);
}
/**
* Vector merge instructions
*
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector
* LO Flag: merge the second half of the VA/VB elements instead of the first
**/
// Interleave elements of vA and vB into vD:
//   vD[2k] = vA[base + k], vD[2k + 1] = vB[base + k]
// where base is 0 when LO is 0 and half the element count when LO is 1.
template< class VD, class VA, class VB, int LO >
void powerpc_cpu::execute_vector_merge(uint32 opcode)
{
	typename VA::type const & vA = VA::const_ref(this, opcode);
	typename VB::type const & vB = VB::const_ref(this, opcode);
	typename VD::type & vD = VD::ref(this, opcode);

	// The destination may be the same register as vA or vB. Build the
	// result in a temporary so that every source element is read before
	// any destination element is overwritten.
	typename VD::type vR = vD;

	const int n_elements = 16 / VD::element_size;
	const int base = LO * (n_elements / 2);
	for (int i = 0; i < n_elements; i += 2) {
		VD::set_element(vR, i    , VA::get_element(vA, base + (i / 2)));
		VD::set_element(vR, i + 1, VB::get_element(vB, base + (i / 2)));
	}

	vD = vR;
	increment_pc(4);
}
/**
* Vector pack/unpack instructions
*
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (pack only)
* LO Flag: unpack the second half of the source elements instead of the first (unpack only)
**/
// Pack the elements of vA followed by the elements of vB into the
// narrower elements of vD: the first half of vD comes from vA, the
// second half from vB. VSCR[SAT] is set when VD::saturate() reports
// that a value was saturated.
template< class VD, class VA, class VB >
void powerpc_cpu::execute_vector_pack(uint32 opcode)
{
	typename VA::type const & vA = VA::const_ref(this, opcode);
	typename VB::type const & vB = VB::const_ref(this, opcode);
	typename VD::type & vD = VD::ref(this, opcode);

	// The destination may be the same register as vA or vB. Writing the
	// first half of vD in place would clobber vB before it is read, so
	// assemble the result in a temporary and commit it at the end.
	typename VD::type vR = vD;

	const int n_elements = 16 / VD::element_size;
	const int n_pivot = n_elements / 2;
	for (int i = 0; i < n_elements; i++) {
		typename VD::element_type d;
		if (i < n_pivot)
			d = VA::get_element(vA, i);
		else
			d = VB::get_element(vB, i - n_pivot);
		if (VD::saturate(d))
			vscr().set_sat(1);
		VD::set_element(vR, i, d);
	}

	vD = vR;
	increment_pc(4);
}
// Unpack one half of the elements of vA into the wider elements of vD:
// vD[i] = vA[i + LO * n], where n is the number of elements in vD.
template< int LO, class VD, class VA >
void powerpc_cpu::execute_vector_unpack(uint32 opcode)
{
	typename VA::type const & vA = VA::const_ref(this, opcode);
	typename VD::type & vD = VD::ref(this, opcode);

	// The destination may be the same register as vA. Each wide element
	// written to vD covers several source elements, so an in-place
	// update would destroy source elements that are still to be read.
	typename VD::type vR = vD;

	const int n_elements = 16 / VD::element_size;
	const int base = LO * n_elements;
	for (int i = 0; i < n_elements; i++)
		VD::set_element(vR, i, VA::get_element(vA, base + i));

	vD = vR;
	increment_pc(4);
}
// Pack each 32-bit word of vA and vB into a 16-bit pixel made of one
// 1-bit field followed by three 5-bit fields. The four words of vA
// produce halfwords 0-3 of vD, the four words of vB halfwords 4-7.
void powerpc_cpu::execute_vector_pack_pixel(uint32 opcode)
{
	powerpc_vr const & vA = vr(vA_field::extract(opcode));
	powerpc_vr const & vB = vr(vB_field::extract(opcode));
	powerpc_vr & vD = vr(vD_field::extract(opcode));

	// vD may be the same register as vA or vB: build the result in a
	// temporary so that no source word is overwritten before it is read.
	powerpc_vr vR = vD;

	for (int i = 0; i < 4; i++) {
		const uint32 a = vA.w[i];
		vR.h[ev_mixed::half_element(i)] = ((a >> 9) & 0xfc00) | ((a >> 6) & 0x03e0) | ((a >> 3) & 0x001f);
		const uint32 b = vB.w[i];
		vR.h[ev_mixed::half_element(i + 4)] = ((b >> 9) & 0xfc00) | ((b >> 6) & 0x03e0) | ((b >> 3) & 0x001f);
	}

	vD = vR;
	increment_pc(4);
}
// Expand four 16-bit pixels of vB (halfwords 0-3 when LO is 0, halfwords
// 4-7 when LO is 1) into 32-bit words of vD. The top bit of each pixel
// becomes an all-ones or all-zeros top byte; the three 5-bit fields are
// zero-extended into the three remaining bytes.
template< int LO >
void powerpc_cpu::execute_vector_unpack_pixel(uint32 opcode)
{
	powerpc_vr const & vB = vr(vB_field::extract(opcode));
	powerpc_vr & vD = vr(vD_field::extract(opcode));

	// vD may be the same register as vB: each word written covers two
	// source halfwords, so the result is built in a temporary first.
	powerpc_vr vR = vD;

	for (int i = 0; i < 4; i++) {
		const uint32 h = vB.h[ev_mixed::half_element(i + LO * 4)];
		vR.w[i] = (((h & 0x8000) ? 0xff000000 : 0) |
				   ((h & 0x7c00) << 6) |
				   ((h & 0x03e0) << 3) |
				   (h & 0x001f));
	}

	vD = vR;
	increment_pc(4);
}
/**
* Vector shift instructions
*
* SD Shift direction: left (-1), right (+1)
* VD Output operand vector (octet shifts only)
* VA Input operand vector (octet shifts only)
* VB Input operand vector supplying the shifted-in elements (octet shifts only; optional: operand_NONE)
* SH Shift count operand (octet shifts only)
**/
// Shift the whole 128-bit contents of vA left (SD < 0) or right (SD > 0)
// by 0..7 bits into vD. The bit count is the low three bits of byte
// element 15 of vB.
template< int SD >
void powerpc_cpu::execute_vector_shift(uint32 opcode)
{
	powerpc_vr const & src = vr(vA_field::extract(opcode));
	powerpc_vr const & count = vr(vB_field::extract(opcode));
	powerpc_vr & dst = vr(vD_field::extract(opcode));

	// The contents of the low-order three bits of all byte
	// elements in vB must be identical to vB[125-127]; otherwise
	// the value placed into vD is undefined.
	const int sh = count.b[ev_mixed::byte_element(15)] & 7;

	if (sh == 0) {
		// Nothing to shift: plain register copy
		for (int i = 0; i < 4; i++)
			dst.w[i] = src.w[i];
	}
	else if (SD < 0) {
		// Walk from word 3 down to word 0, carrying the bits shifted
		// out of each word into the preceding one
		uint32 carry = 0;
		for (int i = 3; i >= 0; i--) {
			const uint32 word = src.w[i];
			dst.w[i] = ((word << sh) | carry);
			carry = word >> (32 - sh);
		}
	}
	else if (SD > 0) {
		// Walk from word 0 up to word 3, carrying the bits shifted
		// out of each word into the following one
		uint32 carry = 0;
		for (int i = 0; i < 4; i++) {
			const uint32 word = src.w[i];
			dst.w[i] = ((word >> sh) | carry);
			carry = word << (32 - sh);
		}
	}

	increment_pc(4);
}
// Shift the 16 byte elements of vA by SH bytes, left (SD < 0) or right
// (SD > 0), filling the vacated positions from vB:
//   left:  vD = bytes sh .. sh + 15 of the concatenation (vA, vB)
//   right: vD = bytes 16 - sh .. 31 - sh of the concatenation (vB, vA)
// NOTE(review): assumes SH::get() yields a count in 0..15 and that VD
// has byte-sized elements -- confirm against the operand definitions.
template< int SD, class VD, class VA, class VB, class SH >
void powerpc_cpu::execute_vector_shift_octet(uint32 opcode)
{
	typename VA::type const & vA = VA::const_ref(this, opcode);
	typename VB::type const & vB = VB::const_ref(this, opcode);
	typename VD::type & vD = VD::ref(this, opcode);
	const int sh = SH::get(this, opcode);

	// The destination may be the same register as vA or vB. Build the
	// result in a temporary so that source bytes are never read after
	// they have been overwritten.
	typename VD::type vR = vD;

	if (SD < 0) {
		for (int i = 0; i < 16; i++) {
			if (i + sh < 16)
				VD::set_element(vR, i, VA::get_element(vA, i + sh));
			else
				VD::set_element(vR, i, VB::get_element(vB, i - (16 - sh)));
		}
	}
	else if (SD > 0) {
		for (int i = 0; i < 16; i++) {
			if (i < sh)
				// Fill from the tail of vB; the index stays within 16 - sh .. 15
				VD::set_element(vR, i, VB::get_element(vB, 16 - (sh - i)));
			else
				VD::set_element(vR, i, VA::get_element(vA, i - sh));
		}
	}

	vD = vR;
	increment_pc(4);
}
/**
* Vector splat instructions
*
* OP Operation to perform on the value before it is replicated
* VD Output operand vector
* VB Input operand vector (only read when IM is false)
* IM Flag: replicate the immediate field instead of an element of VB
**/
// Replicate a single value into every element of vD. The value is
// OP applied either to the immediate field of the opcode (IM true) or
// to the element of vB selected by that field (IM false).
template< class OP, class VD, class VB, bool IM >
void powerpc_cpu::execute_vector_splat(uint32 opcode)
{
	typename VD::type & dst = VD::ref(this, opcode);
	const int count = 16 / VD::element_size;

	uint32 replicated;
	if (!IM) {
		typename VB::type const & src = VB::const_ref(this, opcode);
		// Wrap the element selector to the number of elements
		const int selected = vUIMM_field::extract(opcode) & (count - 1);
		replicated = OP::apply(VB::get_element(src, selected));
	}
	else {
		replicated = OP::apply(vUIMM_field::extract(opcode));
	}

	for (int idx = 0; idx < count; ++idx)
		VD::set_element(dst, idx, replicated);

	increment_pc(4);
}
/**
* Vector sum instructions
*
* SZ Sum variant: 1 (vsum, one total), 2 (vsum2, two partial sums), 4 (vsum4, four partial sums)
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
**/
// Sum elements of vA, add an element of vB, and store the result in vD.
//   SZ == 1  one total over vA[0..3] plus vB[3], stored in vD[3];
//            vD[0..2] are cleared
//   SZ == 2  for each pair of elements: vA[i] + vA[i + 1] + vB[i + 1],
//            stored in vD[i + 1]; vD[i] is cleared
//   SZ == 4  for each element i of vD: vB[i] plus the
//            (4 / VA::element_size) elements of vA in group i
// VSCR[SAT] is set when VD::saturate() reports saturation.
template< int SZ, class VD, class VA, class VB >
void powerpc_cpu::execute_vector_sum(uint32 opcode)
{
	typedef typename VD::element_type sum_type;

	typename VA::type const & src_a = VA::const_ref(this, opcode);
	typename VB::type const & src_b = VB::const_ref(this, opcode);
	typename VD::type & dst = VD::ref(this, opcode);

	switch (SZ) {
	case 1: { // vsum
		sum_type total = VB::get_element(src_b, 3);
		for (int k = 0; k < 4; ++k)
			total += VA::get_element(src_a, k);
		if (VD::saturate(total))
			vscr().set_sat(1);
		for (int k = 0; k < 3; ++k)
			VD::set_element(dst, k, 0);
		VD::set_element(dst, 3, total);
		break;
	}
	case 2: { // vsum2
		for (int pair = 0; pair < 2; ++pair) {
			const int first = 2 * pair;
			sum_type total = VB::get_element(src_b, first + 1);
			for (int k = 0; k < 2; ++k)
				total += VA::get_element(src_a, first + k);
			if (VD::saturate(total))
				vscr().set_sat(1);
			VD::set_element(dst, first + 0, 0);
			VD::set_element(dst, first + 1, total);
		}
		break;
	}
	case 4: { // vsum4
		const int group_size = 4 / VA::element_size;
		for (int idx = 0; idx < 4; ++idx) {
			sum_type total = VB::get_element(src_b, idx);
			const int first = idx * group_size;
			for (int k = 0; k < group_size; ++k)
				total += VA::get_element(src_a, first + k);
			if (VD::saturate(total))
				vscr().set_sat(1);
			VD::set_element(dst, idx, total);
		}
		break;
	}
	}

	increment_pc(4);
}
/**
* Misc vector instructions
**/
// Byte permute: each byte i of vD is chosen by the low five bits of
// byte i of vC. Bit 0x10 selects vB (set) or vA (clear) as the source
// register and the low four bits select the byte within it.
void powerpc_cpu::execute_vector_permute(uint32 opcode)
{
	powerpc_vr const & vA = vr(vA_field::extract(opcode));
	powerpc_vr const & vB = vr(vB_field::extract(opcode));
	powerpc_vr const & vC = vr(vC_field::extract(opcode));
	powerpc_vr & vD = vr(vD_field::extract(opcode));

	// vD may be the same register as vA, vB or vC, and the gather reads
	// arbitrary source bytes, so the result must be assembled in a
	// temporary and only committed once every byte has been selected.
	powerpc_vr vR = vD;

	for (int i = 0; i < 16; i++) {
		const int ei = ev_mixed::byte_element(i);
		const int n = vC.b[ei] & 0x1f;
		const int en = ev_mixed::byte_element(n & 0xf);
		vR.b[ei] = (n & 0x10) ? vB.b[en] : vA.b[en];
	}

	vD = vR;
	increment_pc(4);
}
// Move from VSCR: clear words 0-2 of vD and place the VSCR value in
// word 3.
void powerpc_cpu::execute_mfvscr(uint32 opcode)
{
	powerpc_vr & dst = vr(vD_field::extract(opcode));
	for (int i = 0; i < 3; i++)
		dst.w[i] = 0;
	dst.w[3] = vscr().get();
	increment_pc(4);
}
// Move to VSCR: load the VSCR from word 3 of vB.
void powerpc_cpu::execute_mtvscr(uint32 opcode)
{
	powerpc_vr const & src = vr(vB_field::extract(opcode));
	vscr().set(src.w[3]);
	increment_pc(4);
}
/**
* Explicit template instantiations
**/
#include "ppc-execute-impl.cpp"