AltiVec emulation! ;-)

gbeauche 2004-02-15 17:17:37 +00:00
parent d92989dc53
commit 313cddeeb2
14 changed files with 2323 additions and 104 deletions

View File

@ -181,13 +181,6 @@ sheepshaver_cpu::sheepshaver_cpu()
void sheepshaver_cpu::init_decoder()
{
#ifndef PPC_NO_STATIC_II_INDEX_TABLE
static bool initialized = false;
if (initialized)
return;
initialized = true;
#endif
static const instr_info_t sheep_ii_table[] = {
{ "sheep",
(execute_pmf)&sheepshaver_cpu::execute_sheep,

View File

@ -88,6 +88,17 @@ typedef bit_field< 21, 25 > frC_field;
typedef bit_field< 6, 10 > frD_field;
typedef bit_field< 6, 10 > frS_field;
// Vector registers
typedef bit_field< 11, 15 > vA_field;
typedef bit_field< 16, 20 > vB_field;
typedef bit_field< 21, 25 > vC_field;
typedef bit_field< 6, 10 > vD_field;
typedef bit_field< 6, 10 > vS_field;
typedef bit_field< 21, 21 > vRc_field;
typedef bit_field< 11, 15 > vUIMM_field;
typedef bit_field< 22, 25 > vSH_field;
// Condition registers
typedef bit_field< 11, 15 > crbA_field;
typedef bit_field< 16, 20 > crbB_field;
@ -151,6 +162,10 @@ typedef bit_field< 17, 17 > FPSCR_FPRF_FG_field; // >
typedef bit_field< 18, 18 > FPSCR_FPRF_FE_field; // =
typedef bit_field< 19, 19 > FPSCR_FPRF_FU_field; // ?
// Vector Status and Control Register
typedef bit_field< 15, 15 > VSCR_NJ_field;
typedef bit_field< 31, 31 > VSCR_SAT_field;
// Define variations for branch instructions
typedef bit_field< 30, 30 > AA_field;
typedef bit_field< 31, 31 > LK_field;
@ -202,6 +217,7 @@ DEFINE_FIELD_ALIAS(AA_BIT, AA);
DEFINE_FIELD_ALIAS(LK_BIT, LK);
DEFINE_FIELD_ALIAS(BO_BIT, BO);
DEFINE_FIELD_ALIAS(BI_BIT, BI);
DEFINE_FIELD_ALIAS(vRC_BIT, vRc);
#undef DEFINE_FIELD_ALIAS
#undef DEFINE_FAKE_FIELD_ALIAS
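
The new vA/vB/vC/vD/vS and VSCR fields reuse the same bit_field template as the integer and floating-point register fields, with IBM bit numbering (bit 0 is the MSB of the 32-bit word). A standalone sketch, not from the commit, with a minimal stand-in for the real bit_field template, showing how those positions decode a VX-form AltiVec instruction:

// Standalone sketch: how a bit_field<FIRST,LAST>-style extractor maps IBM bit
// numbers (0 = MSB) onto shifts and masks.
#include <cstdint>
#include <cstdio>

template< int FIRST, int LAST >
struct bit_field_sketch {
    static uint32_t extract(uint32_t opcode) {
        const int nbits = LAST - FIRST + 1;            // field width
        const int shift = 31 - LAST;                   // distance from the LSB
        return (opcode >> shift) & ((1u << nbits) - 1);
    }
};

int main() {
    // vaddubm v3,v4,v5: primary opcode 4 in bits 0-5, vD=3 in bits 6-10,
    // vA=4 in bits 11-15, vB=5 in bits 16-20, extended opcode 0 in bits 21-31.
    const uint32_t opcode = (4u << 26) | (3u << 21) | (4u << 16) | (5u << 11);
    std::printf("vD=%u vA=%u vB=%u\n",
                bit_field_sketch<6, 10>::extract(opcode),    // vD_field
                bit_field_sketch<11, 15>::extract(opcode),   // vA_field
                bit_field_sketch<16, 20>::extract(opcode));  // vB_field
    return 0;
}
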

View File

@ -37,32 +37,6 @@
#endif
/**
* PPC_NO_STATIC_II_INDEX_TABLE
*
* Define to make sure the ii_index_table[] is a non static
* member so that powerpc_cpu object size is reduced by 64
* KB. This is only supported for mono CPU configurations.
**/
#ifndef PPC_NO_STATIC_II_INDEX_TABLE
#define PPC_NO_STATIC_II_INDEX_TABLE
#endif
/**
* PPC_OPCODE_HASH_XO_PRIMARY
*
* Define to hash opcode hash (xo, primary opcode) instead of
* (primary opcode, xo). This simply reduces the computation
* index into instr_info[] table by one operation.
**/
#ifndef PPC_OPCODE_HASH_XO_PRIMARY
#define PPC_OPCODE_HASH_XO_PRIMARY
#endif
/**
* PPC_ENABLE_FPU_EXCEPTIONS
*
@ -148,13 +122,4 @@
#define PPC_PROFILE_GENERIC_CALLS 0
#endif
/**
* Sanity checks and features enforcements
**/
#if KPX_MAX_CPUS == 1
#undef PPC_NO_STATIC_II_INDEX_TABLE
#endif
#endif /* PPC_CONFIG_H */

View File

@ -431,7 +431,6 @@ bool powerpc_cpu::check_spcflags()
return true;
}
void powerpc_cpu::execute(uint32 entry)
{
pc() = entry;

View File

@ -48,7 +48,11 @@ protected:
powerpc_cr_register const & cr() const { return regs.cr; }
powerpc_xer_register & xer() { return regs.xer; }
powerpc_xer_register const & xer() const { return regs.xer; }
powerpc_vscr & vscr() { return regs.vscr; }
powerpc_vscr const & vscr() const { return regs.vscr; }
uint32 vrsave() const { return regs.vrsave; }
uint32 & vrsave() { return regs.vrsave; }
double fp_result() const { return regs.fp_result.d; }
double & fp_result() { return regs.fp_result.d; }
uint64 fp_result_dw() const { return regs.fp_result.j; }
@ -79,6 +83,8 @@ public:
double fpr(int i) const { return regs.fpr[i].d; }
uint64 & fpr_dw(int i) { return regs.fpr[i].j; }
uint64 fpr_dw(int i) const { return regs.fpr[i].j; }
powerpc_vr & vr(int i) { return regs.vr[i]; }
powerpc_vr const & vr(int i) const { return regs.vr[i]; }
protected:
@ -90,6 +96,15 @@ protected:
void record_cr1()
{ cr().set((cr().get() & ~CR_field<1>::mask()) | ((fpscr() >> 4) & 0x0f000000)); }
void record_fpscr();
void record_cr6(powerpc_vr const & vS, bool check_one) {
if (check_one && (vS.j[0] == UVAL64(0xffffffffffffffff) &&
vS.j[1] == UVAL64(0xffffffffffffffff)))
cr().set(6, 8);
else if (vS.j[0] == UVAL64(0) && vS.j[1] == UVAL64(0))
cr().set(6, 2);
else
cr().set(6, 0);
}
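
record_cr6 implements the Rc=1 behaviour of the vcmp* instructions: CR field 6 becomes 0b1000 (the value 8) when every element compared true, i.e. vD is all ones, 0b0010 (the value 2) when every element compared false, and 0 for a mixed result. A standalone sketch, not from the commit, of the same check on a plain 16-byte array, assuming cr().set(6, v) stores the 4-bit field verbatim:

// Standalone sketch: the CR6 value recorded by vcmp* instructions with Rc=1.
#include <cstdint>
#include <cstdio>

static unsigned cr6_for(const uint8_t (&vD)[16], bool check_one) {
    bool all_ones = true, all_zeros = true;
    for (int i = 0; i < 16; i++) {
        if (vD[i] != 0xff) all_ones = false;
        if (vD[i] != 0x00) all_zeros = false;
    }
    if (check_one && all_ones) return 8;   // 0b1000: all elements true
    if (all_zeros)             return 2;   // 0b0010: all elements false
    return 0;                              // mixed
}

int main() {
    uint8_t all_true[16], all_false[16], mixed[16];
    for (int i = 0; i < 16; i++) {
        all_true[i]  = 0xff;
        all_false[i] = 0x00;
        mixed[i]     = (i & 1) ? 0xff : 0x00;
    }
    std::printf("%u %u %u\n", cr6_for(all_true, true),
                cr6_for(all_false, true), cr6_for(mixed, true));  // prints: 8 2 0
    return 0;
}
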
template< class FP >
void fp_classify(FP x);
@ -125,7 +140,8 @@ protected:
MD_form, MDS_form,
SC_form,
X_form,
XFL_form, XFX_form, XL_form, XO_form, XS_form
XFL_form, XFX_form, XL_form, XO_form, XS_form,
VX_form, VXR_form, VA_form,
};
// Control flow types
@ -149,13 +165,13 @@ protected:
// Instruction information structure
struct instr_info_t {
char name[8]; // Instruction name
char name[12]; // Instruction name
execute_fn execute; // Semantic routine for this instruction
decode_fn decode; // Specialized instruction decoder
uint16 mnemo; // Mnemonic
uint16 format; // Instruction format (XO-form, D-form, etc.)
uint32 opcode:6; // Primary opcode
uint32 xo:10; // Extended opcode
uint16 opcode; // Primary opcode
uint16 xo; // Extended opcode
uint16 cflow; // Mask of control flow information
};
@ -192,25 +208,15 @@ private:
syscall_fn execute_do_syscall;
int syscall_exit_code;
#ifdef PPC_NO_STATIC_II_INDEX_TABLE
#define PPC_STATIC_II_TABLE
#else
#define PPC_STATIC_II_TABLE static
#endif
static const instr_info_t powerpc_ii_table[];
PPC_STATIC_II_TABLE std::vector<instr_info_t> ii_table;
typedef uint8 ii_index_t;
static const int II_INDEX_TABLE_SIZE = 0x10000;
PPC_STATIC_II_TABLE ii_index_t ii_index_table[II_INDEX_TABLE_SIZE];
std::vector<instr_info_t> ii_table;
typedef uint16 ii_index_t;
static const int II_INDEX_TABLE_SIZE = 0x20000;
ii_index_t ii_index_table[II_INDEX_TABLE_SIZE];
#ifdef PPC_OPCODE_HASH_XO_PRIMARY
// Pack/unpack index into decode table
uint32 make_ii_index(uint32 opcode, uint32 xo) { return opcode | (xo << 6); }
uint32 get_ii_index(uint32 opcode) { return (opcode >> 26) | ((opcode & 0x7fe) << 5); }
#else
uint32 make_ii_index(uint32 opcode, uint32 xo) { return opcode << 10 | xo; }
uint32 get_ii_index(uint32 opcode) { return ((opcode >> 16) & 0xfc00) | ((opcode >> 1) & 0x3ff); }
#endif
uint32 get_ii_index(uint32 opcode) { return (opcode >> 26) | ((opcode & 0x7ff) << 6); }
// Convert 8-bit field mask (e.g. mtcrf) to bit mask
uint32 field2mask[256];
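
The two configurable hash layouts are gone: the decoder now always packs the primary opcode (bits 0-5) into the low 6 bits of the index and the full 11-bit secondary field (bits 21-31, wide enough for the AltiVec VX extended opcodes, one bit more than the old 10-bit hash) above it. The index therefore spans 17 bits, which is why II_INDEX_TABLE_SIZE doubles to 0x20000; ii_index_t itself widens to uint16, presumably because the AltiVec additions push the number of decode-table entries past what a uint8 can address. A standalone sketch, not from the commit, using the VX-form vaddfp encoding (primary opcode 4, extended opcode 10):

// Standalone sketch: the single remaining (primary opcode, xo) index packing.
#include <cstdint>
#include <cstdio>

static uint32_t get_ii_index(uint32_t insn) {
    return (insn >> 26) | ((insn & 0x7ff) << 6);   // opcode in bits 0-5, xo above
}

int main() {
    const uint32_t insn = (4u << 26) | 10u;        // vaddfp: opcode 4, xo 10
    std::printf("index=%u\n", get_ii_index(insn)); // 4 | (10 << 6) = 644
    // Worst case: opcode 63 with all 11 xo bits set gives 63 | (0x7ff << 6) = 0x1ffff,
    // hence the 0x20000-entry index table.
    return 0;
}
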
@ -411,6 +417,36 @@ private:
void execute_invalidate_cache_range();
template< class RA, class RB >
void execute_dcbz(uint32 opcode);
template< class VD, class RA, class RB >
void execute_vector_load(uint32 opcode);
template< class VS, class RA, class RB >
void execute_vector_store(uint32 opcode);
void execute_mfvscr(uint32 opcode);
void execute_mtvscr(uint32 opcode);
template< class OP, class VD, class VA, class VB, class VC, class Rc, int C1 >
void execute_vector_arith(uint32 opcode);
template< class OP, class VD, class VA, class VB, class VC >
void execute_vector_arith_mixed(uint32 opcode);
template< int ODD, class OP, class VD, class VA, class VB, class VC >
void execute_vector_arith_odd(uint32 opcode);
template< class VD, class VA, class VB, int LO >
void execute_vector_merge(uint32 opcode);
template< class VD, class VA, class VB >
void execute_vector_pack(uint32 opcode);
void execute_vector_pack_pixel(uint32 opcode);
template< int LO >
void execute_vector_unpack_pixel(uint32 opcode);
template< int LO, class VD, class VA >
void execute_vector_unpack(uint32 opcode);
void execute_vector_permute(uint32 opcode);
template< int SD >
void execute_vector_shift(uint32 opcode);
template< int SD, class VD, class VA, class VB, class SH >
void execute_vector_shift_octet(uint32 opcode);
template< class OP, class VD, class VB, bool IM >
void execute_vector_splat(uint32 opcode);
template< int SZ, class VD, class VA, class VB >
void execute_vector_sum(uint32 opcode);
// Specialized instruction decoders
template< class RA, class RB, class RC, class CA >

File diff suppressed because it is too large

View File

@ -79,6 +79,8 @@ struct powerpc_dyngen_helper {
static inline void set_fpscr(uint32 value) { CPU->fpscr() = value; }
static inline uint32 get_xer() { return CPU->xer().get(); }
static inline void set_xer(uint32 value) { CPU->xer().set(value); }
static inline uint32 get_vrsave() { return CPU->vrsave(); }
static inline void set_vrsave(uint32 value) { CPU->vrsave() = value; }
static inline void record(int crf, int32 v) { CPU->record_cr(crf, v); }
static inline powerpc_cr_register & cr() { return CPU->cr(); }
static inline powerpc_xer_register & xer() { return CPU->xer(); }
@ -473,6 +475,16 @@ DEFINE_OP(fnmsubs_FD_F0_F1_F2, FD, do_fnmsub(F0, F1, F2));
* Special purpose registers
**/
void OPPROTO op_load_T0_VRSAVE(void)
{
T0 = powerpc_dyngen_helper::get_vrsave();
}
void OPPROTO op_store_T0_VRSAVE(void)
{
powerpc_dyngen_helper::set_vrsave(T0);
}
void OPPROTO op_load_T0_XER(void)
{
T0 = powerpc_dyngen_helper::get_xer();

View File

@ -94,6 +94,8 @@ public:
void gen_mtcrf_T0_im(uint32 mask);
// Special purpose registers
DEFINE_ALIAS(load_T0_VRSAVE,0);
DEFINE_ALIAS(store_T0_VRSAVE,0);
DEFINE_ALIAS(load_T0_XER,0);
DEFINE_ALIAS(store_T0_XER,0);
DEFINE_ALIAS(load_T0_PC,0);

View File

@ -54,28 +54,52 @@
template< class RT, class OP, class RA, class RB, class RC >
struct op_apply {
template< class T >
static inline RT apply(T a, T b, T c) {
template< class A, class B, class C >
static inline RT apply(A a, B b, C c) {
return OP::apply(a, b, c);
}
};
template< class RT, class OP, class RA, class RB >
struct op_apply<RT, OP, RA, RB, null_operand> {
template< class T >
static inline RT apply(T a, T b, T) {
template< class A, class B, class C >
static inline RT apply(A a, B b, C) {
return OP::apply(a, b);
}
};
template< class RT, class OP, class RA >
struct op_apply<RT, OP, RA, null_operand, null_operand> {
template< class T >
static inline RT apply(T a, T, T) {
template< class A, class B, class C >
static inline RT apply(A a, B, C) {
return OP::apply(a);
}
};
template< class RT, class OP, class RA, class RB >
struct op_apply<RT, OP, RA, RB, null_vector_operand> {
template< class A, class B, class C >
static inline RT apply(A a, B b, C) {
return (RT)OP::apply(a, b);
}
};
template< class RT, class OP, class RA >
struct op_apply<RT, OP, RA, null_vector_operand, null_vector_operand> {
template< class A, class B, class C >
static inline RT apply(A a, B, C) {
return (RT)OP::apply(a);
}
};
template< class RT, class OP, class RB >
struct op_apply<RT, OP, null_vector_operand, RB, null_vector_operand> {
template< class A, class B, class C >
static inline RT apply(A, B b, C) {
return (RT)OP::apply(b);
}
};
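
These op_apply specializations let a single execute template serve one-, two- and three-operand element operations: when the decode table plugs null_vector_operand (or null_operand) into an unused slot, the matching partial specialization drops that argument before calling OP::apply. A standalone sketch, not from the commit, of the same dispatch pattern with stand-in operand and operation names:

// Standalone sketch: arity dispatch through partial specialization.
#include <cstdint>
#include <cstdio>

struct null_vector_operand { };   // marker: "this operand slot is unused"
struct some_operand { };          // stand-in for a real vector operand class

struct op_add { static uint32_t apply(uint32_t a, uint32_t b) { return a + b; } };
struct op_neg { static uint32_t apply(uint32_t a) { return ~a + 1; } };

// Generic case: three live operands.
template< class RT, class OP, class RA, class RB, class RC >
struct op_apply {
    static RT apply(uint32_t a, uint32_t b, uint32_t c) { return OP::apply(a, b, c); }
};
// RC unused: forward (a, b) only.
template< class RT, class OP, class RA, class RB >
struct op_apply<RT, OP, RA, RB, null_vector_operand> {
    static RT apply(uint32_t a, uint32_t b, uint32_t) { return OP::apply(a, b); }
};
// RB and RC unused: forward a only.
template< class RT, class OP, class RA >
struct op_apply<RT, OP, RA, null_vector_operand, null_vector_operand> {
    static RT apply(uint32_t a, uint32_t, uint32_t) { return OP::apply(a); }
};

int main() {
    uint32_t two = op_apply<uint32_t, op_add, some_operand, some_operand,
                            null_vector_operand>::apply(3, 4, 99);   // 7, c ignored
    uint32_t one = op_apply<uint32_t, op_neg, some_operand, null_vector_operand,
                            null_vector_operand>::apply(5, 99, 99);  // two's complement of 5
    std::printf("%u %u\n", two, one);
    return 0;
}
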
/**
* Illegal & NOP instructions
**/
@ -1111,6 +1135,7 @@ void powerpc_cpu::execute_mfspr(uint32 opcode)
case powerpc_registers::SPR_XER: d = xer().get();break;
case powerpc_registers::SPR_LR: d = lr(); break;
case powerpc_registers::SPR_CTR: d = ctr(); break;
case powerpc_registers::SPR_VRSAVE: d = vrsave(); break;
#ifdef SHEEPSHAVER
case powerpc_registers::SPR_SDR1: d = 0xdead001f; break;
case powerpc_registers::SPR_PVR: {
@ -1137,6 +1162,7 @@ void powerpc_cpu::execute_mtspr(uint32 opcode)
case powerpc_registers::SPR_XER: xer().set(s); break;
case powerpc_registers::SPR_LR: lr() = s; break;
case powerpc_registers::SPR_CTR: ctr() = s; break;
case powerpc_registers::SPR_VRSAVE: vrsave() = s; break;
#ifndef SHEEPSHAVER
default: execute_illegal(opcode);
#endif
@ -1209,6 +1235,480 @@ void powerpc_cpu::execute_dcbz(uint32 opcode)
increment_pc(4);
}
/**
* Vector load/store instructions
**/
template< class VD, class RA, class RB >
void powerpc_cpu::execute_vector_load(uint32 opcode)
{
uint32 ea = RA::get(this, opcode) + RB::get(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
switch (VD::element_size) {
case 1:
VD::set_element(vD, (ea & 0x0f), vm_read_memory_1(ea));
break;
case 2:
VD::set_element(vD, ((ea >> 1) & 0x07), vm_read_memory_2(ea & ~1));
break;
case 4:
VD::set_element(vD, ((ea >> 2) & 0x03), vm_read_memory_4(ea & ~3));
break;
case 8:
ea &= ~15;
vD.w[0] = vm_read_memory_4(ea + 0);
vD.w[1] = vm_read_memory_4(ea + 4);
vD.w[2] = vm_read_memory_4(ea + 8);
vD.w[3] = vm_read_memory_4(ea + 12);
break;
}
increment_pc(4);
}
template< class VS, class RA, class RB >
void powerpc_cpu::execute_vector_store(uint32 opcode)
{
uint32 ea = RA::get(this, opcode) + RB::get(this, opcode);
typename VS::type & vS = VS::ref(this, opcode);
switch (VS::element_size) {
case 1:
vm_write_memory_1(ea, VS::get_element(vS, (ea & 0x0f)));
break;
case 2:
vm_write_memory_2(ea & ~1, VS::get_element(vS, ((ea >> 1) & 0x07)));
break;
case 4:
vm_write_memory_4(ea & ~3, VS::get_element(vS, ((ea >> 2) & 0x03)));
break;
case 8:
ea &= ~15;
vm_write_memory_4(ea + 0, vS.w[0]);
vm_write_memory_4(ea + 4, vS.w[1]);
vm_write_memory_4(ea + 8, vS.w[2]);
vm_write_memory_4(ea + 12, vS.w[3]);
break;
}
increment_pc(4);
}
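
For the element loads and stores (lvebx/lvehx/lvewx and their store counterparts, element_size 1/2/4) only a single element is transferred, and its index comes from the low bits of the effective address; the whole-register forms (lvx/lvxl/stvx/stvxl, the element_size 8 path above) align the address down to 16 bytes and move four 32-bit words. A standalone worked example, not from the commit, of that indexing:

// Standalone sketch: which vector element an AltiVec element access touches
// for a given effective address (big-endian element numbering).
#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t ea = 0x0001234e;                                 // arbitrary address
    std::printf("lvebx: byte element %u\n", ea & 0x0f);             // 14
    std::printf("lvehx: half element %u, ea 0x%x\n",
                (ea >> 1) & 0x07, ea & ~1u);                        // 7, 0x1234e
    std::printf("lvewx: word element %u, ea 0x%x\n",
                (ea >> 2) & 0x03, ea & ~3u);                        // 3, 0x1234c
    std::printf("lvx:   whole register, ea 0x%x\n", ea & ~15u);     // 0x12340
    return 0;
}
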
/**
* Vector arithmetic
*
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* VC Input operand vector (optional: operand_NONE)
* Rc Predicate to record CR6
* C1 If recording CR6, do we check for '1' bits in vD?
**/
template< class OP, class VD, class VA, class VB, class VC, class Rc, int C1 >
void powerpc_cpu::execute_vector_arith(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VB::type const & vB = VB::const_ref(this, opcode);
typename VC::type const & vC = VC::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
for (int i = 0; i < n_elements; i++) {
const typename VA::element_type a = VA::get_element(vA, i);
const typename VB::element_type b = VB::get_element(vB, i);
const typename VC::element_type c = VC::get_element(vC, i);
typename VD::element_type d = op_apply<typename VD::element_type, OP, VA, VB, VC>::apply(a, b, c);
if (VD::saturate(d))
vscr().set_sat(1);
VD::set_element(vD, i, d);
}
// Propagate all conditions to CR6
if (Rc::test(opcode))
record_cr6(vD, C1);
increment_pc(4);
}
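
The saturate() hook is what separates the modulo forms (vaddubm, ...) from the saturating ones (vaddubs, vaddshs, ...): a vector_saturate_operand, defined in the operands header further down, computes each element in a wider type, clamps it, and reports whether clamping happened so that VSCR[SAT] becomes sticky. A standalone sketch, not from the commit, of that flow for unsigned byte adds, assuming the decode table (in the suppressed diff) pairs this template with the int16-wide saturating byte operand:

// Standalone sketch: compute wide, clamp, and record the sticky SAT flag.
#include <cstdint>
#include <cstdio>

static bool saturate_u8(int16_t &v) {      // mirrors vector_saturate_operand
    if (v > 255) { v = 255; return true; }
    if (v < 0)   { v = 0;   return true; }
    return false;
}

int main() {
    uint8_t a[16], b[16], d[16];
    bool vscr_sat = false;
    for (int i = 0; i < 16; i++) { a[i] = 250; b[i] = uint8_t(i); }
    for (int i = 0; i < 16; i++) {
        int16_t wide = int16_t(a[i] + b[i]);   // no 8-bit wrap-around
        if (saturate_u8(wide))
            vscr_sat = true;                   // sticky, like vscr().set_sat(1)
        d[i] = uint8_t(wide);
    }
    std::printf("d[0]=%u d[15]=%u sat=%d\n", d[0], d[15], vscr_sat);  // 250 255 1
    return 0;
}
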
/**
* Vector mixed arithmetic
*
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* VC Input operand vector (optional: operand_NONE)
**/
template< class OP, class VD, class VA, class VB, class VC >
void powerpc_cpu::execute_vector_arith_mixed(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VB::type const & vB = VB::const_ref(this, opcode);
typename VC::type const & vC = VC::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
const int n_sub_elements = 4 / VA::element_size;
for (int i = 0; i < n_elements; i++) {
const typename VC::element_type c = VC::get_element(vC, i);
typename VD::element_type d = c;
for (int j = 0; j < n_sub_elements; j++) {
const typename VA::element_type a = VA::get_element(vA, i * n_sub_elements + j);
const typename VB::element_type b = VB::get_element(vB, i * n_sub_elements + j);
d += op_apply<typename VD::element_type, OP, VA, VB, null_vector_operand>::apply(a, b, c);
}
if (VD::saturate(d))
vscr().set_sat(1);
VD::set_element(vD, i, d);
}
increment_pc(4);
}
/**
* Vector odd/even arithmetic
*
* ODD Flag: are we computing every odd element?
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* VC Input operand vector (optional: operand_NONE)
**/
template< int ODD, class OP, class VD, class VA, class VB, class VC >
void powerpc_cpu::execute_vector_arith_odd(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VB::type const & vB = VB::const_ref(this, opcode);
typename VC::type const & vC = VC::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
for (int i = 0; i < n_elements; i++) {
const typename VA::element_type a = VA::get_element(vA, (i * 2) + ODD);
const typename VB::element_type b = VB::get_element(vB, (i * 2) + ODD);
const typename VC::element_type c = VC::get_element(vC, (i * 2) + ODD);
typename VD::element_type d = op_apply<typename VD::element_type, OP, VA, VB, VC>::apply(a, b, c);
if (VD::saturate(d))
vscr().set_sat(1);
VD::set_element(vD, i, d);
}
increment_pc(4);
}
/**
* Vector merge instructions
*
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* VC Input operand vector (optional: operand_NONE)
* LO Flag: use lower part of element
**/
template< class VD, class VA, class VB, int LO >
void powerpc_cpu::execute_vector_merge(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VB::type const & vB = VB::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
for (int i = 0; i < n_elements; i += 2) {
VD::set_element(vD, i , VA::get_element(vA, (i / 2) + LO * (n_elements / 2)));
VD::set_element(vD, i + 1, VB::get_element(vB, (i / 2) + LO * (n_elements / 2)));
}
increment_pc(4);
}
/**
* Vector pack/unpack instructions
*
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* VC Input operand vector (optional: operand_NONE)
* LO Flag: use lower part of element
**/
template< class VD, class VA, class VB >
void powerpc_cpu::execute_vector_pack(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VB::type const & vB = VB::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
const int n_pivot = n_elements / 2;
for (int i = 0; i < n_elements; i++) {
typename VD::element_type d;
if (i < n_pivot)
d = VA::get_element(vA, i);
else
d = VB::get_element(vB, i - n_pivot);
if (VD::saturate(d))
vscr().set_sat(1);
VD::set_element(vD, i, d);
}
increment_pc(4);
}
template< int LO, class VD, class VA >
void powerpc_cpu::execute_vector_unpack(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
for (int i = 0; i < n_elements; i++)
VD::set_element(vD, i, VA::get_element(vA, i + LO * n_elements));
increment_pc(4);
}
void powerpc_cpu::execute_vector_pack_pixel(uint32 opcode)
{
powerpc_vr const & vA = vr(vA_field::extract(opcode));
powerpc_vr const & vB = vr(vB_field::extract(opcode));
powerpc_vr & vD = vr(vD_field::extract(opcode));
for (int i = 0; i < 4; i++) {
const uint32 a = vA.w[i];
vD.h[ev_mixed::half_element(i)] = ((a >> 9) & 0xfc00) | ((a >> 6) & 0x03e0) | ((a >> 3) & 0x001f);
const uint32 b = vB.w[i];
vD.h[ev_mixed::half_element(i + 4)] = ((b >> 9) & 0xfc00) | ((b >> 6) & 0x03e0) | ((b >> 3) & 0x001f);
}
increment_pc(4);
}
template< int LO >
void powerpc_cpu::execute_vector_unpack_pixel(uint32 opcode)
{
powerpc_vr const & vB = vr(vB_field::extract(opcode));
powerpc_vr & vD = vr(vD_field::extract(opcode));
for (int i = 0; i < 4; i++) {
const uint32 h = vB.h[ev_mixed::half_element(i + LO * 4)];
vD.w[i] = (((h & 0x8000) ? 0xff000000 : 0) |
((h & 0x7c00) << 6) |
((h & 0x03e0) << 3) |
(h & 0x001f));
}
increment_pc(4);
}
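
vpkpx reduces each 32-bit 0:8:8:8 pixel to 1:5:5:5 by keeping the low bit of the first byte and the top five bits of each colour channel, and vupkhpx/vupklpx reverse that, replicating the 1-bit alpha into a full 0xff byte. A standalone worked example, not from the commit, of the two bit mappings used above:

// Standalone sketch: the 32-bit <-> 16-bit pixel conversions of vpkpx and vupk*px.
#include <cstdint>
#include <cstdio>

static uint16_t pack_pixel(uint32_t a) {
    return ((a >> 9) & 0xfc00) | ((a >> 6) & 0x03e0) | ((a >> 3) & 0x001f);
}

static uint32_t unpack_pixel(uint16_t h) {
    return ((h & 0x8000) ? 0xff000000u : 0u)   // 1-bit alpha -> 0x00 or 0xff
         | ((uint32_t)(h & 0x7c00) << 6)       // 5-bit R -> bits 16-20
         | ((uint32_t)(h & 0x03e0) << 3)       // 5-bit G -> bits 8-12
         |  (uint32_t)(h & 0x001f);            // 5-bit B -> bits 0-4
}

int main() {
    const uint32_t argb = 0x00ff8040;          // A=0x00 R=0xff G=0x80 B=0x40
    const uint16_t p = pack_pixel(argb);       // 0x7e08 = 0 11111 10000 01000
    std::printf("packed=0x%04x unpacked=0x%08x\n", p, unpack_pixel(p));
    // prints: packed=0x7e08 unpacked=0x001f1008
    return 0;
}
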
/**
* Vector shift instructions
*
* SD Shift direction: left (-1), right (+1)
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* VC Input operand vector (optional: operand_NONE)
* SH Shift count operand
**/
template< int SD >
void powerpc_cpu::execute_vector_shift(uint32 opcode)
{
powerpc_vr const & vA = vr(vA_field::extract(opcode));
powerpc_vr const & vB = vr(vB_field::extract(opcode));
powerpc_vr & vD = vr(vD_field::extract(opcode));
// The contents of the low-order three bits of all byte
// elements in vB must be identical to vB[125-127]; otherwise
// the value placed into vD is undefined.
const int sh = vB.b[ev_mixed::byte_element(15)] & 7;
if (sh == 0) {
for (int i = 0; i < 4; i++)
vD.w[i] = vA.w[i];
}
else {
uint32 prev_bits = 0;
if (SD < 0) {
for (int i = 3; i >= 0; i--) {
uint32 next_bits = vA.w[i] >> (32 - sh);
vD.w[i] = ((vA.w[i] << sh) | prev_bits);
prev_bits = next_bits;
}
}
else if (SD > 0) {
for (int i = 0; i < 4; i++) {
uint32 next_bits = vA.w[i] << (32 - sh);
vD.w[i] = ((vA.w[i] >> sh) | prev_bits);
prev_bits = next_bits;
}
}
}
increment_pc(4);
}
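
vsl and vsr shift the entire 128-bit register by 0 to 7 bits, so every 32-bit word has to pick up the bits that fall out of its neighbour; the prev_bits/next_bits carry and the loop direction above implement exactly that. A standalone worked example, not from the commit, of a 3-bit left shift with the words in big-endian order (w[0] most significant):

// Standalone sketch: 128-bit left shift by sh (1..7) bits, carried word to word.
#include <cstdint>
#include <cstdio>

int main() {
    uint32_t w[4] = { 0x80000001, 0x00000000, 0xffffffff, 0x00000001 };
    uint32_t d[4];
    const int sh = 3;
    uint32_t carry = 0;
    for (int i = 3; i >= 0; i--) {             // start at the least significant word
        uint32_t out = w[i] >> (32 - sh);      // bits that move up into the next word
        d[i] = (w[i] << sh) | carry;
        carry = out;
    }
    std::printf("%08x %08x %08x %08x\n", d[0], d[1], d[2], d[3]);
    // prints: 00000008 00000007 fffffff8 00000008 (the top 3 bits fall off)
    return 0;
}
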
template< int SD, class VD, class VA, class VB, class SH >
void powerpc_cpu::execute_vector_shift_octet(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VB::type const & vB = VB::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
const int sh = SH::get(this, opcode);
if (SD < 0) {
for (int i = 0; i < 16; i++) {
if (i + sh < 16)
VD::set_element(vD, i, VA::get_element(vA, i + sh));
else
VD::set_element(vD, i, VB::get_element(vB, 16 - (i + sh)));
}
}
else if (SD > 0) {
for (int i = 0; i < 16; i++) {
if (i < sh)
VD::set_element(vD, i, VB::get_element(vB, 16 - (i - sh)));
else
VD::set_element(vD, i, VA::get_element(vA, i - sh));
}
}
increment_pc(4);
}
/**
* Vector splat instructions
*
* OP Operation to perform on element
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
* IM Immediate value to replicate
**/
template< class OP, class VD, class VB, bool IM >
void powerpc_cpu::execute_vector_splat(uint32 opcode)
{
typename VD::type & vD = VD::ref(this, opcode);
const int n_elements = 16 / VD::element_size;
uint32 value;
if (IM)
value = OP::apply(vUIMM_field::extract(opcode));
else {
typename VB::type const & vB = VB::const_ref(this, opcode);
const int n = vUIMM_field::extract(opcode) & (n_elements - 1);
value = OP::apply(VB::get_element(vB, n));
}
for (int i = 0; i < n_elements; i++)
VD::set_element(vD, i, value);
increment_pc(4);
}
/**
* Vector sum instructions
*
* SZ Size of destination vector elements
* VD Output operand vector
* VA Input operand vector
* VB Input operand vector (optional: operand_NONE)
**/
template< int SZ, class VD, class VA, class VB >
void powerpc_cpu::execute_vector_sum(uint32 opcode)
{
typename VA::type const & vA = VA::const_ref(this, opcode);
typename VB::type const & vB = VB::const_ref(this, opcode);
typename VD::type & vD = VD::ref(this, opcode);
typename VD::element_type d;
switch (SZ) {
case 1: // vsum
d = VB::get_element(vB, 3);
for (int j = 0; j < 4; j++)
d += VA::get_element(vA, j);
if (VD::saturate(d))
vscr().set_sat(1);
VD::set_element(vD, 0, 0);
VD::set_element(vD, 1, 0);
VD::set_element(vD, 2, 0);
VD::set_element(vD, 3, d);
break;
case 2: // vsum2
for (int i = 0; i < 4; i += 2) {
d = VB::get_element(vB, i + 1);
for (int j = 0; j < 2; j++)
d += VA::get_element(vA, i + j);
if (VD::saturate(d))
vscr().set_sat(1);
VD::set_element(vD, i + 0, 0);
VD::set_element(vD, i + 1, d);
}
break;
case 4: // vsum4
for (int i = 0; i < 4; i += 1) {
d = VB::get_element(vB, i);
const int n_elements = 4 / VA::element_size;
for (int j = 0; j < n_elements; j++)
d += VA::get_element(vA, i * n_elements + j);
if (VD::saturate(d))
vscr().set_sat(1);
VD::set_element(vD, i, d);
}
break;
}
increment_pc(4);
}
/**
* Misc vector instructions
**/
void powerpc_cpu::execute_vector_permute(uint32 opcode)
{
powerpc_vr const & vA = vr(vA_field::extract(opcode));
powerpc_vr const & vB = vr(vB_field::extract(opcode));
powerpc_vr const & vC = vr(vC_field::extract(opcode));
powerpc_vr & vD = vr(vD_field::extract(opcode));
for (int i = 0; i < 16; i++) {
const int ei = ev_mixed::byte_element(i);
const int n = vC.b[ei] & 0x1f;
const int en = ev_mixed::byte_element(n & 0xf);
vD.b[ei] = (n & 0x10) ? vB.b[en] : vA.b[en];
}
increment_pc(4);
}
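
vperm treats vA concatenated with vB as a 32-byte lookup table and vC as 16 selector bytes: the low five bits of each selector choose the source byte, with bit 4 picking vA (indices 0-15) or vB (16-31). A standalone worked example, not from the commit, on plain arrays, leaving out the ev_mixed host-endian remapping:

// Standalone sketch: vperm byte selection.
#include <cstdint>
#include <cstdio>

int main() {
    uint8_t vA[16], vB[16], vC[16], vD[16];
    for (int i = 0; i < 16; i++) {
        vA[i] = uint8_t(0xa0 + i);             // 0xa0 .. 0xaf
        vB[i] = uint8_t(0xb0 + i);             // 0xb0 .. 0xbf
        vC[i] = uint8_t(31 - i);               // selectors that reverse vA||vB
    }
    for (int i = 0; i < 16; i++) {
        const int n = vC[i] & 0x1f;            // selector value 0..31
        vD[i] = (n & 0x10) ? vB[n & 0x0f] : vA[n & 0x0f];
    }
    std::printf("vD[0]=0x%02x vD[15]=0x%02x\n", vD[0], vD[15]);  // 0xbf, 0xb0
    return 0;
}
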
void powerpc_cpu::execute_mfvscr(uint32 opcode)
{
const int vD = vD_field::extract(opcode);
vr(vD).w[0] = 0;
vr(vD).w[1] = 0;
vr(vD).w[2] = 0;
vr(vD).w[3] = vscr().get();
increment_pc(4);
}
void powerpc_cpu::execute_mtvscr(uint32 opcode)
{
const int vB = vB_field::extract(opcode);
vscr().set(vr(vB).w[3]);
increment_pc(4);
}
/**
* Explicit template instantiations
**/

View File

@ -68,6 +68,9 @@ enum powerpc_instruction {
PPC_I(DCBZ),
PPC_I(DIVW),
PPC_I(DIVWU),
PPC_I(DSS),
PPC_I(DST),
PPC_I(DSTST),
PPC_I(ECIWX),
PPC_I(ECOWX),
PPC_I(EIEIO),
@ -125,6 +128,11 @@ enum powerpc_instruction {
PPC_I(LMW),
PPC_I(LSWI),
PPC_I(LSWX),
PPC_I(LVEBX),
PPC_I(LVEHX),
PPC_I(LVEWX),
PPC_I(LVX),
PPC_I(LVXL),
PPC_I(LWARX),
PPC_I(LWBRX),
PPC_I(LWZ),
@ -138,12 +146,14 @@ enum powerpc_instruction {
PPC_I(MFMSR),
PPC_I(MFSPR),
PPC_I(MFTB),
PPC_I(MFVSCR),
PPC_I(MTCRF),
PPC_I(MTFSB0),
PPC_I(MTFSB1),
PPC_I(MTFSF),
PPC_I(MTFSFI),
PPC_I(MTSPR),
PPC_I(MTVSCR),
PPC_I(MULHW),
PPC_I(MULHWU),
PPC_I(MULLI),
@ -183,6 +193,11 @@ enum powerpc_instruction {
PPC_I(STMW),
PPC_I(STSWI),
PPC_I(STSWX),
PPC_I(STVEBX),
PPC_I(STVEHX),
PPC_I(STVEWX),
PPC_I(STVX),
PPC_I(STVXL),
PPC_I(STW),
PPC_I(STWBRX),
PPC_I(STWCX),
@ -199,6 +214,148 @@ enum powerpc_instruction {
PPC_I(XOR),
PPC_I(XORI),
PPC_I(XORIS),
PPC_I(VADDCUW),
PPC_I(VADDFP),
PPC_I(VADDSBS),
PPC_I(VADDSHS),
PPC_I(VADDSWS),
PPC_I(VADDUBM),
PPC_I(VADDUBS),
PPC_I(VADDUHM),
PPC_I(VADDUHS),
PPC_I(VADDUWM),
PPC_I(VADDUWS),
PPC_I(VAND),
PPC_I(VANDC),
PPC_I(VAVGSB),
PPC_I(VAVGSH),
PPC_I(VAVGSW),
PPC_I(VAVGUB),
PPC_I(VAVGUH),
PPC_I(VAVGUW),
PPC_I(VCFSX),
PPC_I(VCFUX),
PPC_I(VCMPBFP),
PPC_I(VCMPEQFP),
PPC_I(VCMPEQUB),
PPC_I(VCMPEQUH),
PPC_I(VCMPEQUW),
PPC_I(VCMPGEFP),
PPC_I(VCMPGTFP),
PPC_I(VCMPGTSB),
PPC_I(VCMPGTSH),
PPC_I(VCMPGTSW),
PPC_I(VCMPGTUB),
PPC_I(VCMPGTUH),
PPC_I(VCMPGTUW),
PPC_I(VCTSXS),
PPC_I(VCTUXS),
PPC_I(VEXPTEFP),
PPC_I(VLOGEFP),
PPC_I(VMADDFP),
PPC_I(VMAXFP),
PPC_I(VMAXSB),
PPC_I(VMAXSH),
PPC_I(VMAXSW),
PPC_I(VMAXUB),
PPC_I(VMAXUH),
PPC_I(VMAXUW),
PPC_I(VMHADDSHS),
PPC_I(VMHRADDSHS),
PPC_I(VMINFP),
PPC_I(VMINSB),
PPC_I(VMINSH),
PPC_I(VMINSW),
PPC_I(VMINUB),
PPC_I(VMINUH),
PPC_I(VMINUW),
PPC_I(VMLADDUHM),
PPC_I(VMRGHB),
PPC_I(VMRGHH),
PPC_I(VMRGHW),
PPC_I(VMRGLB),
PPC_I(VMRGLH),
PPC_I(VMRGLW),
PPC_I(VMSUMMBM),
PPC_I(VMSUMSHM),
PPC_I(VMSUMSHS),
PPC_I(VMSUMUBM),
PPC_I(VMSUMUHM),
PPC_I(VMSUMUHS),
PPC_I(VMULESB),
PPC_I(VMULESH),
PPC_I(VMULEUB),
PPC_I(VMULEUH),
PPC_I(VMULOSB),
PPC_I(VMULOSH),
PPC_I(VMULOUB),
PPC_I(VMULOUH),
PPC_I(VNMSUB),
PPC_I(VNOR),
PPC_I(VOR),
PPC_I(VPERM),
PPC_I(VPKPX),
PPC_I(VPKSHSS),
PPC_I(VPKSHUS),
PPC_I(VPKSWSS),
PPC_I(VPKSWUS),
PPC_I(VPKUHUM),
PPC_I(VPKUHUS),
PPC_I(VPKUWUM),
PPC_I(VPKUWUS),
PPC_I(VREFP),
PPC_I(VRFIM),
PPC_I(VRFIN),
PPC_I(VRFIP),
PPC_I(VRFIZ),
PPC_I(VRLB),
PPC_I(VRLH),
PPC_I(VRLW),
PPC_I(VRSQRTEFP),
PPC_I(VSEL),
PPC_I(VSL),
PPC_I(VSLB),
PPC_I(VSLDOI),
PPC_I(VSLH),
PPC_I(VSLO),
PPC_I(VSLW),
PPC_I(VSPLTB),
PPC_I(VSPLTH),
PPC_I(VSPLTISB),
PPC_I(VSPLTISH),
PPC_I(VSPLTISW),
PPC_I(VSPLTW),
PPC_I(VSR),
PPC_I(VSRAB),
PPC_I(VSRAH),
PPC_I(VSRAW),
PPC_I(VSRB),
PPC_I(VSRH),
PPC_I(VSRO),
PPC_I(VSRW),
PPC_I(VSUBCUW),
PPC_I(VSUBFP),
PPC_I(VSUBSBS),
PPC_I(VSUBSHS),
PPC_I(VSUBSWS),
PPC_I(VSUBUBM),
PPC_I(VSUBUBS),
PPC_I(VSUBUHM),
PPC_I(VSUBUHS),
PPC_I(VSUBUWM),
PPC_I(VSUBUWS),
PPC_I(VSUMSWS),
PPC_I(VSUM2SWS),
PPC_I(VSUM4SBS),
PPC_I(VSUM4SHS),
PPC_I(VSUM4UBS),
PPC_I(VUPKHPX),
PPC_I(VUPKHSB),
PPC_I(VUPKHSH),
PPC_I(VUPKLPX),
PPC_I(VUPKLSB),
PPC_I(VUPKLSH),
PPC_I(VXOR),
PPC_I(MAX) // Total number of instruction types
};

View File

@ -21,6 +21,21 @@
#ifndef PPC_OPERANDS_H
#define PPC_OPERANDS_H
#include <limits>
/**
* Compile time checks
**/
template< int a, int b >
struct ensure_equals;
template< int n >
struct ensure_equals<n, n> { };
template< class type, int size >
struct ensure_sizeof : ensure_equals<sizeof(type), size> { };
/**
* General purpose registers
**/
@ -96,6 +111,191 @@ struct output_fpr_dw {
template< class field >
struct fpr_dw_operand : input_fpr_dw< field >, output_fpr_dw< field > { };
/**
* Vector registers
**/
struct ev_direct {
static inline int byte_element(int i) { return i; }
static inline int half_element(int i) { return i; }
static inline int word_element(int i) { return i; }
};
// This supposes elements are loaded by 4-byte word parts
#ifdef WORDS_BIGENDIAN
typedef ev_direct ev_mixed;
#else
struct ev_mixed : public ev_direct {
#if 0
static inline int byte_element(int i) { return (i & ~3) + (3 - (i & 3)); }
static inline int half_element(int i) { return (i & ~1) + (1 - (i & 1)); }
#else
static inline int byte_element(int i) {
static const int lookup[16] = {
3, 2, 1, 0,
7, 6, 5, 4,
11, 10, 9, 8,
15, 14, 13, 12
};
return lookup[i];
}
static inline int half_element(int i) {
static const int lookup[8] = {
1, 0, 3, 2,
5, 4, 7, 6
};
return lookup[i];
}
#endif
};
#endif
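
powerpc_vr stores the vector as four host-endian 32-bit words, so on a little-endian host the interpreter's big-endian element numbers have to be remapped inside each word; ev_mixed's lookup tables perform the same within-word swap that the #if 0 arithmetic expresses. A standalone check, not from the commit, of that equivalence:

// Standalone sketch: ev_mixed's tables equal a byte/halfword swap within each
// 32-bit word, which is what a little-endian host needs.
#include <cassert>
#include <cstdio>

int main() {
    static const int byte_lookup[16] = {  3,  2,  1,  0,  7,  6,  5,  4,
                                         11, 10,  9,  8, 15, 14, 13, 12 };
    static const int half_lookup[8]  = { 1, 0, 3, 2, 5, 4, 7, 6 };
    for (int i = 0; i < 16; i++)
        assert(byte_lookup[i] == (i & ~3) + (3 - (i & 3)));
    for (int i = 0; i < 8; i++)
        assert(half_lookup[i] == (i & ~1) + (1 - (i & 1)));
    std::printf("big-endian byte 0 lives at host byte %d of w[0]\n", byte_lookup[0]);
    return 0;
}
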
struct null_vector_operand {
typedef uint32 type;
typedef uint32 element_type;
static const uint32 element_size = sizeof(element_type);
static inline type const_ref(powerpc_cpu *, uint32) { return 0; } // fake so that compiler optimizes it out
static inline element_type get_element(type const & reg, int i) { return 0; }
};
template< class field >
struct vimm_operand {
typedef uint32 type;
typedef uint32 element_type;
static const uint32 element_size = sizeof(element_type);
static inline type const_ref(powerpc_cpu *, uint32 opcode) { return field::extract(opcode); }
static inline element_type get_element(type const & reg, int i) { return reg; }
};
template< class field >
struct input_vr {
static inline powerpc_vr const & const_ref(powerpc_cpu * cpu, uint32 opcode) {
return cpu->vr(field::extract(opcode));
}
};
template< class field >
struct output_vr {
static inline powerpc_vr & ref(powerpc_cpu * cpu, uint32 opcode) {
return cpu->vr(field::extract(opcode));
}
};
template< class field, class value_type >
struct vector_operand : input_vr< field >, output_vr< field > {
typedef powerpc_vr type;
typedef value_type element_type;
static const uint32 element_size = sizeof(element_type);
static inline bool saturate(element_type) { return false; }
};
template< class field, class value_type, class sat_type >
struct vector_saturate_operand : input_vr< field >, output_vr< field > {
typedef powerpc_vr type;
typedef sat_type element_type;
static const uint32 element_size = sizeof(value_type);
static inline bool saturate(element_type & v) {
bool sat = false;
if (v > std::numeric_limits<value_type>::max()) {
v = std::numeric_limits<value_type>::max();
sat = true;
}
else if (v < std::numeric_limits<value_type>::min()) {
v = std::numeric_limits<value_type>::min();
sat = true;
}
return sat;
}
};
template< class field, class value_type, class sat_type = int16, class ev = ev_direct >
struct v16qi_sat_operand : vector_saturate_operand< field, value_type, sat_type >, ensure_sizeof< sat_type, 2 > {
static inline sat_type get_element(powerpc_vr const & reg, int i) {
return (sat_type)(value_type)reg.b[ev::byte_element(i)];
}
static inline void set_element(powerpc_vr & reg, int i, sat_type value) {
reg.b[ev::byte_element(i)] = value;
}
};
template< class field, class value_type, class sat_type = int32, class ev = ev_direct >
struct v8hi_sat_operand : vector_saturate_operand< field, value_type, sat_type >, ensure_sizeof< sat_type, 4 > {
static inline sat_type get_element(powerpc_vr const & reg, int i) {
return (sat_type)(value_type)reg.h[ev::half_element(i)];
}
static inline void set_element(powerpc_vr & reg, int i, sat_type value) {
reg.h[ev::half_element(i)] = value;
}
};
template< class field, class value_type, class sat_type = int64 >
struct v4si_sat_operand : vector_saturate_operand< field, value_type, sat_type >, ensure_sizeof< sat_type, 8 > {
static inline sat_type get_element(powerpc_vr const & reg, int i) {
return (sat_type)(value_type)reg.w[i];
}
static inline void set_element(powerpc_vr & reg, int i, sat_type value) {
reg.w[i] = value;
}
};
template< class field, class value_type = uint8, class ev = ev_direct >
struct v16qi_operand : vector_operand< field, value_type >, ensure_sizeof< value_type, 1 > {
static inline value_type get_element(powerpc_vr const & reg, int i) {
return reg.b[ev::byte_element(i)];
}
static inline void set_element(powerpc_vr & reg, int i, value_type value) {
reg.b[ev::byte_element(i)] = value;
}
};
template< class field, class value_type = uint16, class ev = ev_direct >
struct v8hi_operand : vector_operand< field, value_type >, ensure_sizeof< value_type, 2 > {
static inline value_type get_element(powerpc_vr const & reg, int i) {
return reg.h[ev::half_element(i)];
}
static inline void set_element(powerpc_vr & reg, int i, value_type value) {
reg.h[ev::half_element(i)] = value;
}
};
template< class field, class value_type = uint32 >
struct v4si_operand : vector_operand< field, value_type >, ensure_sizeof< value_type, 4 > {
static inline value_type get_element(powerpc_vr const & reg, int i) {
return reg.w[i];
}
static inline void set_element(powerpc_vr & reg, int i, value_type value) {
reg.w[i] = value;
}
};
template< class field, class value_type = uint64 >
struct v2di_operand : vector_operand< field, value_type >, ensure_sizeof< value_type, 8 > {
static inline value_type get_element(powerpc_vr const & reg, int i) {
return reg.j[i];
}
static inline void set_element(powerpc_vr & reg, int i, value_type value) {
reg.j[i] = value;
}
};
template< class field >
struct v4sf_operand : vector_operand< field, float > {
static inline float get_element(powerpc_vr const & reg, int i) {
return reg.f[i];
}
static inline void set_element(powerpc_vr & reg, int i, float value) {
reg.f[i] = value;
}
};
template< class field >
struct vSH_operand {
static inline uint32 get(powerpc_cpu * cpu, uint32 opcode) {
return (cpu->vr(field::extract(opcode)).b[ev_mixed::byte_element(15)] >> 3) & 15;
}
};
/**
* Immediate operands
**/
@ -239,6 +439,108 @@ typedef fpscr_operand<FPSCR_RN_field> operand_FPSCR_RN;
typedef spr_operand operand_SPR;
typedef tbr_operand operand_TBR;
typedef mask_operand operand_MASK;
typedef null_vector_operand operand_vD_NONE;
typedef null_vector_operand operand_vA_NONE;
typedef null_vector_operand operand_vB_NONE;
typedef null_vector_operand operand_vC_NONE;
typedef v16qi_operand<vD_field> operand_vD_V16QI;
typedef v16qi_operand<vA_field> operand_vA_V16QI;
typedef v16qi_operand<vB_field> operand_vB_V16QI;
typedef v16qi_operand<vC_field> operand_vC_V16QI;
typedef v16qi_operand<vD_field, int8> operand_vD_V16QIs;
typedef v16qi_operand<vA_field, int8> operand_vA_V16QIs;
typedef v16qi_operand<vB_field, int8> operand_vB_V16QIs;
typedef v16qi_operand<vC_field, int8> operand_vC_V16QIs;
typedef v16qi_operand<vD_field, int8, ev_mixed> operand_vD_V16QIms;
typedef v16qi_operand<vB_field, int8, ev_mixed> operand_vB_V16QIms;
typedef v8hi_operand<vD_field> operand_vD_V8HI;
typedef v8hi_operand<vA_field> operand_vA_V8HI;
typedef v8hi_operand<vB_field> operand_vB_V8HI;
typedef v8hi_operand<vC_field> operand_vC_V8HI;
typedef v8hi_operand<vD_field, int16> operand_vD_V8HIs;
typedef v8hi_operand<vA_field, int16> operand_vA_V8HIs;
typedef v8hi_operand<vB_field, int16> operand_vB_V8HIs;
typedef v8hi_operand<vC_field, int16> operand_vC_V8HIs;
typedef v8hi_operand<vD_field, int16, ev_mixed> operand_vD_V8HIms;
typedef v8hi_operand<vB_field, int16, ev_mixed> operand_vB_V8HIms;
typedef v4si_operand<vD_field> operand_vD_V4SI;
typedef v4si_operand<vA_field> operand_vA_V4SI;
typedef v4si_operand<vB_field> operand_vB_V4SI;
typedef v4si_operand<vC_field> operand_vC_V4SI;
typedef v4si_operand<vD_field, int32> operand_vD_V4SIs;
typedef v4si_operand<vA_field, int32> operand_vA_V4SIs;
typedef v4si_operand<vB_field, int32> operand_vB_V4SIs;
typedef v4si_operand<vC_field, int32> operand_vC_V4SIs;
typedef v2di_operand<vD_field> operand_vD_V2DI;
typedef v2di_operand<vA_field> operand_vA_V2DI;
typedef v2di_operand<vB_field> operand_vB_V2DI;
typedef v2di_operand<vC_field> operand_vC_V2DI;
typedef v2di_operand<vD_field, int64> operand_vD_V2DIs;
typedef v2di_operand<vA_field, int64> operand_vA_V2DIs;
typedef v2di_operand<vB_field, int64> operand_vB_V2DIs;
typedef v2di_operand<vC_field, int64> operand_vC_V2DIs;
typedef v4sf_operand<vD_field> operand_vD_V4SF;
typedef v4sf_operand<vA_field> operand_vA_V4SF;
typedef v4sf_operand<vB_field> operand_vB_V4SF;
typedef v4sf_operand<vC_field> operand_vC_V4SF;
typedef v4si_operand<vS_field> operand_vS_V4SI;
typedef v2di_operand<vS_field> operand_vS_V2DI;
typedef vimm_operand<vA_field> operand_vA_UIMM;
typedef vimm_operand<vB_field> operand_vB_UIMM;
typedef vSH_operand<vB_field> operand_SHBO;
// vector mixed element accessors
typedef v16qi_operand<vA_field, uint8, ev_mixed> operand_vA_V16QIm;
typedef v16qi_operand<vB_field, uint8, ev_mixed> operand_vB_V16QIm;
typedef v16qi_operand<vD_field, uint8, ev_mixed> operand_vD_V16QIm;
typedef v8hi_operand<vA_field, uint16, ev_mixed> operand_vA_V8HIm;
typedef v8hi_operand<vB_field, uint16, ev_mixed> operand_vB_V8HIm;
typedef v8hi_operand<vD_field, uint16, ev_mixed> operand_vD_V8HIm;
#define DEFINE_VECTOR_SAT_OPERAND(EV, REG, OP) \
template< class value_type > \
struct operand_##REG##_##EV##_SAT : OP##_sat_operand<REG##_field, value_type> { }
DEFINE_VECTOR_SAT_OPERAND(V4SI, vD, v4si);
DEFINE_VECTOR_SAT_OPERAND(V4SI, vA, v4si);
DEFINE_VECTOR_SAT_OPERAND(V4SI, vB, v4si);
DEFINE_VECTOR_SAT_OPERAND(V4SI, vC, v4si);
DEFINE_VECTOR_SAT_OPERAND(V8HI, vD, v8hi);
DEFINE_VECTOR_SAT_OPERAND(V8HI, vA, v8hi);
DEFINE_VECTOR_SAT_OPERAND(V8HI, vB, v8hi);
DEFINE_VECTOR_SAT_OPERAND(V8HI, vC, v8hi);
DEFINE_VECTOR_SAT_OPERAND(V16QI, vD, v16qi);
DEFINE_VECTOR_SAT_OPERAND(V16QI, vA, v16qi);
DEFINE_VECTOR_SAT_OPERAND(V16QI, vB, v16qi);
DEFINE_VECTOR_SAT_OPERAND(V16QI, vC, v16qi);
#undef DEFINE_VECTOR_SAT_OPERAND
#define DEFINE_VECTOR_MIXED_SAT_OPERAND(EV, SAT, REG, OP, TYPE) \
template< class value_type > \
struct operand_##REG##_##EV##m_##SAT : OP##_sat_operand<REG##_field, value_type, TYPE, ev_mixed> { }
DEFINE_VECTOR_MIXED_SAT_OPERAND(V16QI, SAT, vA, v16qi, int16);
DEFINE_VECTOR_MIXED_SAT_OPERAND(V16QI, SAT, vB, v16qi, int16);
DEFINE_VECTOR_MIXED_SAT_OPERAND(V16QI, SAT, vD, v16qi, int16);
DEFINE_VECTOR_MIXED_SAT_OPERAND(V16QI, USAT, vD, v16qi, uint16);
DEFINE_VECTOR_MIXED_SAT_OPERAND(V8HI, SAT, vA, v8hi, int32);
DEFINE_VECTOR_MIXED_SAT_OPERAND(V8HI, SAT, vB, v8hi, int32);
DEFINE_VECTOR_MIXED_SAT_OPERAND(V8HI, SAT, vD, v8hi, int32);
DEFINE_VECTOR_MIXED_SAT_OPERAND(V8HI, USAT, vD, v8hi, uint32);
#undef DEFINE_VECTOR_MIXED_SAT_OPERAND
#define DEFINE_VECTOR_USAT_OPERAND(EV, REG, OP, TYPE) \
template< class value_type > \
struct operand_##REG##_##EV##_USAT : OP##_sat_operand<REG##_field, value_type, TYPE> { }
// FIXME: temporary for vector pack unsigned saturate variants
DEFINE_VECTOR_USAT_OPERAND(V4SI, vD, v4si, uint64);
DEFINE_VECTOR_USAT_OPERAND(V8HI, vD, v8hi, uint32);
DEFINE_VECTOR_USAT_OPERAND(V16QI, vD, v16qi, uint16);
#undef DEFINE_VECTOR_USAT_OPERAND
#define DEFINE_IMMEDIATE_OPERAND(NAME, FIELD, OP) \
typedef immediate_operand<FIELD##_field, op_##OP> operand_##NAME
@ -255,6 +557,7 @@ DEFINE_IMMEDIATE_OPERAND(D, d, sign_extend_16_32);
DEFINE_IMMEDIATE_OPERAND(NB, NB, nop);
DEFINE_IMMEDIATE_OPERAND(SH, SH, nop);
DEFINE_IMMEDIATE_OPERAND(FM, FM, nop);
DEFINE_IMMEDIATE_OPERAND(SHB, vSH, nop);
#undef DEFINE_IMMEDIATE_OPERAND

View File

@ -31,6 +31,9 @@
* EXPR C++ expression defining the operation, parameters are x/y/z/t
**/
#define DEFINE_ALIAS_OP(NAME, T_NAME, TYPE) \
typedef op_template_##T_NAME<TYPE> op_##NAME
#define DEFINE_OP1(NAME, TYPE, EXPR) \
struct op_##NAME { \
static inline TYPE apply(TYPE x) { \
@ -38,6 +41,10 @@ struct op_##NAME { \
} \
}
#define DEFINE_TEMPLATE_OP1(NAME, EXPR) \
template< class TYPE > \
DEFINE_OP1(template_##NAME, TYPE, EXPR)
#define DEFINE_OP2(NAME, TYPE, EXPR) \
struct op_##NAME { \
static inline TYPE apply(TYPE x, TYPE y) { \
@ -45,6 +52,10 @@ struct op_##NAME { \
} \
}
#define DEFINE_TEMPLATE_OP2(NAME, EXPR) \
template< class TYPE > \
DEFINE_OP2(template_##NAME, TYPE, EXPR)
#define DEFINE_OP3(NAME, TYPE, EXPR) \
struct op_##NAME { \
static inline TYPE apply(TYPE x, TYPE y, TYPE z) { \
@ -59,26 +70,42 @@ struct op_##NAME { \
} \
}
// Basic operations
DEFINE_TEMPLATE_OP1(nop, x);
DEFINE_TEMPLATE_OP2(add, x + y);
DEFINE_TEMPLATE_OP2(sub, x - y);
DEFINE_TEMPLATE_OP2(mul, x * y);
DEFINE_TEMPLATE_OP2(div, x / y);
DEFINE_TEMPLATE_OP2(and, x & y);
DEFINE_TEMPLATE_OP2(or, x | y);
DEFINE_TEMPLATE_OP2(xor, x ^ y);
DEFINE_TEMPLATE_OP2(orc, x | ~y);
DEFINE_TEMPLATE_OP2(andc,x & ~y);
DEFINE_TEMPLATE_OP2(nand,~(x & y));
DEFINE_TEMPLATE_OP2(nor, ~(x | y));
DEFINE_TEMPLATE_OP2(eqv, ~(x ^ y));
// Integer basic operations
DEFINE_OP1(nop, uint32, x);
DEFINE_ALIAS_OP(nop, nop, uint32);
DEFINE_ALIAS_OP(add, add, uint32);
DEFINE_ALIAS_OP(sub, sub, uint32);
DEFINE_ALIAS_OP(mul, mul, uint32);
DEFINE_ALIAS_OP(smul,mul, int32);
DEFINE_ALIAS_OP(div, div, uint32);
DEFINE_ALIAS_OP(sdiv,div, int32);
DEFINE_OP1(neg, uint32, -x);
DEFINE_OP1(compl, uint32, ~x);
DEFINE_OP2(add, uint32, x + y);
DEFINE_OP2(sub, uint32, x - y);
DEFINE_OP2(mul, uint32, x * y);
DEFINE_OP2(smul, int32, x * y);
DEFINE_OP2(div, uint32, x / y);
DEFINE_OP2(sdiv, int32, x / y);
DEFINE_OP2(mod, uint32, x % y);
DEFINE_OP2(and, uint32, x & y);
DEFINE_OP2(or, uint32, x | y);
DEFINE_OP2(xor, uint32, x ^ y);
DEFINE_OP2(orc, uint32, x | ~y);
DEFINE_OP2(andc,uint32, x & ~y);
DEFINE_OP2(nand,uint32, ~(x & y));
DEFINE_OP2(nor, uint32, ~(x | y));
DEFINE_OP2(eqv, uint32, ~(x ^ y));
DEFINE_ALIAS_OP(and, and, uint32);
DEFINE_ALIAS_OP(or, or, uint32);
DEFINE_ALIAS_OP(xor, xor, uint32);
DEFINE_ALIAS_OP(orc, orc, uint32);
DEFINE_ALIAS_OP(andc,andc,uint32);
DEFINE_ALIAS_OP(nand,nand,uint32);
DEFINE_ALIAS_OP(nor, nor, uint32);
DEFINE_ALIAS_OP(eqv, eqv, uint32);
DEFINE_OP2(shll, uint32, x << y);
DEFINE_OP2(shrl, uint32, x >> y);
DEFINE_OP2(shra, uint32, (int32)x >> y);
@ -89,6 +116,14 @@ DEFINE_OP4(ppc_rlwimi, uint32, (op_rotl::apply(x, y) & z) | (t & ~z));
DEFINE_OP3(ppc_rlwinm, uint32, (op_rotl::apply(x, y) & z));
DEFINE_OP3(ppc_rlwnm, uint32, (op_rotl::apply(x, (y & 0x1f)) & z));
DEFINE_ALIAS_OP(add_64, add, uint64);
DEFINE_ALIAS_OP(sub_64, sub, uint64);
DEFINE_ALIAS_OP(smul_64,mul, int64);
DEFINE_ALIAS_OP(and_64, and, uint64);
DEFINE_ALIAS_OP(andc_64,andc,uint64);
DEFINE_ALIAS_OP(or_64, or, uint64);
DEFINE_ALIAS_OP(nor_64, nor, uint64);
DEFINE_ALIAS_OP(xor_64, xor, uint64);
// Floating-point basic operations
@ -105,14 +140,168 @@ DEFINE_OP3(fnmadd, double, -((x * y) + z));
DEFINE_OP3(fnmsub, double, -((x * y) - z));
DEFINE_OP2(fsub, double, x - y);
DEFINE_OP1(fnops, float, x);
DEFINE_OP1(fabss, float, fabs(x));
DEFINE_OP2(fadds, float, x + y);
DEFINE_OP2(fdivs, float, x / y);
DEFINE_OP3(fmadds, float, (x * y) + z);
DEFINE_OP3(fmsubs, float, (x * y) - z);
DEFINE_OP2(fmuls, float, x * y);
DEFINE_OP1(fnabss, float, -fabs(x));
DEFINE_OP1(fnegs, float, -x);
DEFINE_OP3(fnmadds, float, -((x * y) + z));
DEFINE_OP3(fnmsubs, float, -((x * y) - z));
DEFINE_OP2(fsubs, float, x - y);
DEFINE_OP1(exp2, float, exp2f(x));
DEFINE_OP1(log2, float, log2f(x));
DEFINE_OP1(fres, float, 1 / x);
DEFINE_OP1(frsqrt, float, 1 / sqrt(x));
DEFINE_OP1(frim, float, floorf(x));
DEFINE_OP1(frin, float, roundf(x));
DEFINE_OP1(frip, float, ceilf(x));
DEFINE_OP1(friz, float, trunc(x));
// Misc operations used in AltiVec instructions
template< class TYPE >
struct op_vrl {
static inline TYPE apply(TYPE v, TYPE n) {
const int sh = n & ((8 * sizeof(TYPE)) - 1);
return ((v << sh) | (v >> ((8 * sizeof(TYPE)) - sh)));
}
};
template< class TYPE >
struct op_vsl {
static inline TYPE apply(TYPE v, TYPE n) {
const int sh = n & ((8 * sizeof(TYPE)) - 1);
return v << sh;
}
};
template< class TYPE >
struct op_vsr {
static inline TYPE apply(TYPE v, TYPE n) {
const int sh = n & ((8 * sizeof(TYPE)) - 1);
return v >> sh;
}
};
template< uint16 round = 0 >
struct op_mhraddsh {
static inline int32 apply(int32 a, int32 b, int32 c) {
return (((a * b) + round) >> 15) + c;
}
};
struct op_cvt_fp2si {
static inline int64 apply(uint32 a, float b) {
return (int64)(b * (1U << a));
}
};
template< class TYPE >
struct op_cvt_si2fp {
static inline float apply(uint32 a, TYPE b) {
return ((float)b) / ((float)(1U << a));
}
};
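
op_cvt_fp2si and op_cvt_si2fp carry the 2^UIMM scale factor of the fixed-point conversions (presumably vcfux/vcfsx and vctuxs/vctsxs): the integer value is divided, respectively the float multiplied, by 2^UIMM, and the int64 return type leaves room for a saturating destination operand to clamp afterwards. A standalone worked example, not from the commit:

// Standalone sketch: the 2^UIMM scaling used by the AltiVec fixed <-> float ops.
#include <cstdint>
#include <cstdio>

static float   cvt_si2fp(uint32_t uimm, int32_t b) { return (float)b / (float)(1u << uimm); }
static int64_t cvt_fp2si(uint32_t uimm, float b)   { return (int64_t)(b * (1u << uimm)); }

int main() {
    std::printf("%g\n", cvt_si2fp(8, 640));                 // 640 / 256 = 2.5
    std::printf("%lld\n", (long long)cvt_fp2si(8, 2.5f));   // 2.5 * 256 = 640
    // A huge float input would overflow 32 bits here, which is why the emulator
    // computes in int64 and lets the saturating operand clamp the result.
    return 0;
}
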
template< class TYPE >
struct op_max {
static inline TYPE apply(TYPE a, TYPE b) {
return (a > b) ? a : b;
}
};
template< class TYPE >
struct op_min {
static inline TYPE apply(TYPE a, TYPE b) {
return (a < b) ? a : b;
}
};
template< int nbytes >
struct op_all_ones {
static const uint32 value = (1U << (8 * nbytes)) - 1;
};
template<>
struct op_all_ones<4> {
static const uint32 value = 0xffffffff;
};
template< class VX >
struct op_cmp {
static const uint32 result = op_all_ones<sizeof(VX)>::value;
};
template< class VX >
struct op_cmp_eq {
static inline uint32 apply(VX a, VX b) {
return a == b ? op_cmp<VX>::result : 0;
}
};
template< class VX >
struct op_cmp_ge {
static inline uint32 apply(VX a, VX b) {
return a >= b ? op_cmp<VX>::result : 0;
}
};
template< class VX >
struct op_cmp_gt {
static inline uint32 apply(VX a, VX b) {
return a > b ? op_cmp<VX>::result : 0;
}
};
struct op_cmpbfp {
static inline uint32 apply(float a, float b) {
const bool le = a <= b;
const bool ge = a >= -b;
return (le ? 0 : (1 << 31)) | (ge ? 0 : (1 << 30));
}
};
DEFINE_OP3(vsel, uint32, ((y & z) | (x & ~z)));
DEFINE_OP3(vmaddfp, float, ((x * z) + y));
DEFINE_OP3(vnmsubfp, float, -((x * z) - y));
DEFINE_OP3(mladduh, uint32, ((x * y) + z) & 0xffff);
DEFINE_OP2(addcuw, uint32, ((uint64)x + (uint64)y) >> 32);
DEFINE_OP2(subcuw, uint32, (~((int64)x - (int64)y) >> 32) & 1);
DEFINE_OP2(avgsb, int8, (((int16)x + (int16)y + 1) >> 1));
DEFINE_OP2(avgsh, int16, (((int32)x + (int32)y + 1) >> 1));
DEFINE_OP2(avgsw, int32, (((int64)x + (int64)y + 1) >> 1));
DEFINE_OP2(avgub, uint8, ((uint16)x + (uint16)y + 1) >> 1);
DEFINE_OP2(avguh, uint16, ((uint32)x + (uint32)y + 1) >> 1);
DEFINE_OP2(avguw, uint32, ((uint64)x + (uint64)y + 1) >> 1);
#undef DEFINE_OP1
#undef DEFINE_OP2
#undef DEFINE_OP3
#undef DEFINE_OP4
#undef DEFINE_TEMPLATE_OP1
#undef DEFINE_TEMPLATE_OP2
#undef DEFINE_TEMPLATE_OP3
#undef DEFINE_ALIAS_OP
// Sign/Zero-extend operation
struct op_sign_extend_5_32 {
static inline uint32 apply(uint32 value) {
if (value & 0x10)
value -= 0x20;
return value;
}
};
struct op_sign_extend_16_32 {
static inline uint32 apply(uint32 value) {
return (uint32)(int32)(int16)value;

View File

@ -162,6 +162,58 @@ union powerpc_fpr {
};
/**
* Vector Status and Control Register
**/
class powerpc_vscr
{
uint8 nj;
uint8 sat;
public:
powerpc_vscr();
void set(uint32 v);
uint32 get() const;
uint32 get_nj() const { return nj; }
void set_nj(int v) { nj = v; }
uint32 get_sat() const { return sat; }
void set_sat(int v) { sat = v; }
};
inline
powerpc_vscr::powerpc_vscr()
: nj(0), sat(0)
{ }
inline uint32
powerpc_vscr::get() const
{
return (nj << 16) | sat;
}
inline void
powerpc_vscr::set(uint32 v)
{
nj = VSCR_NJ_field::extract(v);
sat = VSCR_SAT_field::extract(v);
}
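
The two architected VSCR bits are NJ (non-Java mode, denormals flushed to zero) and SAT (the sticky saturation flag); in the 32-bit word that get() returns they sit at bit 16 and bit 0, which is exactly what VSCR_NJ_field (bit 15) and VSCR_SAT_field (bit 31) describe in IBM numbering. A standalone round-trip check, not from the commit, of that layout:

// Standalone sketch: the 32-bit VSCR layout assumed by get()/set(),
// NJ at bit 16 and SAT at bit 0.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct vscr_sketch {
    uint8_t nj, sat;
    vscr_sketch() : nj(0), sat(0) { }
    uint32_t get() const { return ((uint32_t)nj << 16) | sat; }
    void set(uint32_t v) { nj = (v >> 16) & 1; sat = v & 1; }
};

int main() {
    vscr_sketch v;
    v.set(0x00010001);                       // NJ=1, SAT=1
    assert(v.nj == 1 && v.sat == 1);
    v.set(0);
    v.sat = 1;                               // e.g. after a vaddsbs that clamped
    std::printf("VSCR=0x%08x\n", v.get());   // 0x00000001
    return 0;
}
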
/**
* Vector register
**/
union powerpc_vr
{
uint8 b[16];
uint16 h[8];
uint32 w[4];
uint64 j[2];
float f[4];
double d[2];
};
/**
* User Environment Architecture (UEA) Register Set
**/
@ -185,6 +237,7 @@ struct powerpc_registers
SPR_CTR = 9,
SPR_SDR1 = 25,
SPR_PVR = 287,
SPR_VRSAVE = 256,
};
static inline int GPR(int r) { return GPR_BASE + r; }
@ -203,6 +256,9 @@ struct powerpc_registers
static uint32 reserve_valid;
static uint32 reserve_addr;
static uint32 reserve_data;
powerpc_vr vr[32]; // Vector Registers
powerpc_vscr vscr; // Vector Status and Control Register
uint32 vrsave; // AltiVec Save Register
};
#endif /* PPC_REGISTERS_H */

View File

@ -684,6 +684,9 @@ powerpc_cpu::compile_block(uint32 entry_point)
case powerpc_registers::SPR_CTR:
dg.gen_load_T0_CTR();
break;
case powerpc_registers::SPR_VRSAVE:
dg.gen_load_T0_VRSAVE();
break;
#ifdef SHEEPSHAVER
case powerpc_registers::SPR_SDR1:
dg.gen_mov_32_T0_im(0xdead001f);
@ -697,7 +700,7 @@ powerpc_cpu::compile_block(uint32 entry_point)
dg.gen_mov_32_T0_im(0);
break;
#else
default: goto do_illegal;
default: goto do_generic;
#endif
}
dg.gen_store_T0_GPR(rD_field::extract(opcode));
@ -717,8 +720,11 @@ powerpc_cpu::compile_block(uint32 entry_point)
case powerpc_registers::SPR_CTR:
dg.gen_store_T0_CTR();
break;
case powerpc_registers::SPR_VRSAVE:
dg.gen_store_T0_VRSAVE();
break;
#ifndef SHEEPSHAVER
default: goto do_illegal;
default: goto do_generic;
#endif
}
break;