Experiment with generic AltiVec optimizations for V4SF, V2DI operands (+60%)

2024-12-26 08:32:20 +00:00 · 2004-02-16 23:17:27 +00:00 · 2004-02-16 23:17:27 +00:00 · ea3c6801ab
commit ea3c6801ab
parent 680326da55
8 changed files with 505 additions and 59 deletions
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-decode.cpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-decode.cpp
@ -1711,7 +1711,7 @@ const powerpc_cpu::instr_info_t powerpc_cpu::powerpc_ii_table[] = {
 	{ "vnmsubfp",
 	  EXECUTE_VECTOR_ARITH(vnmsubfp, V4SF, V4SF, V4SF, V4SF),
 	  NULL,
-	  PPC_I(VNMSUB),
+	  PPC_I(VNMSUBFP),
 	  VA_form, 4, 47, CFLOW_NORMAL
 	},
 	{ "vnor",
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp
@ -39,19 +39,24 @@ register struct powerpc_cpu *CPU asm(REG_CPU);
 #define REG32(X) X
 #endif
 // Pointer views of a host register value: FPREG yields a powerpc_fpr pointer,
 // VREG yields the powerpc_vr object the value points to (usable as an lvalue).
 #define FPREG(X) ((powerpc_fpr *)(X))
 #define VREG(X)  ((powerpc_vr *)(X))[0]
 // reg_A0 is a global register variable bound to REG_A0. A0 reads it through
 // REG32, VD treats the same register as the address of a vector register.
 #define A0 REG32(reg_A0)
 #define VD VREG(reg_A0)
 register uintptr reg_A0 asm(REG_A0);
 // reg_T0: T0 is the REG32 view, F0/F0_dw the `d`/`j` members of the powerpc_fpr
 // it points to, V0 the powerpc_vr it points to.
 #define T0 REG32(reg_T0)
 #define F0 FPREG(reg_T0)->d
 #define F0_dw FPREG(reg_T0)->j
 #define V0 VREG(reg_T0)
 register uintptr reg_T0 asm(REG_T0);
 // reg_T1: same set of views as reg_T0 (T1 / F1 / F1_dw / V1).
 #define T1 REG32(reg_T1)
 #define F1 FPREG(reg_T1)->d
 #define F1_dw FPREG(reg_T1)->j
 #define V1 VREG(reg_T1)
 register uintptr reg_T1 asm(REG_T1);
 // reg_T2: same set of views as reg_T0 (T2 / F2 / F2_dw / V2).
 #define T2 REG32(reg_T2)
 #define F2 FPREG(reg_T2)->d
 #define F2_dw FPREG(reg_T2)->j
 #define V2 VREG(reg_T2)
 register uintptr reg_T2 asm(REG_T2);
 // FP result accessors provided by powerpc_dyngen_helper.
 #define FD powerpc_dyngen_helper::fp_result()
 #define FD_dw powerpc_dyngen_helper::fp_result_dw()
@ -1236,3 +1241,229 @@ void OPPROTO op_jump_next_A0(void)
 	}
 	dyngen_barrier();
 }
 /**
 *		Load/store addresses to vector registers
 **/
 // The destination slot "D" has no register of its own: pasting reg_T##D gives
 // reg_TD, which this alias redirects to reg_A0 (the register behind VD).
 #define reg_TD reg_A0
 // DEFINE_OP(SLOT, IDX) defines op_load_ad_V<SLOT>_VR<IDX>, which stores the
 // address of CPU->vr(IDX) into reg_T<SLOT>.
 #define DEFINE_OP(SLOT, IDX) \
 void OPPROTO op_load_ad_V##SLOT##_VR##IDX(void) \
 { \
 	reg_T##SLOT = (uintptr)&CPU->vr(IDX); \
 }
 // DEFINE_REG(N) instantiates DEFINE_OP for every operand slot (D, 0, 1, 2) of
 // vector register N. The last body line deliberately carries no line
 // continuation: a trailing backslash would splice the next source line into
 // the macro body. The caller's `DEFINE_REG(N);` supplies the final semicolon.
 #define DEFINE_REG(N)							\
 DEFINE_OP(D,N);									\
 DEFINE_OP(0,N);									\
 DEFINE_OP(1,N);									\
 DEFINE_OP(2,N)
 // Instantiate the address-load ops for vector registers 0 through 31, then
 // drop the helper macros and the reg_TD alias so they do not leak further.
 DEFINE_REG(0);
 DEFINE_REG(1);
 DEFINE_REG(2);
 DEFINE_REG(3);
 DEFINE_REG(4);
 DEFINE_REG(5);
 DEFINE_REG(6);
 DEFINE_REG(7);
 DEFINE_REG(8);
 DEFINE_REG(9);
 DEFINE_REG(10);
 DEFINE_REG(11);
 DEFINE_REG(12);
 DEFINE_REG(13);
 DEFINE_REG(14);
 DEFINE_REG(15);
 DEFINE_REG(16);
 DEFINE_REG(17);
 DEFINE_REG(18);
 DEFINE_REG(19);
 DEFINE_REG(20);
 DEFINE_REG(21);
 DEFINE_REG(22);
 DEFINE_REG(23);
 DEFINE_REG(24);
 DEFINE_REG(25);
 DEFINE_REG(26);
 DEFINE_REG(27);
 DEFINE_REG(28);
 DEFINE_REG(29);
 DEFINE_REG(30);
 DEFINE_REG(31);
 #undef DEFINE_REG
 #undef DEFINE_OP
 #undef reg_TD
 void op_load_word_VD_T0(void)
 {
 	const uint32 ea = T0;
 	VD.w[(ea >> 2) & 3] = vm_read_memory_4(ea & ~3);
 }
 void op_store_word_VD_T0(void)
 {
 	const uint32 ea = T0;
 	vm_write_memory_4(ea & ~3, VD.w[(ea >> 2) & 3]);
 }
 void op_load_vect_VD_T0(void)
 {
 	const uint32 ea = T0 & ~15;
 	VD.w[0] = vm_read_memory_4(ea +  0);
 	VD.w[1] = vm_read_memory_4(ea +  4);
 	VD.w[2] = vm_read_memory_4(ea +  8);
 	VD.w[3] = vm_read_memory_4(ea + 12);
 }
 void op_store_vect_VD_T0(void)
 {
 	const uint32 ea = T0 & ~15;
 	vm_write_memory_4(ea +  0, VD.w[0]);
 	vm_write_memory_4(ea +  4, VD.w[1]);
 	vm_write_memory_4(ea +  8, VD.w[2]);
 	vm_write_memory_4(ea + 12, VD.w[3]);
 }
 /**
 *		Vector operations helpers
 **/
 struct VNONE {
 	typedef null_operand type;
 	static inline uint32 get(powerpc_vr const & v, int i) { return 0; }
 	static inline void set(powerpc_vr const & v, int i, uint32) { }
 };
 struct V16QI {
 	typedef uint8 type;
 	static inline type get(powerpc_vr const & v, int i) { return v.b[i]; }
 	static inline void set(powerpc_vr & v, int i, type x) { v.b[i] = x; }
 };
 struct V8HI {
 	typedef uint16 type;
 	static inline type get(powerpc_vr const & v, int i) { return v.h[i]; }
 	static inline void set(powerpc_vr & v, int i, type x) { v.h[i] = x; }
 };
 struct V4SI {
 	typedef uint32 type;
 	static inline type get(powerpc_vr const & v, int i) { return v.w[i]; }
 	static inline void set(powerpc_vr & v, int i, type x) { v.w[i] = x; }
 };
 struct V2DI {
 	typedef uint64 type;
 	static inline type get(powerpc_vr const & v, int i) { return v.j[i]; }
 	static inline void set(powerpc_vr & v, int i, type x) { v.j[i] = x; }
 };
 struct V4SF {
 	typedef float type;
 	static inline type get(powerpc_vr const & v, int i) { return v.f[i]; }
 	static inline void set(powerpc_vr & v, int i, type x) { v.f[i] = x; }
 };
 template< class OP, class VX, class VA, class VB, class VC, int N >
 struct do_vector_execute {
 	static inline void apply() {
 		do_vector_execute<OP, VX, VA, VB, VC, N - 1>::apply();
 		VX::set(
 			VD, N,
 			op_apply<typename VX::type, OP, typename VA::type, typename VB::type, typename VC::type>::apply(
 				VA::get(V0, N),
 				VB::get(V1, N),
 				VC::get(V2, N)));
 	}
 };
 template< class OP, class VX, class VA, class VB, class VC >
 struct do_vector_execute<OP, VX, VA, VB, VC, 0> {
 	static inline void apply() {
 		VX::set(
 			VD, 0, op_apply<typename VX::type, OP, typename VA::type, typename VB::type, typename VC::type>::apply(
 				VA::get(V0, 0),
 				VB::get(V1, 0),
 				VC::get(V2, 0)));
 	}
 };
 template< class OP, class VX, class VA, class VB = VNONE, class VC = VNONE >
 struct vector_execute {
 	static inline void apply() {
 		do_vector_execute<OP, VX, VA, VB, VC, (16 / sizeof(typename VX::type)) - 1>::apply();
 	}
 };
 /**
 *		Vector synthetic operations
 **/
 void op_vaddfp_VD_V0_V1(void)
 {
 	vector_execute<op_fadds, V4SF, V4SF, V4SF>::apply();
 }
 void op_vsubfp_VD_V0_V1(void)
 {
 	vector_execute<op_fsubs, V4SF, V4SF, V4SF>::apply();
 }
 void op_vmaddfp_VD_V0_V1_V2(void)
 {
 	vector_execute<op_vmaddfp, V4SF, V4SF, V4SF, V4SF>::apply();
 }
 void op_vnmsubfp_VD_V0_V1_V2(void)
 {
 	vector_execute<op_vnmsubfp, V4SF, V4SF, V4SF, V4SF>::apply();
 }
 void op_vmaxfp_VD_V0_V1(void)
 {
 	vector_execute<op_max<float>, V4SF, V4SF, V4SF>::apply();
 }
 void op_vminfp_VD_V0_V1(void)
 {
 	vector_execute<op_min<float>, V4SF, V4SF, V4SF>::apply();
 }
 void op_vand_VD_V0_V1(void)
 {
 	vector_execute<op_and_64, V2DI, V2DI, V2DI>::apply();
 }
 void op_vandc_VD_V0_V1(void)
 {
 	vector_execute<op_andc_64, V2DI, V2DI, V2DI>::apply();
 }
 void op_vnor_VD_V0_V1(void)
 {
 	vector_execute<op_nor_64, V2DI, V2DI, V2DI>::apply();
 }
 void op_vor_VD_V0_V1(void)
 {
 	vector_execute<op_or_64, V2DI, V2DI, V2DI>::apply();
 }
 void op_vxor_VD_V0_V1(void)
 {
 	vector_execute<op_xor_64, V2DI, V2DI, V2DI>::apply();
 }
 #ifdef LONG_OPERATIONS
 void op_vcmpeqfp_VD_V0_V1(void)
 {
 	vector_execute<op_cmp_eq<float>, V4SF, V4SF, V4SF>::apply();
 }
 void op_vaddubm_VD_V0_V1(void)
 {
 	vector_execute<op_template_add<uint8>, V16QI, V16QI, V16QI>::apply();
 }
 #endif
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.cpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.cpp
@ -125,6 +125,10 @@ DEFINE_INSN(store, F0, FPR);
 DEFINE_INSN(store, F1, FPR);
 DEFINE_INSN(store, F2, FPR);
 DEFINE_INSN(store, FD, FPR);
 // Vector register address loads for the VD, V0, V1 and V2 operand slots.
 // NOTE(review): presumably these provide gen_load_ad_{VD,V0,V1,V2}_VR(i) by
 // dispatching on i to the op_load_ad_V*_VR<i> ops — confirm against DEFINE_INSN.
 DEFINE_INSN(load_ad, VD, VR);
 DEFINE_INSN(load_ad, V0, VR);
 DEFINE_INSN(load_ad, V1, VR);
 DEFINE_INSN(load_ad, V2, VR);
 // Condition register bitfield
 DEFINE_INSN(load, T0, crb);
@ -234,3 +238,121 @@ void powerpc_dyngen::gen_bc_A0(int bo, int bi, uint32 npc)
 	}
 #endif
 }
 /**
 *		Vector instructions
 **/
 // Emit a single-word vector load into vector register vD: first the VD address
 // load for vD, then the load_word_VD_T0 op (address taken from T0).
 // NOTE(review): assumes gen_op_* emits the op_* of the same name — confirm.
 void powerpc_dyngen::gen_load_word_VD_T0(int vD)
 {
 	gen_load_ad_VD_VR(vD);
 	gen_op_load_word_VD_T0();
 }
 // Emit a single-word vector store from vector register vS. The source register
 // is addressed through the VD slot, then the store_word_VD_T0 op is emitted.
 void powerpc_dyngen::gen_store_word_VS_T0(int vS)
 {
 	gen_load_ad_VD_VR(vS);
 	gen_op_store_word_VD_T0();
 }
 // Emit a full-vector load into vector register vD: VD address load for vD,
 // then the load_vect_VD_T0 op (address taken from T0).
 void powerpc_dyngen::gen_load_vect_VD_T0(int vD)
 {
 	gen_load_ad_VD_VR(vD);
 	gen_op_load_vect_VD_T0();
 }
 // Emit a full-vector store from vector register vS. The source register is
 // addressed through the VD slot, then the store_vect_VD_T0 op is emitted.
 void powerpc_dyngen::gen_store_vect_VS_T0(int vS)
 {
 	gen_load_ad_VD_VR(vS);
 	gen_op_store_vect_VD_T0();
 }
 // Emit vaddfp: address vD, vA, vB through the VD, V0, V1 slots, then emit the
 // vaddfp_VD_V0_V1 op.
 void powerpc_dyngen::gen_vaddfp(int vD, int vA, int vB)
 {
 	gen_load_ad_VD_VR(vD);
 	gen_load_ad_V0_VR(vA);
 	gen_load_ad_V1_VR(vB);
 	gen_op_vaddfp_VD_V0_V1();
 }
 // Emit vsubfp: address vD, vA, vB through the VD, V0, V1 slots, then emit the
 // vsubfp_VD_V0_V1 op.
 void powerpc_dyngen::gen_vsubfp(int vD, int vA, int vB)
 {
 	gen_load_ad_VD_VR(vD);
 	gen_load_ad_V0_VR(vA);
 	gen_load_ad_V1_VR(vB);
 	gen_op_vsubfp_VD_V0_V1();
 }
 // Emit vmaddfp: address vD, vA, vB, vC through the VD, V0, V1, V2 slots, then
 // emit the vmaddfp_VD_V0_V1_V2 op.
 void powerpc_dyngen::gen_vmaddfp(int vD, int vA, int vB, int vC)
 {
 	gen_load_ad_VD_VR(vD);
 	gen_load_ad_V0_VR(vA);
 	gen_load_ad_V1_VR(vB);
 	gen_load_ad_V2_VR(vC);
 	gen_op_vmaddfp_VD_V0_V1_V2();
 }
 // Emit vnmsubfp: address vD, vA, vB, vC through the VD, V0, V1, V2 slots, then
 // emit the vnmsubfp_VD_V0_V1_V2 op.
 void powerpc_dyngen::gen_vnmsubfp(int vD, int vA, int vB, int vC)
 {
 	gen_load_ad_VD_VR(vD);
 	gen_load_ad_V0_VR(vA);
 	gen_load_ad_V1_VR(vB);
 	gen_load_ad_V2_VR(vC);
 	gen_op_vnmsubfp_VD_V0_V1_V2();
 }
 // Emit vmaxfp: address vD, vA, vB through the VD, V0, V1 slots, then emit the
 // vmaxfp_VD_V0_V1 op.
 void powerpc_dyngen::gen_vmaxfp(int vD, int vA, int vB)
 {
 	gen_load_ad_VD_VR(vD);
 	gen_load_ad_V0_VR(vA);
 	gen_load_ad_V1_VR(vB);
 	gen_op_vmaxfp_VD_V0_V1();
 }
 // Emit vminfp: address vD, vA, vB through the VD, V0, V1 slots, then emit the
 // vminfp_VD_V0_V1 op.
 void powerpc_dyngen::gen_vminfp(int vD, int vA, int vB)
 {
 	gen_load_ad_VD_VR(vD);
 	gen_load_ad_V0_VR(vA);
 	gen_load_ad_V1_VR(vB);
 	gen_op_vminfp_VD_V0_V1();
 }
 // Emit vand: address vD, vA, vB through the VD, V0, V1 slots, then emit the
 // vand_VD_V0_V1 op.
 void powerpc_dyngen::gen_vand(int vD, int vA, int vB)
 {
 	gen_load_ad_VD_VR(vD);
 	gen_load_ad_V0_VR(vA);
 	gen_load_ad_V1_VR(vB);
 	gen_op_vand_VD_V0_V1();
 }
 // Emit vandc: address vD, vA, vB through the VD, V0, V1 slots, then emit the
 // vandc_VD_V0_V1 op.
 void powerpc_dyngen::gen_vandc(int vD, int vA, int vB)
 {
 	gen_load_ad_VD_VR(vD);
 	gen_load_ad_V0_VR(vA);
 	gen_load_ad_V1_VR(vB);
 	gen_op_vandc_VD_V0_V1();
 }
 // Emit vnor: address vD, vA, vB through the VD, V0, V1 slots, then emit the
 // vnor_VD_V0_V1 op.
 void powerpc_dyngen::gen_vnor(int vD, int vA, int vB)
 {
 	gen_load_ad_VD_VR(vD);
 	gen_load_ad_V0_VR(vA);
 	gen_load_ad_V1_VR(vB);
 	gen_op_vnor_VD_V0_V1();
 }
 // Emit vor: address vD, vA, vB through the VD, V0, V1 slots, then emit the
 // vor_VD_V0_V1 op.
 void powerpc_dyngen::gen_vor(int vD, int vA, int vB)
 {
 	gen_load_ad_VD_VR(vD);
 	gen_load_ad_V0_VR(vA);
 	gen_load_ad_V1_VR(vB);
 	gen_op_vor_VD_V0_V1();
 }
 // Emit vxor: address vD, vA, vB through the VD, V0, V1 slots, then emit the
 // vxor_VD_V0_V1 op.
 void powerpc_dyngen::gen_vxor(int vD, int vA, int vB)
 {
 	gen_load_ad_VD_VR(vD);
 	gen_load_ad_V0_VR(vA);
 	gen_load_ad_V1_VR(vB);
 	gen_op_vxor_VD_V0_V1();
 }
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp
@ -219,6 +219,27 @@ public:
 	// Branch instructions
 	void gen_bc_A0(int bo, int bi, uint32 npc);
 	// Vector instructions
 	// Point an operand slot (VD, V0, V1 or V2) at vector register i
 	void gen_load_ad_VD_VR(int i);
 	void gen_load_ad_V0_VR(int i);
 	void gen_load_ad_V1_VR(int i);
 	void gen_load_ad_V2_VR(int i);
 	// Vector loads/stores of a single word or a whole vector, addressed by T0
 	void gen_load_word_VD_T0(int vD);
 	void gen_load_vect_VD_T0(int vD);
 	void gen_store_word_VS_T0(int vS);
 	void gen_store_vect_VS_T0(int vS);
 	// Floating-point vector arithmetic (vD = destination, vA/vB/vC = sources)
 	void gen_vaddfp(int vD, int vA, int vB);
 	void gen_vsubfp(int vD, int vA, int vB);
 	void gen_vmaddfp(int vD, int vA, int vB, int vC);
 	void gen_vnmsubfp(int vD, int vA, int vB, int vC);
 	void gen_vmaxfp(int vD, int vA, int vB);
 	void gen_vminfp(int vD, int vA, int vB);
 	// Vector logical operations
 	void gen_vand(int vD, int vA, int vB);
 	void gen_vandc(int vD, int vA, int vB);
 	void gen_vnor(int vD, int vA, int vB);
 	void gen_vor(int vD, int vA, int vB);
 	void gen_vxor(int vD, int vA, int vB);
 #undef DEFINE_ALIAS
 #undef DEFINE_ALIAS_0
 #undef DEFINE_ALIAS_1
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp
@ -43,63 +43,6 @@
 #define DEBUG 0
 #include "debug.h"
 /**
  *	Helper class to apply a unary/binary/ternary operation
  *
  *		RT		Result type
  *		OP		Operation to perform (provides static apply())
  *		RA		First input operand type
  *		RB		Second input operand type (optional: null_operand or null_vector_operand)
  *		RC		Third input operand type (optional: null_operand or null_vector_operand)
  *
  *	Every variant takes three arguments; the ones in unused slots are ignored.
  **/

 // Ternary form: forwards all three arguments to OP
 template< class RT, class OP, class RA, class RB, class RC >
 struct op_apply {
 	template< class TA, class TB, class TC >
 	static inline RT apply(TA x, TB y, TC z)
 	{
 		return OP::apply(x, y, z);
 	}
 };

 // Binary form: third slot is null_operand
 template< class RT, class OP, class RA, class RB >
 struct op_apply<RT, OP, RA, RB, null_operand> {
 	template< class TA, class TB, class TC >
 	static inline RT apply(TA x, TB y, TC)
 	{
 		return OP::apply(x, y);
 	}
 };

 // Unary form: second and third slots are null_operand
 template< class RT, class OP, class RA >
 struct op_apply<RT, OP, RA, null_operand, null_operand> {
 	template< class TA, class TB, class TC >
 	static inline RT apply(TA x, TB, TC)
 	{
 		return OP::apply(x);
 	}
 };

 // Binary vector form: third slot is null_vector_operand, result is cast to RT
 template< class RT, class OP, class RA, class RB >
 struct op_apply<RT, OP, RA, RB, null_vector_operand> {
 	template< class TA, class TB, class TC >
 	static inline RT apply(TA x, TB y, TC)
 	{
 		return (RT)OP::apply(x, y);
 	}
 };

 // Unary vector form on the first operand, result is cast to RT
 template< class RT, class OP, class RA >
 struct op_apply<RT, OP, RA, null_vector_operand, null_vector_operand> {
 	template< class TA, class TB, class TC >
 	static inline RT apply(TA x, TB, TC)
 	{
 		return (RT)OP::apply(x);
 	}
 };

 // Unary vector form on the second operand, result is cast to RT
 template< class RT, class OP, class RB >
 struct op_apply<RT, OP, null_vector_operand, RB, null_vector_operand> {
 	template< class TA, class TB, class TC >
 	static inline RT apply(TA, TB y, TC)
 	{
 		return (RT)OP::apply(y);
 	}
 };
 /**
 *	Illegal & NOP instructions
 **/
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.hpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.hpp
@ -36,6 +36,66 @@
 template< bool SB > struct register_value { typedef uint32 type; };
 template< > struct register_value< true > { typedef  int32 type; };
 /**
  *	Helper class to apply a unary/binary/ternary operation
  *
  *		RT		Result type
  *		OP		Operation to perform (provides static apply())
  *		RA		First input operand type
  *		RB		Second input operand type (optional: null_operand or null_vector_operand)
  *		RC		Third input operand type (optional: null_operand or null_vector_operand)
  *
  *	Every variant takes three arguments; the ones in unused slots are ignored.
  **/

 // Tag types marking an unused operand slot; only ever used as template arguments
 struct null_operand;
 struct null_vector_operand;

 // Ternary form: forwards all three arguments to OP
 template< class RT, class OP, class RA, class RB, class RC >
 struct op_apply {
 	template< class TA, class TB, class TC >
 	static inline RT apply(TA x, TB y, TC z)
 	{
 		return OP::apply(x, y, z);
 	}
 };

 // Binary form: third slot is null_operand
 template< class RT, class OP, class RA, class RB >
 struct op_apply<RT, OP, RA, RB, null_operand> {
 	template< class TA, class TB, class TC >
 	static inline RT apply(TA x, TB y, TC)
 	{
 		return OP::apply(x, y);
 	}
 };

 // Unary form: second and third slots are null_operand
 template< class RT, class OP, class RA >
 struct op_apply<RT, OP, RA, null_operand, null_operand> {
 	template< class TA, class TB, class TC >
 	static inline RT apply(TA x, TB, TC)
 	{
 		return OP::apply(x);
 	}
 };

 // Binary vector form: third slot is null_vector_operand, result is cast to RT
 template< class RT, class OP, class RA, class RB >
 struct op_apply<RT, OP, RA, RB, null_vector_operand> {
 	template< class TA, class TB, class TC >
 	static inline RT apply(TA x, TB y, TC)
 	{
 		return (RT)OP::apply(x, y);
 	}
 };

 // Unary vector form on the first operand, result is cast to RT
 template< class RT, class OP, class RA >
 struct op_apply<RT, OP, RA, null_vector_operand, null_vector_operand> {
 	template< class TA, class TB, class TC >
 	static inline RT apply(TA x, TB, TC)
 	{
 		return (RT)OP::apply(x);
 	}
 };

 // Unary vector form on the second operand, result is cast to RT
 template< class RT, class OP, class RB >
 struct op_apply<RT, OP, null_vector_operand, RB, null_vector_operand> {
 	template< class TA, class TB, class TC >
 	static inline RT apply(TA, TB y, TC)
 	{
 		return (RT)OP::apply(y);
 	}
 };
 /**
 *		Add instruction templates
 **/
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-instructions.hpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-instructions.hpp
@ -290,7 +290,7 @@ enum powerpc_instruction {
 	PPC_I(VMULOSH),
 	PPC_I(VMULOUB),
 	PPC_I(VMULOUH),
-	PPC_I(VNMSUB),
+	PPC_I(VNMSUBFP),
 	PPC_I(VNOR),
 	PPC_I(VOR),
 	PPC_I(VPERM),
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp
@ -1269,6 +1269,75 @@ powerpc_cpu::compile_block(uint32 entry_point)
 			break;
 		}
 #endif
 		// NOTE: A0/VD are clobbered in the following instructions!
 		case PPC_I(LVEWX):
 		case PPC_I(LVX):
 		case PPC_I(LVXL):
 		{
 			const int rA = rA_field::extract(opcode);
 			const int rB = rB_field::extract(opcode);
 			const int vD = vD_field::extract(opcode);
 			dg.gen_load_T0_GPR(rB);
 			if (rA != 0) {
 				dg.gen_load_T1_GPR(rA);
 				dg.gen_add_32_T0_T1();
 			}
 			switch (ii->mnemo) {
 			case PPC_I(LVEWX):	dg.gen_load_word_VD_T0(vD); break;
 			case PPC_I(LVX):	dg.gen_load_vect_VD_T0(vD); break;
 			case PPC_I(LVXL):	dg.gen_load_vect_VD_T0(vD); break;
 			}
 			break;
 		}
 		case PPC_I(STVEWX):
 		case PPC_I(STVX):
 		case PPC_I(STVXL):
 		{
 			const int rA = rA_field::extract(opcode);
 			const int rB = rB_field::extract(opcode);
 			const int vS = vS_field::extract(opcode);
 			dg.gen_load_T0_GPR(rB);
 			if (rA != 0) {
 				dg.gen_load_T1_GPR(rA);
 				dg.gen_add_32_T0_T1();
 			}
 			switch (ii->mnemo) {
 			case PPC_I(STVEWX):	dg.gen_store_word_VS_T0(vS); break;
 			case PPC_I(STVX):	dg.gen_store_vect_VS_T0(vS); break;
 			case PPC_I(STVXL):	dg.gen_store_vect_VS_T0(vS); break;
 			}
 			break;
 		}
 		case PPC_I(VADDFP):
 		case PPC_I(VSUBFP):
 		case PPC_I(VMADDFP):
 		case PPC_I(VNMSUBFP):
 		case PPC_I(VMAXFP):
 		case PPC_I(VMINFP):
 		case PPC_I(VAND):
 		case PPC_I(VANDC):
 		case PPC_I(VNOR):
 		case PPC_I(VOR):
 		case PPC_I(VXOR):
 		{
 			const int vD = vD_field::extract(opcode);
 			const int vA = vA_field::extract(opcode);
 			const int vB = vB_field::extract(opcode);
 			switch (ii->mnemo) {
 			case PPC_I(VADDFP):		dg.gen_vaddfp(vD, vA, vB); break;
 			case PPC_I(VSUBFP):		dg.gen_vsubfp(vD, vA, vB); break;
 			case PPC_I(VMADDFP):	dg.gen_vmaddfp(vD, vA, vB, vC_field::extract(opcode)); break;
 			case PPC_I(VNMSUBFP):	dg.gen_vnmsubfp(vD, vA, vB, vC_field::extract(opcode)); break;
 			case PPC_I(VMAXFP):		dg.gen_vmaxfp(vD, vA, vB); break;
 			case PPC_I(VMINFP):		dg.gen_vminfp(vD, vA, vB); break;
 			case PPC_I(VAND):		dg.gen_vand(vD, vA, vB); break;
 			case PPC_I(VANDC):		dg.gen_vandc(vD, vA, vB); break;
 			case PPC_I(VNOR):		dg.gen_vnor(vD, vA, vB); break;
 			case PPC_I(VOR):		dg.gen_vor(vD, vA, vB); break;
 			case PPC_I(VXOR):		dg.gen_vxor(vD, vA, vB); break;
 			}
 			break;
 		}
 		default:				// Direct call to instruction handler
 		{
 			typedef void (*func_t)(dyngen_cpu_base, uint32);