Experiment with generic AltiVec optimizations for V4SF, V2DI operands (+60%)

2025-01-26 16:31:11 +00:00 · 2004-02-16 23:17:27 +00:00 · 2004-02-16 23:17:27 +00:00 · ea3c6801ab
commit ea3c6801ab
parent 680326da55
8 changed files with 505 additions and 59 deletions
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-decode.cpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-decode.cpp
@ -1711,7 +1711,7 @@ const powerpc_cpu::instr_info_t powerpc_cpu::powerpc_ii_table[] = {
 	{ "vnmsubfp",
 	  EXECUTE_VECTOR_ARITH(vnmsubfp, V4SF, V4SF, V4SF, V4SF),
 	  NULL,
-	  PPC_I(VNMSUB),
+	  PPC_I(VNMSUBFP),
 	  VA_form, 4, 47, CFLOW_NORMAL
 	},
 	{ "vnor",
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen-ops.cpp
@ -39,19 +39,24 @@ register struct powerpc_cpu *CPU asm(REG_CPU);
 #define REG32(X) X
 #endif
 #define FPREG(X) ((powerpc_fpr *)(X))
+#define VREG(X)  ((powerpc_vr *)(X))[0]	// X is the address of a powerpc_vr; expands to that register as an lvalue
 #define A0 REG32(reg_A0)
+#define VD VREG(reg_A0)	// vector operand addressed through reg_A0 (the same variable that backs A0)
 register uintptr reg_A0 asm(REG_A0);
 #define T0 REG32(reg_T0)
 #define F0 FPREG(reg_T0)->d
 #define F0_dw FPREG(reg_T0)->j
+#define V0 VREG(reg_T0)	// vector operand addressed through reg_T0 (shared with T0/F0)
 register uintptr reg_T0 asm(REG_T0);
 #define T1 REG32(reg_T1)
 #define F1 FPREG(reg_T1)->d
 #define F1_dw FPREG(reg_T1)->j
+#define V1 VREG(reg_T1)	// vector operand addressed through reg_T1 (shared with T1/F1)
 register uintptr reg_T1 asm(REG_T1);
 #define T2 REG32(reg_T2)
 #define F2 FPREG(reg_T2)->d
 #define F2_dw FPREG(reg_T2)->j
+#define V2 VREG(reg_T2)	// vector operand addressed through reg_T2 (shared with T2/F2)
 register uintptr reg_T2 asm(REG_T2);
 #define FD powerpc_dyngen_helper::fp_result()
 #define FD_dw powerpc_dyngen_helper::fp_result_dw()
@ -1236,3 +1241,229 @@ void OPPROTO op_jump_next_A0(void)
 	}
 	dyngen_barrier();
 }
+
+/**
+ *		Load/store addresses to vector registers
+ **/
+
+#define reg_TD reg_A0	// temporary alias so that reg_T##REG resolves to reg_A0 when REG is D
+#define DEFINE_OP(REG, N)						\
+void OPPROTO op_load_ad_V##REG##_VR##N(void)	\
+{												\
+	reg_T##REG = (uintptr)&CPU->vr(N);			\
+}												
+#define DEFINE_REG(N)							\
+DEFINE_OP(D,N);									\
+DEFINE_OP(0,N);									\
+DEFINE_OP(1,N);									\
+DEFINE_OP(2,N);									\
+
+DEFINE_REG(0);	// each DEFINE_REG(N) yields four ops storing &CPU->vr(N) into the D, 0, 1 and 2 pointer registers
+DEFINE_REG(1);
+DEFINE_REG(2);
+DEFINE_REG(3);
+DEFINE_REG(4);
+DEFINE_REG(5);
+DEFINE_REG(6);
+DEFINE_REG(7);
+DEFINE_REG(8);
+DEFINE_REG(9);
+DEFINE_REG(10);
+DEFINE_REG(11);
+DEFINE_REG(12);
+DEFINE_REG(13);
+DEFINE_REG(14);
+DEFINE_REG(15);
+DEFINE_REG(16);
+DEFINE_REG(17);
+DEFINE_REG(18);
+DEFINE_REG(19);
+DEFINE_REG(20);
+DEFINE_REG(21);
+DEFINE_REG(22);
+DEFINE_REG(23);
+DEFINE_REG(24);
+DEFINE_REG(25);
+DEFINE_REG(26);
+DEFINE_REG(27);
+DEFINE_REG(28);
+DEFINE_REG(29);
+DEFINE_REG(30);
+DEFINE_REG(31);
+
+#undef DEFINE_REG
+#undef DEFINE_OP
+#undef reg_TD	// the alias is only needed while the ops above are being defined
+
+void op_load_word_VD_T0(void)	// Read one 32-bit word from memory into a single element of VD; other elements are left alone
+{
+	const uint32 ea = T0;	// address comes from T0
+	VD.w[(ea >> 2) & 3] = vm_read_memory_4(ea & ~3);	// access is word-aligned; address bits 2-3 select the element
+}
+
+void op_store_word_VD_T0(void)	// Write a single 32-bit element of VD to memory
+{
+	const uint32 ea = T0;	// address comes from T0
+	vm_write_memory_4(ea & ~3, VD.w[(ea >> 2) & 3]);	// access is word-aligned; address bits 2-3 select the element
+}
+
+void op_load_vect_VD_T0(void)	// Fill all four words of VD from 16 consecutive bytes of memory
+{
+	const uint32 ea = T0 & ~15;	// low four bits of T0 are dropped, so the access starts on a 16-byte boundary
+	VD.w[0] = vm_read_memory_4(ea +  0);	// words are read in ascending address order into w[0]..w[3]
+	VD.w[1] = vm_read_memory_4(ea +  4);
+	VD.w[2] = vm_read_memory_4(ea +  8);
+	VD.w[3] = vm_read_memory_4(ea + 12);
+}
+
+void op_store_vect_VD_T0(void)	// Write all four words of VD to 16 consecutive bytes of memory
+{
+	const uint32 ea = T0 & ~15;	// low four bits of T0 are dropped, so the access starts on a 16-byte boundary
+	vm_write_memory_4(ea +  0, VD.w[0]);	// w[0]..w[3] are written in ascending address order
+	vm_write_memory_4(ea +  4, VD.w[1]);
+	vm_write_memory_4(ea +  8, VD.w[2]);
+	vm_write_memory_4(ea + 12, VD.w[3]);
+}
+
+/**
+ *		Vector operations helpers
+ **/
+
+struct VNONE {	// Stand-in for an operand slot that is not used (default for VB/VC in vector_execute)
+	typedef null_operand type;	// makes op_apply select a specialization that ignores this operand
+	static inline uint32 get(powerpc_vr const & v, int i) { return 0; }	// dummy value; v and i are ignored
+	static inline void set(powerpc_vr const & v, int i, uint32) { }	// intentionally does nothing
+};
+
+struct V16QI {	// Accessor that views a powerpc_vr as uint8 elements through its b[] member
+	typedef uint8 type;	// element type; vector_execute derives the element count from its size
+	static inline type get(powerpc_vr const & v, int i) { return v.b[i]; }	// read element i
+	static inline void set(powerpc_vr & v, int i, type x) { v.b[i] = x; }	// write element i
+};
+
+struct V8HI {	// Accessor that views a powerpc_vr as uint16 elements through its h[] member
+	typedef uint16 type;	// element type; vector_execute derives the element count from its size
+	static inline type get(powerpc_vr const & v, int i) { return v.h[i]; }	// read element i
+	static inline void set(powerpc_vr & v, int i, type x) { v.h[i] = x; }	// write element i
+};
+
+struct V4SI {	// Accessor that views a powerpc_vr as uint32 elements through its w[] member
+	typedef uint32 type;	// element type; vector_execute derives the element count from its size
+	static inline type get(powerpc_vr const & v, int i) { return v.w[i]; }	// read element i
+	static inline void set(powerpc_vr & v, int i, type x) { v.w[i] = x; }	// write element i
+};
+
+struct V2DI {	// Accessor that views a powerpc_vr as uint64 elements through its j[] member
+	typedef uint64 type;	// element type; vector_execute derives the element count from its size
+	static inline type get(powerpc_vr const & v, int i) { return v.j[i]; }	// read element i
+	static inline void set(powerpc_vr & v, int i, type x) { v.j[i] = x; }	// write element i
+};
+
+struct V4SF {	// Accessor that views a powerpc_vr as float elements through its f[] member
+	typedef float type;	// element type; vector_execute derives the element count from its size
+	static inline type get(powerpc_vr const & v, int i) { return v.f[i]; }	// read element i
+	static inline void set(powerpc_vr & v, int i, type x) { v.f[i] = x; }	// write element i
+};
+
+template< class OP, class VX, class VA, class VB, class VC, int N >
+struct do_vector_execute {	// Recursive step: processes elements 0..N-1 first, then element N
+	static inline void apply() {
+		do_vector_execute<OP, VX, VA, VB, VC, N - 1>::apply();	// recurse towards the N = 0 specialization
+		VX::set(
+			VD, N,	// result is stored in element N of VD
+			op_apply<typename VX::type, OP, typename VA::type, typename VB::type, typename VC::type>::apply(
+				VA::get(V0, N),	// inputs are element N of V0, V1 and V2 (VNONE slots just yield 0)
+				VB::get(V1, N),
+				VC::get(V2, N)));
+	}
+};
+
+template< class OP, class VX, class VA, class VB, class VC >
+struct do_vector_execute<OP, VX, VA, VB, VC, 0> {	// Recursion terminator: handles element 0 only
+	static inline void apply() {
+		VX::set(
+			VD, 0, op_apply<typename VX::type, OP, typename VA::type, typename VB::type, typename VC::type>::apply(	// same computation as the general case, for index 0
+				VA::get(V0, 0),
+				VB::get(V1, 0),
+				VC::get(V2, 0)));
+	}
+};
+
+template< class OP, class VX, class VA, class VB = VNONE, class VC = VNONE >	// VB/VC default to "no operand"
+struct vector_execute {	// Entry point: applies OP element by element, reading V0/V1/V2 and writing VD
+	static inline void apply() {
+		do_vector_execute<OP, VX, VA, VB, VC, (16 / sizeof(typename VX::type)) - 1>::apply();	// highest index = 16 / sizeof(result element) - 1
+	}
+};
+
+
+/**
+ *		Vector synthetic operations
+ **/
+
+void op_vaddfp_VD_V0_V1(void)	// VD.f[i] = op_fadds(V0.f[i], V1.f[i]) for every float element
+{
+	vector_execute<op_fadds, V4SF, V4SF, V4SF>::apply();
+}
+
+void op_vsubfp_VD_V0_V1(void)	// VD.f[i] = op_fsubs(V0.f[i], V1.f[i]) for every float element
+{
+	vector_execute<op_fsubs, V4SF, V4SF, V4SF>::apply();
+}
+
+void op_vmaddfp_VD_V0_V1_V2(void)	// VD.f[i] = op_vmaddfp(V0.f[i], V1.f[i], V2.f[i]) for every float element
+{
+	vector_execute<op_vmaddfp, V4SF, V4SF, V4SF, V4SF>::apply();
+}
+
+void op_vnmsubfp_VD_V0_V1_V2(void)	// VD.f[i] = op_vnmsubfp(V0.f[i], V1.f[i], V2.f[i]) for every float element
+{
+	vector_execute<op_vnmsubfp, V4SF, V4SF, V4SF, V4SF>::apply();
+}
+
+void op_vmaxfp_VD_V0_V1(void)	// VD.f[i] = op_max<float>(V0.f[i], V1.f[i]) for every float element
+{
+	vector_execute<op_max<float>, V4SF, V4SF, V4SF>::apply();
+}
+
+void op_vminfp_VD_V0_V1(void)	// VD.f[i] = op_min<float>(V0.f[i], V1.f[i]) for every float element
+{
+	vector_execute<op_min<float>, V4SF, V4SF, V4SF>::apply();
+}
+
+void op_vand_VD_V0_V1(void)	// VD.j[i] = op_and_64(V0.j[i], V1.j[i]) for every 64-bit element
+{
+	vector_execute<op_and_64, V2DI, V2DI, V2DI>::apply();
+}
+
+void op_vandc_VD_V0_V1(void)	// VD.j[i] = op_andc_64(V0.j[i], V1.j[i]) for every 64-bit element
+{
+	vector_execute<op_andc_64, V2DI, V2DI, V2DI>::apply();
+}
+
+void op_vnor_VD_V0_V1(void)	// VD.j[i] = op_nor_64(V0.j[i], V1.j[i]) for every 64-bit element
+{
+	vector_execute<op_nor_64, V2DI, V2DI, V2DI>::apply();
+}
+
+void op_vor_VD_V0_V1(void)	// VD.j[i] = op_or_64(V0.j[i], V1.j[i]) for every 64-bit element
+{
+	vector_execute<op_or_64, V2DI, V2DI, V2DI>::apply();
+}
+
+void op_vxor_VD_V0_V1(void)	// VD.j[i] = op_xor_64(V0.j[i], V1.j[i]) for every 64-bit element
+{
+	vector_execute<op_xor_64, V2DI, V2DI, V2DI>::apply();
+}
+
+#ifdef LONG_OPERATIONS
+void op_vcmpeqfp_VD_V0_V1(void)	// Only built when LONG_OPERATIONS is defined; applies op_cmp_eq<float> per float element
+{
+	vector_execute<op_cmp_eq<float>, V4SF, V4SF, V4SF>::apply();	// NOTE(review): result is stored through V4SF::set, i.e. as a float — confirm this is intended
+}
+
+void op_vaddubm_VD_V0_V1(void)	// Only built when LONG_OPERATIONS is defined; VD.b[i] = op_template_add<uint8>(V0.b[i], V1.b[i]) per byte
+{
+	vector_execute<op_template_add<uint8>, V16QI, V16QI, V16QI>::apply();
+}
+#endif
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.cpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.cpp
@ -125,6 +125,10 @@ DEFINE_INSN(store, F0, FPR);
 DEFINE_INSN(store, F1, FPR);
 DEFINE_INSN(store, F2, FPR);
 DEFINE_INSN(store, FD, FPR);
+DEFINE_INSN(load_ad, VD, VR);	// presumably defines gen_load_ad_VD_VR(int) over the op_load_ad_VD_VRn ops — DEFINE_INSN is not shown here
+DEFINE_INSN(load_ad, V0, VR);	// likewise for the V0 operand
+DEFINE_INSN(load_ad, V1, VR);	// likewise for the V1 operand
+DEFINE_INSN(load_ad, V2, VR);	// likewise for the V2 operand

 // Condition register bitfield
 DEFINE_INSN(load, T0, crb);
@ -234,3 +238,121 @@ void powerpc_dyngen::gen_bc_A0(int bo, int bi, uint32 npc)
 	}
 #endif
 }
+
+/**
+ *		Vector instructions
+ **/
+
+void powerpc_dyngen::gen_load_word_VD_T0(int vD)	// Word load into vector register vD, address in T0
+{
+	gen_load_ad_VD_VR(vD);	// select vector register vD as the VD operand
+	gen_op_load_word_VD_T0();
+}
+
+void powerpc_dyngen::gen_store_word_VS_T0(int vS)	// Word store from vector register vS, address in T0
+{
+	gen_load_ad_VD_VR(vS);	// the source register is passed through the VD operand slot
+	gen_op_store_word_VD_T0();
+}
+
+void powerpc_dyngen::gen_load_vect_VD_T0(int vD)	// Whole-vector load into vector register vD, address in T0
+{
+	gen_load_ad_VD_VR(vD);	// select vector register vD as the VD operand
+	gen_op_load_vect_VD_T0();
+}
+
+void powerpc_dyngen::gen_store_vect_VS_T0(int vS)	// Whole-vector store from vector register vS, address in T0
+{
+	gen_load_ad_VD_VR(vS);	// the source register is passed through the VD operand slot
+	gen_op_store_vect_VD_T0();
+}
+
+void powerpc_dyngen::gen_vaddfp(int vD, int vA, int vB)	// vaddfp on vector registers vD (result), vA and vB
+{
+	gen_load_ad_VD_VR(vD);	// VD <- vD
+	gen_load_ad_V0_VR(vA);	// V0 <- vA
+	gen_load_ad_V1_VR(vB);	// V1 <- vB
+	gen_op_vaddfp_VD_V0_V1();
+}
+
+void powerpc_dyngen::gen_vsubfp(int vD, int vA, int vB)	// vsubfp on vector registers vD (result), vA and vB
+{
+	gen_load_ad_VD_VR(vD);	// VD <- vD
+	gen_load_ad_V0_VR(vA);	// V0 <- vA
+	gen_load_ad_V1_VR(vB);	// V1 <- vB
+	gen_op_vsubfp_VD_V0_V1();
+}
+
+void powerpc_dyngen::gen_vmaddfp(int vD, int vA, int vB, int vC)	// vmaddfp on vector registers vD (result), vA, vB and vC
+{
+	gen_load_ad_VD_VR(vD);	// VD <- vD
+	gen_load_ad_V0_VR(vA);	// V0 <- vA
+	gen_load_ad_V1_VR(vB);	// V1 <- vB
+	gen_load_ad_V2_VR(vC);	// V2 <- vC
+	gen_op_vmaddfp_VD_V0_V1_V2();
+}
+
+void powerpc_dyngen::gen_vnmsubfp(int vD, int vA, int vB, int vC)	// vnmsubfp on vector registers vD (result), vA, vB and vC
+{
+	gen_load_ad_VD_VR(vD);	// VD <- vD
+	gen_load_ad_V0_VR(vA);	// V0 <- vA
+	gen_load_ad_V1_VR(vB);	// V1 <- vB
+	gen_load_ad_V2_VR(vC);	// V2 <- vC
+	gen_op_vnmsubfp_VD_V0_V1_V2();
+}
+
+void powerpc_dyngen::gen_vmaxfp(int vD, int vA, int vB)	// vmaxfp on vector registers vD (result), vA and vB
+{
+	gen_load_ad_VD_VR(vD);	// VD <- vD
+	gen_load_ad_V0_VR(vA);	// V0 <- vA
+	gen_load_ad_V1_VR(vB);	// V1 <- vB
+	gen_op_vmaxfp_VD_V0_V1();
+}
+
+void powerpc_dyngen::gen_vminfp(int vD, int vA, int vB)	// vminfp on vector registers vD (result), vA and vB
+{
+	gen_load_ad_VD_VR(vD);	// VD <- vD
+	gen_load_ad_V0_VR(vA);	// V0 <- vA
+	gen_load_ad_V1_VR(vB);	// V1 <- vB
+	gen_op_vminfp_VD_V0_V1();
+}
+
+void powerpc_dyngen::gen_vand(int vD, int vA, int vB)	// vand on vector registers vD (result), vA and vB
+{
+	gen_load_ad_VD_VR(vD);	// VD <- vD
+	gen_load_ad_V0_VR(vA);	// V0 <- vA
+	gen_load_ad_V1_VR(vB);	// V1 <- vB
+	gen_op_vand_VD_V0_V1();
+}
+
+void powerpc_dyngen::gen_vandc(int vD, int vA, int vB)	// vandc on vector registers vD (result), vA and vB
+{
+	gen_load_ad_VD_VR(vD);	// VD <- vD
+	gen_load_ad_V0_VR(vA);	// V0 <- vA
+	gen_load_ad_V1_VR(vB);	// V1 <- vB
+	gen_op_vandc_VD_V0_V1();
+}
+
+void powerpc_dyngen::gen_vnor(int vD, int vA, int vB)	// vnor on vector registers vD (result), vA and vB
+{
+	gen_load_ad_VD_VR(vD);	// VD <- vD
+	gen_load_ad_V0_VR(vA);	// V0 <- vA
+	gen_load_ad_V1_VR(vB);	// V1 <- vB
+	gen_op_vnor_VD_V0_V1();
+}
+
+void powerpc_dyngen::gen_vor(int vD, int vA, int vB)	// vor on vector registers vD (result), vA and vB
+{
+	gen_load_ad_VD_VR(vD);	// VD <- vD
+	gen_load_ad_V0_VR(vA);	// V0 <- vA
+	gen_load_ad_V1_VR(vB);	// V1 <- vB
+	gen_op_vor_VD_V0_V1();
+}
+
+void powerpc_dyngen::gen_vxor(int vD, int vA, int vB)	// vxor on vector registers vD (result), vA and vB
+{
+	gen_load_ad_VD_VR(vD);	// VD <- vD
+	gen_load_ad_V0_VR(vA);	// V0 <- vA
+	gen_load_ad_V1_VR(vB);	// V1 <- vB
+	gen_op_vxor_VD_V0_V1();
+}
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-dyngen.hpp
@ -219,6 +219,27 @@ public:
 	// Branch instructions
 	void gen_bc_A0(int bo, int bi, uint32 npc);

+	// Vector instructions
+	void gen_load_ad_VD_VR(int i);	// select vector register i as the VD operand
+	void gen_load_ad_V0_VR(int i);	// select vector register i as the V0 operand
+	void gen_load_ad_V1_VR(int i);	// select vector register i as the V1 operand
+	void gen_load_ad_V2_VR(int i);	// select vector register i as the V2 operand
+	void gen_load_word_VD_T0(int vD);	// one word of vD <- memory at T0 (word-aligned)
+	void gen_load_vect_VD_T0(int vD);	// all of vD <- memory at T0 (16-byte aligned)
+	void gen_store_word_VS_T0(int vS);	// memory at T0 (word-aligned) <- one word of vS
+	void gen_store_vect_VS_T0(int vS);	// memory at T0 (16-byte aligned) <- all of vS
+	void gen_vaddfp(int vD, int vA, int vB);	// arithmetic/logical ops below: vD receives the result, vA/vB/vC are inputs
+	void gen_vsubfp(int vD, int vA, int vB);
+	void gen_vmaddfp(int vD, int vA, int vB, int vC);
+	void gen_vnmsubfp(int vD, int vA, int vB, int vC);
+	void gen_vmaxfp(int vD, int vA, int vB);
+	void gen_vminfp(int vD, int vA, int vB);
+	void gen_vand(int vD, int vA, int vB);
+	void gen_vandc(int vD, int vA, int vB);
+	void gen_vnor(int vD, int vA, int vB);
+	void gen_vor(int vD, int vA, int vB);
+	void gen_vxor(int vD, int vA, int vB);
+
 #undef DEFINE_ALIAS
 #undef DEFINE_ALIAS_0
 #undef DEFINE_ALIAS_1
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.cpp
@ -43,63 +43,6 @@
 #define DEBUG 0
 #include "debug.h"

-/**
- *	Helper class to apply an unary/binary/trinary operation
- *
- *		OP		Operation to perform
- *		RA		Input operand register
- *		RB		Input operand register or immediate (optional: operand_NONE)
- *		RC		Input operand register or immediate (optional: operand_NONE)
- **/
-
-template< class RT, class OP, class RA, class RB, class RC >
-struct op_apply {
-	template< class A, class B, class C >
-	static inline RT apply(A a, B b, C c) {
-		return OP::apply(a, b, c);
-	}
-};
-
-template< class RT, class OP, class RA, class RB >
-struct op_apply<RT, OP, RA, RB, null_operand> {
-	template< class A, class B, class C >
-	static inline RT apply(A a, B b, C) {
-		return OP::apply(a, b);
-	}
-};
-
-template< class RT, class OP, class RA >
-struct op_apply<RT, OP, RA, null_operand, null_operand> {
-	template< class A, class B, class C >
-	static inline RT apply(A a, B, C) {
-		return OP::apply(a);
-	}
-};
-
-template< class RT, class OP, class RA, class RB >
-struct op_apply<RT, OP, RA, RB, null_vector_operand> {
-	template< class A, class B, class C >
-	static inline RT apply(A a, B b, C) {
-		return (RT)OP::apply(a, b);
-	}
-};
-
-template< class RT, class OP, class RA >
-struct op_apply<RT, OP, RA, null_vector_operand, null_vector_operand> {
-	template< class A, class B, class C >
-	static inline RT apply(A a, B, C) {
-		return (RT)OP::apply(a);
-	}
-};
-
-template< class RT, class OP, class RB >
-struct op_apply<RT, OP, null_vector_operand, RB, null_vector_operand> {
-	template< class A, class B, class C >
-	static inline RT apply(A, B b, C) {
-		return (RT)OP::apply(b);
-	}
-};
-
 /**
 *	Illegal & NOP instructions
 **/
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.hpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-execute.hpp
@ -36,6 +36,66 @@
 template< bool SB > struct register_value { typedef uint32 type; };
 template< > struct register_value< true > { typedef  int32 type; };

+/**
+ *	Helper class to apply an unary/binary/trinary operation
+ *
+ *		OP		Operation to perform
+ *		RA		Input operand register
+ *		RB		Input operand register or immediate (optional: operand_NONE)
+ *		RC		Input operand register or immediate (optional: operand_NONE)
+ **/
+
+struct null_operand;	// only declared here; used as a tag to select the op_apply specializations
+struct null_vector_operand;	// only declared here; tag for the specializations that cast the result to RT
+
+template< class RT, class OP, class RA, class RB, class RC >
+struct op_apply {	// General case: all three inputs are forwarded to OP
+	template< class A, class B, class C >
+	static inline RT apply(A a, B b, C c) {
+		return OP::apply(a, b, c);	// result is converted to RT by the return
+	}
+};
+
+template< class RT, class OP, class RA, class RB >
+struct op_apply<RT, OP, RA, RB, null_operand> {	// RC is null_operand: two-input operation
+	template< class A, class B, class C >
+	static inline RT apply(A a, B b, C) {
+		return OP::apply(a, b);	// third argument is accepted but ignored
+	}
+};
+
+template< class RT, class OP, class RA >
+struct op_apply<RT, OP, RA, null_operand, null_operand> {	// RB and RC are null_operand: one-input operation
+	template< class A, class B, class C >
+	static inline RT apply(A a, B, C) {
+		return OP::apply(a);	// second and third arguments are accepted but ignored
+	}
+};
+
+template< class RT, class OP, class RA, class RB >
+struct op_apply<RT, OP, RA, RB, null_vector_operand> {	// RC is null_vector_operand: two-input operation
+	template< class A, class B, class C >
+	static inline RT apply(A a, B b, C) {
+		return (RT)OP::apply(a, b);	// unlike the null_operand variant, the result is explicitly cast to RT
+	}
+};
+
+template< class RT, class OP, class RA >
+struct op_apply<RT, OP, RA, null_vector_operand, null_vector_operand> {	// RB and RC are null_vector_operand: one-input operation on a
+	template< class A, class B, class C >
+	static inline RT apply(A a, B, C) {
+		return (RT)OP::apply(a);	// result is explicitly cast to RT
+	}
+};
+
+template< class RT, class OP, class RB >
+struct op_apply<RT, OP, null_vector_operand, RB, null_vector_operand> {	// RA and RC are null_vector_operand: one-input operation on b
+	template< class A, class B, class C >
+	static inline RT apply(A, B b, C) {
+		return (RT)OP::apply(b);	// only the second argument is used; result is explicitly cast to RT
+	}
+};
+
 /**
 *		Add instruction templates
 **/
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-instructions.hpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-instructions.hpp
@ -290,7 +290,7 @@ enum powerpc_instruction {
 	PPC_I(VMULOSH),
 	PPC_I(VMULOUB),
 	PPC_I(VMULOUH),
-	PPC_I(VNMSUB),
+	PPC_I(VNMSUBFP),
 	PPC_I(VNOR),
 	PPC_I(VOR),
 	PPC_I(VPERM),
--- a/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp
+++ b/SheepShaver/src/kpx_cpu/src/cpu/ppc/ppc-translate.cpp
@ -1269,6 +1269,75 @@ powerpc_cpu::compile_block(uint32 entry_point)
 			break;
 		}
 #endif
+		// NOTE: A0/VD are clobbered in the following instructions!
+		case PPC_I(LVEWX):
+		case PPC_I(LVX):
+		case PPC_I(LVXL):
+		{
+			const int rA = rA_field::extract(opcode);
+			const int rB = rB_field::extract(opcode);
+			const int vD = vD_field::extract(opcode);
+			dg.gen_load_T0_GPR(rB);
+			if (rA != 0) {
+				dg.gen_load_T1_GPR(rA);
+				dg.gen_add_32_T0_T1();
+			}
+			switch (ii->mnemo) {
+			case PPC_I(LVEWX):	dg.gen_load_word_VD_T0(vD); break;
+			case PPC_I(LVX):	dg.gen_load_vect_VD_T0(vD); break;
+			case PPC_I(LVXL):	dg.gen_load_vect_VD_T0(vD); break;
+			}
+			break;
+		}
+		case PPC_I(STVEWX):
+		case PPC_I(STVX):
+		case PPC_I(STVXL):
+		{
+			const int rA = rA_field::extract(opcode);
+			const int rB = rB_field::extract(opcode);
+			const int vS = vS_field::extract(opcode);
+			dg.gen_load_T0_GPR(rB);
+			if (rA != 0) {
+				dg.gen_load_T1_GPR(rA);
+				dg.gen_add_32_T0_T1();
+			}
+			switch (ii->mnemo) {
+			case PPC_I(STVEWX):	dg.gen_store_word_VS_T0(vS); break;
+			case PPC_I(STVX):	dg.gen_store_vect_VS_T0(vS); break;
+			case PPC_I(STVXL):	dg.gen_store_vect_VS_T0(vS); break;
+			}
+			break;
+		}
+		case PPC_I(VADDFP):
+		case PPC_I(VSUBFP):
+		case PPC_I(VMADDFP):
+		case PPC_I(VNMSUBFP):
+		case PPC_I(VMAXFP):
+		case PPC_I(VMINFP):
+		case PPC_I(VAND):
+		case PPC_I(VANDC):
+		case PPC_I(VNOR):
+		case PPC_I(VOR):
+		case PPC_I(VXOR):
+		{
+			const int vD = vD_field::extract(opcode);
+			const int vA = vA_field::extract(opcode);
+			const int vB = vB_field::extract(opcode);
+			switch (ii->mnemo) {
+			case PPC_I(VADDFP):		dg.gen_vaddfp(vD, vA, vB); break;
+			case PPC_I(VSUBFP):		dg.gen_vsubfp(vD, vA, vB); break;
+			case PPC_I(VMADDFP):	dg.gen_vmaddfp(vD, vA, vB, vC_field::extract(opcode)); break;
+			case PPC_I(VNMSUBFP):	dg.gen_vnmsubfp(vD, vA, vB, vC_field::extract(opcode)); break;
+			case PPC_I(VMAXFP):		dg.gen_vmaxfp(vD, vA, vB); break;
+			case PPC_I(VMINFP):		dg.gen_vminfp(vD, vA, vB); break;
+			case PPC_I(VAND):		dg.gen_vand(vD, vA, vB); break;
+			case PPC_I(VANDC):		dg.gen_vandc(vD, vA, vB); break;
+			case PPC_I(VNOR):		dg.gen_vnor(vD, vA, vB); break;
+			case PPC_I(VOR):		dg.gen_vor(vD, vA, vB); break;
+			case PPC_I(VXOR):		dg.gen_vxor(vD, vA, vB); break;
+			}
+			break;
+		}
 		default:				// Direct call to instruction handler
 		{
 			typedef void (*func_t)(dyngen_cpu_base, uint32);