From 108126cfc6eddf1e0c9c7db39e25323403f04bbc Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 29 Nov 2011 03:57:34 +0000 Subject: [PATCH] Correctly mark VPERM2F128 as being an FP instruction and add execution domain fixing support to convert it to VPERM2I128 for AVX2. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145370 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 2 ++ lib/Target/X86/X86InstrSSE.td | 2 +- test/CodeGen/X86/avx2-vperm2i128.ll | 19 ++++++++++++++----- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 24c4a53792d..5d310af3eb6 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3568,6 +3568,8 @@ static const unsigned ReplaceableInstrsAVX2[][3] = { { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDYrr }, { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm }, { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr }, + { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm }, + { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }, { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm }, { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr } }; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e2bf42da719..0be59ccd87b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7336,7 +7336,7 @@ def : Pat<(v4i64 (X86VPermilpd VR256:$src1, (i8 imm:$imm))), //===----------------------------------------------------------------------===// // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks // -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", diff --git a/test/CodeGen/X86/avx2-vperm2i128.ll b/test/CodeGen/X86/avx2-vperm2i128.ll index 0f24ac33316..6fcd2d7c1b6 100644 --- a/test/CodeGen/X86/avx2-vperm2i128.ll +++ b/test/CodeGen/X86/avx2-vperm2i128.ll @@ -3,28 +3,36 @@ ; CHECK: vperm2i128 $17 define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp { entry: - %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ; add forces execution domain + %a2 = add <32 x i8> %a, + %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle } ; CHECK: vperm2i128 $33 define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { entry: - %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ; add forces execution domain + %a2 = add <4 x i64> %a, + %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> ret <4 x i64> %shuffle } ; CHECK: vperm2i128 $49 define <8 x i32> @E3(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp { entry: - %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ; add forces execution domain + %a2 = add <8 x i32> %a, + %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle } ; CHECK: vperm2i128 $2 define <16 x i16> @E4(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp { entry: - %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ; add forces execution domain + %a2 = add <16 x i16> %a, + %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle } @@ -33,6 +41,7 @@ define <16 x i16> @E5(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone entry: %c = load <16 x i16>* %a %d = load <16 x i16>* %b - %shuffle = shufflevector <16 x i16> %c, <16 x i16> %d, <16 x i32> + %c2 = add <16 x i16> %c, + %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> ret <16 x i16> %shuffle }