From d6548ad0139fc2e70bb40a12442da1ab9bbf7519 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Tue, 11 Nov 2014 11:20:31 +0000 Subject: [PATCH] [X86] Add missing check for 'isINSERTPSMask' in method 'isShuffleMaskLegal'. This helps the DAGCombiner to identify more opportunities to fold shuffles. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221684 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 3 +- test/CodeGen/X86/vector-shuffle-combining.ll | 76 ++++++++++++++++---- 2 files changed, 66 insertions(+), 13 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4de5e2585ec..8fe82118c4e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -19482,7 +19482,8 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, isUNPCKHMask(M, SVT, Subtarget->hasInt256()) || isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) || isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()) || - isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256())); + isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256()) || + (Subtarget->hasSSE41() && isINSERTPSMask(M, SVT))); } bool diff --git a/test/CodeGen/X86/vector-shuffle-combining.ll b/test/CodeGen/X86/vector-shuffle-combining.ll index f43f4051758..d78461fc33c 100644 --- a/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/test/CodeGen/X86/vector-shuffle-combining.ll @@ -1621,17 +1621,13 @@ define <4 x float> @combine_test1b(<4 x float> %a, <4 x float> %b) { ; ; SSE41-LABEL: combine_test1b: ; SSE41: # BB#0: -; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3] -; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0] -; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,0] +; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,2,0] ; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: combine_test1b: ; AVX: # BB#0: -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3] -; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0] -; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[2,0],xmm0[2,0] +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[0,1,2,0] ; AVX-NEXT: retq %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> @@ -1722,17 +1718,13 @@ define <4 x float> @combine_test4b(<4 x float> %a, <4 x float> %b) { ; ; SSE41-LABEL: combine_test4b: ; SSE41: # BB#0: -; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3] -; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0] -; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[0,2] +; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: combine_test4b: ; AVX: # BB#0: -; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3] -; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0] -; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[1,1],xmm0[0,2] +; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[1,1,2,3] ; AVX-NEXT: retq %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> @@ -2565,3 +2557,63 @@ define <8 x i32> @combine_unneeded_subvector2(<8 x i32> %a, <8 x i32> %b) { %d = shufflevector <8 x i32> %b, <8 x i32> %c, <8 x i32> ret <8 x i32> %d } + +define <4 x float> @combine_insertps1(<4 x float> %a, <4 x float> %b) { +; SSE41-LABEL: combine_insertps1: +; SSE41: # BB#0: +; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm1[2],xmm0[1,2,3] +; SSE41-NEXT: retq + +; AVX-LABEL: combine_insertps1: +; AVX: # BB#0: +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[2],xmm0[1,2,3] +; AVX-NEXT: retq + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32> + ret <4 x float> %d +} + +define <4 x float> @combine_insertps2(<4 x float> %a, <4 x float> %b) { +; SSE41-LABEL: combine_insertps2: +; SSE41: # BB#0: +; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[2],xmm0[2,3] +; SSE41-NEXT: retq + +; AVX-LABEL: combine_insertps2: +; AVX: # BB#0: +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[2],xmm0[2,3] +; AVX-NEXT: retq + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32> + ret <4 x float> %d +} + +define <4 x float> @combine_insertps3(<4 x float> %a, <4 x float> %b) { +; SSE41-LABEL: combine_insertps3: +; SSE41: # BB#0: +; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; SSE41-NEXT: retq + +; AVX-LABEL: combine_insertps3: +; AVX: # BB#0: +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; AVX-NEXT: retq + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32> + ret <4 x float> %d +} + +define <4 x float> @combine_insertps4(<4 x float> %a, <4 x float> %b) { +; SSE41-LABEL: combine_insertps4: +; SSE41: # BB#0: +; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; SSE41-NEXT: retq + +; AVX-LABEL: combine_insertps4: +; AVX: # BB#0: +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX-NEXT: retq + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32> + ret <4 x float> %d +}