diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 56243059a61..de8a3acdbd8 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -614,16 +614,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // we are absolutely afraid of producing a shuffle mask not in the input // program, because the code gen may not be smart enough to turn a merged // shuffle into two specific shuffles: it may produce worse code. As such, - // we only merge two shuffles if the result is a splat, one of the input - // input shuffle masks, or if there's only one input to the shuffle. - // In this case, merging the shuffles just removes one instruction, which - // we know is safe. This is good for things like + // we only merge two shuffles if the result is either a splat or one of the + // input shuffle masks. In this case, merging the shuffles just removes + // one instruction, which we know is safe. This is good for things like // turning: (splat(splat)) -> splat, or // merge(V[0..n], V[n+1..2n]) -> V[0..2n] - // - // FIXME: This is almost certainly far, far too conservative. We should - // have a better model. Perhaps a TargetTransformInfo hook to ask whether - // a shuffle is considered OK? ShuffleVectorInst* LHSShuffle = dyn_cast(LHS); ShuffleVectorInst* RHSShuffle = dyn_cast(RHS); if (LHSShuffle) @@ -748,10 +743,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { } // If the result mask is equal to one of the original shuffle masks, - // or is a splat, do the replacement. Similarly, if there is only one - // input vector, go ahead and do the folding. - if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask || - isa(RHS)) { + // or is a splat, do the replacement. + if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { SmallVector Elts; Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); for (unsigned i = 0, e = newMask.size(); i != e; ++i) { diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll index a22ad15940c..a447908d16c 100644 --- a/test/Transforms/BBVectorize/simple.ll +++ b/test/Transforms/BBVectorize/simple.ll @@ -139,10 +139,11 @@ define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) { ; CHECK: %Z1 = add <16 x i8> %Y1, %X1.v.i1 %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> - %R = mul <8 x i8> %Q1, %Q2 -; CHECK: %Q1.v.i1 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <16 x i32> -; CHECK: %Q1.v.r1 = shufflevector <16 x i8> %Z1, <16 x i8> %Q1.v.i1, <8 x i32> -; CHECK: %Q1.v.r2 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <8 x i32> +; CHECK: %Q1.v.i1 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <16 x i32> +; CHECK: %Q1 = shufflevector <16 x i8> %Z1, <16 x i8> %Q1.v.i1, <16 x i32> + %R = mul <8 x i8> %Q1, %Q2 +; CHECK: %Q1.v.r1 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> +; CHECK: %Q1.v.r2 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> ; CHECK: %R = mul <8 x i8> %Q1.v.r1, %Q1.v.r2 ret <8 x i8> %R ; CHECK: ret <8 x i8> %R diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll index 37c19f1558b..37d4d56e913 100644 --- a/test/Transforms/InstCombine/vec_shuffle.ll +++ b/test/Transforms/InstCombine/vec_shuffle.ll @@ -86,14 +86,14 @@ define <4 x i8> @test9(<16 x i8> %tmp6) nounwind { } ; Same as test9, but make sure that "undef" mask values are not confused with -; mask values of 2*N, where N is the mask length of the result. Make sure when -; folding these shuffles that 'undef' mask values stay that way in the result -; instead of getting mapped to the 2*N'th entry of the source. -define <4 x i8> @test9a(<16 x i8> %in, <16 x i8> %in2) nounwind { +; mask values of 2*N, where N is the mask length. These shuffles should not +; be folded (because [8,9,4,8] may not be a mask supported by the target). +define <4 x i8> @test9a(<16 x i8> %tmp6) nounwind { ; CHECK: @test9a -; CHECK-NEXT: shufflevector <16 x i8> %in, <16 x i8> %in2, <4 x i32> +; CHECK-NEXT: shufflevector +; CHECK-NEXT: shufflevector ; CHECK-NEXT: ret - %tmp7 = shufflevector <16 x i8> %in, <16 x i8> %in2, <4 x i32> < i32 undef, i32 9, i32 4, i32 16 > ; <<4 x i8>> [#uses=1] + %tmp7 = shufflevector <16 x i8> %tmp6, <16 x i8> undef, <4 x i32> < i32 undef, i32 9, i32 4, i32 8 > ; <<4 x i8>> [#uses=1] %tmp9 = shufflevector <4 x i8> %tmp7, <4 x i8> undef, <4 x i32> < i32 3, i32 1, i32 2, i32 0 > ; <<4 x i8>> [#uses=1] ret <4 x i8> %tmp9 }