diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index a58124d7032..634add86a81 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -515,37 +515,44 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // we are absolutely afraid of producing a shuffle mask not in the input // program, because the code gen may not be smart enough to turn a merged // shuffle into two specific shuffles: it may produce worse code. As such, - // we only merge two shuffles if the result is one of the two input shuffle - // masks. In this case, merging the shuffles just removes one instruction, - // which we know is safe. This is good for things like turning: - // (splat(splat)) -> splat. + // we only merge two shuffles if the result is either a splat or one of the + // two input shuffle masks. In this case, merging the shuffles just removes + // one instruction, which we know is safe. This is good for things like + // turning: (splat(splat)) -> splat. if (ShuffleVectorInst *LHSSVI = dyn_cast(LHS)) { if (isa(RHS)) { std::vector LHSMask = getShuffleMask(LHSSVI); if (LHSMask.size() == Mask.size()) { std::vector NewMask; - for (unsigned i = 0, e = Mask.size(); i != e; ++i) - if (Mask[i] >= e) - NewMask.push_back(2*e); - else - NewMask.push_back(LHSMask[Mask[i]]); + bool isSplat = true; + unsigned SplatElt = 2 * Mask.size(); // undef + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + unsigned MaskElt = 2 * e; // undef + if (Mask[i] < e) + MaskElt = LHSMask[Mask[i]]; + // Check if this could still be a splat. + if (MaskElt < 2*e) { + if (SplatElt < 2*e && SplatElt != MaskElt) + isSplat = false; + SplatElt = MaskElt; + } + NewMask.push_back(MaskElt); + } // If the result mask is equal to the src shuffle or this // shuffle mask, do the replacement. - if (NewMask == LHSMask || NewMask == Mask) { + if (isSplat || NewMask == LHSMask || NewMask == Mask) { unsigned LHSInNElts = cast(LHSSVI->getOperand(0)->getType())-> getNumElements(); std::vector Elts; + const Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { if (NewMask[i] >= LHSInNElts*2) { - Elts.push_back(UndefValue::get( - Type::getInt32Ty(SVI.getContext()))); + Elts.push_back(UndefValue::get(Int32Ty)); } else { - Elts.push_back(ConstantInt::get( - Type::getInt32Ty(SVI.getContext()), - NewMask[i])); + Elts.push_back(ConstantInt::get(Int32Ty, NewMask[i])); } } return new ShuffleVectorInst(LHSSVI->getOperand(0), diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll index 5132a8ff9bd..d4977fb807e 100644 --- a/test/Transforms/InstCombine/vec_shuffle.ll +++ b/test/Transforms/InstCombine/vec_shuffle.ll @@ -87,3 +87,13 @@ define <4 x i8> @test9(<16 x i8> %tmp6) nounwind { %tmp9 = shufflevector <4 x i8> %tmp7, <4 x i8> undef, <4 x i32> < i32 3, i32 1, i32 2, i32 0 > ; <<4 x i8>> [#uses=1] ret <4 x i8> %tmp9 } + +; Redundant vector splats should be removed. Radar 8597790. +define <4 x i32> @test10(<4 x i32> %tmp5) nounwind { +; CHECK: @test10 +; CHECK-NEXT: shufflevector +; CHECK-NEXT: ret + %tmp6 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> + %tmp7 = shufflevector <4 x i32> %tmp6, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %tmp7 +}