From fe6d2cd9d169894b0263df8c5f26df663f4f0ea8 Mon Sep 17 00:00:00 2001
From: Mon P Wang <wangmp@apple.com>
Date: Mon, 26 Jan 2009 04:39:00 +0000
Subject: [PATCH] Fixed optimization of combining two shuffles where the first
 shuffle's inputs have a different number of elements than its output.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62998 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../Scalar/InstructionCombining.cpp           |  4 +++-
 test/Transforms/InstCombine/vec_shuffle2.ll   | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 test/Transforms/InstCombine/vec_shuffle2.ll

diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 40289eaf6bf..7f7592840a4 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -12179,9 +12179,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
       // If the result mask is equal to the src shuffle or this shuffle mask, do
       // the replacement.
       if (NewMask == LHSMask || NewMask == Mask) {
+        unsigned LHSInNElts =
+          cast<VectorType>(LHSSVI->getOperand(0)->getType())->getNumElements();
         std::vector<Constant*> Elts;
         for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
-          if (NewMask[i] >= e*2) {
+          if (NewMask[i] >= LHSInNElts*2) {
             Elts.push_back(UndefValue::get(Type::Int32Ty));
           } else {
             Elts.push_back(ConstantInt::get(Type::Int32Ty, NewMask[i]));
diff --git a/test/Transforms/InstCombine/vec_shuffle2.ll b/test/Transforms/InstCombine/vec_shuffle2.ll
new file mode 100644
index 00000000000..3bd8924903f
--- /dev/null
+++ b/test/Transforms/InstCombine/vec_shuffle2.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep undef | count 1
+; END.
+
+; Test fold of two shuffles where the first shuffle's vector inputs are a
+; different length than the second's.
+
+define void @test_cl(<4 x i8> addrspace(1)* %dest, <16 x i8> addrspace(1)* %old) nounwind {	; loads <16 x i8> from %old, shuffles twice, stores <4 x i8> to %dest
+entry:
+	%arrayidx = getelementptr <4 x i8> addrspace(1)* %dest, i32 0		; <<4 x i8> addrspace(1)*> [#uses=1]
+	%arrayidx5 = getelementptr <16 x i8> addrspace(1)* %old, i32 0		; <<16 x i8> addrspace(1)*> [#uses=1]
+	%tmp6 = load <16 x i8> addrspace(1)* %arrayidx5		; <<16 x i8>> [#uses=1]
+	%tmp7 = shufflevector <16 x i8> %tmp6, <16 x i8> undef, <4 x i32> < i32 13, i32 9, i32 4, i32 13 >		; <<4 x i8>> [#uses=1] -- first shuffle: 16-element inputs, 4-element result
+	%tmp9 = shufflevector <4 x i8> %tmp7, <4 x i8> undef, <4 x i32> < i32 3, i32 1, i32 2, i32 0 >		; <<4 x i8>> [#uses=1] -- second shuffle: permutes %tmp7, 4-element inputs and result
+	store <4 x i8> %tmp9, <4 x i8> addrspace(1)* %arrayidx
+	ret void
+
+return:		; preds = %entry (as generated; entry ends in ret above, so no visible branch targets this block)
+	ret void
+}
\ No newline at end of file