From 65f489fd7d876c3e624938cd46d2475c7f365a8a Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nadav.rotem@intel.com>
Date: Sat, 14 Jul 2012 22:26:05 +0000
Subject: [PATCH] AVX: Fix a bug in getTargetVShiftNode. The shift amount has
 to be a 128bit vector with the same element type as the input vector. This is
 needed because of the patterns we have for the VP[SLL/SRA/SRL][W/D/Q]
 instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160222 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp  |  8 +++++++-
 test/CodeGen/X86/2012-07-15-vshl.ll | 31 +++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 test/CodeGen/X86/2012-07-15-vshl.ll
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 19cf5bf75a2..f74a1879a14 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -9458,7 +9458,13 @@ static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
   ShOps[2] = DAG.getUNDEF(MVT::i32);
   ShOps[3] = DAG.getUNDEF(MVT::i32);
   ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
-  ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
+
+  // The shift amount has to be a 128-bit vector with the same element
+  // type as the input vector.
+  MVT EltVT = VT.getVectorElementType().getSimpleVT();
+  EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
+
+  ShAmt = DAG.getNode(ISD::BITCAST, dl, ShVT, ShAmt);
   return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
 }
 
diff --git a/test/CodeGen/X86/2012-07-15-vshl.ll b/test/CodeGen/X86/2012-07-15-vshl.ll
new file mode 100644
index 00000000000..cd0fef469e6
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-15-vshl.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx
+; PR13352
+
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+
+define void @f_f() nounwind {
+allocas:
+  br label %for_loop29
+
+for_loop29:                                       ; preds = %safe_if_after_true, %allocas
+  %indvars.iv596 = phi i64 [ %indvars.iv.next597, %safe_if_after_true ], [ 0, %allocas ]
+  %0 = trunc i64 %indvars.iv596 to i32
+  %smear.15 = insertelement <16 x i32> undef, i32 %0, i32 15
+  %bitop = lshr <16 x i32> zeroinitializer, %smear.15
+  %bitop35 = and <16 x i32> %bitop, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %bitop35_to_bool = icmp ne <16 x i32> %bitop35, zeroinitializer
+  %val_to_boolvec32 = sext <16 x i1> %bitop35_to_bool to <16 x i32>
+  %floatmask.i526 = bitcast <16 x i32> %val_to_boolvec32 to <16 x float>
+  %mask1.i529 = shufflevector <16 x float> %floatmask.i526, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %"internal_mask&function_mask41_any" = icmp eq i32 undef, 0
+  br i1 %"internal_mask&function_mask41_any", label %safe_if_after_true, label %safe_if_run_true
+
+safe_if_after_true:                               ; preds = %for_loop29
+  %indvars.iv.next597 = add i64 %indvars.iv596, 1
+  br label %for_loop29
+
+safe_if_run_true:                                 ; preds = %for_loop29
+  %blend1.i583 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> undef, <8 x float> undef, <8 x float> %mask1.i529) nounwind
+  unreachable
+}
+