From 65f489fd7d876c3e624938cd46d2475c7f365a8a Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sat, 14 Jul 2012 22:26:05 +0000 Subject: [PATCH] AVX: Fix a bug in getTargetVShiftNode. The shift amount has to be a 128-bit vector with the same element type as the input vector. This is needed because of the patterns we have for the VP[SLL/SRA/SRL][W/D/Q] instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160222 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 8 +++++++- test/CodeGen/X86/2012-07-15-vshl.ll | 31 +++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/X86/2012-07-15-vshl.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 19cf5bf75a2..f74a1879a14 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9458,7 +9458,13 @@ static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, ShOps[2] = DAG.getUNDEF(MVT::i32); ShOps[3] = DAG.getUNDEF(MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4); - ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt); + + // The shift amount has to be a 128-bit vector type with the same + // element type as the input type. 
+ MVT EltVT = VT.getVectorElementType().getSimpleVT(); + EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits()); + + ShAmt = DAG.getNode(ISD::BITCAST, dl, ShVT, ShAmt); return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } diff --git a/test/CodeGen/X86/2012-07-15-vshl.ll b/test/CodeGen/X86/2012-07-15-vshl.ll new file mode 100644 index 00000000000..cd0fef469e6 --- /dev/null +++ b/test/CodeGen/X86/2012-07-15-vshl.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx +; PR13352 + +declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone + +define void @f_f() nounwind { +allocas: + br label %for_loop29 + +for_loop29: ; preds = %safe_if_after_true, %allocas + %indvars.iv596 = phi i64 [ %indvars.iv.next597, %safe_if_after_true ], [ 0, %allocas ] + %0 = trunc i64 %indvars.iv596 to i32 + %smear.15 = insertelement <16 x i32> undef, i32 %0, i32 15 + %bitop = lshr <16 x i32> zeroinitializer, %smear.15 + %bitop35 = and <16 x i32> %bitop, + %bitop35_to_bool = icmp ne <16 x i32> %bitop35, zeroinitializer + %val_to_boolvec32 = sext <16 x i1> %bitop35_to_bool to <16 x i32> + %floatmask.i526 = bitcast <16 x i32> %val_to_boolvec32 to <16 x float> + %mask1.i529 = shufflevector <16 x float> %floatmask.i526, <16 x float> undef, <8 x i32> + %"internal_mask&function_mask41_any" = icmp eq i32 undef, 0 + br i1 %"internal_mask&function_mask41_any", label %safe_if_after_true, label %safe_if_run_true + +safe_if_after_true: ; preds = %for_loop29 + %indvars.iv.next597 = add i64 %indvars.iv596, 1 + br label %for_loop29 + +safe_if_run_true: ; preds = %for_loop29 + %blend1.i583 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> undef, <8 x float> undef, <8 x float> %mask1.i529) nounwind + unreachable +} +