diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 4653a7d7c8c..93cd27e02d9 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -483,6 +483,8 @@ namespace { if (SelectInst *SI = dyn_cast<SelectInst>(I)) { T2 = SI->getCondition()->getType(); + } else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) { + T2 = SI->getOperand(0)->getType(); } } @@ -987,10 +989,11 @@ namespace { // We don't want to fuse to a type that will be split, even // if the two input types will also be split and there is no other // associated cost. - unsigned VParts = VTTI->getNumberOfParts(VT1); - if (VParts > 1) + unsigned VParts1 = VTTI->getNumberOfParts(VT1), + VParts2 = VTTI->getNumberOfParts(VT2); + if (VParts1 > 1 || VParts2 > 1) return false; - else if (!VParts && VCost == ICost + JCost) + else if ((!VParts1 || !VParts2) && VCost == ICost + JCost) return false; CostSavings = ICost + JCost - VCost; diff --git a/test/Transforms/BBVectorize/X86/sh-types.ll b/test/Transforms/BBVectorize/X86/sh-types.ll new file mode 100644 index 00000000000..0bcb714d5e6 --- /dev/null +++ b/test/Transforms/BBVectorize/X86/sh-types.ll @@ -0,0 +1,25 @@ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s + +define <4 x float> @test7(<4 x float> %A1, <4 x float> %B1, double %C1, double %C2, double %D1, double %D2) { + %A2 = shufflevector <4 x float> %A1, <4 x float> undef, <4 x i32> + %B2 = shufflevector <4 x float> %B1, <4 x float> undef, <4 x i32> + %X1 = shufflevector <4 x float> %A2, <4 x float> undef, <2 x i32> + %X2 = shufflevector <4 x float> %B2, <4 x float> undef, <2 x i32> + %Y1 = shufflevector <2 x float> %X1, <2 x float> undef, <4 x i32> + %Y2 = shufflevector <2 x float> %X2, <2 x float> undef, <4 x i32> + + %M1 = fsub double 
%C1, %D1 + %M2 = fsub double %C2, %D2 + %N1 = fmul double %M1, %C1 + %N2 = fmul double %M2, %C2 + %Z1 = fadd double %N1, %D1 + %Z2 = fadd double %N2, %D2 + + %R = fmul <4 x float> %Y1, %Y2 + ret <4 x float> %R +; CHECK: @test7 +; CHECK-NOT: <8 x float> +; CHECK: ret <4 x float> +} +