diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4b6ab745828..29ba59c7a74 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6438,17 +6438,17 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { } static bool MayFoldVectorLoad(SDValue V) { - while (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) + if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); - if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR) V = V.getOperand(0); if (V.hasOneUse() && V.getOpcode() == ISD::BUILD_VECTOR && V.getNumOperands() == 2 && V.getOperand(1).getOpcode() == ISD::UNDEF) // BUILD_VECTOR (load), undef V = V.getOperand(0); - - return MayFoldLoad(V); + if (MayFoldLoad(V)) + return true; + return false; } // FIXME: the version above should always be used. Since there's diff --git a/test/CodeGen/X86/vec_shuffle-30.ll b/test/CodeGen/X86/vec_shuffle-30.ll index f5f88426058..1651c4cdace 100644 --- a/test/CodeGen/X86/vec_shuffle-30.ll +++ b/test/CodeGen/X86/vec_shuffle-30.ll @@ -1,25 +1,21 @@ -; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=sse41 -o %t +; RUN: grep pshufhw %t | grep -- -95 | count 1 +; RUN: grep shufps %t | count 1 +; RUN: not grep pslldq %t -; CHECK: test ; Test case when creating pshufhw, we incorrectly set the higher order bit ; for an undef, define void @test(<8 x i16>* %dest, <8 x i16> %in) nounwind { entry: -; CHECK-NOT: vmovaps -; CHECK: vmovlpd -; CHECK: vpshufhw $-95 %0 = load <8 x i16>* %dest %1 = shufflevector <8 x i16> %0, <8 x i16> %in, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 13, i32 undef, i32 14, i32 14> store <8 x i16> %1, <8 x i16>* %dest ret void -} +} -; CHECK: test2 ; A test case where we shouldn't generate a punpckldq but a pshufd and a pslldq define void @test2(<4 x i32>* %dest, <4 x i32> %in) nounwind { entry: -; CHECK-NOT: pslldq -; CHECK: shufps %0 = shufflevector <4 x i32> %in, <4 x i32> , <4 x i32> < i32 undef, i32 5, i32 undef, i32 2> store <4 x i32> %0, <4 x i32>* %dest ret void