diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 29ba59c7a74..4b6ab745828 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6438,17 +6438,17 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { } static bool MayFoldVectorLoad(SDValue V) { - if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) + while (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); + if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR) V = V.getOperand(0); if (V.hasOneUse() && V.getOpcode() == ISD::BUILD_VECTOR && V.getNumOperands() == 2 && V.getOperand(1).getOpcode() == ISD::UNDEF) // BUILD_VECTOR (load), undef V = V.getOperand(0); - if (MayFoldLoad(V)) - return true; - return false; + + return MayFoldLoad(V); } // FIXME: the version above should always be used. Since there's diff --git a/test/CodeGen/X86/vec_shuffle-30.ll b/test/CodeGen/X86/vec_shuffle-30.ll index 1651c4cdace..f5f88426058 100644 --- a/test/CodeGen/X86/vec_shuffle-30.ll +++ b/test/CodeGen/X86/vec_shuffle-30.ll @@ -1,21 +1,25 @@ -; RUN: llc < %s -march=x86 -mattr=sse41 -o %t -; RUN: grep pshufhw %t | grep -- -95 | count 1 -; RUN: grep shufps %t | count 1 -; RUN: not grep pslldq %t +; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s +; CHECK: test ; Test case when creating pshufhw, we incorrectly set the higher order bit ; for an undef, define void @test(<8 x i16>* %dest, <8 x i16> %in) nounwind { entry: +; CHECK-NOT: vmovaps +; CHECK: vmovlpd +; CHECK: vpshufhw $-95 %0 = load <8 x i16>* %dest %1 = shufflevector <8 x i16> %0, <8 x i16> %in, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 13, i32 undef, i32 14, i32 14> store <8 x i16> %1, <8 x i16>* %dest ret void -} +} +; CHECK: test2 ; A test case where we shouldn't generate a punpckldq but a pshufd and a pslldq define void @test2(<4 x i32>* %dest, <4 x i32> %in) nounwind { entry: +; CHECK-NOT: pslldq +; CHECK: shufps %0 = shufflevector <4 x i32> %in, <4 x i32> , <4 x i32> < i32 undef, i32 5, i32 undef, i32 2> store <4 x i32> %0, <4 x i32>* %dest ret void