diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 00d9ba071ef..ed3f770afec 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -156,6 +156,8 @@ public: // Node replacement helpers void ReplacedNode(SDNode *N) { LegalizedNodes.erase(N); + if (UpdatedNodes) + UpdatedNodes->insert(N); } void ReplaceNode(SDNode *Old, SDNode *New) { DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5dd542842e4..f63beab44fd 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -19787,7 +19787,6 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, EVT VT = InVec.getValueType(); - bool HasShuffleIntoBitcast = false; if (InVec.getOpcode() == ISD::BITCAST) { // Don't duplicate a load with other uses. if (!InVec.hasOneUse()) @@ -19796,7 +19795,6 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, if (BCVT.getVectorNumElements() != VT.getVectorNumElements()) return SDValue(); InVec = InVec.getOperand(0); - HasShuffleIntoBitcast = true; } if (!isTargetShuffle(InVec.getOpcode())) @@ -19839,17 +19837,16 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile()) return SDValue(); - if (HasShuffleIntoBitcast) { - // If there's a bitcast before the shuffle, check if the load type and - // alignment is valid. - unsigned Align = LN0->getAlignment(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned NewAlign = TLI.getDataLayout()-> - getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + EVT EltVT = N->getValueType(0); + // Check that a load of the extracted element type is legal or custom and + // that the original load is at least ABI-aligned for that type. 
+ unsigned Align = LN0->getAlignment(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment( + EltVT.getTypeForEVT(*DAG.getContext())); - if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT)) - return SDValue(); - } + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, EltVT)) + return SDValue(); // All checks match so transform back to vector_shuffle so that DAG combiner // can finish the job diff --git a/test/CodeGen/X86/extractelement-load.ll b/test/CodeGen/X86/extractelement-load.ll index cadc0fb723f..3e31b4b190b 100644 --- a/test/CodeGen/X86/extractelement-load.ll +++ b/test/CodeGen/X86/extractelement-load.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=yonah | FileCheck %s ; RUN: llc < %s -march=x86-64 -mattr=+sse2 -mcpu=core2 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + define i32 @t(<2 x i64>* %val) nounwind { ; CHECK-LABEL: t: ; CHECK-NOT: movd @@ -23,3 +25,22 @@ undef, i32 7, i32 9, i32 undef, i32 13, i32 15, i32 1, i32 3> %y = extractelement <8 x i32> %Shuff68, i32 0 ret i32 %y } + +; This case could easily end up inf-looping in the DAG combiner due to a +; low-alignment load of the vector which prevents us from reliably forming a +; narrow load. +; FIXME: It would be nice to detect whether the target has fast and legal +; unaligned loads and use them here. +define void @t3() { +; CHECK-LABEL: t3: +; +; This movs the entire vector, shuffling the high double down. If we fixed the +; FIXME above it would just move the high double directly. 
+; CHECK: movhpd %xmm + +bb: + %tmp13 = load <2 x double>* undef, align 1 + %.sroa.3.24.vec.extract = extractelement <2 x double> %tmp13, i32 1 + store double %.sroa.3.24.vec.extract, double* undef, align 8 + unreachable +} diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll index 7868081553c..6f49a03cb8b 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -1,11 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSSE3 -; XFAIL: * -; Only @stress_test0 is expected to fail, but XFAIL is not that selective. I -; expect this to be unxfailed soon enough that we won't regress the other tests -; in the interim. - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown"