diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 80cf0a8ff1d..8166185f93b 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6905,6 +6905,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { EVT LVT = ExtVT; if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + EVT BCVT = InVec.getOperand(0).getValueType(); if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); @@ -6922,12 +6926,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.getOperand(0).getValueType() == ExtVT && ISD::isNormalLoad(InVec.getOperand(0).getNode())) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + LN0 = cast(InVec.getOperand(0)); } else if ((SVN = dyn_cast(InVec))) { // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) // => // (load $addr+1*size) + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + // If the bit convert changed the number of elements, it is unsafe // to examine the mask. if (BCNumEltsChanged) @@ -6938,14 +6950,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); - if (InVec.getOpcode() == ISD::BITCAST) + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + InVec = InVec.getOperand(0); + } if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; } } + // Make sure we found a non-volatile load and the extractelement is + // the only use. if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) return SDValue(); @@ -6982,6 +7001,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // The replacement we need to do here is a little tricky: we need to // replace an extractelement of a load with a load. // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll index 86e87129b73..6fbae199aae 100644 --- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll +++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -8,11 +8,11 @@ define void @test_sqrt(<4 x float>* %X) nounwind { ; CHECK: movw r1, :lower16:{{.*}} ; CHECK: movt r1, :upper16:{{.*}} -; CHECK: vldmia r1, {[[short0:s[0-9]+]], [[short1:s[0-9]+]], [[short2:s[0-9]+]], [[short3:s[0-9]+]]} -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short3]] -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short2]] -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short1]] -; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short0]] +; CHECK: vldmia r1 +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}} ; CHECK: vstmia {{.*}} L.entry: diff --git a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll new file mode 100644 index 00000000000..39c213f00ab --- /dev/null +++ b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=x86-64 -mattr=-sse42,+sse41 < %s | FileCheck %s +; Make sure we don't load from the location pointed to by %p +; twice: it has non-obvious performance implications, and +; the relevant transformation doesn't know how to update +; the chains correctly. +; PR10747 + +; CHECK: test: +; CHECK: pextrd $2, %xmm +define <4 x i32> @test(<4 x i32>* %p) { + %v = load <4 x i32>* %p + %e = extractelement <4 x i32> %v, i32 2 + %cmp = icmp eq i32 %e, 3 + %sel = select i1 %cmp, <4 x i32> %v, <4 x i32> zeroinitializer + ret <4 x i32> %sel +}