mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-05 12:31:33 +00:00
Make sure DAGCombiner doesn't introduce multiple loads from the same memory location. PR10747, part 2.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147283 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c9a1aed7fe
commit
d6e2560e7a
@ -6905,6 +6905,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
|
|||||||
EVT LVT = ExtVT;
|
EVT LVT = ExtVT;
|
||||||
|
|
||||||
if (InVec.getOpcode() == ISD::BITCAST) {
|
if (InVec.getOpcode() == ISD::BITCAST) {
|
||||||
|
// Don't duplicate a load with other uses.
|
||||||
|
if (!InVec.hasOneUse())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
EVT BCVT = InVec.getOperand(0).getValueType();
|
EVT BCVT = InVec.getOperand(0).getValueType();
|
||||||
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
|
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
@ -6922,12 +6926,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
|
|||||||
} else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
|
} else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
|
||||||
InVec.getOperand(0).getValueType() == ExtVT &&
|
InVec.getOperand(0).getValueType() == ExtVT &&
|
||||||
ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
|
ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
|
||||||
|
// Don't duplicate a load with other uses.
|
||||||
|
if (!InVec.hasOneUse())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
LN0 = cast<LoadSDNode>(InVec.getOperand(0));
|
LN0 = cast<LoadSDNode>(InVec.getOperand(0));
|
||||||
} else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
|
} else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
|
||||||
// (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
|
// (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
|
||||||
// =>
|
// =>
|
||||||
// (load $addr+1*size)
|
// (load $addr+1*size)
|
||||||
|
|
||||||
|
// Don't duplicate a load with other uses.
|
||||||
|
if (!InVec.hasOneUse())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
// If the bit convert changed the number of elements, it is unsafe
|
// If the bit convert changed the number of elements, it is unsafe
|
||||||
// to examine the mask.
|
// to examine the mask.
|
||||||
if (BCNumEltsChanged)
|
if (BCNumEltsChanged)
|
||||||
@ -6938,14 +6950,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
|
|||||||
int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
|
int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
|
||||||
InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
|
InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
|
||||||
|
|
||||||
if (InVec.getOpcode() == ISD::BITCAST)
|
if (InVec.getOpcode() == ISD::BITCAST) {
|
||||||
|
// Don't duplicate a load with other uses.
|
||||||
|
if (!InVec.hasOneUse())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
InVec = InVec.getOperand(0);
|
InVec = InVec.getOperand(0);
|
||||||
|
}
|
||||||
if (ISD::isNormalLoad(InVec.getNode())) {
|
if (ISD::isNormalLoad(InVec.getNode())) {
|
||||||
LN0 = cast<LoadSDNode>(InVec);
|
LN0 = cast<LoadSDNode>(InVec);
|
||||||
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
|
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Make sure we found a non-volatile load and the extractelement is
|
||||||
|
// the only use.
|
||||||
if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
|
if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
@ -6982,6 +7001,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
|
|||||||
// The replacement we need to do here is a little tricky: we need to
|
// The replacement we need to do here is a little tricky: we need to
|
||||||
// replace an extractelement of a load with a load.
|
// replace an extractelement of a load with a load.
|
||||||
// Use ReplaceAllUsesOfValuesWith to do the replacement.
|
// Use ReplaceAllUsesOfValuesWith to do the replacement.
|
||||||
|
// Note that this replacement assumes that the extractelement is the only
|
||||||
|
// use of the load; that's okay because we don't want to perform this
|
||||||
|
// transformation in other cases anyway.
|
||||||
SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
|
SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
|
||||||
LN0->getPointerInfo().getWithOffset(PtrOff),
|
LN0->getPointerInfo().getWithOffset(PtrOff),
|
||||||
LN0->isVolatile(), LN0->isNonTemporal(),
|
LN0->isVolatile(), LN0->isNonTemporal(),
|
||||||
|
@ -8,11 +8,11 @@ define void @test_sqrt(<4 x float>* %X) nounwind {
|
|||||||
|
|
||||||
; CHECK: movw r1, :lower16:{{.*}}
|
; CHECK: movw r1, :lower16:{{.*}}
|
||||||
; CHECK: movt r1, :upper16:{{.*}}
|
; CHECK: movt r1, :upper16:{{.*}}
|
||||||
; CHECK: vldmia r1, {[[short0:s[0-9]+]], [[short1:s[0-9]+]], [[short2:s[0-9]+]], [[short3:s[0-9]+]]}
|
; CHECK: vldmia r1
|
||||||
; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short3]]
|
; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
|
||||||
; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short2]]
|
; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
|
||||||
; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short1]]
|
; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
|
||||||
; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short0]]
|
; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
|
||||||
; CHECK: vstmia {{.*}}
|
; CHECK: vstmia {{.*}}
|
||||||
|
|
||||||
L.entry:
|
L.entry:
|
||||||
|
16
test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
Normal file
16
test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
; RUN: llc -march=x86-64 -mattr=-sse42,+sse41 < %s | FileCheck %s
|
||||||
|
; Make sure we don't load from the location pointed to by %p
|
||||||
|
; twice: it has non-obvious performance implications, and
|
||||||
|
; the relevant transformation doesn't know how to update
|
||||||
|
; the chains correctly.
|
||||||
|
; PR10747
|
||||||
|
|
||||||
|
; CHECK: test:
|
||||||
|
; CHECK: pextrd $2, %xmm
|
||||||
|
define <4 x i32> @test(<4 x i32>* %p) {
|
||||||
|
%v = load <4 x i32>* %p
|
||||||
|
%e = extractelement <4 x i32> %v, i32 2
|
||||||
|
%cmp = icmp eq i32 %e, 3
|
||||||
|
%sel = select i1 %cmp, <4 x i32> %v, <4 x i32> zeroinitializer
|
||||||
|
ret <4 x i32> %sel
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user