DAG combine incorrectly optimizes (i32 vextract (v4i16 load $addr), c) to

(i16 load $addr+c*sizeof(i16)) and replaces uses of (i32 vextract) with the
i16 load. It should issue an extload instead: (i32 extload $addr+c*sizeof(i16)).

rdar://11035895


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152675 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2012-03-13 22:00:52 +00:00
parent f210b68b41
commit 84387ea5f5
2 changed files with 35 additions and 4 deletions

View File

@ -7202,6 +7202,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
EVT ExtVT = VT.getVectorElementType(); EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT; EVT LVT = ExtVT;
// If the result of load has to be truncated, then it's not necessarily
// profitable.
if (NVT.bitsLT(LVT))
return SDValue();
if (InVec.getOpcode() == ISD::BITCAST) { if (InVec.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses. // Don't duplicate a load with other uses.
if (!InVec.hasOneUse()) if (!InVec.hasOneUse())
@ -7302,10 +7307,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// Note that this replacement assumes that the extractvalue is the only // Note that this replacement assumes that the extractvalue is the only
// use of the load; that's okay because we don't want to perform this // use of the load; that's okay because we don't want to perform this
// transformation in other cases anyway. // transformation in other cases anyway.
SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, SDValue Load;
LN0->getPointerInfo().getWithOffset(PtrOff), if (NVT.bitsGT(LVT)) {
LN0->isVolatile(), LN0->isNonTemporal(), // If the result type of vextract is wider than the load, then issue an
LN0->isInvariant(), Align); // extending load instead.
ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
? ISD::ZEXTLOAD : ISD::EXTLOAD;
Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(),
NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align);
} else
Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->isInvariant(), Align);
WorkListRemover DeadNodes(*this); WorkListRemover DeadNodes(*this);
SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
SDValue To[] = { Load.getValue(0), Load.getValue(1) }; SDValue To[] = { Load.getValue(0), Load.getValue(1) };

View File

@ -0,0 +1,16 @@
; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
; rdar://11035895
; DAG combine incorrectly optimizes (i32 vextract (v4i16 load $addr), c) to
; (i16 load $addr+c*sizeof(i16)). It should have issued an extload instead, i.e.
; (i32 extload $addr+c*sizeof(i16)).
; Regression test body: load a <3 x i16> vector from %srcA, move its element 2
; into lane 0 of a <2 x i16> vector, and store that vector to %dst.
; The FileCheck patterns below expect a halfword load (ldrh) into some register,
; followed later by a halfword store (strh) of that same register.
define void @test_hi_short3(<3 x i16> * nocapture %srcA, <2 x i16> * nocapture %dst) nounwind {
entry:
; CHECK: ldrh [[REG:r[0-9]+]]
; CHECK: strh [[REG]]
; Load the whole source vector (8-byte aligned).
%0 = load <3 x i16> * %srcA, align 8
; Shuffle mask <2, undef>: result lane 0 is source element 2, lane 1 is undef.
%1 = shufflevector <3 x i16> %0, <3 x i16> undef, <2 x i32> <i32 2, i32 undef>
; Store the two-lane result (4-byte aligned).
store <2 x i16> %1, <2 x i16> * %dst, align 4
ret void
}