From 84387ea5f547f3b63dc701fdcac163b8dc797e5d Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 13 Mar 2012 22:00:52 +0000 Subject: [PATCH] DAG combine incorrectly optimizes (i32 vextract (v4i16 load $addr), c) to (i16 load $addr+c*sizeof(i16)) and replaces uses of (i32 vextract) with the i16 load. It should issue an extload instead: (i32 extload $addr+c*sizeof(i16)). rdar://11035895 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152675 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 ++++++++++++++++---- test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll | 16 ++++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d93af729322..66c04b42a92 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7202,6 +7202,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { EVT ExtVT = VT.getVectorElementType(); EVT LVT = ExtVT; + // If the result of load has to be truncated, then it's not necessarily + // profitable. + if (NVT.bitsLT(LVT)) + return SDValue(); + if (InVec.getOpcode() == ISD::BITCAST) { // Don't duplicate a load with other uses. if (!InVec.hasOneUse()) @@ -7302,10 +7307,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Note that this replacement assumes that the extractvalue is the only // use of the load; that's okay because we don't want to perform this // transformation in other cases anyway. - SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Align); + SDValue Load; + if (NVT.bitsGT(LVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. 
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) + ? ISD::ZEXTLOAD : ISD::EXTLOAD; + Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(), + NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), + LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align); + } else + Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), Align); WorkListRemover DeadNodes(*this); SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; SDValue To[] = { Load.getValue(0), Load.getValue(1) }; diff --git a/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll new file mode 100644 index 00000000000..6d596dfc071 --- /dev/null +++ b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s +; rdar://11035895 + +; DAG combine incorrectly optimize (i32 vextract (v4i16 load $addr), c) to +; (i16 load $addr+c*sizeof(i16)). It should have issued an extload instead. i.e. +; (i32 extload $addr+c*sizeof(i16) +define void @test_hi_short3(<3 x i16> * nocapture %srcA, <2 x i16> * nocapture %dst) nounwind { +entry: +; CHECK: ldrh [[REG:r[0-9]+]] +; CHECK: strh [[REG]] + %0 = load <3 x i16> * %srcA, align 8 + %1 = shufflevector <3 x i16> %0, <3 x i16> undef, <2 x i32> + store <2 x i16> %1, <2 x i16> * %dst, align 4 + ret void +} +