diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 257729ab84d..5401f85d96a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5408,9 +5408,9 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { return Op; } -// FIXME: Split this code up when LegalizeDAGTypes lands. -SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - SDLoc dl) const { +void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, + SelectionDAG &DAG, + SDLoc dl) const { assert(Op.getOperand(0).getValueType().isFloatingPoint()); SDValue Src = Op.getOperand(0); if (Src.getValueType() == MVT::f32) @@ -5459,15 +5459,92 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, if (Op.getValueType() == MVT::i32 && !i32Stack) { FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, FIPtr.getValueType())); - MPI = MachinePointerInfo(); + MPI = MPI.getWithOffset(4); } - return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI, - false, false, false, 0); + RLI.Chain = Chain; + RLI.Ptr = FIPtr; + RLI.MPI = MPI; +} + +SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, + SDLoc dl) const { + ReuseLoadInfo RLI; + LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); + + return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false, + false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo, + RLI.Ranges); +} + +// We're trying to insert a regular store, S, and then a load, L. If the +// incoming value, O, is a load, we might just be able to have our load use the +// address used by O. However, we don't know if anything else will store to +// that address before we can load from it. To prevent this situation, we need +// to insert our load, L, into the chain as a peer of O. 
To do this, we give L +// the same chain operand as O, we create a token factor from the chain results +// of O and L, and we replace all uses of O's chain result with that token +// factor (see spliceIntoChain below for this last part). +bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, + ReuseLoadInfo &RLI, + SelectionDAG &DAG) const { + SDLoc dl(Op); + if ((Op.getOpcode() == ISD::FP_TO_UINT || + Op.getOpcode() == ISD::FP_TO_SINT) && + isOperationLegalOrCustom(Op.getOpcode(), + Op.getOperand(0).getValueType())) { + + LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); + return true; + } + + LoadSDNode *LD = dyn_cast<LoadSDNode>(Op); + if (!LD || !ISD::isNON_EXTLoad(LD) || LD->isVolatile() || LD->isNonTemporal()) + return false; + if (LD->getMemoryVT() != MemVT) + return false; + + RLI.Ptr = LD->getBasePtr(); + if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) { + assert(LD->getAddressingMode() == ISD::PRE_INC && + "Non-pre-inc AM on PPC?"); + RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, + LD->getOffset()); + } + + RLI.Chain = LD->getChain(); + RLI.MPI = LD->getPointerInfo(); + RLI.IsInvariant = LD->isInvariant(); + RLI.Alignment = LD->getAlignment(); + RLI.AAInfo = LD->getAAInfo(); + RLI.Ranges = LD->getRanges(); + + RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1); + return true; +} + +// Given the head of the old chain, ResChain, insert a token factor containing +// it and NewResChain, and make users of ResChain now be users of that token +// factor. 
+void PPCTargetLowering::spliceIntoChain(SDValue ResChain, + SDValue NewResChain, + SelectionDAG &DAG) const { + if (!ResChain) + return; + + SDLoc dl(NewResChain); + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + NewResChain, DAG.getUNDEF(MVT::Other)); + assert(TF.getNode() != NewResChain.getNode() && + "A new TF really is required here"); + + DAG.ReplaceAllUsesOfValueWith(ResChain, TF); + DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); } SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { SDLoc dl(Op); // Don't handle ppc_fp128 here; let it be lowered to a libcall. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) @@ -5539,7 +5616,17 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); } - SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); + ReuseLoadInfo RLI; + SDValue Bits; + + if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { + Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false, + false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo, + RLI.Ranges); + spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); + } else + Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); + SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) @@ -5560,23 +5647,36 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SDValue Ld; if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { - int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); - SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + ReuseLoadInfo RLI; + bool ReusingLoad; + if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI, + DAG))) { + int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, - 
MachinePointerInfo::getFixedStack(FrameIdx), - false, false, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, + MachinePointerInfo::getFixedStack(FrameIdx), + false, false, 0); + + assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && + "Expected an i32 store"); + + RLI.Ptr = FIdx; + RLI.Chain = Store; + RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx); + RLI.Alignment = 4; + } - assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && - "Expected an i32 store"); MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOLoad, 4, 4); - SDValue Ops[] = { Store, FIdx }; + MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, + RLI.Alignment, RLI.AAInfo, RLI.Ranges); + SDValue Ops[] = { RLI.Chain, RLI.Ptr }; Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::LFIWZX : PPCISD::LFIWAX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i32, MMO); + if (ReusingLoad) + spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG); } else { assert(Subtarget.isPPC64() && "i32->FP without LFIWAX supported only on PPC64"); @@ -6489,7 +6589,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, - SDLoc(Op)); + SDLoc(Op)); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index ee946e66b9a..8fc988a320f 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -591,6 +591,28 @@ namespace llvm { } private: + + struct ReuseLoadInfo { + SDValue Ptr; + SDValue Chain; + SDValue ResChain; + MachinePointerInfo MPI; + bool IsInvariant; + unsigned Alignment; + AAMDNodes AAInfo; + const MDNode *Ranges; + + ReuseLoadInfo() : 
IsInvariant(false), Alignment(0), Ranges(nullptr) {} + }; + + bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, + SelectionDAG &DAG) const; + void spliceIntoChain(SDValue ResChain, SDValue NewResChain, + SelectionDAG &DAG) const; + + void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, + SelectionDAG &DAG, SDLoc dl) const; + SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 2824105485b..3942a7cdac2 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -302,27 +302,6 @@ http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf ===-------------------------------------------------------------------------=== -float foo(float X) { return (int)(X); } - -Currently produces: - -_foo: - fctiwz f0, f1 - stfd f0, -8(r1) - lwz r2, -4(r1) - extsw r2, r2 - std r2, -16(r1) - lfd f0, -16(r1) - fcfid f0, f0 - frsp f1, f0 - blr - -We could use a target dag combine to turn the lwz/extsw into an lwa when the -lwz has a single use. Since LWA is cracked anyway, this would be a codesize -win only. 
- -===-------------------------------------------------------------------------=== - We generate ugly code for this: void func(unsigned int *ret, float dx, float dy, float dz, float dw) { diff --git a/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll b/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll new file mode 100644 index 00000000000..6beee253a2e --- /dev/null +++ b/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll @@ -0,0 +1,96 @@ +; RUN: llc -mcpu=a2 < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind readonly +define double @test1(i64* nocapture readonly %x) #0 { +entry: + %0 = load i64* %x, align 8 + %conv = sitofp i64 %0 to double + ret double %conv + +; CHECK-LABEL: @test1 +; CHECK: lfd [[REG1:[0-9]+]], 0(3) +; CHECK: fcfid 1, [[REG1]] +; CHECK: blr +} + +; Function Attrs: nounwind readonly +define double @test2(i32* nocapture readonly %x) #0 { +entry: + %0 = load i32* %x, align 4 + %conv = sitofp i32 %0 to double + ret double %conv + +; CHECK-LABEL: @test2 +; CHECK: lfiwax [[REG1:[0-9]+]], 0, 3 +; CHECK: fcfid 1, [[REG1]] +; CHECK: blr +} + +; Function Attrs: nounwind readnone +define float @foo(float %X) #0 { +entry: + %conv = fptosi float %X to i32 + %conv1 = sitofp i32 %conv to float + ret float %conv1 + +; CHECK-LABEL: @foo +; CHECK-DAG: fctiwz [[REG2:[0-9]+]], 1 +; CHECK-DAG: addi [[REG1:[0-9]+]], 1, +; CHECK: stfiwx [[REG2]], 0, [[REG1]] +; CHECK: lfiwax [[REG3:[0-9]+]], 0, [[REG1]] +; CHECK: fcfids 1, [[REG3]] +; CHECK: blr +} + +; Function Attrs: nounwind readnone +define double @food(double %X) #0 { +entry: + %conv = fptosi double %X to i32 + %conv1 = sitofp i32 %conv to double + ret double %conv1 + +; CHECK-LABEL: @food +; CHECK-DAG: fctiwz [[REG2:[0-9]+]], 1 +; CHECK-DAG: addi [[REG1:[0-9]+]], 1, +; CHECK: stfiwx [[REG2]], 0, [[REG1]] +; CHECK: lfiwax [[REG3:[0-9]+]], 0, [[REG1]] +; CHECK: fcfid 1, [[REG3]] +; CHECK: blr +} + +; Function Attrs: nounwind readnone 
+define float @foou(float %X) #0 { +entry: + %conv = fptoui float %X to i32 + %conv1 = uitofp i32 %conv to float + ret float %conv1 + +; CHECK-LABEL: @foou +; CHECK-DAG: fctiwuz [[REG2:[0-9]+]], 1 +; CHECK-DAG: addi [[REG1:[0-9]+]], 1, +; CHECK: stfiwx [[REG2]], 0, [[REG1]] +; CHECK: lfiwzx [[REG3:[0-9]+]], 0, [[REG1]] +; CHECK: fcfidus 1, [[REG3]] +; CHECK: blr +} + +; Function Attrs: nounwind readnone +define double @fooud(double %X) #0 { +entry: + %conv = fptoui double %X to i32 + %conv1 = uitofp i32 %conv to double + ret double %conv1 + +; CHECK-LABEL: @fooud +; CHECK-DAG: fctiwuz [[REG2:[0-9]+]], 1 +; CHECK-DAG: addi [[REG1:[0-9]+]], 1, +; CHECK: stfiwx [[REG2]], 0, [[REG1]] +; CHECK: lfiwzx [[REG3:[0-9]+]], 0, [[REG1]] +; CHECK: fcfidu 1, [[REG3]] +; CHECK: blr +} + +attributes #0 = { nounwind readonly } +