From 4e98296890c4e23d21841331f34c18d84e2d6e76 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 9 Jan 2015 01:34:30 +0000 Subject: [PATCH] [PowerPC] Fold [sz]ext with int_to_fp lowering where possible On modern cores with lfiw[az]x, we can fold a sign or zero extension from i32 to i64 into the load necessary for an i64 -> fp conversion. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225493 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 62 +++++++++++++++++++++-- lib/Target/PowerPC/PPCISelLowering.h | 3 +- test/CodeGen/PowerPC/fp-to-int-ext.ll | 69 ++++++++++++++++++++++++++ 3 files changed, 130 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/PowerPC/fp-to-int-ext.ll diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 540061fe429..0614fe056cc 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5483,9 +5483,11 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, // factor (see spliceIntoChain below for this last part). 
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, - SelectionDAG &DAG) const { + SelectionDAG &DAG, + ISD::LoadExtType ET) const { SDLoc dl(Op); - if ((Op.getOpcode() == ISD::FP_TO_UINT || + if (ET == ISD::NON_EXTLOAD && + (Op.getOpcode() == ISD::FP_TO_UINT || Op.getOpcode() == ISD::FP_TO_SINT) && isOperationLegalOrCustom(Op.getOpcode(), Op.getOperand(0).getValueType())) { @@ -5495,7 +5497,8 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, } LoadSDNode *LD = dyn_cast<LoadSDNode>(Op); - if (!LD || !ISD::isNON_EXTLoad(LD) || LD->isVolatile() || LD->isNonTemporal()) + if (!LD || LD->getExtensionType() != ET || LD->isVolatile() || + LD->isNonTemporal()) return false; if (LD->getMemoryVT() != MemVT) return false; @@ -5615,11 +5618,64 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, ReuseLoadInfo RLI; SDValue Bits; + MachineFunction &MF = DAG.getMachineFunction(); if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false, false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo, RLI.Ranges); spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); + } else if (Subtarget.hasLFIWAX() && + canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) { + MachineMemOperand *MMO = + MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, + RLI.Alignment, RLI.AAInfo, RLI.Ranges); + SDValue Ops[] = { RLI.Chain, RLI.Ptr }; + Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, + DAG.getVTList(MVT::f64, MVT::Other), + Ops, MVT::i32, MMO); + spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); + } else if (Subtarget.hasFPCVT() && + canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) { + MachineMemOperand *MMO = + MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, + RLI.Alignment, RLI.AAInfo, RLI.Ranges); + SDValue Ops[] = { RLI.Chain, RLI.Ptr }; + Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl, + DAG.getVTList(MVT::f64, MVT::Other), + Ops, 
MVT::i32, MMO); + spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); + } else if (((Subtarget.hasLFIWAX() && + SINT.getOpcode() == ISD::SIGN_EXTEND) || + (Subtarget.hasFPCVT() && + SINT.getOpcode() == ISD::ZERO_EXTEND)) && + SINT.getOperand(0).getValueType() == MVT::i32) { + MachineFrameInfo *FrameInfo = MF.getFrameInfo(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + + int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + SDValue Store = + DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, + MachinePointerInfo::getFixedStack(FrameIdx), + false, false, 0); + + assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && + "Expected an i32 store"); + + RLI.Ptr = FIdx; + RLI.Chain = Store; + RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx); + RLI.Alignment = 4; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, + RLI.Alignment, RLI.AAInfo, RLI.Ranges); + SDValue Ops[] = { RLI.Chain, RLI.Ptr }; + Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ? 
+ PPCISD::LFIWZX : PPCISD::LFIWAX, + dl, DAG.getVTList(MVT::f64, MVT::Other), + Ops, MVT::i32, MMO); } else Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 8fc988a320f..6149a21f443 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -606,7 +606,8 @@ namespace llvm { }; bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, - SelectionDAG &DAG) const; + SelectionDAG &DAG, + ISD::LoadExtType ET = ISD::NON_EXTLOAD) const; void spliceIntoChain(SDValue ResChain, SDValue NewResChain, SelectionDAG &DAG) const; diff --git a/test/CodeGen/PowerPC/fp-to-int-ext.ll b/test/CodeGen/PowerPC/fp-to-int-ext.ll new file mode 100644 index 00000000000..bfacd89ca1a --- /dev/null +++ b/test/CodeGen/PowerPC/fp-to-int-ext.ll @@ -0,0 +1,69 @@ +; RUN: llc -mcpu=a2 < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind +define double @foo1(i32* %x) #0 { +entry: + %0 = load i32* %x, align 4 + %conv = sext i32 %0 to i64 + %conv1 = sitofp i64 %conv to double + ret double %conv1 + +; CHECK-LABEL: @foo1 +; CHECK: lfiwax [[REG1:[0-9]+]], 0, 3 +; CHECK: fcfid 1, [[REG1]] +; CHECK: blr +} + +define double @foo2(i32* %x) #0 { +entry: + %0 = load i32* %x, align 4 + %conv = zext i32 %0 to i64 + %conv1 = sitofp i64 %conv to double + ret double %conv1 + +; CHECK-LABEL: @foo2 +; CHECK: lfiwzx [[REG1:[0-9]+]], 0, 3 +; CHECK: fcfid 1, [[REG1]] +; CHECK: blr +} + +define double @foo3(i32* %x) #0 { +entry: + %0 = load i32* %x, align 4 + %1 = add i32 %0, 8 + %conv = zext i32 %1 to i64 + %conv1 = sitofp i64 %conv to double + ret double %conv1 + +; CHECK-LABEL: @foo3 +; CHECK-DAG: lwz [[REG1:[0-9]+]], 0(3) +; CHECK-DAG: addi [[REG3:[0-9]+]], 1, +; CHECK-DAG: addi [[REG2:[0-9]+]], [[REG1]], 8 +; CHECK-DAG: stw [[REG2]], +; CHECK: lfiwzx [[REG4:[0-9]+]], 0, [[REG3]] +; CHECK: 
fcfid 1, [[REG4]] +; CHECK: blr +} + +define double @foo4(i32* %x) #0 { +entry: + %0 = load i32* %x, align 4 + %1 = add i32 %0, 8 + %conv = sext i32 %1 to i64 + %conv1 = sitofp i64 %conv to double + ret double %conv1 + +; CHECK-LABEL: @foo4 +; CHECK-DAG: lwz [[REG1:[0-9]+]], 0(3) +; CHECK-DAG: addi [[REG3:[0-9]+]], 1, +; CHECK-DAG: addi [[REG2:[0-9]+]], [[REG1]], 8 +; CHECK-DAG: stw [[REG2]], +; CHECK: lfiwax [[REG4:[0-9]+]], 0, [[REG3]] +; CHECK: fcfid 1, [[REG4]] +; CHECK: blr +} + +attributes #0 = { nounwind } +