[PowerPC] Fold [sz]ext with int_to_fp lowering where possible

On modern cores with lfiw[az]x, we can fold a sign or zero extension from i32
to i64 into the load necessary for an i64 -> fp conversion.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225493 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2015-01-09 01:34:30 +00:00
parent 8e1d151abe
commit 4e98296890
3 changed files with 130 additions and 4 deletions

View File

@ -5483,9 +5483,11 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
// factor (see spliceIntoChain below for this last part).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
ReuseLoadInfo &RLI,
SelectionDAG &DAG) const {
SelectionDAG &DAG,
ISD::LoadExtType ET) const {
SDLoc dl(Op);
if ((Op.getOpcode() == ISD::FP_TO_UINT ||
if (ET == ISD::NON_EXTLOAD &&
(Op.getOpcode() == ISD::FP_TO_UINT ||
Op.getOpcode() == ISD::FP_TO_SINT) &&
isOperationLegalOrCustom(Op.getOpcode(),
Op.getOperand(0).getValueType())) {
@ -5495,7 +5497,8 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
}
LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
if (!LD || !ISD::isNON_EXTLoad(LD) || LD->isVolatile() || LD->isNonTemporal())
if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
LD->isNonTemporal())
return false;
if (LD->getMemoryVT() != MemVT)
return false;
@ -5615,11 +5618,64 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
ReuseLoadInfo RLI;
SDValue Bits;
MachineFunction &MF = DAG.getMachineFunction();
if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
RLI.Ranges);
spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
} else if (Subtarget.hasLFIWAX() &&
canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
MachineMemOperand *MMO =
MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
RLI.Alignment, RLI.AAInfo, RLI.Ranges);
SDValue Ops[] = { RLI.Chain, RLI.Ptr };
Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
DAG.getVTList(MVT::f64, MVT::Other),
Ops, MVT::i32, MMO);
spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
} else if (Subtarget.hasFPCVT() &&
canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
MachineMemOperand *MMO =
MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
RLI.Alignment, RLI.AAInfo, RLI.Ranges);
SDValue Ops[] = { RLI.Chain, RLI.Ptr };
Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
DAG.getVTList(MVT::f64, MVT::Other),
Ops, MVT::i32, MMO);
spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
} else if (((Subtarget.hasLFIWAX() &&
SINT.getOpcode() == ISD::SIGN_EXTEND) ||
(Subtarget.hasFPCVT() &&
SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
SINT.getOperand(0).getValueType() == MVT::i32) {
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Store =
DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
MachinePointerInfo::getFixedStack(FrameIdx),
false, false, 0);
assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
"Expected an i32 store");
RLI.Ptr = FIdx;
RLI.Chain = Store;
RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
RLI.Alignment = 4;
MachineMemOperand *MMO =
MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
RLI.Alignment, RLI.AAInfo, RLI.Ranges);
SDValue Ops[] = { RLI.Chain, RLI.Ptr };
Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
PPCISD::LFIWZX : PPCISD::LFIWAX,
dl, DAG.getVTList(MVT::f64, MVT::Other),
Ops, MVT::i32, MMO);
} else
Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

View File

@ -606,7 +606,8 @@ namespace llvm {
};
bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
SelectionDAG &DAG) const;
SelectionDAG &DAG,
ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
void spliceIntoChain(SDValue ResChain, SDValue NewResChain,
SelectionDAG &DAG) const;

View File

@ -0,0 +1,69 @@
; RUN: llc -mcpu=a2 < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; Function Attrs: nounwind
; foo1: an i32 loaded from %x is sign-extended to i64 and then converted to
; double. The expected output is an lfiwax that reads through the incoming
; pointer (operands "0, 3"; presumably r3 carries %x -- confirm against the
; ABI) whose result register feeds fcfid.
define double @foo1(i32* %x) #0 {
entry:
%0 = load i32* %x, align 4
%conv = sext i32 %0 to i64
%conv1 = sitofp i64 %conv to double
ret double %conv1
; CHECK-LABEL: @foo1
; CHECK: lfiwax [[REG1:[0-9]+]], 0, 3
; CHECK: fcfid 1, [[REG1]]
; CHECK: blr
}
; foo2: same shape as a sign-extending load test, but the i32 loaded from %x
; is zero-extended to i64 before the (signed) sitofp conversion. The expected
; output is an lfiwzx that reads through the incoming pointer (operands
; "0, 3") whose result register feeds fcfid.
define double @foo2(i32* %x) #0 {
entry:
%0 = load i32* %x, align 4
%conv = zext i32 %0 to i64
%conv1 = sitofp i64 %conv to double
ret double %conv1
; CHECK-LABEL: @foo2
; CHECK: lfiwzx [[REG1:[0-9]+]], 0, 3
; CHECK: fcfid 1, [[REG1]]
; CHECK: blr
}
; foo3: the value being zero-extended is the result of an add, not a load, so
; there is no existing load address to reuse. The expected output loads the
; i32 with lwz, adds 8, stores the 32-bit result with stw, and reloads it with
; lfiwzx from an address formed off register 1 (presumably the stack pointer,
; i.e. a temporary stack slot -- confirm), then converts with fcfid. The first
; four directives are order-independent (-DAG); the lfiwzx/fcfid/blr sequence
; must follow in order.
define double @foo3(i32* %x) #0 {
entry:
%0 = load i32* %x, align 4
%1 = add i32 %0, 8
%conv = zext i32 %1 to i64
%conv1 = sitofp i64 %conv to double
ret double %conv1
; CHECK-LABEL: @foo3
; CHECK-DAG: lwz [[REG1:[0-9]+]], 0(3)
; CHECK-DAG: addi [[REG3:[0-9]+]], 1,
; CHECK-DAG: addi [[REG2:[0-9]+]], [[REG1]], 8
; CHECK-DAG: stw [[REG2]],
; CHECK: lfiwzx [[REG4:[0-9]+]], 0, [[REG3]]
; CHECK: fcfid 1, [[REG4]]
; CHECK: blr
}
; foo4: the value being sign-extended is the result of an add, not a load, so
; there is no existing load address to reuse. The expected output loads the
; i32 with lwz, adds 8, stores the 32-bit result with stw, and reloads it with
; lfiwax from an address formed off register 1 (presumably the stack pointer,
; i.e. a temporary stack slot -- confirm), then converts with fcfid. The first
; four directives are order-independent (-DAG); the lfiwax/fcfid/blr sequence
; must follow in order.
define double @foo4(i32* %x) #0 {
entry:
%0 = load i32* %x, align 4
%1 = add i32 %0, 8
%conv = sext i32 %1 to i64
%conv1 = sitofp i64 %conv to double
ret double %conv1
; CHECK-LABEL: @foo4
; CHECK-DAG: lwz [[REG1:[0-9]+]], 0(3)
; CHECK-DAG: addi [[REG3:[0-9]+]], 1,
; CHECK-DAG: addi [[REG2:[0-9]+]], [[REG1]], 8
; CHECK-DAG: stw [[REG2]],
; CHECK: lfiwax [[REG4:[0-9]+]], 0, [[REG3]]
; CHECK: fcfid 1, [[REG4]]
; CHECK: blr
}
attributes #0 = { nounwind }