From f99a4b82a4ff0383138ccaf9e22511eb786168fb Mon Sep 17 00:00:00 2001 From: Stuart Hastings Date: Mon, 6 Jun 2011 23:15:58 +0000 Subject: [PATCH] Followup to 132458, omit unnecessary stack copy when x87 input is a load. rdar://problem/6373334 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@132696 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 27 +++++++++++++++++---- lib/Target/X86/X86ISelLowering.h | 5 ++-- test/CodeGen/X86/2011-06-03-x87chain.ll | 31 +++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 7 deletions(-) create mode 100644 test/CodeGen/X86/2011-06-03-x87chain.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 464c5bcb530..eed4bca40b9 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1097,6 +1097,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::SINT_TO_FP); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); @@ -6700,11 +6701,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, DebugLoc dl = Op.getDebugLoc(); unsigned Size = SrcVT.getSizeInBits()/8; MachineFunction &MF = DAG.getMachineFunction(); - - SDValue Addr = Op.getOperand(0); - if (Addr.getOpcode() == ISD::LOAD) - return BuildFILD(Op, SrcVT, DAG.getEntryNode(), Addr, DAG); - int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), @@ -12169,6 +12165,26 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86TargetLowering *XTLI) { + DebugLoc dl = N->getDebugLoc(); + SDValue Op0 = N->getOperand(0); + // Transform (SINT_TO_FP (i64 ...)) into an x87 operation 
if we have + // a 32-bit target where SSE doesn't support i64->FP operations. + if (Op0.getOpcode() == ISD::LOAD) { + LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode()); + EVT VT = Ld->getValueType(0); + if (!Ld->isVolatile() && !N->getValueType(0).isVector() && + ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() && + !XTLI->getSubtarget()->is64Bit() && + !DAG.getTargetLoweringInfo().isTypeLegal(VT)) { + SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0), Ld->getChain(), Op0, DAG); + DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1)); + return FILDChain; + } + } + return SDValue(); +} + // Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG, X86TargetLowering::DAGCombinerInfo &DCI) { @@ -12253,6 +12269,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget); case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); + case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG); case X86ISD::FAND: return PerformFANDCombine(N, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index c599c452870..7b541631081 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -689,6 +689,9 @@ namespace llvm { /// appropriate. 
virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const; + SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, + SelectionDAG &DAG) const; + protected: std::pair<const TargetRegisterClass*, uint8_t> findRepresentativeClass(EVT VT) const; @@ -780,8 +783,6 @@ namespace llvm { SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const; - SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, - SelectionDAG &DAG) const; SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const; SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; diff --git a/test/CodeGen/X86/2011-06-03-x87chain.ll b/test/CodeGen/X86/2011-06-03-x87chain.ll new file mode 100644 index 00000000000..19b4fc734c3 --- /dev/null +++ b/test/CodeGen/X86/2011-06-03-x87chain.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +define float @chainfail1(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp { +entry: + %tmp1 = load i64* %a, align 8 +; Ensure x87 ops are properly chained, order preserved. 
+; CHECK: fildll + %conv = sitofp i64 %tmp1 to float +; CHECK: fstps + store float %conv, float* %f, align 4 +; CHECK: idivl + %div = sdiv i32 %x, %y + %conv5 = sext i32 %div to i64 + store i64 %conv5, i64* %b, align 8 + ret float %conv +} + +define float @chainfail2(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp { +entry: +; CHECK: movl $0, + store i64 0, i64* %b, align 8 + %mul = mul nsw i32 %y, %x + %sub = add nsw i32 %mul, -1 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds i64* %a, i64 %idxprom + %tmp4 = load i64* %arrayidx, align 8 +; CHECK: fildll + %conv = sitofp i64 %tmp4 to float + store float %conv, float* %f, align 4 + ret float %conv +}