Value soft float calls as more expensive in the inliner.

Summary: When evaluating floating point instructions in the inliner, ask the TTI whether each one is an expensive operation.  By default, floating point operations are not considered expensive, which keeps the default behavior the same as before.  The ARM TTI has been updated to return TCC_Expensive for targets which don't have hardware floating point.

Reviewers: chandlerc, echristo

Reviewed By: echristo

Subscribers: t.p.northover, aemerson, llvm-commits

Differential Revision: http://reviews.llvm.org/D6936

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228263 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Cameron Esfahani 2015-02-05 02:09:33 +00:00
parent a7f2cf45f3
commit d02540a1d7
9 changed files with 200 additions and 1 deletions

View File

@ -325,6 +325,10 @@ public:
/// \brief Return true if the hardware has a fast square-root instruction.
bool haveFastSqrt(Type *Ty) const;
/// \brief Return the expected cost of supporting the floating point operation
/// of the specified type.
unsigned getFPOpCost(Type *Ty) const;
/// \brief Return the expected cost of materializing for the given integer
/// immediate of the specified type.
unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
@ -516,6 +520,7 @@ public:
virtual bool shouldBuildLookupTables() = 0;
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
virtual bool haveFastSqrt(Type *Ty) = 0;
virtual unsigned getFPOpCost(Type *Ty) = 0;
virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) = 0;
virtual unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;
@ -631,6 +636,11 @@ public:
return Impl.getPopcntSupport(IntTyWidthInBit);
}
bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
/// Forward the floating point operation cost query for \p Ty to the wrapped
/// implementation object.
unsigned getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
unsigned getIntImmCost(const APInt &Imm, Type *Ty) override {
return Impl.getIntImmCost(Imm, Ty);
}

View File

@ -239,6 +239,8 @@ public:
bool haveFastSqrt(Type *Ty) { return false; }
/// Default answer for the floating point operation cost query: every type is
/// reported as TCC_Basic.
unsigned getFPOpCost(Type *Ty) { return TTI::TCC_Basic; }
unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,

View File

@ -167,6 +167,12 @@ public:
TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
}
/// Cost of a floating point operation on \p Ty.
///
/// The default is TCC_Basic, on the assumption that floating point
/// instructions are implemented in hardware and so are no more expensive than
/// other instructions. A target-specific TTI can override this.
unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) {
// This unrolling functionality is target independent, but to provide some
// motivation for its intended use, for x86:

View File

@ -907,6 +907,25 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
// If the instruction is floating point, and the target says this operation is
// expensive or the function has the "use-soft-float" attribute, this may
// eventually become a library call. Treat the cost as such.
//
// The test below looks only at the type returned by I->getType().
// NOTE(review): instructions that take floating point operands but produce a
// non floating point value (comparisons, conversions to integer) presumably
// do not match this check -- confirm whether that is intended.
if (I->getType()->isFloatingPointTy()) {
bool hasSoftFloatAttr = false;
// If the function has the "use-soft-float" attribute, mark it as expensive.
// Only the exact string value "true" sets the flag; any other value leaves it
// cleared.
if (F.hasFnAttribute("use-soft-float")) {
Attribute Attr = F.getFnAttribute("use-soft-float");
StringRef Val = Attr.getValueAsString();
if (Val == "true")
hasSoftFloatAttr = true;
}
// Add one call penalty for this instruction when either the target reports
// the operation as TCC_Expensive or soft float was requested for F.
if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
hasSoftFloatAttr)
Cost += InlineConstants::CallPenalty;
}
// If the instruction simplified to a constant, there is no cost to this
// instruction. Visit the instructions using our InstVisitor to account for
// all of the per-instruction logic. The visit tree returns true if we

View File

@ -148,6 +148,10 @@ bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
return TTIImpl->haveFastSqrt(Ty);
}
// Public entry point for the floating point operation cost query; the answer
// comes entirely from the implementation object held in TTIImpl.
unsigned TargetTransformInfo::getFPOpCost(Type *Ty) const {
return TTIImpl->getFPOpCost(Ty);
}
unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
return TTIImpl->getIntImmCost(Imm, Ty);
}

View File

@ -310,7 +310,8 @@ public:
bool hasCRC() const { return HasCRC; }
bool hasVirtualization() const { return HasVirtualization; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
return hasNEON() && UseNEONForSinglePrecisionFP;
}
bool hasDivide() const { return HasHardwareDivide; }
bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }

View File

@ -314,6 +314,25 @@ unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
return 1;
}
/// Report whether a floating point operation on \p Ty is cheap or expensive
/// for the current ARM subtarget.
///
/// Returns TCC_Basic for float when VFP2 is usable, and for double when VFP2
/// is usable and the FPU is not single-precision only. Every other
/// combination returns TCC_Expensive.
unsigned ARMTTIImpl::getFPOpCost(Type *Ty) {
  // Use similar logic that's in ARMISelLowering:
  // Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access
  // to VFP.
  const bool HasUsableVFP = ST->hasVFP2() && !ST->isThumb1Only();
  if (!HasUsableVFP)
    return TargetTransformInfo::TCC_Expensive;

  // Single precision is always available once VFP is usable.
  if (Ty->isFloatTy())
    return TargetTransformInfo::TCC_Basic;

  // Double precision is cheap unless the FPU is single-precision only.
  if (Ty->isDoubleTy() && !ST->isFPOnlySP())
    return TargetTransformInfo::TCC_Basic;

  // Remaining types, and double on a single-precision-only FPU.
  return TargetTransformInfo::TCC_Expensive;
}
unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
// We only handle costs of reverse and alternate shuffles for now.

View File

@ -114,6 +114,8 @@ public:
unsigned getAddressComputationCost(Type *Val, bool IsComplex);
/// Return the cost class (TCC_Basic or TCC_Expensive) of a floating point
/// operation on \p Ty for the ARM subtarget.
unsigned getFPOpCost(Type *Ty);
unsigned getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,

View File

@ -0,0 +1,136 @@
; RUN: opt -S -inline < %s | FileCheck %s
; Make sure that soft float implementations are treated as more expensive
; by the inliner.
; Caller for the soft-float case. It calls @f_nofp on the X and Z responses in
; the entry block and on the Y response in %if.else, then compares the fabsf of
; each result against the same float constant to select the returned value.
define i32 @test_nofp() #0 {
; f_nofp() has the "use-soft-float" attribute, so it should never get inlined.
; CHECK-LABEL: test_nofp
; CHECK: call float @f_nofp
entry:
%responseX = alloca i32, align 4
%responseY = alloca i32, align 4
%responseZ = alloca i32, align 4
%valueX = alloca i8, align 1
%valueY = alloca i8, align 1
%valueZ = alloca i8, align 1
call void @getX(i32* %responseX, i8* %valueX)
call void @getY(i32* %responseY, i8* %valueY)
call void @getZ(i32* %responseZ, i8* %valueZ)
%0 = load i32* %responseX
%1 = load i8* %valueX
%call = call float @f_nofp(i32 %0, i8 zeroext %1)
%2 = load i32* %responseZ
%3 = load i8* %valueZ
%call2 = call float @f_nofp(i32 %2, i8 zeroext %3)
%call3 = call float @fabsf(float %call)
%cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
br i1 %cmp, label %if.end12, label %if.else
if.else: ; preds = %entry
%4 = load i32* %responseY
%5 = load i8* %valueY
%call1 = call float @f_nofp(i32 %4, i8 zeroext %5)
%call4 = call float @fabsf(float %call1)
%cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
br i1 %cmp5, label %if.end12, label %if.else7
if.else7: ; preds = %if.else
%call8 = call float @fabsf(float %call2)
%cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000
br i1 %cmp9, label %if.then10, label %if.end12
if.then10: ; preds = %if.else7
br label %if.end12
if.end12: ; preds = %if.else, %entry, %if.then10, %if.else7
%success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ]
ret i32 %success.0
}
; Caller for the hardware-float case. It has the same control flow as
; @test_nofp but calls @f_hasfp, which does not carry "use-soft-float".
define i32 @test_hasfp() #0 {
; f_hasfp() does not have the "use-soft-float" attribute, so it should get inlined.
; CHECK-LABEL: test_hasfp
; CHECK-NOT: call float @f_hasfp
entry:
%responseX = alloca i32, align 4
%responseY = alloca i32, align 4
%responseZ = alloca i32, align 4
%valueX = alloca i8, align 1
%valueY = alloca i8, align 1
%valueZ = alloca i8, align 1
call void @getX(i32* %responseX, i8* %valueX)
call void @getY(i32* %responseY, i8* %valueY)
call void @getZ(i32* %responseZ, i8* %valueZ)
%0 = load i32* %responseX
%1 = load i8* %valueX
%call = call float @f_hasfp(i32 %0, i8 zeroext %1)
%2 = load i32* %responseZ
%3 = load i8* %valueZ
%call2 = call float @f_hasfp(i32 %2, i8 zeroext %3)
%call3 = call float @fabsf(float %call)
%cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
br i1 %cmp, label %if.end12, label %if.else
if.else: ; preds = %entry
%4 = load i32* %responseY
%5 = load i8* %valueY
%call1 = call float @f_hasfp(i32 %4, i8 zeroext %5)
%call4 = call float @fabsf(float %call1)
%cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
br i1 %cmp5, label %if.end12, label %if.else7
if.else7: ; preds = %if.else
%call8 = call float @fabsf(float %call2)
%cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000
br i1 %cmp9, label %if.then10, label %if.end12
if.then10: ; preds = %if.else7
br label %if.end12
if.end12: ; preds = %if.else, %entry, %if.then10, %if.else7
%success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ]
ret i32 %success.0
}
declare void @getX(i32*, i8*) #0
declare void @getY(i32*, i8*) #0
declare void @getZ(i32*, i8*) #0
; Callee for the hardware-float case (attribute group #0, no "use-soft-float").
; With m = pow(constant, float(value1 - 1)) * 2620.0, it returns
; (float(response) - m) / m.
define internal float @f_hasfp(i32 %response, i8 zeroext %value1) #0 {
entry:
%conv = zext i8 %value1 to i32
%sub = add nsw i32 %conv, -1
%conv1 = sitofp i32 %sub to float
%0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
%mul = fmul float %0, 2.620000e+03
%conv2 = sitofp i32 %response to float
%sub3 = fsub float %conv2, %mul
%div = fdiv float %sub3, %mul
ret float %div
}
; Callee for the soft-float case. The body is the same as @f_hasfp; the only
; difference is attribute group #1, which adds "use-soft-float"="true".
define internal float @f_nofp(i32 %response, i8 zeroext %value1) #1 {
entry:
%conv = zext i8 %value1 to i32
%sub = add nsw i32 %conv, -1
%conv1 = sitofp i32 %sub to float
%0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
%mul = fmul float %0, 2.620000e+03
%conv2 = sitofp i32 %response to float
%sub3 = fsub float %conv2, %mul
%div = fdiv float %sub3, %mul
ret float %div
}
declare float @fabsf(float) optsize minsize
declare float @llvm.pow.f32(float, float) optsize minsize
attributes #0 = { minsize optsize }
attributes #1 = { minsize optsize "use-soft-float"="true" }