Value soft float calls as more expensive in the inliner.

Summary: When evaluating floating point instructions in the inliner, ask the TTI whether it is an expensive operation. By default, it's not an expensive operation. This keeps the default behavior the same as before. The ARM TTI has been updated to return TCC_Expensive for targets that don't have hardware floating point. Reviewers: chandlerc, echristo Reviewed By: echristo Subscribers: t.p.northover, aemerson, llvm-commits Differential Revision: http://reviews.llvm.org/D6936 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228263 91177308-0d34-0410-b5e6-96231b3b80d8
2026-04-20 16:17:38 +00:00 · 2015-02-05 02:09:33 +00:00
parent a7f2cf45f3
commit d02540a1d7
9 changed files with 200 additions and 1 deletions
@@ -325,6 +325,10 @@ public:
  /// \brief Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

+  /// \brief Return the expected cost of a floating point operation on the
+  /// specified type, e.g. TCC_Expensive when there is no hardware support.
+  unsigned getFPOpCost(Type *Ty) const;
+
  /// \brief Return the expected cost of materializing for the given integer
  /// immediate of the specified type.
  unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
@@ -516,6 +520,7 @@ public:
  virtual bool shouldBuildLookupTables() = 0;
  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
  virtual bool haveFastSqrt(Type *Ty) = 0;
+  virtual unsigned getFPOpCost(Type *Ty) = 0;
  virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) = 0;
  virtual unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                                 Type *Ty) = 0;
@@ -631,6 +636,11 @@ public:
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
+
+  unsigned getFPOpCost(Type *Ty) override {
+    return Impl.getFPOpCost(Ty);
+  }
+
  unsigned getIntImmCost(const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCost(Imm, Ty);
  }
@@ -239,6 +239,8 @@ public:

  bool haveFastSqrt(Type *Ty) { return false; }

+  unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
+
  unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }

  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
@@ -167,6 +167,12 @@ public:
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
  }

+  unsigned getFPOpCost(Type *Ty) {
+    // By default, assume FP operations are implemented in hardware and cost no
+    // more than a basic instruction.  Target-specific TTIs can override this.
+    return TargetTransformInfo::TCC_Basic;
+  }
+
  void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) {
    // This unrolling functionality is target independent, but to provide some
    // motivation for its intended use, for x86:
@@ -907,6 +907,25 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
    if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
      ++NumVectorInstructions;

+    // If the instruction is floating point, and the target says this operation is
+    // expensive or the function has the "use-soft-float" attribute, this may
+    // eventually become a library call.  Treat the cost as such.
+    if (I->getType()->isFloatingPointTy()) {
+      bool hasSoftFloatAttr = false;
+
+      // If the function has the "use-soft-float" attribute, mark it as expensive.
+      if (F.hasFnAttribute("use-soft-float")) {
+        Attribute Attr = F.getFnAttribute("use-soft-float");
+        StringRef Val = Attr.getValueAsString();
+        if (Val == "true")
+          hasSoftFloatAttr = true;
+      }
+
+      if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
+          hasSoftFloatAttr)
+        Cost += InlineConstants::CallPenalty;
+    }
+
    // If the instruction simplified to a constant, there is no cost to this
    // instruction. Visit the instructions using our InstVisitor to account for
    // all of the per-instruction logic. The visit tree returns true if we
@@ -148,6 +148,10 @@ bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
  return TTIImpl->haveFastSqrt(Ty);
 }

+unsigned TargetTransformInfo::getFPOpCost(Type *Ty) const {
+  return TTIImpl->getFPOpCost(Ty);
+}
+
 unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
  return TTIImpl->getIntImmCost(Imm, Ty);
 }
@@ -310,7 +310,8 @@ public:
  bool hasCRC() const { return HasCRC; }
  bool hasVirtualization() const { return HasVirtualization; }
  bool useNEONForSinglePrecisionFP() const {
-    return hasNEON() && UseNEONForSinglePrecisionFP; }
+    return hasNEON() && UseNEONForSinglePrecisionFP;
+  }

  bool hasDivide() const { return HasHardwareDivide; }
  bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
@@ -314,6 +314,25 @@ unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
  return 1;
 }

+unsigned ARMTTIImpl::getFPOpCost(Type *Ty) {
+  // Use logic similar to ARMISelLowering: FP hardware is usable only with VFP2
+  // on a core that is not Thumb1-only.  In that case float is basic cost and
+  // double is expensive only when isFPOnlySP(); everything else is expensive.
+
+  if (ST->hasVFP2() && !ST->isThumb1Only()) {
+    if (Ty->isFloatTy()) {
+      return TargetTransformInfo::TCC_Basic;
+    }
+
+    if (Ty->isDoubleTy()) {
+      return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive :
+        TargetTransformInfo::TCC_Basic;
+    }
+  }
+
+  return TargetTransformInfo::TCC_Expensive;
+}
+
 unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                    Type *SubTp) {
  // We only handle costs of reverse and alternate shuffles for now.
@@ -114,6 +114,8 @@ public:

  unsigned getAddressComputationCost(Type *Val, bool IsComplex);

+  unsigned getFPOpCost(Type *Ty);
+
  unsigned getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
@@ -0,0 +1,136 @@
+; RUN: opt -S -inline < %s | FileCheck %s
+; Make sure that soft-float implementations are treated as more expensive by the
+; inliner.
+
+define i32 @test_nofp() #0 {
+; f_nofp() has the "use-soft-float" attribute, so it should never get inlined.
+; CHECK-LABEL: test_nofp
+; CHECK: call float @f_nofp 
+entry:
+  %responseX = alloca i32, align 4
+  %responseY = alloca i32, align 4
+  %responseZ = alloca i32, align 4
+  %valueX = alloca i8, align 1
+  %valueY = alloca i8, align 1
+  %valueZ = alloca i8, align 1
+
+  call void @getX(i32* %responseX, i8* %valueX)
+  call void @getY(i32* %responseY, i8* %valueY)
+  call void @getZ(i32* %responseZ, i8* %valueZ)
+
+  %0 = load i32* %responseX
+  %1 = load i8* %valueX
+  %call = call float @f_nofp(i32 %0, i8 zeroext %1)
+  %2 = load i32* %responseZ
+  %3 = load i8* %valueZ
+  %call2 = call float @f_nofp(i32 %2, i8 zeroext %3)
+  %call3 = call float @fabsf(float %call)
+  %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
+  br i1 %cmp, label %if.end12, label %if.else
+
+if.else:                                          ; preds = %entry
+  %4 = load i32* %responseY
+  %5 = load i8* %valueY
+  %call1 = call float @f_nofp(i32 %4, i8 zeroext %5)
+  %call4 = call float @fabsf(float %call1)
+  %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
+  br i1 %cmp5, label %if.end12, label %if.else7
+
+if.else7:                                         ; preds = %if.else
+  %call8 = call float @fabsf(float %call2)
+  %cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000
+  br i1 %cmp9, label %if.then10, label %if.end12
+
+if.then10:                                        ; preds = %if.else7
+  br label %if.end12
+
+if.end12:                                         ; preds = %if.else, %entry, %if.then10, %if.else7
+  %success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ]
+  ret i32 %success.0
+}
+
+define i32 @test_hasfp() #0 {
+; f_hasfp() does not have the "use-soft-float" attribute, so it should get inlined.
+; CHECK-LABEL: test_hasfp
+; CHECK-NOT: call float @f_hasfp 
+entry:
+  %responseX = alloca i32, align 4
+  %responseY = alloca i32, align 4
+  %responseZ = alloca i32, align 4
+  %valueX = alloca i8, align 1
+  %valueY = alloca i8, align 1
+  %valueZ = alloca i8, align 1
+
+  call void @getX(i32* %responseX, i8* %valueX)
+  call void @getY(i32* %responseY, i8* %valueY)
+  call void @getZ(i32* %responseZ, i8* %valueZ)
+
+  %0 = load i32* %responseX
+  %1 = load i8* %valueX
+  %call = call float @f_hasfp(i32 %0, i8 zeroext %1)
+  %2 = load i32* %responseZ
+  %3 = load i8* %valueZ
+  %call2 = call float @f_hasfp(i32 %2, i8 zeroext %3)
+  %call3 = call float @fabsf(float %call)
+  %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
+  br i1 %cmp, label %if.end12, label %if.else
+
+if.else:                                          ; preds = %entry
+  %4 = load i32* %responseY
+  %5 = load i8* %valueY
+  %call1 = call float @f_hasfp(i32 %4, i8 zeroext %5)
+  %call4 = call float @fabsf(float %call1)
+  %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
+  br i1 %cmp5, label %if.end12, label %if.else7
+
+if.else7:                                         ; preds = %if.else
+  %call8 = call float @fabsf(float %call2)
+  %cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000
+  br i1 %cmp9, label %if.then10, label %if.end12
+
+if.then10:                                        ; preds = %if.else7
+  br label %if.end12
+
+if.end12:                                         ; preds = %if.else, %entry, %if.then10, %if.else7
+  %success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ]
+  ret i32 %success.0
+}
+
+declare void @getX(i32*, i8*) #0
+
+declare void @getY(i32*, i8*) #0
+
+declare void @getZ(i32*, i8*) #0
+
+define internal float @f_hasfp(i32 %response, i8 zeroext %value1) #0 {
+entry:
+  %conv = zext i8 %value1 to i32
+  %sub = add nsw i32 %conv, -1
+  %conv1 = sitofp i32 %sub to float
+  %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
+  %mul = fmul float %0, 2.620000e+03
+  %conv2 = sitofp i32 %response to float
+  %sub3 = fsub float %conv2, %mul
+  %div = fdiv float %sub3, %mul
+  ret float %div
+}
+
+define internal float @f_nofp(i32 %response, i8 zeroext %value1) #1 {
+entry:
+  %conv = zext i8 %value1 to i32
+  %sub = add nsw i32 %conv, -1
+  %conv1 = sitofp i32 %sub to float
+  %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
+  %mul = fmul float %0, 2.620000e+03
+  %conv2 = sitofp i32 %response to float
+  %sub3 = fsub float %conv2, %mul
+  %div = fdiv float %sub3, %mul
+  ret float %div
+}
+
+declare float @fabsf(float) optsize minsize
+
+declare float @llvm.pow.f32(float, float) optsize minsize
+
+attributes #0 = { minsize optsize }
+attributes #1 = { minsize optsize "use-soft-float"="true" }