[PowerPC] Make use of the TargetRecip system

r238842 added the TargetRecip system for controlling use of reciprocal
estimates for sqrt and division using a set of parameters that can be set by
the frontend. Clang now supports a sophisticated -mrecip option, and this will
allow that option to effectively control the relevant code-generation
functionality of the PPC backend.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241985 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2015-07-12 02:33:57 +00:00
parent d14325bee9
commit 866cf31c07
4 changed files with 62 additions and 15 deletions

View File

@ -9067,6 +9067,19 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
/// Compose the reciprocal-estimate operation name for \p Base and \p VT.
///
/// The name is \p Base followed by a precision suffix: "d" when the scalar
/// element type of \p VT is f64, "f" for every other element type. Vector
/// types additionally get a "vec-" prefix, so e.g. a base of "sqrt" yields
/// "sqrtf", "sqrtd", "vec-sqrtf" or "vec-sqrtd".
static std::string getRecipOp(const char *Base, EVT VT) {
  // Any element type other than f64 falls back to the single-precision suffix.
  const bool IsDouble = VT.getScalarType() == MVT::f64;

  std::string Name = VT.isVector() ? "vec-" : "";
  Name += Base;
  Name += IsDouble ? 'd' : 'f';
  return Name;
}
SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
DAGCombinerInfo &DCI,
unsigned &RefinementSteps,
@ -9078,13 +9091,12 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
// Convergence is quadratic, so we essentially double the number of digits
// correct after every iteration. For both FRE and FRSQRTE, the minimum
// architected relative accuracy is 2^-5. When hasRecipPrec(), this is
// 2^-14. IEEE float has 23 digits and double has 52 digits.
RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++RefinementSteps;
TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
std::string RecipOp = getRecipOp("sqrt", VT);
if (!Recips.isEnabled(RecipOp))
return SDValue();
RefinementSteps = Recips.getRefinementSteps(RecipOp);
UseOneConstNR = true;
return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
@ -9101,13 +9113,12 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
// Convergence is quadratic, so we essentially double the number of digits
// correct after every iteration. For both FRE and FRSQRTE, the minimum
// architected relative accuracy is 2^-5. When hasRecipPrec(), this is
// 2^-14. IEEE float has 23 digits and double has 52 digits.
RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++RefinementSteps;
TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
std::string RecipOp = getRecipOp("div", VT);
if (!Recips.isEnabled(RecipOp))
return SDValue();
RefinementSteps = Recips.getRefinementSteps(RecipOp);
return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
}
return SDValue();

View File

@ -172,7 +172,26 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
computeFSAdditions(FS, OL, TT), Options, RM, CM, OL),
TLOF(createTLOF(getTargetTriple())),
TargetABI(computeTargetABI(TT, Options)) {
TargetABI(computeTargetABI(TT, Options)),
Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
// For the estimates, convergence is quadratic, so we essentially double the
// number of digits correct after every iteration. For both FRE and FRSQRTE,
// the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
// this is 2^-14. IEEE float has 23 digits and double has 52 digits.
unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
RefinementSteps64 = RefinementSteps + 1;
this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps);
this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps);
this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps);
this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps);
this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64);
this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64);
this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64);
this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64);
initAsmInfo();
}

View File

@ -29,6 +29,8 @@ public:
private:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
PPCABI TargetABI;
PPCSubtarget Subtarget;
mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
public:

View File

@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=-vsx | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=-vsx -recip=sqrtf:0,sqrtd:0 | FileCheck %s -check-prefix=CHECK-NONR
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck -check-prefix=CHECK-SAFE %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@ -24,6 +25,13 @@ define double @foo(double %a, double %b) nounwind {
; CHECK-NEXT: fmul
; CHECK: blr
; CHECK-NONR: @foo
; CHECK-NONR: frsqrte
; CHECK-NONR-NOT: fmadd
; CHECK-NONR: fmul
; CHECK-NONR-NOT: fmadd
; CHECK-NONR: blr
; CHECK-SAFE: @foo
; CHECK-SAFE: fsqrt
; CHECK-SAFE: fdiv
@ -90,6 +98,13 @@ define float @goo(float %a, float %b) nounwind {
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr
; CHECK-NONR: @goo
; CHECK-NONR: frsqrtes
; CHECK-NONR-NOT: fmadds
; CHECK-NONR: fmuls
; CHECK-NONR-NOT: fmadds
; CHECK-NONR: blr
; CHECK-SAFE: @goo
; CHECK-SAFE: fsqrts
; CHECK-SAFE: fdivs