Now using a variant of the existing inlining heuristics to decide whether to create a given specialization of a function in PartialSpecialization. If the total performance bonus across all callsites passing the same constant exceeds the specialization cost, we create the specialization.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116158 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Kenneth Uildriks 2010-10-09 22:06:36 +00:00
parent ea1fe2c0a7
commit 74fa7327d6
4 changed files with 181 additions and 40 deletions

View File

@ -143,6 +143,18 @@ namespace llvm {
Function *Callee,
SmallPtrSet<const Function *, 16> &NeverInline);
/// getSpecializationBonus - The heuristic used to determine the total
/// per-call performance boost for using a specialization of Callee with the
/// arguments in SpecializedArgNos replaced by constants.
int getSpecializationBonus(Function *Callee,
SmallVectorImpl<unsigned> &SpecializedArgNos);
/// getSpecializationCost - The heuristic used to determine the code-size
/// impact of creating a specialized version of Callee with the arguments
/// in SpecializedArgNos replaced by constants.
InlineCost getSpecializationCost(Function *Callee,
SmallVectorImpl<unsigned> &SpecializedArgNos);
/// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
/// higher threshold to determine if the function call should be inlined.
float getInlineFudgeFactor(CallSite CS);

View File

@ -312,6 +312,42 @@ bool InlineCostAnalyzer::FunctionInfo::NeverInline()
Metrics.containsIndirectBr);
}
// getSpecializationBonus - The heuristic used to determine the per-call
// performance boost for using a specialization of Callee with the arguments
// in SpecializedArgNos replaced by constants.
int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
         SmallVectorImpl<unsigned> &SpecializedArgNos)
{
// A function that may be replaced at link time gains nothing from being
// specialized here.
if (Callee->mayBeOverridden())
return 0;
// Fetch the cached metrics for the callee, computing them on first use.
FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
if (CalleeFI.Metrics.NumBlocks == 0)
CalleeFI.analyzeFunction(Callee);
int Bonus = 0;
// If this function uses the coldcc calling convention, prefer not to
// specialize it.
if (Callee->getCallingConv() == CallingConv::Cold)
Bonus -= InlineConstants::ColdccPenalty;
// Accumulate the constant-argument bonus for every argument position being
// specialized.
for (SmallVectorImpl<unsigned>::iterator An = SpecializedArgNos.begin(),
     Ae = SpecializedArgNos.end(); An != Ae; ++An)
Bonus += CalleeFI.ArgumentWeights[*An].ConstantBonus;
// Calls usually take a long time, so they make the specialization gain
// smaller.
Bonus -= CalleeFI.Metrics.NumCalls * InlineConstants::CallPenalty;
return Bonus;
}
// getInlineCost - The heuristic used to determine if we should inline the
// function call or not.
//
@ -442,6 +478,40 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
return llvm::InlineCost::get(InlineCost);
}
// getSpecializationCost - The heuristic used to determine the code-size
// impact of creating a specialized version of Callee with the arguments
// in SpecializedArgNos replaced by constants.
InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
         SmallVectorImpl<unsigned> &SpecializedArgNos)
{
// Don't specialize functions which can be redefined at link-time to mean
// something else.
if (Callee->mayBeOverridden())
return llvm::InlineCost::getNever();
// Fetch the cached metrics for the callee, computing them on first use.
FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
if (CalleeFI->Metrics.NumBlocks == 0)
CalleeFI->analyzeFunction(Callee);
// Start from the original size of the callee; each instruction counts as
// InstrCost (5).
int Cost = CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
// Offset that with the amount of code expected to be constant-folded away
// once the given arguments are replaced by constants.
for (unsigned i = 0, e = SpecializedArgNos.size(); i != e; ++i)
Cost -= CalleeFI->ArgumentWeights[SpecializedArgNos[i]].ConstantWeight;
return llvm::InlineCost::get(Cost);
}
// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
// higher threshold to determine if the function call should be inlined.
float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {

View File

@ -25,6 +25,7 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Support/CallSite.h"
#include "llvm/ADT/DenseSet.h"
@ -37,17 +38,12 @@ STATISTIC(numReplaced, "Number of callers replaced by specialization");
// Maximum number of argument positions considered interesting to specialize on
static const int MaxInterests = 6;
// Call must be used at least occasionally
static const int CallsMin = 5;
// Must have 10% of calls having the same constant to specialize on
static const double ConstValPercent = .1;
namespace {
typedef SmallVector<int, MaxInterests> InterestingArgVector;
class PartSpec : public ModulePass {
void scanForInterest(Function&, InterestingArgVector&);
int scanDistribution(Function&, int, std::map<Constant*, int>&);
InlineCostAnalyzer CA;
public :
static char ID; // Pass identification, replacement for typeid
PartSpec() : ModulePass(ID) {}
@ -79,6 +75,10 @@ SpecializeFunction(Function* F,
NF->setLinkage(GlobalValue::InternalLinkage);
F->getParent()->getFunctionList().push_back(NF);
// FIXME: Specialized versions getting the same constants should also get
// the same name. That way, specializations for public functions can be
// marked linkonce_odr and reused across modules.
for (Value::use_iterator ii = F->use_begin(), ee = F->use_end();
ii != ee; ) {
Value::use_iterator i = ii;
@ -144,22 +144,37 @@ bool PartSpec::runOnModule(Module &M) {
bool breakOuter = false;
for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) {
std::map<Constant*, int> distribution;
int total = scanDistribution(F, interestingArgs[x], distribution);
if (total > CallsMin)
for (std::map<Constant*, int>::iterator ii = distribution.begin(),
ee = distribution.end(); ii != ee; ++ii)
if (total > ii->second && ii->first &&
ii->second > total * ConstValPercent) {
ValueMap<const Value*, Value*> m;
Function::arg_iterator arg = F.arg_begin();
for (int y = 0; y < interestingArgs[x]; ++y)
++arg;
m[&*arg] = ii->first;
SpecializeFunction(&F, m);
++numSpecialized;
breakOuter = true;
Changed = true;
}
scanDistribution(F, interestingArgs[x], distribution);
for (std::map<Constant*, int>::iterator ii = distribution.begin(),
ee = distribution.end(); ii != ee; ++ii) {
// The distribution map might have an entry for NULL (i.e., one or more
// callsites were passing a non-constant there). We allow that to
// happen so that we can see whether any callsites pass a non-constant;
// if none do and the function is internal, we might have an opportunity
// to kill the original function.
if (!ii->first) continue;
int bonus = ii->second;
SmallVector<unsigned, 1> argnos;
argnos.push_back(interestingArgs[x]);
InlineCost cost = CA.getSpecializationCost(&F, argnos);
// FIXME: If this is the last constant entry, and no non-constant
// entries exist, and the target function is internal, the cost should
// be reduced by the original size of the target function, almost
// certainly making it negative and causing a specialization that will
// leave the original function dead and removable.
if (cost.isAlways() ||
(cost.isVariable() && cost.getValue() < bonus)) {
ValueMap<const Value*, Value*> m;
Function::arg_iterator arg = F.arg_begin();
for (int y = 0; y < interestingArgs[x]; ++y)
++arg;
m[&*arg] = ii->first;
SpecializeFunction(&F, m);
++numSpecialized;
breakOuter = true;
Changed = true;
}
}
}
}
return Changed;
@ -170,28 +185,20 @@ bool PartSpec::runOnModule(Module &M) {
void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) {
for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end();
ii != ee; ++ii) {
for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end();
ui != ue; ++ui) {
bool interesting = false;
User *U = *ui;
if (isa<CmpInst>(U)) interesting = true;
else if (isa<CallInst>(U))
interesting = ui->getOperand(0) == ii;
else if (isa<InvokeInst>(U))
interesting = ui->getOperand(0) == ii;
else if (isa<SwitchInst>(U)) interesting = true;
else if (isa<BranchInst>(U)) interesting = true;
if (interesting) {
args.push_back(std::distance(F.arg_begin(), ii));
break;
}
int argno = std::distance(F.arg_begin(), ii);
SmallVector<unsigned, 1> argnos;
argnos.push_back(argno);
int bonus = CA.getSpecializationBonus(&F, argnos);
if (bonus > 0) {
args.push_back(argno);
}
}
}
/// scanDistribution - For each distinct constant passed as argument number
/// 'arg' at F's callsites, accumulate the total specialization bonus across
/// all callsites passing that constant; if that total exceeds the
/// specialization cost, we will create the specialization.
int PartSpec::scanDistribution(Function& F, int arg,
std::map<Constant*, int>& dist) {
bool hasIndirect = false;
@ -201,7 +208,10 @@ int PartSpec::scanDistribution(Function& F, int arg,
User *U = *ii;
CallSite CS(U);
if (CS && CS.getCalledFunction() == &F) {
++dist[dyn_cast<Constant>(CS.getArgument(arg))];
SmallVector<unsigned, 1> argnos;
argnos.push_back(arg);
dist[dyn_cast<Constant>(CS.getArgument(arg))] +=
CA.getSpecializationBonus(&F, argnos);
++total;
} else
hasIndirect = true;

View File

@ -0,0 +1,49 @@
; If there are not enough callsites for a particular specialization to
; justify its existence, the specialization shouldn't be created.
;
; RUN: opt -S -partialspecialization -disable-inlining %s | FileCheck %s
declare void @callback1()
declare void @callback2()
declare void @othercall()
; UseCallback - Calls the given callback once, then makes a long run of calls
; to @othercall. The padding calls inflate the function's size -- presumably
; so that the estimated specialization cost is high enough that a constant
; seen at only a couple of callsites (see @foo) cannot justify a
; specialization. TODO confirm the count is tied to the cost constants.
define internal void @UseCallback(void()* %pCallback) {
call void %pCallback()
call void @othercall()
call void @othercall()
call void @othercall()
call void @othercall()
call void @othercall()
call void @othercall()
call void @othercall()
call void @othercall()
call void @othercall()
call void @othercall()
call void @othercall()
call void @othercall()
call void @othercall()
call void @othercall()
call void @othercall()
call void @othercall()
call void @othercall()
call void @othercall()
ret void
}
; foo - Driver: four callsites pass @callback1 and two pass @callback2.
; The FileCheck directives below expect the original @callback1 calls to be
; gone from the output (presumably rewritten to call a specialized version),
; while the two @callback2 calls must survive unchanged -- per the file
; header, too few callsites to justify creating that specialization.
define void @foo(void()* %pNonConstCallback)
{
Entry:
; CHECK: Entry
; CHECK-NOT: call void @UseCallback(void ()* @callback1)
; CHECK: call void @UseCallback(void ()* @callback2)
; CHECK-NEXT: call void @UseCallback(void ()* @callback2)
; CHECK-NEXT: ret void
call void @UseCallback(void()* @callback1)
call void @UseCallback(void()* @callback1)
call void @UseCallback(void()* @callback1)
call void @UseCallback(void()* @callback1)
call void @UseCallback(void()* @callback2)
call void @UseCallback(void()* @callback2)
ret void
}