mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-15 07:34:33 +00:00
Now using a variant of the existing inlining heuristics to decide whether to create a given specialization of a function in PartialSpecialization. If the total performance bonus across all callsites passing the same constant exceeds the specialization cost, we create the specialization.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116158 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ea1fe2c0a7
commit
74fa7327d6
@ -143,6 +143,18 @@ namespace llvm {
|
||||
Function *Callee,
|
||||
SmallPtrSet<const Function *, 16> &NeverInline);
|
||||
|
||||
/// getSpecializationBonus - The heuristic used to determine the per-call
|
||||
/// performance boost for using a specialization of Callee with argument
|
||||
/// SpecializedArgNos replaced by a constant.
|
||||
int getSpecializationBonus(Function *Callee,
|
||||
SmallVectorImpl<unsigned> &SpecializedArgNo);
|
||||
|
||||
/// getSpecializationCost - The heuristic used to determine the code-size
|
||||
/// impact of creating a specialized version of Callee with argument
|
||||
/// SpecializedArgNo replaced by a constant.
|
||||
InlineCost getSpecializationCost(Function *Callee,
|
||||
SmallVectorImpl<unsigned> &SpecializedArgNo);
|
||||
|
||||
/// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
|
||||
/// higher threshold to determine if the function call should be inlined.
|
||||
float getInlineFudgeFactor(CallSite CS);
|
||||
|
@ -312,6 +312,42 @@ bool InlineCostAnalyzer::FunctionInfo::NeverInline()
|
||||
Metrics.containsIndirectBr);
|
||||
|
||||
}
|
||||
// getSpecializationBonus - The heuristic used to determine the per-call
|
||||
// performance boost for using a specialization of Callee with argument
|
||||
// SpecializedArgNos replaced by a constant.
|
||||
int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
|
||||
SmallVectorImpl<unsigned> &SpecializedArgNos)
|
||||
{
|
||||
if (Callee->mayBeOverridden())
|
||||
return 0;
|
||||
|
||||
int Bonus = 0;
|
||||
// If this function uses the coldcc calling convention, prefer not to
|
||||
// specialize it.
|
||||
if (Callee->getCallingConv() == CallingConv::Cold)
|
||||
Bonus -= InlineConstants::ColdccPenalty;
|
||||
|
||||
// Get information about the callee.
|
||||
FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
|
||||
|
||||
// If we haven't calculated this information yet, do so now.
|
||||
if (CalleeFI->Metrics.NumBlocks == 0)
|
||||
CalleeFI->analyzeFunction(Callee);
|
||||
|
||||
|
||||
for (unsigned i = 0, s = SpecializedArgNos.size();
|
||||
i < s; ++i )
|
||||
{
|
||||
Bonus += CalleeFI->ArgumentWeights[SpecializedArgNos[i]].ConstantBonus;
|
||||
}
|
||||
// Calls usually take a long time, so they make the specialization gain
|
||||
// smaller.
|
||||
Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
|
||||
|
||||
return Bonus;
|
||||
}
|
||||
|
||||
|
||||
// getInlineCost - The heuristic used to determine if we should inline the
|
||||
// function call or not.
|
||||
//
|
||||
@ -442,6 +478,40 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
|
||||
return llvm::InlineCost::get(InlineCost);
|
||||
}
|
||||
|
||||
// getSpecializationCost - The heuristic used to determine the code-size
|
||||
// impact of creating a specialized version of Callee with argument
|
||||
// SpecializedArgNos replaced by a constant.
|
||||
InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
|
||||
SmallVectorImpl<unsigned> &SpecializedArgNos)
|
||||
{
|
||||
// Don't specialize functions which can be redefined at link-time to mean
|
||||
// something else.
|
||||
if (Callee->mayBeOverridden())
|
||||
return llvm::InlineCost::getNever();
|
||||
|
||||
// Get information about the callee.
|
||||
FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
|
||||
|
||||
// If we haven't calculated this information yet, do so now.
|
||||
if (CalleeFI->Metrics.NumBlocks == 0)
|
||||
CalleeFI->analyzeFunction(Callee);
|
||||
|
||||
int Cost = 0;
|
||||
|
||||
// Look at the original size of the callee. Each instruction counts as 5.
|
||||
Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
|
||||
|
||||
// Offset that with the amount of code that can be constant-folded
|
||||
// away with the given arguments replaced by constants.
|
||||
for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(),
|
||||
ae = SpecializedArgNos.end(); an != ae; ++an)
|
||||
{
|
||||
Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight;
|
||||
}
|
||||
|
||||
return llvm::InlineCost::get(Cost);
|
||||
}
|
||||
|
||||
// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
|
||||
// higher threshold to determine if the function call should be inlined.
|
||||
float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/InlineCost.h"
|
||||
#include "llvm/Transforms/Utils/Cloning.h"
|
||||
#include "llvm/Support/CallSite.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
@ -37,17 +38,12 @@ STATISTIC(numReplaced, "Number of callers replaced by specialization");
|
||||
// Maximum number of arguments that can be marked as interesting
|
||||
static const int MaxInterests = 6;
|
||||
|
||||
// Call must be used at least occasionally
|
||||
static const int CallsMin = 5;
|
||||
|
||||
// Must have 10% of calls having the same constant to specialize on
|
||||
static const double ConstValPercent = .1;
|
||||
|
||||
namespace {
|
||||
typedef SmallVector<int, MaxInterests> InterestingArgVector;
|
||||
class PartSpec : public ModulePass {
|
||||
void scanForInterest(Function&, InterestingArgVector&);
|
||||
int scanDistribution(Function&, int, std::map<Constant*, int>&);
|
||||
InlineCostAnalyzer CA;
|
||||
public :
|
||||
static char ID; // Pass identification, replacement for typeid
|
||||
PartSpec() : ModulePass(ID) {}
|
||||
@ -79,6 +75,10 @@ SpecializeFunction(Function* F,
|
||||
NF->setLinkage(GlobalValue::InternalLinkage);
|
||||
F->getParent()->getFunctionList().push_back(NF);
|
||||
|
||||
// FIXME: Specialized versions getting the same constants should also get
|
||||
// the same name. That way, specializations for public functions can be
|
||||
// marked linkonce_odr and reused across modules.
|
||||
|
||||
for (Value::use_iterator ii = F->use_begin(), ee = F->use_end();
|
||||
ii != ee; ) {
|
||||
Value::use_iterator i = ii;
|
||||
@ -144,22 +144,37 @@ bool PartSpec::runOnModule(Module &M) {
|
||||
bool breakOuter = false;
|
||||
for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) {
|
||||
std::map<Constant*, int> distribution;
|
||||
int total = scanDistribution(F, interestingArgs[x], distribution);
|
||||
if (total > CallsMin)
|
||||
for (std::map<Constant*, int>::iterator ii = distribution.begin(),
|
||||
ee = distribution.end(); ii != ee; ++ii)
|
||||
if (total > ii->second && ii->first &&
|
||||
ii->second > total * ConstValPercent) {
|
||||
ValueMap<const Value*, Value*> m;
|
||||
Function::arg_iterator arg = F.arg_begin();
|
||||
for (int y = 0; y < interestingArgs[x]; ++y)
|
||||
++arg;
|
||||
m[&*arg] = ii->first;
|
||||
SpecializeFunction(&F, m);
|
||||
++numSpecialized;
|
||||
breakOuter = true;
|
||||
Changed = true;
|
||||
}
|
||||
scanDistribution(F, interestingArgs[x], distribution);
|
||||
for (std::map<Constant*, int>::iterator ii = distribution.begin(),
|
||||
ee = distribution.end(); ii != ee; ++ii) {
|
||||
// The distribution map might have an entry for NULL (i.e., one or more
|
||||
// callsites were passing a non-constant there). We allow that to
|
||||
// happen so that we can see whether any callsites pass a non-constant;
|
||||
// if none do and the function is internal, we might have an opportunity
|
||||
// to kill the original function.
|
||||
if (!ii->first) continue;
|
||||
int bonus = ii->second;
|
||||
SmallVector<unsigned, 1> argnos;
|
||||
argnos.push_back(interestingArgs[x]);
|
||||
InlineCost cost = CA.getSpecializationCost(&F, argnos);
|
||||
// FIXME: If this is the last constant entry, and no non-constant
|
||||
// entries exist, and the target function is internal, the cost should
|
||||
// be reduced by the original size of the target function, almost
|
||||
// certainly making it negative and causing a specialization that will
|
||||
// leave the original function dead and removable.
|
||||
if (cost.isAlways() ||
|
||||
(cost.isVariable() && cost.getValue() < bonus)) {
|
||||
ValueMap<const Value*, Value*> m;
|
||||
Function::arg_iterator arg = F.arg_begin();
|
||||
for (int y = 0; y < interestingArgs[x]; ++y)
|
||||
++arg;
|
||||
m[&*arg] = ii->first;
|
||||
SpecializeFunction(&F, m);
|
||||
++numSpecialized;
|
||||
breakOuter = true;
|
||||
Changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return Changed;
|
||||
@ -170,28 +185,20 @@ bool PartSpec::runOnModule(Module &M) {
|
||||
void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) {
|
||||
for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end();
|
||||
ii != ee; ++ii) {
|
||||
for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end();
|
||||
ui != ue; ++ui) {
|
||||
|
||||
bool interesting = false;
|
||||
User *U = *ui;
|
||||
if (isa<CmpInst>(U)) interesting = true;
|
||||
else if (isa<CallInst>(U))
|
||||
interesting = ui->getOperand(0) == ii;
|
||||
else if (isa<InvokeInst>(U))
|
||||
interesting = ui->getOperand(0) == ii;
|
||||
else if (isa<SwitchInst>(U)) interesting = true;
|
||||
else if (isa<BranchInst>(U)) interesting = true;
|
||||
|
||||
if (interesting) {
|
||||
args.push_back(std::distance(F.arg_begin(), ii));
|
||||
break;
|
||||
}
|
||||
int argno = std::distance(F.arg_begin(), ii);
|
||||
SmallVector<unsigned, 1> argnos;
|
||||
argnos.push_back(argno);
|
||||
int bonus = CA.getSpecializationBonus(&F, argnos);
|
||||
if (bonus > 0) {
|
||||
args.push_back(argno);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// scanDistribution - Construct a histogram of the constants passed for the argument of F at index arg.
|
||||
/// For each distinct constant, we'll compute the total of the specialization
|
||||
/// bonus across all callsites passing that constant; if that total exceeds
|
||||
/// the specialization cost, we will create the specialization.
|
||||
int PartSpec::scanDistribution(Function& F, int arg,
|
||||
std::map<Constant*, int>& dist) {
|
||||
bool hasIndirect = false;
|
||||
@ -201,7 +208,10 @@ int PartSpec::scanDistribution(Function& F, int arg,
|
||||
User *U = *ii;
|
||||
CallSite CS(U);
|
||||
if (CS && CS.getCalledFunction() == &F) {
|
||||
++dist[dyn_cast<Constant>(CS.getArgument(arg))];
|
||||
SmallVector<unsigned, 1> argnos;
|
||||
argnos.push_back(arg);
|
||||
dist[dyn_cast<Constant>(CS.getArgument(arg))] +=
|
||||
CA.getSpecializationBonus(&F, argnos);
|
||||
++total;
|
||||
} else
|
||||
hasIndirect = true;
|
||||
|
49
test/Transforms/PartialSpecialize/heuristics.ll
Normal file
49
test/Transforms/PartialSpecialize/heuristics.ll
Normal file
@ -0,0 +1,49 @@
|
||||
; If there are not enough callsites for a particular specialization to
|
||||
; justify its existence, the specialization shouldn't be created.
|
||||
;
|
||||
; RUN: opt -S -partialspecialization -disable-inlining %s | FileCheck %s
|
||||
declare void @callback1()
|
||||
declare void @callback2()
|
||||
|
||||
declare void @othercall()
|
||||
|
||||
define internal void @UseCallback(void()* %pCallback) {
|
||||
call void %pCallback()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
call void @othercall()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @foo(void()* %pNonConstCallback)
|
||||
{
|
||||
Entry:
|
||||
; CHECK: Entry
|
||||
; CHECK-NOT: call void @UseCallback(void ()* @callback1)
|
||||
; CHECK: call void @UseCallback(void ()* @callback2)
|
||||
; CHECK-NEXT: call void @UseCallback(void ()* @callback2)
|
||||
; CHECK-NEXT: ret void
|
||||
call void @UseCallback(void()* @callback1)
|
||||
call void @UseCallback(void()* @callback1)
|
||||
call void @UseCallback(void()* @callback1)
|
||||
call void @UseCallback(void()* @callback1)
|
||||
call void @UseCallback(void()* @callback2)
|
||||
call void @UseCallback(void()* @callback2)
|
||||
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user