Undo a previous restriction on the inline cost calculation which Nick
introduced. Specifically, there are cost reductions for all
constant-operand icmp instructions against an alloca, regardless of
whether the alloca will in fact be eligible for SROA. That means we
don't want to abort the icmp reduction computation when we abort the
SROA reduction computation. That in turn frees us from the need to keep
a separate worklist and defer the ICmp calculations.

Use this new-found freedom and some judicious function boundaries to
factor the innards of computing the cost factor of any given
instruction out of the loop over the instructions and into static
helper functions. This greatly simplifies the code, and hopefully makes
it more clear what is happening here.

Reviewed by Eric Christopher. There is some concern that we'd like to
ensure this doesn't get out of hand, and I plan to benchmark the
effects of this change over the next few days along with some further
fixes to the inline cost.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152368 91177308-0d34-0410-b5e6-96231b3b80d8
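For illustration only (this sketch is not part of the commit): a rough C++ analogue of the new @inner4 test added below. The callee compares an incoming pointer against a constant, so once the call is inlined with the address of a local the comparison folds and the guarded block dies, even though a variable-index access keeps SROA from eliminating the local. The names callee, caller, and expensive_slow_path are invented for the example.

#include <cstddef>

// Invented stand-in for code the inliner should see as dead after inlining.
static int expensive_slow_path() { return -1; }

static int callee(int *ptr, std::size_t idx) {
  // Constant-operand compare of the pointer: after inlining with the address
  // of a local, this is known to be false, so the slow path folds away.
  if (ptr == nullptr)
    return expensive_slow_path();
  // Variable index: the analogue of the variable-index GEP in @inner4 that
  // defeats SROA but does not block the icmp/branch bonus.
  return ptr[idx];
}

int caller(std::size_t idx) {
  int buf[4] = {0, 1, 2, 3};
  return callee(buf, idx);  // candidate for inlining; buf is a local
}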
commit e8187e0294
parent abd6674166
@@ -212,15 +212,148 @@ unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForConstant(
   return Reduction;
 }
 
+static unsigned countCodeReductionForAllocaICmp(const CodeMetrics &Metrics,
+                                                ICmpInst *ICI) {
+  unsigned Reduction = 0;
+
+  // Bail if this is comparing against a non-constant; there is nothing we can
+  // do there.
+  if (!isa<Constant>(ICI->getOperand(1)))
+    return Reduction;
+
+  // An icmp pred (alloca, C) becomes true if the predicate is true when
+  // equal and false otherwise.
+  bool Result = ICI->isTrueWhenEqual();
+
+  SmallVector<Instruction *, 4> Worklist;
+  Worklist.push_back(ICI);
+  do {
+    Instruction *U = Worklist.pop_back_val();
+    Reduction += InlineConstants::InstrCost;
+    for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
+         UI != UE; ++UI) {
+      Instruction *I = dyn_cast<Instruction>(*UI);
+      if (!I || I->mayHaveSideEffects()) continue;
+      if (I->getNumOperands() == 1)
+        Worklist.push_back(I);
+      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+        // If BO produces the same value as U, then the other operand is
+        // irrelevant and we can put it into the Worklist to continue
+        // deleting dead instructions. If BO produces the same value as the
+        // other operand, we can delete BO but that's it.
+        if (Result == true) {
+          if (BO->getOpcode() == Instruction::Or)
+            Worklist.push_back(I);
+          if (BO->getOpcode() == Instruction::And)
+            Reduction += InlineConstants::InstrCost;
+        } else {
+          if (BO->getOpcode() == Instruction::Or ||
+              BO->getOpcode() == Instruction::Xor)
+            Reduction += InlineConstants::InstrCost;
+          if (BO->getOpcode() == Instruction::And)
+            Worklist.push_back(I);
+        }
+      }
+      if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+        BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1);
+        if (BB->getSinglePredecessor())
+          Reduction
+            += InlineConstants::InstrCost * Metrics.NumBBInsts.lookup(BB);
+      }
+    }
+  } while (!Worklist.empty());
+
+  return Reduction;
+}
+
+/// \brief Compute the reduction possible for a given instruction if we are able
+/// to SROA an alloca.
+///
+/// The reduction for this instruction is added to the SROAReduction output
+/// parameter. Returns false if this instruction is expected to defeat SROA in
+/// general.
+bool countCodeReductionForSROAInst(Instruction *I,
+                                   SmallVectorImpl<Value *> &Worklist,
+                                   unsigned &SROAReduction) {
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    if (!LI->isSimple())
+      return false;
+    SROAReduction += InlineConstants::InstrCost;
+    return true;
+  }
+
+  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+    if (!SI->isSimple())
+      return false;
+    SROAReduction += InlineConstants::InstrCost;
+    return true;
+  }
+
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+    // If the GEP has variable indices, we won't be able to do much with it.
+    if (!GEP->hasAllConstantIndices())
+      return false;
+    // A non-zero GEP will likely become a mask operation after SROA.
+    if (GEP->hasAllZeroIndices())
+      SROAReduction += InlineConstants::InstrCost;
+    Worklist.push_back(GEP);
+    return true;
+  }
+
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+    // Track pointer through bitcasts.
+    Worklist.push_back(BCI);
+    SROAReduction += InlineConstants::InstrCost;
+    return true;
+  }
+
+  // We just look for non-constant operands to ICmp instructions as those will
+  // defeat SROA. The actual reduction for these happens even without SROA.
+  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+    return isa<Constant>(ICI->getOperand(1));
+
+  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+    // SROA can handle a select of alloca iff all uses of the alloca are
+    // loads, and dereferenceable. We assume it's dereferenceable since
+    // we're told the input is an alloca.
+    for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
+         UI != UE; ++UI) {
+      LoadInst *LI = dyn_cast<LoadInst>(*UI);
+      if (LI == 0 || !LI->isSimple())
+        return false;
+    }
+    // We don't know whether we'll be deleting the rest of the chain of
+    // instructions from the SelectInst on, because we don't know whether
+    // the other side of the select is also an alloca or not.
+    return true;
+  }
+
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::memset:
+    case Intrinsic::memcpy:
+    case Intrinsic::memmove:
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+      // SROA can usually chew through these intrinsics.
+      SROAReduction += InlineConstants::InstrCost;
+      return true;
+    }
+  }
+
+  // If there is some other strange instruction, we're not going to be
+  // able to do much if we inline this.
+  return false;
+}
+
 unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForAlloca(
     const CodeMetrics &Metrics, Value *V) {
   if (!V->getType()->isPointerTy()) return 0;  // Not a pointer
   unsigned Reduction = 0;
-
-  // Looking at ICmpInsts will never abort the analysis and return zero, and
-  // analyzing them is expensive, so save them for last so that we don't do
-  // extra work that we end up throwing out.
-  SmallVector<ICmpInst *, 4> ICmpInsts;
+  unsigned SROAReduction = 0;
+  bool CanSROAAlloca = true;
 
   SmallVector<Value *, 4> Worklist;
   Worklist.push_back(V);
@@ -229,111 +362,17 @@ unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForAlloca(
     for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
          UI != E; ++UI){
       Instruction *I = cast<Instruction>(*UI);
-      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-        if (!LI->isSimple())
-          return 0;
-        Reduction += InlineConstants::InstrCost;
-      } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-        if (!SI->isSimple())
-          return 0;
-        Reduction += InlineConstants::InstrCost;
-      } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
-        // If the GEP has variable indices, we won't be able to do much with it.
-        if (!GEP->hasAllConstantIndices())
-          return 0;
-        // A non-zero GEP will likely become a mask operation after SROA.
-        if (GEP->hasAllZeroIndices())
-          Reduction += InlineConstants::InstrCost;
-        Worklist.push_back(GEP);
-      } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
-        // Track pointer through bitcasts.
-        Worklist.push_back(BCI);
-        Reduction += InlineConstants::InstrCost;
-      } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
-        // SROA can handle a select of alloca iff all uses of the alloca are
-        // loads, and dereferenceable. We assume it's dereferenceable since
-        // we're told the input is an alloca.
-        for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
-             UI != UE; ++UI) {
-          LoadInst *LI = dyn_cast<LoadInst>(*UI);
-          if (LI == 0 || !LI->isSimple()) return 0;
-        }
-        // We don't know whether we'll be deleting the rest of the chain of
-        // instructions from the SelectInst on, because we don't know whether
-        // the other side of the select is also an alloca or not.
-        continue;
-      } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-        switch (II->getIntrinsicID()) {
-        default:
-          return 0;
-        case Intrinsic::memset:
-        case Intrinsic::memcpy:
-        case Intrinsic::memmove:
-        case Intrinsic::lifetime_start:
-        case Intrinsic::lifetime_end:
-          // SROA can usually chew through these intrinsics.
-          Reduction += InlineConstants::InstrCost;
-          break;
-        }
-      } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
-        if (!isa<Constant>(ICI->getOperand(1)))
-          return 0;
-        ICmpInsts.push_back(ICI);
-      } else {
-        // If there is some other strange instruction, we're not going to be
-        // able to do much if we inline this.
-        return 0;
-      }
+
+      if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+        Reduction += countCodeReductionForAllocaICmp(Metrics, ICI);
+
+      if (CanSROAAlloca)
+        CanSROAAlloca = countCodeReductionForSROAInst(I, Worklist,
+                                                      SROAReduction);
     }
   } while (!Worklist.empty());
 
-  while (!ICmpInsts.empty()) {
-    ICmpInst *ICI = ICmpInsts.pop_back_val();
-
-    // An icmp pred (alloca, C) becomes true if the predicate is true when
-    // equal and false otherwise.
-    bool Result = ICI->isTrueWhenEqual();
-
-    SmallVector<Instruction *, 4> Worklist;
-    Worklist.push_back(ICI);
-    do {
-      Instruction *U = Worklist.pop_back_val();
-      Reduction += InlineConstants::InstrCost;
-      for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
-           UI != UE; ++UI) {
-        Instruction *I = dyn_cast<Instruction>(*UI);
-        if (!I || I->mayHaveSideEffects()) continue;
-        if (I->getNumOperands() == 1)
-          Worklist.push_back(I);
-        if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
-          // If BO produces the same value as U, then the other operand is
-          // irrelevant and we can put it into the Worklist to continue
-          // deleting dead instructions. If BO produces the same value as the
-          // other operand, we can delete BO but that's it.
-          if (Result == true) {
-            if (BO->getOpcode() == Instruction::Or)
-              Worklist.push_back(I);
-            if (BO->getOpcode() == Instruction::And)
-              Reduction += InlineConstants::InstrCost;
-          } else {
-            if (BO->getOpcode() == Instruction::Or ||
-                BO->getOpcode() == Instruction::Xor)
-              Reduction += InlineConstants::InstrCost;
-            if (BO->getOpcode() == Instruction::And)
-              Worklist.push_back(I);
-          }
-        }
-        if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
-          BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1);
-          if (BB->getSinglePredecessor())
-            Reduction
-              += InlineConstants::InstrCost * Metrics.NumBBInsts.lookup(BB);
-        }
-      }
-    } while (!Worklist.empty());
-  }
-
-  return Reduction;
+  return Reduction + (CanSROAAlloca ? SROAReduction : 0);
 }
 
 /// analyzeFunction - Fill in the current structure with information gleaned
@@ -81,3 +81,44 @@ bb.true:
 bb.false:
   ret void
 }
+
+define void @outer4(i32 %A) {
+; CHECK: @outer4
+; CHECK-NOT: call void @inner4
+  %ptr = alloca i32
+  call void @inner4(i32* %ptr, i32 %A)
+  ret void
+}
+
+; %D poisons this call, scalar-repl can't handle that instruction. However, we
+; still want to detect that the icmp and branch *can* be handled.
+define void @inner4(i32 *%ptr, i32 %A) {
+  %B = getelementptr i32* %ptr, i32 %A
+  %E = icmp eq i32* %ptr, null
+  br i1 %E, label %bb.true, label %bb.false
+bb.true:
+  ; This block mustn't be counted in the inline cost.
+  %t1 = load i32* %ptr
+  %t2 = add i32 %t1, 1
+  %t3 = add i32 %t2, 1
+  %t4 = add i32 %t3, 1
+  %t5 = add i32 %t4, 1
+  %t6 = add i32 %t5, 1
+  %t7 = add i32 %t6, 1
+  %t8 = add i32 %t7, 1
+  %t9 = add i32 %t8, 1
+  %t10 = add i32 %t9, 1
+  %t11 = add i32 %t10, 1
+  %t12 = add i32 %t11, 1
+  %t13 = add i32 %t12, 1
+  %t14 = add i32 %t13, 1
+  %t15 = add i32 %t14, 1
+  %t16 = add i32 %t15, 1
+  %t17 = add i32 %t16, 1
+  %t18 = add i32 %t17, 1
+  %t19 = add i32 %t18, 1
+  %t20 = add i32 %t19, 1
+  ret void
+bb.false:
+  ret void
+}