Move the instruction simplification of callsite arguments in the
inliner to instead rely on the much more generic and powerful
instruction simplification in the function cloner (and thus in the
inliner).

This teaches the pruning function cloner to use instsimplify rather than
just the constant folder to fold values during cloning. This can
simplify a large number of things that constant folding alone cannot
begin to touch. For example, it will realize that 'or' and 'and'
instructions with certain constant operands become constants regardless
of what their other operand is. It can also thread back through the
caller to perform simplifications that are only possible by looking up
a few levels. In particular, GEPs and pointer testing tend to fold much
more heavily with this change.
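
For instance (a hypothetical snippet, not taken from this change; the
function @example is invented for illustration), instsimplify proves
both values below constant even though %x is completely unknown,
whereas constant folding requires every operand to already be a
constant:

  define i32 @example(i32 %x) {
    %a = or i32 %x, -1    ; always -1, regardless of %x
    %b = and i32 %x, 0    ; always 0, regardless of %x
    %c = add i32 %a, %b   ; now ordinary constant folding yields -1
    ret i32 %c
  }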

This should (in some cases) have a positive impact on compile times
with optimizations on, because the inliner itself will simply avoid
cloning a great deal of code. It already attempted to prune provably
dead code, but now it will use the stronger simplifications to prove
more code dead.
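
A minimal sketch of that pruning effect (an invented example; @callee,
@caller, and @expensive_helper are hypothetical names): when @callee is
inlined with %x mapped to 0, SimplifyInstruction folds %cmp to false
during cloning, so the %cold block and its call are never cloned at
all.

  declare void @expensive_helper()

  define internal void @callee(i32 %x) {
    %cmp = icmp ne i32 %x, 0
    br i1 %cmp, label %cold, label %exit

  cold:
    call void @expensive_helper()
    br label %exit

  exit:
    ret void
  }

  define void @caller() {
    call void @callee(i32 0)  ; after inlining, @caller is just 'ret void'
    ret void
  }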

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153403 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Chandler Carruth
Date:   2012-03-25 04:03:40 +0000
Commit: d54f9a4c3b (parent: acdae3e25a)

3 changed files with 79 additions and 89 deletions

--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp

@@ -19,7 +19,6 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
 #include "llvm/Transforms/Utils/Cloning.h"
@@ -335,38 +334,6 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
   return false;
 }
 
-/// \brief Simplify arguments going into a particular callsite.
-///
-/// This is important to do each time we add a callsite due to inlining so that
-/// constants and other entities which feed into inline cost estimation are
-/// properly recognized when analyzing the new callsite. Consider:
-///   void outer(int x) {
-///     if (x < 42)
-///       return inner(42 - x);
-///     ...
-///   }
-///   void inner(int x) {
-///     ...
-///   }
-///
-/// The inliner gives calls to 'outer' with a constant argument a bonus because
-/// it will delete one side of a branch. But the resulting call to 'inner'
-/// will, after inlining, also have a constant operand. We need to do just
-/// enough constant folding to expose this for callsite arguments. The rest
-/// will be taken care of after the inliner finishes running.
-static void simplifyCallSiteArguments(const TargetData *TD, CallSite CS) {
-  // FIXME: It would be nice to avoid this smallvector if RAUW doesn't
-  // invalidate operand iterators in any cases.
-  SmallVector<std::pair<Value *, Value*>, 4> SimplifiedArgs;
-  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
-       I != E; ++I)
-    if (Instruction *Inst = dyn_cast<Instruction>(*I))
-      if (Value *SimpleArg = SimplifyInstruction(Inst, TD))
-        SimplifiedArgs.push_back(std::make_pair(Inst, SimpleArg));
-  for (unsigned Idx = 0, Size = SimplifiedArgs.size(); Idx != Size; ++Idx)
-    SimplifiedArgs[Idx].first->replaceAllUsesWith(SimplifiedArgs[Idx].second);
-}
-
 bool Inliner::runOnSCC(CallGraphSCC &SCC) {
   CallGraph &CG = getAnalysis<CallGraph>();
   const TargetData *TD = getAnalysisIfAvailable<TargetData>();
@@ -494,9 +461,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
         for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
              i != e; ++i) {
           Value *Ptr = InlineInfo.InlinedCalls[i];
-          CallSite NewCS = Ptr;
-          simplifyCallSiteArguments(TD, NewCS);
-          CallSites.push_back(std::make_pair(NewCS, NewHistoryID));
+          CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
         }
       }

--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp

@@ -25,6 +25,7 @@
 #include "llvm/Support/CFG.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/ADT/SmallVector.h"
 #include <map>
@@ -218,11 +219,6 @@ namespace {
     /// anything that it can reach.
     void CloneBlock(const BasicBlock *BB,
                     std::vector<const BasicBlock*> &ToClone);
-
-  public:
-    /// ConstantFoldMappedInstruction - Constant fold the specified instruction,
-    /// mapping its operands through VMap if they are available.
-    Constant *ConstantFoldMappedInstruction(const Instruction *I);
   };
 }
@@ -262,19 +258,33 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
   // loop doesn't include the terminator.
   for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
        II != IE; ++II) {
-    // If this instruction constant folds, don't bother cloning the instruction,
-    // instead, just add the constant to the value map.
-    if (Constant *C = ConstantFoldMappedInstruction(II)) {
-      VMap[II] = C;
-      continue;
-    }
-
     Instruction *NewInst = II->clone();
+
+    // Eagerly remap operands to the newly cloned instruction, except for PHI
+    // nodes for which we defer processing until we update the CFG.
+    if (!isa<PHINode>(NewInst)) {
+      RemapInstruction(NewInst, VMap,
+                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+
+      // If we can simplify this instruction to some other value, simply add
+      // a mapping to that value rather than inserting a new instruction into
+      // the basic block.
+      if (Value *V = SimplifyInstruction(NewInst, TD)) {
+        // On the off-chance that this simplifies to an instruction in the old
+        // function, map it back into the new function.
+        if (Value *MappedV = VMap.lookup(V))
+          V = MappedV;
+
+        VMap[II] = V;
+        delete NewInst;
+        continue;
+      }
+    }
+
     if (II->hasName())
       NewInst->setName(II->getName()+NameSuffix);
-    NewBB->getInstList().push_back(NewInst);
     VMap[II] = NewInst;                // Add instruction map to value.
+    NewBB->getInstList().push_back(NewInst);
     hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
     if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
       if (isa<ConstantInt>(AI->getArraySize()))
@@ -345,30 +355,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
     Returns.push_back(RI);
 }
 
-/// ConstantFoldMappedInstruction - Constant fold the specified instruction,
-/// mapping its operands through VMap if they are available.
-Constant *PruningFunctionCloner::
-ConstantFoldMappedInstruction(const Instruction *I) {
-  SmallVector<Constant*, 8> Ops;
-  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
-    if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
-                                                           VMap,
-                  ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges)))
-      Ops.push_back(Op);
-    else
-      return 0;  // All operands not constant!
-
-  if (const CmpInst *CI = dyn_cast<CmpInst>(I))
-    return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
-                                           TD);
-
-  if (const LoadInst *LI = dyn_cast<LoadInst>(I))
-    if (!LI->isVolatile())
-      return ConstantFoldLoadFromConstPtr(Ops[0], TD);
-
-  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD);
-}
-
 /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
 /// except that it does some simple constant prop and DCE on the fly.  The
 /// effect of this is to copy significantly less code in cases where (for
@@ -418,25 +404,19 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
     // Add the new block to the new function.
     NewFunc->getBasicBlockList().push_back(NewBB);
 
-    // Loop over all of the instructions in the block, fixing up operand
-    // references as we go.  This uses VMap to do all the hard work.
-    //
-    BasicBlock::iterator I = NewBB->begin();
-
     // Handle PHI nodes specially, as we have to remove references to dead
     // blocks.
-    if (PHINode *PN = dyn_cast<PHINode>(I)) {
-      // Skip over all PHI nodes, remembering them for later.
-      BasicBlock::const_iterator OldI = BI->begin();
-      for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
-        PHIToResolve.push_back(cast<PHINode>(OldI));
-    }
-
-    // Otherwise, remap the rest of the instructions normally.
-    for (; I != NewBB->end(); ++I)
-      RemapInstruction(I, VMap,
-                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+    for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I)
+      if (const PHINode *PN = dyn_cast<PHINode>(I))
+        PHIToResolve.push_back(PN);
+      else
+        break;
+
+    // Finally, remap the terminator instructions, as those can't be remapped
+    // until all BBs are mapped.
+    RemapInstruction(NewBB->getTerminator(), VMap,
+                     ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
   }
 
   // Defer PHI resolution until rest of function is resolved, PHI resolution

--- a/test/Transforms/Inline/inline_cleanup.ll
+++ b/test/Transforms/Inline/inline_cleanup.ll

@@ -71,3 +71,48 @@ entry:
   tail call fastcc void @foo( i32 8 )
   ret void
 }
+
+declare void @f(i32 %x)
+
+define void @inner2(i32 %x, i32 %y, i32 %z) {
+entry:
+  %cmp1 = icmp ne i32 %x, 0
+  br i1 %cmp1, label %then1, label %end1
+
+then1:
+  call void @f(i32 %x)
+  br label %end1
+
+end1:
+  %x2 = and i32 %x, %z
+  %cmp2 = icmp sgt i32 %x2, 1
+  br i1 %cmp2, label %then2, label %end2
+
+then2:
+  call void @f(i32 %x2)
+  br label %end2
+
+end2:
+  %y2 = or i32 %y, %z
+  %cmp3 = icmp sgt i32 %y2, 0
+  br i1 %cmp3, label %then3, label %end3
+
+then3:
+  call void @f(i32 %y2)
+  br label %end3
+
+end3:
+  ret void
+}
+
+define void @outer2(i32 %z) {
+; Ensure that after inlining, none of the blocks with a call to @f actually
+; make it through inlining.
+; CHECK: define void @outer2
+; CHECK-NOT: call
+; CHECK: ret void
+
+entry:
+  call void @inner2(i32 0, i32 -1, i32 %z)
+  ret void
+}