diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 9975333976d..048a1a05606 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -19,7 +19,6 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
 #include "llvm/Transforms/Utils/Cloning.h"
@@ -335,38 +334,6 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
   return false;
 }
 
-/// \brief Simplify arguments going into a particular callsite.
-///
-/// This is important to do each time we add a callsite due to inlining so that
-/// constants and other entities which feed into inline cost estimation are
-/// properly recognized when analyzing the new callsite. Consider:
-///   void outer(int x) {
-///     if (x < 42)
-///       return inner(42 - x);
-///     ...
-///   }
-///   void inner(int x) {
-///     ...
-///   }
-///
-/// The inliner gives calls to 'outer' with a constant argument a bonus because
-/// it will delete one side of a branch. But the resulting call to 'inner'
-/// will, after inlining, also have a constant operand. We need to do just
-/// enough constant folding to expose this for callsite arguments. The rest
-/// will be taken care of after the inliner finishes running.
-static void simplifyCallSiteArguments(const TargetData *TD, CallSite CS) {
-  // FIXME: It would be nice to avoid this smallvector if RAUW doesn't
-  // invalidate operand iterators in any cases.
-  SmallVector<std::pair<Instruction *, Value *>, 4> SimplifiedArgs;
-  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
-       I != E; ++I)
-    if (Instruction *Inst = dyn_cast<Instruction>(*I))
-      if (Value *SimpleArg = SimplifyInstruction(Inst, TD))
-        SimplifiedArgs.push_back(std::make_pair(Inst, SimpleArg));
-  for (unsigned Idx = 0, Size = SimplifiedArgs.size(); Idx != Size; ++Idx)
-    SimplifiedArgs[Idx].first->replaceAllUsesWith(SimplifiedArgs[Idx].second);
-}
-
 bool Inliner::runOnSCC(CallGraphSCC &SCC) {
   CallGraph &CG = getAnalysis<CallGraph>();
   const TargetData *TD = getAnalysisIfAvailable<TargetData>();
@@ -494,9 +461,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
         for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
              i != e; ++i) {
           Value *Ptr = InlineInfo.InlinedCalls[i];
-          CallSite NewCS = Ptr;
-          simplifyCallSiteArguments(TD, NewCS);
-          CallSites.push_back(std::make_pair(NewCS, NewHistoryID));
+          CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
         }
       }
 
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 1b28c35238e..a83c4e66ca4 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Support/CFG.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/ADT/SmallVector.h"
 #include <map>
@@ -218,11 +219,6 @@ namespace {
     /// anything that it can reach.
     void CloneBlock(const BasicBlock *BB,
                     std::vector<const BasicBlock*> &ToClone);
-
-  public:
-    /// ConstantFoldMappedInstruction - Constant fold the specified instruction,
-    /// mapping its operands through VMap if they are available.
-    Constant *ConstantFoldMappedInstruction(const Instruction *I);
   };
 }
 
@@ -262,19 +258,33 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
   // loop doesn't include the terminator.
   for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
        II != IE; ++II) {
-    // If this instruction constant folds, don't bother cloning the instruction,
-    // instead, just add the constant to the value map.
-    if (Constant *C = ConstantFoldMappedInstruction(II)) {
-      VMap[II] = C;
-      continue;
+    Instruction *NewInst = II->clone();
+
+    // Eagerly remap operands to the newly cloned instruction, except for PHI
+    // nodes for which we defer processing until we update the CFG.
+    if (!isa<PHINode>(NewInst)) {
+      RemapInstruction(NewInst, VMap,
+                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+
+      // If we can simplify this instruction to some other value, simply add
+      // a mapping to that value rather than inserting a new instruction into
+      // the basic block.
+      if (Value *V = SimplifyInstruction(NewInst, TD)) {
+        // On the off-chance that this simplifies to an instruction in the old
+        // function, map it back into the new function.
+        if (Value *MappedV = VMap.lookup(V))
+          V = MappedV;
+
+        VMap[II] = V;
+        delete NewInst;
+        continue;
+      }
     }
 
-    Instruction *NewInst = II->clone();
     if (II->hasName())
       NewInst->setName(II->getName()+NameSuffix);
-    NewBB->getInstList().push_back(NewInst);
     VMap[II] = NewInst;                // Add instruction map to value.
-
+    NewBB->getInstList().push_back(NewInst);
     hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
     if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
       if (isa<ConstantInt>(AI->getArraySize()))
@@ -345,30 +355,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
     Returns.push_back(RI);
 }
 
-/// ConstantFoldMappedInstruction - Constant fold the specified instruction,
-/// mapping its operands through VMap if they are available.
-Constant *PruningFunctionCloner::
-ConstantFoldMappedInstruction(const Instruction *I) {
-  SmallVector<Constant*, 8> Ops;
-  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
-    if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
-                                                           VMap,
-                  ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges)))
-      Ops.push_back(Op);
-    else
-      return 0;  // All operands not constant!
-
-  if (const CmpInst *CI = dyn_cast<CmpInst>(I))
-    return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
-                                           TD);
-
-  if (const LoadInst *LI = dyn_cast<LoadInst>(I))
-    if (!LI->isVolatile())
-      return ConstantFoldLoadFromConstPtr(Ops[0], TD);
-
-  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD);
-}
-
 /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
 /// except that it does some simple constant prop and DCE on the fly.  The
 /// effect of this is to copy significantly less code in cases where (for
@@ -418,25 +404,19 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
 
     // Add the new block to the new function.
     NewFunc->getBasicBlockList().push_back(NewBB);
-
-    // Loop over all of the instructions in the block, fixing up operand
-    // references as we go.  This uses VMap to do all the hard work.
-    //
-    BasicBlock::iterator I = NewBB->begin();
 
     // Handle PHI nodes specially, as we have to remove references to dead
    // blocks.
-    if (PHINode *PN = dyn_cast<PHINode>(I)) {
-      // Skip over all PHI nodes, remembering them for later.
-      BasicBlock::const_iterator OldI = BI->begin();
-      for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
-        PHIToResolve.push_back(cast<PHINode>(OldI));
-    }
-
-    // Otherwise, remap the rest of the instructions normally.
-    for (; I != NewBB->end(); ++I)
-      RemapInstruction(I, VMap,
-                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+    for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I)
+      if (const PHINode *PN = dyn_cast<PHINode>(I))
+        PHIToResolve.push_back(PN);
+      else
+        break;
+
+    // Finally, remap the terminator instructions, as those can't be remapped
+    // until all BBs are mapped.
+    RemapInstruction(NewBB->getTerminator(), VMap,
+                     ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
   }
 
   // Defer PHI resolution until rest of function is resolved, PHI resolution
diff --git a/test/Transforms/Inline/inline_cleanup.ll b/test/Transforms/Inline/inline_cleanup.ll
index b744ae1ef39..7583dddea9d 100644
--- a/test/Transforms/Inline/inline_cleanup.ll
+++ b/test/Transforms/Inline/inline_cleanup.ll
@@ -71,3 +71,48 @@ entry:
 	tail call fastcc void @foo( i32 8 )
 	ret void
 }
+
+declare void @f(i32 %x)
+
+define void @inner2(i32 %x, i32 %y, i32 %z) {
+entry:
+  %cmp1 = icmp ne i32 %x, 0
+  br i1 %cmp1, label %then1, label %end1
+
+then1:
+  call void @f(i32 %x)
+  br label %end1
+
+end1:
+  %x2 = and i32 %x, %z
+  %cmp2 = icmp sgt i32 %x2, 1
+  br i1 %cmp2, label %then2, label %end2
+
+then2:
+  call void @f(i32 %x2)
+  br label %end2
+
+end2:
+  %y2 = or i32 %y, %z
+  %cmp3 = icmp sgt i32 %y2, 0
+  br i1 %cmp3, label %then3, label %end3
+
+then3:
+  call void @f(i32 %y2)
+  br label %end3
+
+end3:
+  ret void
+}
+
+define void @outer2(i32 %z) {
+; Ensure that after inlining, none of the blocks with a call to @f actually
+; make it through inlining.
+; CHECK: define void @outer2
+; CHECK-NOT: call
+; CHECK: ret void
+
+entry:
+  call void @inner2(i32 0, i32 -1, i32 %z)
+  ret void
+}