diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index fb5861c7a19..94b14be5e2d 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -222,6 +222,11 @@ unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { if (!V->getType()->isPointerTy()) return 0; // Not a pointer unsigned Reduction = 0; + // Looking at ICmpInsts will never abort the analysis and return zero, and + // analyzing them is expensive, so save them for last so that we don't do + // extra work that we end up throwing out. + SmallVector ICmpInsts; + SmallVector Worklist; Worklist.push_back(V); do { @@ -271,10 +276,14 @@ unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { case Intrinsic::memmove: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: - // SROA can usually chew through these intrinsics. + // SROA can usually chew through these intrinsics. Reduction += InlineConstants::InstrCost; break; } + } else if (ICmpInst *ICI = dyn_cast(I)) { + if (!isa(ICI->getOperand(1))) + return 0; + ICmpInsts.push_back(ICI); } else { // If there is some other strange instruction, we're not going to be // able to do much if we inline this. @@ -283,6 +292,51 @@ unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { } } while (!Worklist.empty()); + while (!ICmpInsts.empty()) { + ICmpInst *ICI = ICmpInsts.pop_back_val(); + + // An icmp pred (alloca, C) becomes true if the predicate is true when + // equal and false otherwise. + bool Result = ICI->isTrueWhenEqual(); + + SmallVector Worklist; + Worklist.push_back(ICI); + do { + Instruction *U = Worklist.pop_back_val(); + Reduction += InlineConstants::InstrCost; + for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); + UI != UE; ++UI) { + Instruction *I = dyn_cast(*UI); + if (!I || I->mayHaveSideEffects()) continue; + if (I->getNumOperands() == 1) + Worklist.push_back(I); + if (BinaryOperator *BO = dyn_cast(I)) { + // If BO produces the same value as U, then the other operand is + // irrelevant and we can put it into the Worklist to continue + // deleting dead instructions. If BO produces the same value as the + // other operand, we can delete BO but that's it. + if (Result == true) { + if (BO->getOpcode() == Instruction::Or) + Worklist.push_back(I); + if (BO->getOpcode() == Instruction::And) + Reduction += InlineConstants::InstrCost; + } else { + if (BO->getOpcode() == Instruction::Or || + BO->getOpcode() == Instruction::Xor) + Reduction += InlineConstants::InstrCost; + if (BO->getOpcode() == Instruction::And) + Worklist.push_back(I); + } + } + if (BranchInst *BI = dyn_cast(I)) { + BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1); + if (BB->getSinglePredecessor()) + Reduction += InlineConstants::InstrCost * BB->size(); + } + } + } while (!Worklist.empty()); + } + return Reduction; } diff --git a/test/Transforms/Inline/alloca-bonus.ll b/test/Transforms/Inline/alloca-bonus.ll index 2587ae13634..91ab40ae163 100644 --- a/test/Transforms/Inline/alloca-bonus.ll +++ b/test/Transforms/Inline/alloca-bonus.ll @@ -42,3 +42,42 @@ define void @inner2(i32 *%ptr) { call void @llvm.lifetime.start(i64 0, i8* %E) ret void } + +define void @outer3() { +; CHECK: @outer3 +; CHECK-NOT: call void @inner3 + %ptr = alloca i32 + call void @inner3(i32* %ptr, i1 undef) + ret void +} + +define void @inner3(i32 *%ptr, i1 %x) { + %A = icmp eq i32* %ptr, null + %B = and i1 %x, %A + br i1 %A, label %bb.true, label %bb.false +bb.true: + ; This block musn't be counted in the inline cost. + %t1 = load i32* %ptr + %t2 = add i32 %t1, 1 + %t3 = add i32 %t2, 1 + %t4 = add i32 %t3, 1 + %t5 = add i32 %t4, 1 + %t6 = add i32 %t5, 1 + %t7 = add i32 %t6, 1 + %t8 = add i32 %t7, 1 + %t9 = add i32 %t8, 1 + %t10 = add i32 %t9, 1 + %t11 = add i32 %t10, 1 + %t12 = add i32 %t11, 1 + %t13 = add i32 %t12, 1 + %t14 = add i32 %t13, 1 + %t15 = add i32 %t14, 1 + %t16 = add i32 %t15, 1 + %t17 = add i32 %t16, 1 + %t18 = add i32 %t17, 1 + %t19 = add i32 %t18, 1 + %t20 = add i32 %t19, 1 + ret void +bb.false: + ret void +}