diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index c43b5ca2506..358f61fd523 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -808,9 +808,29 @@ bool CallAnalyzer::visitBranchInst(BranchInst &BI) { bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // We model unconditional switches as free, see the comments on handling // branches. - return isa(SI.getCondition()) || - dyn_cast_or_null( - SimplifiedValues.lookup(SI.getCondition())); + if (isa(SI.getCondition())) + return true; + if (Value *V = SimplifiedValues.lookup(SI.getCondition())) + if (isa(V)) + return true; + + // Otherwise, we need to accumulate a cost proportional to the number of + // distinct successor blocks. This fan-out in the CFG cannot be represented + // for free even if we can represent the core switch as a jumptable that + // takes a single instruction. + // + // NB: We convert large switches which are just used to initialize large phi + // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent + // inlining those. It will prevent inlining in cases where the optimization + // does not (yet) fire. + SmallPtrSet SuccessorBlocks; + SuccessorBlocks.insert(SI.getDefaultDest()); + for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I) + SuccessorBlocks.insert(I.getCaseSuccessor()); + // Add cost corresponding to the number of distinct destinations. The first + // we model as free because of fallthrough. + Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost; + return false; } bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { diff --git a/test/Transforms/Inline/switch.ll b/test/Transforms/Inline/switch.ll new file mode 100644 index 00000000000..c5dab53e8b6 --- /dev/null +++ b/test/Transforms/Inline/switch.ll @@ -0,0 +1,60 @@ +; RUN: opt < %s -inline -inline-threshold=20 -S | FileCheck %s + +define i32 @callee(i32 %a) { + switch i32 %a, label %sw.default [ + i32 0, label %sw.bb0 + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + i32 4, label %sw.bb4 + i32 5, label %sw.bb5 + i32 6, label %sw.bb6 + i32 7, label %sw.bb7 + i32 8, label %sw.bb8 + i32 9, label %sw.bb9 + ] + +sw.default: + br label %return + +sw.bb0: + br label %return + +sw.bb1: + br label %return + +sw.bb2: + br label %return + +sw.bb3: + br label %return + +sw.bb4: + br label %return + +sw.bb5: + br label %return + +sw.bb6: + br label %return + +sw.bb7: + br label %return + +sw.bb8: + br label %return + +sw.bb9: + br label %return + +return: + ret i32 42 +} + +define i32 @caller(i32 %a) { +; CHECK-LABEL: @caller( +; CHECK: call i32 @callee( + + %result = call i32 @callee(i32 %a) + ret i32 %result +}