Speculatively revert r146578 to determine if it is the cause of a number of

performance regressions (both execution-time and compile-time) on our
nightly testers.

Original commit message:
Fix for bug #11429: Wrong behaviour for switches. Small improvement for code
size heuristics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147131 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chad Rosier 2011-12-22 02:40:57 +00:00
parent 4982159b88
commit 5ddb7a0105
4 changed files with 11 additions and 395 deletions

View File

@ -71,9 +71,7 @@ namespace {
// LoopProcessWorklist - Used to check if second loop needs processing
// after RewriteLoopBodyWithConditionConstant rewrites first loop.
std::vector<Loop*> LoopProcessWorklist;
// FIXME: Consider custom class for this.
std::map<const SwitchInst*, SmallPtrSet<const Value *,8> > UnswitchedVals;
SmallPtrSet<Value *,8> UnswitchedVals;
bool OptimizeForSize;
bool redoLoop;
@ -119,15 +117,7 @@ namespace {
private:
virtual void releaseMemory() {
// We need to forget about all switches in the current loop.
// FIXME: Do it better than enumerating all blocks of code
// and see if it is a switch instruction.
for (Loop::block_iterator I = currentLoop->block_begin(),
E = currentLoop->block_end(); I != E; ++I) {
SwitchInst* SI = dyn_cast<SwitchInst>((*I)->getTerminator());
if (SI)
UnswitchedVals.erase(SI);
}
UnswitchedVals.clear();
}
/// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
@ -138,12 +128,6 @@ namespace {
if (I != LoopProcessWorklist.end())
LoopProcessWorklist.erase(I);
}
/// For new loop switches we clone info about values that was
/// already unswitched and has redundant successors.
/// Note, that new loop data is stored inside the VMap.
void CloneUnswitchedVals(const ValueToValueMapTy& VMap,
const BasicBlock* SrcBB);
void initLoopData() {
loopHeader = currentLoop->getHeader();
@ -271,25 +255,13 @@ bool LoopUnswitch::processCurrentLoop() {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
currentLoop, Changed);
unsigned NumCases = SI->getNumCases();
if (LoopCond && NumCases > 1) {
if (LoopCond && SI->getNumCases() > 1) {
// Find a value to unswitch on:
// FIXME: this should chose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
Constant *UnswitchVal = NULL;
Constant *UnswitchVal = SI->getCaseValue(1);
// Do not process same value again and again.
// At this point we have some cases already unswitched and
// some not yet unswitched. Let's find the first not yet unswitched one.
for (unsigned i = 1; i < NumCases; ++i) {
Constant* UnswitchValCandidate = SI->getCaseValue(i);
if (!UnswitchedVals[SI].count(UnswitchValCandidate)) {
UnswitchVal = UnswitchValCandidate;
break;
}
}
if (!UnswitchVal)
if (!UnswitchedVals.insert(UnswitchVal))
continue;
if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
@ -315,23 +287,6 @@ bool LoopUnswitch::processCurrentLoop() {
return Changed;
}
/// For new loop switches we clone info about values that was
/// already unswitched and has redundant successors.
/// Not that new loop data is stored inside the VMap.
void LoopUnswitch::CloneUnswitchedVals(const ValueToValueMapTy& VMap,
const BasicBlock* SrcBB) {
const SwitchInst* SI = dyn_cast<SwitchInst>(SrcBB->getTerminator());
if (SI && UnswitchedVals.count(SI)) {
// Don't clone a totally simplified switch.
if (isa<Constant>(SI->getCondition()))
return;
Value* I = VMap.lookup(SI);
assert(I && "All instructions that are in SrcBB must be in VMap.");
UnswitchedVals[cast<SwitchInst>(I)] = UnswitchedVals[SI];
}
}
/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
/// loop with no side effects (including infinite loops).
///
@ -423,25 +378,14 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
// Check to see if a successor of the switch is guaranteed to go to the
// latch block or exit through a one exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
// this.
// Note that we can't trivially unswitch on the default case or
// on already unswitched cases.
for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
BasicBlock* LoopExitCandidate;
if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
// this. Note that we can't trivially unswitch on the default case.
for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
SI->getSuccessor(i)))) {
// Okay, we found a trivial case, remember the value that is trivial.
ConstantInt* CaseVal = SI->getCaseValue(i);
// Check that it was not unswitched before, since already unswitched
// trivial vals are looks trivial too.
if (UnswitchedVals[SI].count(CaseVal))
continue;
LoopExitBB = LoopExitCandidate;
if (Val) *Val = CaseVal;
if (Val) *Val = SI->getCaseValue(i);
break;
}
}
}
// If we didn't find a single unique LoopExit block, or if the loop exit block
@ -503,14 +447,8 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
// expansion, and the number of basic blocks, to avoid loops with
// large numbers of branches which cause loop unswitching to go crazy.
// This is a very ad-hoc heuristic.
unsigned NumUnswitched =
(NumSwitches + NumBranches) + 1 /*take in account current iteration*/;
unsigned NumInsts = Metrics.NumInsts * NumUnswitched;
unsigned NumBlocks = Metrics.NumBlocks * NumUnswitched;
if (NumInsts > Threshold || NumBlocks * 5 > Threshold ||
if (Metrics.NumInsts > Threshold ||
Metrics.NumBlocks * 5 > Threshold ||
Metrics.containsIndirectBr || Metrics.isRecursive) {
DEBUG(dbgs() << "NOT unswitching loop %"
<< currentLoop->getHeader()->getName() << ", cost too high: "
@ -682,12 +620,6 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
ValueToValueMapTy VMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
// Inherit simplified switches info for NewBB
// We needn't pass NewBB since its instructions are already contained
// inside the VMap.
CloneUnswitchedVals(VMap, LoopBlocks[i]);
NewBlocks.push_back(NewBB);
VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
@ -1013,9 +945,6 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
BasicBlock *Switch = SI->getParent();
BasicBlock *SISucc = SI->getSuccessor(DeadCase);
BasicBlock *Latch = L->getLoopLatch();
UnswitchedVals[SI].insert(Val);
if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
// If the DeadCase successor dominates the loop latch, then the
// transformation isn't safe since it will delete the sole predecessor edge

View File

@ -1,91 +0,0 @@
; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info %s | FileCheck %s
; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
; STATS: 2 loop-unswitch - Number of switches unswitched
; CHECK: %1 = icmp eq i32 %c, 1
; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge
; CHECK: ..split_crit_edge: ; preds = %0
; CHECK-NEXT: br label %.split
; CHECK: .split.us: ; preds = %0
; CHECK-NEXT: br label %loop_begin.us
; CHECK: loop_begin.us: ; preds = %loop_begin.backedge.us, %.split.us
; CHECK-NEXT: %var_val.us = load i32* %var
; CHECK-NEXT: switch i32 1, label %default.us-lcssa.us [
; CHECK-NEXT: i32 1, label %inc.us
; CHECK: inc.us: ; preds = %loop_begin.us
; CHECK-NEXT: call void @incf() noreturn nounwind
; CHECK-NEXT: br label %loop_begin.backedge.us
; CHECK: .split: ; preds = %..split_crit_edge
; CHECK-NEXT: %2 = icmp eq i32 %c, 2
; CHECK-NEXT: br i1 %2, label %.split.split.us, label %.split..split.split_crit_edge
; CHECK: .split..split.split_crit_edge: ; preds = %.split
; CHECK-NEXT: br label %.split.split
; CHECK: .split.split.us: ; preds = %.split
; CHECK-NEXT: br label %loop_begin.us1
; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us5, %.split.split.us
; CHECK-NEXT: %var_val.us2 = load i32* %var
; CHECK-NEXT: switch i32 2, label %default.us-lcssa.us-lcssa.us [
; CHECK-NEXT: i32 1, label %inc.us3
; CHECK-NEXT: i32 2, label %dec.us4
; CHECK-NEXT: ]
; CHECK: dec.us4: ; preds = %loop_begin.us1
; CHECK-NEXT: call void @decf() noreturn nounwind
; CHECK-NEXT: br label %loop_begin.backedge.us5
; CHECK: .split.split: ; preds = %.split..split.split_crit_edge
; CHECK-NEXT: br label %loop_begin
; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split.split
; CHECK-NEXT: %var_val = load i32* %var
; CHECK-NEXT: switch i32 %c, label %default.us-lcssa.us-lcssa [
; CHECK-NEXT: i32 1, label %inc
; CHECK-NEXT: i32 2, label %dec
; CHECK-NEXT: ]
; CHECK: inc: ; preds = %loop_begin
; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa, label %inc.split
; CHECK: dec: ; preds = %loop_begin
; CHECK-NEXT: br i1 true, label %us-unreachable6, label %dec.split
define i32 @test(i32* %var) {
%mem = alloca i32
store i32 2, i32* %mem
%c = load i32* %mem
br label %loop_begin
loop_begin:
%var_val = load i32* %var
switch i32 %c, label %default [
i32 1, label %inc
i32 2, label %dec
]
inc:
call void @incf() noreturn nounwind
br label %loop_begin
dec:
call void @decf() noreturn nounwind
br label %loop_begin
default:
br label %loop_exit
loop_exit:
ret i32 0
}
declare void @incf() noreturn
declare void @decf() noreturn

View File

@ -1,84 +0,0 @@
; RUN: opt -loop-unswitch -loop-unswitch-threshold 30 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 30 -verify-loop-info -verify-dom-info %s | FileCheck %s
; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
; STATS: 1 loop-unswitch - Number of switches unswitched
; ModuleID = '../llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll'
; CHECK: %1 = icmp eq i32 %c, 1
; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge
; CHECK: ..split_crit_edge: ; preds = %0
; CHECK-NEXT: br label %.split
; CHECK: .split.us: ; preds = %0
; CHECK-NEXT: br label %loop_begin.us
; CHECK: loop_begin.us: ; preds = %loop_begin.backedge.us, %.split.us
; CHECK: switch i32 1, label %second_switch.us [
; CHECK-NEXT: i32 1, label %inc.us
; CHECK: inc.us: ; preds = %second_switch.us, %loop_begin.us
; CHECK-NEXT: call void @incf() noreturn nounwind
; CHECK-NEXT: br label %loop_begin.backedge.us
; CHECK: second_switch.us: ; preds = %loop_begin.us
; CHECK-NEXT: switch i32 %d, label %default.us [
; CHECK-NEXT: i32 1, label %inc.us
; CHECK-NEXT: ]
; CHECK: .split: ; preds = %..split_crit_edge
; CHECK-NEXT: br label %loop_begin
; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split
; CHECK: switch i32 %c, label %second_switch [
; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge
; CHECK-NEXT: ]
; CHECK: loop_begin.inc_crit_edge: ; preds = %loop_begin
; CHECK-NEXT: br i1 true, label %us-unreachable, label %inc
; CHECK: second_switch: ; preds = %loop_begin
; CHECK-NEXT: switch i32 %d, label %default [
; CHECK-NEXT: i32 1, label %inc
; CHECK-NEXT: ]
; CHECK: inc: ; preds = %loop_begin.inc_crit_edge, %second_switch
; CHECK-NEXT: call void @incf() noreturn nounwind
; CHECK-NEXT: br label %loop_begin.backedge
define i32 @test(i32* %var) {
%mem = alloca i32
store i32 2, i32* %mem
%c = load i32* %mem
%d = load i32* %mem
br label %loop_begin
loop_begin:
%var_val = load i32* %var
switch i32 %c, label %second_switch [
i32 1, label %inc
]
second_switch:
switch i32 %d, label %default [
i32 1, label %inc
]
inc:
call void @incf() noreturn nounwind
br label %loop_begin
default:
br label %loop_begin
loop_exit:
ret i32 0
}
declare void @incf() noreturn
declare void @decf() noreturn

View File

@ -1,138 +0,0 @@
; RUN: opt -loop-unswitch -loop-unswitch-threshold 1000 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info %s | FileCheck %s
; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
; STATS: 3 loop-unswitch - Number of switches unswitched
; CHECK: %1 = icmp eq i32 %c, 1
; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge
; CHECK: ..split_crit_edge: ; preds = %0
; CHECK-NEXT: br label %.split
; CHECK: .split.us: ; preds = %0
; CHECK-NEXT: %2 = icmp eq i32 %d, 1
; CHECK-NEXT: br i1 %2, label %.split.us.split.us, label %.split.us..split.us.split_crit_edge
; CHECK: .split.us..split.us.split_crit_edge: ; preds = %.split.us
; CHECK-NEXT: br label %.split.us.split
; CHECK: .split.us.split.us: ; preds = %.split.us
; CHECK-NEXT: br label %loop_begin.us.us
; CHECK: loop_begin.us.us: ; preds = %loop_begin.backedge.us.us, %.split.us.split.us
; CHECK-NEXT: %var_val.us.us = load i32* %var
; CHECK-NEXT: switch i32 1, label %second_switch.us.us [
; CHECK-NEXT: i32 1, label %inc.us.us
; CHECK: inc.us.us: ; preds = %second_switch.us.us, %loop_begin.us.us
; CHECK-NEXT: call void @incf() noreturn nounwind
; CHECK-NEXT: br label %loop_begin.backedge.us.us
; CHECK: second_switch.us.us: ; preds = %loop_begin.us.us
; CHECK-NEXT: switch i32 1, label %default.us.us [
; CHECK-NEXT: i32 1, label %inc.us.us
; CHECK: .split.us.split: ; preds = %.split.us..split.us.split_crit_edge
; CHECK-NEXT: br label %loop_begin.us
; CHECK: loop_begin.us: ; preds = %loop_begin.backedge.us, %.split.us.split
; CHECK-NEXT: %var_val.us = load i32* %var
; CHECK-NEXT: switch i32 1, label %second_switch.us [
; CHECK-NEXT: i32 1, label %inc.us
; CHECK: inc.us: ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
; CHECK-NEXT: call void @incf() noreturn nounwind
; CHECK-NEXT: br label %loop_begin.backedge.us
; CHECK: second_switch.us: ; preds = %loop_begin.us
; CHECK-NEXT: switch i32 %d, label %default.us [
; CHECK-NEXT: i32 1, label %second_switch.us.inc.us_crit_edge
; CHECK-NEXT: ]
; CHECK: second_switch.us.inc.us_crit_edge: ; preds = %second_switch.us
; CHECK-NEXT: br i1 true, label %us-unreachable8, label %inc.us
; CHECK: .split: ; preds = %..split_crit_edge
; CHECK-NEXT: %3 = icmp eq i32 %d, 1
; CHECK-NEXT: br i1 %3, label %.split.split.us, label %.split..split.split_crit_edge
; CHECK: .split..split.split_crit_edge: ; preds = %.split
; CHECK-NEXT: br label %.split.split
; CHECK: .split.split.us: ; preds = %.split
; CHECK-NEXT: br label %loop_begin.us1
; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us6, %.split.split.us
; CHECK-NEXT: %var_val.us2 = load i32* %var
; CHECK-NEXT: switch i32 %c, label %second_switch.us4 [
; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge.us
; CHECK-NEXT: ]
; CHECK: inc.us3: ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us4
; CHECK-NEXT: call void @incf() noreturn nounwind
; CHECK-NEXT: br label %loop_begin.backedge.us6
; CHECK: second_switch.us4: ; preds = %loop_begin.us1
; CHECK-NEXT: switch i32 1, label %default.us5 [
; CHECK-NEXT: i32 1, label %inc.us3
; CHECK-NEXT: ]
; CHECK: loop_begin.inc_crit_edge.us: ; preds = %loop_begin.us1
; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us3
; CHECK: .split.split: ; preds = %.split..split.split_crit_edge
; CHECK-NEXT: br label %loop_begin
; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split.split
; CHECK-NEXT: %var_val = load i32* %var
; CHECK-NEXT: switch i32 %c, label %second_switch [
; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge
; CHECK-NEXT: ]
; CHECK: loop_begin.inc_crit_edge: ; preds = %loop_begin
; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa, label %inc
; CHECK: second_switch: ; preds = %loop_begin
; CHECK-NEXT: switch i32 %d, label %default [
; CHECK-NEXT: i32 1, label %second_switch.inc_crit_edge
; CHECK-NEXT: ]
; CHECK: second_switch.inc_crit_edge: ; preds = %second_switch
; CHECK-NEXT: br i1 true, label %us-unreachable7, label %inc
define i32 @test(i32* %var) {
%mem = alloca i32
store i32 2, i32* %mem
%c = load i32* %mem
%d = load i32* %mem
br label %loop_begin
loop_begin:
%var_val = load i32* %var
switch i32 %c, label %second_switch [
i32 1, label %inc
]
second_switch:
switch i32 %d, label %default [
i32 1, label %inc
]
inc:
call void @incf() noreturn nounwind
br label %loop_begin
default:
br label %loop_begin
loop_exit:
ret i32 0
}
declare void @incf() noreturn
declare void @decf() noreturn