LowerSwitch: track bounding range for the condition tree.

When LowerSwitch transforms a switch instruction into a tree of ifs it
is actually performing a binary search over the various case ranges, to
see whether the current value falls into one of the case ranges.

So, if we have a program with something like this:

switch (a) {
case 0:
  do0();
  break;
case 1:
  do1();
  break;
case 2:
  do2();
  break;
default:
  break;
}

the code produced is something like this:

  if (a < 1) {
    if (a == 0) {
      do0();
    }
  } else {
    if (a < 2) {
      if (a == 1) {
        do1();
      }
    } else {
      if (a == 2) {
        do2();
      }
    }
  }

This code is inefficient because the check (a == 1) to execute do1() is
not needed.

The reason is that we already know (a >= 1) from the failed (a < 1)
check, so once (a < 2) also holds we have already inferred that
(a == 1), without the need for an extra basic block that checks whether
(a == 1) actually holds.

The patch addresses this problem by keeping track of already
checked bounds in the LowerSwitch algorithm, so that when the time
arrives to produce a Leaf Block that checks the equality with the case
value / range the algorithm can decide if that block is really needed
depending on the already checked bounds.

For example, the above with "a = 1" would work like this:

The bounds start as LB: NONE, UB: NONE.
As (a < 1) is emitted, the bounds for the else path become LB: 1, UB:
NONE. This happens because by failing the test (a < 1) we know that the
value "a" cannot be smaller than 1 if we enter the else branch.
After emitting the check (a < 2), the bounds in the if branch become
LB: 1, UB: 1. This is because by checking that "a" is smaller than 2
the upper bound becomes 2 - 1 = 1.

When it is time to emit the leaf block for "case 1:" we notice that 1
fits exactly between the LB and UB, which means that if we arrive at
that block there is no need to emit a block that checks if (a == 1).

Patch by: Marcello Maggioni <hayarms@gmail.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211038 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jim Grosbach 2014-06-16 16:55:20 +00:00
parent 408519765b
commit dc2dc390f6
4 changed files with 208 additions and 101 deletions

View File

@ -14,11 +14,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/CFG.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@ -58,16 +60,18 @@ namespace {
Low(low), High(high), BB(bb) { }
};
typedef std::vector<CaseRange> CaseVector;
typedef std::vector<CaseRange> CaseVector;
typedef std::vector<CaseRange>::iterator CaseItr;
private:
void processSwitchInst(SwitchInst *SI);
BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val,
BasicBlock* OrigBlock, BasicBlock* Default);
BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val,
BasicBlock* OrigBlock, BasicBlock* Default);
unsigned Clusterify(CaseVector& Cases, SwitchInst *SI);
BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
ConstantInt *LowerBound, ConstantInt *UpperBound,
Value *Val, BasicBlock *OrigBlock,
BasicBlock *Default);
BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock,
BasicBlock *Default);
unsigned Clusterify(CaseVector &Cases, SwitchInst *SI);
};
/// The comparison function for sorting the switch case values in the vector.
@ -129,15 +133,26 @@ static raw_ostream& operator<<(raw_ostream &O,
// switchConvert - Convert the switch statement into a binary lookup of
// the case values. The function recursively builds this tree.
//
BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
Value* Val, BasicBlock* OrigBlock,
BasicBlock* Default)
{
// LowerBound and UpperBound are used to keep track of the bounds for Val
// that have already been checked by a block emitted by one of the previous
// calls to switchConvert in the call stack.
BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
ConstantInt *LowerBound,
ConstantInt *UpperBound, Value *Val,
BasicBlock *OrigBlock,
BasicBlock *Default) {
unsigned Size = End - Begin;
if (Size == 1)
if (Size == 1) {
// Check if the Case Range is perfectly squeezed in between
// already checked Upper and Lower bounds. If it is then we can avoid
// emitting the code that checks if the value actually falls in the range
// because the bounds already tell us so.
if (Begin->Low == LowerBound && Begin->High == UpperBound) {
return Begin->BB;
}
return newLeafBlock(*Begin, Val, OrigBlock, Default);
}
unsigned Mid = Size / 2;
std::vector<CaseRange> LHS(Begin, Begin + Mid);
@ -145,15 +160,50 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
std::vector<CaseRange> RHS(Begin + Mid, End);
DEBUG(dbgs() << "RHS: " << RHS << "\n");
CaseRange& Pivot = *(Begin + Mid);
DEBUG(dbgs() << "Pivot ==> "
<< cast<ConstantInt>(Pivot.Low)->getValue() << " -"
<< cast<ConstantInt>(Pivot.High)->getValue() << "\n");
CaseRange &Pivot = *(Begin + Mid);
DEBUG(dbgs() << "Pivot ==> "
<< cast<ConstantInt>(Pivot.Low)->getValue()
<< " -" << cast<ConstantInt>(Pivot.High)->getValue() << "\n");
BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val,
OrigBlock, Default);
BasicBlock* RBranch = switchConvert(RHS.begin(), RHS.end(), Val,
OrigBlock, Default);
// NewLowerBound here should never be the integer minimal value.
// This is because it is computed from a case range that is never
// the smallest, so there is always a case range that has at least
// a smaller value.
ConstantInt *NewLowerBound = cast<ConstantInt>(Pivot.Low);
ConstantInt *NewUpperBound;
// If we don't have a Default block then it means that we can never
// have a value outside of a case range, so set the UpperBound to the highest
// value in the LHS part of the case ranges.
if (Default != nullptr) {
// Because NewLowerBound is never the smallest representable integer
// it is safe here to subtract one.
NewUpperBound = ConstantInt::get(NewLowerBound->getContext(),
NewLowerBound->getValue() - 1);
} else {
CaseItr LastLHS = LHS.begin() + LHS.size() - 1;
NewUpperBound = cast<ConstantInt>(LastLHS->High);
}
DEBUG(dbgs() << "LHS Bounds ==> ";
if (LowerBound) {
dbgs() << cast<ConstantInt>(LowerBound)->getSExtValue();
} else {
dbgs() << "NONE";
}
dbgs() << " - " << NewUpperBound->getSExtValue() << "\n";
dbgs() << "RHS Bounds ==> ";
dbgs() << NewLowerBound->getSExtValue() << " - ";
if (UpperBound) {
dbgs() << cast<ConstantInt>(UpperBound)->getSExtValue() << "\n";
} else {
dbgs() << "NONE\n";
});
BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound,
NewUpperBound, Val, OrigBlock, Default);
BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound,
UpperBound, Val, OrigBlock, Default);
// Create a new node that checks if the value is < pivot. Go to the
// left branch if it is and right branch if not.
@ -291,13 +341,19 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
return;
}
const bool DefaultIsUnreachable =
Default->size() == 1 && isa<UnreachableInst>(Default->getTerminator());
// Create a new, empty default block so that the new hierarchy of
// if-then statements go to this and the PHI nodes are happy.
BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
F->getBasicBlockList().insert(Default, NewDefault);
BranchInst::Create(Default, NewDefault);
// If the default block is unreachable we avoid creating a new one,
// because it will never be a valid target.
BasicBlock *NewDefault = nullptr;
if (!DefaultIsUnreachable) {
NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
F->getBasicBlockList().insert(Default, NewDefault);
BranchInst::Create(Default, NewDefault);
}
// If there is an entry in any PHI nodes for the default edge, make sure
// to update them as well.
for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) {
@ -316,12 +372,31 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
DEBUG(dbgs() << "Cases: " << Cases << "\n");
(void)numCmps;
BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val,
OrigBlock, NewDefault);
ConstantInt *UpperBound = nullptr;
ConstantInt *LowerBound = nullptr;
// Optimize the condition where Default is an unreachable block. In this case
// we can make the bounds tightly fitted around the case value ranges,
// because we know that the value passed to the switch should always be
// exactly one of the case values.
if (DefaultIsUnreachable) {
CaseItr LastCase = Cases.begin() + Cases.size() - 1;
UpperBound = cast<ConstantInt>(LastCase->High);
LowerBound = cast<ConstantInt>(Cases.begin()->Low);
}
BasicBlock *SwitchBlock =
switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
OrigBlock, NewDefault);
// Branch to our shiny new if-then stuff...
BranchInst::Create(SwitchBlock, OrigBlock);
// We are now done with the switch instruction, delete it.
CurBlock->getInstList().erase(SI);
pred_iterator PI = pred_begin(Default), E = pred_end(Default);
// If the Default block has no more predecessors just remove it
if (PI == E) {
DeleteDeadBlock(Default);
}
}

View File

@ -0,0 +1,27 @@
; RUN: opt < %s -lowerswitch -S | FileCheck %s
; CHECK-NOT: icmp eq i32 %0, 1
; Test input: spills %a to a stack slot, reloads it, and switches on the
; reloaded value. Cases 0, 1 and 2 are contiguous single-value cases; any
; other value goes to %sw.default. Every destination returns its own constant.
define i32 @foo(i32 %a) #0 {
entry:
; %retval is allocated but never read or written in this function.
%retval = alloca i32, align 4
%a.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
%0 = load i32* %a.addr, align 4
; The switch condition is the reloaded value %0, not %a directly.
switch i32 %0, label %sw.default [
i32 0, label %sw.bb
i32 1, label %sw.bb1
i32 2, label %sw.bb2
]
; Destination for case 0.
sw.bb:
ret i32 12
; Destination for case 1 (the middle case, bounded by 0 below and 2 above).
sw.bb1:
ret i32 4
; Destination for case 2.
sw.bb2:
ret i32 2
; Destination for every value that matches no case.
sw.default:
ret i32 9
}

View File

@ -0,0 +1,41 @@
; RUN: opt < %s -lowerswitch -S | FileCheck %s
; CHECK-NOT: {{.*}}icmp eq{{.*}}
;
;int foo(int a) {
;
; switch (a) {
; case 0:
; return 10;
; case 1:
; return 3;
; default:
; __builtin_unreachable();
; }
;
;}
; Test input: a two-case switch (values 0 and 1) whose default destination
; (%6) contains only an `unreachable` instruction. %1 is the result slot that
; both case blocks store into; %2 holds the spilled argument.
define i32 @foo(i32 %a) nounwind ssp uwtable {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
store i32 %a, i32* %2, align 4
%3 = load i32* %2, align 4
; The switch condition is the reloaded argument %3.
switch i32 %3, label %6 [
i32 0, label %4
i32 1, label %5
]
; Case 0: store 10 into the result slot, then join at %7.
; <label>:4
store i32 10, i32* %1
br label %7
; Case 1: store 3 into the result slot, then join at %7.
; <label>:5
store i32 3, i32* %1
br label %7
; Default destination: nothing but `unreachable`.
; <label>:6
unreachable
; Join block: reload the result slot and return it.
; <label>:7
%8 = load i32* %1
ret i32 %8
}

View File

@ -3,93 +3,57 @@
; We have a switch on input.
; On output we should get a binary comparison tree. Check that all is fine.
;CHECK: entry:
;CHECK-NEXT: br label %NodeBlock37
;CHECK: entry:
;CHECK-NEXT: br label %NodeBlock19
;CHECK: NodeBlock37: ; preds = %entry
;CHECK-NEXT: %Pivot38 = icmp slt i32 %tmp158, 10
;CHECK-NEXT: br i1 %Pivot38, label %NodeBlock13, label %NodeBlock35
;CHECK: NodeBlock19: ; preds = %entry
;CHECK-NEXT: %Pivot20 = icmp slt i32 %tmp158, 10
;CHECK-NEXT: br i1 %Pivot20, label %NodeBlock5, label %NodeBlock17
;CHECK: NodeBlock35: ; preds = %NodeBlock37
;CHECK-NEXT: %Pivot36 = icmp slt i32 %tmp158, 13
;CHECK-NEXT: br i1 %Pivot36, label %NodeBlock23, label %NodeBlock33
;CHECK: NodeBlock17: ; preds = %NodeBlock19
;CHECK-NEXT: %Pivot18 = icmp slt i32 %tmp158, 13
;CHECK-NEXT: br i1 %Pivot18, label %NodeBlock9, label %NodeBlock15
;CHECK: NodeBlock33: ; preds = %NodeBlock35
;CHECK-NEXT: %Pivot34 = icmp slt i32 %tmp158, 14
;CHECK-NEXT: br i1 %Pivot34, label %LeafBlock25, label %NodeBlock31
;CHECK: NodeBlock15: ; preds = %NodeBlock17
;CHECK-NEXT: %Pivot16 = icmp slt i32 %tmp158, 14
;CHECK-NEXT: br i1 %Pivot16, label %bb330, label %NodeBlock13
;CHECK: NodeBlock31: ; preds = %NodeBlock33
;CHECK-NEXT: %Pivot32 = icmp slt i32 %tmp158, 15
;CHECK-NEXT: br i1 %Pivot32, label %LeafBlock27, label %LeafBlock29
;CHECK: NodeBlock13: ; preds = %NodeBlock15
;CHECK-NEXT: %Pivot14 = icmp slt i32 %tmp158, 15
;CHECK-NEXT: br i1 %Pivot14, label %bb332, label %LeafBlock11
;CHECK: LeafBlock29: ; preds = %NodeBlock31
;CHECK-NEXT: %SwitchLeaf30 = icmp eq i32 %tmp158, 15
;CHECK-NEXT: br i1 %SwitchLeaf30, label %bb334, label %NewDefault
;CHECK: LeafBlock11: ; preds = %NodeBlock13
;CHECK-NEXT: %SwitchLeaf12 = icmp eq i32 %tmp158, 15
;CHECK-NEXT: br i1 %SwitchLeaf12, label %bb334, label %NewDefault
;CHECK: LeafBlock27: ; preds = %NodeBlock31
;CHECK-NEXT: %SwitchLeaf28 = icmp eq i32 %tmp158, 14
;CHECK-NEXT: br i1 %SwitchLeaf28, label %bb332, label %NewDefault
;CHECK: NodeBlock9: ; preds = %NodeBlock17
;CHECK-NEXT: %Pivot10 = icmp slt i32 %tmp158, 11
;CHECK-NEXT: br i1 %Pivot10, label %bb324, label %NodeBlock7
;CHECK: LeafBlock25: ; preds = %NodeBlock33
;CHECK-NEXT: %SwitchLeaf26 = icmp eq i32 %tmp158, 13
;CHECK-NEXT: br i1 %SwitchLeaf26, label %bb330, label %NewDefault
;CHECK: NodeBlock7: ; preds = %NodeBlock9
;CHECK-NEXT: %Pivot8 = icmp slt i32 %tmp158, 12
;CHECK-NEXT: br i1 %Pivot8, label %bb326, label %bb328
;CHECK: NodeBlock23: ; preds = %NodeBlock35
;CHECK-NEXT: %Pivot24 = icmp slt i32 %tmp158, 11
;CHECK-NEXT: br i1 %Pivot24, label %LeafBlock15, label %NodeBlock21
;CHECK: NodeBlock5: ; preds = %NodeBlock19
;CHECK-NEXT: %Pivot6 = icmp slt i32 %tmp158, 7
;CHECK-NEXT: br i1 %Pivot6, label %NodeBlock, label %NodeBlock3
;CHECK: NodeBlock21: ; preds = %NodeBlock23
;CHECK-NEXT: %Pivot22 = icmp slt i32 %tmp158, 12
;CHECK-NEXT: br i1 %Pivot22, label %LeafBlock17, label %LeafBlock19
;CHECK: NodeBlock3: ; preds = %NodeBlock5
;CHECK-NEXT: %Pivot4 = icmp slt i32 %tmp158, 8
;CHECK-NEXT: br i1 %Pivot4, label %bb, label %NodeBlock1
;CHECK: LeafBlock19: ; preds = %NodeBlock21
;CHECK-NEXT: %SwitchLeaf20 = icmp eq i32 %tmp158, 12
;CHECK-NEXT: br i1 %SwitchLeaf20, label %bb328, label %NewDefault
;CHECK: NodeBlock1: ; preds = %NodeBlock3
;CHECK-NEXT: %Pivot2 = icmp slt i32 %tmp158, 9
;CHECK-NEXT: br i1 %Pivot2, label %bb338, label %bb322
;CHECK: LeafBlock17: ; preds = %NodeBlock21
;CHECK-NEXT: %SwitchLeaf18 = icmp eq i32 %tmp158, 11
;CHECK-NEXT: br i1 %SwitchLeaf18, label %bb326, label %NewDefault
;CHECK: NodeBlock: ; preds = %NodeBlock5
;CHECK-NEXT: %Pivot = icmp slt i32 %tmp158, 0
;CHECK-NEXT: br i1 %Pivot, label %LeafBlock, label %bb338
;CHECK: LeafBlock15: ; preds = %NodeBlock23
;CHECK-NEXT: %SwitchLeaf16 = icmp eq i32 %tmp158, 10
;CHECK-NEXT: br i1 %SwitchLeaf16, label %bb324, label %NewDefault
;CHECK: NodeBlock13: ; preds = %NodeBlock37
;CHECK-NEXT: %Pivot14 = icmp slt i32 %tmp158, 7
;CHECK-NEXT: br i1 %Pivot14, label %NodeBlock, label %NodeBlock11
;CHECK: NodeBlock11: ; preds = %NodeBlock13
;CHECK-NEXT: %Pivot12 = icmp slt i32 %tmp158, 8
;CHECK-NEXT: br i1 %Pivot12, label %LeafBlock3, label %NodeBlock9
;CHECK: NodeBlock9: ; preds = %NodeBlock11
;CHECK-NEXT: %Pivot10 = icmp slt i32 %tmp158, 9
;CHECK-NEXT: br i1 %Pivot10, label %LeafBlock5, label %LeafBlock7
;CHECK: LeafBlock7: ; preds = %NodeBlock9
;CHECK-NEXT: %SwitchLeaf8 = icmp eq i32 %tmp158, 9
;CHECK-NEXT: br i1 %SwitchLeaf8, label %bb322, label %NewDefault
;CHECK: LeafBlock5: ; preds = %NodeBlock9
;CHECK-NEXT: %SwitchLeaf6 = icmp eq i32 %tmp158, 8
;CHECK-NEXT: br i1 %SwitchLeaf6, label %bb338, label %NewDefault
;CHECK: LeafBlock3: ; preds = %NodeBlock11
;CHECK-NEXT: %SwitchLeaf4 = icmp eq i32 %tmp158, 7
;CHECK-NEXT: br i1 %SwitchLeaf4, label %bb, label %NewDefault
;CHECK: NodeBlock: ; preds = %NodeBlock13
;CHECK-NEXT: %Pivot = icmp slt i32 %tmp158, 0
;CHECK-NEXT: br i1 %Pivot, label %LeafBlock, label %LeafBlock1
;CHECK: LeafBlock1: ; preds = %NodeBlock
;CHECK-NEXT: %SwitchLeaf2 = icmp ule i32 %tmp158, 6
;CHECK-NEXT: br i1 %SwitchLeaf2, label %bb338, label %NewDefault
;CHECK: LeafBlock: ; preds = %NodeBlock
;CHECK-NEXT: %tmp158.off = add i32 %tmp158, 6
;CHECK-NEXT: %SwitchLeaf = icmp ule i32 %tmp158.off, 4
;CHECK-NEXT: br i1 %SwitchLeaf, label %bb338, label %NewDefault
;CHECK: LeafBlock: ; preds = %NodeBlock
;CHECK-NEXT: %tmp158.off = add i32 %tmp158, 6
;CHECK-NEXT: %SwitchLeaf = icmp ule i32 %tmp158.off, 4
;CHECK-NEXT: br i1 %SwitchLeaf, label %bb338, label %NewDefault
define i32 @main(i32 %tmp158) {
entry: