Use TargetTransformInfo to control switch-to-lookup table transformation

When the switch-to-lookup tables transform landed in SimplifyCFG, it
was pointed out that this could be inappropriate for some targets.
Since there was no way at the time for the pass to know anything about
the target, an awkward reverse-transform was added in CodeGenPrepare
that turned lookup tables back into switches for some targets.

This patch uses the new TargetTransformInfo to determine if a
switch should be transformed, and removes
CodeGenPrepare::ConvertLoadToSwitch.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167011 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hans Wennborg 2012-10-30 11:23:25 +00:00
parent c588e0e162
commit 04d7d13d30
10 changed files with 79 additions and 208 deletions

View File

@ -46,6 +46,8 @@ public:
virtual unsigned getJumpBufAlignment() const;
virtual unsigned getJumpBufSize() const;
virtual bool shouldBuildLookupTables() const;
};
class VectorTargetTransformImpl : public VectorTargetTransformInfo {

View File

@ -117,6 +117,11 @@ public:
virtual unsigned getJumpBufSize() const {
return 0;
}
/// shouldBuildLookupTables - Return true if switches should be turned into
/// lookup tables for the target.
virtual bool shouldBuildLookupTables() const {
return true;
}
};
/// VectorTargetTransformInfo - This interface is used by the vectorizers

View File

@ -37,6 +37,7 @@ class AllocaInst;
class ConstantExpr;
class DataLayout;
class TargetLibraryInfo;
class TargetTransformInfo;
class DIBuilder;
template<typename T> class SmallVectorImpl;
@ -134,7 +135,8 @@ bool EliminateDuplicatePHINodes(BasicBlock *BB);
/// of the CFG. It returns true if a modification was made, possibly deleting
/// the basic block that was pointed to.
///
bool SimplifyCFG(BasicBlock *BB, const DataLayout *TD = 0);
bool SimplifyCFG(BasicBlock *BB, const DataLayout *TD = 0,
const TargetTransformInfo *TTI = 0);
/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch,
/// and if a predecessor branches to us and one of our successors, fold the

View File

@ -49,6 +49,12 @@ unsigned ScalarTargetTransformImpl::getJumpBufSize() const {
return TLI->getJumpBufSize();
}
bool ScalarTargetTransformImpl::shouldBuildLookupTables() const {
return TLI->supportJumpTables() &&
(TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}
//===----------------------------------------------------------------------===//
//
// Calls used by the vectorizers.
@ -313,4 +319,3 @@ VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const {
getTypeLegalizationCost(Tp->getContext(), TLI->getValueType(Tp));
return LT.first;
}

View File

@ -18,7 +18,6 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IRBuilder.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
@ -128,7 +127,6 @@ namespace {
bool OptimizeSelectInst(SelectInst *SI);
bool DupRetToEnableTailCallOpts(ReturnInst *RI);
bool PlaceDbgValues(Function &F);
bool ConvertLoadToSwitch(LoadInst *LI);
};
}
@ -1292,11 +1290,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
return OptimizeCmpExpression(CI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
bool Changed = false;
if (TLI)
Changed |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
Changed |= ConvertLoadToSwitch(LI);
return Changed;
return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
return false;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
@ -1376,109 +1372,3 @@ bool CodeGenPrepare::PlaceDbgValues(Function &F) {
}
return MadeChange;
}
static bool TargetSupportsJumpTables(const TargetLowering &TLI) {
return TLI.supportJumpTables() &&
(TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}
/// ConvertLoadToSwitch - Convert loads from constant lookup tables into
/// switches. This undos the switch-to-lookup table transformation in
/// SimplifyCFG for targets where that is inprofitable.
bool CodeGenPrepare::ConvertLoadToSwitch(LoadInst *LI) {
// This only applies to targets that don't support jump tables.
if (!TLI || TargetSupportsJumpTables(*TLI))
return false;
// FIXME: In the future, it would be desirable to have enough target
// information in SimplifyCFG, so we could decide at that stage whether to
// transform the switch to a lookup table or not, and this
// reverse-transformation could be removed.
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
if (!GEP || !GEP->isInBounds() || GEP->getPointerAddressSpace())
return false;
if (GEP->getNumIndices() != 2)
return false;
Value *FirstIndex = GEP->idx_begin()[0];
ConstantInt *FirstIndexInt = dyn_cast<ConstantInt>(FirstIndex);
if (!FirstIndexInt || !FirstIndexInt->isZero())
return false;
Value *TableIndex = GEP->idx_begin()[1];
IntegerType *TableIndexTy = cast<IntegerType>(TableIndex->getType());
GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return false;
Constant *Arr = GV->getInitializer();
uint64_t NumElements;
if (ConstantArray *CA = dyn_cast<ConstantArray>(Arr))
NumElements = CA->getType()->getNumElements();
else if (ConstantDataArray *CDA = dyn_cast<ConstantDataArray>(Arr))
NumElements = CDA->getNumElements();
else
return false;
if (NumElements < 2)
return false;
// Split the block.
BasicBlock *OriginalBB = LI->getParent();
BasicBlock *PostSwitchBB = OriginalBB->splitBasicBlock(LI);
// Replace OriginalBB's terminator with a switch.
IRBuilder<> Builder(OriginalBB->getTerminator());
SwitchInst *Switch = Builder.CreateSwitch(TableIndex, PostSwitchBB,
NumElements - 1);
OriginalBB->getTerminator()->eraseFromParent();
// Count the frequency of each value to decide which to use as default.
SmallDenseMap<Constant*, uint64_t> ValueFreq;
for (uint64_t I = 0; I < NumElements; ++I)
++ValueFreq[Arr->getAggregateElement(I)];
uint64_t MaxCount = 0;
Constant *DefaultValue = NULL;
for (SmallDenseMap<Constant*, uint64_t>::iterator I = ValueFreq.begin(),
E = ValueFreq.end(); I != E; ++I) {
if (I->second > MaxCount) {
MaxCount = I->second;
DefaultValue = I->first;
}
}
assert(DefaultValue && "No values in the array?");
// Create the phi node in PostSwitchBB, which will replace the load.
Builder.SetInsertPoint(PostSwitchBB->begin());
PHINode *PHI = Builder.CreatePHI(LI->getType(), NumElements);
PHI->addIncoming(DefaultValue, OriginalBB);
// Build basic blocks to target with the switch.
for (uint64_t I = 0; I < NumElements; ++I) {
Constant *C = Arr->getAggregateElement(I);
if (C == DefaultValue) continue; // Already covered by the default case.
BasicBlock *BB = BasicBlock::Create(PostSwitchBB->getContext(),
"lookup.bb",
PostSwitchBB->getParent(),
PostSwitchBB);
Switch->addCase(ConstantInt::get(TableIndexTy, I), BB);
Builder.SetInsertPoint(BB);
Builder.CreateBr(PostSwitchBB);
PHI->addIncoming(C, BB);
}
// Remove the load.
LI->replaceAllUsesWith(PHI);
LI->eraseFromParent();
// Clean up.
if (GEP->use_empty())
GEP->eraseFromParent();
if (GV->hasUnnamedAddr() && GV->hasPrivateLinkage() && GV->use_empty())
GV->eraseFromParent();
CurInstIterator = Switch;
return true;
}

View File

@ -35,6 +35,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/TargetTransformInfo.h"
using namespace llvm;
STATISTIC(NumSimpl, "Number of blocks simplified");
@ -293,7 +294,8 @@ static bool mergeEmptyReturnBlocks(Function &F) {
/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD) {
static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD,
const TargetTransformInfo *TTI) {
bool Changed = false;
bool LocalChange = true;
while (LocalChange) {
@ -302,7 +304,7 @@ static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD) {
// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
if (SimplifyCFG(BBIt++, TD)) {
if (SimplifyCFG(BBIt++, TD, TTI)) {
LocalChange = true;
++NumSimpl;
}
@ -317,9 +319,11 @@ static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD) {
//
bool CFGSimplifyPass::runOnFunction(Function &F) {
const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
const TargetTransformInfo *TTI =
getAnalysisIfAvailable<TargetTransformInfo>();
bool EverChanged = removeUnreachableBlocksFromFn(F);
EverChanged |= mergeEmptyReturnBlocks(F);
EverChanged |= iterativelySimplifyCFG(F, TD);
EverChanged |= iterativelySimplifyCFG(F, TD, TTI);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
@ -333,7 +337,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
return true;
do {
EverChanged = iterativelySimplifyCFG(F, TD);
EverChanged = iterativelySimplifyCFG(F, TD, TTI);
EverChanged |= removeUnreachableBlocksFromFn(F);
} while (EverChanged);

View File

@ -14,6 +14,7 @@
#define DEBUG_TYPE "simplifycfg"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
#include "llvm/DataLayout.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IRBuilder.h"
@ -39,7 +40,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/NoFolder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/DataLayout.h"
#include "llvm/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <set>
@ -82,6 +83,7 @@ namespace {
class SimplifyCFGOpt {
const DataLayout *const TD;
const TargetTransformInfo *const TTI;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
@ -101,7 +103,8 @@ class SimplifyCFGOpt {
bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
public:
explicit SimplifyCFGOpt(const DataLayout *td) : TD(td) {}
SimplifyCFGOpt(const DataLayout *td, const TargetTransformInfo *tti)
: TD(td), TTI(tti) {}
bool run(BasicBlock *BB);
};
}
@ -3459,8 +3462,13 @@ static bool ShouldBuildLookupTable(SwitchInst *SI,
/// replace the switch with lookup tables.
static bool SwitchToLookupTable(SwitchInst *SI,
IRBuilder<> &Builder,
const DataLayout* TD) {
const DataLayout* TD,
const TargetTransformInfo *TTI) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
if (TTI && !TTI->getScalarTargetTransformInfo()->shouldBuildLookupTables())
return false;
// FIXME: Handle unreachable cases.
// FIXME: If the switch is too sparse for a lookup table, perhaps we could
@ -3619,7 +3627,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (ForwardSwitchConditionToPHI(SI))
return SimplifyCFG(BB) | true;
if (SwitchToLookupTable(SI, Builder, TD))
if (SwitchToLookupTable(SI, Builder, TD, TTI))
return SimplifyCFG(BB) | true;
return false;
@ -3916,6 +3924,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// eliminates unreachable basic blocks, and does other "peephole" optimization
/// of the CFG. It returns true if a modification was made.
///
bool llvm::SimplifyCFG(BasicBlock *BB, const DataLayout *TD) {
return SimplifyCFGOpt(TD).run(BB);
bool llvm::SimplifyCFG(BasicBlock *BB, const DataLayout *TD,
const TargetTransformInfo *TTI) {
return SimplifyCFGOpt(TD, TTI).run(BB);
}

View File

@ -1,84 +0,0 @@
; RUN: llc -march=sparc < %s | FileCheck %s
; Check that all the switches turned into lookup tables by SimplifyCFG are
; turned back into switches for targets that don't like lookup tables.
@.str = private unnamed_addr constant [4 x i8] c"foo\00", align 1
@.str1 = private unnamed_addr constant [4 x i8] c"bar\00", align 1
@.str2 = private unnamed_addr constant [4 x i8] c"baz\00", align 1
@.str3 = private unnamed_addr constant [4 x i8] c"qux\00", align 1
@.str4 = private unnamed_addr constant [6 x i8] c"error\00", align 1
@switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1]
@switch.table1 = private unnamed_addr constant [4 x i8] c"*\09X\05"
@switch.table2 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
@switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)]
define i32 @f(i32 %c) {
entry:
%switch.tableidx = sub i32 %c, 42
%0 = icmp ult i32 %switch.tableidx, 7
br i1 %0, label %switch.lookup, label %return
switch.lookup:
%switch.gep = getelementptr inbounds [7 x i32]* @switch.table, i32 0, i32 %switch.tableidx
%switch.load = load i32* %switch.gep
ret i32 %switch.load
return:
ret i32 15
; CHECK: f:
; CHECK: %switch.lookup
; CHECK-NOT: sethi %hi(.Lswitch.table)
}
declare void @dummy(i8 signext, float)
define void @h(i32 %x) {
entry:
%switch.tableidx = sub i32 %x, 0
%0 = icmp ult i32 %switch.tableidx, 4
br i1 %0, label %switch.lookup, label %sw.epilog
switch.lookup:
%switch.gep = getelementptr inbounds [4 x i8]* @switch.table1, i32 0, i32 %switch.tableidx
%switch.load = load i8* %switch.gep
%switch.gep1 = getelementptr inbounds [4 x float]* @switch.table2, i32 0, i32 %switch.tableidx
%switch.load2 = load float* %switch.gep1
br label %sw.epilog
sw.epilog:
%a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ]
%b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
call void @dummy(i8 signext %a.0, float %b.0)
ret void
; CHECK: h:
; CHECK: %switch.lookup
; CHECK-NOT: sethi %hi(.Lswitch.table{{[0-9]}})
; CHECK-NOT: sethi %hi(.Lswitch.table{{[0-9]}})
}
define i8* @foostring(i32 %x) {
entry:
%switch.tableidx = sub i32 %x, 0
%0 = icmp ult i32 %switch.tableidx, 4
br i1 %0, label %switch.lookup, label %return
switch.lookup:
%switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx
%switch.load = load i8** %switch.gep
ret i8* %switch.load
return:
ret i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0)
; CHECK: foostring:
; CHECK: %switch.lookup
; CHECK-NOT: sethi %hi(.Lswitch.table3)
}
; CHECK-NOT: .Lswitch.table
; CHECK-NOT: .Lswitch.table1
; CHECK-NOT: .Lswitch.table2
; CHECK-NOT: .Lswitch.table3

View File

@ -0,0 +1,6 @@
config.suffixes = ['.ll', '.c', '.cpp']
targets = set(config.root.targets_to_build.split())
if not 'Sparc' in targets:
config.unsupported = True

View File

@ -0,0 +1,32 @@
; RUN: opt < %s -simplifycfg -S -mtriple=sparc-unknown-unknown | FileCheck %s
; Check that switches are not turned into lookup tables, as this is not
; considered profitable on the target.
define i32 @f(i32 %c) nounwind uwtable readnone {
entry:
switch i32 %c, label %sw.default [
i32 42, label %return
i32 43, label %sw.bb1
i32 44, label %sw.bb2
i32 45, label %sw.bb3
i32 46, label %sw.bb4
i32 47, label %sw.bb5
i32 48, label %sw.bb6
]
sw.bb1: br label %return
sw.bb2: br label %return
sw.bb3: br label %return
sw.bb4: br label %return
sw.bb5: br label %return
sw.bb6: br label %return
sw.default: br label %return
return:
%retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
ret i32 %retval.0
; CHECK: @f
; CHECK-NOT: getelementptr
; CHECK: switch i32 %c
}