[InstCombine] Optimize icmp-select-icmp

In special cases select instructions can be eliminated by
replacing them with a cheaper bitwise operation even when the
select result is used outside its home block. The instances implemented
are patterns like
    %x=icmp.eq
    %y=select %x,%r, null
    %z=icmp.eq|neq %y, null
    br %z,true, false
==> %x=icmp.ne
    %y=icmp.eq %r,null
    %z=or %x,%y
    br %z,true,false
The optimization is integrated into the instruction
combiner and performed only when all uses of the select result can
be replaced by the select operand proper. Dominator information is used
to check this, so dominance is now a required analysis pass in the combiner.
The optimization itself is iterative. The critical step is to replace the
select result with the non-constant select operand. The select then becomes
local, and the combiner iteratively works out simpler code patterns and
eventually eliminates the select.

rdar://17853760



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218721 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Gerolf Hoflehner 2014-10-01 00:13:22 +00:00
parent 8f70c4827a
commit 2318c2f28d
5 changed files with 277 additions and 9 deletions

View File

@ -14,6 +14,7 @@
#include "llvm/Analysis/AssumptionTracker.h" #include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h" #include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicInst.h"
@ -98,7 +99,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
AssumptionTracker *AT; AssumptionTracker *AT;
const DataLayout *DL; const DataLayout *DL;
TargetLibraryInfo *TLI; TargetLibraryInfo *TLI;
DominatorTree *DT; // not required DominatorTree *DT;
bool MadeIRChange; bool MadeIRChange;
LibCallSimplifier *Simplifier; LibCallSimplifier *Simplifier;
bool MinimizeSize; bool MinimizeSize;
@ -113,7 +114,8 @@ public:
BuilderTy *Builder; BuilderTy *Builder;
static char ID; // Pass identification, replacement for typeid static char ID; // Pass identification, replacement for typeid
InstCombiner() : FunctionPass(ID), DL(nullptr), Builder(nullptr) { InstCombiner()
: FunctionPass(ID), DL(nullptr), DT(nullptr), Builder(nullptr) {
MinimizeSize = false; MinimizeSize = false;
initializeInstCombinerPass(*PassRegistry::getPassRegistry()); initializeInstCombinerPass(*PassRegistry::getPassRegistry());
} }
@ -242,6 +244,11 @@ public:
// visitInstruction - Specify what to return for unhandled instructions... // visitInstruction - Specify what to return for unhandled instructions...
Instruction *visitInstruction(Instruction &I) { return nullptr; } Instruction *visitInstruction(Instruction &I) { return nullptr; }
bool dominatesAllUses(const Instruction *DI, const Instruction *UI,
const BasicBlock *DB) const;
bool replacedSelectWithOperand(SelectInst *SI, const ICmpInst *Icmp,
const ConstantInt *CI1,
const ConstantInt *CI2);
private: private:
bool ShouldChangeType(Type *From, Type *To) const; bool ShouldChangeType(Type *From, Type *To) const;

View File

@ -2429,6 +2429,127 @@ static bool swapMayExposeCSEOpportunities(const Value * Op0,
return GlobalSwapBenefits > 0; return GlobalSwapBenefits > 0;
} }
/// \brief Check that one use is in the same block as the definition and all
/// other uses are in blocks dominated by a given block
///
/// \param DI Definition
/// \param UI Use
/// \param DB Block that must dominate all uses of \p DI outside
///           the parent block. Note there can be a use of \p DI in \p DB.
/// \return true when \p UI is the only use of \p DI in the parent block
/// and all other uses of \p DI are in blocks dominated by \p DB. Returns
/// false (rather than asserting) when \p DI and \p UI are not in the same
/// block, when \p DI is not inserted into a block, when \p DB is the parent
/// block itself, or when no dominator tree is available.
///
bool InstCombiner::dominatesAllUses(const Instruction *DI,
                                    const Instruction *UI,
                                    const BasicBlock *DB) const {
  assert(DI && UI && "Instruction not defined");
  // Ignore definitions that are not inserted into a block.
  if (!DI->getParent())
    return false;
  // The definition and the designated use must share a block. Callers hand in
  // whatever compare they are visiting, so treat a mismatch as "no" instead
  // of a hard failure.
  if (DI->getParent() != UI->getParent())
    return false;
  // Protect from self-referencing blocks: if DB were the home block, every
  // additional use inside the home block would trivially be "dominated" and
  // UI would no longer be the only accepted use there.
  if (DI->getParent() == DB)
    return false;
  // DominatorTree available?
  if (!DT)
    return false;
  for (const User *U : DI->users()) {
    auto *Usr = cast<Instruction>(U);
    if (Usr != UI && !DT->dominates(DB, Usr->getParent()))
      return false;
  }
  return true;
}
///
/// true when the instruction sequence within a block is select-cmp-br.
///
static bool isChainSelectCmpBranch(const SelectInst *SI) {
const BasicBlock *BB = SI->getParent();
if (!BB)
return false;
auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator());
if (!BI || BI->getNumSuccessors() != 2)
return false;
auto *IC = dyn_cast<ICmpInst>(BI->getCondition());
if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI))
return false;
return true;
}
///
/// \brief True when a select result is replaced by one of its operands
/// in select-icmp sequence. This will eventually result in the elimination
/// of the select.
///
/// \param SI   Select instruction
/// \param Icmp Compare instruction; must be the 'icmp eq' that feeds the
///             conditional branch terminating the block of \p SI
/// \param CI1  Constant-folded result of comparing the first value operand
///             of the select against the RHS of \p Icmp, or null when that
///             compare did not fold to a ConstantInt. Non-zero means the
///             operand is known equal to the RHS.
/// \param CI2  Same as \p CI1 for the second value operand of the select.
///
/// Notes:
///  - The replacement is global and requires dominator information
///  - The select result is replaced by this function; the caller is
///    responsible for rewriting the compare itself
///
/// Example:
///
///    entry:
///     %4 = select i1 %3, %C* %0, %C* null
///     %5 = icmp eq %C* %4, null
///     br i1 %5, label %9, label %7
///    ...
///    ; <label>:7                                       ; preds = %entry
///     %8 = getelementptr inbounds %C* %4, i64 0, i32 0
///    ...
///
/// can be transformed to
///
///     %5 = icmp eq %C* %0, null
///     %6 = select i1 %3, i1 %5, i1 true
///     br i1 %6, label %9, label %7
///    ...
///    ; <label>:7                                       ; preds = %entry
///     %8 = getelementptr inbounds %C* %0, i64 0, i32 0  // replace by %0!
///
/// Similar when the first operand of the select is the constant.
///
/// FIXME: Currently the function considers equal compares only. It should be
/// possible to extend it to not equal compares also.
///
bool InstCombiner::replacedSelectWithOperand(SelectInst *SI,
                                             const ICmpInst *Icmp,
                                             const ConstantInt *CI1,
                                             const ConstantInt *CI2) {
  // Only 'icmp eq' is sound here: on the false edge of 'select == C' the
  // select cannot have produced the operand that equals C, so it must have
  // produced the other one. For 'icmp ne' a folded-to-true operand compare
  // does not identify which operand the select produced.
  if (Icmp->getPredicate() != ICmpInst::ICMP_EQ || !isChainSelectCmpBranch(SI))
    return false;

  // isChainSelectCmpBranch only guarantees that *some* icmp of SI feeds the
  // two-way branch; the edge reasoning below needs it to be this very icmp,
  // located in the select's own block.
  BasicBlock *Home = SI->getParent();
  auto *BI = cast<BranchInst>(Home->getTerminator());
  if (BI->getCondition() != Icmp || Icmp->getParent() != Home)
    return false;

  // Code sequence is select - icmp.eq - br
  unsigned ReplaceWithOpd = 0;
  if (CI1 && !CI1->isZero())
    // The first operand of the select and the RHS of the compare match,
    // so try to substitute the select result with its second operand.
    // Example:
    // %4 = select i1 %3, %C* null, %C* %0
    // %5 = icmp eq %C* %4, null
    // ==> could replace select with second operand
    ReplaceWithOpd = 2;
  else if (CI2 && !CI2->isZero())
    // Similar when the second operand of the select matches the RHS.
    // Example:
    // %4 = select i1 %3, %C* %0, %C* null
    // %5 = icmp eq %C* %4, null
    // ==> could replace select with first operand
    ReplaceWithOpd = 1;
  if (!ReplaceWithOpd)
    return false;

  // The replacement is valid on the else (compare-is-false) path only.
  BasicBlock *Succ = BI->getSuccessor(1);

  // Block dominance alone is not enough: if Succ can also be entered through
  // another edge (e.g. from the 'then' successor), a use dominated by Succ
  // may still observe the constant operand. Requiring a single predecessor
  // is conservative but cheap, and guarantees Succ is entered only through
  // the else edge of this branch.
  if (!Succ->getSinglePredecessor() || !dominatesAllUses(SI, Icmp, Succ))
    return false;

  SI->replaceAllUsesWith(SI->getOperand(ReplaceWithOpd));
  return true;
}
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false; bool Changed = false;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@ -2885,8 +3006,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// fold to a constant (in which case the icmp is replaced with a select // fold to a constant (in which case the icmp is replaced with a select
// which will usually simplify) or this is the only user of the // which will usually simplify) or this is the only user of the
// select (in which case we are trading a select+icmp for a simpler // select (in which case we are trading a select+icmp for a simpler
// select+icmp). // select+icmp) or all uses of the select can be replaced based on
if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) { // dominance information ("Global cases").
bool Transform = false;
if (Op1 && Op2)
Transform = true;
else if (Op1 || Op2) {
if (LHSI->hasOneUse())
Transform = true;
else
// Global cases
Transform = replacedSelectWithOperand(
cast<SelectInst>(LHSI), &I, dyn_cast_or_null<ConstantInt>(Op1),
dyn_cast_or_null<ConstantInt>(Op2));
}
if (Transform) {
if (!Op1) if (!Op1)
Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1),
RHSC, I.getName()); RHSC, I.getName());

View File

@ -90,6 +90,7 @@ INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine",
"Combine redundant instructions", false, false) "Combine redundant instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(InstCombiner, "instcombine", INITIALIZE_PASS_END(InstCombiner, "instcombine",
"Combine redundant instructions", false, false) "Combine redundant instructions", false, false)
@ -97,6 +98,8 @@ void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG(); AU.setPreservesCFG();
AU.addRequired<AssumptionTracker>(); AU.addRequired<AssumptionTracker>();
AU.addRequired<TargetLibraryInfo>(); AU.addRequired<TargetLibraryInfo>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
} }
@ -2933,12 +2936,9 @@ bool InstCombiner::runOnFunction(Function &F) {
AT = &getAnalysis<AssumptionTracker>(); AT = &getAnalysis<AssumptionTracker>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr; DL = DLP ? &DLP->getDataLayout() : nullptr;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TLI = &getAnalysis<TargetLibraryInfo>(); TLI = &getAnalysis<TargetLibraryInfo>();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
// Minimizing size? // Minimizing size?
MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::MinSize); Attribute::MinSize);

View File

@ -2,11 +2,11 @@
define void @entry() nounwind { define void @entry() nounwind {
entry: entry:
; CHECK: br label %for.cond
br label %for.cond br label %for.cond
for.cond: for.cond:
%local = phi <1 x i32> [ <i32 0>, %entry ], [ %phi2, %cond.end47 ] %local = phi <1 x i32> [ <i32 0>, %entry ], [ %phi2, %cond.end47 ]
; CHECK: sub <1 x i32> <i32 92>, %local
%phi3 = sub <1 x i32> zeroinitializer, %local %phi3 = sub <1 x i32> zeroinitializer, %local
br label %cond.end br label %cond.end

View File

@ -0,0 +1,127 @@
; Replace a 'select' with 'or' in 'select - cmp [eq|ne] - br' sequence
; RUN: opt -instcombine -S < %s | FileCheck %s
%C = type <{ %struct.S }>
%struct.S = type { i64*, i32, i32 }
declare void @bar(%struct.S *) #1
; test1: 'select i1 %6, %C* %0, %C* null' is compared 'eq' against null and
; the result drives the branch. The select result %7 is used again in block
; %10, the successor taken when the compare is false.
define void @test1(%C*) {
entry:
%1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
%m = load i64** %1, align 8
%2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
%n = load i64** %2, align 8
%3 = getelementptr inbounds i64* %m, i64 9
%4 = bitcast i64* %3 to i64 (%C*)**
%5 = load i64 (%C*)** %4, align 8
%6 = icmp eq i64* %m, %n
%7 = select i1 %6, %C* %0, %C* null
%8 = icmp eq %C* %7, null
br i1 %8, label %12, label %10
; <label>:9 ; preds = %10, %12
ret void
; <label>:10 ; preds = %entry
%11 = getelementptr inbounds %C* %7, i64 0, i32 0
tail call void @bar(%struct.S* %11)
br label %9
; <label>:12 ; preds = %entry
%13 = tail call i64 %5(%C* %0)
br label %9
; CHECK-LABEL: @test1(
; CHECK-NOT: select
; CHECK: or
}
; test2: same shape as an 'eq'-against-null select/compare/branch chain, but
; the select has null as its first value operand:
; 'select i1 %6, %C* null, %C* %0'. The select result %7 is used again in
; block %10, the successor taken when the compare is false.
define void @test2(%C*) {
entry:
%1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
%m = load i64** %1, align 8
%2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
%n = load i64** %2, align 8
%3 = getelementptr inbounds i64* %m, i64 9
%4 = bitcast i64* %3 to i64 (%C*)**
%5 = load i64 (%C*)** %4, align 8
%6 = icmp eq i64* %m, %n
%7 = select i1 %6, %C* null, %C* %0
%8 = icmp eq %C* %7, null
br i1 %8, label %12, label %10
; <label>:9 ; preds = %10, %12
ret void
; <label>:10 ; preds = %entry
%11 = getelementptr inbounds %C* %7, i64 0, i32 0
tail call void @bar(%struct.S* %11)
br label %9
; <label>:12 ; preds = %entry
%13 = tail call i64 %5(%C* %0)
br label %9
; CHECK-LABEL: @test2(
; CHECK-NOT: select
; CHECK: or
}
; test3: 'select i1 %6, %C* %0, %C* null' is compared 'ne' against null and
; the branch targets are swapped accordingly. The select result %7 is used
; again in block %10, the successor taken when the compare is true.
define void @test3(%C*) {
entry:
%1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
%m = load i64** %1, align 8
%2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
%n = load i64** %2, align 8
%3 = getelementptr inbounds i64* %m, i64 9
%4 = bitcast i64* %3 to i64 (%C*)**
%5 = load i64 (%C*)** %4, align 8
%6 = icmp eq i64* %m, %n
%7 = select i1 %6, %C* %0, %C* null
%8 = icmp ne %C* %7, null
br i1 %8, label %10, label %12
; <label>:9 ; preds = %10, %12
ret void
; <label>:10 ; preds = %entry
%11 = getelementptr inbounds %C* %7, i64 0, i32 0
tail call void @bar(%struct.S* %11)
br label %9
; <label>:12 ; preds = %entry
%13 = tail call i64 %5(%C* %0)
br label %9
; CHECK-LABEL: @test3(
; CHECK-NOT: select
; CHECK: or
}
; test4: 'ne'-against-null variant where the select has null as its first
; value operand: 'select i1 %6, %C* null, %C* %0'. The select result %7 is
; used again in block %10, the successor taken when the compare is true.
define void @test4(%C*) {
entry:
%1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
%m = load i64** %1, align 8
%2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
%n = load i64** %2, align 8
%3 = getelementptr inbounds i64* %m, i64 9
%4 = bitcast i64* %3 to i64 (%C*)**
%5 = load i64 (%C*)** %4, align 8
%6 = icmp eq i64* %m, %n
%7 = select i1 %6, %C* null, %C* %0
%8 = icmp ne %C* %7, null
br i1 %8, label %10, label %12
; <label>:9 ; preds = %10, %12
ret void
; <label>:10 ; preds = %entry
%11 = getelementptr inbounds %C* %7, i64 0, i32 0
tail call void @bar(%struct.S* %11)
br label %9
; <label>:12 ; preds = %entry
%13 = tail call i64 %5(%C* %0)
br label %9
; CHECK-LABEL: @test4(
; CHECK-NOT: select
; CHECK: or
}