diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 6c0d4e74a7a..da7a22c0c6e 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -14,6 +14,7 @@ #include "llvm/Analysis/AssumptionTracker.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" @@ -98,7 +99,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner AssumptionTracker *AT; const DataLayout *DL; TargetLibraryInfo *TLI; - DominatorTree *DT; // not required + DominatorTree *DT; bool MadeIRChange; LibCallSimplifier *Simplifier; bool MinimizeSize; @@ -113,7 +114,8 @@ public: BuilderTy *Builder; static char ID; // Pass identification, replacement for typeid - InstCombiner() : FunctionPass(ID), DL(nullptr), Builder(nullptr) { + InstCombiner() + : FunctionPass(ID), DL(nullptr), DT(nullptr), Builder(nullptr) { MinimizeSize = false; initializeInstCombinerPass(*PassRegistry::getPassRegistry()); } @@ -242,6 +244,11 @@ public: // visitInstruction - Specify what to return for unhandled instructions... 
Instruction *visitInstruction(Instruction &I) { return nullptr; }
+  bool dominatesAllUses(const Instruction *DI, const Instruction *UI,
+                        const BasicBlock *DB) const;
+  bool replacedSelectWithOperand(SelectInst *SI, const ICmpInst *Icmp,
+                                 const ConstantInt *CI1,
+                                 const ConstantInt *CI2);
 
 private:
   bool ShouldChangeType(Type *From, Type *To) const;
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 00623b1cbf6..f6d605ee8e3 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2429,6 +2429,155 @@ static bool swapMayExposeCSEOpportunities(const Value * Op0,
   return GlobalSwapBenefits > 0;
 }
 
+/// \brief Check that one use is in the same block as the definition and all
+/// other uses are in blocks dominated by a given block
+///
+/// \param DI Definition
+/// \param UI Use
+/// \param DB Block that must dominate all uses of \p DI outside
+///           the parent block
+/// \return true when \p UI is the only use of \p DI in the parent block
+/// and all other uses of \p DI are in blocks dominated by \p DB; false
+/// otherwise, and always false when no dominator tree is available.
+///
+bool InstCombiner::dominatesAllUses(const Instruction *DI,
+                                    const Instruction *UI,
+                                    const BasicBlock *DB) const {
+  assert(DI && UI && "Instruction not defined\n");
+  const BasicBlock *Parent = DI->getParent();
+  // Ignore incomplete definitions; DI and UI must share a block.
+  if (!Parent || Parent != UI->getParent())
+    return false;
+  // DominatorTree available?
+  if (!DT)
+    return false;
+  for (const User *U : DI->users()) {
+    auto *Usr = cast<Instruction>(U);
+    if (Usr == UI)
+      continue;
+    // UI must be the only use in the parent block. Check this explicitly:
+    // a block dominates itself, so when DB is the parent block (self-loop)
+    // the dominance query below would wrongly accept such a use.
+    if (Usr->getParent() == Parent)
+      return false;
+    if (!DT->dominates(DB, Usr->getParent()))
+      return false;
+  }
+  return true;
+}
+
+///
+/// \brief True when the block containing \p SI ends in a two-way branch whose
+/// condition is an icmp that has \p SI as an operand (select-cmp-br).
+///
+static bool isChainSelectCmpBranch(const SelectInst *SI) {
+  const BasicBlock *BB = SI->getParent();
+  if (!BB)
+    return false;
+  auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator());
+  if (!BI || BI->getNumSuccessors() != 2)
+    return false;
+  auto *IC = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI))
+    return false;
+  return true;
+}
+
+///
+/// \brief True when a select result is replaced by one of its operands
+/// in select-icmp sequence. This will eventually result in the elimination
+/// of the select.
+///
+/// \param SI Select instruction
+/// \param Icmp Compare instruction
+/// \param CI1 'true' when first select operand is equal to RHSC of Icmp
+/// \param CI2 'true' when second select operand is equal to RHSC of Icmp
+///
+/// Notes:
+/// - The replacement is global and requires dominator information
+/// - Only uses of the select outside its own block are rewritten here; the
+///   compare itself is left to the caller
+/// - The successor taken when the compare is false must have the select's
+///   block as its single predecessor. Otherwise the replaced uses could also
+///   be reached on a path where the compare was true.
+///
+/// Example:
+///
+/// entry:
+///   %4 = select i1 %3, %C* %0, %C* null
+///   %5 = icmp eq %C* %4, null
+///   br i1 %5, label %9, label %7
+///   ...
+/// ; <label>:7 ; preds = %entry
+///   %8 = getelementptr inbounds %C* %4, i64 0, i32 0
+///   ...
+///
+/// can be transformed to
+///
+///   %5 = icmp eq %C* %0, null
+///   %6 = select i1 %3, i1 %5, i1 true
+///   br i1 %6, label %9, label %7
+///   ...
+/// ; <label>:7 ; preds = %entry
+///   %8 = getelementptr inbounds %C* %0, i64 0, i32 0 // replace by %0!
+///
+/// Similar when the first operand of the select is a constant or/and
+/// the compare is for not equal rather than equal.
+///
+/// FIXME: Currently the function considers equal compares only. It should be
+/// possible to extend it to not equal compares also.
+///
+bool InstCombiner::replacedSelectWithOperand(SelectInst *SI,
+                                             const ICmpInst *Icmp,
+                                             const ConstantInt *CI1,
+                                             const ConstantInt *CI2) {
+  // Only the select - icmp.eq - br code sequence is handled.
+  if (!isChainSelectCmpBranch(SI) ||
+      Icmp->getPredicate() != ICmpInst::ICMP_EQ)
+    return false;
+
+  unsigned ReplaceWithOpd = 0;
+  if (CI1 && !CI1->isZero())
+    // The first constant operand of the select and the RHS of
+    // the compare match, so try to substitute
+    // the select results with its second operand
+    // Example:
+    // %4 = select i1 %3, %C* null, %C* %0
+    // %5 = icmp eq %C* %4, null
+    // ==> could replace select with second operand
+    ReplaceWithOpd = 2;
+  else if (CI2 && !CI2->isZero())
+    // Similar when the second operand of the select is a constant
+    // Example:
+    // %4 = select i1 %3, %C* %0, %C* null
+    // %5 = icmp eq %C* %4, null
+    // ==> could replace select with first operand
+    ReplaceWithOpd = 1;
+  if (!ReplaceWithOpd)
+    return false;
+
+  // Replace select with operand on else path for EQ compares.
+  BasicBlock *BB = SI->getParent();
+  BasicBlock *Succ = BB->getTerminator()->getSuccessor(1);
+  // Dominance by Succ only proves the compare was false if the edge from BB
+  // is the sole way into Succ. getSinglePredecessor() (rather than a unique
+  // predecessor) also rejects a branch whose two targets are both Succ.
+  if (!Succ->getSinglePredecessor() || !dominatesAllUses(SI, Icmp, Succ))
+    return false;
+
+  // Rewrite only the uses outside BB. Uses inside BB execute before the
+  // branch, where the select may still evaluate to its other operand.
+  Value *Replacement = SI->getOperand(ReplaceWithOpd);
+  for (auto UI = SI->use_begin(), UE = SI->use_end(); UI != UE;) {
+    Use &U = *UI++; // advance first: set() unlinks U from SI's use list
+    auto *Usr = dyn_cast<Instruction>(U.getUser());
+    if (Usr && Usr->getParent() == BB)
+      continue;
+    U.set(Replacement);
+  }
+  return true;
+}
+
 Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   bool Changed = false;
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -2885,8 +3034,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         // fold to a constant (in which case the icmp is replaced with a select
         // which will usually simplify) or this is the only user of the
         // select (in which case we are trading a select+icmp for a simpler
-        // select+icmp).
-        if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) {
+        // select+icmp) or all uses of the select can be replaced based on
+        // dominance information ("Global cases").
+ bool Transform = false; + if (Op1 && Op2) + Transform = true; + else if (Op1 || Op2) { + if (LHSI->hasOneUse()) + Transform = true; + else + // Global cases + Transform = replacedSelectWithOperand( + cast<SelectInst>(LHSI), &I, dyn_cast_or_null<ConstantInt>(Op1), + dyn_cast_or_null<ConstantInt>(Op2)); + } + if (Transform) { if (!Op1) Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC, I.getName()); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index ac0c01e3c7b..a3a29b4e0bd 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -90,6 +90,7 @@ INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine", "Combine redundant instructions", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(InstCombiner, "instcombine", "Combine redundant instructions", false, false) @@ -97,6 +98,8 @@ void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AssumptionTracker>(); AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); } @@ -2933,12 +2936,9 @@ bool InstCombiner::runOnFunction(Function &F) { AT = &getAnalysis<AssumptionTracker>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); DL = DLP ? &DLP->getDataLayout() : nullptr; + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); TLI = &getAnalysis<TargetLibraryInfo>(); - DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DT = DTWP ? &DTWP->getDomTree() : nullptr; - // Minimizing size? 
MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                                 Attribute::MinSize);
diff --git a/test/Transforms/InstCombine/pr12338.ll b/test/Transforms/InstCombine/pr12338.ll
index d34600f0fa5..051c21ca41a 100644
--- a/test/Transforms/InstCombine/pr12338.ll
+++ b/test/Transforms/InstCombine/pr12338.ll
@@ -2,11 +2,11 @@
 
 define void @entry() nounwind {
 entry:
+; CHECK: br label %for.cond
   br label %for.cond
 
 for.cond:
   %local = phi <1 x i32> [ <i32 0>, %entry ], [ %phi2, %cond.end47 ]
-; CHECK: sub <1 x i32> <i32 92>, %local
   %phi3 = sub <1 x i32> zeroinitializer, %local
   br label %cond.end
 
diff --git a/test/Transforms/InstCombine/pr21199.ll b/test/Transforms/InstCombine/pr21199.ll
new file mode 100644
index 00000000000..e6599fb640d
--- /dev/null
+++ b/test/Transforms/InstCombine/pr21199.ll
@@ -0,0 +1,27 @@
+; do not replace a 'select' with 'or' in 'select - cmp - br' sequence
+; %cond feeds the 'ult' compare in entry and is also used inside the
+; for.body loop; the select is expected to remain in the output.
+; RUN: opt -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @f(i32)
+
+define void @test(i32 %len) {
+entry:
+  %cmp = icmp ult i32 %len, 8
+  %cond = select i1 %cmp, i32 %len, i32 8
+  %cmp11 = icmp ult i32 0, %cond
+  br i1 %cmp11, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  tail call void @f(i32 %cond)
+  %inc = add i32 %i.02, 1
+  %cmp1 = icmp ult i32 %inc, %cond
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+; CHECK: select
+}
diff --git a/test/Transforms/InstCombine/select-cmp-br.ll b/test/Transforms/InstCombine/select-cmp-br.ll
new file mode 100644
index 00000000000..2e999510b42
--- /dev/null
+++ b/test/Transforms/InstCombine/select-cmp-br.ll
@@ -0,0 +1,133 @@
+; Replace a 'select' with 'or' in 'select - cmp [eq|ne] - br' sequence
+; In each test the select %7 is used by the compare in entry and by a
+; getelementptr in block %10, whose only predecessor is entry.
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+%C = type <{ %struct.S }>
+%struct.S = type { i64*, i32, i32 }
+
+declare void @bar(%struct.S *) #1
+
+; test1: the select's false operand is null; 'eq' compare against null.
+define void @test1(%C*) {
+entry:
+  %1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
+  %m = load i64** %1, align 8
+  %2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
+  %n = load i64** %2, align 8
+  %3 = getelementptr inbounds i64* %m, i64 9
+  %4 = bitcast i64* %3 to i64 (%C*)**
+  %5 = load i64 (%C*)** %4, align 8
+  %6 = icmp eq i64* %m, %n
+  %7 = select i1 %6, %C* %0, %C* null
+  %8 = icmp eq %C* %7, null
+  br i1 %8, label %12, label %10
+
+; <label>:9                                       ; preds = %10, %12
+  ret void
+
+; <label>:10                                      ; preds = %entry
+  %11 = getelementptr inbounds %C* %7, i64 0, i32 0
+  tail call void @bar(%struct.S* %11)
+  br label %9
+
+; <label>:12                                      ; preds = %entry
+  %13 = tail call i64 %5(%C* %0)
+  br label %9
+; CHECK-LABEL: @test1(
+; CHECK-NOT: select
+; CHECK: or
+}
+
+; test2: as test1, but null is the select's true operand.
+define void @test2(%C*) {
+entry:
+  %1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
+  %m = load i64** %1, align 8
+  %2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
+  %n = load i64** %2, align 8
+  %3 = getelementptr inbounds i64* %m, i64 9
+  %4 = bitcast i64* %3 to i64 (%C*)**
+  %5 = load i64 (%C*)** %4, align 8
+  %6 = icmp eq i64* %m, %n
+  %7 = select i1 %6, %C* null, %C* %0
+  %8 = icmp eq %C* %7, null
+  br i1 %8, label %12, label %10
+
+; <label>:9                                       ; preds = %10, %12
+  ret void
+
+; <label>:10                                      ; preds = %entry
+  %11 = getelementptr inbounds %C* %7, i64 0, i32 0
+  tail call void @bar(%struct.S* %11)
+  br label %9
+
+; <label>:12                                      ; preds = %entry
+  %13 = tail call i64 %5(%C* %0)
+  br label %9
+; CHECK-LABEL: @test2(
+; CHECK-NOT: select
+; CHECK: or
+}
+
+; test3: as test1, but with an 'ne' compare and the branch targets swapped.
+define void @test3(%C*) {
+entry:
+  %1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
+  %m = load i64** %1, align 8
+  %2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
+  %n = load i64** %2, align 8
+  %3 = getelementptr inbounds i64* %m, i64 9
+  %4 = bitcast i64* %3 to i64 (%C*)**
+  %5 = load i64 (%C*)** %4, align 8
+  %6 = icmp eq i64* %m, %n
+  %7 = select i1 %6, %C* %0, %C* null
+  %8 = icmp ne %C* %7, null
+  br i1 %8, label %10, label %12
+
+; <label>:9                                       ; preds = %10, %12
+  ret void
+
+; <label>:10                                      ; preds = %entry
+  %11 = getelementptr inbounds %C* %7, i64 0, i32 0
+  tail call void @bar(%struct.S* %11)
+  br label %9
+
+; <label>:12                                      ; preds = %entry
+  %13 = tail call i64 %5(%C* %0)
+  br label %9
+; CHECK-LABEL: @test3(
+; CHECK-NOT: select
+; CHECK: or
+}
+
+; test4: as test2, but with an 'ne' compare and the branch targets swapped.
+define void @test4(%C*) {
+entry:
+  %1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
+  %m = load i64** %1, align 8
+  %2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
+  %n = load i64** %2, align 8
+  %3 = getelementptr inbounds i64* %m, i64 9
+  %4 = bitcast i64* %3 to i64 (%C*)**
+  %5 = load i64 (%C*)** %4, align 8
+  %6 = icmp eq i64* %m, %n
+  %7 = select i1 %6, %C* null, %C* %0
+  %8 = icmp ne %C* %7, null
+  br i1 %8, label %10, label %12
+
+; <label>:9                                       ; preds = %10, %12
+  ret void
+
+; <label>:10                                      ; preds = %entry
+  %11 = getelementptr inbounds %C* %7, i64 0, i32 0
+  tail call void @bar(%struct.S* %11)
+  br label %9
+
+; <label>:12                                      ; preds = %entry
+  %13 = tail call i64 %5(%C* %0)
+  br label %9
+; CHECK-LABEL: @test4(
+; CHECK-NOT: select
+; CHECK: or
+}