[IndVars] Try to use existing values in RewriteLoopExitValues.

Summary:
In RewriteLoopExitValues, before expanding out an SCEV expression using
SCEVExpander, try to see if an existing LLVM IR expression already
computes the value we're interested in.  If so use that existing
expression.

Apart from reducing IndVars' reliance on the rest of the compilation
pipeline, this also prevents IndVars from classifying some expressions as
"high cost" when they are not.  For instance,
`InductiveRangeCheckElimination` often emits code of the following form:

```
len = umin(len_A, len_B)

loop:
  ...
  if (i++ < len)
    goto loop

outside_loop:
    use(i)
```

`SCEVExpander` refuses to rewrite the use of `i` in `outside_loop`,
since it thinks the value of `i` on loop exit, `len`, is a high cost
expansion since it contains an `umax` in it.  With this change,
`IndVars` can see that it can re-use `len` instead of creating a new
expression to compute `umin(len_A, len_B)`.

I considered putting this cleverness in `SCEVExpander`, but I was
worried that it might then have a detrimental effect on other passes
that use it.  So I decided it was better to just do this in the one
place where it seems like an obviously good idea, with the intent of
generalizing later if needed.

Reviewers: atrick, reames

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D10782

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241838 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Sanjoy Das 2015-07-09 18:46:12 +00:00
parent 13c5e09c6e
commit 4db527032a
2 changed files with 90 additions and 2 deletions

View File

@ -41,6 +41,7 @@
#include "llvm/IR/Instructions.h" #include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h" #include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h" #include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h" #include "llvm/Support/Debug.h"
@ -135,6 +136,10 @@ namespace {
PHINode *IndVar, SCEVExpander &Rewriter); PHINode *IndVar, SCEVExpander &Rewriter);
void SinkUnusedInvariants(Loop *L); void SinkUnusedInvariants(Loop *L);
Value *ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
Instruction *InsertPt, Type *Ty,
bool &IsHighCostExpansion);
}; };
} }
@ -496,6 +501,52 @@ struct RewritePhi {
}; };
} }
/// Produce a value of type \p ResultTy for the SCEV \p S at \p InsertPt.
///
/// When the expander does not consider \p S a high cost expansion, \p S is
/// simply expanded.  Otherwise the conditional branches terminating the latches
/// of \p L are inspected: if such a branch tests an integer compare between two
/// instructions, and one of those instructions has \p S as its SCEV and
/// dominates \p InsertPt, that instruction is returned as-is and no new code is
/// emitted.  If no such instruction exists, \p S is expanded anyway.
///
/// \param [out] IsHighCostExpansion  set to true only when \p S was classified
///   as high cost and had to be expanded regardless; set to false in every
///   other case.
/// \returns the expanded or re-used value.
Value *IndVarSimplify::ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
                                          Loop *L, Instruction *InsertPt,
                                          Type *ResultTy,
                                          bool &IsHighCostExpansion) {
  using namespace llvm::PatternMatch;

  // Cheap expansions need no special treatment.
  const bool IsCheap = !Rewriter.isHighCostExpansion(S, L);
  if (IsCheap) {
    IsHighCostExpansion = false;
    return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
  }

  // S is expensive to materialize.  Look for a value that already computes it
  // among the operands of the compares controlling the latch branches.  This
  // is a deliberately simple heuristic; it could be made considerably smarter.
  SmallVector<BasicBlock *, 4> LatchBlocks;
  L->getLoopLatches(LatchBlocks);

  for (BasicBlock *Latch : LatchBlocks) {
    ICmpInst::Predicate CmpPred;
    Instruction *CmpLHS, *CmpRHS;
    BasicBlock *IfTrue, *IfFalse;

    const bool IsBranchOnICmp =
        match(Latch->getTerminator(),
              m_Br(m_ICmp(CmpPred, m_Instruction(CmpLHS),
                          m_Instruction(CmpRHS)),
                   IfTrue, IfFalse));
    if (IsBranchOnICmp) {
      // Try the left operand first, then the right one.
      Instruction *const Operands[] = {CmpLHS, CmpRHS};
      for (Instruction *Operand : Operands) {
        if (SE->getSCEV(Operand) == S && DT->dominates(Operand, InsertPt)) {
          IsHighCostExpansion = false;
          return Operand;
        }
      }
    }
  }

  // Nothing reusable was found; let SCEVExpander emit the expensive code.
  assert(Rewriter.isHighCostExpansion(S, L) && "this should not have changed!");
  IsHighCostExpansion = true;
  return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
}
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// RewriteLoopExitValues - Optimize IV users outside the loop. // RewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop. // As a side effect, reduces the amount of IV processing within the loop.
@ -628,7 +679,9 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
continue; continue;
} }
Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); bool HighCost = false;
Value *ExitVal = ExpandSCEVIfNeeded(Rewriter, ExitValue, L, Inst,
PN->getType(), HighCost);
DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
<< " LoopVal = " << *Inst << "\n"); << " LoopVal = " << *Inst << "\n");
@ -637,7 +690,6 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
DeadInsts.push_back(ExitVal); DeadInsts.push_back(ExitVal);
continue; continue;
} }
bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L);
// Collect all the candidate PHINodes to be rewritten. // Collect all the candidate PHINodes to be rewritten.
RewritePhiSet.push_back( RewritePhiSet.push_back(

View File

@ -0,0 +1,36 @@
; RUN: opt -S -indvars < %s | FileCheck %s
; Byte-processing loop whose bound is the unsigned minimum of the two length
; arguments.  The block after the loop uses the incremented induction variable;
; the FileCheck directives below expect -indvars to rewrite that use to the
; already-computed bound %tmp14.
define void @f(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp8) {
; CHECK-LABEL: @f(
not_zero11.preheader:
; %tmp14 is the unsigned minimum of %length.i and %length.i.88, built from an
; icmp ugt feeding a select.
%tmp13 = icmp ugt i32 %length.i, %length.i.88
%tmp14 = select i1 %tmp13, i32 %length.i.88, i32 %length.i
; Loop guard: the loop is entered only when the bound is positive (signed).
%tmp15 = icmp sgt i32 %tmp14, 0
br i1 %tmp15, label %not_zero11, label %not_zero11.postloop
not_zero11:
; %v_1 is the induction variable: 0 on entry, %tmp22 on the backedge.
%v_1 = phi i32 [ %tmp22, %not_zero11 ], [ 0, %not_zero11.preheader ]
%tmp16 = zext i32 %v_1 to i64
; Load the byte at %tmp8[%v_1], OR it with %tmp10, and store the truncated
; result to %tmp12[%v_1].
%tmp17 = getelementptr inbounds i8, i8* %tmp8, i64 %tmp16
%tmp18 = load i8, i8* %tmp17, align 1
%tmp19 = zext i8 %tmp18 to i32
%tmp20 = or i32 %tmp19, %tmp10
%tmp21 = trunc i32 %tmp20 to i8
%addr22 = getelementptr inbounds i8, i8* %tmp12, i64 %tmp16
store i8 %tmp21, i8* %addr22, align 1
; Increment and latch test: keep looping while the incremented value is below
; the bound %tmp14 (signed compare).
%tmp22 = add nuw nsw i32 %v_1, 1
%tmp23 = icmp slt i32 %tmp22, %tmp14
br i1 %tmp23, label %not_zero11, label %main.exit.selector
main.exit.selector:
; The input compares %tmp22 against %length.i; the expected output compares
; %tmp14 instead.
; CHECK-LABEL: main.exit.selector:
; CHECK: %tmp24 = icmp slt i32 %tmp14, %length.i
%tmp24 = icmp slt i32 %tmp22, %length.i
br i1 %tmp24, label %not_zero11.postloop, label %leave
leave:
ret void
not_zero11.postloop:
ret void
}