Handle loops with a negative induction variable increment

This patch extends the LoopReroll pass to handle loops that
are similar to the following:

      while (len > 1) {
            sum4 += buf[len];
            sum4 += buf[len-1];
            len -= 2;
        }

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243171 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Lawrence Hu 2015-07-24 22:01:49 +00:00
parent b0fd31a910
commit 5136ca2c6d
2 changed files with 83 additions and 37 deletions

View File

@ -166,7 +166,10 @@ namespace {
typedef SmallVector<Instruction *, 16> SmallInstructionVector; typedef SmallVector<Instruction *, 16> SmallInstructionVector;
typedef SmallSet<Instruction *, 16> SmallInstructionSet; typedef SmallSet<Instruction *, 16> SmallInstructionSet;
// A chain of isomorphic instructions, indentified by a single-use PHI, // Map between induction variable and its increment
DenseMap<Instruction *, int64_t> IVToIncMap;
// A chain of isomorphic instructions, identified by a single-use PHI
// representing a reduction. Only the last value may be used outside the // representing a reduction. Only the last value may be used outside the
// loop. // loop.
struct SimpleLoopReduction { struct SimpleLoopReduction {
@ -335,7 +338,7 @@ namespace {
// x[i*3+1] = y2 // x[i*3+1] = y2
// x[i*3+2] = y3 // x[i*3+2] = y3
// //
// Base instruction -> i*3 // Base instruction -> i*3
// +---+----+ // +---+----+
// / | \ // / | \
// ST[y1] +1 +2 <-- Roots // ST[y1] +1 +2 <-- Roots
@ -366,8 +369,10 @@ namespace {
struct DAGRootTracker { struct DAGRootTracker {
DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV, DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
ScalarEvolution *SE, AliasAnalysis *AA, ScalarEvolution *SE, AliasAnalysis *AA,
TargetLibraryInfo *TLI) TargetLibraryInfo *TLI,
: Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV) {} DenseMap<Instruction *, int64_t> &IncrMap)
: Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV),
IVToIncMap(IncrMap) {}
/// Stage 1: Find all the DAG roots for the induction variable. /// Stage 1: Find all the DAG roots for the induction variable.
bool findRoots(); bool findRoots();
@ -417,7 +422,7 @@ namespace {
// The loop induction variable. // The loop induction variable.
Instruction *IV; Instruction *IV;
// Loop step amount. // Loop step amount.
uint64_t Inc; int64_t Inc;
// Loop reroll count; if Inc == 1, this records the scaling applied // Loop reroll count; if Inc == 1, this records the scaling applied
// to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ; // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
// If Inc is not 1, Scale = Inc. // If Inc is not 1, Scale = Inc.
@ -430,6 +435,8 @@ namespace {
// they are used in (or specially, IL_All for instructions // they are used in (or specially, IL_All for instructions
// used in the loop increment mechanism). // used in the loop increment mechanism).
UsesTy Uses; UsesTy Uses;
// Map between induction variable and its increment
DenseMap<Instruction *, int64_t> &IVToIncMap;
}; };
void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs); void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
@ -484,13 +491,12 @@ void LoopReroll::collectPossibleIVs(Loop *L,
continue; continue;
if (const SCEVConstant *IncSCEV = if (const SCEVConstant *IncSCEV =
dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE))) { dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE))) {
if (!IncSCEV->getValue()->getValue().isStrictlyPositive()) const APInt &AInt = IncSCEV->getValue()->getValue().abs();
if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc))
continue; continue;
if (IncSCEV->getValue()->uge(MaxInc)) IVToIncMap[I] = IncSCEV->getValue()->getSExtValue();
continue; DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
<< "\n");
DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " <<
*PHISCEV << "\n");
PossibleIVs.push_back(I); PossibleIVs.push_back(I);
} }
} }
@ -699,17 +705,11 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
} }
} }
int64_t V = CI->getValue().getSExtValue(); int64_t V = std::abs(CI->getValue().getSExtValue());
if (Roots.find(V) != Roots.end()) if (Roots.find(V) != Roots.end())
// No duplicates, please. // No duplicates, please.
return false; return false;
// FIXME: Add support for negative values.
if (V < 0) {
DEBUG(dbgs() << "LRR: Aborting due to negative value: " << V << "\n");
return false;
}
Roots[V] = cast<Instruction>(I); Roots[V] = cast<Instruction>(I);
} }
@ -731,7 +731,7 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
unsigned NumBaseUses = BaseUsers.size(); unsigned NumBaseUses = BaseUsers.size();
if (NumBaseUses == 0) if (NumBaseUses == 0)
NumBaseUses = Roots.begin()->second->getNumUses(); NumBaseUses = Roots.begin()->second->getNumUses();
// Check that every node has the same number of users. // Check that every node has the same number of users.
for (auto &KV : Roots) { for (auto &KV : Roots) {
if (KV.first == 0) if (KV.first == 0)
@ -744,7 +744,7 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
} }
} }
return true; return true;
} }
bool LoopReroll::DAGRootTracker:: bool LoopReroll::DAGRootTracker::
@ -787,7 +787,7 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
if (!collectPossibleRoots(IVU, V)) if (!collectPossibleRoots(IVU, V))
return false; return false;
// If we didn't get a root for index zero, then IVU must be // If we didn't get a root for index zero, then IVU must be
// subsumed. // subsumed.
if (V.find(0) == V.end()) if (V.find(0) == V.end())
SubsumedInsts.insert(IVU); SubsumedInsts.insert(IVU);
@ -818,13 +818,10 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
} }
bool LoopReroll::DAGRootTracker::findRoots() { bool LoopReroll::DAGRootTracker::findRoots() {
Inc = IVToIncMap[IV];
const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
getValue()->getZExtValue();
assert(RootSets.empty() && "Unclean state!"); assert(RootSets.empty() && "Unclean state!");
if (Inc == 1) { if (std::abs(Inc) == 1) {
for (auto *IVU : IV->users()) { for (auto *IVU : IV->users()) {
if (isLoopIncrement(IVU, IV)) if (isLoopIncrement(IVU, IV))
LoopIncs.push_back(cast<Instruction>(IVU)); LoopIncs.push_back(cast<Instruction>(IVU));
@ -1103,15 +1100,15 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
" vs. " << *RootInst << "\n"); " vs. " << *RootInst << "\n");
return false; return false;
} }
RootIt = TryIt; RootIt = TryIt;
RootInst = TryIt->first; RootInst = TryIt->first;
} }
// All instructions between the last root and this root // All instructions between the last root and this root
// may belong to some other iteration. If they belong to a // may belong to some other iteration. If they belong to a
// future iteration, then they're dangerous to alias with. // future iteration, then they're dangerous to alias with.
// //
// Note that because we allow a limited amount of flexibility in the order // Note that because we allow a limited amount of flexibility in the order
// that we visit nodes, LastRootIt might be *before* RootIt, in which // that we visit nodes, LastRootIt might be *before* RootIt, in which
// case we've already checked this set of instructions so we shouldn't // case we've already checked this set of instructions so we shouldn't
@ -1267,6 +1264,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
++J; ++J;
} }
bool Negative = IVToIncMap[IV] < 0;
const DataLayout &DL = Header->getModule()->getDataLayout(); const DataLayout &DL = Header->getModule()->getDataLayout();
// We need to create a new induction variable for each different BaseInst. // We need to create a new induction variable for each different BaseInst.
@ -1275,10 +1273,9 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
const SCEVAddRecExpr *RealIVSCEV = const SCEVAddRecExpr *RealIVSCEV =
cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst)); cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
const SCEV *Start = RealIVSCEV->getStart(); const SCEV *Start = RealIVSCEV->getStart();
const SCEVAddRecExpr *H = cast<SCEVAddRecExpr> const SCEVAddRecExpr *H = cast<SCEVAddRecExpr>(SE->getAddRecExpr(
(SE->getAddRecExpr(Start, Start, SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1), L,
SE->getConstant(RealIVSCEV->getType(), 1), SCEV::FlagAnyWrap));
L, SCEV::FlagAnyWrap));
{ // Limit the lifetime of SCEVExpander. { // Limit the lifetime of SCEVExpander.
SCEVExpander Expander(*SE, DL, "reroll"); SCEVExpander Expander(*SE, DL, "reroll");
Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin()); Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
@ -1294,8 +1291,8 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE); const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
// Iteration count SCEV minus 1 // Iteration count SCEV minus 1
const SCEV *ICMinus1SCEV = const SCEV *ICMinus1SCEV = SE->getMinusSCEV(
SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1)); ICSCEV, SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1));
Value *ICMinus1; // Iteration count minus 1 Value *ICMinus1; // Iteration count minus 1
if (isa<SCEVConstant>(ICMinus1SCEV)) { if (isa<SCEVConstant>(ICMinus1SCEV)) {
@ -1444,13 +1441,13 @@ void LoopReroll::ReductionTracker::replaceSelected() {
bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
const SCEV *IterCount, const SCEV *IterCount,
ReductionTracker &Reductions) { ReductionTracker &Reductions) {
DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI); DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, IVToIncMap);
if (!DAGRoots.findRoots()) if (!DAGRoots.findRoots())
return false; return false;
DEBUG(dbgs() << "LRR: Found all root induction increments for: " << DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
*IV << "\n"); *IV << "\n");
if (!DAGRoots.validate(Reductions)) if (!DAGRoots.validate(Reductions))
return false; return false;
if (!Reductions.validateSelected()) if (!Reductions.validateSelected())
@ -1497,6 +1494,7 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// First, we need to find the induction variable with respect to which we can // First, we need to find the induction variable with respect to which we can
// reroll (there may be several possible options). // reroll (there may be several possible options).
SmallInstructionVector PossibleIVs; SmallInstructionVector PossibleIVs;
IVToIncMap.clear();
collectPossibleIVs(L, PossibleIVs); collectPossibleIVs(L, PossibleIVs);
if (PossibleIVs.empty()) { if (PossibleIVs.empty()) {

View File

@ -0,0 +1,48 @@
; RUN: opt -S -loop-reroll %s | FileCheck %s
target triple = "aarch64--linux-gnu"
@buf = global [16 x i8] c"\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A", align 1
; Loop-reroll test for a loop whose induction variable decreases.
; %len.addr.014 starts at %len and is stepped by -2 on every iteration (%sub5).
; The loop body contains two load/zext/add chains: one indexed by the induction
; variable itself and one indexed by the induction variable minus 1 (%sub).
; Both chains accumulate into the %sum4.015 reduction.
; The FileCheck directives inside while.body expect the output loop to begin
; with an %indvar phi that starts at 0, followed by the reduction phi fed by
; %add (rather than %add4), to contain no "%sub5 = add nsw" with -1 or -2, and
; to end with a branch controlled by %exitcond.
define i32 @test1(i32 %len, i8* nocapture readonly %buf) #0 {
entry:
; Skip the loop entirely unless %len > 1.
%cmp.13 = icmp sgt i32 %len, 1
br i1 %cmp.13, label %while.body.lr.ph, label %while.end
while.body.lr.ph: ; preds = %entry
br label %while.body
while.body:
;CHECK-LABEL: while.body:
;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %while.body.lr.ph ]
;CHECK-NEXT: %sum4.015 = phi i64 [ 0, %while.body.lr.ph ], [ %add, %while.body ]
;CHECK-NOT: %sub5 = add nsw i32 %len.addr.014, -1
;CHECK-NOT: %sub5 = add nsw i32 %len.addr.014, -2
;CHECK: br i1 %exitcond, label %while.cond.while.end_crit_edge, label %while.body
%sum4.015 = phi i64 [ 0, %while.body.lr.ph ], [ %add4, %while.body ]
%len.addr.014 = phi i32 [ %len, %while.body.lr.ph ], [ %sub5, %while.body ]
; First chain: load the byte at index %len.addr.014 and add it to the sum.
%idxprom = sext i32 %len.addr.014 to i64
%arrayidx = getelementptr inbounds i8, i8* %buf, i64 %idxprom
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i64
%add = add i64 %conv, %sum4.015
; Second chain: same operations at index %len.addr.014 - 1.
%sub = add nsw i32 %len.addr.014, -1
%idxprom1 = sext i32 %sub to i64
%arrayidx2 = getelementptr inbounds i8, i8* %buf, i64 %idxprom1
%1 = load i8, i8* %arrayidx2, align 1
%conv3 = zext i8 %1 to i64
%add4 = add i64 %add, %conv3
; Step the induction variable by -2 and keep looping while it is still > 1.
%sub5 = add nsw i32 %len.addr.014, -2
%cmp = icmp sgt i32 %sub5, 1
br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
while.cond.while.end_crit_edge: ; preds = %while.body
; Truncate the 64-bit accumulated sum to the i32 value that is returned.
%add4.lcssa = phi i64 [ %add4, %while.body ]
%phitmp = trunc i64 %add4.lcssa to i32
br label %while.end
while.end: ; preds = %while.cond.while.end_crit_edge, %entry
; Result is 0 when the loop was skipped, otherwise the truncated sum.
%sum4.0.lcssa = phi i32 [ %phitmp, %while.cond.while.end_crit_edge ], [ 0, %entry ]
ret i32 %sum4.0.lcssa
unreachable
}