mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
IRCE: generalize to handle loops with decreasing induction variables.
IRCE can now split the iteration space for loops like:

    for (i = n; i >= 0; i--)
      a[i + k] = 42; // bounds check on access

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230618 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
19c267aed1
commit
21b2edfeae
@ -399,23 +399,11 @@ InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI,
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
/// This class is used to constrain loops to run within a given iteration space.
|
|
||||||
/// The algorithm this class implements is given a Loop and a range [Begin,
|
|
||||||
/// End). The algorithm then tries to break out a "main loop" out of the loop
|
|
||||||
/// it is given in a way that the "main loop" runs with the induction variable
|
|
||||||
/// in a subset of [Begin, End). The algorithm emits appropriate pre and post
|
|
||||||
/// loops to run any remaining iterations. The pre loop runs any iterations in
|
|
||||||
/// which the induction variable is < Begin, and the post loop runs any
|
|
||||||
/// iterations in which the induction variable is >= End.
|
|
||||||
///
|
|
||||||
class LoopConstrainer {
|
|
||||||
|
|
||||||
// Keeps track of the structure of a loop. This is similar to llvm::Loop,
|
// Keeps track of the structure of a loop. This is similar to llvm::Loop,
|
||||||
// except that it is more lightweight and can track the state of a loop
|
// except that it is more lightweight and can track the state of a loop through
|
||||||
// through changing and potentially invalid IR. This structure also
|
// changing and potentially invalid IR. This structure also formalizes the
|
||||||
// formalizes the kinds of loops we can deal with -- ones that have a single
|
// kinds of loops we can deal with -- ones that have a single latch that is also
|
||||||
// latch that is also an exiting block *and* have a canonical induction
|
// an exiting block *and* have a canonical induction variable.
|
||||||
// variable.
|
|
||||||
struct LoopStructure {
|
struct LoopStructure {
|
||||||
const char *Tag;
|
const char *Tag;
|
||||||
|
|
||||||
@ -428,16 +416,15 @@ class LoopConstrainer {
|
|||||||
BasicBlock *LatchExit;
|
BasicBlock *LatchExit;
|
||||||
unsigned LatchBrExitIdx;
|
unsigned LatchBrExitIdx;
|
||||||
|
|
||||||
// The canonical induction variable. Its value is `CIVStart` on the 0th
|
Value *IndVarNext;
|
||||||
// iteration and `CIVNext` for all iterations after that.
|
Value *IndVarStart;
|
||||||
PHINode *CIV;
|
Value *LoopExitAt;
|
||||||
Value *CIVStart;
|
bool IndVarIncreasing;
|
||||||
Value *CIVNext;
|
|
||||||
|
|
||||||
LoopStructure() : Tag(""), Header(nullptr), Latch(nullptr),
|
LoopStructure()
|
||||||
LatchBr(nullptr), LatchExit(nullptr),
|
: Tag(""), Header(nullptr), Latch(nullptr), LatchBr(nullptr),
|
||||||
LatchBrExitIdx(-1), CIV(nullptr),
|
LatchExit(nullptr), LatchBrExitIdx(-1), IndVarNext(nullptr),
|
||||||
CIVStart(nullptr), CIVNext(nullptr) { }
|
IndVarStart(nullptr), LoopExitAt(nullptr), IndVarIncreasing(false) {}
|
||||||
|
|
||||||
template <typename M> LoopStructure map(M Map) const {
|
template <typename M> LoopStructure map(M Map) const {
|
||||||
LoopStructure Result;
|
LoopStructure Result;
|
||||||
@ -447,13 +434,27 @@ class LoopConstrainer {
|
|||||||
Result.LatchBr = cast<BranchInst>(Map(LatchBr));
|
Result.LatchBr = cast<BranchInst>(Map(LatchBr));
|
||||||
Result.LatchExit = cast<BasicBlock>(Map(LatchExit));
|
Result.LatchExit = cast<BasicBlock>(Map(LatchExit));
|
||||||
Result.LatchBrExitIdx = LatchBrExitIdx;
|
Result.LatchBrExitIdx = LatchBrExitIdx;
|
||||||
Result.CIV = cast<PHINode>(Map(CIV));
|
Result.IndVarNext = Map(IndVarNext);
|
||||||
Result.CIVNext = Map(CIVNext);
|
Result.IndVarStart = Map(IndVarStart);
|
||||||
Result.CIVStart = Map(CIVStart);
|
Result.LoopExitAt = Map(LoopExitAt);
|
||||||
|
Result.IndVarIncreasing = IndVarIncreasing;
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static Optional<LoopStructure> parseLoopStructure(ScalarEvolution &, Loop &,
|
||||||
|
const char *&);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// This class is used to constrain loops to run within a given iteration space.
|
||||||
|
/// The algorithm this class implements is given a Loop and a range [Begin,
|
||||||
|
/// End). The algorithm then tries to break out a "main loop" out of the loop
|
||||||
|
/// it is given in a way that the "main loop" runs with the induction variable
|
||||||
|
/// in a subset of [Begin, End). The algorithm emits appropriate pre and post
|
||||||
|
/// loops to run any remaining iterations. The pre loop runs any iterations in
|
||||||
|
/// which the induction variable is < Begin, and the post loop runs any
|
||||||
|
/// iterations in which the induction variable is >= End.
|
||||||
|
///
|
||||||
|
class LoopConstrainer {
|
||||||
// The representation of a clone of the original loop we started out with.
|
// The representation of a clone of the original loop we started out with.
|
||||||
struct ClonedLoop {
|
struct ClonedLoop {
|
||||||
// The cloned blocks
|
// The cloned blocks
|
||||||
@ -472,17 +473,22 @@ class LoopConstrainer {
|
|||||||
BasicBlock *PseudoExit;
|
BasicBlock *PseudoExit;
|
||||||
BasicBlock *ExitSelector;
|
BasicBlock *ExitSelector;
|
||||||
std::vector<PHINode *> PHIValuesAtPseudoExit;
|
std::vector<PHINode *> PHIValuesAtPseudoExit;
|
||||||
|
PHINode *IndVarEnd;
|
||||||
|
|
||||||
RewrittenRangeInfo() : PseudoExit(nullptr), ExitSelector(nullptr) { }
|
RewrittenRangeInfo()
|
||||||
|
: PseudoExit(nullptr), ExitSelector(nullptr), IndVarEnd(nullptr) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Calculated subranges we restrict the iteration space of the main loop to.
|
// Calculated subranges we restrict the iteration space of the main loop to.
|
||||||
// See the implementation of `calculateSubRanges' for more details on how
|
// See the implementation of `calculateSubRanges' for more details on how
|
||||||
// these fields are computed. `ExitPreLoopAt' is `None' if we don't need a
|
// these fields are computed. `LowLimit` is None if there is no restriction
|
||||||
// pre loop. `ExitMainLoopAt' is `None' if we don't need a post loop.
|
// on the low end of the restricted iteration space of the main loop. `HighLimit`
|
||||||
|
// is None if there is no restriction on the high end of the restricted iteration
|
||||||
|
// space of the main loop.
|
||||||
|
|
||||||
struct SubRanges {
|
struct SubRanges {
|
||||||
Optional<Value *> ExitPreLoopAt;
|
Optional<const SCEV *> LowLimit;
|
||||||
Optional<Value *> ExitMainLoopAt;
|
Optional<const SCEV *> HighLimit;
|
||||||
};
|
};
|
||||||
|
|
||||||
// A utility function that does a `replaceUsesOfWith' on the incoming block
|
// A utility function that does a `replaceUsesOfWith' on the incoming block
|
||||||
@ -491,19 +497,11 @@ class LoopConstrainer {
|
|||||||
static void replacePHIBlock(PHINode *PN, BasicBlock *Block,
|
static void replacePHIBlock(PHINode *PN, BasicBlock *Block,
|
||||||
BasicBlock *ReplaceBy);
|
BasicBlock *ReplaceBy);
|
||||||
|
|
||||||
// Try to "parse" `OriginalLoop' and populate the various out parameters.
|
|
||||||
// Returns true on success, false on failure.
|
|
||||||
//
|
|
||||||
bool recognizeLoop(LoopStructure &LoopStructureOut,
|
|
||||||
const SCEV *&LatchCountOut, BasicBlock *&PreHeaderOut,
|
|
||||||
const char *&FailureReasonOut) const;
|
|
||||||
|
|
||||||
// Compute a safe set of limits for the main loop to run in -- effectively the
|
// Compute a safe set of limits for the main loop to run in -- effectively the
|
||||||
// intersection of `Range' and the iteration space of the original loop.
|
// intersection of `Range' and the iteration space of the original loop.
|
||||||
// Return the header count (1 + the latch taken count) in `HeaderCount'.
|
|
||||||
// Return None if unable to compute the set of subranges.
|
// Return None if unable to compute the set of subranges.
|
||||||
//
|
//
|
||||||
Optional<SubRanges> calculateSubRanges(Value *&HeaderCount) const;
|
Optional<SubRanges> calculateSubRanges() const;
|
||||||
|
|
||||||
// Clone `OriginalLoop' and return the result in CLResult. The IR after
|
// Clone `OriginalLoop' and return the result in CLResult. The IR after
|
||||||
// running `cloneLoop' is well formed except for the PHI nodes in CLResult --
|
// running `cloneLoop' is well formed except for the PHI nodes in CLResult --
|
||||||
@ -542,16 +540,15 @@ class LoopConstrainer {
|
|||||||
// The loop denoted by `LS' has `OldPreheader' as its preheader. This
|
// The loop denoted by `LS' has `OldPreheader' as its preheader. This
|
||||||
// function creates a new preheader for `LS' and returns it.
|
// function creates a new preheader for `LS' and returns it.
|
||||||
//
|
//
|
||||||
BasicBlock *createPreheader(const LoopConstrainer::LoopStructure &LS,
|
BasicBlock *createPreheader(const LoopStructure &LS, BasicBlock *OldPreheader,
|
||||||
BasicBlock *OldPreheader, const char *Tag) const;
|
const char *Tag) const;
|
||||||
|
|
||||||
// `ContinuationBlockAndPreheader' was the continuation block for some call to
|
// `ContinuationBlockAndPreheader' was the continuation block for some call to
|
||||||
// `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'.
|
// `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'.
|
||||||
// This function rewrites the PHI nodes in `LS.Header' to start with the
|
// This function rewrites the PHI nodes in `LS.Header' to start with the
|
||||||
// correct value.
|
// correct value.
|
||||||
void rewriteIncomingValuesForPHIs(
|
void rewriteIncomingValuesForPHIs(
|
||||||
LoopConstrainer::LoopStructure &LS,
|
LoopStructure &LS, BasicBlock *ContinuationBlockAndPreheader,
|
||||||
BasicBlock *ContinuationBlockAndPreheader,
|
|
||||||
const LoopConstrainer::RewrittenRangeInfo &RRI) const;
|
const LoopConstrainer::RewrittenRangeInfo &RRI) const;
|
||||||
|
|
||||||
// Even though we do not preserve any passes at this time, we at least need to
|
// Even though we do not preserve any passes at this time, we at least need to
|
||||||
@ -570,7 +567,6 @@ class LoopConstrainer {
|
|||||||
LoopInfo &OriginalLoopInfo;
|
LoopInfo &OriginalLoopInfo;
|
||||||
const SCEV *LatchTakenCount;
|
const SCEV *LatchTakenCount;
|
||||||
BasicBlock *OriginalPreheader;
|
BasicBlock *OriginalPreheader;
|
||||||
Value *OriginalHeaderCount;
|
|
||||||
|
|
||||||
// The preheader of the main loop. This may or may not be different from
|
// The preheader of the main loop. This may or may not be different from
|
||||||
// `OriginalPreheader'.
|
// `OriginalPreheader'.
|
||||||
@ -584,12 +580,12 @@ class LoopConstrainer {
|
|||||||
LoopStructure MainLoopStructure;
|
LoopStructure MainLoopStructure;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
LoopConstrainer(Loop &L, LoopInfo &LI, ScalarEvolution &SE,
|
LoopConstrainer(Loop &L, LoopInfo &LI, const LoopStructure &LS,
|
||||||
InductiveRangeCheck::Range R)
|
ScalarEvolution &SE, InductiveRangeCheck::Range R)
|
||||||
: F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()), SE(SE),
|
: F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()),
|
||||||
OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr),
|
SE(SE), OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr),
|
||||||
OriginalPreheader(nullptr), OriginalHeaderCount(nullptr),
|
OriginalPreheader(nullptr), MainLoopPreheader(nullptr), Range(R),
|
||||||
MainLoopPreheader(nullptr), Range(R) { }
|
MainLoopStructure(LS) {}
|
||||||
|
|
||||||
// Entry point for the algorithm. Returns true on success.
|
// Entry point for the algorithm. Returns true on success.
|
||||||
bool run();
|
bool run();
|
||||||
@ -604,155 +600,246 @@ void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block,
|
|||||||
PN->setIncomingBlock(i, ReplaceBy);
|
PN->setIncomingBlock(i, ReplaceBy);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LoopConstrainer::recognizeLoop(LoopStructure &LoopStructureOut,
|
static bool CanBeSMax(ScalarEvolution &SE, const SCEV *S) {
|
||||||
const SCEV *&LatchCountOut,
|
APInt SMax =
|
||||||
BasicBlock *&PreheaderOut,
|
APInt::getSignedMaxValue(cast<IntegerType>(S->getType())->getBitWidth());
|
||||||
const char *&FailureReason) const {
|
return SE.getSignedRange(S).contains(SMax) &&
|
||||||
using namespace llvm::PatternMatch;
|
SE.getUnsignedRange(S).contains(SMax);
|
||||||
|
|
||||||
assert(OriginalLoop.isLoopSimplifyForm() &&
|
|
||||||
"should follow from addRequired<>");
|
|
||||||
|
|
||||||
BasicBlock *Latch = OriginalLoop.getLoopLatch();
|
|
||||||
if (!OriginalLoop.isLoopExiting(Latch)) {
|
|
||||||
FailureReason = "no loop latch";
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PHINode *CIV = OriginalLoop.getCanonicalInductionVariable();
|
static bool CanBeSMin(ScalarEvolution &SE, const SCEV *S) {
|
||||||
assert(CIV && "precondition");
|
APInt SMin =
|
||||||
|
APInt::getSignedMinValue(cast<IntegerType>(S->getType())->getBitWidth());
|
||||||
|
return SE.getSignedRange(S).contains(SMin) &&
|
||||||
|
SE.getUnsignedRange(S).contains(SMin);
|
||||||
|
}
|
||||||
|
|
||||||
BasicBlock *Header = OriginalLoop.getHeader();
|
Optional<LoopStructure>
|
||||||
BasicBlock *Preheader = OriginalLoop.getLoopPreheader();
|
LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L,
|
||||||
|
const char *&FailureReason) {
|
||||||
|
assert(L.isLoopSimplifyForm() && "should follow from addRequired<>");
|
||||||
|
|
||||||
|
BasicBlock *Latch = L.getLoopLatch();
|
||||||
|
if (!L.isLoopExiting(Latch)) {
|
||||||
|
FailureReason = "no loop latch";
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
BasicBlock *Header = L.getHeader();
|
||||||
|
BasicBlock *Preheader = L.getLoopPreheader();
|
||||||
if (!Preheader) {
|
if (!Preheader) {
|
||||||
FailureReason = "no preheader";
|
FailureReason = "no preheader";
|
||||||
return false;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
Value *CIVNext = CIV->getIncomingValueForBlock(Latch);
|
|
||||||
Value *CIVStart = CIV->getIncomingValueForBlock(Preheader);
|
|
||||||
|
|
||||||
const SCEV *LatchCount = SE.getExitCount(&OriginalLoop, Latch);
|
|
||||||
if (isa<SCEVCouldNotCompute>(LatchCount)) {
|
|
||||||
FailureReason = "could not compute latch count";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// While SCEV does most of the analysis for us, we still have to
|
|
||||||
// modify the latch; and currently we can only deal with certain
|
|
||||||
// kinds of latches. This can be made more sophisticated as needed.
|
|
||||||
|
|
||||||
BranchInst *LatchBr = dyn_cast<BranchInst>(&*Latch->rbegin());
|
BranchInst *LatchBr = dyn_cast<BranchInst>(&*Latch->rbegin());
|
||||||
|
|
||||||
if (!LatchBr || LatchBr->isUnconditional()) {
|
if (!LatchBr || LatchBr->isUnconditional()) {
|
||||||
FailureReason = "latch terminator not conditional branch";
|
FailureReason = "latch terminator not conditional branch";
|
||||||
return false;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Currently we only support a latch condition of the form:
|
unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0;
|
||||||
//
|
|
||||||
// %condition = icmp slt %civNext, %limit
|
|
||||||
// br i1 %condition, label %header, label %exit
|
|
||||||
|
|
||||||
if (LatchBr->getSuccessor(0) != Header) {
|
ICmpInst *ICI = dyn_cast<ICmpInst>(LatchBr->getCondition());
|
||||||
FailureReason = "unknown latch form (header not first successor)";
|
if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) {
|
||||||
return false;
|
FailureReason = "latch terminator branch not conditional on integral icmp";
|
||||||
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
Value *CIVComparedTo = nullptr;
|
const SCEV *LatchCount = SE.getExitCount(&L, Latch);
|
||||||
ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
|
if (isa<SCEVCouldNotCompute>(LatchCount)) {
|
||||||
if (!(match(LatchBr->getCondition(),
|
FailureReason = "could not compute latch count";
|
||||||
m_ICmp(Pred, m_Specific(CIVNext), m_Value(CIVComparedTo))) &&
|
return None;
|
||||||
Pred == ICmpInst::ICMP_SLT)) {
|
|
||||||
FailureReason = "unknown latch form (not slt)";
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// IndVarSimplify will sometimes leave behind (in SCEV's cache) backedge-taken
|
ICmpInst::Predicate Pred = ICI->getPredicate();
|
||||||
// counts that are narrower than the canonical induction variable. These
|
Value *LeftValue = ICI->getOperand(0);
|
||||||
// values are still accurate, and we could probably use them after sign/zero
|
const SCEV *LeftSCEV = SE.getSCEV(LeftValue);
|
||||||
// extension; but for now we just bail out of the transformation to keep
|
IntegerType *IndVarTy = cast<IntegerType>(LeftValue->getType());
|
||||||
// things simple.
|
|
||||||
const SCEV *CIVComparedToSCEV = SE.getSCEV(CIVComparedTo);
|
Value *RightValue = ICI->getOperand(1);
|
||||||
if (isa<SCEVCouldNotCompute>(CIVComparedToSCEV) ||
|
const SCEV *RightSCEV = SE.getSCEV(RightValue);
|
||||||
CIVComparedToSCEV->getType() != LatchCount->getType()) {
|
|
||||||
FailureReason = "could not relate CIV to latch expression";
|
// We canonicalize `ICI` such that `LeftSCEV` is an add recurrence.
|
||||||
return false;
|
if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
|
||||||
|
if (isa<SCEVAddRecExpr>(RightSCEV)) {
|
||||||
|
std::swap(LeftSCEV, RightSCEV);
|
||||||
|
std::swap(LeftValue, RightValue);
|
||||||
|
Pred = ICmpInst::getSwappedPredicate(Pred);
|
||||||
|
} else {
|
||||||
|
FailureReason = "no add recurrences in the icmp";
|
||||||
|
return None;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const SCEV *ShouldBeOne = SE.getMinusSCEV(CIVComparedToSCEV, LatchCount);
|
auto IsInductionVar = [&SE](const SCEVAddRecExpr *AR, bool &IsIncreasing) {
|
||||||
const SCEVConstant *SCEVOne = dyn_cast<SCEVConstant>(ShouldBeOne);
|
if (!AR->isAffine())
|
||||||
if (!SCEVOne || SCEVOne->getValue()->getValue() != 1) {
|
|
||||||
FailureReason = "unexpected header count in latch";
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
IntegerType *Ty = cast<IntegerType>(AR->getType());
|
||||||
|
IntegerType *WideTy =
|
||||||
|
IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2);
|
||||||
|
|
||||||
|
// Currently we only work with induction variables that have been proved to
|
||||||
|
// not wrap. This restriction can potentially be lifted in the future.
|
||||||
|
|
||||||
|
const SCEVAddRecExpr *ExtendAfterOp =
|
||||||
|
dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
|
||||||
|
if (!ExtendAfterOp)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy);
|
||||||
|
const SCEV *ExtendedStep =
|
||||||
|
SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy);
|
||||||
|
|
||||||
|
bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart &&
|
||||||
|
ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep;
|
||||||
|
|
||||||
|
if (!NoSignedWrap)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (const SCEVConstant *StepExpr =
|
||||||
|
dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE))) {
|
||||||
|
ConstantInt *StepCI = StepExpr->getValue();
|
||||||
|
if (StepCI->isOne() || StepCI->isMinusOne()) {
|
||||||
|
IsIncreasing = StepCI->isOne();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned LatchBrExitIdx = 1;
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
// `ICI` is interpreted as taking the backedge if the *next* value of the
|
||||||
|
// induction variable satisfies some constraint.
|
||||||
|
|
||||||
|
const SCEVAddRecExpr *IndVarNext = cast<SCEVAddRecExpr>(LeftSCEV);
|
||||||
|
bool IsIncreasing = false;
|
||||||
|
if (!IsInductionVar(IndVarNext, IsIncreasing)) {
|
||||||
|
FailureReason = "LHS in icmp not induction variable";
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
ConstantInt *One = ConstantInt::get(IndVarTy, 1);
|
||||||
|
// TODO: generalize the predicates here to also match their unsigned variants.
|
||||||
|
if (IsIncreasing) {
|
||||||
|
bool FoundExpectedPred =
|
||||||
|
(Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 1) ||
|
||||||
|
(Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 0);
|
||||||
|
|
||||||
|
if (!FoundExpectedPred) {
|
||||||
|
FailureReason = "expected icmp slt semantically, found something else";
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (LatchBrExitIdx == 0) {
|
||||||
|
if (CanBeSMax(SE, RightSCEV)) {
|
||||||
|
// TODO: this restriction is easily removable -- we just have to
|
||||||
|
// remember that the icmp was an slt and not an sle.
|
||||||
|
FailureReason = "limit may overflow when coercing sle to slt";
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
IRBuilder<> B(&*Preheader->rbegin());
|
||||||
|
RightValue = B.CreateAdd(RightValue, One);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
bool FoundExpectedPred =
|
||||||
|
(Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) ||
|
||||||
|
(Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 0);
|
||||||
|
|
||||||
|
if (!FoundExpectedPred) {
|
||||||
|
FailureReason = "expected icmp sgt semantically, found something else";
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (LatchBrExitIdx == 0) {
|
||||||
|
if (CanBeSMin(SE, RightSCEV)) {
|
||||||
|
// TODO: this restriction is easily removable -- we just have to
|
||||||
|
// remember that the icmp was an sgt and not an sge.
|
||||||
|
FailureReason = "limit may overflow when coercing sge to sgt";
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
IRBuilder<> B(&*Preheader->rbegin());
|
||||||
|
RightValue = B.CreateSub(RightValue, One);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const SCEV *StartNext = IndVarNext->getStart();
|
||||||
|
const SCEV *Addend = SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE));
|
||||||
|
const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
|
||||||
|
|
||||||
BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);
|
BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);
|
||||||
|
|
||||||
assert(SE.getLoopDisposition(LatchCount, &OriginalLoop) ==
|
assert(SE.getLoopDisposition(LatchCount, &L) ==
|
||||||
ScalarEvolution::LoopInvariant &&
|
ScalarEvolution::LoopInvariant &&
|
||||||
"loop variant exit count doesn't make sense!");
|
"loop variant exit count doesn't make sense!");
|
||||||
|
|
||||||
assert(!OriginalLoop.contains(LatchExit) && "expected an exit block!");
|
assert(!L.contains(LatchExit) && "expected an exit block!");
|
||||||
|
|
||||||
LoopStructureOut.Tag = "main";
|
Value *IndVarStartV = SCEVExpander(SE, "irce").expandCodeFor(
|
||||||
LoopStructureOut.Header = Header;
|
IndVarStart, IndVarTy, &*Preheader->rbegin());
|
||||||
LoopStructureOut.Latch = Latch;
|
IndVarStartV->setName("indvar.start");
|
||||||
LoopStructureOut.LatchBr = LatchBr;
|
|
||||||
LoopStructureOut.LatchExit = LatchExit;
|
LoopStructure Result;
|
||||||
LoopStructureOut.LatchBrExitIdx = LatchBrExitIdx;
|
|
||||||
LoopStructureOut.CIV = CIV;
|
Result.Tag = "main";
|
||||||
LoopStructureOut.CIVNext = CIVNext;
|
Result.Header = Header;
|
||||||
LoopStructureOut.CIVStart = CIVStart;
|
Result.Latch = Latch;
|
||||||
|
Result.LatchBr = LatchBr;
|
||||||
|
Result.LatchExit = LatchExit;
|
||||||
|
Result.LatchBrExitIdx = LatchBrExitIdx;
|
||||||
|
Result.IndVarStart = IndVarStartV;
|
||||||
|
Result.IndVarNext = LeftValue;
|
||||||
|
Result.IndVarIncreasing = IsIncreasing;
|
||||||
|
Result.LoopExitAt = RightValue;
|
||||||
|
|
||||||
LatchCountOut = LatchCount;
|
|
||||||
PreheaderOut = Preheader;
|
|
||||||
FailureReason = nullptr;
|
FailureReason = nullptr;
|
||||||
|
|
||||||
return true;
|
return Result;
|
||||||
}
|
}
|
||||||
|
|
||||||
Optional<LoopConstrainer::SubRanges>
|
Optional<LoopConstrainer::SubRanges>
|
||||||
LoopConstrainer::calculateSubRanges(Value *&HeaderCountOut) const {
|
LoopConstrainer::calculateSubRanges() const {
|
||||||
IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());
|
IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());
|
||||||
|
|
||||||
if (Range.getType() != Ty)
|
if (Range.getType() != Ty)
|
||||||
return None;
|
return None;
|
||||||
|
|
||||||
SCEVExpander Expander(SE, "irce");
|
|
||||||
Instruction *InsertPt = OriginalPreheader->getTerminator();
|
|
||||||
|
|
||||||
LoopConstrainer::SubRanges Result;
|
LoopConstrainer::SubRanges Result;
|
||||||
|
|
||||||
// I think we can be more aggressive here and make this nuw / nsw if the
|
// I think we can be more aggressive here and make this nuw / nsw if the
|
||||||
// addition that feeds into the icmp for the latch's terminating branch is nuw
|
// addition that feeds into the icmp for the latch's terminating branch is nuw
|
||||||
// / nsw. In any case, a wrapping 2's complement addition is safe.
|
// / nsw. In any case, a wrapping 2's complement addition is safe.
|
||||||
ConstantInt *One = ConstantInt::get(Ty, 1);
|
ConstantInt *One = ConstantInt::get(Ty, 1);
|
||||||
const SCEV *HeaderCountSCEV = SE.getAddExpr(LatchTakenCount, SE.getSCEV(One));
|
const SCEV *Start = SE.getSCEV(MainLoopStructure.IndVarStart);
|
||||||
HeaderCountOut = Expander.expandCodeFor(HeaderCountSCEV, Ty, InsertPt);
|
const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt);
|
||||||
|
|
||||||
const SCEV *Zero = SE.getConstant(Ty, 0);
|
bool Increasing = MainLoopStructure.IndVarIncreasing;
|
||||||
|
// We compute `Smallest` and `Greatest` such that [Smallest, Greatest) is the
|
||||||
|
// range of values the induction variable takes.
|
||||||
|
const SCEV *Smallest =
|
||||||
|
Increasing ? Start : SE.getAddExpr(End, SE.getSCEV(One));
|
||||||
|
const SCEV *Greatest =
|
||||||
|
Increasing ? End : SE.getAddExpr(Start, SE.getSCEV(One));
|
||||||
|
|
||||||
|
auto Clamp = [this, Smallest, Greatest](const SCEV *S) {
|
||||||
|
return SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S));
|
||||||
|
};
|
||||||
|
|
||||||
// In some cases we can prove that we don't need a pre or post loop
|
// In some cases we can prove that we don't need a pre or post loop
|
||||||
|
|
||||||
bool ProvablyNoPreloop =
|
bool ProvablyNoPreloop =
|
||||||
SE.isKnownPredicate(ICmpInst::ICMP_SLE, Range.getBegin(), Zero);
|
SE.isKnownPredicate(ICmpInst::ICMP_SLE, Range.getBegin(), Smallest);
|
||||||
if (!ProvablyNoPreloop) {
|
if (!ProvablyNoPreloop)
|
||||||
const SCEV *ExitPreLoopAtSCEV =
|
Result.LowLimit = Clamp(Range.getBegin());
|
||||||
SE.getSMinExpr(HeaderCountSCEV, Range.getBegin());
|
|
||||||
Result.ExitPreLoopAt =
|
|
||||||
Expander.expandCodeFor(ExitPreLoopAtSCEV, Ty, InsertPt);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ProvablyNoPostLoop =
|
bool ProvablyNoPostLoop =
|
||||||
SE.isKnownPredicate(ICmpInst::ICMP_SLE, HeaderCountSCEV, Range.getEnd());
|
SE.isKnownPredicate(ICmpInst::ICMP_SLE, Greatest, Range.getEnd());
|
||||||
if (!ProvablyNoPostLoop) {
|
if (!ProvablyNoPostLoop)
|
||||||
const SCEV *ExitMainLoopAtSCEV =
|
Result.HighLimit = Clamp(Range.getEnd());
|
||||||
SE.getSMinExpr(HeaderCountSCEV, Range.getEnd());
|
|
||||||
Result.ExitMainLoopAt =
|
|
||||||
Expander.expandCodeFor(ExitMainLoopAtSCEV, Ty, InsertPt);
|
|
||||||
}
|
|
||||||
|
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
@ -809,7 +896,7 @@ void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
|
|||||||
}
|
}
|
||||||
|
|
||||||
LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
|
LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
|
||||||
const LoopStructure &LS, BasicBlock *Preheader, Value *ExitLoopAt,
|
const LoopStructure &LS, BasicBlock *Preheader, Value *ExitSubloopAt,
|
||||||
BasicBlock *ContinuationBlock) const {
|
BasicBlock *ContinuationBlock) const {
|
||||||
|
|
||||||
// We start with a loop with a single latch:
|
// We start with a loop with a single latch:
|
||||||
@ -893,32 +980,37 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
|
|||||||
BBInsertLocation);
|
BBInsertLocation);
|
||||||
|
|
||||||
BranchInst *PreheaderJump = cast<BranchInst>(&*Preheader->rbegin());
|
BranchInst *PreheaderJump = cast<BranchInst>(&*Preheader->rbegin());
|
||||||
|
bool Increasing = LS.IndVarIncreasing;
|
||||||
|
|
||||||
IRBuilder<> B(PreheaderJump);
|
IRBuilder<> B(PreheaderJump);
|
||||||
|
|
||||||
// EnterLoopCond - is it okay to start executing this `LS'?
|
// EnterLoopCond - is it okay to start executing this `LS'?
|
||||||
Value *EnterLoopCond = B.CreateICmpSLT(LS.CIVStart, ExitLoopAt);
|
Value *EnterLoopCond = Increasing
|
||||||
|
? B.CreateICmpSLT(LS.IndVarStart, ExitSubloopAt)
|
||||||
|
: B.CreateICmpSGT(LS.IndVarStart, ExitSubloopAt);
|
||||||
|
|
||||||
B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
|
B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
|
||||||
PreheaderJump->eraseFromParent();
|
PreheaderJump->eraseFromParent();
|
||||||
|
|
||||||
assert(LS.LatchBrExitIdx == 1 && "generalize this as needed!");
|
|
||||||
|
|
||||||
B.SetInsertPoint(LS.LatchBr);
|
|
||||||
|
|
||||||
// ContinueCond - is it okay to execute the next iteration in `LS'?
|
|
||||||
Value *ContinueCond = B.CreateICmpSLT(LS.CIVNext, ExitLoopAt);
|
|
||||||
|
|
||||||
LS.LatchBr->setCondition(ContinueCond);
|
|
||||||
assert(LS.LatchBr->getSuccessor(LS.LatchBrExitIdx) == LS.LatchExit &&
|
|
||||||
"invariant!");
|
|
||||||
LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
|
LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
|
||||||
|
B.SetInsertPoint(LS.LatchBr);
|
||||||
|
Value *TakeBackedgeLoopCond =
|
||||||
|
Increasing ? B.CreateICmpSLT(LS.IndVarNext, ExitSubloopAt)
|
||||||
|
: B.CreateICmpSGT(LS.IndVarNext, ExitSubloopAt);
|
||||||
|
Value *CondForBranch = LS.LatchBrExitIdx == 1
|
||||||
|
? TakeBackedgeLoopCond
|
||||||
|
: B.CreateNot(TakeBackedgeLoopCond);
|
||||||
|
|
||||||
|
LS.LatchBr->setCondition(CondForBranch);
|
||||||
|
|
||||||
B.SetInsertPoint(RRI.ExitSelector);
|
B.SetInsertPoint(RRI.ExitSelector);
|
||||||
|
|
||||||
// IterationsLeft - are there any more iterations left, given the original
|
// IterationsLeft - are there any more iterations left, given the original
|
||||||
// upper bound on the induction variable? If not, we branch to the "real"
|
// upper bound on the induction variable? If not, we branch to the "real"
|
||||||
// exit.
|
// exit.
|
||||||
Value *IterationsLeft = B.CreateICmpSLT(LS.CIVNext, OriginalHeaderCount);
|
Value *IterationsLeft = Increasing
|
||||||
|
? B.CreateICmpSLT(LS.IndVarNext, LS.LoopExitAt)
|
||||||
|
: B.CreateICmpSGT(LS.IndVarNext, LS.LoopExitAt);
|
||||||
B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
|
B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
|
||||||
|
|
||||||
BranchInst *BranchToContinuation =
|
BranchInst *BranchToContinuation =
|
||||||
@ -942,6 +1034,11 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
|
|||||||
RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
|
RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
RRI.IndVarEnd = PHINode::Create(LS.IndVarNext->getType(), 2, "indvar.end",
|
||||||
|
BranchToContinuation);
|
||||||
|
RRI.IndVarEnd->addIncoming(LS.IndVarStart, Preheader);
|
||||||
|
RRI.IndVarEnd->addIncoming(LS.IndVarNext, RRI.ExitSelector);
|
||||||
|
|
||||||
// The latch exit now has a branch from `RRI.ExitSelector' instead of
|
// The latch exit now has a branch from `RRI.ExitSelector' instead of
|
||||||
// `LS.Latch'. The PHI nodes need to be updated to reflect that.
|
// `LS.Latch'. The PHI nodes need to be updated to reflect that.
|
||||||
for (Instruction &I : *LS.LatchExit) {
|
for (Instruction &I : *LS.LatchExit) {
|
||||||
@ -955,7 +1052,7 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
|
|||||||
}
|
}
|
||||||
|
|
||||||
void LoopConstrainer::rewriteIncomingValuesForPHIs(
|
void LoopConstrainer::rewriteIncomingValuesForPHIs(
|
||||||
LoopConstrainer::LoopStructure &LS, BasicBlock *ContinuationBlock,
|
LoopStructure &LS, BasicBlock *ContinuationBlock,
|
||||||
const LoopConstrainer::RewrittenRangeInfo &RRI) const {
|
const LoopConstrainer::RewrittenRangeInfo &RRI) const {
|
||||||
|
|
||||||
unsigned PHIIndex = 0;
|
unsigned PHIIndex = 0;
|
||||||
@ -970,11 +1067,10 @@ void LoopConstrainer::rewriteIncomingValuesForPHIs(
|
|||||||
PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);
|
PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);
|
||||||
}
|
}
|
||||||
|
|
||||||
LS.CIVStart = LS.CIV->getIncomingValueForBlock(ContinuationBlock);
|
LS.IndVarStart = RRI.IndVarEnd;
|
||||||
}
|
}
|
||||||
|
|
||||||
BasicBlock *
|
BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
|
||||||
LoopConstrainer::createPreheader(const LoopConstrainer::LoopStructure &LS,
|
|
||||||
BasicBlock *OldPreheader,
|
BasicBlock *OldPreheader,
|
||||||
const char *Tag) const {
|
const char *Tag) const {
|
||||||
|
|
||||||
@ -1004,30 +1100,79 @@ void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) {
|
|||||||
|
|
||||||
bool LoopConstrainer::run() {
|
bool LoopConstrainer::run() {
|
||||||
BasicBlock *Preheader = nullptr;
|
BasicBlock *Preheader = nullptr;
|
||||||
const char *CouldNotProceedBecause = nullptr;
|
LatchTakenCount = SE.getExitCount(&OriginalLoop, MainLoopStructure.Latch);
|
||||||
if (!recognizeLoop(MainLoopStructure, LatchTakenCount, Preheader,
|
Preheader = OriginalLoop.getLoopPreheader();
|
||||||
CouldNotProceedBecause)) {
|
assert(!isa<SCEVCouldNotCompute>(LatchTakenCount) && Preheader != nullptr &&
|
||||||
DEBUG(dbgs() << "irce: could not recognize loop, " << CouldNotProceedBecause
|
"preconditions!");
|
||||||
<< "\n";);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
OriginalPreheader = Preheader;
|
OriginalPreheader = Preheader;
|
||||||
MainLoopPreheader = Preheader;
|
MainLoopPreheader = Preheader;
|
||||||
|
|
||||||
Optional<SubRanges> MaybeSR = calculateSubRanges(OriginalHeaderCount);
|
Optional<SubRanges> MaybeSR = calculateSubRanges();
|
||||||
if (!MaybeSR.hasValue()) {
|
if (!MaybeSR.hasValue()) {
|
||||||
DEBUG(dbgs() << "irce: could not compute subranges\n");
|
DEBUG(dbgs() << "irce: could not compute subranges\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
SubRanges SR = MaybeSR.getValue();
|
SubRanges SR = MaybeSR.getValue();
|
||||||
|
bool Increasing = MainLoopStructure.IndVarIncreasing;
|
||||||
|
IntegerType *IVTy =
|
||||||
|
cast<IntegerType>(MainLoopStructure.IndVarNext->getType());
|
||||||
|
|
||||||
|
SCEVExpander Expander(SE, "irce");
|
||||||
|
Instruction *InsertPt = OriginalPreheader->getTerminator();
|
||||||
|
|
||||||
// It would have been better to make `PreLoop' and `PostLoop'
|
// It would have been better to make `PreLoop' and `PostLoop'
|
||||||
// `Optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy
|
// `Optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy
|
||||||
// constructor.
|
// constructor.
|
||||||
ClonedLoop PreLoop, PostLoop;
|
ClonedLoop PreLoop, PostLoop;
|
||||||
bool NeedsPreLoop = SR.ExitPreLoopAt.hasValue();
|
bool NeedsPreLoop =
|
||||||
bool NeedsPostLoop = SR.ExitMainLoopAt.hasValue();
|
Increasing ? SR.LowLimit.hasValue() : SR.HighLimit.hasValue();
|
||||||
|
bool NeedsPostLoop =
|
||||||
|
Increasing ? SR.HighLimit.hasValue() : SR.LowLimit.hasValue();
|
||||||
|
|
||||||
|
Value *ExitPreLoopAt = nullptr;
|
||||||
|
Value *ExitMainLoopAt = nullptr;
|
||||||
|
const SCEVConstant *MinusOneS =
|
||||||
|
cast<SCEVConstant>(SE.getConstant(IVTy, -1, true /* isSigned */));
|
||||||
|
|
||||||
|
if (NeedsPreLoop) {
|
||||||
|
const SCEV *ExitPreLoopAtSCEV = nullptr;
|
||||||
|
|
||||||
|
if (Increasing)
|
||||||
|
ExitPreLoopAtSCEV = *SR.LowLimit;
|
||||||
|
else {
|
||||||
|
if (CanBeSMin(SE, *SR.HighLimit)) {
|
||||||
|
DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
|
||||||
|
<< "preloop exit limit. HighLimit = " << *(*SR.HighLimit)
|
||||||
|
<< "\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);
|
||||||
|
}
|
||||||
|
|
||||||
|
ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt);
|
||||||
|
ExitPreLoopAt->setName("exit.preloop.at");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (NeedsPostLoop) {
|
||||||
|
const SCEV *ExitMainLoopAtSCEV = nullptr;
|
||||||
|
|
||||||
|
if (Increasing)
|
||||||
|
ExitMainLoopAtSCEV = *SR.HighLimit;
|
||||||
|
else {
|
||||||
|
if (CanBeSMin(SE, *SR.LowLimit)) {
|
||||||
|
DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
|
||||||
|
<< "mainloop exit limit. LowLimit = " << *(*SR.LowLimit)
|
||||||
|
<< "\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);
|
||||||
|
}
|
||||||
|
|
||||||
|
ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt);
|
||||||
|
ExitMainLoopAt->setName("exit.mainloop.at");
|
||||||
|
}
|
||||||
|
|
||||||
// We clone these ahead of time so that we don't have to deal with changing
|
// We clone these ahead of time so that we don't have to deal with changing
|
||||||
// and temporarily invalid IR as we transform the loops.
|
// and temporarily invalid IR as we transform the loops.
|
||||||
@ -1044,9 +1189,8 @@ bool LoopConstrainer::run() {
|
|||||||
|
|
||||||
MainLoopPreheader =
|
MainLoopPreheader =
|
||||||
createPreheader(MainLoopStructure, Preheader, "mainloop");
|
createPreheader(MainLoopStructure, Preheader, "mainloop");
|
||||||
PreLoopRRI =
|
PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader,
|
||||||
changeIterationSpaceEnd(PreLoop.Structure, Preheader,
|
ExitPreLoopAt, MainLoopPreheader);
|
||||||
SR.ExitPreLoopAt.getValue(), MainLoopPreheader);
|
|
||||||
rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader,
|
rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader,
|
||||||
PreLoopRRI);
|
PreLoopRRI);
|
||||||
}
|
}
|
||||||
@ -1058,8 +1202,7 @@ bool LoopConstrainer::run() {
|
|||||||
PostLoopPreheader =
|
PostLoopPreheader =
|
||||||
createPreheader(PostLoop.Structure, Preheader, "postloop");
|
createPreheader(PostLoop.Structure, Preheader, "postloop");
|
||||||
PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader,
|
PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader,
|
||||||
SR.ExitMainLoopAt.getValue(),
|
ExitMainLoopAt, PostLoopPreheader);
|
||||||
PostLoopPreheader);
|
|
||||||
rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader,
|
rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader,
|
||||||
PostLoopRRI);
|
PostLoopRRI);
|
||||||
}
|
}
|
||||||
@ -1179,13 +1322,6 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
|
|||||||
ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
|
ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
|
||||||
BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
|
BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
|
||||||
|
|
||||||
PHINode *CIV = L->getCanonicalInductionVariable();
|
|
||||||
if (!CIV) {
|
|
||||||
DEBUG(dbgs() << "irce: loop has no canonical induction variable\n");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const SCEVAddRecExpr *IndVar = cast<SCEVAddRecExpr>(SE.getSCEV(CIV));
|
|
||||||
|
|
||||||
for (auto BBI : L->getBlocks())
|
for (auto BBI : L->getBlocks())
|
||||||
if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
|
if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
|
||||||
if (InductiveRangeCheck *IRC =
|
if (InductiveRangeCheck *IRC =
|
||||||
@ -1202,6 +1338,21 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
|
|||||||
IRC->print(dbgs());
|
IRC->print(dbgs());
|
||||||
);
|
);
|
||||||
|
|
||||||
|
const char *FailureReason = nullptr;
|
||||||
|
Optional<LoopStructure> MaybeLoopStructure =
|
||||||
|
LoopStructure::parseLoopStructure(SE, *L, FailureReason);
|
||||||
|
if (!MaybeLoopStructure.hasValue()) {
|
||||||
|
DEBUG(dbgs() << "irce: could not parse loop structure: " << FailureReason
|
||||||
|
<< "\n";);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
LoopStructure LS = MaybeLoopStructure.getValue();
|
||||||
|
bool Increasing = LS.IndVarIncreasing;
|
||||||
|
const SCEV *MinusOne =
|
||||||
|
SE.getConstant(LS.IndVarNext->getType(), Increasing ? -1 : 1, true);
|
||||||
|
const SCEVAddRecExpr *IndVar =
|
||||||
|
cast<SCEVAddRecExpr>(SE.getAddExpr(SE.getSCEV(LS.IndVarNext), MinusOne));
|
||||||
|
|
||||||
Optional<InductiveRangeCheck::Range> SafeIterRange;
|
Optional<InductiveRangeCheck::Range> SafeIterRange;
|
||||||
Instruction *ExprInsertPt = Preheader->getTerminator();
|
Instruction *ExprInsertPt = Preheader->getTerminator();
|
||||||
|
|
||||||
@ -1223,8 +1374,8 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
|
|||||||
if (!SafeIterRange.hasValue())
|
if (!SafeIterRange.hasValue())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), SE,
|
LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), LS,
|
||||||
SafeIterRange.getValue());
|
SE, SafeIterRange.getValue());
|
||||||
bool Changed = LC.run();
|
bool Changed = LC.run();
|
||||||
|
|
||||||
if (Changed) {
|
if (Changed) {
|
||||||
|
43
test/Transforms/IRCE/decrementing-loop.ll
Normal file
43
test/Transforms/IRCE/decrementing-loop.ll
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
; RUN: opt -irce -S < %s | FileCheck %s
|
||||||
|
|
||||||
|
define void @decrementing_loop(i32 *%arr, i32 *%a_len_ptr, i32 %n) {
|
||||||
|
entry:
|
||||||
|
%len = load i32* %a_len_ptr, !range !0
|
||||||
|
%first.itr.check = icmp sgt i32 %n, 0
|
||||||
|
%start = sub i32 %n, 1
|
||||||
|
br i1 %first.itr.check, label %loop, label %exit
|
||||||
|
|
||||||
|
loop:
|
||||||
|
%idx = phi i32 [ %start, %entry ] , [ %idx.dec, %in.bounds ]
|
||||||
|
%idx.dec = sub i32 %idx, 1
|
||||||
|
%abc.high = icmp slt i32 %idx, %len
|
||||||
|
%abc.low = icmp sge i32 %idx, 0
|
||||||
|
%abc = and i1 %abc.low, %abc.high
|
||||||
|
br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
|
||||||
|
|
||||||
|
in.bounds:
|
||||||
|
%addr = getelementptr i32* %arr, i32 %idx
|
||||||
|
store i32 0, i32* %addr
|
||||||
|
%next = icmp sgt i32 %idx.dec, -1
|
||||||
|
br i1 %next, label %loop, label %exit
|
||||||
|
|
||||||
|
out.of.bounds:
|
||||||
|
ret void
|
||||||
|
|
||||||
|
exit:
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; CHECK: loop.preheader:
|
||||||
|
; CHECK: [[indvar_start:[^ ]+]] = add i32 %n, -1
|
||||||
|
; CHECK: [[not_len:[^ ]+]] = sub i32 -1, %len
|
||||||
|
; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
|
||||||
|
; CHECK: [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]]
|
||||||
|
; CHECK: [[not_len_hiclamp:[^ ]+]] = select i1 [[not_len_hiclamp_cmp]], i32 [[not_len]], i32 [[not_n]]
|
||||||
|
; CHECK: [[len_hiclamp:[^ ]+]] = sub i32 -1, [[not_len_hiclamp]]
|
||||||
|
; CHECK: [[not_exit_preloop_at_cmp:[^ ]+]] = icmp sgt i32 [[len_hiclamp]], 0
|
||||||
|
; CHECK: [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cmp]], i32 [[len_hiclamp]], i32 0
|
||||||
|
; CHECK: %exit.preloop.at = add i32 [[not_exit_preloop_at]], -1
|
||||||
|
}
|
||||||
|
|
||||||
|
!0 = !{i32 0, i32 2147483647}
|
||||||
|
!1 = !{!"branch_weights", i32 64, i32 4}
|
@ -42,9 +42,11 @@ define void @multiple_access_no_preloop(
|
|||||||
; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]]
|
; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]]
|
||||||
; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]]
|
; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]]
|
||||||
; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
|
; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
|
||||||
; CHECK: [[not_upper_limit_cond:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]]
|
; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]]
|
||||||
; CHECK: [[not_upper_limit:[^ ]+]] = select i1 [[not_upper_limit_cond]], i32 [[smax_not_len]], i32 [[not_n]]
|
; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]]
|
||||||
; CHECK: [[upper_limit:[^ ]+]] = sub i32 -1, [[not_upper_limit]]
|
; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]]
|
||||||
|
; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0
|
||||||
|
; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0
|
||||||
|
|
||||||
; CHECK-LABEL: loop:
|
; CHECK-LABEL: loop:
|
||||||
; CHECK: br i1 true, label %in.bounds.a, label %out.of.bounds
|
; CHECK: br i1 true, label %in.bounds.a, label %out.of.bounds
|
||||||
|
@ -36,6 +36,7 @@ define void @single_access_no_preloop_no_offset(i32 *%arr, i32 *%a_len_ptr, i32
|
|||||||
|
|
||||||
; CHECK-LABEL: main.pseudo.exit:
|
; CHECK-LABEL: main.pseudo.exit:
|
||||||
; CHECK-NEXT: %idx.copy = phi i32 [ 0, %loop.preheader ], [ %idx.next, %main.exit.selector ]
|
; CHECK-NEXT: %idx.copy = phi i32 [ 0, %loop.preheader ], [ %idx.next, %main.exit.selector ]
|
||||||
|
; CHECK-NEXT: %indvar.end = phi i32 [ 0, %loop.preheader ], [ %idx.next, %main.exit.selector ]
|
||||||
; CHECK-NEXT: br label %postloop
|
; CHECK-NEXT: br label %postloop
|
||||||
|
|
||||||
; CHECK-LABEL: postloop:
|
; CHECK-LABEL: postloop:
|
||||||
@ -85,17 +86,19 @@ define void @single_access_no_preloop_with_offset(i32 *%arr, i32 *%a_len_ptr, i3
|
|||||||
; CHECK-LABEL: loop.preheader:
|
; CHECK-LABEL: loop.preheader:
|
||||||
; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
|
; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
|
||||||
; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
|
; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
|
||||||
; CHECK: [[not_exit_main_loop_at_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]]
|
; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]]
|
||||||
; CHECK: [[not_exit_main_loop_at:[^ ]+]] = select i1 [[not_exit_main_loop_at_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]]
|
; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]]
|
||||||
; CHECK: [[exit_main_loop_at:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at]]
|
; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]]
|
||||||
; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at]]
|
; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0
|
||||||
|
; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0
|
||||||
|
; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]]
|
||||||
; CHECK: br i1 [[enter_main_loop]], label %loop, label %main.pseudo.exit
|
; CHECK: br i1 [[enter_main_loop]], label %loop, label %main.pseudo.exit
|
||||||
|
|
||||||
; CHECK-LABEL: loop:
|
; CHECK-LABEL: loop:
|
||||||
; CHECK: br i1 true, label %in.bounds, label %out.of.bounds
|
; CHECK: br i1 true, label %in.bounds, label %out.of.bounds
|
||||||
|
|
||||||
; CHECK-LABEL: in.bounds:
|
; CHECK-LABEL: in.bounds:
|
||||||
; CHECK: [[continue_main_loop:[^ ]+]] = icmp slt i32 %idx.next, [[exit_main_loop_at]]
|
; CHECK: [[continue_main_loop:[^ ]+]] = icmp slt i32 %idx.next, [[exit_main_loop_at_loclamp]]
|
||||||
; CHECK: br i1 [[continue_main_loop]], label %loop, label %main.exit.selector
|
; CHECK: br i1 [[continue_main_loop]], label %loop, label %main.exit.selector
|
||||||
|
|
||||||
; CHECK-LABEL: main.pseudo.exit:
|
; CHECK-LABEL: main.pseudo.exit:
|
||||||
|
@ -31,14 +31,21 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32
|
|||||||
; CHECK-LABEL: loop.preheader:
|
; CHECK-LABEL: loop.preheader:
|
||||||
; CHECK: [[not_safe_start:[^ ]+]] = add i32 %offset, -1
|
; CHECK: [[not_safe_start:[^ ]+]] = add i32 %offset, -1
|
||||||
; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
|
; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
|
||||||
; CHECK: [[not_exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]]
|
; CHECK: [[not_exit_preloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]]
|
||||||
; CHECK: [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cond]], i32 [[not_safe_start]], i32 [[not_n]]
|
; CHECK: [[not_exit_preloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_preloop_at_cond_loclamp]], i32 [[not_safe_start]], i32 [[not_n]]
|
||||||
; CHECK: [[exit_preloop_at:[^ ]+]] = sub i32 -1, [[not_exit_preloop_at]]
|
; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_preloop_at_loclamp]]
|
||||||
|
; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[exit_preloop_at_loclamp]], 0
|
||||||
|
; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0
|
||||||
|
|
||||||
|
|
||||||
|
; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 %offset, -1
|
||||||
|
; CHECK: [[not_safe_end:[^ ]+]] = sub i32 [[not_safe_start_2]], %len
|
||||||
|
; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_end]], [[not_n]]
|
||||||
|
; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_end]], i32 [[not_n]]
|
||||||
|
; CHECK: [[exit_mainloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at_loclamp]]
|
||||||
|
; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 [[exit_mainloop_at_loclamp]], 0
|
||||||
|
; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cmp]], i32 [[exit_mainloop_at_loclamp]], i32 0
|
||||||
|
|
||||||
; CHECK: [[not_safe_end:[^ ]+]] = sub i32 [[not_safe_start]], %len
|
|
||||||
; CHECK: [[not_exit_mainloop_at_cond:[^ ]+]] = icmp sgt i32 [[not_safe_end]], [[not_n]]
|
|
||||||
; CHECK: [[not_exit_mainloop_at:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond]], i32 [[not_safe_end]], i32 [[not_n]]
|
|
||||||
; CHECK: [[exit_mainloop_at:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at]]
|
|
||||||
|
|
||||||
; CHECK-LABEL: in.bounds:
|
; CHECK-LABEL: in.bounds:
|
||||||
; CHECK: [[continue_mainloop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[exit_mainloop_at]]
|
; CHECK: [[continue_mainloop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[exit_mainloop_at]]
|
||||||
|
Loading…
Reference in New Issue
Block a user