Teach LSR to optimize more loop exit compares, i.e., change them to use the postinc iv value. Previously LSR would only optimize compares in the loop latch block. Now, if LSR can prove the transformation is safe (and profitable), it can also change compares that are not in the latch block to use postinc values.

Also, if the compare is the only use of the iv, LSR now places the iv increment instruction just before the compare instead of in the latch block.
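
As an illustrative sketch (hypothetical IR, not taken from this commit's tests; %n.post stands for the exit bound adjusted by one stride): a loop whose exit compare sits in the header rather than the latch previously had to test the preinc iv value:

  header:
    %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
    ; ... loop body ...
    %exitcond = icmp eq i64 %iv, %n            ; tests the preinc value
    br i1 %exitcond, label %exit, label %latch
  latch:
    %iv.next = add i64 %iv, 1
    br label %header

When the compare is the iv's only use, the increment can instead be emitted right before it, and the compare rewritten against the postinc value:

  header:
    %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
    ; ... loop body ...
    %iv.next = add i64 %iv, 1                  ; increment now precedes the compare
    %exitcond = icmp eq i64 %iv.next, %n.post  ; tests the postinc value
    br i1 %exitcond, label %exit, label %latch
  latch:
    br label %header

Since %iv now dies at the increment, the live ranges of the iv and its increment can be coalesced into one register; on x86-64 the iv update then lands next to the conditional branch, which is what the new test below checks (decq immediately followed by jne).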


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@71485 91177308-0d34-0410-b5e6-96231b3b80d8
Evan Cheng
2009-05-11 22:33:01 +00:00
parent 6dc4ade595
commit 5792f51e12
4 changed files with 275 additions and 45 deletions


@@ -43,6 +43,7 @@ STATISTIC(NumVariable, "Number of PHIs with variable strides");
 STATISTIC(NumEliminated, "Number of strides eliminated");
 STATISTIC(NumShadow, "Number of Shadow IVs optimized");
 STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses");
+STATISTIC(NumLoopCond, "Number of loop terminating conds optimized");
 
 static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
                                        cl::init(false),
@@ -122,6 +123,10 @@ namespace {
     /// particular stride.
     std::map<SCEVHandle, IVsOfOneStride> IVsByStride;
 
+    /// StrideNoReuse - Keep track of all the strides whose ivs cannot be
+    /// reused (nor should they be rewritten to reuse other strides).
+    SmallSet<SCEVHandle, 4> StrideNoReuse;
+
     /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable:
     /// We use this to iterate over the IVUsesByStride collection without being
     /// dependent on random ordering of pointers in the process.
@@ -184,7 +189,7 @@ namespace {
     SCEVHandle CheckForIVReuse(bool, bool, bool, const SCEVHandle&,
                                IVExpr&, const Type*,
                                const std::vector<BasedUser>& UsersToProcess);
-    bool ValidStride(bool, int64_t,
+    bool ValidScale(bool, int64_t,
                      const std::vector<BasedUser>& UsersToProcess);
     SCEVHandle CollectIVUsers(const SCEVHandle &Stride,
                               IVUsersOfOneStride &Uses,
@@ -213,6 +218,7 @@ namespace {
                                       SCEVHandle Stride,
                                       SCEVHandle CommonExprs,
                                       Value *CommonBaseV,
+                                      Instruction *IVIncInsertPt,
                                       const Loop *L,
                                       SCEVExpander &PreheaderRewriter);
     void StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
@@ -1122,11 +1128,10 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
   return Result;
 }
 
-/// ValidStride - Check whether the given Scale is valid for all loads and
+/// ValidScale - Check whether the given Scale is valid for all loads and
 /// stores in UsersToProcess.
 ///
-bool LoopStrengthReduce::ValidStride(bool HasBaseReg,
-                                     int64_t Scale,
+bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
                      const std::vector<BasedUser>& UsersToProcess) {
   if (!TLI)
     return true;
@@ -1186,13 +1191,17 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
                                 const SCEVHandle &Stride,
                                 IVExpr &IV, const Type *Ty,
                                 const std::vector<BasedUser>& UsersToProcess) {
+  if (StrideNoReuse.count(Stride))
+    return SE->getIntegerSCEV(0, Stride->getType());
+
   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
     int64_t SInt = SC->getValue()->getSExtValue();
     for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
          ++NewStride) {
       std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
         IVsByStride.find(StrideOrder[NewStride]);
-      if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
+      if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||
+          StrideNoReuse.count(SI->first))
         continue;
       int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
       if (SI->first != Stride &&
@@ -1206,7 +1215,7 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
       // multiplications.
       if (Scale == 1 ||
           (AllUsesAreAddresses &&
-           ValidStride(HasBaseReg, Scale, UsersToProcess)))
+           ValidScale(HasBaseReg, Scale, UsersToProcess)))
         for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
                IE = SI->second.IVs.end(); II != IE; ++II)
           // FIXME: Only handle base == 0 for now.
@@ -1452,6 +1461,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
 /// Return the created phi node.
 ///
 static PHINode *InsertAffinePhi(SCEVHandle Start, SCEVHandle Step,
+                                Instruction *IVIncInsertPt,
                                 const Loop *L,
                                 SCEVExpander &Rewriter) {
   assert(Start->isLoopInvariant(L) && "New PHI start is not loop invariant!");
@@ -1475,16 +1485,17 @@ static PHINode *InsertAffinePhi(SCEVHandle Start, SCEVHandle Step,
     IncAmount = Rewriter.SE.getNegativeSCEV(Step);
 
   // Insert an add instruction right before the terminator corresponding
-  // to the back-edge.
+  // to the back-edge or just before the only use. The location is determined
+  // by the caller and passed in as IVIncInsertPt.
   Value *StepV = Rewriter.expandCodeFor(IncAmount, Ty,
                                         Preheader->getTerminator());
   Instruction *IncV;
   if (isNegative) {
     IncV = BinaryOperator::CreateSub(PN, StepV, "lsr.iv.next",
-                                     LatchBlock->getTerminator());
+                                     IVIncInsertPt);
   } else {
     IncV = BinaryOperator::CreateAdd(PN, StepV, "lsr.iv.next",
-                                     LatchBlock->getTerminator());
+                                     IVIncInsertPt);
   }
   if (!isa<ConstantInt>(StepV)) ++NumVariable;
@@ -1541,6 +1552,7 @@ LoopStrengthReduce::PrepareToStrengthReduceFully(
 
   // Rewrite the UsersToProcess records, creating a separate PHI for each
   // unique Base value.
+  Instruction *IVIncInsertPt = L->getLoopLatch()->getTerminator();
   for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
     // TODO: The uses are grouped by base, but not sorted. We arbitrarily
     // pick the first Imm value here to start with, and adjust it for the
@@ -1548,7 +1560,7 @@ LoopStrengthReduce::PrepareToStrengthReduceFully(
     SCEVHandle Imm = UsersToProcess[i].Imm;
     SCEVHandle Base = UsersToProcess[i].Base;
     SCEVHandle Start = SE->getAddExpr(CommonExprs, Base, Imm);
-    PHINode *Phi = InsertAffinePhi(Start, Stride, L,
+    PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L,
                                    PreheaderRewriter);
     // Loop over all the users with the same base.
     do {
@@ -1561,6 +1573,18 @@
   }
 }
 
+/// FindIVIncInsertPt - Return the location to insert the increment instruction.
+/// If the only use is a use of the postinc value (it must be the loop
+/// termination condition), then insert it just before the use.
+static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,
+                                      const Loop *L) {
+  if (UsersToProcess.size() == 1 &&
+      UsersToProcess[0].isUseOfPostIncrementedValue &&
+      L->contains(UsersToProcess[0].Inst->getParent()))
+    return UsersToProcess[0].Inst;
+  return L->getLoopLatch()->getTerminator();
+}
+
 /// PrepareToStrengthReduceWithNewPhi - Insert a new induction variable for the
 /// given users to share.
 ///
@@ -1570,12 +1594,13 @@ LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
                                         SCEVHandle Stride,
                                         SCEVHandle CommonExprs,
                                         Value *CommonBaseV,
+                                        Instruction *IVIncInsertPt,
                                         const Loop *L,
                                         SCEVExpander &PreheaderRewriter) {
   DOUT << "  Inserting new PHI:\n";
 
   PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV),
-                                 Stride, L,
+                                 Stride, IVIncInsertPt, L,
                                  PreheaderRewriter);
 
   // Remember this in case a later stride is multiple of this.
@@ -1590,8 +1615,8 @@ LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
   DOUT << "\n";
 }
 
-/// PrepareToStrengthReduceWithNewPhi - Prepare for the given users to reuse
-/// an induction variable with a stride that is a factor of the current
+/// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to
+/// reuse an induction variable with a stride that is a factor of the current
 /// induction variable.
 ///
 void
@@ -1727,6 +1752,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
   BasicBlock *Preheader = L->getLoopPreheader();
   Instruction *PreInsertPt = Preheader->getTerminator();
   BasicBlock *LatchBlock = L->getLoopLatch();
+  Instruction *IVIncInsertPt = LatchBlock->getTerminator();
 
   Value *CommonBaseV = Constant::getNullValue(ReplacedTy);
@@ -1755,13 +1781,15 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
                                      AllUsesAreOutsideLoop,
                                      Stride, ReuseIV, ReplacedTy,
                                      UsersToProcess);
-    if (isa<SCEVConstant>(RewriteFactor) &&
-        cast<SCEVConstant>(RewriteFactor)->isZero())
-      PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs,
-                                        CommonBaseV, L, PreheaderRewriter);
-    else
+    if (!RewriteFactor->isZero())
       PrepareToStrengthReduceFromSmallerStride(UsersToProcess, CommonBaseV,
                                                ReuseIV, PreInsertPt);
+    else {
+      IVIncInsertPt = FindIVIncInsertPt(UsersToProcess, L);
+      PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs,
+                                        CommonBaseV, IVIncInsertPt,
+                                        L, PreheaderRewriter);
+    }
   }
 
   // Process all the users now, replacing their strided uses with
@@ -1800,7 +1828,12 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
     // FIXME: Use emitted users to emit other users.
     BasedUser &User = UsersToProcess.back();
 
-    DOUT << "    Examining use ";
+    DOUT << "    Examining ";
+    if (User.isUseOfPostIncrementedValue)
+      DOUT << "postinc";
+    else
+      DOUT << "preinc";
+    DOUT << " use ";
     DEBUG(WriteAsOperand(*DOUT, UsersToProcess.back().OperandValToReplace,
                          /*PrintType=*/false));
     DOUT << " in Inst: " << *(User.Inst);
@@ -1810,11 +1843,12 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
     Value *RewriteOp = User.Phi;
     if (User.isUseOfPostIncrementedValue) {
       RewriteOp = User.Phi->getIncomingValueForBlock(LatchBlock);
-
       // If this user is in the loop, make sure it is the last thing in the
-      // loop to ensure it is dominated by the increment.
-      if (L->contains(User.Inst->getParent()))
-        User.Inst->moveBefore(LatchBlock->getTerminator());
+      // loop to ensure it is dominated by the increment. In case it's the
+      // only use of the iv, the increment instruction is already before the
+      // use.
+      if (L->contains(User.Inst->getParent()) && User.Inst != IVIncInsertPt)
+        User.Inst->moveBefore(IVIncInsertPt);
     }
 
     SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp);
@@ -2085,7 +2119,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
       // if it's likely the new stride uses will be rewritten using the
       // stride of the compare instruction.
       if (AllUsesAreAddresses &&
-          ValidStride(!CommonExprs->isZero(), Scale, UsersToProcess))
+          ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
         continue;
 
       // If scale is negative, use swapped predicate unless it's testing
@@ -2304,8 +2338,8 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
     if (!DestTy) continue;
 
     if (TLI) {
-      /* If target does not support DestTy natively then do not apply
-         this transformation. */
+      // If target does not support DestTy natively then do not apply
+      // this transformation.
       MVT DVT = TLI->getValueType(DestTy);
       if (!TLI->isTypeLegal(DVT)) continue;
     }
@@ -2380,8 +2414,6 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
   // TODO: implement optzns here.
 
   OptimizeShadowIV(L);
-
-  OptimizeLoopTermCond(L);
 }
 
 /// OptimizeLoopTermCond - Change loop terminating condition to use the
@@ -2391,23 +2423,78 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
   // can, we want to change it to use a post-incremented version of its
   // induction variable, to allow coalescing the live ranges for the IV into
   // one register value.
-  PHINode *SomePHI = cast<PHINode>(L->getHeader()->begin());
-  BasicBlock *Preheader = L->getLoopPreheader();
-  BasicBlock *LatchBlock =
-    SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader);
-  BranchInst *TermBr = dyn_cast<BranchInst>(LatchBlock->getTerminator());
-  if (!TermBr || TermBr->isUnconditional() ||
-      !isa<ICmpInst>(TermBr->getCondition()))
+  BasicBlock *LatchBlock = L->getLoopLatch();
+  BasicBlock *ExitBlock = L->getExitingBlock();
+  if (!ExitBlock)
+    // Multiple exits, just look at the exit in the latch block if there is one.
+    ExitBlock = LatchBlock;
+  BranchInst *TermBr = dyn_cast<BranchInst>(ExitBlock->getTerminator());
+  if (!TermBr)
+    return;
+  if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
     return;
-  ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
 
   // Search IVUsesByStride to find Cond's IVUse if there is one.
   IVStrideUse *CondUse = 0;
   const SCEVHandle *CondStride = 0;
-
+  ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
   if (!FindIVUserForCond(Cond, CondUse, CondStride))
     return; // setcc doesn't use the IV.
+
+  if (ExitBlock != LatchBlock) {
+    if (!Cond->hasOneUse())
+      // See below, we don't want the condition to be cloned.
+      return;
+
+    // If exiting block is the latch block, we know it's safe and profitable to
+    // transform the icmp to use post-inc iv. Otherwise do so only if it would
+    // not reuse another iv and its iv would be reused by other uses. We are
+    // optimizing for the case where the icmp is the only use of the iv.
+    IVUsersOfOneStride &StrideUses = IVUsesByStride[*CondStride];
+    for (unsigned i = 0, e = StrideUses.Users.size(); i != e; ++i) {
+      if (StrideUses.Users[i].User == Cond)
+        continue;
+      if (!StrideUses.Users[i].isUseOfPostIncrementedValue)
+        return;
+    }
+
+    // FIXME: This is expensive, and worse still ChangeCompareStride does a
+    // similar check. Can we perform all the icmp related transformations after
+    // StrengthReduceStridedIVUsers?
+    if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride)) {
+      int64_t SInt = SC->getValue()->getSExtValue();
+      for (unsigned NewStride = 0, ee = StrideOrder.size(); NewStride != ee;
+           ++NewStride) {
+        std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
+          IVUsesByStride.find(StrideOrder[NewStride]);
+        if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride)
+          continue;
+        int64_t SSInt =
+          cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
+        if (SSInt == SInt)
+          return; // This can definitely be reused.
+        if (unsigned(abs(SSInt)) < SInt || (SSInt % SInt) != 0)
+          continue;
+        int64_t Scale = SSInt / SInt;
+        bool AllUsesAreAddresses = true;
+        bool AllUsesAreOutsideLoop = true;
+        std::vector<BasedUser> UsersToProcess;
+        SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L,
+                                                AllUsesAreAddresses,
+                                                AllUsesAreOutsideLoop,
+                                                UsersToProcess);
+        // Avoid rewriting the compare instruction with an iv of new stride
+        // if it's likely the new stride uses will be rewritten using the
+        // stride of the compare instruction.
+        if (AllUsesAreAddresses &&
+            ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
+          return;
+      }
+    }
+
+    StrideNoReuse.insert(*CondStride);
+  }
 
   // If the trip count is computed in terms of an smax (due to ScalarEvolution
   // being unable to find a sufficient guard, for example), change the loop
   // comparison to use SLT instead of NE.
@@ -2415,6 +2502,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
 
   // If possible, change stride and operands of the compare instruction to
   // eliminate one stride.
+  if (ExitBlock == LatchBlock)
     Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
 
   // It's possible for the setcc instruction to be anywhere in the loop, and
@@ -2442,6 +2530,8 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
     CondUse->Offset = SE->getMinusSCEV(CondUse->Offset, *CondStride);
     CondUse->isUseOfPostIncrementedValue = true;
     Changed = true;
+
+    ++NumLoopCond;
   }
 
 // OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
@@ -2582,6 +2672,11 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
   // computation of some other indvar to decide when to terminate the loop.
   OptimizeIndvars(L);
 
+  // Change loop terminating condition to use the postinc iv when possible
+  // and optimize loop terminating compare. FIXME: Move this after
+  // StrengthReduceStridedIVUsers?
+  OptimizeLoopTermCond(L);
+
   // FIXME: We can shrink overlarge IV's here. e.g. if the code has
   // computation in i64 values and the target doesn't support i64, demote
   // the computation to 32-bit if safe.
@@ -2616,6 +2711,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
     IVUsesByStride.clear();
     IVsByStride.clear();
     StrideOrder.clear();
+    StrideNoReuse.clear();
 
     // Clean up after ourselves
     if (!DeadInsts.empty())


@@ -0,0 +1,134 @@
; RUN: llvm-as < %s | llc -march=x86-64 | %prcontext decq 1 | grep jne
@Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5]
@Te1 = external global [256 x i32] ; <[256 x i32]*> [#uses=4]
@Te3 = external global [256 x i32] ; <[256 x i32]*> [#uses=2]
define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind ssp {
entry:
%0 = load i32* %rk, align 4 ; <i32> [#uses=1]
%1 = getelementptr i32* %rk, i64 1 ; <i32*> [#uses=1]
%2 = load i32* %1, align 4 ; <i32> [#uses=1]
%tmp15 = add i32 %r, -1 ; <i32> [#uses=1]
%tmp.16 = zext i32 %tmp15 to i64 ; <i64> [#uses=2]
br label %bb
bb: ; preds = %bb1, %entry
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ] ; <i64> [#uses=3]
%s1.0 = phi i32 [ %2, %entry ], [ %56, %bb1 ] ; <i32> [#uses=2]
%s0.0 = phi i32 [ %0, %entry ], [ %43, %bb1 ] ; <i32> [#uses=2]
%tmp18 = shl i64 %indvar, 4 ; <i64> [#uses=4]
%rk26 = bitcast i32* %rk to i8* ; <i8*> [#uses=6]
%3 = lshr i32 %s0.0, 24 ; <i32> [#uses=1]
%4 = zext i32 %3 to i64 ; <i64> [#uses=1]
%5 = getelementptr [256 x i32]* @Te0, i64 0, i64 %4 ; <i32*> [#uses=1]
%6 = load i32* %5, align 4 ; <i32> [#uses=1]
%7 = lshr i32 %s1.0, 16 ; <i32> [#uses=1]
%8 = and i32 %7, 255 ; <i32> [#uses=1]
%9 = zext i32 %8 to i64 ; <i64> [#uses=1]
%10 = getelementptr [256 x i32]* @Te1, i64 0, i64 %9 ; <i32*> [#uses=1]
%11 = load i32* %10, align 4 ; <i32> [#uses=1]
%ctg2.sum2728 = or i64 %tmp18, 8 ; <i64> [#uses=1]
%12 = getelementptr i8* %rk26, i64 %ctg2.sum2728 ; <i8*> [#uses=1]
%13 = bitcast i8* %12 to i32* ; <i32*> [#uses=1]
%14 = load i32* %13, align 4 ; <i32> [#uses=1]
%15 = xor i32 %11, %6 ; <i32> [#uses=1]
%16 = xor i32 %15, %14 ; <i32> [#uses=3]
%17 = lshr i32 %s1.0, 24 ; <i32> [#uses=1]
%18 = zext i32 %17 to i64 ; <i64> [#uses=1]
%19 = getelementptr [256 x i32]* @Te0, i64 0, i64 %18 ; <i32*> [#uses=1]
%20 = load i32* %19, align 4 ; <i32> [#uses=1]
%21 = and i32 %s0.0, 255 ; <i32> [#uses=1]
%22 = zext i32 %21 to i64 ; <i64> [#uses=1]
%23 = getelementptr [256 x i32]* @Te3, i64 0, i64 %22 ; <i32*> [#uses=1]
%24 = load i32* %23, align 4 ; <i32> [#uses=1]
%ctg2.sum2930 = or i64 %tmp18, 12 ; <i64> [#uses=1]
%25 = getelementptr i8* %rk26, i64 %ctg2.sum2930 ; <i8*> [#uses=1]
%26 = bitcast i8* %25 to i32* ; <i32*> [#uses=1]
%27 = load i32* %26, align 4 ; <i32> [#uses=1]
%28 = xor i32 %24, %20 ; <i32> [#uses=1]
%29 = xor i32 %28, %27 ; <i32> [#uses=4]
%30 = lshr i32 %16, 24 ; <i32> [#uses=1]
%31 = zext i32 %30 to i64 ; <i64> [#uses=1]
%32 = getelementptr [256 x i32]* @Te0, i64 0, i64 %31 ; <i32*> [#uses=1]
%33 = load i32* %32, align 4 ; <i32> [#uses=2]
%exitcond = icmp eq i64 %indvar, %tmp.16 ; <i1> [#uses=1]
br i1 %exitcond, label %bb2, label %bb1
bb1: ; preds = %bb
%ctg2.sum31 = add i64 %tmp18, 16 ; <i64> [#uses=1]
%34 = getelementptr i8* %rk26, i64 %ctg2.sum31 ; <i8*> [#uses=1]
%35 = bitcast i8* %34 to i32* ; <i32*> [#uses=1]
%36 = lshr i32 %29, 16 ; <i32> [#uses=1]
%37 = and i32 %36, 255 ; <i32> [#uses=1]
%38 = zext i32 %37 to i64 ; <i64> [#uses=1]
%39 = getelementptr [256 x i32]* @Te1, i64 0, i64 %38 ; <i32*> [#uses=1]
%40 = load i32* %39, align 4 ; <i32> [#uses=1]
%41 = load i32* %35, align 4 ; <i32> [#uses=1]
%42 = xor i32 %40, %33 ; <i32> [#uses=1]
%43 = xor i32 %42, %41 ; <i32> [#uses=1]
%44 = lshr i32 %29, 24 ; <i32> [#uses=1]
%45 = zext i32 %44 to i64 ; <i64> [#uses=1]
%46 = getelementptr [256 x i32]* @Te0, i64 0, i64 %45 ; <i32*> [#uses=1]
%47 = load i32* %46, align 4 ; <i32> [#uses=1]
%48 = and i32 %16, 255 ; <i32> [#uses=1]
%49 = zext i32 %48 to i64 ; <i64> [#uses=1]
%50 = getelementptr [256 x i32]* @Te3, i64 0, i64 %49 ; <i32*> [#uses=1]
%51 = load i32* %50, align 4 ; <i32> [#uses=1]
%ctg2.sum32 = add i64 %tmp18, 20 ; <i64> [#uses=1]
%52 = getelementptr i8* %rk26, i64 %ctg2.sum32 ; <i8*> [#uses=1]
%53 = bitcast i8* %52 to i32* ; <i32*> [#uses=1]
%54 = load i32* %53, align 4 ; <i32> [#uses=1]
%55 = xor i32 %51, %47 ; <i32> [#uses=1]
%56 = xor i32 %55, %54 ; <i32> [#uses=1]
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br label %bb
bb2: ; preds = %bb
%tmp10 = shl i64 %tmp.16, 4 ; <i64> [#uses=2]
%ctg2.sum = add i64 %tmp10, 16 ; <i64> [#uses=1]
%tmp1213 = getelementptr i8* %rk26, i64 %ctg2.sum ; <i8*> [#uses=1]
%57 = bitcast i8* %tmp1213 to i32* ; <i32*> [#uses=1]
%58 = and i32 %33, -16777216 ; <i32> [#uses=1]
%59 = lshr i32 %29, 16 ; <i32> [#uses=1]
%60 = and i32 %59, 255 ; <i32> [#uses=1]
%61 = zext i32 %60 to i64 ; <i64> [#uses=1]
%62 = getelementptr [256 x i32]* @Te1, i64 0, i64 %61 ; <i32*> [#uses=1]
%63 = load i32* %62, align 4 ; <i32> [#uses=1]
%64 = and i32 %63, 16711680 ; <i32> [#uses=1]
%65 = or i32 %64, %58 ; <i32> [#uses=1]
%66 = load i32* %57, align 4 ; <i32> [#uses=1]
%67 = xor i32 %65, %66 ; <i32> [#uses=2]
%68 = lshr i32 %29, 8 ; <i32> [#uses=1]
%69 = zext i32 %68 to i64 ; <i64> [#uses=1]
%70 = getelementptr [256 x i32]* @Te0, i64 0, i64 %69 ; <i32*> [#uses=1]
%71 = load i32* %70, align 4 ; <i32> [#uses=1]
%72 = and i32 %71, -16777216 ; <i32> [#uses=1]
%73 = and i32 %16, 255 ; <i32> [#uses=1]
%74 = zext i32 %73 to i64 ; <i64> [#uses=1]
%75 = getelementptr [256 x i32]* @Te1, i64 0, i64 %74 ; <i32*> [#uses=1]
%76 = load i32* %75, align 4 ; <i32> [#uses=1]
%77 = and i32 %76, 16711680 ; <i32> [#uses=1]
%78 = or i32 %77, %72 ; <i32> [#uses=1]
%ctg2.sum25 = add i64 %tmp10, 20 ; <i64> [#uses=1]
%79 = getelementptr i8* %rk26, i64 %ctg2.sum25 ; <i8*> [#uses=1]
%80 = bitcast i8* %79 to i32* ; <i32*> [#uses=1]
%81 = load i32* %80, align 4 ; <i32> [#uses=1]
%82 = xor i32 %78, %81 ; <i32> [#uses=2]
%83 = lshr i32 %67, 24 ; <i32> [#uses=1]
%84 = trunc i32 %83 to i8 ; <i8> [#uses=1]
store i8 %84, i8* %out, align 1
%85 = lshr i32 %67, 16 ; <i32> [#uses=1]
%86 = trunc i32 %85 to i8 ; <i8> [#uses=1]
%87 = getelementptr i8* %out, i64 1 ; <i8*> [#uses=1]
store i8 %86, i8* %87, align 1
%88 = getelementptr i8* %out, i64 4 ; <i8*> [#uses=1]
%89 = lshr i32 %82, 24 ; <i32> [#uses=1]
%90 = trunc i32 %89 to i8 ; <i8> [#uses=1]
store i8 %90, i8* %88, align 1
%91 = lshr i32 %82, 16 ; <i32> [#uses=1]
%92 = trunc i32 %91 to i8 ; <i8> [#uses=1]
%93 = getelementptr i8* %out, i64 5 ; <i8*> [#uses=1]
store i8 %92, i8* %93, align 1
ret void
}


@@ -16,7 +16,7 @@
 ;}
 
-define i32 @t(i32 %a, i32 %b) {
+define i32 @t(i32 %a, i32 %b) nounwind {
 entry:
 	%tmp1434 = icmp eq i32 %a, %b		; <i1> [#uses=1]
 	br i1 %tmp1434, label %bb17, label %bb.outer


@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 2
+; RUN: llvm-as < %s | llc -march=x86 | grep 4294967295 | grep mov | count 2
 
 %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
 %struct.ImgT = type { i8, i8*, i8*, %struct.FILE*, i32, i32, i32, i32, i8*, double*, float*, float*, float*, i32*, double, double, i32*, double*, i32*, i32* }