diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h index 310239823cf..b539a788a22 100644 --- a/include/llvm/Analysis/Dominators.h +++ b/include/llvm/Analysis/Dominators.h @@ -753,6 +753,12 @@ public: // special checks necessary if A and B are in the same basic block. bool dominates(const Instruction *A, const Instruction *B) const; + /// properlyDominates - Use this instead of dominates() to determine whether a + /// user of A can be hoisted above B. + bool properlyDominates(const Instruction *A, const Instruction *B) const { + return A != B && dominates(A, B); + } + bool properlyDominates(const DomTreeNode *A, const DomTreeNode *B) const { return DT->properlyDominates(A, B); } diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index 68414242b8f..c22fc3ab74b 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -114,9 +114,12 @@ namespace llvm { /// starts at zero and steps by one on each iteration. PHINode *getOrInsertCanonicalInductionVariable(const Loop *L, Type *Ty); - /// hoistStep - Utility for hoisting an IV increment. - static bool hoistStep(Instruction *IncV, Instruction *InsertPos, - const DominatorTree *DT); + /// getIVIncOperand - Return the induction variable increment's IV operand. + Instruction *getIVIncOperand(Instruction *IncV, Instruction *InsertPos, + bool allowScale); + + /// hoistIVInc - Utility for hoisting an IV increment. + bool hoistIVInc(Instruction *IncV, Instruction *InsertPos); /// replaceCongruentIVs - replace congruent phis with their most canonical /// representative. Return the number of phis eliminated. @@ -169,6 +172,13 @@ namespace llvm { Builder.ClearInsertionPoint(); } + /// isInsertedInstruction - Return true if the specified instruction was + /// inserted by the code rewriter. If so, the client should not modify the + /// instruction. 
+ bool isInsertedInstruction(Instruction *I) const { + return InsertedValues.count(I) || InsertedPostIncValues.count(I); + } + void setChainedPhi(PHINode *PN) { ChainedPhis.insert(PN); } private: @@ -205,13 +215,6 @@ namespace llvm { /// result will be expanded to have that type, with a cast if necessary. Value *expandCodeFor(const SCEV *SH, Type *Ty = 0); - /// isInsertedInstruction - Return true if the specified instruction was - /// inserted by the code rewriter. If so, the client should not modify the - /// instruction. - bool isInsertedInstruction(Instruction *I) const { - return InsertedValues.count(I) || InsertedPostIncValues.count(I); - } - /// getRelevantLoop - Determine the most "relevant" loop for the given SCEV. const Loop *getRelevantLoop(const SCEV *); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index e2f75aa0ea0..0f7475673a4 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -38,8 +38,11 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, if (U->getType() == Ty) if (CastInst *CI = dyn_cast(U)) if (CI->getOpcode() == Op) { - // If the cast isn't where we want it, fix it. - if (BasicBlock::iterator(CI) != IP) { + // If the cast isn't where we want it, fix it. All new or reused + // instructions must strictly dominate the Builder's InsertPt to + // ensure that the expression's expansion dominates its uses. + if (BasicBlock::iterator(CI) != IP + || IP == Builder.GetInsertPoint()) { // Create a new cast, and leave the old cast in place in case // it is being used as an insert point. Clear its operand // so that it doesn't hold anything live. @@ -867,6 +870,94 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, return isNormalAddRecExprPHI(PN, IncV, L); } +/// getIVIncOperand returns an induction variable increment's induction +/// variable operand. 
+///
+/// If allowScale is set, any type of GEP is allowed as long as the nonIV
+/// operands dominate InsertPos.
+///
+/// If allowScale is not set, ensure that a GEP increment conforms to one of the
+/// simple patterns generated by getAddRecExprPHILiterally and
+/// expandAddtoGEP. If the pattern isn't recognized, return NULL.
+Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
+                                           Instruction *InsertPos,
+                                           bool allowScale) {
+  if (IncV == InsertPos)
+    return NULL;
+
+  switch (IncV->getOpcode()) {
+  default:
+    return NULL;
+  // Check for a simple Add/Sub or GEP of a loop invariant step.
+  case Instruction::Add:
+  case Instruction::Sub: {
+    Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
+    if (!OInst || SE.DT->properlyDominates(OInst, InsertPos))
+      return dyn_cast<Instruction>(IncV->getOperand(0));
+    return NULL;
+  }
+  case Instruction::BitCast:
+    return dyn_cast<Instruction>(IncV->getOperand(0));
+  case Instruction::GetElementPtr:
+    for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end();
+         I != E; ++I) {
+      if (isa<Constant>(*I))
+        continue;
+      if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
+        if (!SE.DT->properlyDominates(OInst, InsertPos))
+          return NULL;
+      }
+      if (allowScale) {
+        // allow any kind of GEP as long as it can be hoisted.
+        continue;
+      }
+      // This must be a pointer addition of constants (pretty), which is already
+      // handled, or some number of address-size elements (ugly). Ugly geps
+      // have 2 operands. i1* is used by the expander to represent an
+      // address-size element.
+      if (IncV->getNumOperands() != 2)
+        return NULL;
+      unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
+      if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
+          && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
+        return NULL;
+      break;
+    }
+    return dyn_cast<Instruction>(IncV->getOperand(0));
+  }
+}
+
+/// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make
+/// it available to other uses in this loop. Recursively hoist any operands,
+/// until we reach a value that dominates InsertPos.
+bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
+  if (SE.DT->properlyDominates(IncV, InsertPos))
+    return true;
+
+  // InsertPos must itself dominate IncV so that IncV's new position satisfies
+  // its existing users.
+  if (!SE.DT->dominates(InsertPos->getParent(), IncV->getParent()))
+    return false;
+
+  // Check that the chain of IV operands leading back to Phi can be hoisted.
+  SmallVector<Instruction*, 4> IVIncs;
+  for(;;) {
+    Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
+    if (!Oper)
+      return false;
+    // IncV is safe to hoist.
+    IVIncs.push_back(IncV);
+    IncV = Oper;
+    if (SE.DT->properlyDominates(IncV, InsertPos))
+      break;
+  }
+  for (SmallVectorImpl<Instruction*>::reverse_iterator I = IVIncs.rbegin(),
+         E = IVIncs.rend(); I != E; ++I) {
+    (*I)->moveBefore(InsertPos);
+  }
+  return true;
+}
+
 /// Determine if this cyclic phi is in a form that would have been generated by
 /// LSR. We don't care if the phi was actually expanded in this pass, as long
 /// as it is in a low-cost form, for example, no implied multiplication. This
@@ -874,54 +965,13 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
 /// expandAddtoGEP.
 bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                            const Loop *L) {
-  if (ChainedPhis.count(PN))
-    return true;
-
-  switch (IncV->getOpcode()) {
-  // Check for a simple Add/Sub or GEP of a loop invariant step.
-  case Instruction::Add:
-  case Instruction::Sub:
-    return IncV->getOperand(0) == PN
-      && L->isLoopInvariant(IncV->getOperand(1));
-  case Instruction::BitCast:
-    IncV = dyn_cast<Instruction>(IncV->getOperand(0));
-    if (!IncV)
-      return false;
-    // fall-thru to GEP handling
-  case Instruction::GetElementPtr: {
-    // This must be a pointer addition of constants (pretty) or some number of
-    // address-size elements (ugly).
-    for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end();
-         I != E; ++I) {
-      if (isa<Constant>(*I))
-        continue;
-      // ugly geps have 2 operands.
-      // i1* is used by the expander to represent an address-size element.
-      if (IncV->getNumOperands() != 2)
-        return false;
-      unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
-      if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
-          && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
-        return false;
-      // Ensure the operands dominate the insertion point. I don't know of a
-      // case when this would not be true, so this is somewhat untested.
-      if (L == IVIncInsertLoop) {
-        for (User::op_iterator OI = IncV->op_begin()+1,
-               OE = IncV->op_end(); OI != OE; ++OI)
-          if (Instruction *OInst = dyn_cast<Instruction>(OI))
-            if (!SE.DT->dominates(OInst, IVIncInsertPos))
-              return false;
-      }
-      break;
-    }
-    IncV = dyn_cast<Instruction>(IncV->getOperand(0));
-    if (IncV && IncV->getOpcode() == Instruction::BitCast)
-      IncV = dyn_cast<Instruction>(IncV->getOperand(0));
-    return IncV == PN;
-  }
-  default:
-    return false;
+  for(Instruction *IVOper = IncV;
+      (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
+                                /*allowScale=*/false));) {
+    if (IVOper == PN)
+      return true;
   }
+  return false;
 }
 
 /// expandIVInc - Expand an IV increment at Builder's current InsertPos.
@@ -981,26 +1031,28 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
     if (LSRMode) {
       if (!isExpandedAddRecExprPHI(PN, IncV, L))
         continue;
+      if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos))
+        continue;
     }
     else {
       if (!isNormalAddRecExprPHI(PN, IncV, L))
         continue;
+      if (L == IVIncInsertLoop)
+        do {
+          if (SE.DT->dominates(IncV, IVIncInsertPos))
+            break;
+          // Make sure the increment is where we want it. But don't move it
+          // down past a potential existing post-inc user.
+          IncV->moveBefore(IVIncInsertPos);
+          IVIncInsertPos = IncV;
+          IncV = cast<Instruction>(IncV->getOperand(0));
+        } while (IncV != PN);
     }
     // Ok, the add recurrence looks usable.
// Remember this PHI, even in post-inc mode. InsertedValues.insert(PN); // Remember the increment. rememberInstruction(IncV); - if (L == IVIncInsertLoop) - do { - if (SE.DT->dominates(IncV, IVIncInsertPos)) - break; - // Make sure the increment is where we want it. But don't move it - // down past a potential existing post-inc user. - IncV->moveBefore(IVIncInsertPos); - IVIncInsertPos = IncV; - IncV = cast(IncV->getOperand(0)); - } while (IncV != PN); return PN; } } @@ -1404,10 +1456,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { } Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, - Instruction *I) { - BasicBlock::iterator IP = I; - while (isInsertedInstruction(IP) || isa(IP)) - ++IP; + Instruction *IP) { Builder.SetInsertPoint(IP->getParent(), IP); return expandCodeFor(SH, Ty); } @@ -1445,8 +1494,11 @@ Value *SCEVExpander::expand(const SCEV *S) { // there) so that it is guaranteed to dominate any user inside the loop. if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) InsertPt = L->getHeader()->getFirstInsertionPt(); - while (isInsertedInstruction(InsertPt) || isa(InsertPt)) + while (InsertPt != Builder.GetInsertPoint() + && (isInsertedInstruction(InsertPt) + || isa(InsertPt))) { InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); + } break; } @@ -1481,23 +1533,9 @@ void SCEVExpander::rememberInstruction(Value *I) { InsertedPostIncValues.insert(I); else InsertedValues.insert(I); - - // If we just claimed an existing instruction and that instruction had - // been the insert point, adjust the insert point forward so that - // subsequently inserted code will be dominated. 
- if (Builder.GetInsertPoint() == I) { - BasicBlock::iterator It = cast(I); - do { ++It; } while (isInsertedInstruction(It) || - isa(It)); - Builder.SetInsertPoint(Builder.GetInsertBlock(), It); - } } void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { - // If we acquired more instructions since the old insert point was saved, - // advance past them. - while (isInsertedInstruction(I) || isa(I)) ++I; - Builder.SetInsertPoint(BB, I); } @@ -1525,49 +1563,6 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, return V; } -/// hoistStep - Attempt to hoist an IV increment above a potential use. -/// -/// To successfully hoist, two criteria must be met: -/// - IncV operands dominate InsertPos and -/// - InsertPos dominates IncV -/// -/// Meeting the second condition means that we don't need to check all of IncV's -/// existing uses (it's moving up in the domtree). -/// -/// This does not yet recursively hoist the operands, although that would -/// not be difficult. -/// -/// This does not require a SCEVExpander instance and could be replaced by a -/// general code-insertion helper. -bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos, - const DominatorTree *DT) { - // Phi nodes are strangely positional but don't follow normal rules for - // instruction dominance. Handle them immediately. - if (isa(InsertPos)) - return isa(IncV); - else if (isa(IncV)) - return false; - - if (DT->dominates(IncV, InsertPos)) - return true; - - if (!DT->dominates(InsertPos->getParent(), IncV->getParent())) - return false; - - if (IncV->mayHaveSideEffects()) - return false; - - // Attempt to hoist IncV - for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end(); - OI != OE; ++OI) { - Instruction *OInst = dyn_cast(OI); - if (OInst && (OInst == InsertPos || !DT->dominates(OInst, InsertPos))) - return false; - } - IncV->moveBefore(InsertPos); - return true; -} - /// Sort values by integer width for replaceCongruentIVs. 
static bool width_descending(Value *lhs, Value *rhs) { // Put pointers at the back and make sure pointer < pointer = false. @@ -1632,10 +1627,13 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, cast(Phi->getIncomingValueForBlock(LatchBlock)); // If this phi has the same width but is more canonical, replace the - // original with it. + // original with it. As part of the "more canonical" determination, + // respect a prior decision to use an IV chain. if (OrigPhiRef->getType() == Phi->getType() - && !isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L) - && isExpandedAddRecExprPHI(Phi, IsomorphicInc, L)) { + && !(ChainedPhis.count(Phi) + || isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) + && (ChainedPhis.count(Phi) + || isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) { std::swap(OrigPhiRef, Phi); std::swap(OrigInc, IsomorphicInc); } @@ -1649,7 +1647,8 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, IsomorphicInc->getType()); if (OrigInc != IsomorphicInc && TruncExpr == SE.getSCEV(IsomorphicInc) - && hoistStep(OrigInc, IsomorphicInc, DT)) { + && ((isa(OrigInc) && isa(IsomorphicInc)) + || hoistIVInc(OrigInc, IsomorphicInc))) { DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv.inc: " << *IsomorphicInc << '\n'); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 6d52b22e015..ac1209f46f6 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -846,7 +846,7 @@ protected: const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU); - Instruction *WidenIVUse(NarrowIVDefUse DU); + Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); }; @@ -990,7 +990,7 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { /// WidenIVUse - Determine whether an individual user of the narrow IV can 
be /// widened. If so, return the wide clone of the user. -Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) { +Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // Stop traversing the def-use chain at inner-loop phis or post-loop phis. if (isa(DU.NarrowUse) && @@ -1058,7 +1058,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) { // NarrowUse. Instruction *WideUse = 0; if (WideAddRec == WideIncExpr - && SCEVExpander::hoistStep(WideInc, DU.NarrowUse, DT)) + && Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) WideUse = WideInc; else { WideUse = CloneIVUser(DU); @@ -1163,7 +1163,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Process a def-use edge. This may replace the use, so don't hold a // use_iterator across it. - Instruction *WideUse = WidenIVUse(DU); + Instruction *WideUse = WidenIVUse(DU, Rewriter); // Follow all def-use edges from the previous narrow use. if (WideUse) diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index baf566906b6..42f45ae6273 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1573,9 +1573,11 @@ class LSRInstance { BasicBlock::iterator HoistInsertPosition(BasicBlock::iterator IP, const SmallVectorImpl &Inputs) const; - BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP, - const LSRFixup &LF, - const LSRUse &LU) const; + BasicBlock::iterator + AdjustInsertPositionForExpand(BasicBlock::iterator IP, + const LSRFixup &LF, + const LSRUse &LU, + SCEVExpander &Rewriter) const; Value *Expand(const LSRFixup &LF, const Formula &F, @@ -4131,9 +4133,10 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, /// AdjustInsertPositionForExpand - Determine an input position which will be /// dominated by the operands and which will dominate the result. 
 BasicBlock::iterator
-LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
                                            const LSRFixup &LF,
-                                           const LSRUse &LU) const {
+                                           const LSRUse &LU,
+                                           SCEVExpander &Rewriter) const {
   // Collect some instructions which must be dominated by the
   // expanding replacement. These must be dominated by any operands that
   // will be required in the expansion.
@@ -4168,9 +4171,13 @@
     }
   }
 
+  assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
+         && !isa<DbgInfoIntrinsic>(LowestIP) &&
+         "Insertion point must be a normal instruction");
+
   // Then, climb up the immediate dominator tree as far as we can go while
   // still being dominated by the input positions.
-  IP = HoistInsertPosition(IP, Inputs);
+  BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
 
   // Don't insert instructions before PHI nodes.
   while (isa<PHINode>(IP)) ++IP;
@@ -4181,6 +4188,11 @@
   // Ignore debug intrinsics.
   while (isa<DbgInfoIntrinsic>(IP)) ++IP;
 
+  // Set IP below instructions recently inserted by SCEVExpander. This keeps the
+  // IP consistent across expansions and allows the previously inserted
+  // instructions to be reused by subsequent expansion.
+  while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP;
+
   return IP;
 }
 
@@ -4195,7 +4207,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
 
   // Determine an input position which will be dominated by the operands and
   // which will dominate the result.
-  IP = AdjustInsertPositionForExpand(IP, LF, LU);
+  IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
 
   // Inform the Rewriter if we have a post-increment use, so that it can
   // perform an advantageous expansion.
diff --git a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll index 08e35de0829..85084d93b14 100644 --- a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll +++ b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll @@ -48,3 +48,40 @@ cond.false35.i: ; preds = %for.end.i exit: ; preds = %cond.true29.i, %cond.true.i ret i32 0 } + +%struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771 = type { i32, i32, i32 } + +@tags = external global [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], align 16 + +; CHECK: @test2 +; CHECK: %entry +; CHECK-NOT: mov +; CHECK: jne +define void @test2(i32 %n) nounwind uwtable { +entry: + br i1 undef, label %while.end, label %for.cond468 + +for.cond468: ; preds = %if.then477, %entry + %indvars.iv1163 = phi i64 [ %indvars.iv.next1164, %if.then477 ], [ 1, %entry ] + %k.0.in = phi i32* [ %last, %if.then477 ], [ getelementptr inbounds ([5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 0, i32 2), %entry ] + %k.0 = load i32* %k.0.in, align 4 + %0 = trunc i64 %indvars.iv1163 to i32 + %cmp469 = icmp slt i32 %0, %n + br i1 %cmp469, label %for.body471, label %for.inc498 + +for.body471: ; preds = %for.cond468 + %first = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 1 + %1 = load i32* %first, align 4 + br i1 undef, label %if.then477, label %for.inc498 + +if.then477: ; preds = %for.body471 + %last = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, 
i32 2 + %indvars.iv.next1164 = add i64 %indvars.iv1163, 1 + br label %for.cond468 + +for.inc498: ; preds = %for.inc498, %for.body471, %for.cond468 + br label %for.inc498 + +while.end: ; preds = %entry + ret void +}