diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 28ecb98e5e4..6df3e782c82 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -13,9 +13,6 @@ // access for the first iteration, and then creating a new GEP instruction in // the loop to increment the value by the appropriate amount. // -// There are currently several deficiencies in the implementation, marked with -// FIXME in the code. -// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" @@ -25,10 +22,13 @@ #include "llvm/DerivedTypes.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/Debug.h" #include using namespace llvm; @@ -50,11 +50,40 @@ namespace { std::map Map; }; + struct IVUse { + /// Users - Keep track of all of the users of this stride as well as the + /// initial value. + std::vector > Users; + std::vector UserOperands; + + void addUser(SCEVHandle &SH, Instruction *U, Instruction *V) { + Users.push_back(std::make_pair(SH, U)); + UserOperands.push_back(V); + } + }; + + class LoopStrengthReduce : public FunctionPass { LoopInfo *LI; DominatorSet *DS; + ScalarEvolution *SE; + const TargetData *TD; + const Type *UIntPtrTy; bool Changed; unsigned MaxTargetAMSize; + + /// IVUsesByStride - Keep track of all uses of induction variables that we + /// are interested in. The key of the map is the stride of the access. + std::map IVUsesByStride; + + /// CastedBasePointers - As we need to lower getelementptr instructions, we + /// cast the pointer input to uintptr_t. 
This keeps track of the casted + /// values for the pointers we have processed so far. + std::map CastedBasePointers; + + /// DeadInsts - Keep track of instructions we may have made dead, so that + /// we can remove them after we are done working. + std::set DeadInsts; public: LoopStrengthReduce(unsigned MTAMS = 1) : MaxTargetAMSize(MTAMS) { @@ -63,6 +92,9 @@ namespace { virtual bool runOnFunction(Function &) { LI = &getAnalysis(); DS = &getAnalysis(); + SE = &getAnalysis(); + TD = &getAnalysis(); + UIntPtrTy = TD->getIntPtrType(); Changed = false; for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) @@ -76,9 +108,17 @@ namespace { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); } private: void runOnLoop(Loop *L); + bool AddUsersIfInteresting(Instruction *I, Loop *L); + void AnalyzeGetElementPtrUsers(GetElementPtrInst *GEP, Instruction *I, + Loop *L); + + void StrengthReduceStridedIVUsers(Value *Stride, IVUse &Uses, Loop *L, + bool isOnlyStride); + void strengthReduceGEP(GetElementPtrInst *GEPI, Loop *L, GEPCache* GEPCache, Instruction *InsertBefore, @@ -111,207 +151,505 @@ DeleteTriviallyDeadInstructions(std::set &Insts) { } } -void LoopStrengthReduce::strengthReduceGEP(GetElementPtrInst *GEPI, Loop *L, - GEPCache *Cache, - Instruction *InsertBefore, - std::set &DeadInsts) { - // We will strength reduce the GEP by splitting it into two parts. The first - // is a GEP to hold the initial value of the non-strength-reduced GEP upon - // entering the loop, which we will insert at the end of the loop preheader. - // The second is a GEP to hold the incremented value of the initial GEP. - // The LoopIndVarSimplify pass guarantees that loop counts start at zero, so - // we will replace the indvar with a constant zero value to create the first - // GEP. 
- // - // We currently only handle GEP instructions that consist of zero or more - // constants or loop invariable expressions prior to an instance of the - // canonical induction variable. - unsigned indvar = 0; - std::vector pre_op_vector; - std::vector inc_op_vector; - const Type *ty = GEPI->getOperand(0)->getType(); - Value *CanonicalIndVar = L->getCanonicalInductionVariable(); - BasicBlock *Header = L->getHeader(); - BasicBlock *Preheader = L->getLoopPreheader(); - bool AllConstantOperands = true; - Cache = Cache->get(GEPI->getOperand(0)); - for (unsigned op = 1, e = GEPI->getNumOperands(); op != e; ++op) { - Value *operand = GEPI->getOperand(op); - if (ty->getTypeID() == Type::StructTyID) { - assert(isa(operand)); - ConstantUInt *c = dyn_cast(operand); - ty = ty->getContainedType(unsigned(c->getValue())); - } else { - ty = ty->getContainedType(0); - } +/// CanReduceSCEV - Return true if we can strength reduce this scalar evolution +/// in the specified loop. +static bool CanReduceSCEV(const SCEVHandle &SH, Loop *L) { + SCEVAddRecExpr *AddRec = dyn_cast(SH); + if (!AddRec || AddRec->getLoop() != L) return false; - if (operand == CanonicalIndVar) { - // FIXME: use getCanonicalInductionVariableIncrement to choose between - // one and neg one maybe? We need to support int *foo = GEP base, -1 - const Type *Ty = CanonicalIndVar->getType(); - pre_op_vector.push_back(Constant::getNullValue(Ty)); - inc_op_vector.push_back(ConstantInt::get(Ty, 1)); - indvar = op; - break; - } else if (isa(operand)) { - pre_op_vector.push_back(operand); - AllConstantOperands = false; - } else if (isa(operand)) { - pre_op_vector.push_back(operand); - } else if (Instruction *inst = dyn_cast(operand)) { - if (!DS->dominates(inst, Preheader->getTerminator())) - return; - pre_op_vector.push_back(operand); - AllConstantOperands = false; - } else { - return; // Cannot handle this. 
- } - Cache = Cache->get(operand); - } - assert(indvar > 0 && "Indvar used by GEP not found in operand list"); + // FIXME: Generalize to non-affine IV's. + if (!AddRec->isAffine()) return false; - // Ensure the pointer base is loop invariant. While strength reduction - // makes sense even if the pointer changed on every iteration, there is no - // realistic way of handling it unless GEPs were completely decomposed into - // their constituent operations so we have explicit multiplications to work - // with. - if (Instruction *GepPtrOp = dyn_cast(GEPI->getOperand(0))) - if (!DS->dominates(GepPtrOp, Preheader->getTerminator())) - return; + // FIXME: generalize to IV's with more complex strides (must emit stride + // expression outside of loop!) + if (isa(AddRec->getOperand(1))) + return true; - // Don't reduce multiplies that the target can handle via addressing modes. - uint64_t sz = getAnalysis().getTypeSize(ty); - if (sz && (sz & (sz-1)) == 0) // Power of two? - if (sz <= (1ULL << (MaxTargetAMSize-1))) - return; - - // If all operands of the GEP we are going to insert into the preheader - // are constants, generate a GEP ConstantExpr instead. - // - // If there is only one operand after the initial non-constant one, we know - // that it was the induction variable, and has been replaced by a constant - // null value. In this case, replace the GEP with a use of pointer directly. 
- PHINode *NewPHI; - if (Cache->CachedPHINode == 0) { - Value *PreGEP; - if (AllConstantOperands && isa(GEPI->getOperand(0))) { - Constant *C = dyn_cast(GEPI->getOperand(0)); - PreGEP = ConstantExpr::getGetElementPtr(C, pre_op_vector); - } else if (pre_op_vector.size() == 1) { - PreGEP = GEPI->getOperand(0); - } else { - PreGEP = new GetElementPtrInst(GEPI->getOperand(0), - pre_op_vector, GEPI->getName()+".pre", - Preheader->getTerminator()); - } - - // The next step of the strength reduction is to create a PHI that will - // choose between the initial GEP we created and inserted into the - // preheader, and the incremented GEP that we will create below and insert - // into the loop body. - NewPHI = new PHINode(PreGEP->getType(), - GEPI->getName()+".str", InsertBefore); - NewPHI->addIncoming(PreGEP, Preheader); - - // Now, create the GEP instruction to increment by one the value selected - // by the PHI instruction we just created above, and add it as the second - // incoming Value/BasicBlock pair to the PHINode. It is inserted before - // the increment of the canonical induction variable. - Instruction *IncrInst = - const_cast(L->getCanonicalInductionVariableIncrement()); - GetElementPtrInst *StrGEP = new GetElementPtrInst(NewPHI, inc_op_vector, - GEPI->getName()+".inc", - IncrInst); - pred_iterator PI = pred_begin(Header); - if (*PI == Preheader) - ++PI; - NewPHI->addIncoming(StrGEP, *PI); - Cache->CachedPHINode = NewPHI; - } else { - // Reuse previously created pointer, as it is identical to the one we were - // about to create. - NewPHI = Cache->CachedPHINode; - } - - if (GEPI->getNumOperands() - 1 == indvar) { - // If there were no operands following the induction variable, replace all - // uses of the old GEP instruction with the new PHI. - GEPI->replaceAllUsesWith(NewPHI); - } else { - // Create a new GEP instruction using the new PHI as the base. The - // operands of the original GEP past the induction variable become - // operands of this new GEP. 
- std::vector op_vector; - const Type *Ty = CanonicalIndVar->getType(); - op_vector.push_back(Constant::getNullValue(Ty)); - for (unsigned op = indvar + 1; op < GEPI->getNumOperands(); op++) - op_vector.push_back(GEPI->getOperand(op)); - GetElementPtrInst *newGEP = new GetElementPtrInst(NewPHI, op_vector, - GEPI->getName() + ".lsr", - GEPI); - GEPI->replaceAllUsesWith(newGEP); - } - - // The old GEP is now dead. - DeadInsts.insert(GEPI); - ++NumReduced; + // We handle steps by unsigned values, because we know we won't have to insert + // a cast for them. + if (SCEVUnknown *SU = dyn_cast(AddRec->getOperand(1))) + if (SU->getValue()->getType()->isUnsigned()) + return true; + + // Otherwise, no, we can't handle it yet. + return false; } + +/// GetAdjustedIndex - Adjust the specified GEP sequential type index to match +/// the size of the pointer type, and scale it by the type size. +static SCEVHandle GetAdjustedIndex(const SCEVHandle &Idx, uint64_t TySize, + const Type *UIntPtrTy) { + SCEVHandle Result = Idx; + if (Result->getType()->getUnsignedVersion() != UIntPtrTy) { + if (UIntPtrTy->getPrimitiveSize() < Result->getType()->getPrimitiveSize()) + Result = SCEVTruncateExpr::get(Result, UIntPtrTy); + else + Result = SCEVZeroExtendExpr::get(Result, UIntPtrTy); + } + + // This index is scaled by the type size being indexed. + if (TySize != 1) + Result = SCEVMulExpr::get(Result, + SCEVConstant::get(ConstantUInt::get(UIntPtrTy, + TySize))); + return Result; +} + +/// AnalyzeGetElementPtrUsers - Analyze all of the users of the specified +/// getelementptr instruction, adding them to the IVUsesByStride table. Note +/// that we only want to analyze a getelementptr instruction once, and it can +/// have multiple operands that are uses of the indvar (e.g. A[i][i]). Because +/// of this, we only process a GEP instruction if its first recurrent operand is +/// "op", otherwise we will either have already processed it or we will sometime +/// later. 
+void LoopStrengthReduce::AnalyzeGetElementPtrUsers(GetElementPtrInst *GEP, + Instruction *Op, Loop *L) { + // Analyze all of the subscripts of this getelementptr instruction, looking + // for uses that are determined by the trip count of L. First, skip all + // operands that are not dependent on the IV. + + // Build up the base expression. Insert an LLVM cast of the pointer to + // uintptr_t first. + Value *BasePtr; + if (Constant *CB = dyn_cast(GEP->getOperand(0))) + BasePtr = ConstantExpr::getCast(CB, UIntPtrTy); + else { + Value *&BP = CastedBasePointers[GEP->getOperand(0)]; + if (BP == 0) { + BasicBlock::iterator InsertPt; + if (isa(GEP->getOperand(0))) { + InsertPt = GEP->getParent()->getParent()->begin()->begin(); + } else { + InsertPt = cast(GEP->getOperand(0)); + if (InvokeInst *II = dyn_cast(GEP->getOperand(0))) + InsertPt = II->getNormalDest()->begin(); + else + ++InsertPt; + } + BP = new CastInst(GEP->getOperand(0), UIntPtrTy, + GEP->getOperand(0)->getName(), InsertPt); + } + BasePtr = BP; + } + + SCEVHandle Base = SCEVUnknown::get(BasePtr); + + gep_type_iterator GTI = gep_type_begin(GEP); + unsigned i = 1; + for (; GEP->getOperand(i) != Op; ++i, ++GTI) { + // If this is a use of a recurrence that we can analyze, and it comes before + // Op does in the GEP operand list, we will handle this when we process this + // operand. + if (const StructType *STy = dyn_cast(*GTI)) { + const StructLayout *SL = TD->getStructLayout(STy); + unsigned Idx = cast(GEP->getOperand(i))->getValue(); + uint64_t Offset = SL->MemberOffsets[Idx]; + Base = SCEVAddExpr::get(Base, SCEVUnknown::getIntegerSCEV(Offset, + UIntPtrTy)); + } else { + SCEVHandle Idx = SE->getSCEV(GEP->getOperand(i)); + if (CanReduceSCEV(Idx, L)) + return; + Base = SCEVAddExpr::get(Base, GetAdjustedIndex(Idx, + TD->getTypeSize(GTI.getIndexedType()), UIntPtrTy)); + } + } + + // Get the index, convert it to intptr_t. 
+ SCEVHandle GEPIndexExpr = + GetAdjustedIndex(SE->getSCEV(Op), TD->getTypeSize(GTI.getIndexedType()), + UIntPtrTy); + + // Process all remaining subscripts in the GEP instruction. + for (++i, ++GTI; i != GEP->getNumOperands(); ++i, ++GTI) + if (const StructType *STy = dyn_cast(*GTI)) { + const StructLayout *SL = TD->getStructLayout(STy); + unsigned Idx = cast(GEP->getOperand(i))->getValue(); + uint64_t Offset = SL->MemberOffsets[Idx]; + Base = SCEVAddExpr::get(Base, SCEVUnknown::getIntegerSCEV(Offset, + UIntPtrTy)); + } else { + SCEVHandle Idx = SE->getSCEV(GEP->getOperand(i)); + if (CanReduceSCEV(Idx, L)) { // Another IV subscript + GEPIndexExpr = SCEVAddExpr::get(GEPIndexExpr, + GetAdjustedIndex(Idx, TD->getTypeSize(GTI.getIndexedType()), + UIntPtrTy)); + assert(CanReduceSCEV(GEPIndexExpr, L) && + "Cannot reduce the sum of two reducible SCEV's??"); + } else { + Base = SCEVAddExpr::get(Base, GetAdjustedIndex(Idx, + TD->getTypeSize(GTI.getIndexedType()), UIntPtrTy)); + } + } + + assert(CanReduceSCEV(GEPIndexExpr, L) && "Non reducible idx??"); + + Base = SCEVAddExpr::get(Base, cast(GEPIndexExpr)->getStart()); + SCEVHandle Stride = cast(GEPIndexExpr)->getOperand(1); + + DEBUG(std::cerr << "GEP BASE : " << *Base << "\n"); + DEBUG(std::cerr << "GEP STRIDE: " << *Stride << "\n"); + + Value *Step = 0; // Step of ISE. + if (SCEVConstant *SC = dyn_cast(Stride)) + /// Always get the step value as an unsigned value. + Step = ConstantExpr::getCast(SC->getValue(), + SC->getValue()->getType()->getUnsignedVersion()); + else + Step = cast(Stride)->getValue(); + assert(Step->getType()->isUnsigned() && "Bad step value!"); + + + // Now that we know the base and stride contributed by the GEP instruction, + // process all users. + for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end(); + UI != E; ++UI) { + Instruction *User = cast(*UI); + + // Do not infinitely recurse on PHI nodes. 
+ if (isa(User) && User->getParent() == L->getHeader()) + continue; + + // If this is an instruction defined in a nested loop, or outside this loop, + // don't mess with it. + if (LI->getLoopFor(User->getParent()) != L) + continue; + + DEBUG(std::cerr << "FOUND USER: " << *User + << " OF STRIDE: " << *Step << " BASE = " << *Base << "\n"); + + + // Okay, we found a user that we cannot reduce. Analyze the instruction + // and decide what to do with it. + IVUsesByStride[Step].addUser(Base, User, GEP); + } +} + +/// AddUsersIfInteresting - Inspect the specified instruction. If it is a +/// reducible SCEV, recursively add its users to the IVUsesByStride set and +/// return true. Otherwise, return false. +bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L) { + if (I->getType() == Type::VoidTy) return false; + SCEVHandle ISE = SE->getSCEV(I); + if (!CanReduceSCEV(ISE, L)) return false; + + SCEVAddRecExpr *AR = cast(ISE); + SCEVHandle Start = AR->getStart(); + + // Get the step value, canonicalizing to an unsigned integer type so that + // lookups in the map will match. + Value *Step = 0; // Step of ISE. + if (SCEVConstant *SC = dyn_cast(AR->getOperand(1))) + /// Always get the step value as an unsigned value. + Step = ConstantExpr::getCast(SC->getValue(), + SC->getValue()->getType()->getUnsignedVersion()); + else + Step = cast(AR->getOperand(1))->getValue(); + assert(Step->getType()->isUnsigned() && "Bad step value!"); + + std::set AnalyzedGEPs; + + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;++UI){ + Instruction *User = cast(*UI); + + // Do not infinitely recurse on PHI nodes. + if (isa(User) && User->getParent() == L->getHeader()) + continue; + + // If this is an instruction defined in a nested loop, or outside this loop, + // don't mess with it. + if (LI->getLoopFor(User->getParent()) != L) + continue; + + // Next, see if this user is analyzable itself! 
+ if (!AddUsersIfInteresting(User, L)) { + if (GetElementPtrInst *GEP = dyn_cast(User)) { + // If this is a getelementptr instruction, figure out what linear + // expression of induction variable is actually being used. + // + if (AnalyzedGEPs.insert(GEP).second) // Not already analyzed? + AnalyzeGetElementPtrUsers(GEP, I, L); + } else { + DEBUG(std::cerr << "FOUND USER: " << *User + << " OF SCEV: " << *ISE << "\n"); + + // Okay, we found a user that we cannot reduce. Analyze the instruction + // and decide what to do with it. + IVUsesByStride[Step].addUser(Start, User, I); + } + } + } + return true; +} + +namespace { + /// BasedUser - For a particular base value, keep information about how we've + /// partitioned the expression so far. + struct BasedUser { + /// Inst - The instruction using the induction variable. + Instruction *Inst; + + /// Op - The value to replace with the EmittedBase. + Value *Op; + + /// Imm - The immediate value that should be added to the base immediately + /// before Inst, because it will be folded into the imm field of the + /// instruction. + SCEVHandle Imm; + + /// EmittedBase - The actual value* to use for the base value of this + /// operation. This is null if we should just use zero so far. + Value *EmittedBase; + + BasedUser(Instruction *I, Value *V, const SCEVHandle &IMM) + : Inst(I), Op(V), Imm(IMM), EmittedBase(0) {} + + + // No need to compare these. + bool operator<(const BasedUser &BU) const { return 0; } + + void dump() const; + }; +} + +void BasedUser::dump() const { + std::cerr << " Imm=" << *Imm; + if (EmittedBase) + std::cerr << " EB=" << *EmittedBase; + + std::cerr << " Inst: " << *Inst; +} + +/// isTargetConstant - Return true if the following can be referenced by the +/// immediate field of a target instruction. +static bool isTargetConstant(const SCEVHandle &V) { + + // FIXME: Look at the target to decide if &GV is a legal constant immediate. 
+ if (isa(V)) return true; + + return false; // ENABLE this for x86 + + if (SCEVUnknown *SU = dyn_cast(V)) + if (ConstantExpr *CE = dyn_cast(SU->getValue())) + if (CE->getOpcode() == Instruction::Cast) + if (isa(CE->getOperand(0))) + // FIXME: should check to see that the dest is uintptr_t! + return true; + return false; +} + +/// GetImmediateValues - Look at Val, and pull out any additions of constants +/// that can fit into the immediate field of instructions in the target. +static SCEVHandle GetImmediateValues(SCEVHandle Val, bool isAddress) { + if (!isAddress) + return SCEVUnknown::getIntegerSCEV(0, Val->getType()); + if (isTargetConstant(Val)) + return Val; + + SCEVAddExpr *SAE = dyn_cast(Val); + if (SAE) { + unsigned i = 0; + for (; i != SAE->getNumOperands(); ++i) + if (isTargetConstant(SAE->getOperand(i))) { + SCEVHandle ImmVal = SAE->getOperand(i); + + // If there are any other immediates that we can handle here, pull them + // out too. + for (++i; i != SAE->getNumOperands(); ++i) + if (isTargetConstant(SAE->getOperand(i))) + ImmVal = SCEVAddExpr::get(ImmVal, SAE->getOperand(i)); + return ImmVal; + } + } + + return SCEVUnknown::getIntegerSCEV(0, Val->getType()); +} + +/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single +/// stride of IV. All of the users may have different starting values, and this +/// may not be the only stride (we know it is if isOnlyStride is true). +void LoopStrengthReduce::StrengthReduceStridedIVUsers(Value *Stride, + IVUse &Uses, Loop *L, + bool isOnlyStride) { + // Transform our list of users and offsets to a bit more complex table. In + // this new vector, the first entry for each element is the base of the + // strided access, and the second is the BasedUser object for the use. We + // progressively move information from the first to the second entry, until we + // eventually emit the object. 
+ std::vector > UsersToProcess; + UsersToProcess.reserve(Uses.Users.size()); + + SCEVHandle ZeroBase = SCEVUnknown::getIntegerSCEV(0, + Uses.Users[0].first->getType()); + + for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i) + UsersToProcess.push_back(std::make_pair(Uses.Users[i].first, + BasedUser(Uses.Users[i].second, + Uses.UserOperands[i], + ZeroBase))); + + // First pass, figure out what we can represent in the immediate fields of + // instructions. If we can represent anything there, move it to the imm + // fields of the BasedUsers. + for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { + bool isAddress = isa(UsersToProcess[i].second.Inst) || + isa(UsersToProcess[i].second.Inst); + UsersToProcess[i].second.Imm = GetImmediateValues(UsersToProcess[i].first, + isAddress); + UsersToProcess[i].first = SCEV::getMinusSCEV(UsersToProcess[i].first, + UsersToProcess[i].second.Imm); + + DEBUG(std::cerr << "BASE: " << *UsersToProcess[i].first); + DEBUG(UsersToProcess[i].second.dump()); + } + + SCEVExpander Rewriter(*SE, *LI); + BasicBlock *Preheader = L->getLoopPreheader(); + Instruction *PreInsertPt = Preheader->getTerminator(); + Instruction *PhiInsertBefore = L->getHeader()->begin(); + + assert(isa(PhiInsertBefore) && + "How could this loop have IV's without any phis?"); + PHINode *SomeLoopPHI = cast(PhiInsertBefore); + assert(SomeLoopPHI->getNumIncomingValues() == 2 && + "This loop isn't canonicalized right"); + BasicBlock *LatchBlock = + SomeLoopPHI->getIncomingBlock(SomeLoopPHI->getIncomingBlock(0) == Preheader); + + // FIXME: This loop needs increasing levels of intelligence. + // STAGE 0: just emit everything as its own base. <-- We are here + // STAGE 1: factor out common vars from bases, and try and push resulting + // constants into Imm field. + // STAGE 2: factor out large constants to try and make more constants + // acceptable for target loads and stores. 
+ std::sort(UsersToProcess.begin(), UsersToProcess.end()); + + while (!UsersToProcess.empty()) { + // Create a new Phi for this base, and stick it in the loop header. + Value *Replaced = UsersToProcess.front().second.Op; + const Type *ReplacedTy = Replaced->getType(); + PHINode *NewPHI = new PHINode(ReplacedTy, Replaced->getName()+".str", + PhiInsertBefore); + + // Emit the initial base value into the loop preheader, and add it to the + // Phi node. + Value *BaseV = Rewriter.expandCodeFor(UsersToProcess.front().first, + PreInsertPt, ReplacedTy); + NewPHI->addIncoming(BaseV, Preheader); + + // Emit the increment of the base value before the terminator of the loop + // latch block, and add it to the Phi node. + SCEVHandle Inc = SCEVAddExpr::get(SCEVUnknown::get(NewPHI), + SCEVUnknown::get(Stride)); + + Value *IncV = Rewriter.expandCodeFor(Inc, LatchBlock->getTerminator(), + ReplacedTy); + IncV->setName(NewPHI->getName()+".inc"); + NewPHI->addIncoming(IncV, LatchBlock); + + // Emit the code to add the immediate offset to the Phi value, just before + // the instruction that we identified as using this stride and base. + // First, empty the SCEVExpander's expression map so that we are guaranteed + // to have the code emitted where we expect it. + Rewriter.clear(); + SCEVHandle NewValSCEV = SCEVAddExpr::get(SCEVUnknown::get(NewPHI), + UsersToProcess.front().second.Imm); + Value *newVal = Rewriter.expandCodeFor(NewValSCEV, + UsersToProcess.front().second.Inst, + ReplacedTy); + + // Replace the use of the operand Value with the new Phi we just created. + DEBUG(std::cerr << "REPLACING: " << *Replaced << "IN: " << + *UsersToProcess.front().second.Inst << "WITH: "<< *newVal << '\n'); + UsersToProcess.front().second.Inst->replaceUsesOfWith(Replaced, newVal); + + // Mark old value we replaced as possibly dead, so that it is eliminated + // if we just replaced the last use of that value. 
+ DeadInsts.insert(cast(Replaced)); + + UsersToProcess.erase(UsersToProcess.begin()); + ++NumReduced; + + // TODO: Next, find out which base index is the most common, pull it out. + } + + // IMPORTANT TODO: Figure out how to partition the IV's with this stride, but + // different starting values, into different PHIs. + + // BEFORE writing this, it's probably useful to handle GEP's. + + // NOTE: pull all constants together, for REG+IMM addressing, include &GV in + // 'IMM' if the target supports it. +} + + void LoopStrengthReduce::runOnLoop(Loop *L) { // First step, transform all loops nesting inside of this loop. for (LoopInfo::iterator I = L->begin(), E = L->end(); I != E; ++I) runOnLoop(*I); - // Next, get the first PHINode since it is guaranteed to be the canonical - // induction variable for the loop by the preceding IndVarSimplify pass. - PHINode *PN = L->getCanonicalInductionVariable(); - if (0 == PN) - return; + // Next, find all uses of induction variables in this loop, and categorize + // them by stride. Start by finding all of the PHI nodes in the header for + // this loop. If they are induction variables, inspect their uses. + for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) + AddUsersIfInteresting(I, L); - // FIXME: Need to use SCEV to detect GEP uses of the indvar, since indvars - // pass creates code like this, which we can't currently detect: - // %tmp.1 = sub uint 2000, %indvar - // %tmp.8 = getelementptr int* %y, uint %tmp.1 + // If we have nothing to do, return. + //if (IVUsesByStride.empty()) return; - // Strength reduce all GEPs in the Loop. Insert secondary PHI nodes for the - // strength reduced pointers we'll be creating after the canonical induction - // variable's PHI. 
- std::set DeadInsts; - GEPCache Cache; - for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end(); - UI != UE; ++UI) - if (GetElementPtrInst *GEPI = dyn_cast(*UI)) - strengthReduceGEP(GEPI, L, &Cache, PN->getNext(), DeadInsts); + // FIXME: We can widen subreg IV's here for RISC targets. e.g. instead of + // doing computation in byte values, promote to 32-bit values if safe. + + // FIXME: Attempt to reuse values across multiple IV's. In particular, we + // could have something like "for(i) { foo(i*8); bar(i*16) }", which should be + // codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC. Need + // to be careful that IV's are all the same type. Only works for intptr_t + // indvars. + + // If we only have one stride, we can more aggressively eliminate some things. + bool HasOneStride = IVUsesByStride.size() == 1; + + for (std::map::iterator SI = IVUsesByStride.begin(), + E = IVUsesByStride.end(); SI != E; ++SI) + StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride); // Clean up after ourselves if (!DeadInsts.empty()) { DeleteTriviallyDeadInstructions(DeadInsts); - // At this point, we know that we have killed one or more GEP instructions. - // It is worth checking to see if the cann indvar is also dead, so that we - // can remove it as well. The requirements for the cann indvar to be - // considered dead are: - // 1. the cann indvar has one use - // 2. the use is an add instruction - // 3. the add has one use - // 4. the add is used by the cann indvar - // If all four cases above are true, then we can remove both the add and - // the cann indvar. - // FIXME: this needs to eliminate an induction variable even if it's being - // compared against some value to decide loop termination. 
- if (PN->hasOneUse()) { - BinaryOperator *BO = dyn_cast(*(PN->use_begin())); - if (BO && BO->getOpcode() == Instruction::Add) - if (BO->hasOneUse()) { - if (PN == dyn_cast(*(BO->use_begin()))) { - DeadInsts.insert(BO); - // Break the cycle, then delete the PHI. - PN->replaceAllUsesWith(UndefValue::get(PN->getType())); - PN->eraseFromParent(); - DeleteTriviallyDeadInstructions(DeadInsts); + BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *PN; + for (; (PN = dyn_cast(I)); ++I) { + // At this point, we know that we have killed one or more GEP instructions. + // It is worth checking to see if the cann indvar is also dead, so that we + // can remove it as well. The requirements for the cann indvar to be + // considered dead are: + // 1. the cann indvar has one use + // 2. the use is an add instruction + // 3. the add has one use + // 4. the add is used by the cann indvar + // If all four cases above are true, then we can remove both the add and + // the cann indvar. + // FIXME: this needs to eliminate an induction variable even if it's being + // compared against some value to decide loop termination. + if (PN->hasOneUse()) { + BinaryOperator *BO = dyn_cast(*(PN->use_begin())); + if (BO && BO->getOpcode() == Instruction::Add) + if (BO->hasOneUse()) { + if (PN == dyn_cast(*(BO->use_begin()))) { + DeadInsts.insert(BO); + // Break the cycle, then delete the PHI. + PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + PN->eraseFromParent(); + } } - } + } } + DeleteTriviallyDeadInstructions(DeadInsts); } + + IVUsesByStride.clear(); + return; }