mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-11-01 15:17:25 +00:00
Add support for pointer induction variables even when there is no integer induction variable.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168558 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -106,9 +106,10 @@ class SingleBlockLoopVectorizer {
|
|||||||
public:
|
public:
|
||||||
/// Ctor.
|
/// Ctor.
|
||||||
SingleBlockLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li,
|
SingleBlockLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li,
|
||||||
DominatorTree *dt, LPPassManager *Lpm,
|
DominatorTree *dt, DataLayout *dl,
|
||||||
|
LPPassManager *Lpm,
|
||||||
unsigned VecWidth):
|
unsigned VecWidth):
|
||||||
OrigLoop(Orig), SE(Se), LI(Li), DT(dt), LPM(Lpm), VF(VecWidth),
|
OrigLoop(Orig), SE(Se), LI(Li), DT(dt), DL(dl), LPM(Lpm), VF(VecWidth),
|
||||||
Builder(Se->getContext()), Induction(0), OldInduction(0) { }
|
Builder(Se->getContext()), Induction(0), OldInduction(0) { }
|
||||||
|
|
||||||
// Perform the actual loop widening (vectorization).
|
// Perform the actual loop widening (vectorization).
|
||||||
@@ -167,6 +168,8 @@ private:
|
|||||||
LoopInfo *LI;
|
LoopInfo *LI;
|
||||||
// Dominator Tree.
|
// Dominator Tree.
|
||||||
DominatorTree *DT;
|
DominatorTree *DT;
|
||||||
|
// Data Layout;
|
||||||
|
DataLayout *DL;
|
||||||
// Loop Pass Manager;
|
// Loop Pass Manager;
|
||||||
LPPassManager *LPM;
|
LPPassManager *LPM;
|
||||||
// The vectorization factor to use.
|
// The vectorization factor to use.
|
||||||
@@ -250,10 +253,36 @@ public:
|
|||||||
// This POD struct holds information about the memory runtime legality
|
// This POD struct holds information about the memory runtime legality
|
||||||
// check that a group of pointers do not overlap.
|
// check that a group of pointers do not overlap.
|
||||||
struct RuntimePointerCheck {
|
struct RuntimePointerCheck {
|
||||||
|
RuntimePointerCheck(): Need(false) {}
|
||||||
|
|
||||||
|
/// Reset the state of the pointer runtime information.
|
||||||
|
void reset() {
|
||||||
|
Need = false;
|
||||||
|
Pointers.clear();
|
||||||
|
Starts.clear();
|
||||||
|
Ends.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Insert a pointer and calculate the start and end SCEVs.
|
||||||
|
void insert_pointer(ScalarEvolution *SE, Loop *Lp, Value *Ptr) {
|
||||||
|
const SCEV *Sc = SE->getSCEV(Ptr);
|
||||||
|
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
|
||||||
|
assert(AR && "Invalid addrec expression");
|
||||||
|
const SCEV *Ex = SE->getExitCount(Lp, Lp->getHeader());
|
||||||
|
const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
|
||||||
|
Pointers.push_back(Ptr);
|
||||||
|
Starts.push_back(AR->getStart());
|
||||||
|
Ends.push_back(ScEnd);
|
||||||
|
}
|
||||||
|
|
||||||
/// This flag indicates if we need to add the runtime check.
|
/// This flag indicates if we need to add the runtime check.
|
||||||
bool Need;
|
bool Need;
|
||||||
/// Holds the pointers that we need to check.
|
/// Holds the pointers that we need to check.
|
||||||
SmallVector<Value*, 2> Pointers;
|
SmallVector<Value*, 2> Pointers;
|
||||||
|
/// Holds the pointer value at the beginning of the loop.
|
||||||
|
SmallVector<const SCEV*, 2> Starts;
|
||||||
|
/// Holds the pointer value at the end of the loop.
|
||||||
|
SmallVector<const SCEV*, 2> Ends;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// ReductionList contains the reduction descriptors for all
|
/// ReductionList contains the reduction descriptors for all
|
||||||
@@ -278,11 +307,11 @@ public:
|
|||||||
/// Returns the induction variables found in the loop.
|
/// Returns the induction variables found in the loop.
|
||||||
InductionList *getInductionVars() { return &Inductions; }
|
InductionList *getInductionVars() { return &Inductions; }
|
||||||
|
|
||||||
/// Check if the pointer returned by this GEP is consecutive
|
/// Check if this pointer is consecutive when vectorizing. This happens
|
||||||
/// when the index is vectorized. This happens when the last
|
/// when the last index of the GEP is the induction variable, or when the
|
||||||
/// index of the GEP is consecutive, like the induction variable.
|
/// pointer itself is an induction variable.
|
||||||
/// This check allows us to vectorize A[idx] into a wide load/store.
|
/// This check allows us to vectorize A[idx] into a wide load/store.
|
||||||
bool isConsecutiveGep(Value *Ptr);
|
bool isConsecutivePtr(Value *Ptr);
|
||||||
|
|
||||||
/// Returns true if the value V is uniform within the loop.
|
/// Returns true if the value V is uniform within the loop.
|
||||||
bool isUniform(Value *V);
|
bool isUniform(Value *V);
|
||||||
@@ -451,7 +480,7 @@ struct LoopVectorize : public LoopPass {
|
|||||||
"\n");
|
"\n");
|
||||||
|
|
||||||
// If we decided that it is *legal* to vectorize the loop then do it.
|
// If we decided that it is *legal* to vectorize the loop then do it.
|
||||||
SingleBlockLoopVectorizer LB(L, SE, LI, DT, &LPM, VF);
|
SingleBlockLoopVectorizer LB(L, SE, LI, DT, DL, &LPM, VF);
|
||||||
LB.vectorize(&LVL);
|
LB.vectorize(&LVL);
|
||||||
|
|
||||||
DEBUG(verifyFunction(*L->getHeader()->getParent()));
|
DEBUG(verifyFunction(*L->getHeader()->getParent()));
|
||||||
@@ -472,10 +501,6 @@ struct LoopVectorize : public LoopPass {
|
|||||||
};
|
};
|
||||||
|
|
||||||
Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) {
|
Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) {
|
||||||
// Instructions that access the old induction variable
|
|
||||||
// actually want to get the new one.
|
|
||||||
if (V == OldInduction)
|
|
||||||
V = Induction;
|
|
||||||
// Create the types.
|
// Create the types.
|
||||||
LLVMContext &C = V->getContext();
|
LLVMContext &C = V->getContext();
|
||||||
Type *VTy = VectorType::get(V->getType(), VF);
|
Type *VTy = VectorType::get(V->getType(), VF);
|
||||||
@@ -515,7 +540,14 @@ Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) {
|
|||||||
return Builder.CreateAdd(Val, Cv, "induction");
|
return Builder.CreateAdd(Val, Cv, "induction");
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) {
|
bool LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
|
||||||
|
assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr");
|
||||||
|
|
||||||
|
// If this pointer is an induction variable, return it.
|
||||||
|
PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
|
||||||
|
if (Phi && getInductionVars()->count(Phi))
|
||||||
|
return true;
|
||||||
|
|
||||||
GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
|
GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
|
||||||
if (!Gep)
|
if (!Gep)
|
||||||
return false;
|
return false;
|
||||||
@@ -576,7 +608,7 @@ void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
|
|||||||
|
|
||||||
// If we are accessing the old induction variable, use the new one.
|
// If we are accessing the old induction variable, use the new one.
|
||||||
if (SrcOp == OldInduction) {
|
if (SrcOp == OldInduction) {
|
||||||
Params.push_back(getBroadcastInstrs(Induction));
|
Params.push_back(getVectorValue(Induction));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -666,9 +698,13 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
|
|||||||
...
|
...
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// Some loops have a single integer induction variable, while other loops
|
||||||
|
// don't. One example is c++ iterators that often have multiple pointer
|
||||||
|
// induction variables. In the code below we also support a case where we
|
||||||
|
// don't have a single induction variable.
|
||||||
OldInduction = Legal->getInduction();
|
OldInduction = Legal->getInduction();
|
||||||
assert(OldInduction && "We must have a single phi node.");
|
Type *IdxTy = OldInduction ? OldInduction->getType() :
|
||||||
Type *IdxTy = OldInduction->getType();
|
DL->getIntPtrType(SE->getContext());
|
||||||
|
|
||||||
// Find the loop boundaries.
|
// Find the loop boundaries.
|
||||||
const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
|
const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
|
||||||
@@ -677,19 +713,18 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
|
|||||||
// Get the total trip count from the count by adding 1.
|
// Get the total trip count from the count by adding 1.
|
||||||
ExitCount = SE->getAddExpr(ExitCount,
|
ExitCount = SE->getAddExpr(ExitCount,
|
||||||
SE->getConstant(ExitCount->getType(), 1));
|
SE->getConstant(ExitCount->getType(), 1));
|
||||||
// We may need to extend the index in case there is a type mismatch.
|
|
||||||
// We know that the count starts at zero and does not overflow.
|
|
||||||
// We are using Zext because it should be less expensive.
|
|
||||||
if (ExitCount->getType() != IdxTy)
|
|
||||||
ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy);
|
|
||||||
|
|
||||||
// This is the original scalar-loop preheader.
|
// This is the original scalar-loop preheader.
|
||||||
BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
|
BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
|
||||||
BasicBlock *ExitBlock = OrigLoop->getExitBlock();
|
BasicBlock *ExitBlock = OrigLoop->getExitBlock();
|
||||||
assert(ExitBlock && "Must have an exit block");
|
assert(ExitBlock && "Must have an exit block");
|
||||||
|
|
||||||
// The loop index does not have to start at Zero. It starts with this value.
|
// The loop index does not have to start at Zero. Find the original start
|
||||||
Value *StartIdx = OldInduction->getIncomingValueForBlock(BypassBlock);
|
// value from the induction PHI node. If we don't have an induction variable
|
||||||
|
// then we know that it starts at zero.
|
||||||
|
Value *StartIdx = OldInduction ?
|
||||||
|
OldInduction->getIncomingValueForBlock(BypassBlock):
|
||||||
|
ConstantInt::get(IdxTy, 0);
|
||||||
|
|
||||||
assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop");
|
assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop");
|
||||||
assert(BypassBlock && "Invalid loop structure");
|
assert(BypassBlock && "Invalid loop structure");
|
||||||
@@ -721,7 +756,18 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
|
|||||||
Instruction *Loc = BypassBlock->getTerminator();
|
Instruction *Loc = BypassBlock->getTerminator();
|
||||||
|
|
||||||
// Count holds the overall loop count (N).
|
// Count holds the overall loop count (N).
|
||||||
Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc);
|
Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), Loc);
|
||||||
|
|
||||||
|
// We may need to extend the index in case there is a type mismatch.
|
||||||
|
// We know that the count starts at zero and does not overflow.
|
||||||
|
if (Count->getType() != IdxTy) {
|
||||||
|
// The exit count can be of pointer type. Convert it to the correct
|
||||||
|
// integer type.
|
||||||
|
if (ExitCount->getType()->isPointerTy())
|
||||||
|
Count = CastInst::CreatePointerCast(Count, IdxTy, "ptrcnt.to.int", Loc);
|
||||||
|
else
|
||||||
|
Count = CastInst::CreateZExtOrBitCast(Count, IdxTy, "zext.cnt", Loc);
|
||||||
|
}
|
||||||
|
|
||||||
// Add the start index to the loop count to get the new end index.
|
// Add the start index to the loop count to get the new end index.
|
||||||
Value *IdxEnd = BinaryOperator::CreateAdd(Count, StartIdx, "end.idx", Loc);
|
Value *IdxEnd = BinaryOperator::CreateAdd(Count, StartIdx, "end.idx", Loc);
|
||||||
@@ -734,7 +780,8 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
|
|||||||
Value *IdxEndRoundDown = BinaryOperator::CreateAdd(CountRoundDown, StartIdx,
|
Value *IdxEndRoundDown = BinaryOperator::CreateAdd(CountRoundDown, StartIdx,
|
||||||
"end.idx.rnd.down", Loc);
|
"end.idx.rnd.down", Loc);
|
||||||
|
|
||||||
// Now, compare the new count to zero. If it is zero, jump to the scalar part.
|
// Now, compare the new count to zero. If it is zero skip the vector loop and
|
||||||
|
// jump to the scalar loop.
|
||||||
Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
|
Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
|
||||||
IdxEndRoundDown,
|
IdxEndRoundDown,
|
||||||
StartIdx,
|
StartIdx,
|
||||||
@@ -762,23 +809,21 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
|
|||||||
Ends.push_back(Ptr);
|
Ends.push_back(Ptr);
|
||||||
} else {
|
} else {
|
||||||
DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
|
DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
|
||||||
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
|
|
||||||
Value *Start = Exp.expandCodeFor(AR->getStart(), PtrArithTy, Loc);
|
Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i],
|
||||||
const SCEV *Ex = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
|
PtrArithTy, Loc);
|
||||||
const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
|
Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
|
||||||
assert(!isa<SCEVCouldNotCompute>(ScEnd) && "Invalid scev range.");
|
|
||||||
Value *End = Exp.expandCodeFor(ScEnd, PtrArithTy, Loc);
|
|
||||||
Starts.push_back(Start);
|
Starts.push_back(Start);
|
||||||
Ends.push_back(End);
|
Ends.push_back(End);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned i=0; i < NumPointers; ++i) {
|
for (unsigned i = 0; i < NumPointers; ++i) {
|
||||||
for (unsigned j=i+1; j < NumPointers; ++j) {
|
for (unsigned j = i+1; j < NumPointers; ++j) {
|
||||||
Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
|
Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
|
||||||
Starts[0], Ends[1], "bound0", Loc);
|
Starts[i], Ends[j], "bound0", Loc);
|
||||||
Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
|
Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
|
||||||
Starts[1], Ends[0], "bound1", Loc);
|
Starts[j], Ends[i], "bound1", Loc);
|
||||||
Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1,
|
Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1,
|
||||||
"found.conflict", Loc);
|
"found.conflict", Loc);
|
||||||
if (MemoryRuntimeCheck) {
|
if (MemoryRuntimeCheck) {
|
||||||
@@ -812,7 +857,7 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
|
|||||||
// value.
|
// value.
|
||||||
|
|
||||||
// This variable saves the new starting index for the scalar loop.
|
// This variable saves the new starting index for the scalar loop.
|
||||||
Value *ResumeIndex = 0;
|
PHINode *ResumeIndex = 0;
|
||||||
LoopVectorizationLegality::InductionList::iterator I, E;
|
LoopVectorizationLegality::InductionList::iterator I, E;
|
||||||
LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
|
LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
|
||||||
for (I = List->begin(), E = List->end(); I != E; ++I) {
|
for (I = List->begin(), E = List->end(); I != E; ++I) {
|
||||||
@@ -830,7 +875,7 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
|
|||||||
} else {
|
} else {
|
||||||
// For pointer induction variables, calculate the offset using
|
// For pointer induction variables, calculate the offset using
|
||||||
// the end index.
|
// the end index.
|
||||||
EndValue = GetElementPtrInst::Create(I->second, IdxEndRoundDown,
|
EndValue = GetElementPtrInst::Create(I->second, CountRoundDown,
|
||||||
"ptr.ind.end",
|
"ptr.ind.end",
|
||||||
BypassBlock->getTerminator());
|
BypassBlock->getTerminator());
|
||||||
}
|
}
|
||||||
@@ -841,10 +886,22 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
|
|||||||
ResumeVal->addIncoming(EndValue, VecBody);
|
ResumeVal->addIncoming(EndValue, VecBody);
|
||||||
|
|
||||||
// Fix the scalar body counter (PHI node).
|
// Fix the scalar body counter (PHI node).
|
||||||
unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH);
|
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
|
||||||
OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
|
OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we are generating a new induction variable then we also need to
|
||||||
|
// generate the code that calculates the exit value. This value is not
|
||||||
|
// simply the end of the counter because we may skip the vectorized body
|
||||||
|
// in case of a runtime check.
|
||||||
|
if (!OldInduction){
|
||||||
|
assert(!ResumeIndex && "Unexpected resume value found");
|
||||||
|
ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val",
|
||||||
|
MiddleBlock->getTerminator());
|
||||||
|
ResumeIndex->addIncoming(StartIdx, BypassBlock);
|
||||||
|
ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
|
||||||
|
}
|
||||||
|
|
||||||
// Make sure that we found the index where the scalar loop needs to continue.
|
// Make sure that we found the index where the scalar loop needs to continue.
|
||||||
assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() &&
|
assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() &&
|
||||||
"Invalid resume Index");
|
"Invalid resume Index");
|
||||||
@@ -953,43 +1010,54 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
|||||||
continue;
|
continue;
|
||||||
case Instruction::PHI:{
|
case Instruction::PHI:{
|
||||||
PHINode* P = cast<PHINode>(Inst);
|
PHINode* P = cast<PHINode>(Inst);
|
||||||
// Special handling for the induction var.
|
|
||||||
if (OldInduction == Inst)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// Handle reduction variables:
|
// Handle reduction variables:
|
||||||
if (Legal->getReductionVars()->count(P)) {
|
if (Legal->getReductionVars()->count(P)) {
|
||||||
// This is phase one of vectorizing PHIs.
|
// This is phase one of vectorizing PHIs.
|
||||||
Type *VecTy = VectorType::get(Inst->getType(), VF);
|
Type *VecTy = VectorType::get(Inst->getType(), VF);
|
||||||
WidenMap[Inst] = Builder.CreatePHI(VecTy, 2, "vec.phi");
|
WidenMap[Inst] = PHINode::Create(VecTy, 2, "vec.phi",
|
||||||
|
LoopVectorBody->getFirstInsertionPt());
|
||||||
RdxPHIsToFix.push_back(P);
|
RdxPHIsToFix.push_back(P);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle pointer inductions:
|
// This PHINode must be an induction variable.
|
||||||
if (Legal->getInductionVars()->count(P)) {
|
// Make sure that we know about it.
|
||||||
Value *StartIdx = Legal->getInductionVars()->lookup(OldInduction);
|
assert(Legal->getInductionVars()->count(P) &&
|
||||||
Value *StartPtr = Legal->getInductionVars()->lookup(P);
|
"Not an induction variable");
|
||||||
// This is the normalized GEP that starts counting at zero.
|
|
||||||
Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
|
|
||||||
"normalized.idx");
|
|
||||||
// This is the first GEP in the sequence.
|
|
||||||
Value *FirstGep = Builder.CreateGEP(StartPtr, NormalizedIdx,
|
|
||||||
"induc.ptr");
|
|
||||||
// This is the vector of results. Notice that we don't generate vector
|
|
||||||
// geps because scalar geps result in better code.
|
|
||||||
Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
|
|
||||||
for (unsigned int i = 0; i < VF; ++i) {
|
|
||||||
Value *SclrGep = Builder.CreateGEP(FirstGep, Builder.getInt32(i),
|
|
||||||
"next.gep");
|
|
||||||
VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
|
|
||||||
Builder.getInt32(i),
|
|
||||||
"insert.gep");
|
|
||||||
}
|
|
||||||
|
|
||||||
WidenMap[Inst] = VecVal;
|
if (P->getType()->isIntegerTy()) {
|
||||||
|
assert(P == OldInduction && "Unexpected PHI");
|
||||||
|
WidenMap[Inst] = getBroadcastInstrs(Induction);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle pointer inductions:
|
||||||
|
assert(P->getType()->isPointerTy() && "Unexpected type.");
|
||||||
|
Value *StartIdx = OldInduction ?
|
||||||
|
Legal->getInductionVars()->lookup(OldInduction) :
|
||||||
|
ConstantInt::get(Induction->getType(), 0);
|
||||||
|
|
||||||
|
// This is the pointer value coming into the loop.
|
||||||
|
Value *StartPtr = Legal->getInductionVars()->lookup(P);
|
||||||
|
|
||||||
|
// This is the normalized GEP that starts counting at zero.
|
||||||
|
Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
|
||||||
|
"normalized.idx");
|
||||||
|
|
||||||
|
// This is the vector of results. Notice that we don't generate vector
|
||||||
|
// geps because scalar geps result in better code.
|
||||||
|
Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
|
||||||
|
for (unsigned int i = 0; i < VF; ++i) {
|
||||||
|
Constant *Idx = ConstantInt::get(Induction->getType(), i);
|
||||||
|
Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
|
||||||
|
Value *SclrGep = Builder.CreateGEP(StartPtr, GlobalIdx, "next.gep");
|
||||||
|
VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
|
||||||
|
Builder.getInt32(i),
|
||||||
|
"insert.gep");
|
||||||
|
}
|
||||||
|
|
||||||
|
WidenMap[Inst] = VecVal;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
case Instruction::Add:
|
case Instruction::Add:
|
||||||
case Instruction::FAdd:
|
case Instruction::FAdd:
|
||||||
@@ -1076,21 +1144,27 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
|||||||
GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
|
GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
|
||||||
|
|
||||||
// This store does not use GEPs.
|
// This store does not use GEPs.
|
||||||
if (!Legal->isConsecutiveGep(Gep)) {
|
if (!Legal->isConsecutivePtr(Ptr)) {
|
||||||
scalarizeInstruction(Inst);
|
scalarizeInstruction(Inst);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The last index does not have to be the induction. It can be
|
if (Gep) {
|
||||||
// consecutive and be a function of the index. For example A[I+1];
|
// The last index does not have to be the induction. It can be
|
||||||
unsigned NumOperands = Gep->getNumOperands();
|
// consecutive and be a function of the index. For example A[I+1];
|
||||||
Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
|
unsigned NumOperands = Gep->getNumOperands();
|
||||||
LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
|
Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
|
||||||
|
LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
|
||||||
|
|
||||||
// Create the new GEP with the new induction variable.
|
// Create the new GEP with the new induction variable.
|
||||||
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
|
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
|
||||||
Gep2->setOperand(NumOperands - 1, LastIndex);
|
Gep2->setOperand(NumOperands - 1, LastIndex);
|
||||||
Ptr = Builder.Insert(Gep2);
|
Ptr = Builder.Insert(Gep2);
|
||||||
|
} else {
|
||||||
|
// Use the induction element ptr.
|
||||||
|
assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
|
||||||
|
Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
|
||||||
|
}
|
||||||
Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo());
|
Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo());
|
||||||
Value *Val = getVectorValue(SI->getValueOperand());
|
Value *Val = getVectorValue(SI->getValueOperand());
|
||||||
Builder.CreateStore(Val, Ptr)->setAlignment(Alignment);
|
Builder.CreateStore(Val, Ptr)->setAlignment(Alignment);
|
||||||
@@ -1104,23 +1178,31 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
|||||||
unsigned Alignment = LI->getAlignment();
|
unsigned Alignment = LI->getAlignment();
|
||||||
GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
|
GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
|
||||||
|
|
||||||
// If we don't have a gep, or that the pointer is loop invariant,
|
// If the pointer is loop invariant or if it is non consecutive,
|
||||||
// scalarize the load.
|
// scalarize the load.
|
||||||
if (!Gep || Legal->isUniform(Gep) || !Legal->isConsecutiveGep(Gep)) {
|
bool Con = Legal->isConsecutivePtr(Ptr);
|
||||||
|
if (Legal->isUniform(Ptr) || !Con) {
|
||||||
scalarizeInstruction(Inst);
|
scalarizeInstruction(Inst);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The last index does not have to be the induction. It can be
|
if (Gep) {
|
||||||
// consecutive and be a function of the index. For example A[I+1];
|
// The last index does not have to be the induction. It can be
|
||||||
unsigned NumOperands = Gep->getNumOperands();
|
// consecutive and be a function of the index. For example A[I+1];
|
||||||
Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
|
unsigned NumOperands = Gep->getNumOperands();
|
||||||
LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
|
Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
|
||||||
|
LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
|
||||||
|
|
||||||
|
// Create the new GEP with the new induction variable.
|
||||||
|
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
|
||||||
|
Gep2->setOperand(NumOperands - 1, LastIndex);
|
||||||
|
Ptr = Builder.Insert(Gep2);
|
||||||
|
} else {
|
||||||
|
// Use the induction element ptr.
|
||||||
|
assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
|
||||||
|
Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
|
||||||
|
}
|
||||||
|
|
||||||
// Create the new GEP with the new induction variable.
|
|
||||||
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
|
|
||||||
Gep2->setOperand(NumOperands - 1, LastIndex);
|
|
||||||
Ptr = Builder.Insert(Gep2);
|
|
||||||
Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo());
|
Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo());
|
||||||
LI = Builder.CreateLoad(Ptr);
|
LI = Builder.CreateLoad(Ptr);
|
||||||
LI->setAlignment(Alignment);
|
LI->setAlignment(Alignment);
|
||||||
@@ -1301,7 +1383,7 @@ bool LoopVectorizationLegality::canVectorize() {
|
|||||||
if (!TheLoop->getLoopPreheader()) {
|
if (!TheLoop->getLoopPreheader()) {
|
||||||
assert(false && "No preheader!!");
|
assert(false && "No preheader!!");
|
||||||
DEBUG(dbgs() << "LV: Loop not normalized." << "\n");
|
DEBUG(dbgs() << "LV: Loop not normalized." << "\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We can only vectorize single basic block loops.
|
// We can only vectorize single basic block loops.
|
||||||
@@ -1347,6 +1429,7 @@ bool LoopVectorizationLegality::canVectorize() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
|
bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
|
||||||
|
|
||||||
BasicBlock *PreHeader = TheLoop->getLoopPreheader();
|
BasicBlock *PreHeader = TheLoop->getLoopPreheader();
|
||||||
|
|
||||||
// Scan the instructions in the block and look for hazards.
|
// Scan the instructions in the block and look for hazards.
|
||||||
@@ -1440,8 +1523,8 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
|
|||||||
} // next instr.
|
} // next instr.
|
||||||
|
|
||||||
if (!Induction) {
|
if (!Induction) {
|
||||||
DEBUG(dbgs() << "LV: Did not find an induction var.\n");
|
DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
|
||||||
return false;
|
assert(getInductionVars()->size() && "No induction variables");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't vectorize if the memory dependencies do not allow vectorization.
|
// Don't vectorize if the memory dependencies do not allow vectorization.
|
||||||
@@ -1458,15 +1541,10 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
|
|||||||
while (Worklist.size()) {
|
while (Worklist.size()) {
|
||||||
Instruction *I = dyn_cast<Instruction>(Worklist.back());
|
Instruction *I = dyn_cast<Instruction>(Worklist.back());
|
||||||
Worklist.pop_back();
|
Worklist.pop_back();
|
||||||
// Look at instructions inside this block.
|
|
||||||
if (!I) continue;
|
|
||||||
if (I->getParent() != &BB) continue;
|
|
||||||
|
|
||||||
// Stop when reaching PHI nodes.
|
// Look at instructions inside this block. Stop when reaching PHI nodes.
|
||||||
if (isa<PHINode>(I)) {
|
if (!I || I->getParent() != &BB || isa<PHINode>(I))
|
||||||
assert(I == Induction && "Found a uniform PHI that is not the induction");
|
continue;
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// This is a known uniform.
|
// This is a known uniform.
|
||||||
Uniforms.insert(I);
|
Uniforms.insert(I);
|
||||||
@@ -1569,7 +1647,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
|
|||||||
// If the address of i is unknown (for example A[B[i]]) then we may
|
// If the address of i is unknown (for example A[B[i]]) then we may
|
||||||
// read a few words, modify, and write a few words, and some of the
|
// read a few words, modify, and write a few words, and some of the
|
||||||
// words may be written to the same address.
|
// words may be written to the same address.
|
||||||
if (Seen.insert(Ptr) || !isConsecutiveGep(Ptr))
|
if (Seen.insert(Ptr) || !isConsecutivePtr(Ptr))
|
||||||
Reads.push_back(Ptr);
|
Reads.push_back(Ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1585,7 +1663,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
|
|||||||
bool RT = true;
|
bool RT = true;
|
||||||
for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I)
|
for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I)
|
||||||
if (hasComputableBounds(*I)) {
|
if (hasComputableBounds(*I)) {
|
||||||
PtrRtCheck.Pointers.push_back(*I);
|
PtrRtCheck.insert_pointer(SE, TheLoop, *I);
|
||||||
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
|
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
|
||||||
} else {
|
} else {
|
||||||
RT = false;
|
RT = false;
|
||||||
@@ -1593,7 +1671,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
|
|||||||
}
|
}
|
||||||
for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I)
|
for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I)
|
||||||
if (hasComputableBounds(*I)) {
|
if (hasComputableBounds(*I)) {
|
||||||
PtrRtCheck.Pointers.push_back(*I);
|
PtrRtCheck.insert_pointer(SE, TheLoop, *I);
|
||||||
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
|
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
|
||||||
} else {
|
} else {
|
||||||
RT = false;
|
RT = false;
|
||||||
@@ -1603,7 +1681,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
|
|||||||
// Check that we did not collect too many pointers or find an
|
// Check that we did not collect too many pointers or find an
|
||||||
// unsizeable pointer.
|
// unsizeable pointer.
|
||||||
if (!RT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) {
|
if (!RT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) {
|
||||||
PtrRtCheck.Pointers.clear();
|
PtrRtCheck.reset();
|
||||||
RT = false;
|
RT = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1658,8 +1736,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
|
|||||||
|
|
||||||
// It is safe to vectorize and we don't need any runtime checks.
|
// It is safe to vectorize and we don't need any runtime checks.
|
||||||
DEBUG(dbgs() << "LV: We don't need a runtime memory check.\n");
|
DEBUG(dbgs() << "LV: We don't need a runtime memory check.\n");
|
||||||
PtrRtCheck.Pointers.clear();
|
PtrRtCheck.reset();
|
||||||
PtrRtCheck.Need = false;
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1917,7 +1994,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
|||||||
SI->getAlignment(), SI->getPointerAddressSpace());
|
SI->getAlignment(), SI->getPointerAddressSpace());
|
||||||
|
|
||||||
// Scalarized stores.
|
// Scalarized stores.
|
||||||
if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
|
if (!Legal->isConsecutivePtr(SI->getPointerOperand())) {
|
||||||
unsigned Cost = 0;
|
unsigned Cost = 0;
|
||||||
unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
|
unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
|
||||||
ValTy);
|
ValTy);
|
||||||
@@ -1944,7 +2021,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
|||||||
LI->getPointerAddressSpace());
|
LI->getPointerAddressSpace());
|
||||||
|
|
||||||
// Scalarized loads.
|
// Scalarized loads.
|
||||||
if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
|
if (!Legal->isConsecutivePtr(LI->getPointerOperand())) {
|
||||||
unsigned Cost = 0;
|
unsigned Cost = 0;
|
||||||
unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy);
|
unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy);
|
||||||
// The cost of inserting the loaded value into the result vector.
|
// The cost of inserting the loaded value into the result vector.
|
||||||
|
|||||||
33
test/Transforms/LoopVectorize/no_int_induction.ll
Normal file
33
test/Transforms/LoopVectorize/no_int_induction.ll
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
|
||||||
|
|
||||||
|
; int __attribute__((noinline)) sum_array(int *A, int n) {
|
||||||
|
; return std::accumulate(A, A + n, 0);
|
||||||
|
; }
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
|
target triple = "x86_64-apple-macosx10.8.0"
|
||||||
|
|
||||||
|
;CHECK: @sum_array
|
||||||
|
;CHECK: phi <4 x i32>
|
||||||
|
;CHECK: load <4 x i32>
|
||||||
|
;CHECK: add nsw <4 x i32>
|
||||||
|
;CHECK: ret i32
|
||||||
|
define i32 @sum_array(i32* %A, i32 %n) nounwind uwtable readonly noinline ssp {
|
||||||
|
%1 = sext i32 %n to i64
|
||||||
|
%2 = getelementptr inbounds i32* %A, i64 %1
|
||||||
|
%3 = icmp eq i32 %n, 0
|
||||||
|
br i1 %3, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
|
||||||
|
|
||||||
|
.lr.ph.i: ; preds = %0, %.lr.ph.i
|
||||||
|
%.03.i = phi i32* [ %6, %.lr.ph.i ], [ %A, %0 ]
|
||||||
|
%.012.i = phi i32 [ %5, %.lr.ph.i ], [ 0, %0 ]
|
||||||
|
%4 = load i32* %.03.i, align 4
|
||||||
|
%5 = add nsw i32 %4, %.012.i
|
||||||
|
%6 = getelementptr inbounds i32* %.03.i, i64 1
|
||||||
|
%7 = icmp eq i32* %6, %2
|
||||||
|
br i1 %7, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
|
||||||
|
|
||||||
|
_ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %.lr.ph.i, %0
|
||||||
|
%.01.lcssa.i = phi i32 [ 0, %0 ], [ %5, %.lr.ph.i ]
|
||||||
|
ret i32 %.01.lcssa.i
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user